From 0145060644d71dad65868eee317e2faf23957077 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 7 Mar 2022 16:28:21 -0800 Subject: [PATCH 01/42] Add curve25519 field negation s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/4c8f40eb9fa85e3dcb61247bf8ef2cebc08f9c33 --- arm/curve25519/bignum_neg_p25519.S | 77 +++++++++++++++++++++++ x86_att/curve25519/bignum_neg_p25519.S | 85 ++++++++++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 arm/curve25519/bignum_neg_p25519.S create mode 100644 x86_att/curve25519/bignum_neg_p25519.S diff --git a/arm/curve25519/bignum_neg_p25519.S b/arm/curve25519/bignum_neg_p25519.S new file mode 100644 index 0000000000..20fd99c431 --- /dev/null +++ b/arm/curve25519/bignum_neg_p25519.S @@ -0,0 +1,77 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced +// Input x[4]; output z[4] +// +// extern void bignum_neg_p25519 +// (uint64_t z[static 4], uint64_t x[static 4]); +// +// Standard ARM ABI: X0 = z, X1 = x +// ---------------------------------------------------------------------------- + + .globl bignum_neg_p25519 + .globl _bignum_neg_p25519 + .text + .balign 4 + +#define z x0 +#define x x1 + +#define d0 x2 +#define d1 x3 +#define d2 x4 +#define d3 x5 +#define c x6 +#define d x7 + +bignum_neg_p25519: +_bignum_neg_p25519: + +// Load the digits of x and compute [d3;d2;d1;d0] = (2^255 - 19) - x +// while also computing c = the OR of the digits of x + + ldp d0, d1, [x] + mov d, #-19 + orr c, d0, d1 + subs d0, d, d0 + mov d, #-1 + sbcs d1, d, d1 + ldp d2, d3, [x, #16] + orr c, c, d2 + sbcs d2, d, d2 + mov d, #0x7FFFFFFFFFFFFFFF + orr c, c, d3 + sbc d3, d, d3 + +// If in fact c = 0 then the result is zero, otherwise the main result + + cmp c, xzr + csel d0, d0, xzr, ne + csel d1, d1, xzr, ne + csel d2, d2, xzr, ne + csel d3, d3, xzr, ne + +// Write back result and return + + stp d0, d1, [z] + stp d2, d3, [z, #16] + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/x86_att/curve25519/bignum_neg_p25519.S b/x86_att/curve25519/bignum_neg_p25519.S new file mode 100644 index 0000000000..d246dc8cba --- /dev/null +++ b/x86_att/curve25519/bignum_neg_p25519.S @@ -0,0 +1,85 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +// ---------------------------------------------------------------------------- +// Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced +// Input x[4]; output z[4] +// +// extern void bignum_neg_p25519 +// (uint64_t z[static 4], uint64_t x[static 4]); +// +// Standard x86-64 ABI: RDI = z, RSI = x +// ---------------------------------------------------------------------------- + + + .globl bignum_neg_p25519 + .globl _bignum_neg_p25519 + .text + +#define z %rdi +#define x %rsi + +#define q %rdx +#define n0 %rax +#define n1 %rcx +#define n2 %r8 +#define n3 %r9 + +#define c %r10 + +#define qshort %esi + +bignum_neg_p25519: +_bignum_neg_p25519: + +// Load the 4 digits of x and let q be an OR of all the digits + + movq (x), n0 + movq n0, q + movq 8(x), n1 + orq n1, q + movq 16(x), n2 + orq n2, q + movq 24(x), n3 + orq n3, q + +// Turn q into a strict x <> 0 bitmask, and c into a masked constant [-19] +// so that [q;q;q;c] = [2^256 - 19], masked according to nonzeroness of x + + negq q + sbbq q, q + movq $-19, c + andq q, c + +// Now just do [2^256 - 19] - x and then mask to 255 bits, +// which means in effect the required [2^255 - 19] - x + + subq n0, c + movq c, (z) + movq q, c + sbbq n1, c + movq c, 8(z) + movq q, c + sbbq n2, c + movq c, 16(z) + sbbq n3, q + btr $63, q + movq q, 24(z) + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif From 64b8ca5ae2694856acb91f86709ed34390e29756 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 10 Mar 2022 17:22:55 -0800 Subject: [PATCH 02/42] Regularize spacing of assembler files No labels on the same line as an instruction, and all instructions with initial 8 spaces. Also tweak a couple of instances in the testing code where "bignum_copy" was used inside a reference function. 
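For reference, the bignum_neg_p25519 files touched here compute the
modular negation z := (-x) mod p_25519. A minimal C sketch of that
operation, assuming a fully reduced input x < p_25519 (the reference
function name is invented for illustration, and unsigned __int128 is a
GCC/Clang extension):

    #include <stdint.h>

    /* (-x) mod p_25519 is p_25519 - x when x is nonzero, else 0 */
    void bignum_neg_p25519_ref(uint64_t z[4], const uint64_t x[4])
    {
        static const uint64_t p[4] = {           /* p_25519 = 2^255 - 19 */
            UINT64_C(0xffffffffffffffed), UINT64_C(0xffffffffffffffff),
            UINT64_C(0xffffffffffffffff), UINT64_C(0x7fffffffffffffff)
        };
        uint64_t nz = x[0] | x[1] | x[2] | x[3];  /* zero iff x = 0 */
        uint64_t b = 0;                           /* running borrow */
        for (int i = 0; i < 4; i++) {
            unsigned __int128 d = (unsigned __int128)p[i] - x[i] - b;
            z[i] = nz ? (uint64_t)d : 0;          /* zero result if x = 0 */
            b = (uint64_t)(d >> 64) & 1;          /* borrow out of this word */
        }
    }

Like the assembly, this returns 0 rather than p_25519 when x = 0, so the
output stays reduced; the assembly versions make the selection branch-free
(csel on ARM, an arithmetic mask on x86) instead of using a conditional.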
s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/6d2bf13e21cfc38d5398f93a5e3de016f41e0dbb --- arm/curve25519/bignum_neg_p25519.S | 40 +- arm/fastmul/bignum_emontredc_8n.S | 382 +++++------ arm/generic/bignum_ge.S | 68 +- arm/generic/bignum_mul.S | 66 +- arm/generic/bignum_optsub.S | 28 +- arm/generic/bignum_sqr.S | 98 +-- arm/p384/bignum_add_p384.S | 78 +-- arm/p384/bignum_bigendian_6.S | 220 +++---- arm/p384/bignum_cmul_p384.S | 104 +-- arm/p384/bignum_deamont_p384.S | 94 +-- arm/p384/bignum_demont_p384.S | 56 +- arm/p384/bignum_double_p384.S | 60 +- arm/p384/bignum_half_p384.S | 56 +- arm/p384/bignum_littleendian_6.S | 208 +++--- arm/p384/bignum_mod_n384.S | 174 ++--- arm/p384/bignum_mod_n384_6.S | 52 +- arm/p384/bignum_mod_p384.S | 164 ++--- arm/p384/bignum_mod_p384_6.S | 44 +- arm/p384/bignum_montmul_p384.S | 510 +++++++-------- arm/p384/bignum_montmul_p384_alt.S | 418 ++++++------ arm/p384/bignum_montsqr_p384.S | 352 +++++----- arm/p384/bignum_montsqr_p384_alt.S | 290 ++++----- arm/p384/bignum_mux_6.S | 52 +- arm/p384/bignum_neg_p384.S | 52 +- arm/p384/bignum_nonzero_6.S | 22 +- arm/p384/bignum_optneg_p384.S | 62 +- arm/p384/bignum_sub_p384.S | 56 +- arm/p384/bignum_tomont_p384.S | 116 ++-- arm/p384/bignum_triple_p384.S | 86 +-- arm/p521/bignum_add_p521.S | 76 +-- arm/p521/bignum_cmul_p521.S | 128 ++-- arm/p521/bignum_deamont_p521.S | 86 +-- arm/p521/bignum_demont_p521.S | 44 +- arm/p521/bignum_double_p521.S | 44 +- arm/p521/bignum_fromlebytes_p521.S | 284 ++++---- arm/p521/bignum_half_p521.S | 52 +- arm/p521/bignum_mod_n521_9.S | 124 ++-- arm/p521/bignum_mod_p521_9.S | 66 +- arm/p521/bignum_montmul_p521.S | 688 ++++++++++---------- arm/p521/bignum_montmul_p521_alt.S | 822 +++++++++++------------ arm/p521/bignum_montsqr_p521.S | 868 ++++++++++++------------- arm/p521/bignum_montsqr_p521_alt.S | 524 +++++++-------- arm/p521/bignum_mul_p521.S | 698 ++++++++++---------- arm/p521/bignum_mul_p521_alt.S | 800 +++++++++++------------ arm/p521/bignum_neg_p521.S | 62 +- arm/p521/bignum_optneg_p521.S | 66 +- arm/p521/bignum_sqr_p521.S | 846 ++++++++++++------------ arm/p521/bignum_sqr_p521_alt.S | 502 +++++++------- arm/p521/bignum_sub_p521.S | 70 +- arm/p521/bignum_tolebytes_p521.S | 266 ++++---- arm/p521/bignum_tomont_p521.S | 88 +-- arm/p521/bignum_triple_p521.S | 88 +-- x86_att/curve25519/bignum_neg_p25519.S | 50 +- x86_att/p384/bignum_bigendian_6.S | 38 +- x86_att/p384/bignum_cmul_p384.S | 106 +-- x86_att/p384/bignum_cmul_p384_alt.S | 148 ++--- x86_att/p384/bignum_deamont_p384.S | 36 +- x86_att/p384/bignum_deamont_p384_alt.S | 38 +- x86_att/p384/bignum_demont_p384.S | 36 +- x86_att/p384/bignum_demont_p384_alt.S | 38 +- x86_att/p384/bignum_half_p384.S | 66 +- x86_att/p384/bignum_littleendian_6.S | 26 +- x86_att/p384/bignum_mod_n384.S | 230 +++---- x86_att/p384/bignum_mod_n384_alt.S | 236 +++---- x86_att/p384/bignum_mod_p384.S | 224 +++---- x86_att/p384/bignum_mod_p384_alt.S | 224 +++---- x86_att/p384/bignum_montmul_p384.S | 38 +- x86_att/p384/bignum_montmul_p384_alt.S | 40 +- x86_att/p384/bignum_montsqr_p384.S | 36 +- x86_att/p384/bignum_montsqr_p384_alt.S | 38 +- x86_att/p384/bignum_mux_6.S | 52 +- x86_att/p384/bignum_neg_p384.S | 62 +- x86_att/p384/bignum_nonzero_6.S | 20 +- x86_att/p384/bignum_optneg_p384.S | 86 +-- x86_att/p384/bignum_tomont_p384.S | 38 +- x86_att/p384/bignum_tomont_p384_alt.S | 40 +- x86_att/p384/bignum_triple_p384.S | 130 ++-- x86_att/p384/bignum_triple_p384_alt.S | 130 ++-- x86_att/p521/bignum_cmul_p521.S | 126 ++-- 
x86_att/p521/bignum_cmul_p521_alt.S | 190 +++--- x86_att/p521/bignum_fromlebytes_p521.S | 40 +- x86_att/p521/bignum_half_p521.S | 62 +- x86_att/p521/bignum_neg_p521.S | 80 +-- x86_att/p521/bignum_optneg_p521.S | 84 +-- x86_att/p521/bignum_tolebytes_p521.S | 38 +- x86_att/p521/bignum_triple_p521.S | 138 ++-- x86_att/p521/bignum_triple_p521_alt.S | 174 ++--- 87 files changed, 7219 insertions(+), 7219 deletions(-) diff --git a/arm/curve25519/bignum_neg_p25519.S b/arm/curve25519/bignum_neg_p25519.S index 20fd99c431..24b9df2bf6 100644 --- a/arm/curve25519/bignum_neg_p25519.S +++ b/arm/curve25519/bignum_neg_p25519.S @@ -44,33 +44,33 @@ _bignum_neg_p25519: // Load the digits of x and compute [d3;d2;d1;d0] = (2^255 - 19) - x // while also computing c = the OR of the digits of x - ldp d0, d1, [x] - mov d, #-19 - orr c, d0, d1 - subs d0, d, d0 - mov d, #-1 - sbcs d1, d, d1 - ldp d2, d3, [x, #16] - orr c, c, d2 - sbcs d2, d, d2 - mov d, #0x7FFFFFFFFFFFFFFF - orr c, c, d3 - sbc d3, d, d3 + ldp d0, d1, [x] + mov d, #-19 + orr c, d0, d1 + subs d0, d, d0 + mov d, #-1 + sbcs d1, d, d1 + ldp d2, d3, [x, #16] + orr c, c, d2 + sbcs d2, d, d2 + mov d, #0x7FFFFFFFFFFFFFFF + orr c, c, d3 + sbc d3, d, d3 // If in fact c = 0 then the result is zero, otherwise the main result - cmp c, xzr - csel d0, d0, xzr, ne - csel d1, d1, xzr, ne - csel d2, d2, xzr, ne - csel d3, d3, xzr, ne + cmp c, xzr + csel d0, d0, xzr, ne + csel d1, d1, xzr, ne + csel d2, d2, xzr, ne + csel d3, d3, xzr, ne // Write back result and return - stp d0, d1, [z] - stp d2, d3, [z, #16] + stp d0, d1, [z] + stp d2, d3, [z, #16] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/fastmul/bignum_emontredc_8n.S b/arm/fastmul/bignum_emontredc_8n.S index d9cf20574f..a2b2360c21 100644 --- a/arm/fastmul/bignum_emontredc_8n.S +++ b/arm/fastmul/bignum_emontredc_8n.S @@ -114,94 +114,94 @@ // ----------------------------------------------------------------------- .macro madd4 - mul u0, a0, b0 - mul u4, a1, b1 - mul u5, a2, b2 - mul u6, a3, b3 + mul u0, a0, b0 + mul u4, a1, b1 + mul u5, a2, b2 + mul u6, a3, b3 // Accumulate the simple products as [u7,u6,u5,u4,u0] - umulh c4, a0, b0 - adds u4, u4, c4 - umulh c4, a1, b1 - adcs u5, u5, c4 - umulh c4, a2, b2 - adcs u6, u6, c4 - umulh c4, a3, b3 - adc u7, c4, xzr + umulh c4, a0, b0 + adds u4, u4, c4 + umulh c4, a1, b1 + adcs u5, u5, c4 + umulh c4, a2, b2 + adcs u6, u6, c4 + umulh c4, a3, b3 + adc u7, c4, xzr // Add up the carry-in and the existing z contents - ldp u2, u3, [z] - adds c0, c0, u2 - adcs c1, c1, u3 - ldp u2, u3, [z, #16] - adcs c2, c2, u2 - adcs c3, c3, u3 - adc c4, xzr, xzr + ldp u2, u3, [z] + adds c0, c0, u2 + adcs c1, c1, u3 + ldp u2, u3, [z, #16] + adcs c2, c2, u2 + adcs c3, c3, u3 + adc c4, xzr, xzr // Multiply by B + 1 to get [u7;u6;u5;u4;u1;u0] - adds u1, u4, u0 - adcs u4, u5, u4 - adcs u5, u6, u5 - adcs u6, u7, u6 - adc u7, xzr, u7 + adds u1, u4, u0 + adcs u4, u5, u4 + adcs u5, u6, u5 + adcs u6, u7, u6 + adc u7, xzr, u7 // Multiply by B^2 + 1 to get [u6;u5;u4;u3;u2;u1;u0] - adds u2, u4, u0 - adcs u3, u5, u1 - adcs u4, u6, u4 - adcs u5, u7, u5 - adcs u6, xzr, u6 - adc u7, xzr, u7 + adds u2, u4, u0 + adcs u3, u5, u1 + adcs u4, u6, u4 + adcs u5, u7, u5 + adcs u6, xzr, u6 + adc u7, xzr, u7 // Add in the carry-in and original z contents - adds u0, u0, c0 - adcs u1, u1, c1 - adcs u2, u2, c2 - adcs u3, u3, c3 - adcs u4, u4, c4 - adcs u5, u5, xzr - adcs u6, u6, xzr - adc u7, u7, xzr + adds u0, u0, c0 + adcs u1, u1, c1 + adcs u2, u2, c2 + adcs 
u3, u3, c3 + adcs u4, u4, c4 + adcs u5, u5, xzr + adcs u6, u6, xzr + adc u7, u7, xzr // Now add in all the "complicated" terms. - muldiffnadd u6,u5, c,h,l,t, a2,a3, b3,b2 - adc u7, u7, c - - muldiffnadd u2,u1, c,h,l,t, a0,a1, b1,b0 - adcs u3, u3, c - adcs u4, u4, c - adcs u5, u5, c - adcs u6, u6, c - adc u7, u7, c - - muldiffnadd u5,u4, c,h,l,t, a1,a3, b3,b1 - adcs u6, u6, c - adc u7, u7, c - - muldiffnadd u3,u2, c,h,l,t, a0,a2, b2,b0 - adcs u4, u4, c - adcs u5, u5, c - adcs u6, u6, c - adc u7, u7, c - - muldiffnadd u4,u3, c,h,l,t, a0,a3, b3,b0 - adcs u5, u5, c - adcs u6, u6, c - adc u7, u7, c - muldiffnadd u4,u3, c,h,l,t, a1,a2, b2,b1 - adcs c1, u5, c - adcs c2, u6, c - adc c3, u7, c - mov c0, u4 - - stp u0, u1, [z] - stp u2, u3, [z, #16] + muldiffnadd u6,u5, c,h,l,t, a2,a3, b3,b2 + adc u7, u7, c + + muldiffnadd u2,u1, c,h,l,t, a0,a1, b1,b0 + adcs u3, u3, c + adcs u4, u4, c + adcs u5, u5, c + adcs u6, u6, c + adc u7, u7, c + + muldiffnadd u5,u4, c,h,l,t, a1,a3, b3,b1 + adcs u6, u6, c + adc u7, u7, c + + muldiffnadd u3,u2, c,h,l,t, a0,a2, b2,b0 + adcs u4, u4, c + adcs u5, u5, c + adcs u6, u6, c + adc u7, u7, c + + muldiffnadd u4,u3, c,h,l,t, a0,a3, b3,b0 + adcs u5, u5, c + adcs u6, u6, c + adc u7, u7, c + muldiffnadd u4,u3, c,h,l,t, a1,a2, b2,b1 + adcs c1, u5, c + adcs c2, u6, c + adc c3, u7, c + mov c0, u4 + + stp u0, u1, [z] + stp u2, u3, [z, #16] .endm // ***************************************************** @@ -211,22 +211,22 @@ bignum_emontredc_8n: _bignum_emontredc_8n: - stp x19, x20, [sp, #-16]! - stp x21, x22, [sp, #-16]! - stp x23, x24, [sp, #-16]! - stp x25, x26, [sp, #-16]! - stp x27, x28, [sp, #-16]! + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x27, x28, [sp, #-16]! // Set up (k/4 - 1)<<5 which is used as inner count and pointer fixup // ns i = k/4 as the outer loop count. // At this point skip everything if k/4 = 0, returning our x0 = 0 value - lsr k4m1, x0, #2 - mov i, k4m1 - subs c, k4m1, #1 - bcc end - mov tc, xzr - lsl k4m1, c, #5 + lsr k4m1, x0, #2 + mov i, k4m1 + subs c, k4m1, #1 + bcc end + mov tc, xzr + lsl k4m1, c, #5 // Outer loop, one digit of Montgomery reduction adding in word * m. 
// Rather than propagating the carry to the end each time, we @@ -236,158 +236,158 @@ outerloop: // Load [u3;u2;u1;u0] = bottom 4 digits of the input at current window - ldp u0, u1, [z] - ldp u2, u3, [z, #16] + ldp u0, u1, [z] + ldp u2, u3, [z, #16] // Load the bottom 4 digits of m - ldp b0, b1, [m] - ldp b2, b3, [m, #16] + ldp b0, b1, [m] + ldp b2, b3, [m, #16] // Montgomery step 0 - mul a0, u0, w - mul c0, a0, b0 - mul c1, a0, b1 - mul c2, a0, b2 - mul c3, a0, b3 - adds u0, u0, c0 - umulh c0, a0, b0 - adcs u1, u1, c1 - umulh c1, a0, b1 - adcs u2, u2, c2 - umulh c2, a0, b2 - adcs u3, u3, c3 - umulh c3, a0, b3 - adc u4, xzr, xzr - adds u1, u1, c0 - adcs u2, u2, c1 - adcs u3, u3, c2 - adc u4, u4, c3 + mul a0, u0, w + mul c0, a0, b0 + mul c1, a0, b1 + mul c2, a0, b2 + mul c3, a0, b3 + adds u0, u0, c0 + umulh c0, a0, b0 + adcs u1, u1, c1 + umulh c1, a0, b1 + adcs u2, u2, c2 + umulh c2, a0, b2 + adcs u3, u3, c3 + umulh c3, a0, b3 + adc u4, xzr, xzr + adds u1, u1, c0 + adcs u2, u2, c1 + adcs u3, u3, c2 + adc u4, u4, c3 // Montgomery step 1 - mul a1, u1, w - mul c0, a1, b0 - mul c1, a1, b1 - mul c2, a1, b2 - mul c3, a1, b3 - adds u1, u1, c0 - umulh c0, a1, b0 - adcs u2, u2, c1 - umulh c1, a1, b1 - adcs u3, u3, c2 - umulh c2, a1, b2 - adcs u4, u4, c3 - umulh c3, a1, b3 - adc u5, xzr, xzr - adds u2, u2, c0 - adcs u3, u3, c1 - adcs u4, u4, c2 - adc u5, u5, c3 + mul a1, u1, w + mul c0, a1, b0 + mul c1, a1, b1 + mul c2, a1, b2 + mul c3, a1, b3 + adds u1, u1, c0 + umulh c0, a1, b0 + adcs u2, u2, c1 + umulh c1, a1, b1 + adcs u3, u3, c2 + umulh c2, a1, b2 + adcs u4, u4, c3 + umulh c3, a1, b3 + adc u5, xzr, xzr + adds u2, u2, c0 + adcs u3, u3, c1 + adcs u4, u4, c2 + adc u5, u5, c3 // Montgomery step 2 - mul a2, u2, w - mul c0, a2, b0 - mul c1, a2, b1 - mul c2, a2, b2 - mul c3, a2, b3 - adds u2, u2, c0 - umulh c0, a2, b0 - adcs u3, u3, c1 - umulh c1, a2, b1 - adcs u4, u4, c2 - umulh c2, a2, b2 - adcs u5, u5, c3 - umulh c3, a2, b3 - adc u6, xzr, xzr - adds u3, u3, c0 - adcs u4, u4, c1 - adcs u5, u5, c2 - adc u6, u6, c3 + mul a2, u2, w + mul c0, a2, b0 + mul c1, a2, b1 + mul c2, a2, b2 + mul c3, a2, b3 + adds u2, u2, c0 + umulh c0, a2, b0 + adcs u3, u3, c1 + umulh c1, a2, b1 + adcs u4, u4, c2 + umulh c2, a2, b2 + adcs u5, u5, c3 + umulh c3, a2, b3 + adc u6, xzr, xzr + adds u3, u3, c0 + adcs u4, u4, c1 + adcs u5, u5, c2 + adc u6, u6, c3 // Montgomery step 3. In the last four instructions we put the top in // the carry variables expected by the "madd" block next, which is why // the pattern is slightly different. 
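// Each "Montgomery step" above is the standard word-level pattern: pick a
// multiplier q from the current bottom word, add q times the modulus so
// that bottom word cancels, and carry the rest up. A rough C sketch of one
// step on a 4-digit window (illustrative only: montgomery_step is an
// invented name, w is the negated modular inverse -m^-1 mod 2^64 that
// bignum_emontredc takes as a parameter, and unsigned __int128 is a
// GCC/Clang extension):
//
//     #include <stdint.h>
//
//     static void montgomery_step(uint64_t *u, const uint64_t m[4],
//                                 uint64_t w)
//     {
//         uint64_t q = u[0] * w;   // w*m[0] == -1 (mod 2^64), so
//                                  // u[0] + q*m[0] == 0 (mod 2^64)
//         uint64_t carry = 0;
//         for (int j = 0; j < 4; j++) {
//             unsigned __int128 t =
//                 (unsigned __int128)q * m[j] + u[j] + carry;
//             u[j] = (uint64_t)t;  // u[0] becomes zero by choice of q
//             carry = (uint64_t)(t >> 64);
//         }
//         u[4] += carry;           // the code above instead propagates this
//                                  // into u4/u5/... within the wider window
//     }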
- mul a3, u3, w - mul c0, a3, b0 - mul c1, a3, b1 - mul c2, a3, b2 - mul c3, a3, b3 - adds u3, u3, c0 - umulh c0, a3, b0 - adcs u4, u4, c1 - umulh c1, a3, b1 - adcs u5, u5, c2 - umulh c2, a3, b2 - adcs u6, u6, c3 - umulh c3, a3, b3 - adc u7, xzr, xzr - adds c0, u4, c0 - adcs c1, u5, c1 - adcs c2, u6, c2 - adc c3, u7, c3 + mul a3, u3, w + mul c0, a3, b0 + mul c1, a3, b1 + mul c2, a3, b2 + mul c3, a3, b3 + adds u3, u3, c0 + umulh c0, a3, b0 + adcs u4, u4, c1 + umulh c1, a3, b1 + adcs u5, u5, c2 + umulh c2, a3, b2 + adcs u6, u6, c3 + umulh c3, a3, b3 + adc u7, xzr, xzr + adds c0, u4, c0 + adcs c1, u5, c1 + adcs c2, u6, c2 + adc c3, u7, c3 // Stash the multipliers as expected by the bignum_emontredc interface // We don't use these ourselves again though; they stay in [a3;a2;a1;a0] - stp a0, a1, [z] - stp a2, a3, [z, #16] + stp a0, a1, [z] + stp a2, a3, [z, #16] // Repeated multiply-add block to do the k/4-1 remaining 4-digit chunks - cbz k4m1, madddone - mov j, k4m1 + cbz k4m1, madddone + mov j, k4m1 maddloop: - add m, m, #32 - add z, z, #32 - - ldp b0, b1, [m] - ldp b2, b3, [m, #16] - madd4 - subs j, j, #32 - bne maddloop + add m, m, #32 + add z, z, #32 + + ldp b0, b1, [m] + ldp b2, b3, [m, #16] + madd4 + subs j, j, #32 + bne maddloop madddone: // Add the carry out to the existing z contents, propagating the // top carry tc up by 32 places as we move "leftwards". - ldp u0, u1, [z, #32] - ldp u2, u3, [z, #48] - adds xzr, tc, tc - adcs u0, u0, c0 - adcs u1, u1, c1 - adcs u2, u2, c2 - adcs u3, u3, c3 - csetm tc, cs - stp u0, u1, [z, #32] - stp u2, u3, [z, #48] + ldp u0, u1, [z, #32] + ldp u2, u3, [z, #48] + adds xzr, tc, tc + adcs u0, u0, c0 + adcs u1, u1, c1 + adcs u2, u2, c2 + adcs u3, u3, c3 + csetm tc, cs + stp u0, u1, [z, #32] + stp u2, u3, [z, #48] // Compensate for the repeated bumps in m and z in the inner loop - sub z, z, k4m1 - sub m, m, k4m1 + sub z, z, k4m1 + sub m, m, k4m1 // Bump up z only and keep going - add z, z, #32 - subs i, i, #1 - bne outerloop + add z, z, #32 + subs i, i, #1 + bne outerloop // Return the top carry as 0 or 1 (it's currently a bitmask) - neg x0, tc + neg x0, tc end: - ldp x27, x28, [sp], #16 - ldp x25, x26, [sp], #16 - ldp x23, x24, [sp], #16 - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 + ldp x27, x28, [sp], #16 + ldp x25, x26, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/generic/bignum_ge.S b/arm/generic/bignum_ge.S index ba62be9cfc..19db769366 100644 --- a/arm/generic/bignum_ge.S +++ b/arm/generic/bignum_ge.S @@ -42,59 +42,59 @@ _bignum_ge: // Zero the main index counter for both branches - mov i, xzr + mov i, xzr // Speculatively form m := m - n and do case split - subs m, m, n - bcc ylonger + subs m, m, n + bcc ylonger // The case where x is longer or of the same size (m >= n) // Note that CF=1 initially by the fact that we reach this point - cbz n, xtest + cbz n, xtest xmainloop: - ldr a, [x, i, lsl #3] - ldr d, [y, i, lsl #3] - sbcs xzr, a, d - add i, i, #1 - sub n, n, #1 - cbnz n, xmainloop + ldr a, [x, i, lsl #3] + ldr d, [y, i, lsl #3] + sbcs xzr, a, d + add i, i, #1 + sub n, n, #1 + cbnz n, xmainloop xtest: - cbz m, xskip + cbz m, xskip xtoploop: - ldr a, [x, i, lsl #3] - sbcs xzr, a, xzr - add i, i, #1 - sub m, m, #1 - cbnz m, xtoploop + ldr a, [x, i, lsl #3] + sbcs xzr, a, xzr + add i, i, #1 + sub m, m, #1 + cbnz m, xtoploop xskip: - cset x0, cs - ret + cset x0, cs + ret // The case where y 
is longer (n > m) // The first "adds" also makes sure CF=1 initially in this branch ylonger: - adds m, m, n - cbz m, ytoploop - sub n, n, m + adds m, m, n + cbz m, ytoploop + sub n, n, m ymainloop: - ldr a, [x, i, lsl #3] - ldr d, [y, i, lsl #3] - sbcs xzr, a, d - add i, i, #1 - sub m, m, #1 - cbnz m, ymainloop + ldr a, [x, i, lsl #3] + ldr d, [y, i, lsl #3] + sbcs xzr, a, d + add i, i, #1 + sub m, m, #1 + cbnz m, ymainloop ytoploop: - ldr a, [y, i, lsl #3] - sbcs xzr, xzr, a - add i, i, #1 - sub n, n, #1 - cbnz n, ytoploop + ldr a, [y, i, lsl #3] + sbcs xzr, xzr, a + add i, i, #1 + sub n, n, #1 + cbnz n, ytoploop - cset x0, cs - ret + cset x0, cs + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/generic/bignum_mul.S b/arm/generic/bignum_mul.S index fae4f401a1..055ae3bf7f 100644 --- a/arm/generic/bignum_mul.S +++ b/arm/generic/bignum_mul.S @@ -55,71 +55,71 @@ _bignum_mul: // If p = 0 the result is trivial and nothing needs doing - cbz p, end + cbz p, end // initialize (h,l) = 0, saving c = 0 for inside the loop - mov l, xzr - mov h, xzr + mov l, xzr + mov h, xzr // Iterate outer loop from k = 0 ... k = p - 1 producing result digits - mov k, xzr + mov k, xzr outerloop: // Zero the carry for this stage - mov c, xzr + mov c, xzr // First let a = MAX 0 (k + 1 - n) and b = MIN (k + 1) m // We want to accumulate all x[i] * y[k - i] for a <= i < b - add a, k, #1 - cmp a, m - csel b, a, m, cc - subs a, a, n - csel a, a, xzr, cs + add a, k, #1 + cmp a, m + csel b, a, m, cc + subs a, a, n + csel a, a, xzr, cs // Set loop count i = b - a, and skip everything if it's <= 0 - subs i, b, a - bls innerend + subs i, b, a + bls innerend // Use temporary pointers xx = x + 8 * a and yy = y + 8 * (k - b) // Increment xx per iteration but just use loop counter with yy // So we start with [xx] = x[a] and [yy] = y[(k - b) + (b - a)] = y[k - a] - lsl xx, a, #3 - add xx, xx, x + lsl xx, a, #3 + add xx, xx, x - sub yy, k, b - lsl yy, yy, #3 - add yy, yy, y + sub yy, k, b + lsl yy, yy, #3 + add yy, yy, y // And index using the loop counter i = b - a, ..., i = 1 innerloop: - ldr a, [xx], #8 - ldr b, [yy, i, lsl #3] - mul d, a, b - umulh a, a, b - adds l, l, d - adcs h, h, a - adc c, c, xzr - subs i, i, #1 - bne innerloop + ldr a, [xx], #8 + ldr b, [yy, i, lsl #3] + mul d, a, b + umulh a, a, b + adds l, l, d + adcs h, h, a + adc c, c, xzr + subs i, i, #1 + bne innerloop innerend: - str l, [z, k, lsl #3] - mov l, h - mov h, c + str l, [z, k, lsl #3] + mov l, h + mov h, c - add k, k, #1 - cmp k, p - bcc outerloop // Inverted carry flag! + add k, k, #1 + cmp k, p + bcc outerloop // Inverted carry flag! end: - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/generic/bignum_optsub.S b/arm/generic/bignum_optsub.S index bda17156ed..fd952d1369 100644 --- a/arm/generic/bignum_optsub.S +++ b/arm/generic/bignum_optsub.S @@ -47,35 +47,35 @@ _bignum_optsub: // if k = 0 do nothing. 
This is also the right top carry in X0 - cbz k, end + cbz k, end // Convert p into a strict bitmask (same register in fact) - cmp p, xzr - csetm m, ne + cmp p, xzr + csetm m, ne // Set i = 0 *and* make sure initial ~CF = 0 - subs i, xzr, xzr + subs i, xzr, xzr // Main loop loop: - ldr a, [x, i] - ldr b, [y, i] - and b, b, m - sbcs a, a, b - str a, [z, i] - add i, i, #8 - sub k, k, #1 - cbnz k, loop + ldr a, [x, i] + ldr b, [y, i] + and b, b, m + sbcs a, a, b + str a, [z, i] + add i, i, #8 + sub k, k, #1 + cbnz k, loop // Return (non-inverted) carry flag - cset x0, cc + cset x0, cc end: - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/generic/bignum_sqr.S b/arm/generic/bignum_sqr.S index da1aa2eb29..f6e5a7439f 100644 --- a/arm/generic/bignum_sqr.S +++ b/arm/generic/bignum_sqr.S @@ -54,16 +54,16 @@ _bignum_sqr: // If p = 0 the result is trivial and nothing needs doing - cbz p, end + cbz p, end // initialize (hh,ll) = 0 - mov ll, xzr - mov hh, xzr + mov ll, xzr + mov hh, xzr // Iterate outer loop from k = 0 ... k = p - 1 producing result digits - mov k, xzr + mov k, xzr outerloop: // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n @@ -72,80 +72,80 @@ outerloop: // 2 * x[i] * x[k - i] for i < htop, where htop = MIN ((k+1)/2) n // Initialize i = bot; in fact just compute bot as i directly. - add i, k, #1 - lsr htop, i, #1 - cmp htop, n - csel htop, htop, n, cc - subs i, i, n - csel i, i, xzr, cs + add i, k, #1 + lsr htop, i, #1 + cmp htop, n + csel htop, htop, n, cc + subs i, i, n + csel i, i, xzr, cs // Initialize the three-part local sum (c,h,l) - mov l, xzr - mov h, xzr - mov c, xzr + mov l, xzr + mov h, xzr + mov c, xzr // If htop <= bot then main doubled part of the sum is empty - cmp htop, i - bls nosumming + cmp htop, i + bls nosumming // Use a moving pointer for [y] = x[k-i] for the cofactor - sub y, k, i - lsl y, y, #3 - add y, x, y + sub y, k, i + lsl y, y, #3 + add y, x, y // Do the main part of the sum x[i] * x[k - i] for 2 * i < k innerloop: - ldr a, [x, i, lsl #3] - ldr b, [y], #-8 - mul d, a, b - umulh a, a, b - adds l, l, d - adcs h, h, a - adc c, c, xzr - add i, i, #1 - cmp i, htop - bne innerloop + ldr a, [x, i, lsl #3] + ldr b, [y], #-8 + mul d, a, b + umulh a, a, b + adds l, l, d + adcs h, h, a + adc c, c, xzr + add i, i, #1 + cmp i, htop + bne innerloop // Now double it - adds l, l, l - adcs h, h, h - adc c, c, c + adds l, l, l + adcs h, h, h + adc c, c, c // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term nosumming: - ands xzr, k, #1 - bne innerend - cmp i, n - bcs innerend + ands xzr, k, #1 + bne innerend + cmp i, n + bcs innerend - ldr a, [x, i, lsl #3] - mul d, a, a - umulh a, a, a - adds ll, ll, d - adcs hh, hh, a - adc c, c, xzr + ldr a, [x, i, lsl #3] + mul d, a, a + umulh a, a, a + adds ll, ll, d + adcs hh, hh, a + adc c, c, xzr // Now add the local sum into the global sum, store and shift innerend: - adds l, l, ll - str l, [z, k, lsl #3] - adcs ll, h, hh - adc hh, c, xzr + adds l, l, ll + str l, [z, k, lsl #3] + adcs ll, h, hh + adc hh, c, xzr - add k, k, #1 - cmp k, p - bcc outerloop + add k, k, #1 + cmp k, p + bcc outerloop end: - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_add_p384.S b/arm/p384/bignum_add_p384.S index 74c1a29940..a2558da130 100644 --- a/arm/p384/bignum_add_p384.S +++ b/arm/p384/bignum_add_p384.S @@ -46,59 +46,59 @@ _bignum_add_p384: // First just add the numbers as c + [d5; 
d4; d3; d2; d1; d0] - ldp d0, d1, [x] - ldp l, c, [y] - adds d0, d0, l - adcs d1, d1, c - ldp d2, d3, [x, #16] - ldp l, c, [y, #16] - adcs d2, d2, l - adcs d3, d3, c - ldp d4, d5, [x, #32] - ldp l, c, [y, #32] - adcs d4, d4, l - adcs d5, d5, c - adc c, xzr, xzr + ldp d0, d1, [x] + ldp l, c, [y] + adds d0, d0, l + adcs d1, d1, c + ldp d2, d3, [x, #16] + ldp l, c, [y, #16] + adcs d2, d2, l + adcs d3, d3, c + ldp d4, d5, [x, #32] + ldp l, c, [y, #32] + adcs d4, d4, l + adcs d5, d5, c + adc c, xzr, xzr // Now compare [d5; d4; d3; d2; d1; d0] with p_384 - mov l, #0x00000000ffffffff - subs xzr, d0, l - mov l, #0xffffffff00000000 - sbcs xzr, d1, l - mov l, #0xfffffffffffffffe - sbcs xzr, d2, l - adcs xzr, d3, xzr - adcs xzr, d4, xzr - adcs xzr, d5, xzr + mov l, #0x00000000ffffffff + subs xzr, d0, l + mov l, #0xffffffff00000000 + sbcs xzr, d1, l + mov l, #0xfffffffffffffffe + sbcs xzr, d2, l + adcs xzr, d3, xzr + adcs xzr, d4, xzr + adcs xzr, d5, xzr // Now CF is set (because of inversion) if (x + y) % 2^384 >= p_384 // Thus we want to correct if either this is set or the original carry c was - adcs c, c, xzr - csetm c, ne + adcs c, c, xzr + csetm c, ne // Now correct by subtracting masked p_384 - mov l, #0x00000000ffffffff - and l, l, c - subs d0, d0, l - eor l, l, c - sbcs d1, d1, l - mov l, #0xfffffffffffffffe - and l, l, c - sbcs d2, d2, l - sbcs d3, d3, c - sbcs d4, d4, c - sbc d5, d5, c + mov l, #0x00000000ffffffff + and l, l, c + subs d0, d0, l + eor l, l, c + sbcs d1, d1, l + mov l, #0xfffffffffffffffe + and l, l, c + sbcs d2, d2, l + sbcs d3, d3, c + sbcs d4, d4, c + sbc d5, d5, c // Store the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_bigendian_6.S b/arm/p384/bignum_bigendian_6.S index 0e9a9bc595..84bbf01425 100644 --- a/arm/p384/bignum_bigendian_6.S +++ b/arm/p384/bignum_bigendian_6.S @@ -66,122 +66,122 @@ _bignum_tobebytes_6: // 0 and 5 words - ldrb dshort, [x, #7] - extr a, d, xzr, #8 - ldrb dshort, [x, #6] - extr a, d, a, #8 - ldrb dshort, [x, #5] - extr a, d, a, #8 - ldrb dshort, [x, #4] - extr a, d, a, #8 - ldrb dshort, [x, #3] - extr a, d, a, #8 - ldrb dshort, [x, #2] - extr a, d, a, #8 - ldrb dshort, [x, #1] - extr a, d, a, #8 - ldrb dshort, [x] - extr a, d, a, #8 - - ldrb dshort, [x, #47] - extr c, d, xzr, #8 - ldrb dshort, [x, #46] - extr c, d, c, #8 - ldrb dshort, [x, #45] - extr c, d, c, #8 - ldrb dshort, [x, #44] - extr c, d, c, #8 - ldrb dshort, [x, #43] - extr c, d, c, #8 - ldrb dshort, [x, #42] - extr c, d, c, #8 - ldrb dshort, [x, #41] - extr c, d, c, #8 - ldrb dshort, [x, #40] - extr c, d, c, #8 - - str a, [z, #40] - str c, [z] + ldrb dshort, [x, #7] + extr a, d, xzr, #8 + ldrb dshort, [x, #6] + extr a, d, a, #8 + ldrb dshort, [x, #5] + extr a, d, a, #8 + ldrb dshort, [x, #4] + extr a, d, a, #8 + ldrb dshort, [x, #3] + extr a, d, a, #8 + ldrb dshort, [x, #2] + extr a, d, a, #8 + ldrb dshort, [x, #1] + extr a, d, a, #8 + ldrb dshort, [x] + extr a, d, a, #8 + + ldrb dshort, [x, #47] + extr c, d, xzr, #8 + ldrb dshort, [x, #46] + extr c, d, c, #8 + ldrb dshort, [x, #45] + extr c, d, c, #8 + ldrb dshort, [x, #44] + extr c, d, c, #8 + ldrb dshort, [x, #43] + extr c, d, c, #8 + ldrb dshort, [x, #42] + extr c, d, c, #8 + ldrb dshort, [x, #41] + extr c, d, c, #8 + ldrb dshort, [x, #40] + extr c, d, c, #8 + + str a, [z, #40] + str c, [z] // 1 and 4 words - ldrb dshort, 
[x, #15] - extr a, d, xzr, #8 - ldrb dshort, [x, #14] - extr a, d, a, #8 - ldrb dshort, [x, #13] - extr a, d, a, #8 - ldrb dshort, [x, #12] - extr a, d, a, #8 - ldrb dshort, [x, #11] - extr a, d, a, #8 - ldrb dshort, [x, #10] - extr a, d, a, #8 - ldrb dshort, [x, #9] - extr a, d, a, #8 - ldrb dshort, [x, #8] - extr a, d, a, #8 - - ldrb dshort, [x, #39] - extr c, d, xzr, #8 - ldrb dshort, [x, #38] - extr c, d, c, #8 - ldrb dshort, [x, #37] - extr c, d, c, #8 - ldrb dshort, [x, #36] - extr c, d, c, #8 - ldrb dshort, [x, #35] - extr c, d, c, #8 - ldrb dshort, [x, #34] - extr c, d, c, #8 - ldrb dshort, [x, #33] - extr c, d, c, #8 - ldrb dshort, [x, #32] - extr c, d, c, #8 - - str a, [z, #32] - str c, [z, #8] + ldrb dshort, [x, #15] + extr a, d, xzr, #8 + ldrb dshort, [x, #14] + extr a, d, a, #8 + ldrb dshort, [x, #13] + extr a, d, a, #8 + ldrb dshort, [x, #12] + extr a, d, a, #8 + ldrb dshort, [x, #11] + extr a, d, a, #8 + ldrb dshort, [x, #10] + extr a, d, a, #8 + ldrb dshort, [x, #9] + extr a, d, a, #8 + ldrb dshort, [x, #8] + extr a, d, a, #8 + + ldrb dshort, [x, #39] + extr c, d, xzr, #8 + ldrb dshort, [x, #38] + extr c, d, c, #8 + ldrb dshort, [x, #37] + extr c, d, c, #8 + ldrb dshort, [x, #36] + extr c, d, c, #8 + ldrb dshort, [x, #35] + extr c, d, c, #8 + ldrb dshort, [x, #34] + extr c, d, c, #8 + ldrb dshort, [x, #33] + extr c, d, c, #8 + ldrb dshort, [x, #32] + extr c, d, c, #8 + + str a, [z, #32] + str c, [z, #8] // 2 and 3 words - ldrb dshort, [x, #23] - extr a, d, xzr, #8 - ldrb dshort, [x, #22] - extr a, d, a, #8 - ldrb dshort, [x, #21] - extr a, d, a, #8 - ldrb dshort, [x, #20] - extr a, d, a, #8 - ldrb dshort, [x, #19] - extr a, d, a, #8 - ldrb dshort, [x, #18] - extr a, d, a, #8 - ldrb dshort, [x, #17] - extr a, d, a, #8 - ldrb dshort, [x, #16] - extr a, d, a, #8 - - ldrb dshort, [x, #31] - extr c, d, xzr, #8 - ldrb dshort, [x, #30] - extr c, d, c, #8 - ldrb dshort, [x, #29] - extr c, d, c, #8 - ldrb dshort, [x, #28] - extr c, d, c, #8 - ldrb dshort, [x, #27] - extr c, d, c, #8 - ldrb dshort, [x, #26] - extr c, d, c, #8 - ldrb dshort, [x, #25] - extr c, d, c, #8 - ldrb dshort, [x, #24] - extr c, d, c, #8 - - str a, [z, #24] - str c, [z, #16] - - ret + ldrb dshort, [x, #23] + extr a, d, xzr, #8 + ldrb dshort, [x, #22] + extr a, d, a, #8 + ldrb dshort, [x, #21] + extr a, d, a, #8 + ldrb dshort, [x, #20] + extr a, d, a, #8 + ldrb dshort, [x, #19] + extr a, d, a, #8 + ldrb dshort, [x, #18] + extr a, d, a, #8 + ldrb dshort, [x, #17] + extr a, d, a, #8 + ldrb dshort, [x, #16] + extr a, d, a, #8 + + ldrb dshort, [x, #31] + extr c, d, xzr, #8 + ldrb dshort, [x, #30] + extr c, d, c, #8 + ldrb dshort, [x, #29] + extr c, d, c, #8 + ldrb dshort, [x, #28] + extr c, d, c, #8 + ldrb dshort, [x, #27] + extr c, d, c, #8 + ldrb dshort, [x, #26] + extr c, d, c, #8 + ldrb dshort, [x, #25] + extr c, d, c, #8 + ldrb dshort, [x, #24] + extr c, d, c, #8 + + str a, [z, #24] + str c, [z, #16] + + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_cmul_p384.S b/arm/p384/bignum_cmul_p384.S index 38ef03c4d6..2c87e82fd5 100644 --- a/arm/p384/bignum_cmul_p384.S +++ b/arm/p384/bignum_cmul_p384.S @@ -64,27 +64,27 @@ _bignum_cmul_p384_alt: // First do the multiply, straightforwardly, getting [h; d5; ...; d0] - ldp a0, a1, [x] - ldp a2, a3, [x, #16] - ldp a4, a5, [x, #32] - mul d0, c, a0 - mul d1, c, a1 - mul d2, c, a2 - mul d3, c, a3 - mul d4, c, a4 - mul d5, c, a5 - umulh a0, c, a0 - umulh a1, c, a1 - umulh a2, c, a2 - umulh a3, c, a3 - umulh a4, 
c, a4 - umulh h, c, a5 - adds d1, d1, a0 - adcs d2, d2, a1 - adcs d3, d3, a2 - adcs d4, d4, a3 - adcs d5, d5, a4 - adc h, h, xzr + ldp a0, a1, [x] + ldp a2, a3, [x, #16] + ldp a4, a5, [x, #32] + mul d0, c, a0 + mul d1, c, a1 + mul d2, c, a2 + mul d3, c, a3 + mul d4, c, a4 + mul d5, c, a5 + umulh a0, c, a0 + umulh a1, c, a1 + umulh a2, c, a2 + umulh a3, c, a3 + umulh a4, c, a4 + umulh h, c, a5 + adds d1, d1, a0 + adcs d2, d2, a1 + adcs d3, d3, a2 + adcs d4, d4, a3 + adcs d5, d5, a4 + adc h, h, xzr // Let h be the top word of this intermediate product and l the low 6 words. // By the range hypothesis on the input, we know h1 = h + 1 does not wrap @@ -106,47 +106,47 @@ _bignum_cmul_p384_alt: // = l + 2^128 * (h + 1) + 2^96 * (h + 1) - 2^32 * (h + 1) + (h + 1) // = l + 2^128 * (h + 1) + 2^96 * h + 2^32 * ~h + (h + 1) - add h1, h, #1 - orn hn, xzr, h - lsl a0, hn, #32 - extr a1, h, hn, #32 - lsr a2, h, #32 + add h1, h, #1 + orn hn, xzr, h + lsl a0, hn, #32 + extr a1, h, hn, #32 + lsr a2, h, #32 - adds a0, a0, h1 - adcs a1, a1, xzr - adcs a2, a2, h1 - adc a3, xzr, xzr + adds a0, a0, h1 + adcs a1, a1, xzr + adcs a2, a2, h1 + adc a3, xzr, xzr - adds d0, d0, a0 - adcs d1, d1, a1 - adcs d2, d2, a2 - adcs d3, d3, a3 - adcs d4, d4, xzr - adcs d5, d5, xzr + adds d0, d0, a0 + adcs d1, d1, a1 + adcs d2, d2, a2 + adcs d3, d3, a3 + adcs d4, d4, xzr + adcs d5, d5, xzr // Catch the carry and do a masked addition of p_384 - csetm m, cc + csetm m, cc - mov l, #0x00000000ffffffff - and l, l, m - adds d0, d0, l - eor l, l, m - adcs d1, d1, l - mov l, #0xfffffffffffffffe - and l, l, m - adcs d2, d2, l - adcs d3, d3, m - adcs d4, d4, m - adc d5, d5, m + mov l, #0x00000000ffffffff + and l, l, m + adds d0, d0, l + eor l, l, m + adcs d1, d1, l + mov l, #0xfffffffffffffffe + and l, l, m + adcs d2, d2, l + adcs d3, d3, m + adcs d4, d4, m + adc d5, d5, m // Store the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_deamont_p384.S b/arm/p384/bignum_deamont_p384.S index bc6641eda3..a322a8c4a2 100644 --- a/arm/p384/bignum_deamont_p384.S +++ b/arm/p384/bignum_deamont_p384.S @@ -46,29 +46,29 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0, t3,t2,t1) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ /* Recycle d0 (which we know gets implicitly cancelled) to store it */ \ - lsl t1, d0, #32; \ - add d0, t1, d0; \ + lsl t1, d0, #32; \ + add d0, t1, d0; \ /* Now let [t2;t1] = 2^64 * w - w + w_hi where w_hi = floor(w/2^32) */ \ /* We need to subtract 2^32 * this, and we can ignore its lower 32 */ \ /* bits since by design it will cancel anyway; we only need the w_hi */ \ /* part to get the carry propagation going. 
*/ \ - lsr t1, d0, #32; \ - subs t1, t1, d0; \ - sbc t2, d0, xzr; \ + lsr t1, d0, #32; \ + subs t1, t1, d0; \ + sbc t2, d0, xzr; \ /* Now select in t1 the field to subtract from d1 */ \ - extr t1, t2, t1, #32; \ + extr t1, t2, t1, #32; \ /* And now get the terms to subtract from d2 and d3 */ \ - lsr t2, t2, #32; \ - adds t2, t2, d0; \ - adc t3, xzr, xzr; \ + lsr t2, t2, #32; \ + adds t2, t2, d0; \ + adc t3, xzr, xzr; \ /* Do the subtraction of that portion */ \ - subs d1, d1, t1; \ - sbcs d2, d2, t2; \ - sbcs d3, d3, t3; \ - sbcs d4, d4, xzr; \ - sbcs d5, d5, xzr; \ + subs d1, d1, t1; \ + sbcs d2, d2, t2; \ + sbcs d3, d3, t3; \ + sbcs d4, d4, xzr; \ + sbcs d5, d5, xzr; \ /* Now effectively add 2^384 * w by taking d0 as the input for last sbc */ \ - sbc d6, d0, xzr + sbc d6, d0, xzr // Input parameters @@ -97,60 +97,60 @@ _bignum_deamont_p384_alt: // Set up an initial window with the input x and an extra leading zero - ldp d0, d1, [x] - ldp d2, d3, [x, #16] - ldp d4, d5, [x, #32] + ldp d0, d1, [x] + ldp d2, d3, [x, #16] + ldp d4, d5, [x, #32] // Systematically scroll left doing 1-step reductions - montreds(d0,d5,d4,d3,d2,d1,d0, u,v,w) + montreds(d0,d5,d4,d3,d2,d1,d0, u,v,w) - montreds(d1,d0,d5,d4,d3,d2,d1, u,v,w) + montreds(d1,d0,d5,d4,d3,d2,d1, u,v,w) - montreds(d2,d1,d0,d5,d4,d3,d2, u,v,w) + montreds(d2,d1,d0,d5,d4,d3,d2, u,v,w) - montreds(d3,d2,d1,d0,d5,d4,d3, u,v,w) + montreds(d3,d2,d1,d0,d5,d4,d3, u,v,w) - montreds(d4,d3,d2,d1,d0,d5,d4, u,v,w) + montreds(d4,d3,d2,d1,d0,d5,d4, u,v,w) - montreds(d5,d4,d3,d2,d1,d0,d5, u,v,w) + montreds(d5,d4,d3,d2,d1,d0,d5, u,v,w) // Now compare end result in [d5;d4;d3;d2;d1;d0] = dd with p_384 by *adding* // 2^384 - p_384 = [0;0;0;w;v;u]. This will set CF if // dd + (2^384 - p_384) >= 2^384, hence iff dd >= p_384 - mov u, #0xffffffff00000001 - mov v, #0x00000000ffffffff - mov w, #0x0000000000000001 + mov u, #0xffffffff00000001 + mov v, #0x00000000ffffffff + mov w, #0x0000000000000001 - adds xzr, d0, u - adcs xzr, d1, v - adcs xzr, d2, w - adcs xzr, d3, xzr - adcs xzr, d4, xzr - adcs xzr, d5, xzr + adds xzr, d0, u + adcs xzr, d1, v + adcs xzr, d2, w + adcs xzr, d3, xzr + adcs xzr, d4, xzr + adcs xzr, d5, xzr // Convert the condition dd >= p_384 into a bitmask in w and do a masked // subtraction of p_384, via a masked addition of 2^384 - p_384: - csetm w, cs - and u, u, w - adds d0, d0, u - and v, v, w - adcs d1, d1, v - and w, w, #1 - adcs d2, d2, w - adcs d3, d3, xzr - adcs d4, d4, xzr - adc d5, d5, xzr + csetm w, cs + and u, u, w + adds d0, d0, u + and v, v, w + adcs d1, d1, v + and w, w, #1 + adcs d2, d2, w + adcs d3, d3, xzr + adcs d4, d4, xzr + adc d5, d5, xzr // Store it back - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_demont_p384.S b/arm/p384/bignum_demont_p384.S index d307a14950..00a0ffcacc 100644 --- a/arm/p384/bignum_demont_p384.S +++ b/arm/p384/bignum_demont_p384.S @@ -46,29 +46,29 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0, t3,t2,t1) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ /* Recycle d0 (which we know gets implicitly cancelled) to store it */ \ - lsl t1, d0, #32; \ - add d0, t1, d0; \ + lsl t1, d0, #32; \ + add d0, t1, d0; \ /* Now let [t2;t1] = 2^64 * w - w + w_hi where w_hi = floor(w/2^32) */ \ /* We need to subtract 2^32 * this, and we can ignore its lower 32 */ \ /* bits since by design it will cancel anyway; we 
only need the w_hi */ \ /* part to get the carry propagation going. */ \ - lsr t1, d0, #32; \ - subs t1, t1, d0; \ - sbc t2, d0, xzr; \ + lsr t1, d0, #32; \ + subs t1, t1, d0; \ + sbc t2, d0, xzr; \ /* Now select in t1 the field to subtract from d1 */ \ - extr t1, t2, t1, #32; \ + extr t1, t2, t1, #32; \ /* And now get the terms to subtract from d2 and d3 */ \ - lsr t2, t2, #32; \ - adds t2, t2, d0; \ - adc t3, xzr, xzr; \ + lsr t2, t2, #32; \ + adds t2, t2, d0; \ + adc t3, xzr, xzr; \ /* Do the subtraction of that portion */ \ - subs d1, d1, t1; \ - sbcs d2, d2, t2; \ - sbcs d3, d3, t3; \ - sbcs d4, d4, xzr; \ - sbcs d5, d5, xzr; \ + subs d1, d1, t1; \ + sbcs d2, d2, t2; \ + sbcs d3, d3, t3; \ + sbcs d4, d4, xzr; \ + sbcs d5, d5, xzr; \ /* Now effectively add 2^384 * w by taking d0 as the input for last sbc */ \ - sbc d6, d0, xzr + sbc d6, d0, xzr // Input parameters @@ -97,31 +97,31 @@ _bignum_demont_p384_alt: // Set up an initial window with the input x and an extra leading zero - ldp d0, d1, [x] - ldp d2, d3, [x, #16] - ldp d4, d5, [x, #32] + ldp d0, d1, [x] + ldp d2, d3, [x, #16] + ldp d4, d5, [x, #32] // Systematically scroll left doing 1-step reductions - montreds(d0,d5,d4,d3,d2,d1,d0, u,v,w) + montreds(d0,d5,d4,d3,d2,d1,d0, u,v,w) - montreds(d1,d0,d5,d4,d3,d2,d1, u,v,w) + montreds(d1,d0,d5,d4,d3,d2,d1, u,v,w) - montreds(d2,d1,d0,d5,d4,d3,d2, u,v,w) + montreds(d2,d1,d0,d5,d4,d3,d2, u,v,w) - montreds(d3,d2,d1,d0,d5,d4,d3, u,v,w) + montreds(d3,d2,d1,d0,d5,d4,d3, u,v,w) - montreds(d4,d3,d2,d1,d0,d5,d4, u,v,w) + montreds(d4,d3,d2,d1,d0,d5,d4, u,v,w) - montreds(d5,d4,d3,d2,d1,d0,d5, u,v,w) + montreds(d5,d4,d3,d2,d1,d0,d5, u,v,w) // This is already our answer with no correction needed - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_double_p384.S b/arm/p384/bignum_double_p384.S index 722df320d5..4d1189147a 100644 --- a/arm/p384/bignum_double_p384.S +++ b/arm/p384/bignum_double_p384.S @@ -51,48 +51,48 @@ _bignum_double_p384: // Double the input number as 2 * x = c + [d5; d4; d3; d2; d1; d0] // It's worth considering doing this with extr...63 instead - ldp d0, d1, [x] - ldp d2, d3, [x, #16] - ldp d4, d5, [x, #32] - adds d0, d0, d0 - adcs d1, d1, d1 - adcs d2, d2, d2 - adcs d3, d3, d3 - adcs d4, d4, d4 - adcs d5, d5, d5 - adc c, xzr, xzr + ldp d0, d1, [x] + ldp d2, d3, [x, #16] + ldp d4, d5, [x, #32] + adds d0, d0, d0 + adcs d1, d1, d1 + adcs d2, d2, d2 + adcs d3, d3, d3 + adcs d4, d4, d4 + adcs d5, d5, d5 + adc c, xzr, xzr // Subtract p_384 to give 2 * x - p_384 = c + [n5; n4; n3; n2; n1; n0] - mov n0, #0x00000000ffffffff - subs n0, d0, n0 - mov n1, #0xffffffff00000000 - sbcs n1, d1, n1 - mov n2, #0xfffffffffffffffe - sbcs n2, d2, n2 - adcs n3, d3, xzr - adcs n4, d4, xzr - adcs n5, d5, xzr - sbcs c, c, xzr + mov n0, #0x00000000ffffffff + subs n0, d0, n0 + mov n1, #0xffffffff00000000 + sbcs n1, d1, n1 + mov n2, #0xfffffffffffffffe + sbcs n2, d2, n2 + adcs n3, d3, xzr + adcs n4, d4, xzr + adcs n5, d5, xzr + sbcs c, c, xzr // Now CF is set (because of inversion) if 2 * x >= p_384, in which case the // correct result is [n5; n4; n3; n2; n1; n0], otherwise // [d5; d4; d3; d2; d1; d0] - csel d0, d0, n0, cc - csel d1, d1, n1, cc - csel d2, d2, n2, cc - csel d3, d3, n3, cc - csel d4, d4, n4, cc - csel d5, d5, n5, cc + csel d0, d0, n0, cc + csel d1, d1, n1, cc + csel d2, d2, n2, cc + csel d3, d3, n3, 
cc + csel d4, d4, n4, cc + csel d5, d5, n5, cc // Store the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_half_p384.S b/arm/p384/bignum_half_p384.S index d8ee54ed68..f0ce12864a 100644 --- a/arm/p384/bignum_half_p384.S +++ b/arm/p384/bignum_half_p384.S @@ -48,49 +48,49 @@ _bignum_half_p384: // Load the 4 digits of x - ldp d0, d1, [x] - ldp d2, d3, [x, #16] - ldp d4, d5, [x, #32] + ldp d0, d1, [x] + ldp d2, d3, [x, #16] + ldp d4, d5, [x, #32] // Get a bitmask corresponding to the lowest bit of the input - and m, d0, #1 - neg m, m + and m, d0, #1 + neg m, m // Do a masked addition of p_384, catching carry in a 7th word - mov n, #0x00000000ffffffff - and n, n, m - adds d0, d0, n - mov n, #0xffffffff00000000 - and n, n, m - adcs d1, d1, n - mov n, #0xfffffffffffffffe - and n, n, m - adcs d2, d2, n - adcs d3, d3, m - adcs d4, d4, m - adcs d5, d5, m - adc d6, xzr, xzr + mov n, #0x00000000ffffffff + and n, n, m + adds d0, d0, n + mov n, #0xffffffff00000000 + and n, n, m + adcs d1, d1, n + mov n, #0xfffffffffffffffe + and n, n, m + adcs d2, d2, n + adcs d3, d3, m + adcs d4, d4, m + adcs d5, d5, m + adc d6, xzr, xzr // Now shift that sum right one place - extr d0, d1, d0, #1 - extr d1, d2, d1, #1 - extr d2, d3, d2, #1 - extr d3, d4, d3, #1 - extr d4, d5, d4, #1 - extr d5, d6, d5, #1 + extr d0, d1, d0, #1 + extr d1, d2, d1, #1 + extr d2, d3, d2, #1 + extr d3, d4, d3, #1 + extr d4, d5, d4, #1 + extr d5, d6, d5, #1 // Store back - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] // Return - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_littleendian_6.S b/arm/p384/bignum_littleendian_6.S index 22c7cdcc7b..30d36a53bf 100644 --- a/arm/p384/bignum_littleendian_6.S +++ b/arm/p384/bignum_littleendian_6.S @@ -61,125 +61,125 @@ _bignum_tolebytes_6: // word 0 - ldrb dshort, [x] - extr a, d, xzr, #8 - ldrb dshort, [x, #1] - extr a, d, a, #8 - ldrb dshort, [x, #2] - extr a, d, a, #8 - ldrb dshort, [x, #3] - extr a, d, a, #8 - ldrb dshort, [x, #4] - extr a, d, a, #8 - ldrb dshort, [x, #5] - extr a, d, a, #8 - ldrb dshort, [x, #6] - extr a, d, a, #8 - ldrb dshort, [x, #7] - extr a, d, a, #8 - str a, [z] + ldrb dshort, [x] + extr a, d, xzr, #8 + ldrb dshort, [x, #1] + extr a, d, a, #8 + ldrb dshort, [x, #2] + extr a, d, a, #8 + ldrb dshort, [x, #3] + extr a, d, a, #8 + ldrb dshort, [x, #4] + extr a, d, a, #8 + ldrb dshort, [x, #5] + extr a, d, a, #8 + ldrb dshort, [x, #6] + extr a, d, a, #8 + ldrb dshort, [x, #7] + extr a, d, a, #8 + str a, [z] // word 1 - ldrb dshort, [x, #8] - extr a, d, xzr, #8 - ldrb dshort, [x, #9] - extr a, d, a, #8 - ldrb dshort, [x, #10] - extr a, d, a, #8 - ldrb dshort, [x, #11] - extr a, d, a, #8 - ldrb dshort, [x, #12] - extr a, d, a, #8 - ldrb dshort, [x, #13] - extr a, d, a, #8 - ldrb dshort, [x, #14] - extr a, d, a, #8 - ldrb dshort, [x, #15] - extr a, d, a, #8 - str a, [z, #8] + ldrb dshort, [x, #8] + extr a, d, xzr, #8 + ldrb dshort, [x, #9] + extr a, d, a, #8 + ldrb dshort, [x, #10] + extr a, d, a, #8 + ldrb dshort, [x, #11] + extr a, d, a, #8 + ldrb dshort, [x, #12] + extr a, d, a, #8 + ldrb dshort, [x, #13] + extr a, d, a, #8 + ldrb dshort, [x, #14] + extr a, d, a, #8 + ldrb dshort, [x, #15] + extr a, d, a, #8 + str a, [z, 
#8] // word 2 - ldrb dshort, [x, #16] - extr a, d, xzr, #8 - ldrb dshort, [x, #17] - extr a, d, a, #8 - ldrb dshort, [x, #18] - extr a, d, a, #8 - ldrb dshort, [x, #19] - extr a, d, a, #8 - ldrb dshort, [x, #20] - extr a, d, a, #8 - ldrb dshort, [x, #21] - extr a, d, a, #8 - ldrb dshort, [x, #22] - extr a, d, a, #8 - ldrb dshort, [x, #23] - extr a, d, a, #8 - str a, [z, #16] + ldrb dshort, [x, #16] + extr a, d, xzr, #8 + ldrb dshort, [x, #17] + extr a, d, a, #8 + ldrb dshort, [x, #18] + extr a, d, a, #8 + ldrb dshort, [x, #19] + extr a, d, a, #8 + ldrb dshort, [x, #20] + extr a, d, a, #8 + ldrb dshort, [x, #21] + extr a, d, a, #8 + ldrb dshort, [x, #22] + extr a, d, a, #8 + ldrb dshort, [x, #23] + extr a, d, a, #8 + str a, [z, #16] // word 3 - ldrb dshort, [x, #24] - extr a, d, xzr, #8 - ldrb dshort, [x, #25] - extr a, d, a, #8 - ldrb dshort, [x, #26] - extr a, d, a, #8 - ldrb dshort, [x, #27] - extr a, d, a, #8 - ldrb dshort, [x, #28] - extr a, d, a, #8 - ldrb dshort, [x, #29] - extr a, d, a, #8 - ldrb dshort, [x, #30] - extr a, d, a, #8 - ldrb dshort, [x, #31] - extr a, d, a, #8 - str a, [z, #24] + ldrb dshort, [x, #24] + extr a, d, xzr, #8 + ldrb dshort, [x, #25] + extr a, d, a, #8 + ldrb dshort, [x, #26] + extr a, d, a, #8 + ldrb dshort, [x, #27] + extr a, d, a, #8 + ldrb dshort, [x, #28] + extr a, d, a, #8 + ldrb dshort, [x, #29] + extr a, d, a, #8 + ldrb dshort, [x, #30] + extr a, d, a, #8 + ldrb dshort, [x, #31] + extr a, d, a, #8 + str a, [z, #24] // word 4 - ldrb dshort, [x, #32] - extr a, d, xzr, #8 - ldrb dshort, [x, #33] - extr a, d, a, #8 - ldrb dshort, [x, #34] - extr a, d, a, #8 - ldrb dshort, [x, #35] - extr a, d, a, #8 - ldrb dshort, [x, #36] - extr a, d, a, #8 - ldrb dshort, [x, #37] - extr a, d, a, #8 - ldrb dshort, [x, #38] - extr a, d, a, #8 - ldrb dshort, [x, #39] - extr a, d, a, #8 - str a, [z, #32] + ldrb dshort, [x, #32] + extr a, d, xzr, #8 + ldrb dshort, [x, #33] + extr a, d, a, #8 + ldrb dshort, [x, #34] + extr a, d, a, #8 + ldrb dshort, [x, #35] + extr a, d, a, #8 + ldrb dshort, [x, #36] + extr a, d, a, #8 + ldrb dshort, [x, #37] + extr a, d, a, #8 + ldrb dshort, [x, #38] + extr a, d, a, #8 + ldrb dshort, [x, #39] + extr a, d, a, #8 + str a, [z, #32] // word 5 - ldrb dshort, [x, #40] - extr a, d, xzr, #8 - ldrb dshort, [x, #41] - extr a, d, a, #8 - ldrb dshort, [x, #42] - extr a, d, a, #8 - ldrb dshort, [x, #43] - extr a, d, a, #8 - ldrb dshort, [x, #44] - extr a, d, a, #8 - ldrb dshort, [x, #45] - extr a, d, a, #8 - ldrb dshort, [x, #46] - extr a, d, a, #8 - ldrb dshort, [x, #47] - extr a, d, a, #8 - str a, [z, #40] - - ret + ldrb dshort, [x, #40] + extr a, d, xzr, #8 + ldrb dshort, [x, #41] + extr a, d, a, #8 + ldrb dshort, [x, #42] + extr a, d, a, #8 + ldrb dshort, [x, #43] + extr a, d, a, #8 + ldrb dshort, [x, #44] + extr a, d, a, #8 + ldrb dshort, [x, #45] + extr a, d, a, #8 + ldrb dshort, [x, #46] + extr a, d, a, #8 + ldrb dshort, [x, #47] + extr a, d, a, #8 + str a, [z, #40] + + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_mod_n384.S b/arm/p384/bignum_mod_n384.S index daede0da98..b821c69235 100644 --- a/arm/p384/bignum_mod_n384.S +++ b/arm/p384/bignum_mod_n384.S @@ -70,10 +70,10 @@ // Loading large constants #define movbig(nn,n3,n2,n1,n0) \ - movz nn, n0; \ - movk nn, n1, lsl #16; \ - movk nn, n2, lsl #32; \ - movk nn, n3, lsl #48 + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 bignum_mod_n384: _bignum_mod_n384: @@ -82,133 +82,133 @@ 
_bignum_mod_n384_alt: // If the input is already <= 5 words long, go to a trivial "copy" path - cmp k, #6 - bcc short + cmp k, #6 + bcc short // Otherwise load the top 6 digits (top-down) and reduce k by 6 - sub k, k, #6 - lsl t0, k, #3 - add t0, t0, x - ldp m4, m5, [t0, #32] - ldp m2, m3, [t0, #16] - ldp m0, m1, [t0] + sub k, k, #6 + lsl t0, k, #3 + add t0, t0, x + ldp m4, m5, [t0, #32] + ldp m2, m3, [t0, #16] + ldp m0, m1, [t0] // Load the complicated three words of 2^384 - n_384 = [0; 0; 0; n2; n1; n0] - movbig( n0, #0x1313, #0xe695, #0x333a, #0xd68d) - movbig( n1, #0xa7e5, #0xf24d, #0xb74f, #0x5885) - movbig( n2, #0x389c, #0xb27e, #0x0bc8, #0xd220) + movbig( n0, #0x1313, #0xe695, #0x333a, #0xd68d) + movbig( n1, #0xa7e5, #0xf24d, #0xb74f, #0x5885) + movbig( n2, #0x389c, #0xb27e, #0x0bc8, #0xd220) // Reduce the top 6 digits mod n_384 (a conditional subtraction of n_384) - adds t0, m0, n0 - adcs t1, m1, n1 - adcs t2, m2, n2 - adcs t3, m3, xzr - adcs t4, m4, xzr - adcs t5, m5, xzr - csel m0, m0, t0, cc - csel m1, m1, t1, cc - csel m2, m2, t2, cc - csel m3, m3, t3, cc - csel m4, m4, t4, cc - csel m5, m5, t5, cc + adds t0, m0, n0 + adcs t1, m1, n1 + adcs t2, m2, n2 + adcs t3, m3, xzr + adcs t4, m4, xzr + adcs t5, m5, xzr + csel m0, m0, t0, cc + csel m1, m1, t1, cc + csel m2, m2, t2, cc + csel m3, m3, t3, cc + csel m4, m4, t4, cc + csel m5, m5, t5, cc // Now do (k-6) iterations of 7->6 word modular reduction - cbz k, writeback + cbz k, writeback loop: // Compute q = min (m5 + 1) (2^64 - 1) - adds q, m5, #1 - csetm t0, cs - orr q, q, t0 + adds q, m5, #1 + csetm t0, cs + orr q, q, t0 // [t3;t2;t1;t0] = q * (2^384 - n_384) - mul t0, n0, q - mul t1, n1, q - mul t2, n2, q + mul t0, n0, q + mul t1, n1, q + mul t2, n2, q - umulh t3, n0, q - adds t1, t1, t3 - umulh t3, n1, q - adcs t2, t2, t3 - umulh t3, n2, q - adc t3, xzr, t3 + umulh t3, n0, q + adds t1, t1, t3 + umulh t3, n1, q + adcs t2, t2, t3 + umulh t3, n2, q + adc t3, xzr, t3 // Decrement k and load the next digit - sub k, k, #1 - ldr d, [x, k, lsl #3] + sub k, k, #1 + ldr d, [x, k, lsl #3] // Compensate for 2^384 * q - sub m5, m5, q + sub m5, m5, q // [m5;m4;t4;t3;t2;t1;t0] = [m5;m4;m3;m2;m1;m0;d] - q * n_384 - adds t0, d, t0 - adcs t1, m0, t1 - adcs t2, m1, t2 - adcs t3, m2, t3 - adcs t4, m3, xzr - adcs m4, m4, xzr - adc m5, m5, xzr + adds t0, d, t0 + adcs t1, m0, t1 + adcs t2, m1, t2 + adcs t3, m2, t3 + adcs t4, m3, xzr + adcs m4, m4, xzr + adc m5, m5, xzr // Now our top word m5 is either zero or all 1s. 
Use it for a masked // addition of n_384, which we can do by a *subtraction* of // 2^384 - n_384 from our portion, re-using the constants - and t, m5, n0 - subs m0, t0, t - and t, m5, n1 - sbcs m1, t1, t - and t, m5, n2 - sbcs m2, t2, t - sbcs m3, t3, xzr - sbcs t, t4, xzr - sbc m5, m4, xzr - mov m4, t + and t, m5, n0 + subs m0, t0, t + and t, m5, n1 + sbcs m1, t1, t + and t, m5, n2 + sbcs m2, t2, t + sbcs m3, t3, xzr + sbcs t, t4, xzr + sbc m5, m4, xzr + mov m4, t - cbnz k, loop + cbnz k, loop // Finally write back [m5;m4;m3;m2;m1;m0] and return writeback: - stp m0, m1, [z] - stp m2, m3, [z, #16] - stp m4, m5, [z, #32] + stp m0, m1, [z] + stp m2, m3, [z, #16] + stp m4, m5, [z, #32] - ret + ret // Short case: just copy the input with zero-padding short: - mov m0, xzr - mov m1, xzr - mov m2, xzr - mov m3, xzr - mov m4, xzr - mov m5, xzr - - cbz k, writeback - ldr m0, [x] - subs k, k, #1 - beq writeback - ldr m1, [x, #8] - subs k, k, #1 - beq writeback - ldr m2, [x, #16] - subs k, k, #1 - beq writeback - ldr m3, [x, #24] - subs k, k, #1 - beq writeback - ldr m4, [x, #32] - b writeback + mov m0, xzr + mov m1, xzr + mov m2, xzr + mov m3, xzr + mov m4, xzr + mov m5, xzr + + cbz k, writeback + ldr m0, [x] + subs k, k, #1 + beq writeback + ldr m1, [x, #8] + subs k, k, #1 + beq writeback + ldr m2, [x, #16] + subs k, k, #1 + beq writeback + ldr m3, [x, #24] + subs k, k, #1 + beq writeback + ldr m4, [x, #32] + b writeback #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_mod_n384_6.S b/arm/p384/bignum_mod_n384_6.S index 825d2b610b..4f37566197 100644 --- a/arm/p384/bignum_mod_n384_6.S +++ b/arm/p384/bignum_mod_n384_6.S @@ -48,53 +48,53 @@ #define d5 x13 #define movbig(nn,n3,n2,n1,n0) \ - movz nn, n0; \ - movk nn, n1, lsl #16; \ - movk nn, n2, lsl #32; \ - movk nn, n3, lsl #48 + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 bignum_mod_n384_6: _bignum_mod_n384_6: // Load the complicated lower three words of n_384 - movbig( n0, #0xecec, #0x196a, #0xccc5, #0x2973) - movbig( n1, #0x581a, #0x0db2, #0x48b0, #0xa77a) - movbig( n2, #0xc763, #0x4d81, #0xf437, #0x2ddf) + movbig( n0, #0xecec, #0x196a, #0xccc5, #0x2973) + movbig( n1, #0x581a, #0x0db2, #0x48b0, #0xa77a) + movbig( n2, #0xc763, #0x4d81, #0xf437, #0x2ddf) // Load the input number - ldp d0, d1, [x] - ldp d2, d3, [x, #16] - ldp d4, d5, [x, #32] + ldp d0, d1, [x] + ldp d2, d3, [x, #16] + ldp d4, d5, [x, #32] // Do the subtraction. Since the top three words of n_384 are all 1s // we can devolve the top to adding 0, thanks to the inverted carry. 
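// At the C level this whole routine is a single conditional subtraction:
// subtract n_384 and keep the original input if that subtraction borrows.
// A small sketch (mod_once_6 is an invented name, n holds the six words
// of the modulus, and unsigned __int128 is a GCC/Clang extension):
//
//     #include <stdint.h>
//
//     static void mod_once_6(uint64_t z[6], const uint64_t x[6],
//                            const uint64_t n[6])
//     {
//         uint64_t d[6], b = 0;                    // b = running borrow
//         for (int i = 0; i < 6; i++) {
//             unsigned __int128 t = (unsigned __int128)x[i] - n[i] - b;
//             d[i] = (uint64_t)t;
//             b = (uint64_t)(t >> 64) & 1;
//         }
//         for (int i = 0; i < 6; i++)              // borrowed: x < n, keep x
//             z[i] = b ? x[i] : d[i];
//     }
//
// The assembly makes the final selection with csel rather than a branch,
// keeping the routine constant-time.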
- subs n0, d0, n0 - sbcs n1, d1, n1 - sbcs n2, d2, n2 - adcs n3, d3, xzr - adcs n4, d4, xzr - adcs n5, d5, xzr + subs n0, d0, n0 + sbcs n1, d1, n1 + sbcs n2, d2, n2 + adcs n3, d3, xzr + adcs n4, d4, xzr + adcs n5, d5, xzr // Now if the carry is *clear* (inversion at work) the subtraction carried // and hence we should have done nothing, so we reset each n_i = d_i - csel n0, d0, n0, cc - csel n1, d1, n1, cc - csel n2, d2, n2, cc - csel n3, d3, n3, cc - csel n4, d4, n4, cc - csel n5, d5, n5, cc + csel n0, d0, n0, cc + csel n1, d1, n1, cc + csel n2, d2, n2, cc + csel n3, d3, n3, cc + csel n4, d4, n4, cc + csel n5, d5, n5, cc // Store the end result - stp n0, n1, [z] - stp n2, n3, [z, #16] - stp n4, n5, [z, #32] + stp n0, n1, [z] + stp n2, n3, [z, #16] + stp n4, n5, [z, #32] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_mod_p384.S b/arm/p384/bignum_mod_p384.S index a81083b326..54c7ffcfa3 100644 --- a/arm/p384/bignum_mod_p384.S +++ b/arm/p384/bignum_mod_p384.S @@ -60,128 +60,128 @@ _bignum_mod_p384_alt: // If the input is already <= 5 words long, go to a trivial "copy" path - cmp k, #6 - bcc short + cmp k, #6 + bcc short // Otherwise load the top 6 digits (top-down) and reduce k by 6 - sub k, k, #6 - lsl t0, k, #3 - add t0, t0, x - ldp m4, m5, [t0, #32] - ldp m2, m3, [t0, #16] - ldp m0, m1, [t0] + sub k, k, #6 + lsl t0, k, #3 + add t0, t0, x + ldp m4, m5, [t0, #32] + ldp m2, m3, [t0, #16] + ldp m0, m1, [t0] // Load the complicated lower three words of p_384 = [-1;-1;-1;n2;n1;n0] - mov n0, #0x00000000ffffffff - mov n1, #0xffffffff00000000 - mov n2, #0xfffffffffffffffe + mov n0, #0x00000000ffffffff + mov n1, #0xffffffff00000000 + mov n2, #0xfffffffffffffffe // Reduce the top 6 digits mod p_384 (a conditional subtraction of p_384) - subs t0, m0, n0 - sbcs t1, m1, n1 - sbcs t2, m2, n2 - adcs t3, m3, xzr - adcs t4, m4, xzr - adcs t5, m5, xzr - csel m0, m0, t0, cc - csel m1, m1, t1, cc - csel m2, m2, t2, cc - csel m3, m3, t3, cc - csel m4, m4, t4, cc - csel m5, m5, t5, cc + subs t0, m0, n0 + sbcs t1, m1, n1 + sbcs t2, m2, n2 + adcs t3, m3, xzr + adcs t4, m4, xzr + adcs t5, m5, xzr + csel m0, m0, t0, cc + csel m1, m1, t1, cc + csel m2, m2, t2, cc + csel m3, m3, t3, cc + csel m4, m4, t4, cc + csel m5, m5, t5, cc // Now do (k-6) iterations of 7->6 word modular reduction - cbz k, writeback + cbz k, writeback loop: // Decrement k and load the next digit as t5. 
We now want to reduce // [m5;m4;m3;m2;m1;m0;t5] |-> [m5;m4;m3;m2;m1;m0]; the shuffling downwards is // absorbed into the various ALU operations - sub k, k, #1 - ldr t5, [x, k, lsl #3] + sub k, k, #1 + ldr t5, [x, k, lsl #3] // Initial quotient approximation q = min (h + 1) (2^64 - 1) - adds m5, m5, #1 - csetm t3, cs - add m5, m5, t3 - orn n1, xzr, t3 - sub t2, m5, #1 - sub t1, xzr, m5 + adds m5, m5, #1 + csetm t3, cs + add m5, m5, t3 + orn n1, xzr, t3 + sub t2, m5, #1 + sub t1, xzr, m5 // Correction term [m5;t2;t1;t0] = q * (2^384 - p_384), using m5 as a temp - lsl t0, t1, #32 - extr t1, t2, t1, #32 - lsr t2, t2, #32 - adds t0, t0, m5 - adcs t1, t1, xzr - adcs t2, t2, m5 - adc m5, xzr, xzr + lsl t0, t1, #32 + extr t1, t2, t1, #32 + lsr t2, t2, #32 + adds t0, t0, m5 + adcs t1, t1, xzr + adcs t2, t2, m5 + adc m5, xzr, xzr // Addition to the initial value - adds t0, t5, t0 - adcs t1, m0, t1 - adcs t2, m1, t2 - adcs t3, m2, m5 - adcs t4, m3, xzr - adcs t5, m4, xzr - adc n1, n1, xzr + adds t0, t5, t0 + adcs t1, m0, t1 + adcs t2, m1, t2 + adcs t3, m2, m5 + adcs t4, m3, xzr + adcs t5, m4, xzr + adc n1, n1, xzr // Use net top of the 7-word answer (now in n1) for masked correction - and m5, n0, n1 - adds m0, t0, m5 - eor m5, m5, n1 - adcs m1, t1, m5 - and m5, n2, n1 - adcs m2, t2, m5 - adcs m3, t3, n1 - adcs m4, t4, n1 - adc m5, t5, n1 + and m5, n0, n1 + adds m0, t0, m5 + eor m5, m5, n1 + adcs m1, t1, m5 + and m5, n2, n1 + adcs m2, t2, m5 + adcs m3, t3, n1 + adcs m4, t4, n1 + adc m5, t5, n1 - cbnz k, loop + cbnz k, loop // Finally write back [m5;m4;m3;m2;m1;m0] and return writeback: - stp m0, m1, [z] - stp m2, m3, [z, #16] - stp m4, m5, [z, #32] + stp m0, m1, [z] + stp m2, m3, [z, #16] + stp m4, m5, [z, #32] - ret + ret // Short case: just copy the input with zero-padding short: - mov m0, xzr - mov m1, xzr - mov m2, xzr - mov m3, xzr - mov m4, xzr - mov m5, xzr - - cbz k, writeback - ldr m0, [x] - subs k, k, #1 - beq writeback - ldr m1, [x, #8] - subs k, k, #1 - beq writeback - ldr m2, [x, #16] - subs k, k, #1 - beq writeback - ldr m3, [x, #24] - subs k, k, #1 - beq writeback - ldr m4, [x, #32] - b writeback + mov m0, xzr + mov m1, xzr + mov m2, xzr + mov m3, xzr + mov m4, xzr + mov m5, xzr + + cbz k, writeback + ldr m0, [x] + subs k, k, #1 + beq writeback + ldr m1, [x, #8] + subs k, k, #1 + beq writeback + ldr m2, [x, #16] + subs k, k, #1 + beq writeback + ldr m3, [x, #24] + subs k, k, #1 + beq writeback + ldr m4, [x, #32] + b writeback #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_mod_p384_6.S b/arm/p384/bignum_mod_p384_6.S index d0d1d14316..f02b401f7a 100644 --- a/arm/p384/bignum_mod_p384_6.S +++ b/arm/p384/bignum_mod_p384_6.S @@ -51,43 +51,43 @@ _bignum_mod_p384_6: // Load the complicated lower three words of p_384 = [-1;-1;-1;n2;n1;n0] - mov n0, #0x00000000ffffffff - mov n1, #0xffffffff00000000 - mov n2, #0xfffffffffffffffe + mov n0, #0x00000000ffffffff + mov n1, #0xffffffff00000000 + mov n2, #0xfffffffffffffffe // Load the input number - ldp d0, d1, [x] - ldp d2, d3, [x, #16] - ldp d4, d5, [x, #32] + ldp d0, d1, [x] + ldp d2, d3, [x, #16] + ldp d4, d5, [x, #32] // Do the subtraction. Since the top three words of p_384 are all 1s // we can devolve the top to adding 0, thanks to the inverted carry. 
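For reference, p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1, so its 64-bit words come out as below; only the low three are "complicated", which is what lets the code treat the top half as adding 0 (P384 is a hypothetical name for this illustration):

    #include <stdint.h>

    /* p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1, little-endian words */
    static const uint64_t P384[6] = {
        0x00000000ffffffff,    /* n0 in the code above */
        0xffffffff00000000,    /* n1 */
        0xfffffffffffffffe,    /* n2 */
        0xffffffffffffffff,    /* top three words are all 1s, so the */
        0xffffffffffffffff,    /* sbcs chain against them behaves    */
        0xffffffffffffffff     /* like adding 0 with inverted carry  */
    };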
- subs n0, d0, n0 - sbcs n1, d1, n1 - sbcs n2, d2, n2 - adcs n3, d3, xzr - adcs n4, d4, xzr - adcs n5, d5, xzr + subs n0, d0, n0 + sbcs n1, d1, n1 + sbcs n2, d2, n2 + adcs n3, d3, xzr + adcs n4, d4, xzr + adcs n5, d5, xzr // Now if the carry is *clear* (inversion at work) the subtraction carried // and hence we should have done nothing, so we reset each n_i = d_i - csel n0, d0, n0, cc - csel n1, d1, n1, cc - csel n2, d2, n2, cc - csel n3, d3, n3, cc - csel n4, d4, n4, cc - csel n5, d5, n5, cc + csel n0, d0, n0, cc + csel n1, d1, n1, cc + csel n2, d2, n2, cc + csel n3, d3, n3, cc + csel n4, d4, n4, cc + csel n5, d5, n5, cc // Store the end result - stp n0, n1, [z] - stp n2, n3, [z, #16] - stp n4, n5, [z, #32] + stp n0, n1, [z] + stp n2, n3, [z, #16] + stp n4, n5, [z, #32] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_montmul_p384.S b/arm/p384/bignum_montmul_p384.S index 3bd9b5ea61..3ffc1cd3da 100644 --- a/arm/p384/bignum_montmul_p384.S +++ b/arm/p384/bignum_montmul_p384.S @@ -63,29 +63,29 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0, t3,t2,t1) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ /* Recycle d0 (which we know gets implicitly cancelled) to store it */ \ - lsl t1, d0, #32; \ - add d0, t1, d0; \ + lsl t1, d0, #32; \ + add d0, t1, d0; \ /* Now let [t2;t1] = 2^64 * w - w + w_hi where w_hi = floor(w/2^32) */ \ /* We need to subtract 2^32 * this, and we can ignore its lower 32 */ \ /* bits since by design it will cancel anyway; we only need the w_hi */ \ /* part to get the carry propagation going. */ \ - lsr t1, d0, #32; \ - subs t1, t1, d0; \ - sbc t2, d0, xzr; \ + lsr t1, d0, #32; \ + subs t1, t1, d0; \ + sbc t2, d0, xzr; \ /* Now select in t1 the field to subtract from d1 */ \ - extr t1, t2, t1, #32; \ + extr t1, t2, t1, #32; \ /* And now get the terms to subtract from d2 and d3 */ \ - lsr t2, t2, #32; \ - adds t2, t2, d0; \ - adc t3, xzr, xzr; \ + lsr t2, t2, #32; \ + adds t2, t2, d0; \ + adc t3, xzr, xzr; \ /* Do the subtraction of that portion */ \ - subs d1, d1, t1; \ - sbcs d2, d2, t2; \ - sbcs d3, d3, t3; \ - sbcs d4, d4, xzr; \ - sbcs d5, d5, xzr; \ + subs d1, d1, t1; \ + sbcs d2, d2, t2; \ + sbcs d3, d3, t3; \ + sbcs d4, d4, xzr; \ + sbcs d5, d5, xzr; \ /* Now effectively add 2^384 * w by taking d0 as the input for last sbc */ \ - sbc d6, d0, xzr + sbc d6, d0, xzr #define a0 x3 #define a1 x4 @@ -118,62 +118,62 @@ _bignum_montmul_p384: // Save some registers - stp x19, x20, [sp, -16]! - stp x21, x22, [sp, -16]! - stp x23, x24, [sp, -16]! + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! 
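The correction multiplier in the montreds macro above relies on the fact that -p_384^(-1) mod 2^64 = 2^32 + 1: since p_384 = 2^32 - 1 (mod 2^64) and (2^32 + 1)(2^32 - 1) = 2^64 - 1 = -1 (mod 2^64), taking w = d0 + (d0 << 32) makes the low word cancel. A quick C sanity check of that identity (sketch only; check_montgomery_multiplier is a hypothetical name):

    #include <stdint.h>
    #include <assert.h>

    /* Verify d0 + w * p_384 == 0 (mod 2^64) for w = d0 * (2^32 + 1). */
    void check_montgomery_multiplier(uint64_t d0)
    {
        uint64_t p_low = 0x00000000ffffffff;   /* p_384 mod 2^64 */
        uint64_t w = d0 + (d0 << 32);          /* d0 * (2^32 + 1) */
        assert(d0 + w * p_low == 0);           /* uint64_t wraps mod 2^64 */
    }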
// Load in all words of both inputs - ldp a0, a1, [x1] - ldp a2, a3, [x1, #16] - ldp a4, a5, [x1, #32] - ldp b0, b1, [x2] - ldp b2, b3, [x2, #16] - ldp b4, b5, [x2, #32] + ldp a0, a1, [x1] + ldp a2, a3, [x1, #16] + ldp a4, a5, [x1, #32] + ldp b0, b1, [x2] + ldp b2, b3, [x2, #16] + ldp b4, b5, [x2, #32] // Multiply low halves with a 3x3->6 ADK multiplier as [s5;s4;s3;s2;s1;s0] - mul s0, a0, b0 - mul t1, a1, b1 - mul t2, a2, b2 - umulh t3, a0, b0 - umulh t4, a1, b1 - umulh s5, a2, b2 - - adds t3, t3, t1 - adcs t4, t4, t2 - adc s5, s5, xzr - - adds s1, t3, s0 - adcs s2, t4, t3 - adcs s3, s5, t4 - adc s4, s5, xzr - - adds s2, s2, s0 - adcs s3, s3, t3 - adcs s4, s4, t4 - adc s5, s5, xzr - - muldiffn(t3,t2,t1, t4, a0,a1, b1,b0) - adds xzr, t3, #1 - adcs s1, s1, t1 - adcs s2, s2, t2 - adcs s3, s3, t3 - adcs s4, s4, t3 - adc s5, s5, t3 - - muldiffn(t3,t2,t1, t4, a0,a2, b2,b0) - adds xzr, t3, #1 - adcs s2, s2, t1 - adcs s3, s3, t2 - adcs s4, s4, t3 - adc s5, s5, t3 - - muldiffn(t3,t2,t1, t4, a1,a2, b2,b1) - adds xzr, t3, #1 - adcs s3, s3, t1 - adcs s4, s4, t2 - adc s5, s5, t3 + mul s0, a0, b0 + mul t1, a1, b1 + mul t2, a2, b2 + umulh t3, a0, b0 + umulh t4, a1, b1 + umulh s5, a2, b2 + + adds t3, t3, t1 + adcs t4, t4, t2 + adc s5, s5, xzr + + adds s1, t3, s0 + adcs s2, t4, t3 + adcs s3, s5, t4 + adc s4, s5, xzr + + adds s2, s2, s0 + adcs s3, s3, t3 + adcs s4, s4, t4 + adc s5, s5, xzr + + muldiffn(t3,t2,t1, t4, a0,a1, b1,b0) + adds xzr, t3, #1 + adcs s1, s1, t1 + adcs s2, s2, t2 + adcs s3, s3, t3 + adcs s4, s4, t3 + adc s5, s5, t3 + + muldiffn(t3,t2,t1, t4, a0,a2, b2,b0) + adds xzr, t3, #1 + adcs s2, s2, t1 + adcs s3, s3, t2 + adcs s4, s4, t3 + adc s5, s5, t3 + + muldiffn(t3,t2,t1, t4, a1,a2, b2,b1) + adds xzr, t3, #1 + adcs s3, s3, t1 + adcs s4, s4, t2 + adc s5, s5, t3 // Perform three "short" Montgomery steps on the low product // This shifts it to an offset compatible with middle terms @@ -181,202 +181,202 @@ _bignum_montmul_p384: // We could keep this in registers by directly adding to it in the next // ADK block, but if anything that seems to be slightly slower - montreds(s0,s5,s4,s3,s2,s1,s0, t1,t2,t3) + montreds(s0,s5,s4,s3,s2,s1,s0, t1,t2,t3) - montreds(s1,s0,s5,s4,s3,s2,s1, t1,t2,t3) + montreds(s1,s0,s5,s4,s3,s2,s1, t1,t2,t3) - montreds(s2,s1,s0,s5,s4,s3,s2, t1,t2,t3) + montreds(s2,s1,s0,s5,s4,s3,s2, t1,t2,t3) - stp s3, s4, [x0] - stp s5, s0, [x0, #16] - stp s1, s2, [x0, #32] + stp s3, s4, [x0] + stp s5, s0, [x0, #16] + stp s1, s2, [x0, #32] // Multiply high halves with a 3x3->6 ADK multiplier as [s5;s4;s3;s2;s1;s0] - mul s0, a3, b3 - mul t1, a4, b4 - mul t2, a5, b5 - umulh t3, a3, b3 - umulh t4, a4, b4 - umulh s5, a5, b5 - - adds t3, t3, t1 - adcs t4, t4, t2 - adc s5, s5, xzr - - adds s1, t3, s0 - adcs s2, t4, t3 - adcs s3, s5, t4 - adc s4, s5, xzr - - adds s2, s2, s0 - adcs s3, s3, t3 - adcs s4, s4, t4 - adc s5, s5, xzr - - muldiffn(t3,t2,t1, t4, a3,a4, b4,b3) - adds xzr, t3, #1 - adcs s1, s1, t1 - adcs s2, s2, t2 - adcs s3, s3, t3 - adcs s4, s4, t3 - adc s5, s5, t3 - - muldiffn(t3,t2,t1, t4, a3,a5, b5,b3) - adds xzr, t3, #1 - adcs s2, s2, t1 - adcs s3, s3, t2 - adcs s4, s4, t3 - adc s5, s5, t3 - - muldiffn(t3,t2,t1, t4, a4,a5, b5,b4) - adds xzr, t3, #1 - adcs s3, s3, t1 - adcs s4, s4, t2 - adc s5, s5, t3 + mul s0, a3, b3 + mul t1, a4, b4 + mul t2, a5, b5 + umulh t3, a3, b3 + umulh t4, a4, b4 + umulh s5, a5, b5 + + adds t3, t3, t1 + adcs t4, t4, t2 + adc s5, s5, xzr + + adds s1, t3, s0 + adcs s2, t4, t3 + adcs s3, s5, t4 + adc s4, s5, xzr + + adds s2, s2, s0 + adcs s3, s3, t3 + adcs s4, s4, 
t4 + adc s5, s5, xzr + + muldiffn(t3,t2,t1, t4, a3,a4, b4,b3) + adds xzr, t3, #1 + adcs s1, s1, t1 + adcs s2, s2, t2 + adcs s3, s3, t3 + adcs s4, s4, t3 + adc s5, s5, t3 + + muldiffn(t3,t2,t1, t4, a3,a5, b5,b3) + adds xzr, t3, #1 + adcs s2, s2, t1 + adcs s3, s3, t2 + adcs s4, s4, t3 + adc s5, s5, t3 + + muldiffn(t3,t2,t1, t4, a4,a5, b5,b4) + adds xzr, t3, #1 + adcs s3, s3, t1 + adcs s4, s4, t2 + adc s5, s5, t3 // Compute sign-magnitude a0,[a5,a4,a3] = x_hi - x_lo - subs a3, a3, a0 - sbcs a4, a4, a1 - sbcs a5, a5, a2 - sbc a0, xzr, xzr - adds xzr, a0, #1 - eor a3, a3, a0 - adcs a3, a3, xzr - eor a4, a4, a0 - adcs a4, a4, xzr - eor a5, a5, a0 - adc a5, a5, xzr + subs a3, a3, a0 + sbcs a4, a4, a1 + sbcs a5, a5, a2 + sbc a0, xzr, xzr + adds xzr, a0, #1 + eor a3, a3, a0 + adcs a3, a3, xzr + eor a4, a4, a0 + adcs a4, a4, xzr + eor a5, a5, a0 + adc a5, a5, xzr // Compute sign-magnitude b5,[b2,b1,b0] = y_lo - y_hi - subs b0, b0, b3 - sbcs b1, b1, b4 - sbcs b2, b2, b5 - sbc b5, xzr, xzr + subs b0, b0, b3 + sbcs b1, b1, b4 + sbcs b2, b2, b5 + sbc b5, xzr, xzr - adds xzr, b5, #1 - eor b0, b0, b5 - adcs b0, b0, xzr - eor b1, b1, b5 - adcs b1, b1, xzr - eor b2, b2, b5 - adc b2, b2, xzr + adds xzr, b5, #1 + eor b0, b0, b5 + adcs b0, b0, xzr + eor b1, b1, b5 + adcs b1, b1, xzr + eor b2, b2, b5 + adc b2, b2, xzr // Save the correct sign for the sub-product in b5 - eor b5, a0, b5 + eor b5, a0, b5 // Add the high H to the modified low term L' and re-stash 6 words, // keeping top word in s6 - ldp t1, t2, [x0] - adds s0, s0, t1 - adcs s1, s1, t2 - ldp t1, t2, [x0, #16] - adcs s2, s2, t1 - adcs s3, s3, t2 - ldp t1, t2, [x0, #32] - adcs s4, s4, t1 - adcs s5, s5, t2 - adc s6, xzr, xzr - stp s0, s1, [x0] - stp s2, s3, [x0, #16] - stp s4, s5, [x0, #32] + ldp t1, t2, [x0] + adds s0, s0, t1 + adcs s1, s1, t2 + ldp t1, t2, [x0, #16] + adcs s2, s2, t1 + adcs s3, s3, t2 + ldp t1, t2, [x0, #32] + adcs s4, s4, t1 + adcs s5, s5, t2 + adc s6, xzr, xzr + stp s0, s1, [x0] + stp s2, s3, [x0, #16] + stp s4, s5, [x0, #32] // Multiply with yet a third 3x3 ADK for the complex mid-term - mul s0, a3, b0 - mul t1, a4, b1 - mul t2, a5, b2 - umulh t3, a3, b0 - umulh t4, a4, b1 - umulh s5, a5, b2 - - adds t3, t3, t1 - adcs t4, t4, t2 - adc s5, s5, xzr - - adds s1, t3, s0 - adcs s2, t4, t3 - adcs s3, s5, t4 - adc s4, s5, xzr - - adds s2, s2, s0 - adcs s3, s3, t3 - adcs s4, s4, t4 - adc s5, s5, xzr - - muldiffn(t3,t2,t1, t4, a3,a4, b1,b0) - adds xzr, t3, #1 - adcs s1, s1, t1 - adcs s2, s2, t2 - adcs s3, s3, t3 - adcs s4, s4, t3 - adc s5, s5, t3 - - muldiffn(t3,t2,t1, t4, a3,a5, b2,b0) - adds xzr, t3, #1 - adcs s2, s2, t1 - adcs s3, s3, t2 - adcs s4, s4, t3 - adc s5, s5, t3 - - muldiffn(t3,t2,t1, t4, a4,a5, b2,b1) - adds xzr, t3, #1 - adcs s3, s3, t1 - adcs s4, s4, t2 - adc s5, s5, t3 + mul s0, a3, b0 + mul t1, a4, b1 + mul t2, a5, b2 + umulh t3, a3, b0 + umulh t4, a4, b1 + umulh s5, a5, b2 + + adds t3, t3, t1 + adcs t4, t4, t2 + adc s5, s5, xzr + + adds s1, t3, s0 + adcs s2, t4, t3 + adcs s3, s5, t4 + adc s4, s5, xzr + + adds s2, s2, s0 + adcs s3, s3, t3 + adcs s4, s4, t4 + adc s5, s5, xzr + + muldiffn(t3,t2,t1, t4, a3,a4, b1,b0) + adds xzr, t3, #1 + adcs s1, s1, t1 + adcs s2, s2, t2 + adcs s3, s3, t3 + adcs s4, s4, t3 + adc s5, s5, t3 + + muldiffn(t3,t2,t1, t4, a3,a5, b2,b0) + adds xzr, t3, #1 + adcs s2, s2, t1 + adcs s3, s3, t2 + adcs s4, s4, t3 + adc s5, s5, t3 + + muldiffn(t3,t2,t1, t4, a4,a5, b2,b1) + adds xzr, t3, #1 + adcs s3, s3, t1 + adcs s4, s4, t2 + adc s5, s5, t3 // Unstash the H + L' sum to add in twice - ldp a0, a1, [x0] - 
ldp a2, a3, [x0, #16] - ldp a4, a5, [x0, #32] + ldp a0, a1, [x0] + ldp a2, a3, [x0, #16] + ldp a4, a5, [x0, #32] // Set up a sign-modified version of the mid-product in a long accumulator // as [b3;b2;b1;b0;s5;s4;s3;s2;s1;s0], adding in the H + L' term once with // zero offset as this signed value is created - adds xzr, b5, #1 - eor s0, s0, b5 - adcs s0, s0, a0 - eor s1, s1, b5 - adcs s1, s1, a1 - eor s2, s2, b5 - adcs s2, s2, a2 - eor s3, s3, b5 - adcs s3, s3, a3 - eor s4, s4, b5 - adcs s4, s4, a4 - eor s5, s5, b5 - adcs s5, s5, a5 - adcs b0, b5, s6 - adcs b1, b5, xzr - adcs b2, b5, xzr - adc b3, b5, xzr + adds xzr, b5, #1 + eor s0, s0, b5 + adcs s0, s0, a0 + eor s1, s1, b5 + adcs s1, s1, a1 + eor s2, s2, b5 + adcs s2, s2, a2 + eor s3, s3, b5 + adcs s3, s3, a3 + eor s4, s4, b5 + adcs s4, s4, a4 + eor s5, s5, b5 + adcs s5, s5, a5 + adcs b0, b5, s6 + adcs b1, b5, xzr + adcs b2, b5, xzr + adc b3, b5, xzr // Add in the stashed H + L' term an offset of 3 words as well - adds s3, s3, a0 - adcs s4, s4, a1 - adcs s5, s5, a2 - adcs b0, b0, a3 - adcs b1, b1, a4 - adcs b2, b2, a5 - adc b3, b3, s6 + adds s3, s3, a0 + adcs s4, s4, a1 + adcs s5, s5, a2 + adcs b0, b0, a3 + adcs b1, b1, a4 + adcs b2, b2, a5 + adc b3, b3, s6 // Do three more Montgomery steps on the composed term - montreds(s0,s5,s4,s3,s2,s1,s0, t1,t2,t3) - montreds(s1,s0,s5,s4,s3,s2,s1, t1,t2,t3) - montreds(s2,s1,s0,s5,s4,s3,s2, t1,t2,t3) + montreds(s0,s5,s4,s3,s2,s1,s0, t1,t2,t3) + montreds(s1,s0,s5,s4,s3,s2,s1, t1,t2,t3) + montreds(s2,s1,s0,s5,s4,s3,s2, t1,t2,t3) - adds b0, b0, s0 - adcs b1, b1, s1 - adcs b2, b2, s2 - adc b3, b3, xzr + adds b0, b0, s0 + adcs b1, b1, s1 + adcs b2, b2, s2 + adc b3, b3, xzr // Because of the way we added L' in two places, we can overspill by // more than usual in Montgomery, with the result being only known to @@ -384,45 +384,45 @@ _bignum_montmul_p384: // elaborate final correction in the style of bignum_cmul_p384, just // a little bit simpler because we know q is small. 
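In outline, the correction that follows takes q = h + 1 from the top word h and adds q * (2^384 - p_384) to the low 384 bits, which equals 2^384 + (v - q * p_384) for the full value v; the carry out then says whether p_384 must be added back. A C sketch of the same shape, assuming v = 2^384 * h + z with h * p_384 <= v < (h + 2) * p_384 (correct_mod_p384 and the constant arrays are hypothetical names):

    #include <stdint.h>

    /* 2^384 - p_384 = 2^128 + 2^96 - 2^32 + 1, little-endian words */
    static const uint64_t P384_COMP[6] = {
        0xffffffff00000001, 0x00000000ffffffff, 0x0000000000000001, 0, 0, 0
    };
    static const uint64_t P384[6] = {
        0x00000000ffffffff, 0xffffffff00000000, 0xfffffffffffffffe,
        0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff
    };

    /* Reduce v = 2^384 * h + z mod p_384; q estimate off by at most one. */
    void correct_mod_p384(uint64_t z[6], uint64_t h)
    {
        uint64_t q = h + 1;
        unsigned __int128 acc = 0;
        for (int i = 0; i < 6; i++) {       /* z + q * (2^384 - p_384) */
            acc += z[i];
            acc += (unsigned __int128)q * P384_COMP[i];
            z[i] = (uint64_t)acc;
            acc >>= 64;
        }
        /* carry out <=> v >= q * p_384; if not, add back one p_384 */
        uint64_t mask = (acc == 0) ? UINT64_MAX : 0;
        unsigned __int128 c = 0;
        for (int i = 0; i < 6; i++) {
            c += z[i];
            c += P384[i] & mask;
            z[i] = (uint64_t)c;
            c >>= 64;                       /* final carry out discarded */
        }
    }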
- add t2, b3, #1 - lsl t1, t2, #32 - subs t4, t2, t1 - sbc t1, t1, xzr - - adds s3, s3, t4 - adcs s4, s4, t1 - adcs s5, s5, t2 - adcs b0, b0, xzr - adcs b1, b1, xzr - adcs b2, b2, xzr - - csetm t2, cc - - mov t3, #0x00000000ffffffff - and t3, t3, t2 - adds s3, s3, t3 - eor t3, t3, t2 - adcs s4, s4, t3 - mov t3, #0xfffffffffffffffe - and t3, t3, t2 - adcs s5, s5, t3 - adcs b0, b0, t2 - adcs b1, b1, t2 - adc b2, b2, t2 + add t2, b3, #1 + lsl t1, t2, #32 + subs t4, t2, t1 + sbc t1, t1, xzr + + adds s3, s3, t4 + adcs s4, s4, t1 + adcs s5, s5, t2 + adcs b0, b0, xzr + adcs b1, b1, xzr + adcs b2, b2, xzr + + csetm t2, cc + + mov t3, #0x00000000ffffffff + and t3, t3, t2 + adds s3, s3, t3 + eor t3, t3, t2 + adcs s4, s4, t3 + mov t3, #0xfffffffffffffffe + and t3, t3, t2 + adcs s5, s5, t3 + adcs b0, b0, t2 + adcs b1, b1, t2 + adc b2, b2, t2 // Write back the result - stp s3, s4, [x0] - stp s5, b0, [x0, #16] - stp b1, b2, [x0, #32] + stp s3, s4, [x0] + stp s5, b0, [x0, #16] + stp b1, b2, [x0, #32] // Restore registers and return - ldp x23, x24, [sp], #16 - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_montmul_p384_alt.S b/arm/p384/bignum_montmul_p384_alt.S index 61a9ad590a..ec03123d0a 100644 --- a/arm/p384/bignum_montmul_p384_alt.S +++ b/arm/p384/bignum_montmul_p384_alt.S @@ -45,25 +45,25 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0, t3,t2,t1) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ /* Store it in d6 to make the 2^384 * w contribution already */ \ - lsl t1, d0, #32; \ - add d6, t1, d0; \ + lsl t1, d0, #32; \ + add d6, t1, d0; \ /* Now let [t3;t2;t1;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel d0 so we don't need it */ \ - mov t1, #0xffffffff00000001; \ - umulh t1, t1, d6; \ - mov t2, #0x00000000ffffffff; \ - mul t3, t2, d6; \ - umulh t2, t2, d6; \ - adds t1, t1, t3; \ - adcs t2, t2, d6; \ - adc t3, xzr, xzr; \ + mov t1, #0xffffffff00000001; \ + umulh t1, t1, d6; \ + mov t2, #0x00000000ffffffff; \ + mul t3, t2, d6; \ + umulh t2, t2, d6; \ + adds t1, t1, t3; \ + adcs t2, t2, d6; \ + adc t3, xzr, xzr; \ /* Now add it, by subtracting from 2^384 * w + x */ \ - subs d1, d1, t1; \ - sbcs d2, d2, t2; \ - sbcs d3, d3, t3; \ - sbcs d4, d4, xzr; \ - sbcs d5, d5, xzr; \ - sbc d6, d6, xzr + subs d1, d1, t1; \ + sbcs d2, d2, t2; \ + sbcs d3, d3, t3; \ + sbcs d4, d4, xzr; \ + sbcs d5, d5, xzr; \ + sbc d6, d6, xzr #define z x0 @@ -107,244 +107,244 @@ _bignum_montmul_p384_alt: // Save more registers - stp x19, x20, [sp, #-16]! - stp x21, x22, [sp, #-16]! + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! 
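This _alt variant builds the full product with plain row-by-row schoolbook multiplication (mul/umulh plus carry chains) before the same six Montgomery steps. A compact C sketch of the 6x6 -> 12 word product the rows below compute (mul_6x6 is a hypothetical name; GCC/Clang __int128 assumed):

    #include <stdint.h>

    /* Schoolbook 6x6 -> 12 word multiply, little-endian words. */
    void mul_6x6(uint64_t u[12], const uint64_t a[6], const uint64_t b[6])
    {
        for (int i = 0; i < 12; i++) u[i] = 0;
        for (int i = 0; i < 6; i++) {           /* row i = a[i] * b */
            unsigned __int128 carry = 0;
            for (int j = 0; j < 6; j++) {
                carry += (unsigned __int128)a[i] * b[j];
                carry += u[i + j];
                u[i + j] = (uint64_t)carry;
                carry >>= 64;
            }
            u[i + 6] = (uint64_t)carry;         /* row's top word */
        }
    }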
// Load operands and set up row 0 = [u6;...;u0] = a0 * [b5;...;b0] - ldp a0, a1, [x] - ldp b0, b1, [y] + ldp a0, a1, [x] + ldp b0, b1, [y] - mul u0, a0, b0 - umulh u1, a0, b0 - mul l, a0, b1 - umulh u2, a0, b1 - adds u1, u1, l + mul u0, a0, b0 + umulh u1, a0, b0 + mul l, a0, b1 + umulh u2, a0, b1 + adds u1, u1, l - ldp b2, b3, [y, #16] + ldp b2, b3, [y, #16] - mul l, a0, b2 - umulh u3, a0, b2 - adcs u2, u2, l + mul l, a0, b2 + umulh u3, a0, b2 + adcs u2, u2, l - mul l, a0, b3 - umulh u4, a0, b3 - adcs u3, u3, l + mul l, a0, b3 + umulh u4, a0, b3 + adcs u3, u3, l - ldp b4, b5, [y, #32] + ldp b4, b5, [y, #32] - mul l, a0, b4 - umulh u5, a0, b4 - adcs u4, u4, l + mul l, a0, b4 + umulh u5, a0, b4 + adcs u4, u4, l - mul l, a0, b5 - umulh u6, a0, b5 - adcs u5, u5, l + mul l, a0, b5 + umulh u6, a0, b5 + adcs u5, u5, l - adc u6, u6, xzr + adc u6, u6, xzr // Row 1 = [u7;...;u0] = [a1;a0] * [b5;...;b0] - mul l, a1, b0 - adds u1, u1, l - mul l, a1, b1 - adcs u2, u2, l - mul l, a1, b2 - adcs u3, u3, l - mul l, a1, b3 - adcs u4, u4, l - mul l, a1, b4 - adcs u5, u5, l - mul l, a1, b5 - adcs u6, u6, l - cset u7, cs - - umulh l, a1, b0 - adds u2, u2, l - umulh l, a1, b1 - adcs u3, u3, l - umulh l, a1, b2 - adcs u4, u4, l - umulh l, a1, b3 - adcs u5, u5, l - umulh l, a1, b4 - adcs u6, u6, l - umulh l, a1, b5 - adc u7, u7, l + mul l, a1, b0 + adds u1, u1, l + mul l, a1, b1 + adcs u2, u2, l + mul l, a1, b2 + adcs u3, u3, l + mul l, a1, b3 + adcs u4, u4, l + mul l, a1, b4 + adcs u5, u5, l + mul l, a1, b5 + adcs u6, u6, l + cset u7, cs + + umulh l, a1, b0 + adds u2, u2, l + umulh l, a1, b1 + adcs u3, u3, l + umulh l, a1, b2 + adcs u4, u4, l + umulh l, a1, b3 + adcs u5, u5, l + umulh l, a1, b4 + adcs u6, u6, l + umulh l, a1, b5 + adc u7, u7, l // Row 2 = [u8;...;u0] = [a2;a1;a0] * [b5;...;b0] - ldp a2, a3, [x, #16] - - mul l, a2, b0 - adds u2, u2, l - mul l, a2, b1 - adcs u3, u3, l - mul l, a2, b2 - adcs u4, u4, l - mul l, a2, b3 - adcs u5, u5, l - mul l, a2, b4 - adcs u6, u6, l - mul l, a2, b5 - adcs u7, u7, l - cset u8, cs - - umulh l, a2, b0 - adds u3, u3, l - umulh l, a2, b1 - adcs u4, u4, l - umulh l, a2, b2 - adcs u5, u5, l - umulh l, a2, b3 - adcs u6, u6, l - umulh l, a2, b4 - adcs u7, u7, l - umulh l, a2, b5 - adc u8, u8, l + ldp a2, a3, [x, #16] + + mul l, a2, b0 + adds u2, u2, l + mul l, a2, b1 + adcs u3, u3, l + mul l, a2, b2 + adcs u4, u4, l + mul l, a2, b3 + adcs u5, u5, l + mul l, a2, b4 + adcs u6, u6, l + mul l, a2, b5 + adcs u7, u7, l + cset u8, cs + + umulh l, a2, b0 + adds u3, u3, l + umulh l, a2, b1 + adcs u4, u4, l + umulh l, a2, b2 + adcs u5, u5, l + umulh l, a2, b3 + adcs u6, u6, l + umulh l, a2, b4 + adcs u7, u7, l + umulh l, a2, b5 + adc u8, u8, l // Row 3 = [u9;...;u0] = [a3;a2;a1;a0] * [b5;...;b0] - mul l, a3, b0 - adds u3, u3, l - mul l, a3, b1 - adcs u4, u4, l - mul l, a3, b2 - adcs u5, u5, l - mul l, a3, b3 - adcs u6, u6, l - mul l, a3, b4 - adcs u7, u7, l - mul l, a3, b5 - adcs u8, u8, l - cset u9, cs - - umulh l, a3, b0 - adds u4, u4, l - umulh l, a3, b1 - adcs u5, u5, l - umulh l, a3, b2 - adcs u6, u6, l - umulh l, a3, b3 - adcs u7, u7, l - umulh l, a3, b4 - adcs u8, u8, l - umulh l, a3, b5 - adc u9, u9, l + mul l, a3, b0 + adds u3, u3, l + mul l, a3, b1 + adcs u4, u4, l + mul l, a3, b2 + adcs u5, u5, l + mul l, a3, b3 + adcs u6, u6, l + mul l, a3, b4 + adcs u7, u7, l + mul l, a3, b5 + adcs u8, u8, l + cset u9, cs + + umulh l, a3, b0 + adds u4, u4, l + umulh l, a3, b1 + adcs u5, u5, l + umulh l, a3, b2 + adcs u6, u6, l + umulh l, a3, b3 + adcs u7, u7, l + umulh l, a3, b4 + adcs 
u8, u8, l + umulh l, a3, b5 + adc u9, u9, l // Row 4 = [u10;...;u0] = [a4;a3;a2;a1;a0] * [b5;...;b0] - ldp a4, a5, [x, #32] - - mul l, a4, b0 - adds u4, u4, l - mul l, a4, b1 - adcs u5, u5, l - mul l, a4, b2 - adcs u6, u6, l - mul l, a4, b3 - adcs u7, u7, l - mul l, a4, b4 - adcs u8, u8, l - mul l, a4, b5 - adcs u9, u9, l - cset u10, cs - - umulh l, a4, b0 - adds u5, u5, l - umulh l, a4, b1 - adcs u6, u6, l - umulh l, a4, b2 - adcs u7, u7, l - umulh l, a4, b3 - adcs u8, u8, l - umulh l, a4, b4 - adcs u9, u9, l - umulh l, a4, b5 - adc u10, u10, l + ldp a4, a5, [x, #32] + + mul l, a4, b0 + adds u4, u4, l + mul l, a4, b1 + adcs u5, u5, l + mul l, a4, b2 + adcs u6, u6, l + mul l, a4, b3 + adcs u7, u7, l + mul l, a4, b4 + adcs u8, u8, l + mul l, a4, b5 + adcs u9, u9, l + cset u10, cs + + umulh l, a4, b0 + adds u5, u5, l + umulh l, a4, b1 + adcs u6, u6, l + umulh l, a4, b2 + adcs u7, u7, l + umulh l, a4, b3 + adcs u8, u8, l + umulh l, a4, b4 + adcs u9, u9, l + umulh l, a4, b5 + adc u10, u10, l // Row 5 = [u11;...;u0] = [a5;a4;a3;a2;a1;a0] * [b5;...;b0] - mul l, a5, b0 - adds u5, u5, l - mul l, a5, b1 - adcs u6, u6, l - mul l, a5, b2 - adcs u7, u7, l - mul l, a5, b3 - adcs u8, u8, l - mul l, a5, b4 - adcs u9, u9, l - mul l, a5, b5 - adcs u10, u10, l - cset u11, cs - - umulh l, a5, b0 - adds u6, u6, l - umulh l, a5, b1 - adcs u7, u7, l - umulh l, a5, b2 - adcs u8, u8, l - umulh l, a5, b3 - adcs u9, u9, l - umulh l, a5, b4 - adcs u10, u10, l - umulh l, a5, b5 - adc u11, u11, l + mul l, a5, b0 + adds u5, u5, l + mul l, a5, b1 + adcs u6, u6, l + mul l, a5, b2 + adcs u7, u7, l + mul l, a5, b3 + adcs u8, u8, l + mul l, a5, b4 + adcs u9, u9, l + mul l, a5, b5 + adcs u10, u10, l + cset u11, cs + + umulh l, a5, b0 + adds u6, u6, l + umulh l, a5, b1 + adcs u7, u7, l + umulh l, a5, b2 + adcs u8, u8, l + umulh l, a5, b3 + adcs u9, u9, l + umulh l, a5, b4 + adcs u10, u10, l + umulh l, a5, b5 + adc u11, u11, l // Montgomery rotate the low half - montreds(u0,u5,u4,u3,u2,u1,u0, b0,b1,b2) - montreds(u1,u0,u5,u4,u3,u2,u1, b0,b1,b2) - montreds(u2,u1,u0,u5,u4,u3,u2, b0,b1,b2) - montreds(u3,u2,u1,u0,u5,u4,u3, b0,b1,b2) - montreds(u4,u3,u2,u1,u0,u5,u4, b0,b1,b2) - montreds(u5,u4,u3,u2,u1,u0,u5, b0,b1,b2) + montreds(u0,u5,u4,u3,u2,u1,u0, b0,b1,b2) + montreds(u1,u0,u5,u4,u3,u2,u1, b0,b1,b2) + montreds(u2,u1,u0,u5,u4,u3,u2, b0,b1,b2) + montreds(u3,u2,u1,u0,u5,u4,u3, b0,b1,b2) + montreds(u4,u3,u2,u1,u0,u5,u4, b0,b1,b2) + montreds(u5,u4,u3,u2,u1,u0,u5, b0,b1,b2) // Add up the high and low parts as [h; u5;u4;u3;u2;u1;u0] = z - adds u0, u0, u6 - adcs u1, u1, u7 - adcs u2, u2, u8 - adcs u3, u3, u9 - adcs u4, u4, u10 - adcs u5, u5, u11 - adc h, xzr, xzr + adds u0, u0, u6 + adcs u1, u1, u7 + adcs u2, u2, u8 + adcs u3, u3, u9 + adcs u4, u4, u10 + adcs u5, u5, u11 + adc h, xzr, xzr // Now add [h; u11;u10;u9;u8;u7;u6] = z + (2^384 - p_384) - mov l, #0xffffffff00000001 - adds u6, u0, l - mov l, #0x00000000ffffffff - adcs u7, u1, l - mov l, #0x0000000000000001 - adcs u8, u2, l - adcs u9, u3, xzr - adcs u10, u4, xzr - adcs u11, u5, xzr - adcs h, h, xzr + mov l, #0xffffffff00000001 + adds u6, u0, l + mov l, #0x00000000ffffffff + adcs u7, u1, l + mov l, #0x0000000000000001 + adcs u8, u2, l + adcs u9, u3, xzr + adcs u10, u4, xzr + adcs u11, u5, xzr + adcs h, h, xzr // Now z >= p_384 iff h is nonzero, so select accordingly - csel u0, u0, u6, eq - csel u1, u1, u7, eq - csel u2, u2, u8, eq - csel u3, u3, u9, eq - csel u4, u4, u10, eq - csel u5, u5, u11, eq + csel u0, u0, u6, eq + csel u1, u1, u7, eq + csel u2, u2, u8, eq + csel u3, u3, u9, 
eq + csel u4, u4, u10, eq + csel u5, u5, u11, eq // Store back final result - stp u0, u1, [z] - stp u2, u3, [z, #16] - stp u4, u5, [z, #32] + stp u0, u1, [z] + stp u2, u3, [z, #16] + stp u4, u5, [z, #32] // Restore registers - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_montsqr_p384.S b/arm/p384/bignum_montsqr_p384.S index d9f30aed9d..01b52cc000 100644 --- a/arm/p384/bignum_montsqr_p384.S +++ b/arm/p384/bignum_montsqr_p384.S @@ -62,29 +62,29 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0, t3,t2,t1) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ /* Recycle d0 (which we know gets implicitly cancelled) to store it */ \ - lsl t1, d0, #32; \ - add d0, t1, d0; \ + lsl t1, d0, #32; \ + add d0, t1, d0; \ /* Now let [t2;t1] = 2^64 * w - w + w_hi where w_hi = floor(w/2^32) */ \ /* We need to subtract 2^32 * this, and we can ignore its lower 32 */ \ /* bits since by design it will cancel anyway; we only need the w_hi */ \ /* part to get the carry propagation going. */ \ - lsr t1, d0, #32; \ - subs t1, t1, d0; \ - sbc t2, d0, xzr; \ + lsr t1, d0, #32; \ + subs t1, t1, d0; \ + sbc t2, d0, xzr; \ /* Now select in t1 the field to subtract from d1 */ \ - extr t1, t2, t1, #32; \ + extr t1, t2, t1, #32; \ /* And now get the terms to subtract from d2 and d3 */ \ - lsr t2, t2, #32; \ - adds t2, t2, d0; \ - adc t3, xzr, xzr; \ + lsr t2, t2, #32; \ + adds t2, t2, d0; \ + adc t3, xzr, xzr; \ /* Do the subtraction of that portion */ \ - subs d1, d1, t1; \ - sbcs d2, d2, t2; \ - sbcs d3, d3, t3; \ - sbcs d4, d4, xzr; \ - sbcs d5, d5, xzr; \ + subs d1, d1, t1; \ + sbcs d2, d2, t2; \ + sbcs d3, d3, t3; \ + sbcs d4, d4, xzr; \ + sbcs d5, d5, xzr; \ /* Now effectively add 2^384 * w by taking d0 as the input for last sbc */ \ - sbc d6, d0, xzr + sbc d6, d0, xzr #define a0 x2 #define a1 x3 @@ -109,55 +109,55 @@ _bignum_montsqr_p384: // Load in all words of the input - ldp a0, a1, [x1] - ldp a2, a3, [x1, #16] - ldp a4, a5, [x1, #32] + ldp a0, a1, [x1] + ldp a2, a3, [x1, #16] + ldp a4, a5, [x1, #32] // Square the low half getting a result in [c5;c4;c3;c2;c1;c0] - mul d1, a0, a1 - mul d2, a0, a2 - mul d3, a1, a2 - mul c0, a0, a0 - mul c2, a1, a1 - mul c4, a2, a2 - - umulh d4, a0, a1 - adds d2, d2, d4 - umulh d4, a0, a2 - adcs d3, d3, d4 - umulh d4, a1, a2 - adcs d4, d4, xzr - - umulh c1, a0, a0 - umulh c3, a1, a1 - umulh c5, a2, a2 - - adds d1, d1, d1 - adcs d2, d2, d2 - adcs d3, d3, d3 - adcs d4, d4, d4 - adc c5, c5, xzr - - adds c1, c1, d1 - adcs c2, c2, d2 - adcs c3, c3, d3 - adcs c4, c4, d4 - adc c5, c5, xzr + mul d1, a0, a1 + mul d2, a0, a2 + mul d3, a1, a2 + mul c0, a0, a0 + mul c2, a1, a1 + mul c4, a2, a2 + + umulh d4, a0, a1 + adds d2, d2, d4 + umulh d4, a0, a2 + adcs d3, d3, d4 + umulh d4, a1, a2 + adcs d4, d4, xzr + + umulh c1, a0, a0 + umulh c3, a1, a1 + umulh c5, a2, a2 + + adds d1, d1, d1 + adcs d2, d2, d2 + adcs d3, d3, d3 + adcs d4, d4, d4 + adc c5, c5, xzr + + adds c1, c1, d1 + adcs c2, c2, d2 + adcs c3, c3, d3 + adcs c4, c4, d4 + adc c5, c5, xzr // Perform three "short" Montgomery steps on the low square // This shifts it to an offset compatible with middle product // Stash the result temporarily in the output buffer (to avoid more registers) - montreds(c0,c5,c4,c3,c2,c1,c0, d1,d2,d3) + montreds(c0,c5,c4,c3,c2,c1,c0, d1,d2,d3) - montreds(c1,c0,c5,c4,c3,c2,c1, d1,d2,d3) + montreds(c1,c0,c5,c4,c3,c2,c1, d1,d2,d3) - 
montreds(c2,c1,c0,c5,c4,c3,c2, d1,d2,d3) + montreds(c2,c1,c0,c5,c4,c3,c2, d1,d2,d3) - stp c3, c4, [x0] - stp c5, c0, [x0, #16] - stp c1, c2, [x0, #32] + stp c3, c4, [x0] + stp c5, c0, [x0, #16] + stp c1, c2, [x0, #32] // Compute product of the cross-term with ADK 3x3->6 multiplier @@ -186,81 +186,81 @@ _bignum_montsqr_p384: #define l h0 #define t h1 - mul s0, a0, a3 - mul l1, a1, a4 - mul l2, a2, a5 - umulh h0, a0, a3 - umulh h1, a1, a4 - umulh h2, a2, a5 - - adds h0, h0, l1 - adcs h1, h1, l2 - adc h2, h2, xzr - - adds s1, h0, s0 - adcs s2, h1, h0 - adcs s3, h2, h1 - adc s4, h2, xzr - - adds s2, s2, s0 - adcs s3, s3, h0 - adcs s4, s4, h1 - adc s5, h2, xzr - - muldiffn(c,h,l, t, a0,a1, a4,a3) - adds xzr, c, #1 - adcs s1, s1, l - adcs s2, s2, h - adcs s3, s3, c - adcs s4, s4, c - adc s5, s5, c - - muldiffn(c,h,l, t, a0,a2, a5,a3) - adds xzr, c, #1 - adcs s2, s2, l - adcs s3, s3, h - adcs s4, s4, c - adc s5, s5, c - - muldiffn(c,h,l, t, a1,a2, a5,a4) - adds xzr, c, #1 - adcs s3, s3, l - adcs s4, s4, h - adc s5, s5, c + mul s0, a0, a3 + mul l1, a1, a4 + mul l2, a2, a5 + umulh h0, a0, a3 + umulh h1, a1, a4 + umulh h2, a2, a5 + + adds h0, h0, l1 + adcs h1, h1, l2 + adc h2, h2, xzr + + adds s1, h0, s0 + adcs s2, h1, h0 + adcs s3, h2, h1 + adc s4, h2, xzr + + adds s2, s2, s0 + adcs s3, s3, h0 + adcs s4, s4, h1 + adc s5, h2, xzr + + muldiffn(c,h,l, t, a0,a1, a4,a3) + adds xzr, c, #1 + adcs s1, s1, l + adcs s2, s2, h + adcs s3, s3, c + adcs s4, s4, c + adc s5, s5, c + + muldiffn(c,h,l, t, a0,a2, a5,a3) + adds xzr, c, #1 + adcs s2, s2, l + adcs s3, s3, h + adcs s4, s4, c + adc s5, s5, c + + muldiffn(c,h,l, t, a1,a2, a5,a4) + adds xzr, c, #1 + adcs s3, s3, l + adcs s4, s4, h + adc s5, s5, c // Double it and add the stashed Montgomerified low square - adds s0, s0, s0 - adcs s1, s1, s1 - adcs s2, s2, s2 - adcs s3, s3, s3 - adcs s4, s4, s4 - adcs s5, s5, s5 - adc s6, xzr, xzr - - ldp a0, a1, [x0] - adds s0, s0, a0 - adcs s1, s1, a1 - ldp a0, a1, [x0, #16] - adcs s2, s2, a0 - adcs s3, s3, a1 - ldp a0, a1, [x0, #32] - adcs s4, s4, a0 - adcs s5, s5, a1 - adc s6, s6, xzr + adds s0, s0, s0 + adcs s1, s1, s1 + adcs s2, s2, s2 + adcs s3, s3, s3 + adcs s4, s4, s4 + adcs s5, s5, s5 + adc s6, xzr, xzr + + ldp a0, a1, [x0] + adds s0, s0, a0 + adcs s1, s1, a1 + ldp a0, a1, [x0, #16] + adcs s2, s2, a0 + adcs s3, s3, a1 + ldp a0, a1, [x0, #32] + adcs s4, s4, a0 + adcs s5, s5, a1 + adc s6, s6, xzr // Montgomery-reduce the combined low and middle term another thrice - montreds(s0,s5,s4,s3,s2,s1,s0, a0,a1,a2) + montreds(s0,s5,s4,s3,s2,s1,s0, a0,a1,a2) - montreds(s1,s0,s5,s4,s3,s2,s1, a0,a1,a2) + montreds(s1,s0,s5,s4,s3,s2,s1, a0,a1,a2) - montreds(s2,s1,s0,s5,s4,s3,s2, a0,a1,a2) + montreds(s2,s1,s0,s5,s4,s3,s2, a0,a1,a2) - adds s6, s6, s0 - adcs s0, s1, xzr - adcs s1, s2, xzr - adcs s2, xzr, xzr + adds s6, s6, s0 + adcs s0, s1, xzr + adcs s1, s2, xzr + adcs s2, xzr, xzr // Our sum so far is in [s2;s1;s0;s6;s5;s4;s3] // Choose more intuitive names @@ -288,48 +288,48 @@ _bignum_montsqr_p384: // Add in all the pure squares 33 + 44 + 55 - mul t1, a3, a3 - adds r0, r0, t1 - mul t2, a4, a4 - mul t3, a5, a5 - umulh t1, a3, a3 - adcs r1, r1, t1 - umulh t1, a4, a4 - adcs r2, r2, t2 - adcs r3, r3, t1 - umulh t1, a5, a5 - adcs r4, r4, t3 - adcs r5, r5, t1 - adc r6, r6, xzr + mul t1, a3, a3 + adds r0, r0, t1 + mul t2, a4, a4 + mul t3, a5, a5 + umulh t1, a3, a3 + adcs r1, r1, t1 + umulh t1, a4, a4 + adcs r2, r2, t2 + adcs r3, r3, t1 + umulh t1, a5, a5 + adcs r4, r4, t3 + adcs r5, r5, t1 + adc r6, r6, xzr // Now compose the 34 + 35 + 
45 terms, which need doubling - mul t1, a3, a4 - mul t2, a3, a5 - mul t3, a4, a5 - umulh t4, a3, a4 - adds t2, t2, t4 - umulh t4, a3, a5 - adcs t3, t3, t4 - umulh t4, a4, a5 - adc t4, t4, xzr + mul t1, a3, a4 + mul t2, a3, a5 + mul t3, a4, a5 + umulh t4, a3, a4 + adds t2, t2, t4 + umulh t4, a3, a5 + adcs t3, t3, t4 + umulh t4, a4, a5 + adc t4, t4, xzr // Double and add. Recycle one of the no-longer-needed inputs as a temp #define t5 x5 - adds t1, t1, t1 - adcs t2, t2, t2 - adcs t3, t3, t3 - adcs t4, t4, t4 - adc t5, xzr, xzr + adds t1, t1, t1 + adcs t2, t2, t2 + adcs t3, t3, t3 + adcs t4, t4, t4 + adc t5, xzr, xzr - adds r1, r1, t1 - adcs r2, r2, t2 - adcs r3, r3, t3 - adcs r4, r4, t4 - adcs r5, r5, t5 - adc r6, r6, xzr + adds r1, r1, t1 + adcs r2, r2, t2 + adcs r3, r3, t3 + adcs r4, r4, t4 + adcs r5, r5, t5 + adc r6, r6, xzr // We know, writing B = 2^{6*64} that the full implicit result is // B^2 c <= z + (B - 1) * p < B * p + (B - 1) * p < 2 * B * p, @@ -338,48 +338,48 @@ _bignum_montsqr_p384: // comparison to catch cases where the residue is >= p. // First set [0;0;0;t3;t2;t1] = 2^384 - p_384 - mov t1, #0xffffffff00000001 - mov t2, #0x00000000ffffffff - mov t3, #0x0000000000000001 + mov t1, #0xffffffff00000001 + mov t2, #0x00000000ffffffff + mov t3, #0x0000000000000001 // Let dd = [] be the 6-word intermediate result. // Set CF if the addition dd + (2^384 - p_384) >= 2^384, hence iff dd >= p_384. - adds xzr, r0, t1 - adcs xzr, r1, t2 - adcs xzr, r2, t3 - adcs xzr, r3, xzr - adcs xzr, r4, xzr - adcs xzr, r5, xzr + adds xzr, r0, t1 + adcs xzr, r1, t2 + adcs xzr, r2, t3 + adcs xzr, r3, xzr + adcs xzr, r4, xzr + adcs xzr, r5, xzr // Now just add this new carry into the existing r6. It's easy to see they // can't both be 1 by our range assumptions, so this gives us a {0,1} flag - adc r6, r6, xzr + adc r6, r6, xzr // Now convert it into a bitmask - sub r6, xzr, r6 + sub r6, xzr, r6 // Masked addition of 2^384 - p_384, hence subtraction of p_384 - and t1, t1, r6 - adds r0, r0, t1 - and t2, t2, r6 - adcs r1, r1, t2 - and t3, t3, r6 - adcs r2, r2, t3 - adcs r3, r3, xzr - adcs r4, r4, xzr - adc r5, r5, xzr + and t1, t1, r6 + adds r0, r0, t1 + and t2, t2, r6 + adcs r1, r1, t2 + and t3, t3, r6 + adcs r2, r2, t3 + adcs r3, r3, xzr + adcs r4, r4, xzr + adc r5, r5, xzr // Store it back - stp r0, r1, [x0] - stp r2, r3, [x0, #16] - stp r4, r5, [x0, #32] + stp r0, r1, [x0] + stp r2, r3, [x0, #16] + stp r4, r5, [x0, #32] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_montsqr_p384_alt.S b/arm/p384/bignum_montsqr_p384_alt.S index f10a6aad17..7a0ed8de8e 100644 --- a/arm/p384/bignum_montsqr_p384_alt.S +++ b/arm/p384/bignum_montsqr_p384_alt.S @@ -44,25 +44,25 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0, t3,t2,t1) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ /* Store it in d6 to make the 2^384 * w contribution already */ \ - lsl t1, d0, #32; \ - add d6, t1, d0; \ + lsl t1, d0, #32; \ + add d6, t1, d0; \ /* Now let [t3;t2;t1;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel d0 so we don't need it */ \ - mov t1, #0xffffffff00000001; \ - umulh t1, t1, d6; \ - mov t2, #0x00000000ffffffff; \ - mul t3, t2, d6; \ - umulh t2, t2, d6; \ - adds t1, t1, t3; \ - adcs t2, t2, d6; \ - adc t3, xzr, xzr; \ + mov t1, #0xffffffff00000001; \ + umulh t1, t1, d6; \ + mov t2, #0x00000000ffffffff; \ + mul t3, t2, d6; \ + umulh t2, t2, d6; \ + adds t1, t1, t3; \ + adcs t2, t2, d6; \ + adc t3, xzr, xzr; \ /* Now add it, by 
subtracting from 2^384 * w + x */ \ - subs d1, d1, t1; \ - sbcs d2, d2, t2; \ - sbcs d3, d3, t3; \ - sbcs d4, d4, xzr; \ - sbcs d5, d5, xzr; \ - sbc d6, d6, xzr + subs d1, d1, t1; \ + sbcs d2, d2, t2; \ + sbcs d3, d3, t3; \ + sbcs d4, d4, xzr; \ + sbcs d5, d5, xzr; \ + sbc d6, d6, xzr #define z x0 #define x x1 @@ -95,185 +95,185 @@ _bignum_montsqr_p384_alt: // It's convenient to have two more registers to play with - stp x19, x20, [sp, #-16]! + stp x19, x20, [sp, #-16]! // Load all the elements as [a5;a4;a3;a2;a1;a0], set up an initial // window [u8;u7; u6;u5; u4;u3; u2;u1] = [34;05;03;01], and then // chain in the addition of 02 + 12 + 13 + 14 + 15 to that window // (no carry-out possible since we add it to the top of a product). - ldp a0, a1, [x] + ldp a0, a1, [x] - mul u1, a0, a1 - umulh u2, a0, a1 + mul u1, a0, a1 + umulh u2, a0, a1 - ldp a2, a3, [x, #16] + ldp a2, a3, [x, #16] - mul l, a0, a2 - adds u2, u2, l + mul l, a0, a2 + adds u2, u2, l - mul u3, a0, a3 - mul l, a1, a2 - adcs u3, u3, l + mul u3, a0, a3 + mul l, a1, a2 + adcs u3, u3, l - umulh u4, a0, a3 - mul l, a1, a3 - adcs u4, u4, l + umulh u4, a0, a3 + mul l, a1, a3 + adcs u4, u4, l - ldp a4, a5, [x, #32] + ldp a4, a5, [x, #32] - mul u5, a0, a5 - mul l, a1, a4 - adcs u5, u5, l + mul u5, a0, a5 + mul l, a1, a4 + adcs u5, u5, l - umulh u6, a0, a5 - mul l, a1, a5 - adcs u6, u6, l + umulh u6, a0, a5 + mul l, a1, a5 + adcs u6, u6, l - mul u7, a3, a4 - adcs u7, u7, xzr + mul u7, a3, a4 + adcs u7, u7, xzr - umulh u8, a3, a4 - adc u8, u8, xzr + umulh u8, a3, a4 + adc u8, u8, xzr - umulh l, a0, a2 - adds u3, u3, l - umulh l, a1, a2 - adcs u4, u4, l - umulh l, a1, a3 - adcs u5, u5, l - umulh l, a1, a4 - adcs u6, u6, l - umulh l, a1, a5 - adcs u7, u7, l - adc u8, u8, xzr + umulh l, a0, a2 + adds u3, u3, l + umulh l, a1, a2 + adcs u4, u4, l + umulh l, a1, a3 + adcs u5, u5, l + umulh l, a1, a4 + adcs u6, u6, l + umulh l, a1, a5 + adcs u7, u7, l + adc u8, u8, xzr // Now chain in the 04 + 23 + 24 + 25 + 35 + 45 terms - mul l, a0, a4 - adds u4, u4, l - mul l, a2, a3 - adcs u5, u5, l - mul l, a2, a4 - adcs u6, u6, l - mul l, a2, a5 - adcs u7, u7, l - mul l, a3, a5 - adcs u8, u8, l - mul u9, a4, a5 - adcs u9, u9, xzr - umulh u10, a4, a5 - adc u10, u10, xzr - - umulh l, a0, a4 - adds u5, u5, l - umulh l, a2, a3 - adcs u6, u6, l - umulh l, a2, a4 - adcs u7, u7, l - umulh l, a2, a5 - adcs u8, u8, l - umulh l, a3, a5 - adcs u9, u9, l - adc u10, u10, xzr + mul l, a0, a4 + adds u4, u4, l + mul l, a2, a3 + adcs u5, u5, l + mul l, a2, a4 + adcs u6, u6, l + mul l, a2, a5 + adcs u7, u7, l + mul l, a3, a5 + adcs u8, u8, l + mul u9, a4, a5 + adcs u9, u9, xzr + umulh u10, a4, a5 + adc u10, u10, xzr + + umulh l, a0, a4 + adds u5, u5, l + umulh l, a2, a3 + adcs u6, u6, l + umulh l, a2, a4 + adcs u7, u7, l + umulh l, a2, a5 + adcs u8, u8, l + umulh l, a3, a5 + adcs u9, u9, l + adc u10, u10, xzr // Double that, with u11 holding the top carry - adds u1, u1, u1 - adcs u2, u2, u2 - adcs u3, u3, u3 - adcs u4, u4, u4 - adcs u5, u5, u5 - adcs u6, u6, u6 - adcs u7, u7, u7 - adcs u8, u8, u8 - adcs u9, u9, u9 - adcs u10, u10, u10 - cset u11, cs + adds u1, u1, u1 + adcs u2, u2, u2 + adcs u3, u3, u3 + adcs u4, u4, u4 + adcs u5, u5, u5 + adcs u6, u6, u6 + adcs u7, u7, u7 + adcs u8, u8, u8 + adcs u9, u9, u9 + adcs u10, u10, u10 + cset u11, cs // Add the homogeneous terms 00 + 11 + 22 + 33 + 44 + 55 - umulh l, a0, a0 - mul u0, a0, a0 - adds u1, u1, l + umulh l, a0, a0 + mul u0, a0, a0 + adds u1, u1, l - mul l, a1, a1 - adcs u2, u2, l - umulh l, a1, a1 - adcs u3, u3, l + mul 
l, a1, a1 + adcs u2, u2, l + umulh l, a1, a1 + adcs u3, u3, l - mul l, a2, a2 - adcs u4, u4, l - umulh l, a2, a2 - adcs u5, u5, l + mul l, a2, a2 + adcs u4, u4, l + umulh l, a2, a2 + adcs u5, u5, l - mul l, a3, a3 - adcs u6, u6, l - umulh l, a3, a3 - adcs u7, u7, l + mul l, a3, a3 + adcs u6, u6, l + umulh l, a3, a3 + adcs u7, u7, l - mul l, a4, a4 - adcs u8, u8, l - umulh l, a4, a4 - adcs u9, u9, l + mul l, a4, a4 + adcs u8, u8, l + umulh l, a4, a4 + adcs u9, u9, l - mul l, a5, a5 - adcs u10, u10, l - umulh l, a5, a5 - adc u11, u11, l + mul l, a5, a5 + adcs u10, u10, l + umulh l, a5, a5 + adc u11, u11, l // Montgomery rotate the low half - montreds(u0,u5,u4,u3,u2,u1,u0, a1,a2,a3) - montreds(u1,u0,u5,u4,u3,u2,u1, a1,a2,a3) - montreds(u2,u1,u0,u5,u4,u3,u2, a1,a2,a3) - montreds(u3,u2,u1,u0,u5,u4,u3, a1,a2,a3) - montreds(u4,u3,u2,u1,u0,u5,u4, a1,a2,a3) - montreds(u5,u4,u3,u2,u1,u0,u5, a1,a2,a3) + montreds(u0,u5,u4,u3,u2,u1,u0, a1,a2,a3) + montreds(u1,u0,u5,u4,u3,u2,u1, a1,a2,a3) + montreds(u2,u1,u0,u5,u4,u3,u2, a1,a2,a3) + montreds(u3,u2,u1,u0,u5,u4,u3, a1,a2,a3) + montreds(u4,u3,u2,u1,u0,u5,u4, a1,a2,a3) + montreds(u5,u4,u3,u2,u1,u0,u5, a1,a2,a3) // Add up the high and low parts as [h; u5;u4;u3;u2;u1;u0] = z - adds u0, u0, u6 - adcs u1, u1, u7 - adcs u2, u2, u8 - adcs u3, u3, u9 - adcs u4, u4, u10 - adcs u5, u5, u11 - adc h, xzr, xzr + adds u0, u0, u6 + adcs u1, u1, u7 + adcs u2, u2, u8 + adcs u3, u3, u9 + adcs u4, u4, u10 + adcs u5, u5, u11 + adc h, xzr, xzr // Now add [h; u11;u10;u9;u8;u7;u6] = z + (2^384 - p_384) - mov l, #0xffffffff00000001 - adds u6, u0, l - mov l, #0x00000000ffffffff - adcs u7, u1, l - mov l, #0x0000000000000001 - adcs u8, u2, l - adcs u9, u3, xzr - adcs u10, u4, xzr - adcs u11, u5, xzr - adcs h, h, xzr + mov l, #0xffffffff00000001 + adds u6, u0, l + mov l, #0x00000000ffffffff + adcs u7, u1, l + mov l, #0x0000000000000001 + adcs u8, u2, l + adcs u9, u3, xzr + adcs u10, u4, xzr + adcs u11, u5, xzr + adcs h, h, xzr // Now z >= p_384 iff h is nonzero, so select accordingly - csel u0, u0, u6, eq - csel u1, u1, u7, eq - csel u2, u2, u8, eq - csel u3, u3, u9, eq - csel u4, u4, u10, eq - csel u5, u5, u11, eq + csel u0, u0, u6, eq + csel u1, u1, u7, eq + csel u2, u2, u8, eq + csel u3, u3, u9, eq + csel u4, u4, u10, eq + csel u5, u5, u11, eq // Store back final result - stp u0, u1, [z] - stp u2, u3, [z, #16] - stp u4, u5, [z, #32] + stp u0, u1, [z] + stp u2, u3, [z, #16] + stp u4, u5, [z, #32] // Restore registers - ldp x19, x20, [sp], #16 + ldp x19, x20, [sp], #16 - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_mux_6.S b/arm/p384/bignum_mux_6.S index ec472ad831..e8f13bc2a8 100644 --- a/arm/p384/bignum_mux_6.S +++ b/arm/p384/bignum_mux_6.S @@ -41,39 +41,39 @@ bignum_mux_6: _bignum_mux_6: - cmp p, #0 // Set condition codes p = 0 + cmp p, #0 // Set condition codes p = 0 - ldr a, [x] - ldr p, [y] - csel a, a, p, ne - str a, [z] + ldr a, [x] + ldr p, [y] + csel a, a, p, ne + str a, [z] - ldr a, [x, #8] - ldr p, [y, #8] - csel a, a, p, ne - str a, [z, #8] + ldr a, [x, #8] + ldr p, [y, #8] + csel a, a, p, ne + str a, [z, #8] - ldr a, [x, #16] - ldr p, [y, #16] - csel a, a, p, ne - str a, [z, #16] + ldr a, [x, #16] + ldr p, [y, #16] + csel a, a, p, ne + str a, [z, #16] - ldr a, [x, #24] - ldr p, [y, #24] - csel a, a, p, ne - str a, [z, #24] + ldr a, [x, #24] + ldr p, [y, #24] + csel a, a, p, ne + str a, [z, #24] - ldr a, [x, #32] - ldr p, [y, #32] - csel a, a, p, ne - str a, [z, #32] + ldr a, [x, #32] + ldr p, 
[y, #32] + csel a, a, p, ne + str a, [z, #32] - ldr a, [x, #40] - ldr p, [y, #40] - csel a, a, p, ne - str a, [z, #40] + ldr a, [x, #40] + ldr p, [y, #40] + csel a, a, p, ne + str a, [z, #40] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_neg_p384.S b/arm/p384/bignum_neg_p384.S index 9a1d25d15a..94a23c3f11 100644 --- a/arm/p384/bignum_neg_p384.S +++ b/arm/p384/bignum_neg_p384.S @@ -46,49 +46,49 @@ _bignum_neg_p384: // Load the 6 digits of x - ldp d0, d1, [x] - ldp d2, d3, [x, #16] - ldp d4, d5, [x, #32] + ldp d0, d1, [x] + ldp d2, d3, [x, #16] + ldp d4, d5, [x, #32] // Set a bitmask p for the input being nonzero, so that we avoid doing // -0 = p_384 and hence maintain strict modular reduction - orr p, d0, d1 - orr t, d2, d3 - orr p, p, t - orr t, d4, d5 - orr p, p, t - cmp p, #0 - csetm p, ne + orr p, d0, d1 + orr t, d2, d3 + orr p, p, t + orr t, d4, d5 + orr p, p, t + cmp p, #0 + csetm p, ne // Load and mask the complicated lower three words of // p_384 = [-1;-1;-1;n2;n1;n0] and subtract, using mask itself for upper digits - mov t, #0x00000000ffffffff - and t, t, p - subs d0, t, d0 + mov t, #0x00000000ffffffff + and t, t, p + subs d0, t, d0 - mov t, #0xffffffff00000000 - and t, t, p - sbcs d1, t, d1 + mov t, #0xffffffff00000000 + and t, t, p + sbcs d1, t, d1 - mov t, #0xfffffffffffffffe - and t, t, p - sbcs d2, t, d2 + mov t, #0xfffffffffffffffe + and t, t, p + sbcs d2, t, d2 - sbcs d3, p, d3 - sbcs d4, p, d4 - sbc d5, p, d5 + sbcs d3, p, d3 + sbcs d4, p, d4 + sbc d5, p, d5 // Write back the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] // Return - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_nonzero_6.S b/arm/p384/bignum_nonzero_6.S index aa3524e4ed..29e3ca6b31 100644 --- a/arm/p384/bignum_nonzero_6.S +++ b/arm/p384/bignum_nonzero_6.S @@ -38,21 +38,21 @@ _bignum_nonzero_6: // Generate a = an OR of all the words in the bignum - ldp a, d, [x] - orr a, a, d - ldp c, d, [x, #16] - orr c, c, d - orr a, a, c - ldp c, d, [x, #32] - orr c, c, d - orr a, a, c + ldp a, d, [x] + orr a, a, d + ldp c, d, [x, #16] + orr c, c, d + orr a, a, c + ldp c, d, [x, #32] + orr c, c, d + orr a, a, c // Set a standard C condition based on whether a is nonzero - cmp a, xzr - cset x0, ne + cmp a, xzr + cset x0, ne - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_optneg_p384.S b/arm/p384/bignum_optneg_p384.S index a5e11467a7..cc269d28d9 100644 --- a/arm/p384/bignum_optneg_p384.S +++ b/arm/p384/bignum_optneg_p384.S @@ -52,57 +52,57 @@ _bignum_optneg_p384: // Load the 6 digits of x - ldp d0, d1, [x] - ldp d2, d3, [x, #16] - ldp d4, d5, [x, #32] + ldp d0, d1, [x] + ldp d2, d3, [x, #16] + ldp d4, d5, [x, #32] // Adjust p by zeroing it if the input is zero (to avoid giving -0 = p, which // is not strictly reduced even though it's correct modulo p) - orr n0, d0, d1 - orr n1, d2, d3 - orr n2, d4, d5 - orr n3, n0, n1 - orr n4, n2, n3 - cmp n4, #0 - csel p, xzr, p, eq + orr n0, d0, d1 + orr n1, d2, d3 + orr n2, d4, d5 + orr n3, n0, n1 + orr n4, n2, n3 + cmp n4, #0 + csel p, xzr, p, eq // Load the complicated lower three words of p_384 = [-1;-1;-1;n2;n1;n0] and -1 - mov n0, #0x00000000ffffffff - mov n1, #0xffffffff00000000 - mov n2, #0xfffffffffffffffe - mov n5, #0xffffffffffffffff + mov n0, #0x00000000ffffffff + mov 
n1, #0xffffffff00000000 + mov n2, #0xfffffffffffffffe + mov n5, #0xffffffffffffffff // Do the subtraction, which by hypothesis does not underflow - subs n0, n0, d0 - sbcs n1, n1, d1 - sbcs n2, n2, d2 - sbcs n3, n5, d3 - sbcs n4, n5, d4 - sbcs n5, n5, d5 + subs n0, n0, d0 + sbcs n1, n1, d1 + sbcs n2, n2, d2 + sbcs n3, n5, d3 + sbcs n4, n5, d4 + sbcs n5, n5, d5 // Set condition code if original x is nonzero and p was nonzero - cmp p, #0 + cmp p, #0 // Hence multiplex and write back - csel n0, n0, d0, ne - csel n1, n1, d1, ne - csel n2, n2, d2, ne - csel n3, n3, d3, ne - csel n4, n4, d4, ne - csel n5, n5, d5, ne + csel n0, n0, d0, ne + csel n1, n1, d1, ne + csel n2, n2, d2, ne + csel n3, n3, d3, ne + csel n4, n4, d4, ne + csel n5, n5, d5, ne - stp n0, n1, [z] - stp n2, n3, [z, #16] - stp n4, n5, [z, #32] + stp n0, n1, [z] + stp n2, n3, [z, #16] + stp n4, n5, [z, #32] // Return - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_sub_p384.S b/arm/p384/bignum_sub_p384.S index a3edf9e7d3..5e83fc26a0 100644 --- a/arm/p384/bignum_sub_p384.S +++ b/arm/p384/bignum_sub_p384.S @@ -48,44 +48,44 @@ _bignum_sub_p384: // First just subtract the numbers as [d5; d4; d3; d2; d1; d0] // Set a mask based on (inverted) carry indicating x < y = correction is needed - ldp d0, d1, [x] - ldp l, c, [y] - subs d0, d0, l - sbcs d1, d1, c - ldp d2, d3, [x, #16] - ldp l, c, [y, #16] - sbcs d2, d2, l - sbcs d3, d3, c - ldp d4, d5, [x, #32] - ldp l, c, [y, #32] - sbcs d4, d4, l - sbcs d5, d5, c + ldp d0, d1, [x] + ldp l, c, [y] + subs d0, d0, l + sbcs d1, d1, c + ldp d2, d3, [x, #16] + ldp l, c, [y, #16] + sbcs d2, d2, l + sbcs d3, d3, c + ldp d4, d5, [x, #32] + ldp l, c, [y, #32] + sbcs d4, d4, l + sbcs d5, d5, c // Create a mask for the condition x < y, when we need to correct - csetm c, cc + csetm c, cc // Now correct by adding masked p_384 - mov l, #0x00000000ffffffff - and l, l, c - adds d0, d0, l - eor l, l, c - adcs d1, d1, l - mov l, #0xfffffffffffffffe - and l, l, c - adcs d2, d2, l - adcs d3, d3, c - adcs d4, d4, c - adc d5, d5, c + mov l, #0x00000000ffffffff + and l, l, c + adds d0, d0, l + eor l, l, c + adcs d1, d1, l + mov l, #0xfffffffffffffffe + and l, l, c + adcs d2, d2, l + adcs d3, d3, c + adcs d4, d4, c + adc d5, d5, c // Store the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_tomont_p384.S b/arm/p384/bignum_tomont_p384.S index 45ab411cbb..a3ca9c3416 100644 --- a/arm/p384/bignum_tomont_p384.S +++ b/arm/p384/bignum_tomont_p384.S @@ -38,39 +38,39 @@ #define modstep_p384(d6,d5,d4,d3,d2,d1,d0, t1,t2,t3) \ /* Initial quotient approximation q = min (h + 1) (2^64 - 1) */ \ - adds d6, d6, #1; \ - csetm t3, cs; \ - add d6, d6, t3; \ - orn t3, xzr, t3; \ - sub t2, d6, #1; \ - sub t1, xzr, d6; \ + adds d6, d6, #1; \ + csetm t3, cs; \ + add d6, d6, t3; \ + orn t3, xzr, t3; \ + sub t2, d6, #1; \ + sub t1, xzr, d6; \ /* Correction term [d6;t2;t1;d0] = q * (2^384 - p_384) */ \ - lsl d0, t1, #32; \ - extr t1, t2, t1, #32; \ - lsr t2, t2, #32; \ - adds d0, d0, d6; \ - adcs t1, t1, xzr; \ - adcs t2, t2, d6; \ - adc d6, xzr, xzr; \ + lsl d0, t1, #32; \ + extr t1, t2, t1, #32; \ + lsr t2, t2, #32; \ + adds d0, d0, d6; \ + adcs t1, t1, xzr; \ + adcs t2, t2, d6; \ + adc d6, xzr, xzr; \ /* Addition to the initial value */ \ - adds d1, d1, t1; \ - adcs 
d2, d2, t2; \ - adcs d3, d3, d6; \ - adcs d4, d4, xzr; \ - adcs d5, d5, xzr; \ - adc t3, t3, xzr; \ + adds d1, d1, t1; \ + adcs d2, d2, t2; \ + adcs d3, d3, d6; \ + adcs d4, d4, xzr; \ + adcs d5, d5, xzr; \ + adc t3, t3, xzr; \ /* Use net top of the 7-word answer in t3 for masked correction */ \ - mov t1, #0x00000000ffffffff; \ - and t1, t1, t3; \ - adds d0, d0, t1; \ - eor t1, t1, t3; \ - adcs d1, d1, t1; \ - mov t1, #0xfffffffffffffffe; \ - and t1, t1, t3; \ - adcs d2, d2, t1; \ - adcs d3, d3, t3; \ - adcs d4, d4, t3; \ - adc d5, d5, t3 + mov t1, #0x00000000ffffffff; \ + and t1, t1, t3; \ + adds d0, d0, t1; \ + eor t1, t1, t3; \ + adcs d1, d1, t1; \ + mov t1, #0xfffffffffffffffe; \ + and t1, t1, t3; \ + adcs d2, d2, t1; \ + adcs d3, d3, t3; \ + adcs d4, d4, t3; \ + adc d5, d5, t3 bignum_tomont_p384: _bignum_tomont_p384: @@ -98,46 +98,46 @@ _bignum_tomont_p384_alt: // Load the inputs - ldp d0, d1, [x1] - ldp d2, d3, [x1, #16] - ldp d4, d5, [x1, #32] + ldp d0, d1, [x1] + ldp d2, d3, [x1, #16] + ldp d4, d5, [x1, #32] // Do an initial reduction to make sure this is < p_384, using just // a copy of the bignum_mod_p384 code. This is needed to set up the // invariant "input < p_384" for the main modular reduction steps. - mov n0, #0x00000000ffffffff - mov n1, #0xffffffff00000000 - mov n2, #0xfffffffffffffffe - subs n0, d0, n0 - sbcs n1, d1, n1 - sbcs n2, d2, n2 - adcs n3, d3, xzr - adcs n4, d4, xzr - adcs n5, d5, xzr - csel d0, d0, n0, cc - csel d1, d1, n1, cc - csel d2, d2, n2, cc - csel d3, d3, n3, cc - csel d4, d4, n4, cc - csel d5, d5, n5, cc + mov n0, #0x00000000ffffffff + mov n1, #0xffffffff00000000 + mov n2, #0xfffffffffffffffe + subs n0, d0, n0 + sbcs n1, d1, n1 + sbcs n2, d2, n2 + adcs n3, d3, xzr + adcs n4, d4, xzr + adcs n5, d5, xzr + csel d0, d0, n0, cc + csel d1, d1, n1, cc + csel d2, d2, n2, cc + csel d3, d3, n3, cc + csel d4, d4, n4, cc + csel d5, d5, n5, cc // Successively multiply by 2^64 and reduce - modstep_p384(d5,d4,d3,d2,d1,d0,d6, t1,t2,t3) - modstep_p384(d4,d3,d2,d1,d0,d6,d5, t1,t2,t3) - modstep_p384(d3,d2,d1,d0,d6,d5,d4, t1,t2,t3) - modstep_p384(d2,d1,d0,d6,d5,d4,d3, t1,t2,t3) - modstep_p384(d1,d0,d6,d5,d4,d3,d2, t1,t2,t3) - modstep_p384(d0,d6,d5,d4,d3,d2,d1, t1,t2,t3) + modstep_p384(d5,d4,d3,d2,d1,d0,d6, t1,t2,t3) + modstep_p384(d4,d3,d2,d1,d0,d6,d5, t1,t2,t3) + modstep_p384(d3,d2,d1,d0,d6,d5,d4, t1,t2,t3) + modstep_p384(d2,d1,d0,d6,d5,d4,d3, t1,t2,t3) + modstep_p384(d1,d0,d6,d5,d4,d3,d2, t1,t2,t3) + modstep_p384(d0,d6,d5,d4,d3,d2,d1, t1,t2,t3) // Store the result and return - stp d1, d2, [x0] - stp d3, d4, [x0, #16] - stp d5, d6, [x0, #32] + stp d1, d2, [x0] + stp d3, d4, [x0, #16] + stp d5, d6, [x0, #32] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p384/bignum_triple_p384.S b/arm/p384/bignum_triple_p384.S index 639e833d6e..83ebcefbcf 100644 --- a/arm/p384/bignum_triple_p384.S +++ b/arm/p384/bignum_triple_p384.S @@ -71,71 +71,71 @@ _bignum_triple_p384_alt: // Load the inputs - ldp a0, a1, [x] - ldp a2, a3, [x, #16] - ldp a4, a5, [x, #32] + ldp a0, a1, [x] + ldp a2, a3, [x, #16] + ldp a4, a5, [x, #32] // First do the multiplication by 3, getting z = [h; d5; ...; d0] - lsl d0, a0, #1 - adds d0, d0, a0 - extr d1, a1, a0, #63 - adcs d1, d1, a1 - extr d2, a2, a1, #63 - adcs d2, d2, a2 - extr d3, a3, a2, #63 - adcs d3, d3, a3 - extr d4, a4, a3, #63 - adcs d4, d4, a4 - extr d5, a5, a4, #63 - adcs d5, d5, a5 - lsr h, a5, #63 - adc h, h, xzr + lsl d0, a0, #1 + adds d0, d0, a0 + extr d1, a1, a0, #63 + adcs d1, d1, 
a1 + extr d2, a2, a1, #63 + adcs d2, d2, a2 + extr d3, a3, a2, #63 + adcs d3, d3, a3 + extr d4, a4, a3, #63 + adcs d4, d4, a4 + extr d5, a5, a4, #63 + adcs d5, d5, a5 + lsr h, a5, #63 + adc h, h, xzr // For this limited range a simple quotient estimate of q = h + 1 works, where // h = floor(z / 2^384). Then -p_384 <= z - q * p_384 < p_384, so we just need // to subtract q * p_384 and then if that's negative, add back p_384. - add q, h, #1 + add q, h, #1 // Initial subtraction of z - q * p_384, with bitmask c for the carry // Actually done as an addition of (z - 2^384 * h) + q * (2^384 - p_384) // which, because q = h + 1, is exactly 2^384 + (z - q * p_384), and // therefore CF <=> 2^384 + (z - q * p_384) >= 2^384 <=> z >= q * p_384. - lsl t1, q, #32 - subs t0, q, t1 - sbc t1, t1, xzr + lsl t1, q, #32 + subs t0, q, t1 + sbc t1, t1, xzr - adds d0, d0, t0 - adcs d1, d1, t1 - adcs d2, d2, q - adcs d3, d3, xzr - adcs d4, d4, xzr - adcs d5, d5, xzr - csetm c, cc + adds d0, d0, t0 + adcs d1, d1, t1 + adcs d2, d2, q + adcs d3, d3, xzr + adcs d4, d4, xzr + adcs d5, d5, xzr + csetm c, cc // Use the bitmask c for final masked addition of p_384. - mov t0, #0x00000000ffffffff - and t0, t0, c - adds d0, d0, t0 - eor t0, t0, c - adcs d1, d1, t0 - mov t0, #0xfffffffffffffffe - and t0, t0, c - adcs d2, d2, t0 - adcs d3, d3, c - adcs d4, d4, c - adc d5, d5, c + mov t0, #0x00000000ffffffff + and t0, t0, c + adds d0, d0, t0 + eor t0, t0, c + adcs d1, d1, t0 + mov t0, #0xfffffffffffffffe + and t0, t0, c + adcs d2, d2, t0 + adcs d3, d3, c + adcs d4, d4, c + adc d5, d5, c // Store the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_add_p521.S b/arm/p521/bignum_add_p521.S index e3597d7fde..0c47f9387c 100644 --- a/arm/p521/bignum_add_p521.S +++ b/arm/p521/bignum_add_p521.S @@ -50,56 +50,56 @@ _bignum_add_p521: // Force carry-in to get s = [d8;d7;d6;d5;d4;d3;d2;d1;d0] = x + y + 1. // We ignore the carry-out, assuming inputs are reduced so there is none. 
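The trick with p_521 = 2^521 - 1: computing s = x + y + 1 up front means x + y >= p_521 exactly when s >= 2^521, in which case s - 2^521 is already the reduced sum, and otherwise the forced +1 just has to come back out. A C sketch over 9-word operands (add_p521_sketch is a hypothetical name; not constant-time, unlike the code above):

    #include <stdint.h>

    /* Add mod p_521 = 2^521 - 1; inputs reduced, 9 little-endian words. */
    void add_p521_sketch(uint64_t z[9], const uint64_t x[9],
                         const uint64_t y[9])
    {
        unsigned __int128 c = 1;        /* forced carry-in: s = x + y + 1 */
        for (int i = 0; i < 9; i++) {
            c += x[i];
            c += y[i];
            z[i] = (uint64_t)c;
            c >>= 64;
        }
        if (z[8] >> 9) {                /* s >= 2^521 */
            z[8] &= 0x1FF;              /* s - 2^521 = (x + y) - p_521 */
        } else {                        /* undo the forced +1 */
            for (int i = 0; i < 9; i++)
                if (z[i]-- != 0) break; /* borrow ripples through zeros */
        }
    }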
- subs xzr, xzr, xzr - ldp d0, d1, [x] - ldp l, h, [y] - adcs d0, d0, l - adcs d1, d1, h - ldp d2, d3, [x, #16] - ldp l, h, [y, #16] - adcs d2, d2, l - adcs d3, d3, h - ldp d4, d5, [x, #32] - ldp l, h, [y, #32] - adcs d4, d4, l - adcs d5, d5, h - ldp d6, d7, [x, #48] - ldp l, h, [y, #48] - adcs d6, d6, l - adcs d7, d7, h - ldr d8, [x, #64] - ldr l, [y, #64] - adc d8, d8, l + subs xzr, xzr, xzr + ldp d0, d1, [x] + ldp l, h, [y] + adcs d0, d0, l + adcs d1, d1, h + ldp d2, d3, [x, #16] + ldp l, h, [y, #16] + adcs d2, d2, l + adcs d3, d3, h + ldp d4, d5, [x, #32] + ldp l, h, [y, #32] + adcs d4, d4, l + adcs d5, d5, h + ldp d6, d7, [x, #48] + ldp l, h, [y, #48] + adcs d6, d6, l + adcs d7, d7, h + ldr d8, [x, #64] + ldr l, [y, #64] + adc d8, d8, l // Now x + y >= p_521 <=> s = x + y + 1 >= 2^521 // Set CF <=> s = x + y + 1 >= 2^521 and make it a mask in l as well - subs l, d8, #512 - csetm l, cs + subs l, d8, #512 + csetm l, cs // Now if CF is set (and l is all 1s), we want (x + y) - p_521 = s - 2^521 // while otherwise we want x + y = s - 1 (from existing CF, which is nice) - sbcs d0, d0, xzr - and l, l, #512 - sbcs d1, d1, xzr - sbcs d2, d2, xzr - sbcs d3, d3, xzr - sbcs d4, d4, xzr - sbcs d5, d5, xzr - sbcs d6, d6, xzr - sbcs d7, d7, xzr - sbc d8, d8, l + sbcs d0, d0, xzr + and l, l, #512 + sbcs d1, d1, xzr + sbcs d2, d2, xzr + sbcs d3, d3, xzr + sbcs d4, d4, xzr + sbcs d5, d5, xzr + sbcs d6, d6, xzr + sbcs d7, d7, xzr + sbc d8, d8, l // Store the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] - stp d6, d7, [z, #48] - str d8, [z, #64] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] + stp d6, d7, [z, #48] + str d8, [z, #64] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_cmul_p521.S b/arm/p521/bignum_cmul_p521.S index 74621e93ba..0491551b4c 100644 --- a/arm/p521/bignum_cmul_p521.S +++ b/arm/p521/bignum_cmul_p521.S @@ -71,89 +71,89 @@ _bignum_cmul_p521_alt: // First do the multiply, getting [d9; ...; d0], and as this is done // accumulate an AND "dd" of digits d7,...,d1 for later use - ldp a0, a1, [x] - mul d0, c, a0 - mul d1, c, a1 - umulh a0, c, a0 - adds d1, d1, a0 - umulh a1, c, a1 - - ldp a2, a3, [x, #16] - mul d2, c, a2 - mul d3, c, a3 - umulh a2, c, a2 - adcs d2, d2, a1 - and dd, d1, d2 - umulh a3, c, a3 - adcs d3, d3, a2 - and dd, dd, d3 - - ldp a4, a5, [x, #32] - mul d4, c, a4 - mul d5, c, a5 - umulh a4, c, a4 - adcs d4, d4, a3 - and dd, dd, d4 - umulh a5, c, a5 - adcs d5, d5, a4 - and dd, dd, d5 - - ldp a6, a7, [x, #48] - mul d6, c, a6 - mul d7, c, a7 - umulh a6, c, a6 - adcs d6, d6, a5 - and dd, dd, d6 - umulh a7, c, a7 - adcs d7, d7, a6 - and dd, dd, d7 - - ldr a8, [x, #64] - mul d8, c, a8 - adcs d8, d8, a7 - umulh a8, c, a8 - adc d9, xzr, a8 + ldp a0, a1, [x] + mul d0, c, a0 + mul d1, c, a1 + umulh a0, c, a0 + adds d1, d1, a0 + umulh a1, c, a1 + + ldp a2, a3, [x, #16] + mul d2, c, a2 + mul d3, c, a3 + umulh a2, c, a2 + adcs d2, d2, a1 + and dd, d1, d2 + umulh a3, c, a3 + adcs d3, d3, a2 + and dd, dd, d3 + + ldp a4, a5, [x, #32] + mul d4, c, a4 + mul d5, c, a5 + umulh a4, c, a4 + adcs d4, d4, a3 + and dd, dd, d4 + umulh a5, c, a5 + adcs d5, d5, a4 + and dd, dd, d5 + + ldp a6, a7, [x, #48] + mul d6, c, a6 + mul d7, c, a7 + umulh a6, c, a6 + adcs d6, d6, a5 + and dd, dd, d6 + umulh a7, c, a7 + adcs d7, d7, a6 + and dd, dd, d7 + + ldr a8, [x, #64] + mul d8, c, a8 + adcs d8, d8, a7 + umulh a8, c, a8 + adc d9, xzr, a8 // Extract the high part h and mask off the low part l = 
[d8;d7;...;d0] // but stuff d8 with 1 bits at the left to ease a comparison below - extr h, d9, d8, #9 - orr d8, d8, #~0x1FF + extr h, d9, d8, #9 + orr d8, d8, #~0x1FF // Decide whether h + l >= p_521 <=> h + l + 1 >= 2^521. Since this can only // happen if digits d7,...d1 are all 1s, we use the AND of them "dd" to // condense the carry chain, and since we stuffed 1 bits into d8 we get // the result in CF without an additional comparison. - subs xzr, xzr, xzr - adcs xzr, d0, h - adcs xzr, dd, xzr - adcs xzr, d8, xzr + subs xzr, xzr, xzr + adcs xzr, d0, h + adcs xzr, dd, xzr + adcs xzr, d8, xzr // Now if CF is set we want (h + l) - p_521 = (h + l + 1) - 2^521 // while otherwise we want just h + l. So mask h + l + CF to 521 bits. // This masking also gets rid of the stuffing with 1s we did above. - adcs d0, d0, h - adcs d1, d1, xzr - adcs d2, d2, xzr - adcs d3, d3, xzr - adcs d4, d4, xzr - adcs d5, d5, xzr - adcs d6, d6, xzr - adcs d7, d7, xzr - adc d8, d8, xzr - and d8, d8, #0x1FF + adcs d0, d0, h + adcs d1, d1, xzr + adcs d2, d2, xzr + adcs d3, d3, xzr + adcs d4, d4, xzr + adcs d5, d5, xzr + adcs d6, d6, xzr + adcs d7, d7, xzr + adc d8, d8, xzr + and d8, d8, #0x1FF // Store the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] - stp d6, d7, [z, #48] - str d8, [z, #64] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] + stp d6, d7, [z, #48] + str d8, [z, #64] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_deamont_p521.S b/arm/p521/bignum_deamont_p521.S index e1bf956121..f4c83d9229 100644 --- a/arm/p521/bignum_deamont_p521.S +++ b/arm/p521/bignum_deamont_p521.S @@ -60,67 +60,67 @@ _bignum_deamont_p521: // Load all the inputs - ldp d0, d1, [x] - ldp d2, d3, [x, #16] - ldp d4, d5, [x, #32] - ldp d6, d7, [x, #48] - ldr d8, [x, #64] + ldp d0, d1, [x] + ldp d2, d3, [x, #16] + ldp d4, d5, [x, #32] + ldp d6, d7, [x, #48] + ldr d8, [x, #64] // Stash the lowest 55 bits at the top of c, then shift the whole 576-bit // input right by 9*64 - 521 = 576 - 521 = 55 bits. As this is done, // accumulate an AND of words d0..d6. - lsl c, d0, #9 - extr d0, d1, d0, #55 - extr d1, d2, d1, #55 - and u, d0, d1 - extr d2, d3, d2, #55 - and u, u, d2 - extr d3, d4, d3, #55 - and u, u, d3 - extr d4, d5, d4, #55 - and u, u, d4 - extr d5, d6, d5, #55 - and u, u, d5 - extr d6, d7, d6, #55 - and u, u, d6 - extr d7, d8, d7, #55 - lsr d8, d8, #55 + lsl c, d0, #9 + extr d0, d1, d0, #55 + extr d1, d2, d1, #55 + and u, d0, d1 + extr d2, d3, d2, #55 + and u, u, d2 + extr d3, d4, d3, #55 + and u, u, d3 + extr d4, d5, d4, #55 + and u, u, d4 + extr d5, d6, d5, #55 + and u, u, d5 + extr d6, d7, d6, #55 + and u, u, d6 + extr d7, d8, d7, #55 + lsr d8, d8, #55 // Now writing x = 2^55 * h + l (so here [d8;..d0] = h and c = 2^9 * l) // we want (h + 2^{521-55} * l) mod p_521 = s mod p_521. Since s < 2 * p_521 // this is just "if s >= p_521 then s - p_521 else s". First get // CF <=> s >= p_521, creating the digits [h,l] to add for the l part. - adds xzr, u, #1 - lsl l, c, #9 - adcs xzr, d7, l - orr d8, d8, #~0x1FF - lsr h, c, #55 - adcs xzr, d8, h + adds xzr, u, #1 + lsl l, c, #9 + adcs xzr, d7, l + orr d8, d8, #~0x1FF + lsr h, c, #55 + adcs xzr, d8, h // Now the result = s mod p_521 = (if s >= p_521 then s - p_521 else s) = // (s + CF) mod 2^521. So do the addition inheriting the carry-in. 
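
// (This works because p_521 = 2^521 - 1 makes s - p_521 = (s + 1) - 2^521,
// and with s < 2 * p_521 < 2^522 subtracting 2^521 from s + 1 is exactly
// the truncation to 521 bits; in the s < p_521 case the inherited carry
// is clear and the truncation is vacuous since already s < 2^521.)
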
- adcs d0, d0, xzr - adcs d1, d1, xzr - adcs d2, d2, xzr - adcs d3, d3, xzr - adcs d4, d4, xzr - adcs d5, d5, xzr - adcs d6, d6, xzr - adcs d7, d7, l - adc d8, d8, h - and d8, d8, #0x1FF + adcs d0, d0, xzr + adcs d1, d1, xzr + adcs d2, d2, xzr + adcs d3, d3, xzr + adcs d4, d4, xzr + adcs d5, d5, xzr + adcs d6, d6, xzr + adcs d7, d7, l + adc d8, d8, h + and d8, d8, #0x1FF // Store back the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] - stp d6, d7, [z, #48] - str d8, [z, #64] - ret + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] + stp d6, d7, [z, #48] + str d8, [z, #64] + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_demont_p521.S b/arm/p521/bignum_demont_p521.S index 4c810f0989..bebd89715c 100644 --- a/arm/p521/bignum_demont_p521.S +++ b/arm/p521/bignum_demont_p521.S @@ -54,28 +54,28 @@ _bignum_demont_p521: // Rotate, as a 521-bit quantity, by 9*64 - 521 = 55 bits right. - ldp d0, d1, [x] - lsl c, d0, #9 - extr d0, d1, d0, #55 - ldp d2, d3, [x, #16] - extr d1, d2, d1, #55 - stp d0, d1, [z] - extr d2, d3, d2, #55 - ldp d4, d5, [x, #32] - extr d3, d4, d3, #55 - stp d2, d3, [z, #16] - extr d4, d5, d4, #55 - ldp d6, d7, [x, #48] - extr d5, d6, d5, #55 - stp d4, d5, [z, #32] - extr d6, d7, d6, #55 - ldr d8, [x, #64] - orr d8, d8, c - extr d7, d8, d7, #55 - stp d6, d7, [z, #48] - lsr d8, d8, #55 - str d8, [z, #64] - ret + ldp d0, d1, [x] + lsl c, d0, #9 + extr d0, d1, d0, #55 + ldp d2, d3, [x, #16] + extr d1, d2, d1, #55 + stp d0, d1, [z] + extr d2, d3, d2, #55 + ldp d4, d5, [x, #32] + extr d3, d4, d3, #55 + stp d2, d3, [z, #16] + extr d4, d5, d4, #55 + ldp d6, d7, [x, #48] + extr d5, d6, d5, #55 + stp d4, d5, [z, #32] + extr d6, d7, d6, #55 + ldr d8, [x, #64] + orr d8, d8, c + extr d7, d8, d7, #55 + stp d6, d7, [z, #48] + lsr d8, d8, #55 + str d8, [z, #64] + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_double_p521.S b/arm/p521/bignum_double_p521.S index 69ec99f4d3..75baf8b540 100644 --- a/arm/p521/bignum_double_p521.S +++ b/arm/p521/bignum_double_p521.S @@ -41,38 +41,38 @@ _bignum_double_p521: // We can decide whether 2 * x >= p_521 just by 2 * x >= 2^521, which // amounts to whether the top word is >= 256 - ldr c, [x, #64] - subs xzr, c, #256 + ldr c, [x, #64] + subs xzr, c, #256 // Now if 2 * x >= p_521 we want 2 * x - p_521 = (2 * x + 1) - 2^521 // and otherwise just 2 * x. Feed in the condition as the carry bit // to get 2 * x + [2 * x >= p_521] then just mask it off to 521 bits. 
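
// (The top-word test suffices because 2 * x is even while p_521 is odd,
// so 2 * x >= p_521 and 2 * x >= p_521 + 1 = 2^521 are equivalent. In
// miniature with p = 2^5 - 1 = 31: x = 20 gives 2 * x + 1 = 41, and
// masking to 5 bits yields 9 = 40 - 31; x = 10 gives 20 with no
// carry-in, and the masking changes nothing.)
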
- ldp l, h, [x] - adcs l, l, l - adcs h, h, h - stp l, h, [z] + ldp l, h, [x] + adcs l, l, l + adcs h, h, h + stp l, h, [z] - ldp l, h, [x, #16] - adcs l, l, l - adcs h, h, h - stp l, h, [z, #16] + ldp l, h, [x, #16] + adcs l, l, l + adcs h, h, h + stp l, h, [z, #16] - ldp l, h, [x, #32] - adcs l, l, l - adcs h, h, h - stp l, h, [z, #32] + ldp l, h, [x, #32] + adcs l, l, l + adcs h, h, h + stp l, h, [z, #32] - ldp l, h, [x, #48] - adcs l, l, l - adcs h, h, h - stp l, h, [z, #48] + ldp l, h, [x, #48] + adcs l, l, l + adcs h, h, h + stp l, h, [z, #48] - adc c, c, c - and c, c, #0x1FF - str c, [z, #64] + adc c, c, c + and c, c, #0x1FF + str c, [z, #64] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_fromlebytes_p521.S b/arm/p521/bignum_fromlebytes_p521.S index 6294c670c8..c7a7a013ef 100644 --- a/arm/p521/bignum_fromlebytes_p521.S +++ b/arm/p521/bignum_fromlebytes_p521.S @@ -42,173 +42,173 @@ _bignum_fromlebytes_p521: // word 0 - ldrb dshort, [x] - extr a, d, xzr, #8 - ldrb dshort, [x, #1] - extr a, d, a, #8 - ldrb dshort, [x, #2] - extr a, d, a, #8 - ldrb dshort, [x, #3] - extr a, d, a, #8 - ldrb dshort, [x, #4] - extr a, d, a, #8 - ldrb dshort, [x, #5] - extr a, d, a, #8 - ldrb dshort, [x, #6] - extr a, d, a, #8 - ldrb dshort, [x, #7] - extr a, d, a, #8 - str a, [z] + ldrb dshort, [x] + extr a, d, xzr, #8 + ldrb dshort, [x, #1] + extr a, d, a, #8 + ldrb dshort, [x, #2] + extr a, d, a, #8 + ldrb dshort, [x, #3] + extr a, d, a, #8 + ldrb dshort, [x, #4] + extr a, d, a, #8 + ldrb dshort, [x, #5] + extr a, d, a, #8 + ldrb dshort, [x, #6] + extr a, d, a, #8 + ldrb dshort, [x, #7] + extr a, d, a, #8 + str a, [z] // word 1 - ldrb dshort, [x, #8] - extr a, d, xzr, #8 - ldrb dshort, [x, #9] - extr a, d, a, #8 - ldrb dshort, [x, #10] - extr a, d, a, #8 - ldrb dshort, [x, #11] - extr a, d, a, #8 - ldrb dshort, [x, #12] - extr a, d, a, #8 - ldrb dshort, [x, #13] - extr a, d, a, #8 - ldrb dshort, [x, #14] - extr a, d, a, #8 - ldrb dshort, [x, #15] - extr a, d, a, #8 - str a, [z, #8] + ldrb dshort, [x, #8] + extr a, d, xzr, #8 + ldrb dshort, [x, #9] + extr a, d, a, #8 + ldrb dshort, [x, #10] + extr a, d, a, #8 + ldrb dshort, [x, #11] + extr a, d, a, #8 + ldrb dshort, [x, #12] + extr a, d, a, #8 + ldrb dshort, [x, #13] + extr a, d, a, #8 + ldrb dshort, [x, #14] + extr a, d, a, #8 + ldrb dshort, [x, #15] + extr a, d, a, #8 + str a, [z, #8] // word 2 - ldrb dshort, [x, #16] - extr a, d, xzr, #8 - ldrb dshort, [x, #17] - extr a, d, a, #8 - ldrb dshort, [x, #18] - extr a, d, a, #8 - ldrb dshort, [x, #19] - extr a, d, a, #8 - ldrb dshort, [x, #20] - extr a, d, a, #8 - ldrb dshort, [x, #21] - extr a, d, a, #8 - ldrb dshort, [x, #22] - extr a, d, a, #8 - ldrb dshort, [x, #23] - extr a, d, a, #8 - str a, [z, #16] + ldrb dshort, [x, #16] + extr a, d, xzr, #8 + ldrb dshort, [x, #17] + extr a, d, a, #8 + ldrb dshort, [x, #18] + extr a, d, a, #8 + ldrb dshort, [x, #19] + extr a, d, a, #8 + ldrb dshort, [x, #20] + extr a, d, a, #8 + ldrb dshort, [x, #21] + extr a, d, a, #8 + ldrb dshort, [x, #22] + extr a, d, a, #8 + ldrb dshort, [x, #23] + extr a, d, a, #8 + str a, [z, #16] // word 3 - ldrb dshort, [x, #24] - extr a, d, xzr, #8 - ldrb dshort, [x, #25] - extr a, d, a, #8 - ldrb dshort, [x, #26] - extr a, d, a, #8 - ldrb dshort, [x, #27] - extr a, d, a, #8 - ldrb dshort, [x, #28] - extr a, d, a, #8 - ldrb dshort, [x, #29] - extr a, d, a, #8 - ldrb dshort, [x, #30] - extr a, d, a, #8 - ldrb dshort, [x, #31] - extr a, d, a, #8 - str a, [z, #24] + 
ldrb dshort, [x, #24] + extr a, d, xzr, #8 + ldrb dshort, [x, #25] + extr a, d, a, #8 + ldrb dshort, [x, #26] + extr a, d, a, #8 + ldrb dshort, [x, #27] + extr a, d, a, #8 + ldrb dshort, [x, #28] + extr a, d, a, #8 + ldrb dshort, [x, #29] + extr a, d, a, #8 + ldrb dshort, [x, #30] + extr a, d, a, #8 + ldrb dshort, [x, #31] + extr a, d, a, #8 + str a, [z, #24] // word 4 - ldrb dshort, [x, #32] - extr a, d, xzr, #8 - ldrb dshort, [x, #33] - extr a, d, a, #8 - ldrb dshort, [x, #34] - extr a, d, a, #8 - ldrb dshort, [x, #35] - extr a, d, a, #8 - ldrb dshort, [x, #36] - extr a, d, a, #8 - ldrb dshort, [x, #37] - extr a, d, a, #8 - ldrb dshort, [x, #38] - extr a, d, a, #8 - ldrb dshort, [x, #39] - extr a, d, a, #8 - str a, [z, #32] + ldrb dshort, [x, #32] + extr a, d, xzr, #8 + ldrb dshort, [x, #33] + extr a, d, a, #8 + ldrb dshort, [x, #34] + extr a, d, a, #8 + ldrb dshort, [x, #35] + extr a, d, a, #8 + ldrb dshort, [x, #36] + extr a, d, a, #8 + ldrb dshort, [x, #37] + extr a, d, a, #8 + ldrb dshort, [x, #38] + extr a, d, a, #8 + ldrb dshort, [x, #39] + extr a, d, a, #8 + str a, [z, #32] // word 5 - ldrb dshort, [x, #40] - extr a, d, xzr, #8 - ldrb dshort, [x, #41] - extr a, d, a, #8 - ldrb dshort, [x, #42] - extr a, d, a, #8 - ldrb dshort, [x, #43] - extr a, d, a, #8 - ldrb dshort, [x, #44] - extr a, d, a, #8 - ldrb dshort, [x, #45] - extr a, d, a, #8 - ldrb dshort, [x, #46] - extr a, d, a, #8 - ldrb dshort, [x, #47] - extr a, d, a, #8 - str a, [z, #40] + ldrb dshort, [x, #40] + extr a, d, xzr, #8 + ldrb dshort, [x, #41] + extr a, d, a, #8 + ldrb dshort, [x, #42] + extr a, d, a, #8 + ldrb dshort, [x, #43] + extr a, d, a, #8 + ldrb dshort, [x, #44] + extr a, d, a, #8 + ldrb dshort, [x, #45] + extr a, d, a, #8 + ldrb dshort, [x, #46] + extr a, d, a, #8 + ldrb dshort, [x, #47] + extr a, d, a, #8 + str a, [z, #40] // word 6 - ldrb dshort, [x, #48] - extr a, d, xzr, #8 - ldrb dshort, [x, #49] - extr a, d, a, #8 - ldrb dshort, [x, #50] - extr a, d, a, #8 - ldrb dshort, [x, #51] - extr a, d, a, #8 - ldrb dshort, [x, #52] - extr a, d, a, #8 - ldrb dshort, [x, #53] - extr a, d, a, #8 - ldrb dshort, [x, #54] - extr a, d, a, #8 - ldrb dshort, [x, #55] - extr a, d, a, #8 - str a, [z, #48] + ldrb dshort, [x, #48] + extr a, d, xzr, #8 + ldrb dshort, [x, #49] + extr a, d, a, #8 + ldrb dshort, [x, #50] + extr a, d, a, #8 + ldrb dshort, [x, #51] + extr a, d, a, #8 + ldrb dshort, [x, #52] + extr a, d, a, #8 + ldrb dshort, [x, #53] + extr a, d, a, #8 + ldrb dshort, [x, #54] + extr a, d, a, #8 + ldrb dshort, [x, #55] + extr a, d, a, #8 + str a, [z, #48] // word 7 - ldrb dshort, [x, #56] - extr a, d, xzr, #8 - ldrb dshort, [x, #57] - extr a, d, a, #8 - ldrb dshort, [x, #58] - extr a, d, a, #8 - ldrb dshort, [x, #59] - extr a, d, a, #8 - ldrb dshort, [x, #60] - extr a, d, a, #8 - ldrb dshort, [x, #61] - extr a, d, a, #8 - ldrb dshort, [x, #62] - extr a, d, a, #8 - ldrb dshort, [x, #63] - extr a, d, a, #8 - str a, [z, #56] + ldrb dshort, [x, #56] + extr a, d, xzr, #8 + ldrb dshort, [x, #57] + extr a, d, a, #8 + ldrb dshort, [x, #58] + extr a, d, a, #8 + ldrb dshort, [x, #59] + extr a, d, a, #8 + ldrb dshort, [x, #60] + extr a, d, a, #8 + ldrb dshort, [x, #61] + extr a, d, a, #8 + ldrb dshort, [x, #62] + extr a, d, a, #8 + ldrb dshort, [x, #63] + extr a, d, a, #8 + str a, [z, #56] // word 8 - ldrb dshort, [x, #64] - extr a, d, xzr, #8 - ldrb dshort, [x, #65] - extr a, d, a, #56 - str a, [z, #64] + ldrb dshort, [x, #64] + extr a, d, xzr, #8 + ldrb dshort, [x, #65] + extr a, d, a, #56 + str a, [z, #64] - ret + ret #if 
defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_half_p521.S b/arm/p521/bignum_half_p521.S index a06a11c020..e7c13c1c52 100644 --- a/arm/p521/bignum_half_p521.S +++ b/arm/p521/bignum_half_p521.S @@ -50,35 +50,35 @@ _bignum_half_p521: // We do a 521-bit rotation one bit right, since 2^521 == 1 (mod p_521) - ldp d0, d1, [x] - and a, d0, #1 - extr d0, d1, d0, #1 - - ldp d2, d3, [x, #16] - extr d1, d2, d1, #1 - stp d0, d1, [z] - extr d2, d3, d2, #1 - - ldp d4, d5, [x, #32] - extr d3, d4, d3, #1 - stp d2, d3, [z, #16] - extr d4, d5, d4, #1 - - ldp d6, d7, [x, #48] - extr d5, d6, d5, #1 - stp d4, d5, [z, #32] - extr d6, d7, d6, #1 - - ldr d8, [x, #64] - extr d7, d8, d7, #1 - stp d6, d7, [z, #48] - lsl d8, d8, #55 - extr d8, a, d8, #56 - str d8, [z, #64] + ldp d0, d1, [x] + and a, d0, #1 + extr d0, d1, d0, #1 + + ldp d2, d3, [x, #16] + extr d1, d2, d1, #1 + stp d0, d1, [z] + extr d2, d3, d2, #1 + + ldp d4, d5, [x, #32] + extr d3, d4, d3, #1 + stp d2, d3, [z, #16] + extr d4, d5, d4, #1 + + ldp d6, d7, [x, #48] + extr d5, d6, d5, #1 + stp d4, d5, [z, #32] + extr d6, d7, d6, #1 + + ldr d8, [x, #64] + extr d7, d8, d7, #1 + stp d6, d7, [z, #48] + lsl d8, d8, #55 + extr d8, a, d8, #56 + str d8, [z, #64] // Return - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_mod_n521_9.S b/arm/p521/bignum_mod_n521_9.S index 1a0743a8c8..958f09d1ea 100644 --- a/arm/p521/bignum_mod_n521_9.S +++ b/arm/p521/bignum_mod_n521_9.S @@ -58,10 +58,10 @@ #define t d7 #define movbig(nn,n3,n2,n1,n0) \ - movz nn, n0; \ - movk nn, n1, lsl #16; \ - movk nn, n2, lsl #32; \ - movk nn, n3, lsl #48 + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 bignum_mod_n521_9: _bignum_mod_n521_9: @@ -71,84 +71,84 @@ _bignum_mod_n521_9_alt: // Load the top digit first into d8. // The initial quotient estimate is q = h + 1 where x = 2^521 * h + t - ldr d8, [x, #64] - lsr q, d8, #9 - add q, q, #1 + ldr d8, [x, #64] + lsr q, d8, #9 + add q, q, #1 // Let [5; n3; n2; n1; n0] = r_521 = 2^521 - n_521 // and form [d4;d3;d2;d1;d0] = q * r_521 - movbig( n0, #0x4490, #0x48e1, #0x6ec7, #0x9bf7) - mul d0, n0, q - movbig( n1, #0xc44a, #0x3647, #0x7663, #0xb851) - mul d1, n1, q - movbig( n2, #0x8033, #0xfeb7, #0x08f6, #0x5a2f) - mul d2, n2, q - movbig( n3, #0xae79, #0x787c, #0x40d0, #0x6994) - mul d3, n3, q - lsl d4, q, #2 - add d4, d4, q - umulh t, n0, q - adds d1, d1, t - umulh t, n1, q - adcs d2, d2, t - umulh t, n2, q - adcs d3, d3, t - umulh t, n3, q - adc d4, d4, t + movbig( n0, #0x4490, #0x48e1, #0x6ec7, #0x9bf7) + mul d0, n0, q + movbig( n1, #0xc44a, #0x3647, #0x7663, #0xb851) + mul d1, n1, q + movbig( n2, #0x8033, #0xfeb7, #0x08f6, #0x5a2f) + mul d2, n2, q + movbig( n3, #0xae79, #0x787c, #0x40d0, #0x6994) + mul d3, n3, q + lsl d4, q, #2 + add d4, d4, q + umulh t, n0, q + adds d1, d1, t + umulh t, n1, q + adcs d2, d2, t + umulh t, n2, q + adcs d3, d3, t + umulh t, n3, q + adc d4, d4, t // Now load other digits and form r = x - q * n_521 = (q * r_521 + t) - 2^521. // But the computed result stuffs in 1s from bit 521 onwards and actually // gives r' = (q * r_521 + t) + (2^576 - 2^521) = r + 2^576, including the // top carry. Hence CF <=> r >= 0, while r' == r (mod 2^521). 
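
// (The identity being used, writing x = 2^521 * h + t and q = h + 1:
//
//    x - q * n_521 = t + h * 2^521 - (h + 1) * n_521
//                  = t + (h + 1) * (2^521 - n_521) - 2^521
//                  = (q * r_521 + t) - 2^521.)
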
- ldp s, t, [x] - adds d0, d0, s - adcs d1, d1, t - ldp s, t, [x, #16] - adcs d2, d2, s - adcs d3, d3, t - ldp t, d5, [x, #32] - adcs d4, d4, t - adcs d5, d5, xzr - ldp d6, d7, [x, #48] - adcs d6, d6, xzr - adcs d7, d7, xzr - orr d8, d8, #~0x1FF - adcs d8, d8, xzr + ldp s, t, [x] + adds d0, d0, s + adcs d1, d1, t + ldp s, t, [x, #16] + adcs d2, d2, s + adcs d3, d3, t + ldp t, d5, [x, #32] + adcs d4, d4, t + adcs d5, d5, xzr + ldp d6, d7, [x, #48] + adcs d6, d6, xzr + adcs d7, d7, xzr + orr d8, d8, #~0x1FF + adcs d8, d8, xzr // We already know r < n_521, but if it actually went negative then // we need to add back n_521 again. Recycle q as a bitmask for r < n_521, // and just subtract r_521 and mask rather than literally adding 2^521. // This also gets rid of the bit-stuffing above. - csetm q, cc - and n0, n0, q - subs d0, d0, n0 - and n1, n1, q - sbcs d1, d1, n1 - and n2, n2, q - sbcs d2, d2, n2 - and n3, n3, q - sbcs d3, d3, n3 - mov n0, #5 - and n0, n0, q - sbcs d4, d4, n0 - sbcs d5, d5, xzr - sbcs d6, d6, xzr - sbcs d7, d7, xzr - sbc d8, d8, xzr - and d8, d8, #0x1FF + csetm q, cc + and n0, n0, q + subs d0, d0, n0 + and n1, n1, q + sbcs d1, d1, n1 + and n2, n2, q + sbcs d2, d2, n2 + and n3, n3, q + sbcs d3, d3, n3 + mov n0, #5 + and n0, n0, q + sbcs d4, d4, n0 + sbcs d5, d5, xzr + sbcs d6, d6, xzr + sbcs d7, d7, xzr + sbc d8, d8, xzr + and d8, d8, #0x1FF // Store the end result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] - stp d6, d7, [z, #48] - str d8, [z, #64] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] + stp d6, d7, [z, #48] + str d8, [z, #64] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_mod_p521_9.S b/arm/p521/bignum_mod_p521_9.S index 32cbed5d3b..2a7d990100 100644 --- a/arm/p521/bignum_mod_p521_9.S +++ b/arm/p521/bignum_mod_p521_9.S @@ -50,8 +50,8 @@ _bignum_mod_p521_9: // separate out x = 2^521 * H + L with h = H. Now x mod p_521 = // (H + L) mod p_521 = if H + L >= p_521 then H + L - p_521 else H + L. - ldr d8, [x, #64] - lsr h, d8, #9 + ldr d8, [x, #64] + lsr h, d8, #9 // Load in the other digits and decide whether H + L >= p_521. This is // equivalent to H + L + 1 >= 2^521, and since this can only happen if @@ -60,45 +60,45 @@ _bignum_mod_p521_9: // This condenses only three pairs; the payoff beyond that seems limited. // By stuffing in 1 bits from 521 position upwards, get CF directly - subs xzr, xzr, xzr - ldp d0, d1, [x] - adcs xzr, d0, h - adcs xzr, d1, xzr - ldp d2, d3, [x, #16] - and t, d2, d3 - adcs xzr, t, xzr - ldp d4, d5, [x, #32] - and t, d4, d5 - adcs xzr, t, xzr - ldp d6, d7, [x, #48] - and t, d6, d7 - adcs xzr, t, xzr - orr t, d8, #~0x1FF - adcs t, t, xzr + subs xzr, xzr, xzr + ldp d0, d1, [x] + adcs xzr, d0, h + adcs xzr, d1, xzr + ldp d2, d3, [x, #16] + and t, d2, d3 + adcs xzr, t, xzr + ldp d4, d5, [x, #32] + and t, d4, d5 + adcs xzr, t, xzr + ldp d6, d7, [x, #48] + and t, d6, d7 + adcs xzr, t, xzr + orr t, d8, #~0x1FF + adcs t, t, xzr // Now H + L >= p_521 <=> H + L + 1 >= 2^521 <=> CF from this comparison. // So if CF is set we want (H + L) - p_521 = (H + L + 1) - 2^521 // while otherwise we want just H + L. So mask H + L + CF to 521 bits. 
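
// (The pair-condensation in the comparison above is sound because a word
// to which only a carry is added propagates that carry iff the word is
// all 1s; hence the carry emerges from the pair d3, d2 iff it emerges
// from the single word t = d2 AND d3, and likewise for the other ANDed
// pairs, so the final CF matches that of the full 9-word addition.)
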
- adcs d0, d0, h - adcs d1, d1, xzr - adcs d2, d2, xzr - adcs d3, d3, xzr - adcs d4, d4, xzr - adcs d5, d5, xzr - adcs d6, d6, xzr - adcs d7, d7, xzr - adc d8, d8, xzr - and d8, d8, #0x1FF + adcs d0, d0, h + adcs d1, d1, xzr + adcs d2, d2, xzr + adcs d3, d3, xzr + adcs d4, d4, xzr + adcs d5, d5, xzr + adcs d6, d6, xzr + adcs d7, d7, xzr + adc d8, d8, xzr + and d8, d8, #0x1FF // Store the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] - stp d6, d7, [z, #48] - str d8, [z, #64] - ret + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] + stp d6, d7, [z, #48] + str d8, [z, #64] + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_montmul_p521.S b/arm/p521/bignum_montmul_p521.S index 25acc32eef..6543afa732 100644 --- a/arm/p521/bignum_montmul_p521.S +++ b/arm/p521/bignum_montmul_p521.S @@ -95,174 +95,174 @@ #define mul4 \ /* First accumulate all the "simple" products as [s7,s6,s5,s4,s0] */ \ - \ - mul s0, a0, b0; \ - mul s4, a1, b1; \ - mul s5, a2, b2; \ - mul s6, a3, b3; \ - \ - umulh s7, a0, b0; \ - adds s4, s4, s7; \ - umulh s7, a1, b1; \ - adcs s5, s5, s7; \ - umulh s7, a2, b2; \ - adcs s6, s6, s7; \ - umulh s7, a3, b3; \ - adc s7, s7, xzr; \ - \ + \ + mul s0, a0, b0; \ + mul s4, a1, b1; \ + mul s5, a2, b2; \ + mul s6, a3, b3; \ + \ + umulh s7, a0, b0; \ + adds s4, s4, s7; \ + umulh s7, a1, b1; \ + adcs s5, s5, s7; \ + umulh s7, a2, b2; \ + adcs s6, s6, s7; \ + umulh s7, a3, b3; \ + adc s7, s7, xzr; \ + \ /* Multiply by B + 1 to get [s7;s6;s5;s4;s1;s0] */ \ - \ - adds s1, s4, s0; \ - adcs s4, s5, s4; \ - adcs s5, s6, s5; \ - adcs s6, s7, s6; \ - adc s7, xzr, s7; \ - \ + \ + adds s1, s4, s0; \ + adcs s4, s5, s4; \ + adcs s5, s6, s5; \ + adcs s6, s7, s6; \ + adc s7, xzr, s7; \ + \ /* Multiply by B^2 + 1 to get [s7;s6;s5;s4;s3;s2;s1;s0] */ \ - \ - adds s2, s4, s0; \ - adcs s3, s5, s1; \ - adcs s4, s6, s4; \ - adcs s5, s7, s5; \ - adcs s6, xzr, s6; \ - adc s7, xzr, s7; \ - \ + \ + adds s2, s4, s0; \ + adcs s3, s5, s1; \ + adcs s4, s6, s4; \ + adcs s5, s7, s5; \ + adcs s6, xzr, s6; \ + adc s7, xzr, s7; \ + \ /* Now add in all the "complicated" terms. */ \ - \ - muldiffnadd(s6,s5, a2,a3, b3,b2); \ - adc s7, s7, c; \ - \ - muldiffnadd(s2,s1, a0,a1, b1,b0); \ - adcs s3, s3, c; \ - adcs s4, s4, c; \ - adcs s5, s5, c; \ - adcs s6, s6, c; \ - adc s7, s7, c; \ - \ - muldiffnadd(s5,s4, a1,a3, b3,b1); \ - adcs s6, s6, c; \ - adc s7, s7, c; \ - \ - muldiffnadd(s3,s2, a0,a2, b2,b0); \ - adcs s4, s4, c; \ - adcs s5, s5, c; \ - adcs s6, s6, c; \ - adc s7, s7, c; \ - \ - muldiffnadd(s4,s3, a0,a3, b3,b0); \ - adcs s5, s5, c; \ - adcs s6, s6, c; \ - adc s7, s7, c; \ - muldiffnadd(s4,s3, a1,a2, b2,b1); \ - adcs s5, s5, c; \ - adcs s6, s6, c; \ - adc s7, s7, c \ + \ + muldiffnadd(s6,s5, a2,a3, b3,b2); \ + adc s7, s7, c; \ + \ + muldiffnadd(s2,s1, a0,a1, b1,b0); \ + adcs s3, s3, c; \ + adcs s4, s4, c; \ + adcs s5, s5, c; \ + adcs s6, s6, c; \ + adc s7, s7, c; \ + \ + muldiffnadd(s5,s4, a1,a3, b3,b1); \ + adcs s6, s6, c; \ + adc s7, s7, c; \ + \ + muldiffnadd(s3,s2, a0,a2, b2,b0); \ + adcs s4, s4, c; \ + adcs s5, s5, c; \ + adcs s6, s6, c; \ + adc s7, s7, c; \ + \ + muldiffnadd(s4,s3, a0,a3, b3,b0); \ + adcs s5, s5, c; \ + adcs s6, s6, c; \ + adc s7, s7, c; \ + muldiffnadd(s4,s3, a1,a2, b2,b1); \ + adcs s5, s5, c; \ + adcs s6, s6, c; \ + adc s7, s7, c \ bignum_montmul_p521: _bignum_montmul_p521: // Save registers and make space for the temporary buffer - stp x19, x20, [sp, #-16]! - stp x21, x22, [sp, #-16]! 
- stp x23, x24, [sp, #-16]! - stp x25, x26, [sp, #-16]! - sub sp, sp, #80 + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + sub sp, sp, #80 // Load 4-digit low parts and multiply them to get L - ldp a0, a1, [x] - ldp a2, a3, [x, #16] - ldp b0, b1, [y] - ldp b2, b3, [y, #16] - mul4 + ldp a0, a1, [x] + ldp a2, a3, [x, #16] + ldp b0, b1, [y] + ldp b2, b3, [y, #16] + mul4 // Shift right 256 bits modulo p_521 and stash in temp buffer - lsl c, s0, #9 - extr s0, s1, s0, #55 - extr s1, s2, s1, #55 - extr s2, s3, s2, #55 - lsr s3, s3, #55 - stp s4, s5, [sp] - stp s6, s7, [sp, #16] - stp c, s0, [sp, #32] - stp s1, s2, [sp, #48] - str s3, [sp, #64] + lsl c, s0, #9 + extr s0, s1, s0, #55 + extr s1, s2, s1, #55 + extr s2, s3, s2, #55 + lsr s3, s3, #55 + stp s4, s5, [sp] + stp s6, s7, [sp, #16] + stp c, s0, [sp, #32] + stp s1, s2, [sp, #48] + str s3, [sp, #64] // Load 4-digit low parts and multiply them to get H - ldp a0, a1, [x, #32] - ldp a2, a3, [x, #48] - ldp b0, b1, [y, #32] - ldp b2, b3, [y, #48] - mul4 + ldp a0, a1, [x, #32] + ldp a2, a3, [x, #48] + ldp b0, b1, [y, #32] + ldp b2, b3, [y, #48] + mul4 // Add to the existing temporary buffer and re-stash. // This gives a result HL congruent to (2^256 * H + L) / 2^256 modulo p_521 - ldp l, h, [sp] - adds s0, s0, l - adcs s1, s1, h - stp s0, s1, [sp] - ldp l, h, [sp, #16] - adcs s2, s2, l - adcs s3, s3, h - stp s2, s3, [sp, #16] - ldp l, h, [sp, #32] - adcs s4, s4, l - adcs s5, s5, h - stp s4, s5, [sp, #32] - ldp l, h, [sp, #48] - adcs s6, s6, l - adcs s7, s7, h - stp s6, s7, [sp, #48] - ldr c, [sp, #64] - adc c, c, xzr - str c, [sp, #64] + ldp l, h, [sp] + adds s0, s0, l + adcs s1, s1, h + stp s0, s1, [sp] + ldp l, h, [sp, #16] + adcs s2, s2, l + adcs s3, s3, h + stp s2, s3, [sp, #16] + ldp l, h, [sp, #32] + adcs s4, s4, l + adcs s5, s5, h + stp s4, s5, [sp, #32] + ldp l, h, [sp, #48] + adcs s6, s6, l + adcs s7, s7, h + stp s6, s7, [sp, #48] + ldr c, [sp, #64] + adc c, c, xzr + str c, [sp, #64] // Compute t,[a3,a2,a1,a0] = x_hi - x_lo // and s,[b3,b2,b1,b0] = y_lo - y_hi // sign-magnitude differences, then XOR overall sign bitmask into s - ldp l, h, [x] - subs a0, a0, l - sbcs a1, a1, h - ldp l, h, [x, #16] - sbcs a2, a2, l - sbcs a3, a3, h - csetm t, cc - ldp l, h, [y] - subs b0, l, b0 - sbcs b1, h, b1 - ldp l, h, [y, #16] - sbcs b2, l, b2 - sbcs b3, h, b3 - csetm s, cc - - eor a0, a0, t - subs a0, a0, t - eor a1, a1, t - sbcs a1, a1, t - eor a2, a2, t - sbcs a2, a2, t - eor a3, a3, t - sbc a3, a3, t - - eor b0, b0, s - subs b0, b0, s - eor b1, b1, s - sbcs b1, b1, s - eor b2, b2, s - sbcs b2, b2, s - eor b3, b3, s - sbc b3, b3, s - - eor s, s, t + ldp l, h, [x] + subs a0, a0, l + sbcs a1, a1, h + ldp l, h, [x, #16] + sbcs a2, a2, l + sbcs a3, a3, h + csetm t, cc + ldp l, h, [y] + subs b0, l, b0 + sbcs b1, h, b1 + ldp l, h, [y, #16] + sbcs b2, l, b2 + sbcs b3, h, b3 + csetm s, cc + + eor a0, a0, t + subs a0, a0, t + eor a1, a1, t + sbcs a1, a1, t + eor a2, a2, t + sbcs a2, a2, t + eor a3, a3, t + sbc a3, a3, t + + eor b0, b0, s + subs b0, b0, s + eor b1, b1, s + sbcs b1, b1, s + eor b2, b2, s + sbcs b2, b2, s + eor b3, b3, s + sbc b3, b3, s + + eor s, s, t // Now do yet a third 4x4 multiply to get mid-term product M - mul4 + mul4 // We now want, at the 256 position, 2^256 * HL + HL + (-1)^s * M // To keep things positive we use M' = p_521 - M in place of -M, @@ -278,48 +278,48 @@ _bignum_montmul_p521: // small c (s8 + suspended carry) to add at the 256 position here (512 // overall). 
This can be added in the next block (to b0 = sum4). - ldp a0, a1, [sp] - ldp a2, a3, [sp, #16] - - eor s0, s0, s - adds s0, s0, a0 - eor s1, s1, s - adcs s1, s1, a1 - eor s2, s2, s - adcs s2, s2, a2 - eor s3, s3, s - adcs s3, s3, a3 - eor s4, s4, s - - ldp b0, b1, [sp, #32] - ldp b2, b3, [sp, #48] - ldr s8, [sp, #64] - - adcs s4, s4, b0 - eor s5, s5, s - adcs s5, s5, b1 - eor s6, s6, s - adcs s6, s6, b2 - eor s7, s7, s - adcs s7, s7, b3 - adc c, s8, xzr - - adds s4, s4, a0 - adcs s5, s5, a1 - adcs s6, s6, a2 - adcs s7, s7, a3 - and s, s, #0x1FF - lsl t, s0, #9 - orr t, t, s - adcs b0, b0, t - extr t, s1, s0, #55 - adcs b1, b1, t - extr t, s2, s1, #55 - adcs b2, b2, t - extr t, s3, s2, #55 - adcs b3, b3, t - lsr t, s3, #55 - adc s8, t, s8 + ldp a0, a1, [sp] + ldp a2, a3, [sp, #16] + + eor s0, s0, s + adds s0, s0, a0 + eor s1, s1, s + adcs s1, s1, a1 + eor s2, s2, s + adcs s2, s2, a2 + eor s3, s3, s + adcs s3, s3, a3 + eor s4, s4, s + + ldp b0, b1, [sp, #32] + ldp b2, b3, [sp, #48] + ldr s8, [sp, #64] + + adcs s4, s4, b0 + eor s5, s5, s + adcs s5, s5, b1 + eor s6, s6, s + adcs s6, s6, b2 + eor s7, s7, s + adcs s7, s7, b3 + adc c, s8, xzr + + adds s4, s4, a0 + adcs s5, s5, a1 + adcs s6, s6, a2 + adcs s7, s7, a3 + and s, s, #0x1FF + lsl t, s0, #9 + orr t, t, s + adcs b0, b0, t + extr t, s1, s0, #55 + adcs b1, b1, t + extr t, s2, s1, #55 + adcs b2, b2, t + extr t, s3, s2, #55 + adcs b3, b3, t + lsr t, s3, #55 + adc s8, t, s8 // Augment the total with the contribution from the top little words // w and v. If we write the inputs as 2^512 * w + x and 2^512 * v + y @@ -366,239 +366,239 @@ _bignum_montmul_p521: // 0 * 52 = 64 * 0 + 0 - ldr v, [y, #64] - ldp c0, c1, [x] - and l, c0, #0x000fffffffffffff - mul l, v, l - ldr w, [x, #64] - ldp d0, d1, [y] - and t, d0, #0x000fffffffffffff - mul t, w, t - add l, l, t + ldr v, [y, #64] + ldp c0, c1, [x] + and l, c0, #0x000fffffffffffff + mul l, v, l + ldr w, [x, #64] + ldp d0, d1, [y] + and t, d0, #0x000fffffffffffff + mul t, w, t + add l, l, t // 1 * 52 = 64 * 0 + 52 - extr t, c1, c0, #52 - and t, t, #0x000fffffffffffff - mul h, v, t - extr t, d1, d0, #52 - and t, t, #0x000fffffffffffff - mul t, w, t - add h, h, t - lsr t, l, #52 - add h, h, t + extr t, c1, c0, #52 + and t, t, #0x000fffffffffffff + mul h, v, t + extr t, d1, d0, #52 + and t, t, #0x000fffffffffffff + mul t, w, t + add h, h, t + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #12 - adds sum0, sum0, t + lsl l, l, #12 + extr t, h, l, #12 + adds sum0, sum0, t // 2 * 52 = 64 * 1 + 40 - ldp c2, c3, [x, #16] - ldp d2, d3, [y, #16] - extr t, c2, c1, #40 - and t, t, #0x000fffffffffffff - mul l, v, t - extr t, d2, d1, #40 - and t, t, #0x000fffffffffffff - mul t, w, t - add l, l, t - lsr t, h, #52 - add l, l, t - - lsl h, h, #12 - extr t, l, h, #24 - adcs sum1, sum1, t + ldp c2, c3, [x, #16] + ldp d2, d3, [y, #16] + extr t, c2, c1, #40 + and t, t, #0x000fffffffffffff + mul l, v, t + extr t, d2, d1, #40 + and t, t, #0x000fffffffffffff + mul t, w, t + add l, l, t + lsr t, h, #52 + add l, l, t + + lsl h, h, #12 + extr t, l, h, #24 + adcs sum1, sum1, t // 3 * 52 = 64 * 2 + 28 - extr t, c3, c2, #28 - and t, t, #0x000fffffffffffff - mul h, v, t - extr t, d3, d2, #28 - and t, t, #0x000fffffffffffff - mul t, w, t - add h, h, t - lsr t, l, #52 - add h, h, t - - lsl l, l, #12 - extr t, h, l, #36 - adcs sum2, sum2, t - and u, sum1, sum2 + extr t, c3, c2, #28 + and t, t, #0x000fffffffffffff + mul h, v, t + extr t, d3, d2, #28 + and t, t, #0x000fffffffffffff + mul t, w, t + add h, h, t + lsr t, l, #52 + 
add h, h, t + + lsl l, l, #12 + extr t, h, l, #36 + adcs sum2, sum2, t + and u, sum1, sum2 // 4 * 52 = 64 * 3 + 16 // At this point we also fold in the addition of c at the right place. // Note that 4 * 64 = 4 * 52 + 48 so we shift c left 48 places to align. - ldp c4, c5, [x, #32] - ldp d4, d5, [y, #32] - extr t, c4, c3, #16 - and t, t, #0x000fffffffffffff - mul l, v, t - extr t, d4, d3, #16 - and t, t, #0x000fffffffffffff - mul t, w, t - add l, l, t + ldp c4, c5, [x, #32] + ldp d4, d5, [y, #32] + extr t, c4, c3, #16 + and t, t, #0x000fffffffffffff + mul l, v, t + extr t, d4, d3, #16 + and t, t, #0x000fffffffffffff + mul t, w, t + add l, l, t - lsl c, c, #48 - add l, l, c + lsl c, c, #48 + add l, l, c - lsr t, h, #52 - add l, l, t + lsr t, h, #52 + add l, l, t - lsl h, h, #12 - extr t, l, h, #48 - adcs sum3, sum3, t - and u, u, sum3 + lsl h, h, #12 + extr t, l, h, #48 + adcs sum3, sum3, t + and u, u, sum3 // 5 * 52 = 64 * 4 + 4 - lsr t, c4, #4 - and t, t, #0x000fffffffffffff - mul h, v, t - lsr t, d4, #4 - and t, t, #0x000fffffffffffff - mul t, w, t - add h, h, t + lsr t, c4, #4 + and t, t, #0x000fffffffffffff + mul h, v, t + lsr t, d4, #4 + and t, t, #0x000fffffffffffff + mul t, w, t + add h, h, t - lsr t, l, #52 - add h, h, t + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr s, h, l, #60 + lsl l, l, #12 + extr s, h, l, #60 // 6 * 52 = 64 * 4 + 56 - extr t, c5, c4, #56 - and t, t, #0x000fffffffffffff - mul l, v, t - extr t, d5, d4, #56 - and t, t, #0x000fffffffffffff - mul t, w, t - add l, l, t + extr t, c5, c4, #56 + and t, t, #0x000fffffffffffff + mul l, v, t + extr t, d5, d4, #56 + and t, t, #0x000fffffffffffff + mul t, w, t + add l, l, t - lsr t, h, #52 - add l, l, t + lsr t, h, #52 + add l, l, t - lsl s, s, #8 - extr t, l, s, #8 - adcs sum4, sum4, t - and u, u, sum4 + lsl s, s, #8 + extr t, l, s, #8 + adcs sum4, sum4, t + and u, u, sum4 // 7 * 52 = 64 * 5 + 44 - ldp c6, c7, [x, #48] - ldp d6, d7, [y, #48] - extr t, c6, c5, #44 - and t, t, #0x000fffffffffffff - mul h, v, t - extr t, d6, d5, #44 - and t, t, #0x000fffffffffffff - mul t, w, t - add h, h, t + ldp c6, c7, [x, #48] + ldp d6, d7, [y, #48] + extr t, c6, c5, #44 + and t, t, #0x000fffffffffffff + mul h, v, t + extr t, d6, d5, #44 + and t, t, #0x000fffffffffffff + mul t, w, t + add h, h, t - lsr t, l, #52 - add h, h, t + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #20 - adcs sum5, sum5, t - and u, u, sum5 + lsl l, l, #12 + extr t, h, l, #20 + adcs sum5, sum5, t + and u, u, sum5 // 8 * 52 = 64 * 6 + 32 - extr t, c7, c6, #32 - and t, t, #0x000fffffffffffff - mul l, v, t - extr t, d7, d6, #32 - and t, t, #0x000fffffffffffff - mul t, w, t - add l, l, t + extr t, c7, c6, #32 + and t, t, #0x000fffffffffffff + mul l, v, t + extr t, d7, d6, #32 + and t, t, #0x000fffffffffffff + mul t, w, t + add l, l, t - lsr t, h, #52 - add l, l, t + lsr t, h, #52 + add l, l, t - lsl h, h, #12 - extr t, l, h, #32 - adcs sum6, sum6, t - and u, u, sum6 + lsl h, h, #12 + extr t, l, h, #32 + adcs sum6, sum6, t + and u, u, sum6 // 9 * 52 = 64 * 7 + 20 - lsr t, c7, #20 - mul h, v, t - lsr t, d7, #20 - mul t, w, t - add h, h, t + lsr t, c7, #20 + mul h, v, t + lsr t, d7, #20 + mul t, w, t + add h, h, t - lsr t, l, #52 - add h, h, t + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #44 - adcs sum7, sum7, t - and u, u, sum7 + lsl l, l, #12 + extr t, h, l, #44 + adcs sum7, sum7, t + and u, u, sum7 // Top word - mul t, v, w - lsr h, h, #44 - add t, t, h - adc sum8, sum8, t + mul t, v, w + lsr h, h, #44 + add t, t, h + adc sum8, 
sum8, t // Extract the high part h and mask off the low part l = [sum8;sum7;...;sum0] // but stuff sum8 with 1 bits at the left to ease a comparison below - lsr h, sum8, #9 - orr sum8, sum8, #~0x1FF + lsr h, sum8, #9 + orr sum8, sum8, #~0x1FF // Decide whether h + l >= p_521 <=> h + l + 1 >= 2^521. Since this can only // happen if digits sum7,...sum1 are all 1s, we use the AND of them "u" to // condense the carry chain, and since we stuffed 1 bits into sum8 we get // the result in CF without an additional comparison. - subs xzr, xzr, xzr - adcs xzr, sum0, h - adcs xzr, u, xzr - adcs xzr, sum8, xzr + subs xzr, xzr, xzr + adcs xzr, sum0, h + adcs xzr, u, xzr + adcs xzr, sum8, xzr // Now if CF is set we want (h + l) - p_521 = (h + l + 1) - 2^521 // while otherwise we want just h + l. So mask h + l + CF to 521 bits. // The masking is combined with the writeback in the next block. - adcs sum0, sum0, h - adcs sum1, sum1, xzr - adcs sum2, sum2, xzr - adcs sum3, sum3, xzr - adcs sum4, sum4, xzr - adcs sum5, sum5, xzr - adcs sum6, sum6, xzr - adcs sum7, sum7, xzr - adc sum8, sum8, xzr + adcs sum0, sum0, h + adcs sum1, sum1, xzr + adcs sum2, sum2, xzr + adcs sum3, sum3, xzr + adcs sum4, sum4, xzr + adcs sum5, sum5, xzr + adcs sum6, sum6, xzr + adcs sum7, sum7, xzr + adc sum8, sum8, xzr // The result is actually [sum8;...;sum0] == product / 2^512, since we are // in the 512 position. For Montgomery we want product / 2^576, so write // back [sum8;...;sum0] rotated right by 64 bits, as a 521-bit quantity. - stp sum1, sum2, [z] - stp sum3, sum4, [z, #16] - stp sum5, sum6, [z, #32] - lsl h, sum0, #9 - and sum8, sum8, #0x1FF - orr sum8, sum8, h - stp sum7, sum8, [z, #48] - lsr sum0, sum0, #55 - str sum0, [z, #64] + stp sum1, sum2, [z] + stp sum3, sum4, [z, #16] + stp sum5, sum6, [z, #32] + lsl h, sum0, #9 + and sum8, sum8, #0x1FF + orr sum8, sum8, h + stp sum7, sum8, [z, #48] + lsr sum0, sum0, #55 + str sum0, [z, #64] // Restore regs and return - add sp, sp, #80 - ldp x25, x26, [sp], #16 - ldp x23, x24, [sp], #16 - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 - ret + add sp, sp, #80 + ldp x25, x26, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_montmul_p521_alt.S b/arm/p521/bignum_montmul_p521_alt.S index a211a3a85a..d31e280e22 100644 --- a/arm/p521/bignum_montmul_p521_alt.S +++ b/arm/p521/bignum_montmul_p521_alt.S @@ -87,478 +87,478 @@ _bignum_montmul_p521_alt: // Save more registers and make space for the temporary buffer - stp x19, x20, [sp, #-16]! - stp x21, x22, [sp, #-16]! - stp x23, x24, [sp, #-16]! - stp x25, x26, [sp, #-16]! - sub sp, sp, #64 + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! 
+ sub sp, sp, #64 // Load operands and set up row 0 = [u9;...;u0] = a0 * [b8;...;b0] - ldp a0, a1, [x] - ldp b0, b1, [y] + ldp a0, a1, [x] + ldp b0, b1, [y] - mul u0, a0, b0 - umulh u1, a0, b0 - mul t, a0, b1 - umulh u2, a0, b1 - adds u1, u1, t + mul u0, a0, b0 + umulh u1, a0, b0 + mul t, a0, b1 + umulh u2, a0, b1 + adds u1, u1, t - ldp b2, b3, [y, #16] + ldp b2, b3, [y, #16] - mul t, a0, b2 - umulh u3, a0, b2 - adcs u2, u2, t + mul t, a0, b2 + umulh u3, a0, b2 + adcs u2, u2, t - mul t, a0, b3 - umulh u4, a0, b3 - adcs u3, u3, t + mul t, a0, b3 + umulh u4, a0, b3 + adcs u3, u3, t - ldp b4, b5, [y, #32] + ldp b4, b5, [y, #32] - mul t, a0, b4 - umulh u5, a0, b4 - adcs u4, u4, t + mul t, a0, b4 + umulh u5, a0, b4 + adcs u4, u4, t - mul t, a0, b5 - umulh u6, a0, b5 - adcs u5, u5, t + mul t, a0, b5 + umulh u6, a0, b5 + adcs u5, u5, t - ldp b6, b7, [y, #48] + ldp b6, b7, [y, #48] - mul t, a0, b6 - umulh u7, a0, b6 - adcs u6, u6, t + mul t, a0, b6 + umulh u7, a0, b6 + adcs u6, u6, t - ldr b8, [y, #64] + ldr b8, [y, #64] - mul t, a0, b7 - umulh u8, a0, b7 - adcs u7, u7, t + mul t, a0, b7 + umulh u8, a0, b7 + adcs u7, u7, t - mul t, a0, b8 - umulh u9, a0, b8 - adcs u8, u8, t + mul t, a0, b8 + umulh u9, a0, b8 + adcs u8, u8, t - adc u9, u9, xzr + adc u9, u9, xzr // Row 1 = [u10;...;u0] = [a1;a0] * [b8;...;b0] - mul t, a1, b0 - adds u1, u1, t - mul t, a1, b1 - adcs u2, u2, t - mul t, a1, b2 - adcs u3, u3, t - mul t, a1, b3 - adcs u4, u4, t - mul t, a1, b4 - adcs u5, u5, t - mul t, a1, b5 - adcs u6, u6, t - mul t, a1, b6 - adcs u7, u7, t - mul t, a1, b7 - adcs u8, u8, t - mul t, a1, b8 - adcs u9, u9, t - cset u10, cs - - umulh t, a1, b0 - adds u2, u2, t - umulh t, a1, b1 - adcs u3, u3, t - umulh t, a1, b2 - adcs u4, u4, t - umulh t, a1, b3 - adcs u5, u5, t - umulh t, a1, b4 - adcs u6, u6, t - umulh t, a1, b5 - adcs u7, u7, t - umulh t, a1, b6 - adcs u8, u8, t - umulh t, a1, b7 - adcs u9, u9, t - umulh t, a1, b8 - adc u10, u10, t - - stp u0, u1, [sp] + mul t, a1, b0 + adds u1, u1, t + mul t, a1, b1 + adcs u2, u2, t + mul t, a1, b2 + adcs u3, u3, t + mul t, a1, b3 + adcs u4, u4, t + mul t, a1, b4 + adcs u5, u5, t + mul t, a1, b5 + adcs u6, u6, t + mul t, a1, b6 + adcs u7, u7, t + mul t, a1, b7 + adcs u8, u8, t + mul t, a1, b8 + adcs u9, u9, t + cset u10, cs + + umulh t, a1, b0 + adds u2, u2, t + umulh t, a1, b1 + adcs u3, u3, t + umulh t, a1, b2 + adcs u4, u4, t + umulh t, a1, b3 + adcs u5, u5, t + umulh t, a1, b4 + adcs u6, u6, t + umulh t, a1, b5 + adcs u7, u7, t + umulh t, a1, b6 + adcs u8, u8, t + umulh t, a1, b7 + adcs u9, u9, t + umulh t, a1, b8 + adc u10, u10, t + + stp u0, u1, [sp] // Row 2 = [u11;...;u0] = [a2;a1;a0] * [b8;...;b0] - ldp a2, a3, [x, #16] - - mul t, a2, b0 - adds u2, u2, t - mul t, a2, b1 - adcs u3, u3, t - mul t, a2, b2 - adcs u4, u4, t - mul t, a2, b3 - adcs u5, u5, t - mul t, a2, b4 - adcs u6, u6, t - mul t, a2, b5 - adcs u7, u7, t - mul t, a2, b6 - adcs u8, u8, t - mul t, a2, b7 - adcs u9, u9, t - mul t, a2, b8 - adcs u10, u10, t - cset u11, cs - - umulh t, a2, b0 - adds u3, u3, t - umulh t, a2, b1 - adcs u4, u4, t - umulh t, a2, b2 - adcs u5, u5, t - umulh t, a2, b3 - adcs u6, u6, t - umulh t, a2, b4 - adcs u7, u7, t - umulh t, a2, b5 - adcs u8, u8, t - umulh t, a2, b6 - adcs u9, u9, t - umulh t, a2, b7 - adcs u10, u10, t - umulh t, a2, b8 - adc u11, u11, t + ldp a2, a3, [x, #16] + + mul t, a2, b0 + adds u2, u2, t + mul t, a2, b1 + adcs u3, u3, t + mul t, a2, b2 + adcs u4, u4, t + mul t, a2, b3 + adcs u5, u5, t + mul t, a2, b4 + adcs u6, u6, t + mul t, a2, b5 + adcs u7, u7, 
t + mul t, a2, b6 + adcs u8, u8, t + mul t, a2, b7 + adcs u9, u9, t + mul t, a2, b8 + adcs u10, u10, t + cset u11, cs + + umulh t, a2, b0 + adds u3, u3, t + umulh t, a2, b1 + adcs u4, u4, t + umulh t, a2, b2 + adcs u5, u5, t + umulh t, a2, b3 + adcs u6, u6, t + umulh t, a2, b4 + adcs u7, u7, t + umulh t, a2, b5 + adcs u8, u8, t + umulh t, a2, b6 + adcs u9, u9, t + umulh t, a2, b7 + adcs u10, u10, t + umulh t, a2, b8 + adc u11, u11, t // Row 3 = [u12;...;u0] = [a3;a2;a1;a0] * [b8;...;b0] - mul t, a3, b0 - adds u3, u3, t - mul t, a3, b1 - adcs u4, u4, t - mul t, a3, b2 - adcs u5, u5, t - mul t, a3, b3 - adcs u6, u6, t - mul t, a3, b4 - adcs u7, u7, t - mul t, a3, b5 - adcs u8, u8, t - mul t, a3, b6 - adcs u9, u9, t - mul t, a3, b7 - adcs u10, u10, t - mul t, a3, b8 - adcs u11, u11, t - cset u12, cs - - umulh t, a3, b0 - adds u4, u4, t - umulh t, a3, b1 - adcs u5, u5, t - umulh t, a3, b2 - adcs u6, u6, t - umulh t, a3, b3 - adcs u7, u7, t - umulh t, a3, b4 - adcs u8, u8, t - umulh t, a3, b5 - adcs u9, u9, t - umulh t, a3, b6 - adcs u10, u10, t - umulh t, a3, b7 - adcs u11, u11, t - umulh t, a3, b8 - adc u12, u12, t - - stp u2, u3, [sp, #16] + mul t, a3, b0 + adds u3, u3, t + mul t, a3, b1 + adcs u4, u4, t + mul t, a3, b2 + adcs u5, u5, t + mul t, a3, b3 + adcs u6, u6, t + mul t, a3, b4 + adcs u7, u7, t + mul t, a3, b5 + adcs u8, u8, t + mul t, a3, b6 + adcs u9, u9, t + mul t, a3, b7 + adcs u10, u10, t + mul t, a3, b8 + adcs u11, u11, t + cset u12, cs + + umulh t, a3, b0 + adds u4, u4, t + umulh t, a3, b1 + adcs u5, u5, t + umulh t, a3, b2 + adcs u6, u6, t + umulh t, a3, b3 + adcs u7, u7, t + umulh t, a3, b4 + adcs u8, u8, t + umulh t, a3, b5 + adcs u9, u9, t + umulh t, a3, b6 + adcs u10, u10, t + umulh t, a3, b7 + adcs u11, u11, t + umulh t, a3, b8 + adc u12, u12, t + + stp u2, u3, [sp, #16] // Row 4 = [u13;...;u0] = [a4;a3;a2;a1;a0] * [b8;...;b0] - ldp a4, a5, [x, #32] - - mul t, a4, b0 - adds u4, u4, t - mul t, a4, b1 - adcs u5, u5, t - mul t, a4, b2 - adcs u6, u6, t - mul t, a4, b3 - adcs u7, u7, t - mul t, a4, b4 - adcs u8, u8, t - mul t, a4, b5 - adcs u9, u9, t - mul t, a4, b6 - adcs u10, u10, t - mul t, a4, b7 - adcs u11, u11, t - mul t, a4, b8 - adcs u12, u12, t - cset u13, cs - - umulh t, a4, b0 - adds u5, u5, t - umulh t, a4, b1 - adcs u6, u6, t - umulh t, a4, b2 - adcs u7, u7, t - umulh t, a4, b3 - adcs u8, u8, t - umulh t, a4, b4 - adcs u9, u9, t - umulh t, a4, b5 - adcs u10, u10, t - umulh t, a4, b6 - adcs u11, u11, t - umulh t, a4, b7 - adcs u12, u12, t - umulh t, a4, b8 - adc u13, u13, t + ldp a4, a5, [x, #32] + + mul t, a4, b0 + adds u4, u4, t + mul t, a4, b1 + adcs u5, u5, t + mul t, a4, b2 + adcs u6, u6, t + mul t, a4, b3 + adcs u7, u7, t + mul t, a4, b4 + adcs u8, u8, t + mul t, a4, b5 + adcs u9, u9, t + mul t, a4, b6 + adcs u10, u10, t + mul t, a4, b7 + adcs u11, u11, t + mul t, a4, b8 + adcs u12, u12, t + cset u13, cs + + umulh t, a4, b0 + adds u5, u5, t + umulh t, a4, b1 + adcs u6, u6, t + umulh t, a4, b2 + adcs u7, u7, t + umulh t, a4, b3 + adcs u8, u8, t + umulh t, a4, b4 + adcs u9, u9, t + umulh t, a4, b5 + adcs u10, u10, t + umulh t, a4, b6 + adcs u11, u11, t + umulh t, a4, b7 + adcs u12, u12, t + umulh t, a4, b8 + adc u13, u13, t // Row 5 = [u14;...;u0] = [a5;a4;a3;a2;a1;a0] * [b8;...;b0] - mul t, a5, b0 - adds u5, u5, t - mul t, a5, b1 - adcs u6, u6, t - mul t, a5, b2 - adcs u7, u7, t - mul t, a5, b3 - adcs u8, u8, t - mul t, a5, b4 - adcs u9, u9, t - mul t, a5, b5 - adcs u10, u10, t - mul t, a5, b6 - adcs u11, u11, t - mul t, a5, b7 - adcs u12, u12, t - mul t, a5, 
b8 - adcs u13, u13, t - cset u14, cs - - umulh t, a5, b0 - adds u6, u6, t - umulh t, a5, b1 - adcs u7, u7, t - umulh t, a5, b2 - adcs u8, u8, t - umulh t, a5, b3 - adcs u9, u9, t - umulh t, a5, b4 - adcs u10, u10, t - umulh t, a5, b5 - adcs u11, u11, t - umulh t, a5, b6 - adcs u12, u12, t - umulh t, a5, b7 - adcs u13, u13, t - umulh t, a5, b8 - adc u14, u14, t - - stp u4, u5, [sp, #32] + mul t, a5, b0 + adds u5, u5, t + mul t, a5, b1 + adcs u6, u6, t + mul t, a5, b2 + adcs u7, u7, t + mul t, a5, b3 + adcs u8, u8, t + mul t, a5, b4 + adcs u9, u9, t + mul t, a5, b5 + adcs u10, u10, t + mul t, a5, b6 + adcs u11, u11, t + mul t, a5, b7 + adcs u12, u12, t + mul t, a5, b8 + adcs u13, u13, t + cset u14, cs + + umulh t, a5, b0 + adds u6, u6, t + umulh t, a5, b1 + adcs u7, u7, t + umulh t, a5, b2 + adcs u8, u8, t + umulh t, a5, b3 + adcs u9, u9, t + umulh t, a5, b4 + adcs u10, u10, t + umulh t, a5, b5 + adcs u11, u11, t + umulh t, a5, b6 + adcs u12, u12, t + umulh t, a5, b7 + adcs u13, u13, t + umulh t, a5, b8 + adc u14, u14, t + + stp u4, u5, [sp, #32] // Row 6 = [u15;...;u0] = [a6;a5;a4;a3;a2;a1;a0] * [b8;...;b0] - ldp a6, a7, [x, #48] - - mul t, a6, b0 - adds u6, u6, t - mul t, a6, b1 - adcs u7, u7, t - mul t, a6, b2 - adcs u8, u8, t - mul t, a6, b3 - adcs u9, u9, t - mul t, a6, b4 - adcs u10, u10, t - mul t, a6, b5 - adcs u11, u11, t - mul t, a6, b6 - adcs u12, u12, t - mul t, a6, b7 - adcs u13, u13, t - mul t, a6, b8 - adcs u14, u14, t - cset u15, cs - - umulh t, a6, b0 - adds u7, u7, t - umulh t, a6, b1 - adcs u8, u8, t - umulh t, a6, b2 - adcs u9, u9, t - umulh t, a6, b3 - adcs u10, u10, t - umulh t, a6, b4 - adcs u11, u11, t - umulh t, a6, b5 - adcs u12, u12, t - umulh t, a6, b6 - adcs u13, u13, t - umulh t, a6, b7 - adcs u14, u14, t - umulh t, a6, b8 - adc u15, u15, t + ldp a6, a7, [x, #48] + + mul t, a6, b0 + adds u6, u6, t + mul t, a6, b1 + adcs u7, u7, t + mul t, a6, b2 + adcs u8, u8, t + mul t, a6, b3 + adcs u9, u9, t + mul t, a6, b4 + adcs u10, u10, t + mul t, a6, b5 + adcs u11, u11, t + mul t, a6, b6 + adcs u12, u12, t + mul t, a6, b7 + adcs u13, u13, t + mul t, a6, b8 + adcs u14, u14, t + cset u15, cs + + umulh t, a6, b0 + adds u7, u7, t + umulh t, a6, b1 + adcs u8, u8, t + umulh t, a6, b2 + adcs u9, u9, t + umulh t, a6, b3 + adcs u10, u10, t + umulh t, a6, b4 + adcs u11, u11, t + umulh t, a6, b5 + adcs u12, u12, t + umulh t, a6, b6 + adcs u13, u13, t + umulh t, a6, b7 + adcs u14, u14, t + umulh t, a6, b8 + adc u15, u15, t // Row 7 = [u16;...;u0] = [a7;a6;a5;a4;a3;a2;a1;a0] * [b8;...;b0] - mul t, a7, b0 - adds u7, u7, t - mul t, a7, b1 - adcs u8, u8, t - mul t, a7, b2 - adcs u9, u9, t - mul t, a7, b3 - adcs u10, u10, t - mul t, a7, b4 - adcs u11, u11, t - mul t, a7, b5 - adcs u12, u12, t - mul t, a7, b6 - adcs u13, u13, t - mul t, a7, b7 - adcs u14, u14, t - mul t, a7, b8 - adcs u15, u15, t - cset u16, cs - - umulh t, a7, b0 - adds u8, u8, t - umulh t, a7, b1 - adcs u9, u9, t - umulh t, a7, b2 - adcs u10, u10, t - umulh t, a7, b3 - adcs u11, u11, t - umulh t, a7, b4 - adcs u12, u12, t - umulh t, a7, b5 - adcs u13, u13, t - umulh t, a7, b6 - adcs u14, u14, t - umulh t, a7, b7 - adcs u15, u15, t - umulh t, a7, b8 - adc u16, u16, t - - stp u6, u7, [sp, #48] + mul t, a7, b0 + adds u7, u7, t + mul t, a7, b1 + adcs u8, u8, t + mul t, a7, b2 + adcs u9, u9, t + mul t, a7, b3 + adcs u10, u10, t + mul t, a7, b4 + adcs u11, u11, t + mul t, a7, b5 + adcs u12, u12, t + mul t, a7, b6 + adcs u13, u13, t + mul t, a7, b7 + adcs u14, u14, t + mul t, a7, b8 + adcs u15, u15, t + cset u16, cs + + umulh 
t, a7, b0 + adds u8, u8, t + umulh t, a7, b1 + adcs u9, u9, t + umulh t, a7, b2 + adcs u10, u10, t + umulh t, a7, b3 + adcs u11, u11, t + umulh t, a7, b4 + adcs u12, u12, t + umulh t, a7, b5 + adcs u13, u13, t + umulh t, a7, b6 + adcs u14, u14, t + umulh t, a7, b7 + adcs u15, u15, t + umulh t, a7, b8 + adc u16, u16, t + + stp u6, u7, [sp, #48] // Row 8 = [u16;...;u0] = [a8;a7;a6;a5;a4;a3;a2;a1;a0] * [b8;...;b0] - ldr a8, [x, #64] - - mul t, a8, b0 - adds u8, u8, t - mul t, a8, b1 - adcs u9, u9, t - mul t, a8, b2 - adcs u10, u10, t - mul t, a8, b3 - adcs u11, u11, t - mul t, a8, b4 - adcs u12, u12, t - mul t, a8, b5 - adcs u13, u13, t - mul t, a8, b6 - adcs u14, u14, t - mul t, a8, b7 - adcs u15, u15, t - mul t, a8, b8 - adc u16, u16, t - - umulh t, a8, b0 - adds u9, u9, t - umulh t, a8, b1 - adcs u10, u10, t - umulh t, a8, b2 - adcs u11, u11, t - umulh t, a8, b3 - adcs u12, u12, t - umulh t, a8, b4 - adcs u13, u13, t - umulh t, a8, b5 - adcs u14, u14, t - umulh t, a8, b6 - adcs u15, u15, t - umulh t, a8, b7 - adc u16, u16, t + ldr a8, [x, #64] + + mul t, a8, b0 + adds u8, u8, t + mul t, a8, b1 + adcs u9, u9, t + mul t, a8, b2 + adcs u10, u10, t + mul t, a8, b3 + adcs u11, u11, t + mul t, a8, b4 + adcs u12, u12, t + mul t, a8, b5 + adcs u13, u13, t + mul t, a8, b6 + adcs u14, u14, t + mul t, a8, b7 + adcs u15, u15, t + mul t, a8, b8 + adc u16, u16, t + + umulh t, a8, b0 + adds u9, u9, t + umulh t, a8, b1 + adcs u10, u10, t + umulh t, a8, b2 + adcs u11, u11, t + umulh t, a8, b3 + adcs u12, u12, t + umulh t, a8, b4 + adcs u13, u13, t + umulh t, a8, b5 + adcs u14, u14, t + umulh t, a8, b6 + adcs u15, u15, t + umulh t, a8, b7 + adc u16, u16, t // Now we have the full product, which we consider as // 2^521 * h + l. Form h + l + 1 - subs xzr, xzr, xzr - ldp b0, b1, [sp] - extr t, u9, u8, #9 - adcs b0, b0, t - extr t, u10, u9, #9 - adcs b1, b1, t - ldp b2, b3, [sp, #16] - extr t, u11, u10, #9 - adcs b2, b2, t - extr t, u12, u11, #9 - adcs b3, b3, t - ldp b4, b5, [sp, #32] - extr t, u13, u12, #9 - adcs b4, b4, t - extr t, u14, u13, #9 - adcs b5, b5, t - ldp b6, b7, [sp, #48] - extr t, u15, u14, #9 - adcs b6, b6, t - extr t, u16, u15, #9 - adcs b7, b7, t - orr b8, u8, #~0x1FF - lsr t, u16, #9 - adcs b8, b8, t + subs xzr, xzr, xzr + ldp b0, b1, [sp] + extr t, u9, u8, #9 + adcs b0, b0, t + extr t, u10, u9, #9 + adcs b1, b1, t + ldp b2, b3, [sp, #16] + extr t, u11, u10, #9 + adcs b2, b2, t + extr t, u12, u11, #9 + adcs b3, b3, t + ldp b4, b5, [sp, #32] + extr t, u13, u12, #9 + adcs b4, b4, t + extr t, u14, u13, #9 + adcs b5, b5, t + ldp b6, b7, [sp, #48] + extr t, u15, u14, #9 + adcs b6, b6, t + extr t, u16, u15, #9 + adcs b7, b7, t + orr b8, u8, #~0x1FF + lsr t, u16, #9 + adcs b8, b8, t // Now CF is set if h + l + 1 >= 2^521, which means it's already // the answer, while if ~CF the answer is h + l so we should subtract // 1 (all considered in 521 bits). Hence subtract ~CF and mask. - sbcs b0, b0, xzr - sbcs b1, b1, xzr - sbcs b2, b2, xzr - sbcs b3, b3, xzr - sbcs b4, b4, xzr - sbcs b5, b5, xzr - sbcs b6, b6, xzr - sbcs b7, b7, xzr - sbc b8, b8, xzr - and b8, b8, #0x1FF + sbcs b0, b0, xzr + sbcs b1, b1, xzr + sbcs b2, b2, xzr + sbcs b3, b3, xzr + sbcs b4, b4, xzr + sbcs b5, b5, xzr + sbcs b6, b6, xzr + sbcs b7, b7, xzr + sbc b8, b8, xzr + and b8, b8, #0x1FF // So far, this has been the same as a pure modular multiplication. // Now finally the Montgomery ingredient, which is just a 521-bit // rotation by 9*64 - 521 = 55 bits right. 
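
// (The rotation implements the Montgomery scaling because 2^521 == 1
// (mod p_521) gives 2^(-576) == 2^(521-55) == 2^466, and multiplying by
// 2^466 modulo 2^521 - 1 rotates a 521-bit value left by 466 bits, i.e.
// right by exactly 55 bits.)
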
- lsl t, b0, #9 - extr b0, b1, b0, #55 - extr b1, b2, b1, #55 - extr b2, b3, b2, #55 - extr b3, b4, b3, #55 - orr b8, b8, t - extr b4, b5, b4, #55 - extr b5, b6, b5, #55 - extr b6, b7, b6, #55 - extr b7, b8, b7, #55 - lsr b8, b8, #55 + lsl t, b0, #9 + extr b0, b1, b0, #55 + extr b1, b2, b1, #55 + extr b2, b3, b2, #55 + extr b3, b4, b3, #55 + orr b8, b8, t + extr b4, b5, b4, #55 + extr b5, b6, b5, #55 + extr b6, b7, b6, #55 + extr b7, b8, b7, #55 + lsr b8, b8, #55 // Store back digits of final result - stp b0, b1, [z] - stp b2, b3, [z, #16] - stp b4, b5, [z, #32] - stp b6, b7, [z, #48] - str b8, [z, #64] + stp b0, b1, [z] + stp b2, b3, [z, #16] + stp b4, b5, [z, #32] + stp b6, b7, [z, #48] + str b8, [z, #64] // Restore registers - add sp, sp, #64 - ldp x25, x26, [sp], #16 - ldp x23, x24, [sp], #16 - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 + add sp, sp, #64 + ldp x25, x26, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_montsqr_p521.S b/arm/p521/bignum_montsqr_p521.S index 2339c04e75..fb112ccb3a 100644 --- a/arm/p521/bignum_montsqr_p521.S +++ b/arm/p521/bignum_montsqr_p521.S @@ -78,82 +78,82 @@ _bignum_montsqr_p521: // Save registers - stp x19, x20, [sp, #-16]! - stp x21, x22, [sp, #-16]! - stp x23, x24, [sp, #-16]! + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! // Load all the inputs first - ldp a0, a1, [x] - ldp a2, a3, [x, #16] - ldp b0, b1, [x, #32] - ldp b2, b3, [x, #48] + ldp a0, a1, [x] + ldp a2, a3, [x, #16] + ldp b0, b1, [x, #32] + ldp b2, b3, [x, #48] // Square the upper half with a register-renamed variant of bignum_sqr_4_8 - mul s2, b0, b2 - mul s7, b1, b3 - umulh t, b0, b2 - subs u, b0, b1 - cneg u, u, cc - csetm s1, cc - subs s0, b3, b2 - cneg s0, s0, cc - mul s6, u, s0 - umulh s0, u, s0 - cinv s1, s1, cc - eor s6, s6, s1 - eor s0, s0, s1 - adds s3, s2, t - adc t, t, xzr - umulh u, b1, b3 - adds s3, s3, s7 - adcs t, t, u - adc u, u, xzr - adds t, t, s7 - adc u, u, xzr - cmn s1, #0x1 - adcs s3, s3, s6 - adcs t, t, s0 - adc u, u, s1 - adds s2, s2, s2 - adcs s3, s3, s3 - adcs t, t, t - adcs u, u, u - adc c, xzr, xzr - mul s0, b0, b0 - mul s6, b1, b1 - mul l, b0, b1 - umulh s1, b0, b0 - umulh s7, b1, b1 - umulh h, b0, b1 - adds s1, s1, l - adcs s6, s6, h - adc s7, s7, xzr - adds s1, s1, l - adcs s6, s6, h - adc s7, s7, xzr - adds s2, s2, s6 - adcs s3, s3, s7 - adcs t, t, xzr - adcs u, u, xzr - adc c, c, xzr - mul s4, b2, b2 - mul s6, b3, b3 - mul l, b2, b3 - umulh s5, b2, b2 - umulh s7, b3, b3 - umulh h, b2, b3 - adds s5, s5, l - adcs s6, s6, h - adc s7, s7, xzr - adds s5, s5, l - adcs s6, s6, h - adc s7, s7, xzr - adds s4, s4, t - adcs s5, s5, u - adcs s6, s6, c - adc s7, s7, xzr + mul s2, b0, b2 + mul s7, b1, b3 + umulh t, b0, b2 + subs u, b0, b1 + cneg u, u, cc + csetm s1, cc + subs s0, b3, b2 + cneg s0, s0, cc + mul s6, u, s0 + umulh s0, u, s0 + cinv s1, s1, cc + eor s6, s6, s1 + eor s0, s0, s1 + adds s3, s2, t + adc t, t, xzr + umulh u, b1, b3 + adds s3, s3, s7 + adcs t, t, u + adc u, u, xzr + adds t, t, s7 + adc u, u, xzr + cmn s1, #0x1 + adcs s3, s3, s6 + adcs t, t, s0 + adc u, u, s1 + adds s2, s2, s2 + adcs s3, s3, s3 + adcs t, t, t + adcs u, u, u + adc c, xzr, xzr + mul s0, b0, b0 + mul s6, b1, b1 + mul l, b0, b1 + umulh s1, b0, b0 + umulh s7, b1, b1 + umulh h, b0, b1 + adds s1, s1, l + adcs s6, s6, h + adc s7, s7, xzr + adds s1, s1, l + adcs s6, s6, h + adc s7, s7, 
xzr + adds s2, s2, s6 + adcs s3, s3, s7 + adcs t, t, xzr + adcs u, u, xzr + adc c, c, xzr + mul s4, b2, b2 + mul s6, b3, b3 + mul l, b2, b3 + umulh s5, b2, b2 + umulh s7, b3, b3 + umulh h, b2, b3 + adds s5, s5, l + adcs s6, s6, h + adc s7, s7, xzr + adds s5, s5, l + adcs s6, s6, h + adc s7, s7, xzr + adds s4, s4, t + adcs s5, s5, u + adcs s6, s6, c + adc s7, s7, xzr // Augment the high part with the contribution from the top little word C. // If we write the input as 2^512 * C + x then we are otherwise just doing @@ -168,364 +168,364 @@ _bignum_montsqr_p521: // equally well use 53 or 54 since they are still <= 64 - 10, but below // 52 we would end up using more multiplications. - ldr c, [x, #64] - add u, c, c - mul c, c, c + ldr c, [x, #64] + add u, c, c + mul c, c, c // 0 * 52 = 64 * 0 + 0 - and l, a0, #0x000fffffffffffff - mul l, u, l + and l, a0, #0x000fffffffffffff + mul l, u, l // 1 * 52 = 64 * 0 + 52 - extr h, a1, a0, #52 - and h, h, #0x000fffffffffffff - mul h, u, h - lsr t, l, #52 - add h, h, t + extr h, a1, a0, #52 + and h, h, #0x000fffffffffffff + mul h, u, h + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #12 - adds s0, s0, t + lsl l, l, #12 + extr t, h, l, #12 + adds s0, s0, t // 2 * 52 = 64 * 1 + 40 - extr l, a2, a1, #40 - and l, l, #0x000fffffffffffff - mul l, u, l - lsr t, h, #52 - add l, l, t + extr l, a2, a1, #40 + and l, l, #0x000fffffffffffff + mul l, u, l + lsr t, h, #52 + add l, l, t - lsl h, h, #12 - extr t, l, h, #24 - adcs s1, s1, t + lsl h, h, #12 + extr t, l, h, #24 + adcs s1, s1, t // 3 * 52 = 64 * 2 + 28 - extr h, a3, a2, #28 - and h, h, #0x000fffffffffffff - mul h, u, h - lsr t, l, #52 - add h, h, t + extr h, a3, a2, #28 + and h, h, #0x000fffffffffffff + mul h, u, h + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #36 - adcs s2, s2, t + lsl l, l, #12 + extr t, h, l, #36 + adcs s2, s2, t // 4 * 52 = 64 * 3 + 16 - extr l, b0, a3, #16 - and l, l, #0x000fffffffffffff - mul l, u, l - lsr t, h, #52 - add l, l, t + extr l, b0, a3, #16 + and l, l, #0x000fffffffffffff + mul l, u, l + lsr t, h, #52 + add l, l, t - lsl h, h, #12 - extr t, l, h, #48 - adcs s3, s3, t + lsl h, h, #12 + extr t, l, h, #48 + adcs s3, s3, t // 5 * 52 = 64 * 4 + 4 - lsr h, b0, #4 - and h, h, #0x000fffffffffffff - mul h, u, h - lsr t, l, #52 - add h, h, t + lsr h, b0, #4 + and h, h, #0x000fffffffffffff + mul h, u, h + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr v, h, l, #60 + lsl l, l, #12 + extr v, h, l, #60 // 6 * 52 = 64 * 4 + 56 - extr l, b1, b0, #56 - and l, l, #0x000fffffffffffff - mul l, u, l - lsr t, h, #52 - add l, l, t + extr l, b1, b0, #56 + and l, l, #0x000fffffffffffff + mul l, u, l + lsr t, h, #52 + add l, l, t - lsl v, v, #8 - extr t, l, v, #8 - adcs s4, s4, t + lsl v, v, #8 + extr t, l, v, #8 + adcs s4, s4, t // 7 * 52 = 64 * 5 + 44 - extr h, b2, b1, #44 - and h, h, #0x000fffffffffffff - mul h, u, h - lsr t, l, #52 - add h, h, t + extr h, b2, b1, #44 + and h, h, #0x000fffffffffffff + mul h, u, h + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #20 - adcs s5, s5, t + lsl l, l, #12 + extr t, h, l, #20 + adcs s5, s5, t // 8 * 52 = 64 * 6 + 32 - extr l, b3, b2, #32 - and l, l, #0x000fffffffffffff - mul l, u, l - lsr t, h, #52 - add l, l, t + extr l, b3, b2, #32 + and l, l, #0x000fffffffffffff + mul l, u, l + lsr t, h, #52 + add l, l, t - lsl h, h, #12 - extr t, l, h, #32 - adcs s6, s6, t + lsl h, h, #12 + extr t, l, h, #32 + adcs s6, s6, t // 9 * 52 = 64 * 7 + 20 - lsr h, b3, #20 - mul h, u, h - lsr t, l, #52 - add h, h, t + lsr h, b3, #20 
+ mul h, u, h + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #44 - adcs s7, s7, t + lsl l, l, #12 + extr t, h, l, #44 + adcs s7, s7, t // Top word - lsr h, h, #44 - adc c, c, h + lsr h, h, #44 + adc c, c, h // Rotate [c;s7;...;s0] before storing in the buffer. // We want to add 2^512 * H', which splitting H' at bit 9 is // 2^521 * H_top + 2^512 * H_bot == 2^512 * H_bot + H_top (mod p_521) - extr l, s1, s0, #9 - extr h, s2, s1, #9 - stp l, h, [z] + extr l, s1, s0, #9 + extr h, s2, s1, #9 + stp l, h, [z] - extr l, s3, s2, #9 - extr h, s4, s3, #9 - stp l, h, [z, #16] + extr l, s3, s2, #9 + extr h, s4, s3, #9 + stp l, h, [z, #16] - extr l, s5, s4, #9 - extr h, s6, s5, #9 - stp l, h, [z, #32] + extr l, s5, s4, #9 + extr h, s6, s5, #9 + stp l, h, [z, #32] - extr l, s7, s6, #9 - extr h, c, s7, #9 - stp l, h, [z, #48] + extr l, s7, s6, #9 + extr h, c, s7, #9 + stp l, h, [z, #48] - and t, s0, #0x1FF - lsr c, c, #9 - add t, t, c - str t, [z, #64] + and t, s0, #0x1FF + lsr c, c, #9 + add t, t, c + str t, [z, #64] // Square the lower half with an analogous variant of bignum_sqr_4_8 - mul s2, a0, a2 - mul s7, a1, a3 - umulh t, a0, a2 - subs u, a0, a1 - cneg u, u, cc - csetm s1, cc - subs s0, a3, a2 - cneg s0, s0, cc - mul s6, u, s0 - umulh s0, u, s0 - cinv s1, s1, cc - eor s6, s6, s1 - eor s0, s0, s1 - adds s3, s2, t - adc t, t, xzr - umulh u, a1, a3 - adds s3, s3, s7 - adcs t, t, u - adc u, u, xzr - adds t, t, s7 - adc u, u, xzr - cmn s1, #0x1 - adcs s3, s3, s6 - adcs t, t, s0 - adc u, u, s1 - adds s2, s2, s2 - adcs s3, s3, s3 - adcs t, t, t - adcs u, u, u - adc c, xzr, xzr - mul s0, a0, a0 - mul s6, a1, a1 - mul l, a0, a1 - umulh s1, a0, a0 - umulh s7, a1, a1 - umulh h, a0, a1 - adds s1, s1, l - adcs s6, s6, h - adc s7, s7, xzr - adds s1, s1, l - adcs s6, s6, h - adc s7, s7, xzr - adds s2, s2, s6 - adcs s3, s3, s7 - adcs t, t, xzr - adcs u, u, xzr - adc c, c, xzr - mul s4, a2, a2 - mul s6, a3, a3 - mul l, a2, a3 - umulh s5, a2, a2 - umulh s7, a3, a3 - umulh h, a2, a3 - adds s5, s5, l - adcs s6, s6, h - adc s7, s7, xzr - adds s5, s5, l - adcs s6, s6, h - adc s7, s7, xzr - adds s4, s4, t - adcs s5, s5, u - adcs s6, s6, c - adc s7, s7, xzr + mul s2, a0, a2 + mul s7, a1, a3 + umulh t, a0, a2 + subs u, a0, a1 + cneg u, u, cc + csetm s1, cc + subs s0, a3, a2 + cneg s0, s0, cc + mul s6, u, s0 + umulh s0, u, s0 + cinv s1, s1, cc + eor s6, s6, s1 + eor s0, s0, s1 + adds s3, s2, t + adc t, t, xzr + umulh u, a1, a3 + adds s3, s3, s7 + adcs t, t, u + adc u, u, xzr + adds t, t, s7 + adc u, u, xzr + cmn s1, #0x1 + adcs s3, s3, s6 + adcs t, t, s0 + adc u, u, s1 + adds s2, s2, s2 + adcs s3, s3, s3 + adcs t, t, t + adcs u, u, u + adc c, xzr, xzr + mul s0, a0, a0 + mul s6, a1, a1 + mul l, a0, a1 + umulh s1, a0, a0 + umulh s7, a1, a1 + umulh h, a0, a1 + adds s1, s1, l + adcs s6, s6, h + adc s7, s7, xzr + adds s1, s1, l + adcs s6, s6, h + adc s7, s7, xzr + adds s2, s2, s6 + adcs s3, s3, s7 + adcs t, t, xzr + adcs u, u, xzr + adc c, c, xzr + mul s4, a2, a2 + mul s6, a3, a3 + mul l, a2, a3 + umulh s5, a2, a2 + umulh s7, a3, a3 + umulh h, a2, a3 + adds s5, s5, l + adcs s6, s6, h + adc s7, s7, xzr + adds s5, s5, l + adcs s6, s6, h + adc s7, s7, xzr + adds s4, s4, t + adcs s5, s5, u + adcs s6, s6, c + adc s7, s7, xzr // Add it directly to the existing buffer - ldp l, h, [z] - adds l, l, s0 - adcs h, h, s1 - stp l, h, [z] + ldp l, h, [z] + adds l, l, s0 + adcs h, h, s1 + stp l, h, [z] - ldp l, h, [z, #16] - adcs l, l, s2 - adcs h, h, s3 - stp l, h, [z, #16] + ldp l, h, [z, #16] + adcs l, l, s2 + adcs h, h, s3 + 
stp l, h, [z, #16] - ldp l, h, [z, #32] - adcs l, l, s4 - adcs h, h, s5 - stp l, h, [z, #32] + ldp l, h, [z, #32] + adcs l, l, s4 + adcs h, h, s5 + stp l, h, [z, #32] - ldp l, h, [z, #48] - adcs l, l, s6 - adcs h, h, s7 - stp l, h, [z, #48] + ldp l, h, [z, #48] + adcs l, l, s6 + adcs h, h, s7 + stp l, h, [z, #48] - ldr t, [z, #64] - adc t, t, xzr - str t, [z, #64] + ldr t, [z, #64] + adc t, t, xzr + str t, [z, #64] // Now get the cross-product in [s7,...,s0] with variant of bignum_mul_4_8 - mul s0, a0, b0 - mul s4, a1, b1 - mul s5, a2, b2 - mul s6, a3, b3 - umulh s7, a0, b0 - adds s4, s4, s7 - umulh s7, a1, b1 - adcs s5, s5, s7 - umulh s7, a2, b2 - adcs s6, s6, s7 - umulh s7, a3, b3 - adc s7, s7, xzr - adds s1, s4, s0 - adcs s4, s5, s4 - adcs s5, s6, s5 - adcs s6, s7, s6 - adc s7, xzr, s7 - adds s2, s4, s0 - adcs s3, s5, s1 - adcs s4, s6, s4 - adcs s5, s7, s5 - adcs s6, xzr, s6 - adc s7, xzr, s7 - subs t, a2, a3 - cneg t, t, cc - csetm c, cc - subs h, b3, b2 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s5, s5, l - eor h, h, c - adcs s6, s6, h - adc s7, s7, c - subs t, a0, a1 - cneg t, t, cc - csetm c, cc - subs h, b1, b0 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s1, s1, l - eor h, h, c - adcs s2, s2, h - adcs s3, s3, c - adcs s4, s4, c - adcs s5, s5, c - adcs s6, s6, c - adc s7, s7, c - subs t, a1, a3 - cneg t, t, cc - csetm c, cc - subs h, b3, b1 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s4, s4, l - eor h, h, c - adcs s5, s5, h - adcs s6, s6, c - adc s7, s7, c - subs t, a0, a2 - cneg t, t, cc - csetm c, cc - subs h, b2, b0 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s2, s2, l - eor h, h, c - adcs s3, s3, h - adcs s4, s4, c - adcs s5, s5, c - adcs s6, s6, c - adc s7, s7, c - subs t, a0, a3 - cneg t, t, cc - csetm c, cc - subs h, b3, b0 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s3, s3, l - eor h, h, c - adcs s4, s4, h - adcs s5, s5, c - adcs s6, s6, c - adc s7, s7, c - subs t, a1, a2 - cneg t, t, cc - csetm c, cc - subs h, b2, b1 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s3, s3, l - eor h, h, c - adcs s4, s4, h - adcs s5, s5, c - adcs s6, s6, c - adc s7, s7, c + mul s0, a0, b0 + mul s4, a1, b1 + mul s5, a2, b2 + mul s6, a3, b3 + umulh s7, a0, b0 + adds s4, s4, s7 + umulh s7, a1, b1 + adcs s5, s5, s7 + umulh s7, a2, b2 + adcs s6, s6, s7 + umulh s7, a3, b3 + adc s7, s7, xzr + adds s1, s4, s0 + adcs s4, s5, s4 + adcs s5, s6, s5 + adcs s6, s7, s6 + adc s7, xzr, s7 + adds s2, s4, s0 + adcs s3, s5, s1 + adcs s4, s6, s4 + adcs s5, s7, s5 + adcs s6, xzr, s6 + adc s7, xzr, s7 + subs t, a2, a3 + cneg t, t, cc + csetm c, cc + subs h, b3, b2 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s5, s5, l + eor h, h, c + adcs s6, s6, h + adc s7, s7, c + subs t, a0, a1 + cneg t, t, cc + csetm c, cc + subs h, b1, b0 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s1, s1, l + eor h, h, c + adcs s2, s2, h + adcs s3, s3, c + adcs s4, s4, c + adcs s5, s5, c + adcs s6, s6, c + adc s7, s7, c + subs t, a1, a3 + cneg t, t, cc + csetm c, cc + subs h, b3, b1 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s4, s4, l + eor h, h, c + adcs s5, s5, h + adcs s6, 
s6, c + adc s7, s7, c + subs t, a0, a2 + cneg t, t, cc + csetm c, cc + subs h, b2, b0 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s2, s2, l + eor h, h, c + adcs s3, s3, h + adcs s4, s4, c + adcs s5, s5, c + adcs s6, s6, c + adc s7, s7, c + subs t, a0, a3 + cneg t, t, cc + csetm c, cc + subs h, b3, b0 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s3, s3, l + eor h, h, c + adcs s4, s4, h + adcs s5, s5, c + adcs s6, s6, c + adc s7, s7, c + subs t, a1, a2 + cneg t, t, cc + csetm c, cc + subs h, b2, b1 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s3, s3, l + eor h, h, c + adcs s4, s4, h + adcs s5, s5, c + adcs s6, s6, c + adc s7, s7, c // Let the cross product be M. We want to add 2^256 * 2 * M to the buffer // Split M into M_top (248 bits) and M_bot (264 bits), so we add @@ -534,103 +534,103 @@ _bignum_montsqr_p521: // As this sum is built, accumulate t = AND of words d7...d1 to help // in condensing the carry chain in the comparison that comes next - ldp l, h, [z] - extr d0, s5, s4, #8 - adds d0, d0, l - extr d1, s6, s5, #8 - adcs d1, d1, h - - ldp l, h, [z, #16] - extr d2, s7, s6, #8 - adcs d2, d2, l - and t, d1, d2 - lsr d3, s7, #8 - adcs d3, d3, h - and t, t, d3 - - ldp l, h, [z, #32] - lsl d4, s0, #1 - adcs d4, d4, l - and t, t, d4 - extr d5, s1, s0, #63 - adcs d5, d5, h - and t, t, d5 - - ldp l, h, [z, #48] - extr d6, s2, s1, #63 - adcs d6, d6, l - and t, t, d6 - extr d7, s3, s2, #63 - adcs d7, d7, h - and t, t, d7 - - ldr l, [z, #64] - extr d8, s4, s3, #63 - and d8, d8, #0x1FF - adc d8, l, d8 + ldp l, h, [z] + extr d0, s5, s4, #8 + adds d0, d0, l + extr d1, s6, s5, #8 + adcs d1, d1, h + + ldp l, h, [z, #16] + extr d2, s7, s6, #8 + adcs d2, d2, l + and t, d1, d2 + lsr d3, s7, #8 + adcs d3, d3, h + and t, t, d3 + + ldp l, h, [z, #32] + lsl d4, s0, #1 + adcs d4, d4, l + and t, t, d4 + extr d5, s1, s0, #63 + adcs d5, d5, h + and t, t, d5 + + ldp l, h, [z, #48] + extr d6, s2, s1, #63 + adcs d6, d6, l + and t, t, d6 + extr d7, s3, s2, #63 + adcs d7, d7, h + and t, t, d7 + + ldr l, [z, #64] + extr d8, s4, s3, #63 + and d8, d8, #0x1FF + adc d8, l, d8 // Extract the high part h and mask off the low part l = [d8;d7;...;d0] // but stuff d8 with 1 bits at the left to ease a comparison below - lsr h, d8, #9 - orr d8, d8, #~0x1FF + lsr h, d8, #9 + orr d8, d8, #~0x1FF // Decide whether h + l >= p_521 <=> h + l + 1 >= 2^521. Since this can only // happen if digits d7,...d1 are all 1s, we use the AND of them "t" to // condense the carry chain, and since we stuffed 1 bits into d8 we get // the result in CF without an additional comparison. - subs xzr, xzr, xzr - adcs xzr, d0, h - adcs xzr, t, xzr - adcs xzr, d8, xzr + subs xzr, xzr, xzr + adcs xzr, d0, h + adcs xzr, t, xzr + adcs xzr, d8, xzr // Now if CF is set we want (h + l) - p_521 = (h + l + 1) - 2^521 // while otherwise we want just h + l. So mask h + l + CF to 521 bits. // This masking also gets rid of the stuffing with 1s we did above. - adcs d0, d0, h - adcs d1, d1, xzr - adcs d2, d2, xzr - adcs d3, d3, xzr - adcs d4, d4, xzr - adcs d5, d5, xzr - adcs d6, d6, xzr - adcs d7, d7, xzr - adc d8, d8, xzr - and d8, d8, #0x1FF + adcs d0, d0, h + adcs d1, d1, xzr + adcs d2, d2, xzr + adcs d3, d3, xzr + adcs d4, d4, xzr + adcs d5, d5, xzr + adcs d6, d6, xzr + adcs d7, d7, xzr + adc d8, d8, xzr + and d8, d8, #0x1FF // So far, this has been the same as a pure modular squaring. 
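A minimal C sketch of the comparison trick just performed (illustrative names, not part of the patch; unsigned __int128 is a GCC/Clang extension used only to make the carries explicit):

    #include <stdint.h>

    // Decide h + l >= p_521 with a three-step carry chain, mirroring the
    // subs/adcs sequence above: a carry out of digit 0 can reach digit 8
    // only if digits 1..7 are all 1s, so their AND stands in for seven
    // adcs steps, and the 1-bits OR-ed into d[8] above bit 8 turn the
    // ">= 2^9 at the top" condition into a plain carry out of 64 bits.
    static int ge_p521(const uint64_t d[9], uint64_t h)
    {
        uint64_t t = d[1] & d[2] & d[3] & d[4] & d[5] & d[6] & d[7];
        unsigned __int128 a;
        a = (unsigned __int128)d[0] + h + 1;              // subs; adcs xzr, d0, h
        a = (unsigned __int128)t + (uint64_t)(a >> 64);   // adcs xzr, t, xzr
        a = (unsigned __int128)(d[8] | ~(uint64_t)0x1FF)
          + (uint64_t)(a >> 64);                          // adcs xzr, d8, xzr
        return (int)(a >> 64);  // carry <=> h + l + 1 >= 2^521 <=> h + l >= p_521
    }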
// Now finally the Montgomery ingredient, which is just a 521-bit // rotation by 9*64 - 521 = 55 bits right. - lsl c, d0, #9 - extr d0, d1, d0, #55 - extr d1, d2, d1, #55 - extr d2, d3, d2, #55 - extr d3, d4, d3, #55 - orr d8, d8, c - extr d4, d5, d4, #55 - extr d5, d6, d5, #55 - extr d6, d7, d6, #55 - extr d7, d8, d7, #55 - lsr d8, d8, #55 + lsl c, d0, #9 + extr d0, d1, d0, #55 + extr d1, d2, d1, #55 + extr d2, d3, d2, #55 + extr d3, d4, d3, #55 + orr d8, d8, c + extr d4, d5, d4, #55 + extr d5, d6, d5, #55 + extr d6, d7, d6, #55 + extr d7, d8, d7, #55 + lsr d8, d8, #55 // Store the final result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] - stp d6, d7, [z, #48] - str d8, [z, #64] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] + stp d6, d7, [z, #48] + str d8, [z, #64] // Restore regs and return - ldp x23, x24, [sp], #16 - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_montsqr_p521_alt.S b/arm/p521/bignum_montsqr_p521_alt.S index ea954eaf7c..20e65296e5 100644 --- a/arm/p521/bignum_montsqr_p521_alt.S +++ b/arm/p521/bignum_montsqr_p521_alt.S @@ -71,325 +71,325 @@ _bignum_montsqr_p521_alt: // It's convenient to have more registers to play with - stp x19, x20, [sp, #-16]! - stp x21, x22, [sp, #-16]! - stp x23, x24, [sp, #-16]! - stp x25, x26, [sp, #-16]! - stp x27, x29, [sp, #-16]! + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x27, x29, [sp, #-16]! // Load low 8 elements as [a7;a6;a5;a4;a3;a2;a1;a0], set up an initial // window [u8;u7;u6;u5;u4;u3;u2;u1] = 10 + 20 + 30 + 40 + 50 + 60 + 70 - ldp a0, a1, [x] + ldp a0, a1, [x] - mul u1, a0, a1 - umulh u2, a0, a1 + mul u1, a0, a1 + umulh u2, a0, a1 - ldp a2, a3, [x, #16] + ldp a2, a3, [x, #16] - mul l, a0, a2 - umulh u3, a0, a2 - adds u2, u2, l + mul l, a0, a2 + umulh u3, a0, a2 + adds u2, u2, l - ldp a4, a5, [x, #32] + ldp a4, a5, [x, #32] - mul l, a0, a3 - umulh u4, a0, a3 - adcs u3, u3, l + mul l, a0, a3 + umulh u4, a0, a3 + adcs u3, u3, l - ldp a6, a7, [x, #48] + ldp a6, a7, [x, #48] - mul l, a0, a4 - umulh u5, a0, a4 - adcs u4, u4, l + mul l, a0, a4 + umulh u5, a0, a4 + adcs u4, u4, l - mul l, a0, a5 - umulh u6, a0, a5 - adcs u5, u5, l + mul l, a0, a5 + umulh u6, a0, a5 + adcs u5, u5, l - mul l, a0, a6 - umulh u7, a0, a6 - adcs u6, u6, l + mul l, a0, a6 + umulh u7, a0, a6 + adcs u6, u6, l - mul l, a0, a7 - umulh u8, a0, a7 - adcs u7, u7, l + mul l, a0, a7 + umulh u8, a0, a7 + adcs u7, u7, l - adc u8, u8, xzr + adc u8, u8, xzr // Add in the next diagonal = 21 + 31 + 41 + 51 + 61 + 71 + 54 - mul l, a1, a2 - adds u3, u3, l - mul l, a1, a3 - adcs u4, u4, l - mul l, a1, a4 - adcs u5, u5, l - mul l, a1, a5 - adcs u6, u6, l - mul l, a1, a6 - adcs u7, u7, l - mul l, a1, a7 - adcs u8, u8, l - cset u9, cs - - umulh l, a1, a2 - adds u4, u4, l - umulh l, a1, a3 - adcs u5, u5, l - umulh l, a1, a4 - adcs u6, u6, l - umulh l, a1, a5 - adcs u7, u7, l - umulh l, a1, a6 - adcs u8, u8, l - umulh l, a1, a7 - adc u9, u9, l - mul l, a4, a5 - umulh u10, a4, a5 - adds u9, u9, l - adc u10, u10, xzr + mul l, a1, a2 + adds u3, u3, l + mul l, a1, a3 + adcs u4, u4, l + mul l, a1, a4 + adcs u5, u5, l + mul l, a1, a5 + adcs u6, u6, l + mul l, a1, a6 + adcs u7, u7, l + mul l, a1, a7 + adcs u8, u8, l + cset u9, cs + + umulh l, a1, a2 + adds u4, u4, l + umulh l, a1, a3 + 
adcs u5, u5, l + umulh l, a1, a4 + adcs u6, u6, l + umulh l, a1, a5 + adcs u7, u7, l + umulh l, a1, a6 + adcs u8, u8, l + umulh l, a1, a7 + adc u9, u9, l + mul l, a4, a5 + umulh u10, a4, a5 + adds u9, u9, l + adc u10, u10, xzr // And the next one = 32 + 42 + 52 + 62 + 72 + 64 + 65 - mul l, a2, a3 - adds u5, u5, l - mul l, a2, a4 - adcs u6, u6, l - mul l, a2, a5 - adcs u7, u7, l - mul l, a2, a6 - adcs u8, u8, l - mul l, a2, a7 - adcs u9, u9, l - mul l, a4, a6 - adcs u10, u10, l - cset u11, cs - - umulh l, a2, a3 - adds u6, u6, l - umulh l, a2, a4 - adcs u7, u7, l - umulh l, a2, a5 - adcs u8, u8, l - umulh l, a2, a6 - adcs u9, u9, l - umulh l, a2, a7 - adcs u10, u10, l - umulh l, a4, a6 - adc u11, u11, l - mul l, a5, a6 - umulh u12, a5, a6 - adds u11, u11, l - adc u12, u12, xzr + mul l, a2, a3 + adds u5, u5, l + mul l, a2, a4 + adcs u6, u6, l + mul l, a2, a5 + adcs u7, u7, l + mul l, a2, a6 + adcs u8, u8, l + mul l, a2, a7 + adcs u9, u9, l + mul l, a4, a6 + adcs u10, u10, l + cset u11, cs + + umulh l, a2, a3 + adds u6, u6, l + umulh l, a2, a4 + adcs u7, u7, l + umulh l, a2, a5 + adcs u8, u8, l + umulh l, a2, a6 + adcs u9, u9, l + umulh l, a2, a7 + adcs u10, u10, l + umulh l, a4, a6 + adc u11, u11, l + mul l, a5, a6 + umulh u12, a5, a6 + adds u11, u11, l + adc u12, u12, xzr // And the final one = 43 + 53 + 63 + 73 + 74 + 75 + 76 - mul l, a3, a4 - adds u7, u7, l - mul l, a3, a5 - adcs u8, u8, l - mul l, a3, a6 - adcs u9, u9, l - mul l, a3, a7 - adcs u10, u10, l - mul l, a4, a7 - adcs u11, u11, l - mul l, a5, a7 - adcs u12, u12, l - cset u13, cs - - umulh l, a3, a4 - adds u8, u8, l - umulh l, a3, a5 - adcs u9, u9, l - umulh l, a3, a6 - adcs u10, u10, l - umulh l, a3, a7 - adcs u11, u11, l - umulh l, a4, a7 - adcs u12, u12, l - umulh l, a5, a7 - adc u13, u13, l - mul l, a6, a7 - umulh u14, a6, a7 - adds u13, u13, l - adc u14, u14, xzr + mul l, a3, a4 + adds u7, u7, l + mul l, a3, a5 + adcs u8, u8, l + mul l, a3, a6 + adcs u9, u9, l + mul l, a3, a7 + adcs u10, u10, l + mul l, a4, a7 + adcs u11, u11, l + mul l, a5, a7 + adcs u12, u12, l + cset u13, cs + + umulh l, a3, a4 + adds u8, u8, l + umulh l, a3, a5 + adcs u9, u9, l + umulh l, a3, a6 + adcs u10, u10, l + umulh l, a3, a7 + adcs u11, u11, l + umulh l, a4, a7 + adcs u12, u12, l + umulh l, a5, a7 + adc u13, u13, l + mul l, a6, a7 + umulh u14, a6, a7 + adds u13, u13, l + adc u14, u14, xzr // Double that, with u15 holding the top carry - adds u1, u1, u1 - adcs u2, u2, u2 - adcs u3, u3, u3 - adcs u4, u4, u4 - adcs u5, u5, u5 - adcs u6, u6, u6 - adcs u7, u7, u7 - adcs u8, u8, u8 - adcs u9, u9, u9 - adcs u10, u10, u10 - adcs u11, u11, u11 - adcs u12, u12, u12 - adcs u13, u13, u13 - adcs u14, u14, u14 - cset u15, cs + adds u1, u1, u1 + adcs u2, u2, u2 + adcs u3, u3, u3 + adcs u4, u4, u4 + adcs u5, u5, u5 + adcs u6, u6, u6 + adcs u7, u7, u7 + adcs u8, u8, u8 + adcs u9, u9, u9 + adcs u10, u10, u10 + adcs u11, u11, u11 + adcs u12, u12, u12 + adcs u13, u13, u13 + adcs u14, u14, u14 + cset u15, cs // Add the homogeneous terms 00 + 11 + 22 + 33 + 44 + 55 + 66 + 77 - umulh l, a0, a0 - mul u0, a0, a0 - adds u1, u1, l - - mul l, a1, a1 - adcs u2, u2, l - umulh l, a1, a1 - adcs u3, u3, l - - mul l, a2, a2 - adcs u4, u4, l - umulh l, a2, a2 - adcs u5, u5, l - - mul l, a3, a3 - adcs u6, u6, l - umulh l, a3, a3 - adcs u7, u7, l - - mul l, a4, a4 - adcs u8, u8, l - umulh l, a4, a4 - adcs u9, u9, l - - mul l, a5, a5 - adcs u10, u10, l - umulh l, a5, a5 - adcs u11, u11, l - - mul l, a6, a6 - adcs u12, u12, l - umulh l, a6, a6 - adcs u13, u13, l - - mul l, a7, a7 - 
adcs u14, u14, l - umulh l, a7, a7 - adc u15, u15, l + umulh l, a0, a0 + mul u0, a0, a0 + adds u1, u1, l + + mul l, a1, a1 + adcs u2, u2, l + umulh l, a1, a1 + adcs u3, u3, l + + mul l, a2, a2 + adcs u4, u4, l + umulh l, a2, a2 + adcs u5, u5, l + + mul l, a3, a3 + adcs u6, u6, l + umulh l, a3, a3 + adcs u7, u7, l + + mul l, a4, a4 + adcs u8, u8, l + umulh l, a4, a4 + adcs u9, u9, l + + mul l, a5, a5 + adcs u10, u10, l + umulh l, a5, a5 + adcs u11, u11, l + + mul l, a6, a6 + adcs u12, u12, l + umulh l, a6, a6 + adcs u13, u13, l + + mul l, a7, a7 + adcs u14, u14, l + umulh l, a7, a7 + adc u15, u15, l // Now load in the top digit a8, and also set up its double and square - ldr a8, [x, #64] - mul u16, a8, a8 - add a8, a8, a8 + ldr a8, [x, #64] + mul u16, a8, a8 + add a8, a8, a8 // Add a8 * [a7;...;a0] into the top of the buffer - mul l, a8, a0 - adds u8, u8, l - mul l, a8, a1 - adcs u9, u9, l - mul l, a8, a2 - adcs u10, u10, l - mul l, a8, a3 - adcs u11, u11, l - mul l, a8, a4 - adcs u12, u12, l - mul l, a8, a5 - adcs u13, u13, l - mul l, a8, a6 - adcs u14, u14, l - mul l, a8, a7 - adcs u15, u15, l - adc u16, u16, xzr - - umulh l, a8, a0 - adds u9, u9, l - umulh l, a8, a1 - adcs u10, u10, l - umulh l, a8, a2 - adcs u11, u11, l - umulh l, a8, a3 - adcs u12, u12, l - umulh l, a8, a4 - adcs u13, u13, l - umulh l, a8, a5 - adcs u14, u14, l - umulh l, a8, a6 - adcs u15, u15, l - umulh l, a8, a7 - adc u16, u16, l + mul l, a8, a0 + adds u8, u8, l + mul l, a8, a1 + adcs u9, u9, l + mul l, a8, a2 + adcs u10, u10, l + mul l, a8, a3 + adcs u11, u11, l + mul l, a8, a4 + adcs u12, u12, l + mul l, a8, a5 + adcs u13, u13, l + mul l, a8, a6 + adcs u14, u14, l + mul l, a8, a7 + adcs u15, u15, l + adc u16, u16, xzr + + umulh l, a8, a0 + adds u9, u9, l + umulh l, a8, a1 + adcs u10, u10, l + umulh l, a8, a2 + adcs u11, u11, l + umulh l, a8, a3 + adcs u12, u12, l + umulh l, a8, a4 + adcs u13, u13, l + umulh l, a8, a5 + adcs u14, u14, l + umulh l, a8, a6 + adcs u15, u15, l + umulh l, a8, a7 + adc u16, u16, l // Now we have the full product, which we consider as // 2^521 * h + l. Form h + l + 1 - subs xzr, xzr, xzr - extr l, u9, u8, #9 - adcs u0, u0, l - extr l, u10, u9, #9 - adcs u1, u1, l - extr l, u11, u10, #9 - adcs u2, u2, l - extr l, u12, u11, #9 - adcs u3, u3, l - extr l, u13, u12, #9 - adcs u4, u4, l - extr l, u14, u13, #9 - adcs u5, u5, l - extr l, u15, u14, #9 - adcs u6, u6, l - extr l, u16, u15, #9 - adcs u7, u7, l - orr u8, u8, #~0x1FF - lsr l, u16, #9 - adcs u8, u8, l + subs xzr, xzr, xzr + extr l, u9, u8, #9 + adcs u0, u0, l + extr l, u10, u9, #9 + adcs u1, u1, l + extr l, u11, u10, #9 + adcs u2, u2, l + extr l, u12, u11, #9 + adcs u3, u3, l + extr l, u13, u12, #9 + adcs u4, u4, l + extr l, u14, u13, #9 + adcs u5, u5, l + extr l, u15, u14, #9 + adcs u6, u6, l + extr l, u16, u15, #9 + adcs u7, u7, l + orr u8, u8, #~0x1FF + lsr l, u16, #9 + adcs u8, u8, l // Now CF is set if h + l + 1 >= 2^521, which means it's already // the answer, while if ~CF the answer is h + l so we should subtract // 1 (all considered in 521 bits). Hence subtract ~CF and mask. 
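A C sketch of this h + l + 1 correction (illustrative names, not part of the patch; u[0..8] holds the low 521 bits with only bits 512..520 live in u[8], h[0..8] is the product shifted right 521 bits, and unsigned __int128 is a GCC/Clang extension):

    #include <stdint.h>

    static void fold_p521(uint64_t u[9], const uint64_t h[9])
    {
        // s = l + h + 1; stuffing u[8]'s top bits with 1s (the orr above)
        // makes "s >= 2^521" pop out as the carry from the last digit
        uint64_t s[9];
        unsigned __int128 a = 1;
        for (int i = 0; i < 8; i++) {
            a += (unsigned __int128)u[i] + h[i];
            s[i] = (uint64_t)a;
            a >>= 64;
        }
        a += (unsigned __int128)(u[8] | ~(uint64_t)0x1FF) + h[8];
        s[8] = (uint64_t)a;
        uint64_t cf = (uint64_t)(a >> 64);

        // Subtract ~CF = 1 - cf with a borrow chain (the sbcs below), then
        // mask to 521 bits: cf = 1 leaves (h + l) - p_521, cf = 0 leaves h + l
        uint64_t b = 1 - cf;
        for (int i = 0; i < 9; i++) {
            uint64_t d = s[i];
            u[i] = d - b;
            b = (d < b);
        }
        u[8] &= 0x1FF;
    }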
- sbcs u0, u0, xzr - sbcs u1, u1, xzr - sbcs u2, u2, xzr - sbcs u3, u3, xzr - sbcs u4, u4, xzr - sbcs u5, u5, xzr - sbcs u6, u6, xzr - sbcs u7, u7, xzr - sbc u8, u8, xzr - and u8, u8, #0x1FF + sbcs u0, u0, xzr + sbcs u1, u1, xzr + sbcs u2, u2, xzr + sbcs u3, u3, xzr + sbcs u4, u4, xzr + sbcs u5, u5, xzr + sbcs u6, u6, xzr + sbcs u7, u7, xzr + sbc u8, u8, xzr + and u8, u8, #0x1FF // So far, this has been the same as a pure modular squaring // Now finally the Montgomery ingredient, which is just a 521-bit // rotation by 9*64 - 521 = 55 bits right. - lsl l, u0, #9 - extr u0, u1, u0, #55 - extr u1, u2, u1, #55 - extr u2, u3, u2, #55 - extr u3, u4, u3, #55 - orr u8, u8, l - extr u4, u5, u4, #55 - extr u5, u6, u5, #55 - extr u6, u7, u6, #55 - extr u7, u8, u7, #55 - lsr u8, u8, #55 + lsl l, u0, #9 + extr u0, u1, u0, #55 + extr u1, u2, u1, #55 + extr u2, u3, u2, #55 + extr u3, u4, u3, #55 + orr u8, u8, l + extr u4, u5, u4, #55 + extr u5, u6, u5, #55 + extr u6, u7, u6, #55 + extr u7, u8, u7, #55 + lsr u8, u8, #55 // Store back digits of final result - stp u0, u1, [z] - stp u2, u3, [z, #16] - stp u4, u5, [z, #32] - stp u6, u7, [z, #48] - str u8, [z, #64] + stp u0, u1, [z] + stp u2, u3, [z, #16] + stp u4, u5, [z, #32] + stp u6, u7, [z, #48] + str u8, [z, #64] // Restore registers and return - ldp x27, x29, [sp], #16 - ldp x25, x26, [sp], #16 - ldp x23, x24, [sp], #16 - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 + ldp x27, x29, [sp], #16 + ldp x25, x26, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_mul_p521.S b/arm/p521/bignum_mul_p521.S index 9c19787059..832077690c 100644 --- a/arm/p521/bignum_mul_p521.S +++ b/arm/p521/bignum_mul_p521.S @@ -90,174 +90,174 @@ #define mul4 \ /* First accumulate all the "simple" products as [s7,s6,s5,s4,s0] */ \ - \ - mul s0, a0, b0; \ - mul s4, a1, b1; \ - mul s5, a2, b2; \ - mul s6, a3, b3; \ - \ - umulh s7, a0, b0; \ - adds s4, s4, s7; \ - umulh s7, a1, b1; \ - adcs s5, s5, s7; \ - umulh s7, a2, b2; \ - adcs s6, s6, s7; \ - umulh s7, a3, b3; \ - adc s7, s7, xzr; \ - \ + \ + mul s0, a0, b0; \ + mul s4, a1, b1; \ + mul s5, a2, b2; \ + mul s6, a3, b3; \ + \ + umulh s7, a0, b0; \ + adds s4, s4, s7; \ + umulh s7, a1, b1; \ + adcs s5, s5, s7; \ + umulh s7, a2, b2; \ + adcs s6, s6, s7; \ + umulh s7, a3, b3; \ + adc s7, s7, xzr; \ + \ /* Multiply by B + 1 to get [s7;s6;s5;s4;s1;s0] */ \ - \ - adds s1, s4, s0; \ - adcs s4, s5, s4; \ - adcs s5, s6, s5; \ - adcs s6, s7, s6; \ - adc s7, xzr, s7; \ - \ + \ + adds s1, s4, s0; \ + adcs s4, s5, s4; \ + adcs s5, s6, s5; \ + adcs s6, s7, s6; \ + adc s7, xzr, s7; \ + \ /* Multiply by B^2 + 1 to get [s7;s6;s5;s4;s3;s2;s1;s0] */ \ - \ - adds s2, s4, s0; \ - adcs s3, s5, s1; \ - adcs s4, s6, s4; \ - adcs s5, s7, s5; \ - adcs s6, xzr, s6; \ - adc s7, xzr, s7; \ - \ + \ + adds s2, s4, s0; \ + adcs s3, s5, s1; \ + adcs s4, s6, s4; \ + adcs s5, s7, s5; \ + adcs s6, xzr, s6; \ + adc s7, xzr, s7; \ + \ /* Now add in all the "complicated" terms. 
*/ \ - \ - muldiffnadd(s6,s5, a2,a3, b3,b2); \ - adc s7, s7, c; \ - \ - muldiffnadd(s2,s1, a0,a1, b1,b0); \ - adcs s3, s3, c; \ - adcs s4, s4, c; \ - adcs s5, s5, c; \ - adcs s6, s6, c; \ - adc s7, s7, c; \ - \ - muldiffnadd(s5,s4, a1,a3, b3,b1); \ - adcs s6, s6, c; \ - adc s7, s7, c; \ - \ - muldiffnadd(s3,s2, a0,a2, b2,b0); \ - adcs s4, s4, c; \ - adcs s5, s5, c; \ - adcs s6, s6, c; \ - adc s7, s7, c; \ - \ - muldiffnadd(s4,s3, a0,a3, b3,b0); \ - adcs s5, s5, c; \ - adcs s6, s6, c; \ - adc s7, s7, c; \ - muldiffnadd(s4,s3, a1,a2, b2,b1); \ - adcs s5, s5, c; \ - adcs s6, s6, c; \ - adc s7, s7, c \ + \ + muldiffnadd(s6,s5, a2,a3, b3,b2); \ + adc s7, s7, c; \ + \ + muldiffnadd(s2,s1, a0,a1, b1,b0); \ + adcs s3, s3, c; \ + adcs s4, s4, c; \ + adcs s5, s5, c; \ + adcs s6, s6, c; \ + adc s7, s7, c; \ + \ + muldiffnadd(s5,s4, a1,a3, b3,b1); \ + adcs s6, s6, c; \ + adc s7, s7, c; \ + \ + muldiffnadd(s3,s2, a0,a2, b2,b0); \ + adcs s4, s4, c; \ + adcs s5, s5, c; \ + adcs s6, s6, c; \ + adc s7, s7, c; \ + \ + muldiffnadd(s4,s3, a0,a3, b3,b0); \ + adcs s5, s5, c; \ + adcs s6, s6, c; \ + adc s7, s7, c; \ + muldiffnadd(s4,s3, a1,a2, b2,b1); \ + adcs s5, s5, c; \ + adcs s6, s6, c; \ + adc s7, s7, c \ bignum_mul_p521: _bignum_mul_p521: // Save registers and make space for the temporary buffer - stp x19, x20, [sp, #-16]! - stp x21, x22, [sp, #-16]! - stp x23, x24, [sp, #-16]! - stp x25, x26, [sp, #-16]! - sub sp, sp, #80 + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + sub sp, sp, #80 // Load 4-digit low parts and multiply them to get L - ldp a0, a1, [x] - ldp a2, a3, [x, #16] - ldp b0, b1, [y] - ldp b2, b3, [y, #16] - mul4 + ldp a0, a1, [x] + ldp a2, a3, [x, #16] + ldp b0, b1, [y] + ldp b2, b3, [y, #16] + mul4 // Shift right 256 bits modulo p_521 and stash in temp buffer - lsl c, s0, #9 - extr s0, s1, s0, #55 - extr s1, s2, s1, #55 - extr s2, s3, s2, #55 - lsr s3, s3, #55 - stp s4, s5, [sp] - stp s6, s7, [sp, #16] - stp c, s0, [sp, #32] - stp s1, s2, [sp, #48] - str s3, [sp, #64] + lsl c, s0, #9 + extr s0, s1, s0, #55 + extr s1, s2, s1, #55 + extr s2, s3, s2, #55 + lsr s3, s3, #55 + stp s4, s5, [sp] + stp s6, s7, [sp, #16] + stp c, s0, [sp, #32] + stp s1, s2, [sp, #48] + str s3, [sp, #64] // Load 4-digit low parts and multiply them to get H - ldp a0, a1, [x, #32] - ldp a2, a3, [x, #48] - ldp b0, b1, [y, #32] - ldp b2, b3, [y, #48] - mul4 + ldp a0, a1, [x, #32] + ldp a2, a3, [x, #48] + ldp b0, b1, [y, #32] + ldp b2, b3, [y, #48] + mul4 // Add to the existing temporary buffer and re-stash. 
// This gives a result HL congruent to (2^256 * H + L) / 2^256 modulo p_521 - ldp l, h, [sp] - adds s0, s0, l - adcs s1, s1, h - stp s0, s1, [sp] - ldp l, h, [sp, #16] - adcs s2, s2, l - adcs s3, s3, h - stp s2, s3, [sp, #16] - ldp l, h, [sp, #32] - adcs s4, s4, l - adcs s5, s5, h - stp s4, s5, [sp, #32] - ldp l, h, [sp, #48] - adcs s6, s6, l - adcs s7, s7, h - stp s6, s7, [sp, #48] - ldr c, [sp, #64] - adc c, c, xzr - str c, [sp, #64] + ldp l, h, [sp] + adds s0, s0, l + adcs s1, s1, h + stp s0, s1, [sp] + ldp l, h, [sp, #16] + adcs s2, s2, l + adcs s3, s3, h + stp s2, s3, [sp, #16] + ldp l, h, [sp, #32] + adcs s4, s4, l + adcs s5, s5, h + stp s4, s5, [sp, #32] + ldp l, h, [sp, #48] + adcs s6, s6, l + adcs s7, s7, h + stp s6, s7, [sp, #48] + ldr c, [sp, #64] + adc c, c, xzr + str c, [sp, #64] // Compute t,[a3,a2,a1,a0] = x_hi - x_lo // and s,[b3,b2,b1,b0] = y_lo - y_hi // sign-magnitude differences, then XOR overall sign bitmask into s - ldp l, h, [x] - subs a0, a0, l - sbcs a1, a1, h - ldp l, h, [x, #16] - sbcs a2, a2, l - sbcs a3, a3, h - csetm t, cc - ldp l, h, [y] - subs b0, l, b0 - sbcs b1, h, b1 - ldp l, h, [y, #16] - sbcs b2, l, b2 - sbcs b3, h, b3 - csetm s, cc - - eor a0, a0, t - subs a0, a0, t - eor a1, a1, t - sbcs a1, a1, t - eor a2, a2, t - sbcs a2, a2, t - eor a3, a3, t - sbc a3, a3, t - - eor b0, b0, s - subs b0, b0, s - eor b1, b1, s - sbcs b1, b1, s - eor b2, b2, s - sbcs b2, b2, s - eor b3, b3, s - sbc b3, b3, s - - eor s, s, t + ldp l, h, [x] + subs a0, a0, l + sbcs a1, a1, h + ldp l, h, [x, #16] + sbcs a2, a2, l + sbcs a3, a3, h + csetm t, cc + ldp l, h, [y] + subs b0, l, b0 + sbcs b1, h, b1 + ldp l, h, [y, #16] + sbcs b2, l, b2 + sbcs b3, h, b3 + csetm s, cc + + eor a0, a0, t + subs a0, a0, t + eor a1, a1, t + sbcs a1, a1, t + eor a2, a2, t + sbcs a2, a2, t + eor a3, a3, t + sbc a3, a3, t + + eor b0, b0, s + subs b0, b0, s + eor b1, b1, s + sbcs b1, b1, s + eor b2, b2, s + sbcs b2, b2, s + eor b3, b3, s + sbc b3, b3, s + + eor s, s, t // Now do yet a third 4x4 multiply to get mid-term product M - mul4 + mul4 // We now want, at the 256 position, 2^256 * HL + HL + (-1)^s * M // To keep things positive we use M' = p_521 - M in place of -M, @@ -273,48 +273,48 @@ _bignum_mul_p521: // small c (s8 + suspended carry) to add at the 256 position here (512 // overall). This can be added in the next block (to b0 = sum4). 
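The sign-magnitude differences set up above (csetm for the borrow mask, then eor/subs/sbcs for a branch-free conditional negation) can be modelled by this C sketch, assuming illustrative names and the GCC/Clang unsigned __int128 extension:

    #include <stdint.h>

    // Store |x - y| in d and return the sign mask (all 1s when x < y):
    // subtract with borrow, turn the final borrow into a mask m, then
    // compute (d ^ m) - m limbwise, which is d when m = 0 and the
    // two's complement negation -d when m is all 1s.
    static uint64_t abs_diff_4(uint64_t d[4], const uint64_t x[4], const uint64_t y[4])
    {
        uint64_t b = 0;
        for (int i = 0; i < 4; i++) {              // subs/sbcs chain
            unsigned __int128 t = (unsigned __int128)x[i] - y[i] - b;
            d[i] = (uint64_t)t;
            b = (uint64_t)(t >> 64) & 1;
        }
        uint64_t m = (uint64_t)0 - b;              // csetm: 0 or all 1s

        b = 0;
        for (int i = 0; i < 4; i++) {              // eor + subs/sbcs chain
            unsigned __int128 t = (unsigned __int128)(d[i] ^ m) - m - b;
            d[i] = (uint64_t)t;
            b = (uint64_t)(t >> 64) & 1;
        }
        return m;
    }

The routine applies this to both operands and XORs the two masks together, so the sign of the mid-term product M is available as the single mask s.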
- ldp a0, a1, [sp] - ldp a2, a3, [sp, #16] - - eor s0, s0, s - adds s0, s0, a0 - eor s1, s1, s - adcs s1, s1, a1 - eor s2, s2, s - adcs s2, s2, a2 - eor s3, s3, s - adcs s3, s3, a3 - eor s4, s4, s - - ldp b0, b1, [sp, #32] - ldp b2, b3, [sp, #48] - ldr s8, [sp, #64] - - adcs s4, s4, b0 - eor s5, s5, s - adcs s5, s5, b1 - eor s6, s6, s - adcs s6, s6, b2 - eor s7, s7, s - adcs s7, s7, b3 - adc c, s8, xzr - - adds s4, s4, a0 - adcs s5, s5, a1 - adcs s6, s6, a2 - adcs s7, s7, a3 - and s, s, #0x1FF - lsl t, s0, #9 - orr t, t, s - adcs b0, b0, t - extr t, s1, s0, #55 - adcs b1, b1, t - extr t, s2, s1, #55 - adcs b2, b2, t - extr t, s3, s2, #55 - adcs b3, b3, t - lsr t, s3, #55 - adc s8, t, s8 + ldp a0, a1, [sp] + ldp a2, a3, [sp, #16] + + eor s0, s0, s + adds s0, s0, a0 + eor s1, s1, s + adcs s1, s1, a1 + eor s2, s2, s + adcs s2, s2, a2 + eor s3, s3, s + adcs s3, s3, a3 + eor s4, s4, s + + ldp b0, b1, [sp, #32] + ldp b2, b3, [sp, #48] + ldr s8, [sp, #64] + + adcs s4, s4, b0 + eor s5, s5, s + adcs s5, s5, b1 + eor s6, s6, s + adcs s6, s6, b2 + eor s7, s7, s + adcs s7, s7, b3 + adc c, s8, xzr + + adds s4, s4, a0 + adcs s5, s5, a1 + adcs s6, s6, a2 + adcs s7, s7, a3 + and s, s, #0x1FF + lsl t, s0, #9 + orr t, t, s + adcs b0, b0, t + extr t, s1, s0, #55 + adcs b1, b1, t + extr t, s2, s1, #55 + adcs b2, b2, t + extr t, s3, s2, #55 + adcs b3, b3, t + lsr t, s3, #55 + adc s8, t, s8 // Augment the total with the contribution from the top little words // w and v. If we write the inputs as 2^512 * w + x and 2^512 * v + y @@ -361,243 +361,243 @@ _bignum_mul_p521: // 0 * 52 = 64 * 0 + 0 - ldr v, [y, #64] - ldp c0, c1, [x] - and l, c0, #0x000fffffffffffff - mul l, v, l - ldr w, [x, #64] - ldp d0, d1, [y] - and t, d0, #0x000fffffffffffff - mul t, w, t - add l, l, t + ldr v, [y, #64] + ldp c0, c1, [x] + and l, c0, #0x000fffffffffffff + mul l, v, l + ldr w, [x, #64] + ldp d0, d1, [y] + and t, d0, #0x000fffffffffffff + mul t, w, t + add l, l, t // 1 * 52 = 64 * 0 + 52 - extr t, c1, c0, #52 - and t, t, #0x000fffffffffffff - mul h, v, t - extr t, d1, d0, #52 - and t, t, #0x000fffffffffffff - mul t, w, t - add h, h, t - lsr t, l, #52 - add h, h, t + extr t, c1, c0, #52 + and t, t, #0x000fffffffffffff + mul h, v, t + extr t, d1, d0, #52 + and t, t, #0x000fffffffffffff + mul t, w, t + add h, h, t + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #12 - adds sum0, sum0, t + lsl l, l, #12 + extr t, h, l, #12 + adds sum0, sum0, t // 2 * 52 = 64 * 1 + 40 - ldp c2, c3, [x, #16] - ldp d2, d3, [y, #16] - extr t, c2, c1, #40 - and t, t, #0x000fffffffffffff - mul l, v, t - extr t, d2, d1, #40 - and t, t, #0x000fffffffffffff - mul t, w, t - add l, l, t - lsr t, h, #52 - add l, l, t - - lsl h, h, #12 - extr t, l, h, #24 - adcs sum1, sum1, t + ldp c2, c3, [x, #16] + ldp d2, d3, [y, #16] + extr t, c2, c1, #40 + and t, t, #0x000fffffffffffff + mul l, v, t + extr t, d2, d1, #40 + and t, t, #0x000fffffffffffff + mul t, w, t + add l, l, t + lsr t, h, #52 + add l, l, t + + lsl h, h, #12 + extr t, l, h, #24 + adcs sum1, sum1, t // 3 * 52 = 64 * 2 + 28 - extr t, c3, c2, #28 - and t, t, #0x000fffffffffffff - mul h, v, t - extr t, d3, d2, #28 - and t, t, #0x000fffffffffffff - mul t, w, t - add h, h, t - lsr t, l, #52 - add h, h, t - - lsl l, l, #12 - extr t, h, l, #36 - adcs sum2, sum2, t - and u, sum1, sum2 + extr t, c3, c2, #28 + and t, t, #0x000fffffffffffff + mul h, v, t + extr t, d3, d2, #28 + and t, t, #0x000fffffffffffff + mul t, w, t + add h, h, t + lsr t, l, #52 + add h, h, t + + lsl l, l, #12 + extr t, h, l, #36 + 
adcs sum2, sum2, t + and u, sum1, sum2 // 4 * 52 = 64 * 3 + 16 // At this point we also fold in the addition of c at the right place. // Note that 4 * 64 = 4 * 52 + 48 so we shift c left 48 places to align. - ldp c4, c5, [x, #32] - ldp d4, d5, [y, #32] - extr t, c4, c3, #16 - and t, t, #0x000fffffffffffff - mul l, v, t - extr t, d4, d3, #16 - and t, t, #0x000fffffffffffff - mul t, w, t - add l, l, t + ldp c4, c5, [x, #32] + ldp d4, d5, [y, #32] + extr t, c4, c3, #16 + and t, t, #0x000fffffffffffff + mul l, v, t + extr t, d4, d3, #16 + and t, t, #0x000fffffffffffff + mul t, w, t + add l, l, t - lsl c, c, #48 - add l, l, c + lsl c, c, #48 + add l, l, c - lsr t, h, #52 - add l, l, t + lsr t, h, #52 + add l, l, t - lsl h, h, #12 - extr t, l, h, #48 - adcs sum3, sum3, t - and u, u, sum3 + lsl h, h, #12 + extr t, l, h, #48 + adcs sum3, sum3, t + and u, u, sum3 // 5 * 52 = 64 * 4 + 4 - lsr t, c4, #4 - and t, t, #0x000fffffffffffff - mul h, v, t - lsr t, d4, #4 - and t, t, #0x000fffffffffffff - mul t, w, t - add h, h, t + lsr t, c4, #4 + and t, t, #0x000fffffffffffff + mul h, v, t + lsr t, d4, #4 + and t, t, #0x000fffffffffffff + mul t, w, t + add h, h, t - lsr t, l, #52 - add h, h, t + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr s, h, l, #60 + lsl l, l, #12 + extr s, h, l, #60 // 6 * 52 = 64 * 4 + 56 - extr t, c5, c4, #56 - and t, t, #0x000fffffffffffff - mul l, v, t - extr t, d5, d4, #56 - and t, t, #0x000fffffffffffff - mul t, w, t - add l, l, t + extr t, c5, c4, #56 + and t, t, #0x000fffffffffffff + mul l, v, t + extr t, d5, d4, #56 + and t, t, #0x000fffffffffffff + mul t, w, t + add l, l, t - lsr t, h, #52 - add l, l, t + lsr t, h, #52 + add l, l, t - lsl s, s, #8 - extr t, l, s, #8 - adcs sum4, sum4, t - and u, u, sum4 + lsl s, s, #8 + extr t, l, s, #8 + adcs sum4, sum4, t + and u, u, sum4 // 7 * 52 = 64 * 5 + 44 - ldp c6, c7, [x, #48] - ldp d6, d7, [y, #48] - extr t, c6, c5, #44 - and t, t, #0x000fffffffffffff - mul h, v, t - extr t, d6, d5, #44 - and t, t, #0x000fffffffffffff - mul t, w, t - add h, h, t + ldp c6, c7, [x, #48] + ldp d6, d7, [y, #48] + extr t, c6, c5, #44 + and t, t, #0x000fffffffffffff + mul h, v, t + extr t, d6, d5, #44 + and t, t, #0x000fffffffffffff + mul t, w, t + add h, h, t - lsr t, l, #52 - add h, h, t + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #20 - adcs sum5, sum5, t - and u, u, sum5 + lsl l, l, #12 + extr t, h, l, #20 + adcs sum5, sum5, t + and u, u, sum5 // 8 * 52 = 64 * 6 + 32 - extr t, c7, c6, #32 - and t, t, #0x000fffffffffffff - mul l, v, t - extr t, d7, d6, #32 - and t, t, #0x000fffffffffffff - mul t, w, t - add l, l, t + extr t, c7, c6, #32 + and t, t, #0x000fffffffffffff + mul l, v, t + extr t, d7, d6, #32 + and t, t, #0x000fffffffffffff + mul t, w, t + add l, l, t - lsr t, h, #52 - add l, l, t + lsr t, h, #52 + add l, l, t - lsl h, h, #12 - extr t, l, h, #32 - adcs sum6, sum6, t - and u, u, sum6 + lsl h, h, #12 + extr t, l, h, #32 + adcs sum6, sum6, t + and u, u, sum6 // 9 * 52 = 64 * 7 + 20 - lsr t, c7, #20 - mul h, v, t - lsr t, d7, #20 - mul t, w, t - add h, h, t + lsr t, c7, #20 + mul h, v, t + lsr t, d7, #20 + mul t, w, t + add h, h, t - lsr t, l, #52 - add h, h, t + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #44 - adcs sum7, sum7, t - and u, u, sum7 + lsl l, l, #12 + extr t, h, l, #44 + adcs sum7, sum7, t + and u, u, sum7 // Top word - mul t, v, w - lsr h, h, #44 - add t, t, h - adc sum8, sum8, t + mul t, v, w + lsr h, h, #44 + add t, t, h + adc sum8, sum8, t // Extract the high part h and mask off the low 
part l = [sum8;sum7;...;sum0] // but stuff sum8 with 1 bits at the left to ease a comparison below - lsr h, sum8, #9 - orr sum8, sum8, #~0x1FF + lsr h, sum8, #9 + orr sum8, sum8, #~0x1FF // Decide whether h + l >= p_521 <=> h + l + 1 >= 2^521. Since this can only // happen if digits sum7,...sum1 are all 1s, we use the AND of them "u" to // condense the carry chain, and since we stuffed 1 bits into sum8 we get // the result in CF without an additional comparison. - subs xzr, xzr, xzr - adcs xzr, sum0, h - adcs xzr, u, xzr - adcs xzr, sum8, xzr + subs xzr, xzr, xzr + adcs xzr, sum0, h + adcs xzr, u, xzr + adcs xzr, sum8, xzr // Now if CF is set we want (h + l) - p_521 = (h + l + 1) - 2^521 // while otherwise we want just h + l. So mask h + l + CF to 521 bits. // We don't need to mask away bits above 521 since they disappear below. - adcs sum0, sum0, h - adcs sum1, sum1, xzr - adcs sum2, sum2, xzr - adcs sum3, sum3, xzr - adcs sum4, sum4, xzr - adcs sum5, sum5, xzr - adcs sum6, sum6, xzr - adcs sum7, sum7, xzr - adc sum8, sum8, xzr + adcs sum0, sum0, h + adcs sum1, sum1, xzr + adcs sum2, sum2, xzr + adcs sum3, sum3, xzr + adcs sum4, sum4, xzr + adcs sum5, sum5, xzr + adcs sum6, sum6, xzr + adcs sum7, sum7, xzr + adc sum8, sum8, xzr // The result is actually 2^512 * [sum8;...;sum0] == 2^-9 * [sum8;...;sum0] // so we rotate right by 9 bits - and h, sum0, #0x1FF - extr sum0, sum1, sum0, #9 - extr sum1, sum2, sum1, #9 - stp sum0, sum1, [z] - extr sum2, sum3, sum2, #9 - extr sum3, sum4, sum3, #9 - stp sum2, sum3, [z, #16] - extr sum4, sum5, sum4, #9 - extr sum5, sum6, sum5, #9 - stp sum4, sum5, [z, #32] - extr sum6, sum7, sum6, #9 - extr sum7, sum8, sum7, #9 - stp sum6, sum7, [z, #48] - str h, [z, #64] + and h, sum0, #0x1FF + extr sum0, sum1, sum0, #9 + extr sum1, sum2, sum1, #9 + stp sum0, sum1, [z] + extr sum2, sum3, sum2, #9 + extr sum3, sum4, sum3, #9 + stp sum2, sum3, [z, #16] + extr sum4, sum5, sum4, #9 + extr sum5, sum6, sum5, #9 + stp sum4, sum5, [z, #32] + extr sum6, sum7, sum6, #9 + extr sum7, sum8, sum7, #9 + stp sum6, sum7, [z, #48] + str h, [z, #64] // Restore regs and return - add sp, sp, #80 - ldp x25, x26, [sp], #16 - ldp x23, x24, [sp], #16 - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 - ret + add sp, sp, #80 + ldp x25, x26, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_mul_p521_alt.S b/arm/p521/bignum_mul_p521_alt.S index 6b0ca2793f..aac74488ed 100644 --- a/arm/p521/bignum_mul_p521_alt.S +++ b/arm/p521/bignum_mul_p521_alt.S @@ -82,462 +82,462 @@ _bignum_mul_p521_alt: // Save more registers and make temporary space on stack - stp x19, x20, [sp, #-16]! - stp x21, x22, [sp, #-16]! - stp x23, x24, [sp, #-16]! - stp x25, x26, [sp, #-16]! - sub sp, sp, #64 + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! 
+ sub sp, sp, #64 // Load operands and set up row 0 = [u9;...;u0] = a0 * [b8;...;b0] - ldp a0, a1, [x] - ldp b0, b1, [y] + ldp a0, a1, [x] + ldp b0, b1, [y] - mul u0, a0, b0 - umulh u1, a0, b0 - mul t, a0, b1 - umulh u2, a0, b1 - adds u1, u1, t + mul u0, a0, b0 + umulh u1, a0, b0 + mul t, a0, b1 + umulh u2, a0, b1 + adds u1, u1, t - ldp b2, b3, [y, #16] + ldp b2, b3, [y, #16] - mul t, a0, b2 - umulh u3, a0, b2 - adcs u2, u2, t + mul t, a0, b2 + umulh u3, a0, b2 + adcs u2, u2, t - mul t, a0, b3 - umulh u4, a0, b3 - adcs u3, u3, t + mul t, a0, b3 + umulh u4, a0, b3 + adcs u3, u3, t - ldp b4, b5, [y, #32] + ldp b4, b5, [y, #32] - mul t, a0, b4 - umulh u5, a0, b4 - adcs u4, u4, t + mul t, a0, b4 + umulh u5, a0, b4 + adcs u4, u4, t - mul t, a0, b5 - umulh u6, a0, b5 - adcs u5, u5, t + mul t, a0, b5 + umulh u6, a0, b5 + adcs u5, u5, t - ldp b6, b7, [y, #48] + ldp b6, b7, [y, #48] - mul t, a0, b6 - umulh u7, a0, b6 - adcs u6, u6, t + mul t, a0, b6 + umulh u7, a0, b6 + adcs u6, u6, t - ldr b8, [y, #64] + ldr b8, [y, #64] - mul t, a0, b7 - umulh u8, a0, b7 - adcs u7, u7, t + mul t, a0, b7 + umulh u8, a0, b7 + adcs u7, u7, t - mul t, a0, b8 - umulh u9, a0, b8 - adcs u8, u8, t + mul t, a0, b8 + umulh u9, a0, b8 + adcs u8, u8, t - adc u9, u9, xzr + adc u9, u9, xzr // Row 1 = [u10;...;u0] = [a1;a0] * [b8;...;b0] - mul t, a1, b0 - adds u1, u1, t - mul t, a1, b1 - adcs u2, u2, t - mul t, a1, b2 - adcs u3, u3, t - mul t, a1, b3 - adcs u4, u4, t - mul t, a1, b4 - adcs u5, u5, t - mul t, a1, b5 - adcs u6, u6, t - mul t, a1, b6 - adcs u7, u7, t - mul t, a1, b7 - adcs u8, u8, t - mul t, a1, b8 - adcs u9, u9, t - cset u10, cs - - umulh t, a1, b0 - adds u2, u2, t - umulh t, a1, b1 - adcs u3, u3, t - umulh t, a1, b2 - adcs u4, u4, t - umulh t, a1, b3 - adcs u5, u5, t - umulh t, a1, b4 - adcs u6, u6, t - umulh t, a1, b5 - adcs u7, u7, t - umulh t, a1, b6 - adcs u8, u8, t - umulh t, a1, b7 - adcs u9, u9, t - umulh t, a1, b8 - adc u10, u10, t - - stp u0, u1, [sp] + mul t, a1, b0 + adds u1, u1, t + mul t, a1, b1 + adcs u2, u2, t + mul t, a1, b2 + adcs u3, u3, t + mul t, a1, b3 + adcs u4, u4, t + mul t, a1, b4 + adcs u5, u5, t + mul t, a1, b5 + adcs u6, u6, t + mul t, a1, b6 + adcs u7, u7, t + mul t, a1, b7 + adcs u8, u8, t + mul t, a1, b8 + adcs u9, u9, t + cset u10, cs + + umulh t, a1, b0 + adds u2, u2, t + umulh t, a1, b1 + adcs u3, u3, t + umulh t, a1, b2 + adcs u4, u4, t + umulh t, a1, b3 + adcs u5, u5, t + umulh t, a1, b4 + adcs u6, u6, t + umulh t, a1, b5 + adcs u7, u7, t + umulh t, a1, b6 + adcs u8, u8, t + umulh t, a1, b7 + adcs u9, u9, t + umulh t, a1, b8 + adc u10, u10, t + + stp u0, u1, [sp] // Row 2 = [u11;...;u0] = [a2;a1;a0] * [b8;...;b0] - ldp a2, a3, [x, #16] - - mul t, a2, b0 - adds u2, u2, t - mul t, a2, b1 - adcs u3, u3, t - mul t, a2, b2 - adcs u4, u4, t - mul t, a2, b3 - adcs u5, u5, t - mul t, a2, b4 - adcs u6, u6, t - mul t, a2, b5 - adcs u7, u7, t - mul t, a2, b6 - adcs u8, u8, t - mul t, a2, b7 - adcs u9, u9, t - mul t, a2, b8 - adcs u10, u10, t - cset u11, cs - - umulh t, a2, b0 - adds u3, u3, t - umulh t, a2, b1 - adcs u4, u4, t - umulh t, a2, b2 - adcs u5, u5, t - umulh t, a2, b3 - adcs u6, u6, t - umulh t, a2, b4 - adcs u7, u7, t - umulh t, a2, b5 - adcs u8, u8, t - umulh t, a2, b6 - adcs u9, u9, t - umulh t, a2, b7 - adcs u10, u10, t - umulh t, a2, b8 - adc u11, u11, t + ldp a2, a3, [x, #16] + + mul t, a2, b0 + adds u2, u2, t + mul t, a2, b1 + adcs u3, u3, t + mul t, a2, b2 + adcs u4, u4, t + mul t, a2, b3 + adcs u5, u5, t + mul t, a2, b4 + adcs u6, u6, t + mul t, a2, b5 + adcs u7, u7, 
t + mul t, a2, b6 + adcs u8, u8, t + mul t, a2, b7 + adcs u9, u9, t + mul t, a2, b8 + adcs u10, u10, t + cset u11, cs + + umulh t, a2, b0 + adds u3, u3, t + umulh t, a2, b1 + adcs u4, u4, t + umulh t, a2, b2 + adcs u5, u5, t + umulh t, a2, b3 + adcs u6, u6, t + umulh t, a2, b4 + adcs u7, u7, t + umulh t, a2, b5 + adcs u8, u8, t + umulh t, a2, b6 + adcs u9, u9, t + umulh t, a2, b7 + adcs u10, u10, t + umulh t, a2, b8 + adc u11, u11, t // Row 3 = [u12;...;u0] = [a3;a2;a1;a0] * [b8;...;b0] - mul t, a3, b0 - adds u3, u3, t - mul t, a3, b1 - adcs u4, u4, t - mul t, a3, b2 - adcs u5, u5, t - mul t, a3, b3 - adcs u6, u6, t - mul t, a3, b4 - adcs u7, u7, t - mul t, a3, b5 - adcs u8, u8, t - mul t, a3, b6 - adcs u9, u9, t - mul t, a3, b7 - adcs u10, u10, t - mul t, a3, b8 - adcs u11, u11, t - cset u12, cs - - umulh t, a3, b0 - adds u4, u4, t - umulh t, a3, b1 - adcs u5, u5, t - umulh t, a3, b2 - adcs u6, u6, t - umulh t, a3, b3 - adcs u7, u7, t - umulh t, a3, b4 - adcs u8, u8, t - umulh t, a3, b5 - adcs u9, u9, t - umulh t, a3, b6 - adcs u10, u10, t - umulh t, a3, b7 - adcs u11, u11, t - umulh t, a3, b8 - adc u12, u12, t - - stp u2, u3, [sp, #16] + mul t, a3, b0 + adds u3, u3, t + mul t, a3, b1 + adcs u4, u4, t + mul t, a3, b2 + adcs u5, u5, t + mul t, a3, b3 + adcs u6, u6, t + mul t, a3, b4 + adcs u7, u7, t + mul t, a3, b5 + adcs u8, u8, t + mul t, a3, b6 + adcs u9, u9, t + mul t, a3, b7 + adcs u10, u10, t + mul t, a3, b8 + adcs u11, u11, t + cset u12, cs + + umulh t, a3, b0 + adds u4, u4, t + umulh t, a3, b1 + adcs u5, u5, t + umulh t, a3, b2 + adcs u6, u6, t + umulh t, a3, b3 + adcs u7, u7, t + umulh t, a3, b4 + adcs u8, u8, t + umulh t, a3, b5 + adcs u9, u9, t + umulh t, a3, b6 + adcs u10, u10, t + umulh t, a3, b7 + adcs u11, u11, t + umulh t, a3, b8 + adc u12, u12, t + + stp u2, u3, [sp, #16] // Row 4 = [u13;...;u0] = [a4;a3;a2;a1;a0] * [b8;...;b0] - ldp a4, a5, [x, #32] - - mul t, a4, b0 - adds u4, u4, t - mul t, a4, b1 - adcs u5, u5, t - mul t, a4, b2 - adcs u6, u6, t - mul t, a4, b3 - adcs u7, u7, t - mul t, a4, b4 - adcs u8, u8, t - mul t, a4, b5 - adcs u9, u9, t - mul t, a4, b6 - adcs u10, u10, t - mul t, a4, b7 - adcs u11, u11, t - mul t, a4, b8 - adcs u12, u12, t - cset u13, cs - - umulh t, a4, b0 - adds u5, u5, t - umulh t, a4, b1 - adcs u6, u6, t - umulh t, a4, b2 - adcs u7, u7, t - umulh t, a4, b3 - adcs u8, u8, t - umulh t, a4, b4 - adcs u9, u9, t - umulh t, a4, b5 - adcs u10, u10, t - umulh t, a4, b6 - adcs u11, u11, t - umulh t, a4, b7 - adcs u12, u12, t - umulh t, a4, b8 - adc u13, u13, t + ldp a4, a5, [x, #32] + + mul t, a4, b0 + adds u4, u4, t + mul t, a4, b1 + adcs u5, u5, t + mul t, a4, b2 + adcs u6, u6, t + mul t, a4, b3 + adcs u7, u7, t + mul t, a4, b4 + adcs u8, u8, t + mul t, a4, b5 + adcs u9, u9, t + mul t, a4, b6 + adcs u10, u10, t + mul t, a4, b7 + adcs u11, u11, t + mul t, a4, b8 + adcs u12, u12, t + cset u13, cs + + umulh t, a4, b0 + adds u5, u5, t + umulh t, a4, b1 + adcs u6, u6, t + umulh t, a4, b2 + adcs u7, u7, t + umulh t, a4, b3 + adcs u8, u8, t + umulh t, a4, b4 + adcs u9, u9, t + umulh t, a4, b5 + adcs u10, u10, t + umulh t, a4, b6 + adcs u11, u11, t + umulh t, a4, b7 + adcs u12, u12, t + umulh t, a4, b8 + adc u13, u13, t // Row 5 = [u14;...;u0] = [a5;a4;a3;a2;a1;a0] * [b8;...;b0] - mul t, a5, b0 - adds u5, u5, t - mul t, a5, b1 - adcs u6, u6, t - mul t, a5, b2 - adcs u7, u7, t - mul t, a5, b3 - adcs u8, u8, t - mul t, a5, b4 - adcs u9, u9, t - mul t, a5, b5 - adcs u10, u10, t - mul t, a5, b6 - adcs u11, u11, t - mul t, a5, b7 - adcs u12, u12, t - mul t, a5, 
b8 - adcs u13, u13, t - cset u14, cs - - umulh t, a5, b0 - adds u6, u6, t - umulh t, a5, b1 - adcs u7, u7, t - umulh t, a5, b2 - adcs u8, u8, t - umulh t, a5, b3 - adcs u9, u9, t - umulh t, a5, b4 - adcs u10, u10, t - umulh t, a5, b5 - adcs u11, u11, t - umulh t, a5, b6 - adcs u12, u12, t - umulh t, a5, b7 - adcs u13, u13, t - umulh t, a5, b8 - adc u14, u14, t - - stp u4, u5, [sp, #32] + mul t, a5, b0 + adds u5, u5, t + mul t, a5, b1 + adcs u6, u6, t + mul t, a5, b2 + adcs u7, u7, t + mul t, a5, b3 + adcs u8, u8, t + mul t, a5, b4 + adcs u9, u9, t + mul t, a5, b5 + adcs u10, u10, t + mul t, a5, b6 + adcs u11, u11, t + mul t, a5, b7 + adcs u12, u12, t + mul t, a5, b8 + adcs u13, u13, t + cset u14, cs + + umulh t, a5, b0 + adds u6, u6, t + umulh t, a5, b1 + adcs u7, u7, t + umulh t, a5, b2 + adcs u8, u8, t + umulh t, a5, b3 + adcs u9, u9, t + umulh t, a5, b4 + adcs u10, u10, t + umulh t, a5, b5 + adcs u11, u11, t + umulh t, a5, b6 + adcs u12, u12, t + umulh t, a5, b7 + adcs u13, u13, t + umulh t, a5, b8 + adc u14, u14, t + + stp u4, u5, [sp, #32] // Row 6 = [u15;...;u0] = [a6;a5;a4;a3;a2;a1;a0] * [b8;...;b0] - ldp a6, a7, [x, #48] - - mul t, a6, b0 - adds u6, u6, t - mul t, a6, b1 - adcs u7, u7, t - mul t, a6, b2 - adcs u8, u8, t - mul t, a6, b3 - adcs u9, u9, t - mul t, a6, b4 - adcs u10, u10, t - mul t, a6, b5 - adcs u11, u11, t - mul t, a6, b6 - adcs u12, u12, t - mul t, a6, b7 - adcs u13, u13, t - mul t, a6, b8 - adcs u14, u14, t - cset u15, cs - - umulh t, a6, b0 - adds u7, u7, t - umulh t, a6, b1 - adcs u8, u8, t - umulh t, a6, b2 - adcs u9, u9, t - umulh t, a6, b3 - adcs u10, u10, t - umulh t, a6, b4 - adcs u11, u11, t - umulh t, a6, b5 - adcs u12, u12, t - umulh t, a6, b6 - adcs u13, u13, t - umulh t, a6, b7 - adcs u14, u14, t - umulh t, a6, b8 - adc u15, u15, t + ldp a6, a7, [x, #48] + + mul t, a6, b0 + adds u6, u6, t + mul t, a6, b1 + adcs u7, u7, t + mul t, a6, b2 + adcs u8, u8, t + mul t, a6, b3 + adcs u9, u9, t + mul t, a6, b4 + adcs u10, u10, t + mul t, a6, b5 + adcs u11, u11, t + mul t, a6, b6 + adcs u12, u12, t + mul t, a6, b7 + adcs u13, u13, t + mul t, a6, b8 + adcs u14, u14, t + cset u15, cs + + umulh t, a6, b0 + adds u7, u7, t + umulh t, a6, b1 + adcs u8, u8, t + umulh t, a6, b2 + adcs u9, u9, t + umulh t, a6, b3 + adcs u10, u10, t + umulh t, a6, b4 + adcs u11, u11, t + umulh t, a6, b5 + adcs u12, u12, t + umulh t, a6, b6 + adcs u13, u13, t + umulh t, a6, b7 + adcs u14, u14, t + umulh t, a6, b8 + adc u15, u15, t // Row 7 = [u16;...;u0] = [a7;a6;a5;a4;a3;a2;a1;a0] * [b8;...;b0] - mul t, a7, b0 - adds u7, u7, t - mul t, a7, b1 - adcs u8, u8, t - mul t, a7, b2 - adcs u9, u9, t - mul t, a7, b3 - adcs u10, u10, t - mul t, a7, b4 - adcs u11, u11, t - mul t, a7, b5 - adcs u12, u12, t - mul t, a7, b6 - adcs u13, u13, t - mul t, a7, b7 - adcs u14, u14, t - mul t, a7, b8 - adcs u15, u15, t - cset u16, cs - - umulh t, a7, b0 - adds u8, u8, t - umulh t, a7, b1 - adcs u9, u9, t - umulh t, a7, b2 - adcs u10, u10, t - umulh t, a7, b3 - adcs u11, u11, t - umulh t, a7, b4 - adcs u12, u12, t - umulh t, a7, b5 - adcs u13, u13, t - umulh t, a7, b6 - adcs u14, u14, t - umulh t, a7, b7 - adcs u15, u15, t - umulh t, a7, b8 - adc u16, u16, t - - stp u6, u7, [sp, #48] + mul t, a7, b0 + adds u7, u7, t + mul t, a7, b1 + adcs u8, u8, t + mul t, a7, b2 + adcs u9, u9, t + mul t, a7, b3 + adcs u10, u10, t + mul t, a7, b4 + adcs u11, u11, t + mul t, a7, b5 + adcs u12, u12, t + mul t, a7, b6 + adcs u13, u13, t + mul t, a7, b7 + adcs u14, u14, t + mul t, a7, b8 + adcs u15, u15, t + cset u16, cs + + umulh 
t, a7, b0 + adds u8, u8, t + umulh t, a7, b1 + adcs u9, u9, t + umulh t, a7, b2 + adcs u10, u10, t + umulh t, a7, b3 + adcs u11, u11, t + umulh t, a7, b4 + adcs u12, u12, t + umulh t, a7, b5 + adcs u13, u13, t + umulh t, a7, b6 + adcs u14, u14, t + umulh t, a7, b7 + adcs u15, u15, t + umulh t, a7, b8 + adc u16, u16, t + + stp u6, u7, [sp, #48] // Row 8 = [u16;...;u0] = [a8;a7;a6;a5;a4;a3;a2;a1;a0] * [b8;...;b0] - ldr a8, [x, #64] - - mul t, a8, b0 - adds u8, u8, t - mul t, a8, b1 - adcs u9, u9, t - mul t, a8, b2 - adcs u10, u10, t - mul t, a8, b3 - adcs u11, u11, t - mul t, a8, b4 - adcs u12, u12, t - mul t, a8, b5 - adcs u13, u13, t - mul t, a8, b6 - adcs u14, u14, t - mul t, a8, b7 - adcs u15, u15, t - mul t, a8, b8 - adc u16, u16, t - - umulh t, a8, b0 - adds u9, u9, t - umulh t, a8, b1 - adcs u10, u10, t - umulh t, a8, b2 - adcs u11, u11, t - umulh t, a8, b3 - adcs u12, u12, t - umulh t, a8, b4 - adcs u13, u13, t - umulh t, a8, b5 - adcs u14, u14, t - umulh t, a8, b6 - adcs u15, u15, t - umulh t, a8, b7 - adc u16, u16, t + ldr a8, [x, #64] + + mul t, a8, b0 + adds u8, u8, t + mul t, a8, b1 + adcs u9, u9, t + mul t, a8, b2 + adcs u10, u10, t + mul t, a8, b3 + adcs u11, u11, t + mul t, a8, b4 + adcs u12, u12, t + mul t, a8, b5 + adcs u13, u13, t + mul t, a8, b6 + adcs u14, u14, t + mul t, a8, b7 + adcs u15, u15, t + mul t, a8, b8 + adc u16, u16, t + + umulh t, a8, b0 + adds u9, u9, t + umulh t, a8, b1 + adcs u10, u10, t + umulh t, a8, b2 + adcs u11, u11, t + umulh t, a8, b3 + adcs u12, u12, t + umulh t, a8, b4 + adcs u13, u13, t + umulh t, a8, b5 + adcs u14, u14, t + umulh t, a8, b6 + adcs u15, u15, t + umulh t, a8, b7 + adc u16, u16, t // Now we have the full product, which we consider as // 2^521 * h + l. Form h + l + 1 - subs xzr, xzr, xzr - ldp b0, b1, [sp] - extr t, u9, u8, #9 - adcs b0, b0, t - extr t, u10, u9, #9 - adcs b1, b1, t - ldp b2, b3, [sp, #16] - extr t, u11, u10, #9 - adcs b2, b2, t - extr t, u12, u11, #9 - adcs b3, b3, t - ldp b4, b5, [sp, #32] - extr t, u13, u12, #9 - adcs b4, b4, t - extr t, u14, u13, #9 - adcs b5, b5, t - ldp b6, b7, [sp, #48] - extr t, u15, u14, #9 - adcs b6, b6, t - extr t, u16, u15, #9 - adcs b7, b7, t - orr b8, u8, #~0x1FF - lsr t, u16, #9 - adcs b8, b8, t + subs xzr, xzr, xzr + ldp b0, b1, [sp] + extr t, u9, u8, #9 + adcs b0, b0, t + extr t, u10, u9, #9 + adcs b1, b1, t + ldp b2, b3, [sp, #16] + extr t, u11, u10, #9 + adcs b2, b2, t + extr t, u12, u11, #9 + adcs b3, b3, t + ldp b4, b5, [sp, #32] + extr t, u13, u12, #9 + adcs b4, b4, t + extr t, u14, u13, #9 + adcs b5, b5, t + ldp b6, b7, [sp, #48] + extr t, u15, u14, #9 + adcs b6, b6, t + extr t, u16, u15, #9 + adcs b7, b7, t + orr b8, u8, #~0x1FF + lsr t, u16, #9 + adcs b8, b8, t // Now CF is set if h + l + 1 >= 2^521, which means it's already // the answer, while if ~CF the answer is h + l so we should subtract // 1 (all considered in 521 bits). Hence subtract ~CF and mask. 
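Before that final correction, note the fixed shape of the accumulation: rows 0 through 8 above each add a_i * [b8;...;b0] in two carry chains, all the low product halves first, then all the high halves. A C sketch of one row, with illustrative names and the GCC/Clang unsigned __int128 extension (a model of the idea, not of the exact register schedule):

    #include <stdint.h>

    // Accumulate row i, i.e. u += 2^(64*i) * a_i * b, into the buffer.
    static void add_row(uint64_t u[18], uint64_t ai, const uint64_t b[9], int i)
    {
        unsigned __int128 c = 0;
        for (int j = 0; j < 9; j++) {          // mul / adds / adcs pass
            c += (unsigned __int128)u[i + j]
               + (uint64_t)((unsigned __int128)ai * b[j]);
            u[i + j] = (uint64_t)c;
            c >>= 64;
        }
        u[i + 9] += (uint64_t)c;               // cset: digit i+9 is still 0 here

        c = 0;
        for (int j = 0; j < 9; j++) {          // umulh / adds / adcs / adc pass
            c += (unsigned __int128)u[i + 1 + j]
               + (uint64_t)(((unsigned __int128)ai * b[j]) >> 64);
            u[i + 1 + j] = (uint64_t)c;
            c >>= 64;                          // 0 on exit: the row fits 10 digits
        }
    }

Running add_row for i = 0..8 over a zeroed buffer reproduces the full product [u16;...;u0].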
- sbcs b0, b0, xzr - sbcs b1, b1, xzr - sbcs b2, b2, xzr - sbcs b3, b3, xzr - sbcs b4, b4, xzr - sbcs b5, b5, xzr - sbcs b6, b6, xzr - sbcs b7, b7, xzr - sbc b8, b8, xzr - and b8, b8, #0x1FF + sbcs b0, b0, xzr + sbcs b1, b1, xzr + sbcs b2, b2, xzr + sbcs b3, b3, xzr + sbcs b4, b4, xzr + sbcs b5, b5, xzr + sbcs b6, b6, xzr + sbcs b7, b7, xzr + sbc b8, b8, xzr + and b8, b8, #0x1FF // Store back digits of final result - stp b0, b1, [z] - stp b2, b3, [z, #16] - stp b4, b5, [z, #32] - stp b6, b7, [z, #48] - str b8, [z, #64] + stp b0, b1, [z] + stp b2, b3, [z, #16] + stp b4, b5, [z, #32] + stp b6, b7, [z, #48] + str b8, [z, #64] // Restore registers - add sp, sp, #64 - ldp x25, x26, [sp], #16 - ldp x23, x24, [sp], #16 - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 + add sp, sp, #64 + ldp x25, x26, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_neg_p521.S b/arm/p521/bignum_neg_p521.S index 825f0af295..e13bac25c0 100644 --- a/arm/p521/bignum_neg_p521.S +++ b/arm/p521/bignum_neg_p521.S @@ -47,48 +47,48 @@ _bignum_neg_p521: // Load the 9 digits of x and generate p = the OR of them all - ldp d0, d1, [x] - orr d6, d0, d1 - ldp d2, d3, [x, #16] - orr d7, d2, d3 - orr p, d6, d7 - ldp d4, d5, [x, #32] - orr d8, d4, d5 - orr p, p, d8 - ldp d6, d7, [x, #48] - orr d8, d6, d7 - orr p, p, d8 - ldr d8, [x, #64] - orr p, p, d8 + ldp d0, d1, [x] + orr d6, d0, d1 + ldp d2, d3, [x, #16] + orr d7, d2, d3 + orr p, d6, d7 + ldp d4, d5, [x, #32] + orr d8, d4, d5 + orr p, p, d8 + ldp d6, d7, [x, #48] + orr d8, d6, d7 + orr p, p, d8 + ldr d8, [x, #64] + orr p, p, d8 // Turn p into a bitmask for "input is nonzero", so that we avoid doing // -0 = p_521 and hence maintain strict modular reduction - cmp p, #0 - csetm p, ne + cmp p, #0 + csetm p, ne // Since p_521 is all 1s, the subtraction is just an exclusive-or with p // to give an optional inversion, with a slight fiddle for the top digit. 
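A minimal C sketch of the whole negation (illustrative names, not part of the patch; x is assumed reduced, so x[8] < 2^9): because p_521 = 2^521 - 1 is all 1s, p_521 - x is just bitwise NOT within 521 bits, and the mask makes the inversion conditional on x != 0 so that 0 maps to 0 rather than to p_521.

    #include <stdint.h>

    static void neg_p521(uint64_t z[9], const uint64_t x[9])
    {
        uint64_t p = 0;
        for (int i = 0; i < 9; i++) p |= x[i];   // OR of all the digits
        uint64_t m = p ? ~(uint64_t)0 : 0;       // csetm: nonzero-input mask
        for (int i = 0; i < 8; i++) z[i] = x[i] ^ m;
        z[8] = x[8] ^ (m & 0x1FF);               // top digit: only 9 bits of p_521
    }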
- eor d0, d0, p - eor d1, d1, p - eor d2, d2, p - eor d3, d3, p - eor d4, d4, p - eor d5, d5, p - eor d6, d6, p - eor d7, d7, p - and p, p, #0x1FF - eor d8, d8, p + eor d0, d0, p + eor d1, d1, p + eor d2, d2, p + eor d3, d3, p + eor d4, d4, p + eor d5, d5, p + eor d6, d6, p + eor d7, d7, p + and p, p, #0x1FF + eor d8, d8, p // Write back the result and return - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] - stp d6, d7, [z, #48] - str d8, [z, #64] - ret + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] + stp d6, d7, [z, #48] + str d8, [z, #64] + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_optneg_p521.S b/arm/p521/bignum_optneg_p521.S index b7415e5dd1..3834f9abd9 100644 --- a/arm/p521/bignum_optneg_p521.S +++ b/arm/p521/bignum_optneg_p521.S @@ -49,50 +49,50 @@ _bignum_optneg_p521: // Load the 9 digits of x and generate q = the OR of them all - ldp d0, d1, [x] - orr d6, d0, d1 - ldp d2, d3, [x, #16] - orr d7, d2, d3 - orr q, d6, d7 - ldp d4, d5, [x, #32] - orr d8, d4, d5 - orr q, q, d8 - ldp d6, d7, [x, #48] - orr d8, d6, d7 - orr q, q, d8 - ldr d8, [x, #64] - orr q, q, d8 + ldp d0, d1, [x] + orr d6, d0, d1 + ldp d2, d3, [x, #16] + orr d7, d2, d3 + orr q, d6, d7 + ldp d4, d5, [x, #32] + orr d8, d4, d5 + orr q, q, d8 + ldp d6, d7, [x, #48] + orr d8, d6, d7 + orr q, q, d8 + ldr d8, [x, #64] + orr q, q, d8 // Turn q into a bitmask for "input is nonzero and p is nonzero", so that // we avoid doing -0 = p_521 and hence maintain strict modular reduction - cmp q, #0 - csetm q, ne - cmp p, #0 - csel q, xzr, q, eq + cmp q, #0 + csetm q, ne + cmp p, #0 + csel q, xzr, q, eq // Since p_521 is all 1s, the subtraction is just an exclusive-or with q // to give an optional inversion, with a slight fiddle for the top digit. - eor d0, d0, q - eor d1, d1, q - eor d2, d2, q - eor d3, d3, q - eor d4, d4, q - eor d5, d5, q - eor d6, d6, q - eor d7, d7, q - and q, q, #0x1FF - eor d8, d8, q + eor d0, d0, q + eor d1, d1, q + eor d2, d2, q + eor d3, d3, q + eor d4, d4, q + eor d5, d5, q + eor d6, d6, q + eor d7, d7, q + and q, q, #0x1FF + eor d8, d8, q // Write back the result and return - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] - stp d6, d7, [z, #48] - str d8, [z, #64] - ret + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] + stp d6, d7, [z, #48] + str d8, [z, #64] + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_sqr_p521.S b/arm/p521/bignum_sqr_p521.S index 7bce2a3e22..a3c618b5b6 100644 --- a/arm/p521/bignum_sqr_p521.S +++ b/arm/p521/bignum_sqr_p521.S @@ -72,82 +72,82 @@ _bignum_sqr_p521: // Save registers - stp x19, x20, [sp, #-16]! - stp x21, x22, [sp, #-16]! - stp x23, x24, [sp, #-16]! + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! 
// Load all the inputs first - ldp a0, a1, [x] - ldp a2, a3, [x, #16] - ldp b0, b1, [x, #32] - ldp b2, b3, [x, #48] + ldp a0, a1, [x] + ldp a2, a3, [x, #16] + ldp b0, b1, [x, #32] + ldp b2, b3, [x, #48] // Square the upper half with a register-renamed variant of bignum_sqr_4_8 - mul s2, b0, b2 - mul s7, b1, b3 - umulh t, b0, b2 - subs u, b0, b1 - cneg u, u, cc - csetm s1, cc - subs s0, b3, b2 - cneg s0, s0, cc - mul s6, u, s0 - umulh s0, u, s0 - cinv s1, s1, cc - eor s6, s6, s1 - eor s0, s0, s1 - adds s3, s2, t - adc t, t, xzr - umulh u, b1, b3 - adds s3, s3, s7 - adcs t, t, u - adc u, u, xzr - adds t, t, s7 - adc u, u, xzr - cmn s1, #0x1 - adcs s3, s3, s6 - adcs t, t, s0 - adc u, u, s1 - adds s2, s2, s2 - adcs s3, s3, s3 - adcs t, t, t - adcs u, u, u - adc c, xzr, xzr - mul s0, b0, b0 - mul s6, b1, b1 - mul l, b0, b1 - umulh s1, b0, b0 - umulh s7, b1, b1 - umulh h, b0, b1 - adds s1, s1, l - adcs s6, s6, h - adc s7, s7, xzr - adds s1, s1, l - adcs s6, s6, h - adc s7, s7, xzr - adds s2, s2, s6 - adcs s3, s3, s7 - adcs t, t, xzr - adcs u, u, xzr - adc c, c, xzr - mul s4, b2, b2 - mul s6, b3, b3 - mul l, b2, b3 - umulh s5, b2, b2 - umulh s7, b3, b3 - umulh h, b2, b3 - adds s5, s5, l - adcs s6, s6, h - adc s7, s7, xzr - adds s5, s5, l - adcs s6, s6, h - adc s7, s7, xzr - adds s4, s4, t - adcs s5, s5, u - adcs s6, s6, c - adc s7, s7, xzr + mul s2, b0, b2 + mul s7, b1, b3 + umulh t, b0, b2 + subs u, b0, b1 + cneg u, u, cc + csetm s1, cc + subs s0, b3, b2 + cneg s0, s0, cc + mul s6, u, s0 + umulh s0, u, s0 + cinv s1, s1, cc + eor s6, s6, s1 + eor s0, s0, s1 + adds s3, s2, t + adc t, t, xzr + umulh u, b1, b3 + adds s3, s3, s7 + adcs t, t, u + adc u, u, xzr + adds t, t, s7 + adc u, u, xzr + cmn s1, #0x1 + adcs s3, s3, s6 + adcs t, t, s0 + adc u, u, s1 + adds s2, s2, s2 + adcs s3, s3, s3 + adcs t, t, t + adcs u, u, u + adc c, xzr, xzr + mul s0, b0, b0 + mul s6, b1, b1 + mul l, b0, b1 + umulh s1, b0, b0 + umulh s7, b1, b1 + umulh h, b0, b1 + adds s1, s1, l + adcs s6, s6, h + adc s7, s7, xzr + adds s1, s1, l + adcs s6, s6, h + adc s7, s7, xzr + adds s2, s2, s6 + adcs s3, s3, s7 + adcs t, t, xzr + adcs u, u, xzr + adc c, c, xzr + mul s4, b2, b2 + mul s6, b3, b3 + mul l, b2, b3 + umulh s5, b2, b2 + umulh s7, b3, b3 + umulh h, b2, b3 + adds s5, s5, l + adcs s6, s6, h + adc s7, s7, xzr + adds s5, s5, l + adcs s6, s6, h + adc s7, s7, xzr + adds s4, s4, t + adcs s5, s5, u + adcs s6, s6, c + adc s7, s7, xzr // Augment the high part with the contribution from the top little word C. // If we write the input as 2^512 * C + x then we are otherwise just doing @@ -162,364 +162,364 @@ _bignum_sqr_p521: // equally well use 53 or 54 since they are still <= 64 - 10, but below // 52 we would end up using more multiplications. 
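// (Illustrative aside, not part of this patch: the chunk-size bound works
// because C < 2^9 means u = 2 * C < 2^10, so for a 52-bit chunk d and the
// at most 12-bit carry t shifted down from the previous chunk,
//
//     u * d + t < 2^62 + 2^12 < 2^64
//
// and every partial product fits in one 64-bit word. Ten 52-bit chunks
// cover all 512 bits (52 * 10 = 520 >= 512), while any width below 52
// would need an eleventh multiplication.)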
- ldr c, [x, #64] - add u, c, c - mul c, c, c + ldr c, [x, #64] + add u, c, c + mul c, c, c // 0 * 52 = 64 * 0 + 0 - and l, a0, #0x000fffffffffffff - mul l, u, l + and l, a0, #0x000fffffffffffff + mul l, u, l // 1 * 52 = 64 * 0 + 52 - extr h, a1, a0, #52 - and h, h, #0x000fffffffffffff - mul h, u, h - lsr t, l, #52 - add h, h, t + extr h, a1, a0, #52 + and h, h, #0x000fffffffffffff + mul h, u, h + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #12 - adds s0, s0, t + lsl l, l, #12 + extr t, h, l, #12 + adds s0, s0, t // 2 * 52 = 64 * 1 + 40 - extr l, a2, a1, #40 - and l, l, #0x000fffffffffffff - mul l, u, l - lsr t, h, #52 - add l, l, t + extr l, a2, a1, #40 + and l, l, #0x000fffffffffffff + mul l, u, l + lsr t, h, #52 + add l, l, t - lsl h, h, #12 - extr t, l, h, #24 - adcs s1, s1, t + lsl h, h, #12 + extr t, l, h, #24 + adcs s1, s1, t // 3 * 52 = 64 * 2 + 28 - extr h, a3, a2, #28 - and h, h, #0x000fffffffffffff - mul h, u, h - lsr t, l, #52 - add h, h, t + extr h, a3, a2, #28 + and h, h, #0x000fffffffffffff + mul h, u, h + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #36 - adcs s2, s2, t + lsl l, l, #12 + extr t, h, l, #36 + adcs s2, s2, t // 4 * 52 = 64 * 3 + 16 - extr l, b0, a3, #16 - and l, l, #0x000fffffffffffff - mul l, u, l - lsr t, h, #52 - add l, l, t + extr l, b0, a3, #16 + and l, l, #0x000fffffffffffff + mul l, u, l + lsr t, h, #52 + add l, l, t - lsl h, h, #12 - extr t, l, h, #48 - adcs s3, s3, t + lsl h, h, #12 + extr t, l, h, #48 + adcs s3, s3, t // 5 * 52 = 64 * 4 + 4 - lsr h, b0, #4 - and h, h, #0x000fffffffffffff - mul h, u, h - lsr t, l, #52 - add h, h, t + lsr h, b0, #4 + and h, h, #0x000fffffffffffff + mul h, u, h + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr v, h, l, #60 + lsl l, l, #12 + extr v, h, l, #60 // 6 * 52 = 64 * 4 + 56 - extr l, b1, b0, #56 - and l, l, #0x000fffffffffffff - mul l, u, l - lsr t, h, #52 - add l, l, t + extr l, b1, b0, #56 + and l, l, #0x000fffffffffffff + mul l, u, l + lsr t, h, #52 + add l, l, t - lsl v, v, #8 - extr t, l, v, #8 - adcs s4, s4, t + lsl v, v, #8 + extr t, l, v, #8 + adcs s4, s4, t // 7 * 52 = 64 * 5 + 44 - extr h, b2, b1, #44 - and h, h, #0x000fffffffffffff - mul h, u, h - lsr t, l, #52 - add h, h, t + extr h, b2, b1, #44 + and h, h, #0x000fffffffffffff + mul h, u, h + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #20 - adcs s5, s5, t + lsl l, l, #12 + extr t, h, l, #20 + adcs s5, s5, t // 8 * 52 = 64 * 6 + 32 - extr l, b3, b2, #32 - and l, l, #0x000fffffffffffff - mul l, u, l - lsr t, h, #52 - add l, l, t + extr l, b3, b2, #32 + and l, l, #0x000fffffffffffff + mul l, u, l + lsr t, h, #52 + add l, l, t - lsl h, h, #12 - extr t, l, h, #32 - adcs s6, s6, t + lsl h, h, #12 + extr t, l, h, #32 + adcs s6, s6, t // 9 * 52 = 64 * 7 + 20 - lsr h, b3, #20 - mul h, u, h - lsr t, l, #52 - add h, h, t + lsr h, b3, #20 + mul h, u, h + lsr t, l, #52 + add h, h, t - lsl l, l, #12 - extr t, h, l, #44 - adcs s7, s7, t + lsl l, l, #12 + extr t, h, l, #44 + adcs s7, s7, t // Top word - lsr h, h, #44 - adc c, c, h + lsr h, h, #44 + adc c, c, h // Rotate [c;s7;...;s0] before storing in the buffer. 
// We want to add 2^512 * H', which splitting H' at bit 9 is // 2^521 * H_top + 2^512 * H_bot == 2^512 * H_bot + H_top (mod p_521) - extr l, s1, s0, #9 - extr h, s2, s1, #9 - stp l, h, [z] + extr l, s1, s0, #9 + extr h, s2, s1, #9 + stp l, h, [z] - extr l, s3, s2, #9 - extr h, s4, s3, #9 - stp l, h, [z, #16] + extr l, s3, s2, #9 + extr h, s4, s3, #9 + stp l, h, [z, #16] - extr l, s5, s4, #9 - extr h, s6, s5, #9 - stp l, h, [z, #32] + extr l, s5, s4, #9 + extr h, s6, s5, #9 + stp l, h, [z, #32] - extr l, s7, s6, #9 - extr h, c, s7, #9 - stp l, h, [z, #48] + extr l, s7, s6, #9 + extr h, c, s7, #9 + stp l, h, [z, #48] - and t, s0, #0x1FF - lsr c, c, #9 - add t, t, c - str t, [z, #64] + and t, s0, #0x1FF + lsr c, c, #9 + add t, t, c + str t, [z, #64] // Square the lower half with an analogous variant of bignum_sqr_4_8 - mul s2, a0, a2 - mul s7, a1, a3 - umulh t, a0, a2 - subs u, a0, a1 - cneg u, u, cc - csetm s1, cc - subs s0, a3, a2 - cneg s0, s0, cc - mul s6, u, s0 - umulh s0, u, s0 - cinv s1, s1, cc - eor s6, s6, s1 - eor s0, s0, s1 - adds s3, s2, t - adc t, t, xzr - umulh u, a1, a3 - adds s3, s3, s7 - adcs t, t, u - adc u, u, xzr - adds t, t, s7 - adc u, u, xzr - cmn s1, #0x1 - adcs s3, s3, s6 - adcs t, t, s0 - adc u, u, s1 - adds s2, s2, s2 - adcs s3, s3, s3 - adcs t, t, t - adcs u, u, u - adc c, xzr, xzr - mul s0, a0, a0 - mul s6, a1, a1 - mul l, a0, a1 - umulh s1, a0, a0 - umulh s7, a1, a1 - umulh h, a0, a1 - adds s1, s1, l - adcs s6, s6, h - adc s7, s7, xzr - adds s1, s1, l - adcs s6, s6, h - adc s7, s7, xzr - adds s2, s2, s6 - adcs s3, s3, s7 - adcs t, t, xzr - adcs u, u, xzr - adc c, c, xzr - mul s4, a2, a2 - mul s6, a3, a3 - mul l, a2, a3 - umulh s5, a2, a2 - umulh s7, a3, a3 - umulh h, a2, a3 - adds s5, s5, l - adcs s6, s6, h - adc s7, s7, xzr - adds s5, s5, l - adcs s6, s6, h - adc s7, s7, xzr - adds s4, s4, t - adcs s5, s5, u - adcs s6, s6, c - adc s7, s7, xzr + mul s2, a0, a2 + mul s7, a1, a3 + umulh t, a0, a2 + subs u, a0, a1 + cneg u, u, cc + csetm s1, cc + subs s0, a3, a2 + cneg s0, s0, cc + mul s6, u, s0 + umulh s0, u, s0 + cinv s1, s1, cc + eor s6, s6, s1 + eor s0, s0, s1 + adds s3, s2, t + adc t, t, xzr + umulh u, a1, a3 + adds s3, s3, s7 + adcs t, t, u + adc u, u, xzr + adds t, t, s7 + adc u, u, xzr + cmn s1, #0x1 + adcs s3, s3, s6 + adcs t, t, s0 + adc u, u, s1 + adds s2, s2, s2 + adcs s3, s3, s3 + adcs t, t, t + adcs u, u, u + adc c, xzr, xzr + mul s0, a0, a0 + mul s6, a1, a1 + mul l, a0, a1 + umulh s1, a0, a0 + umulh s7, a1, a1 + umulh h, a0, a1 + adds s1, s1, l + adcs s6, s6, h + adc s7, s7, xzr + adds s1, s1, l + adcs s6, s6, h + adc s7, s7, xzr + adds s2, s2, s6 + adcs s3, s3, s7 + adcs t, t, xzr + adcs u, u, xzr + adc c, c, xzr + mul s4, a2, a2 + mul s6, a3, a3 + mul l, a2, a3 + umulh s5, a2, a2 + umulh s7, a3, a3 + umulh h, a2, a3 + adds s5, s5, l + adcs s6, s6, h + adc s7, s7, xzr + adds s5, s5, l + adcs s6, s6, h + adc s7, s7, xzr + adds s4, s4, t + adcs s5, s5, u + adcs s6, s6, c + adc s7, s7, xzr // Add it directly to the existing buffer - ldp l, h, [z] - adds l, l, s0 - adcs h, h, s1 - stp l, h, [z] + ldp l, h, [z] + adds l, l, s0 + adcs h, h, s1 + stp l, h, [z] - ldp l, h, [z, #16] - adcs l, l, s2 - adcs h, h, s3 - stp l, h, [z, #16] + ldp l, h, [z, #16] + adcs l, l, s2 + adcs h, h, s3 + stp l, h, [z, #16] - ldp l, h, [z, #32] - adcs l, l, s4 - adcs h, h, s5 - stp l, h, [z, #32] + ldp l, h, [z, #32] + adcs l, l, s4 + adcs h, h, s5 + stp l, h, [z, #32] - ldp l, h, [z, #48] - adcs l, l, s6 - adcs h, h, s7 - stp l, h, [z, #48] + ldp l, h, [z, #48] + adcs l, l, 
s6 + adcs h, h, s7 + stp l, h, [z, #48] - ldr t, [z, #64] - adc t, t, xzr - str t, [z, #64] + ldr t, [z, #64] + adc t, t, xzr + str t, [z, #64] // Now get the cross-product in [s7,...,s0] with variant of bignum_mul_4_8 - mul s0, a0, b0 - mul s4, a1, b1 - mul s5, a2, b2 - mul s6, a3, b3 - umulh s7, a0, b0 - adds s4, s4, s7 - umulh s7, a1, b1 - adcs s5, s5, s7 - umulh s7, a2, b2 - adcs s6, s6, s7 - umulh s7, a3, b3 - adc s7, s7, xzr - adds s1, s4, s0 - adcs s4, s5, s4 - adcs s5, s6, s5 - adcs s6, s7, s6 - adc s7, xzr, s7 - adds s2, s4, s0 - adcs s3, s5, s1 - adcs s4, s6, s4 - adcs s5, s7, s5 - adcs s6, xzr, s6 - adc s7, xzr, s7 - subs t, a2, a3 - cneg t, t, cc - csetm c, cc - subs h, b3, b2 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s5, s5, l - eor h, h, c - adcs s6, s6, h - adc s7, s7, c - subs t, a0, a1 - cneg t, t, cc - csetm c, cc - subs h, b1, b0 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s1, s1, l - eor h, h, c - adcs s2, s2, h - adcs s3, s3, c - adcs s4, s4, c - adcs s5, s5, c - adcs s6, s6, c - adc s7, s7, c - subs t, a1, a3 - cneg t, t, cc - csetm c, cc - subs h, b3, b1 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s4, s4, l - eor h, h, c - adcs s5, s5, h - adcs s6, s6, c - adc s7, s7, c - subs t, a0, a2 - cneg t, t, cc - csetm c, cc - subs h, b2, b0 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s2, s2, l - eor h, h, c - adcs s3, s3, h - adcs s4, s4, c - adcs s5, s5, c - adcs s6, s6, c - adc s7, s7, c - subs t, a0, a3 - cneg t, t, cc - csetm c, cc - subs h, b3, b0 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s3, s3, l - eor h, h, c - adcs s4, s4, h - adcs s5, s5, c - adcs s6, s6, c - adc s7, s7, c - subs t, a1, a2 - cneg t, t, cc - csetm c, cc - subs h, b2, b1 - cneg h, h, cc - mul l, t, h - umulh h, t, h - cinv c, c, cc - cmn c, #0x1 - eor l, l, c - adcs s3, s3, l - eor h, h, c - adcs s4, s4, h - adcs s5, s5, c - adcs s6, s6, c - adc s7, s7, c + mul s0, a0, b0 + mul s4, a1, b1 + mul s5, a2, b2 + mul s6, a3, b3 + umulh s7, a0, b0 + adds s4, s4, s7 + umulh s7, a1, b1 + adcs s5, s5, s7 + umulh s7, a2, b2 + adcs s6, s6, s7 + umulh s7, a3, b3 + adc s7, s7, xzr + adds s1, s4, s0 + adcs s4, s5, s4 + adcs s5, s6, s5 + adcs s6, s7, s6 + adc s7, xzr, s7 + adds s2, s4, s0 + adcs s3, s5, s1 + adcs s4, s6, s4 + adcs s5, s7, s5 + adcs s6, xzr, s6 + adc s7, xzr, s7 + subs t, a2, a3 + cneg t, t, cc + csetm c, cc + subs h, b3, b2 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s5, s5, l + eor h, h, c + adcs s6, s6, h + adc s7, s7, c + subs t, a0, a1 + cneg t, t, cc + csetm c, cc + subs h, b1, b0 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s1, s1, l + eor h, h, c + adcs s2, s2, h + adcs s3, s3, c + adcs s4, s4, c + adcs s5, s5, c + adcs s6, s6, c + adc s7, s7, c + subs t, a1, a3 + cneg t, t, cc + csetm c, cc + subs h, b3, b1 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s4, s4, l + eor h, h, c + adcs s5, s5, h + adcs s6, s6, c + adc s7, s7, c + subs t, a0, a2 + cneg t, t, cc + csetm c, cc + subs h, b2, b0 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s2, s2, l + eor h, h, c + adcs s3, s3, h + adcs s4, s4, c + adcs s5, s5, c + adcs s6, s6, c + 
adc s7, s7, c + subs t, a0, a3 + cneg t, t, cc + csetm c, cc + subs h, b3, b0 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s3, s3, l + eor h, h, c + adcs s4, s4, h + adcs s5, s5, c + adcs s6, s6, c + adc s7, s7, c + subs t, a1, a2 + cneg t, t, cc + csetm c, cc + subs h, b2, b1 + cneg h, h, cc + mul l, t, h + umulh h, t, h + cinv c, c, cc + cmn c, #0x1 + eor l, l, c + adcs s3, s3, l + eor h, h, c + adcs s4, s4, h + adcs s5, s5, c + adcs s6, s6, c + adc s7, s7, c // Let the cross product be M. We want to add 2^256 * 2 * M to the buffer // Split M into M_top (248 bits) and M_bot (264 bits), so we add @@ -528,87 +528,87 @@ _bignum_sqr_p521: // As this sum is built, accumulate t = AND of words d7...d1 to help // in condensing the carry chain in the comparison that comes next - ldp l, h, [z] - extr d0, s5, s4, #8 - adds d0, d0, l - extr d1, s6, s5, #8 - adcs d1, d1, h - - ldp l, h, [z, #16] - extr d2, s7, s6, #8 - adcs d2, d2, l - and t, d1, d2 - lsr d3, s7, #8 - adcs d3, d3, h - and t, t, d3 - - ldp l, h, [z, #32] - lsl d4, s0, #1 - adcs d4, d4, l - and t, t, d4 - extr d5, s1, s0, #63 - adcs d5, d5, h - and t, t, d5 - - ldp l, h, [z, #48] - extr d6, s2, s1, #63 - adcs d6, d6, l - and t, t, d6 - extr d7, s3, s2, #63 - adcs d7, d7, h - and t, t, d7 - - ldr l, [z, #64] - extr d8, s4, s3, #63 - and d8, d8, #0x1FF - adc d8, l, d8 + ldp l, h, [z] + extr d0, s5, s4, #8 + adds d0, d0, l + extr d1, s6, s5, #8 + adcs d1, d1, h + + ldp l, h, [z, #16] + extr d2, s7, s6, #8 + adcs d2, d2, l + and t, d1, d2 + lsr d3, s7, #8 + adcs d3, d3, h + and t, t, d3 + + ldp l, h, [z, #32] + lsl d4, s0, #1 + adcs d4, d4, l + and t, t, d4 + extr d5, s1, s0, #63 + adcs d5, d5, h + and t, t, d5 + + ldp l, h, [z, #48] + extr d6, s2, s1, #63 + adcs d6, d6, l + and t, t, d6 + extr d7, s3, s2, #63 + adcs d7, d7, h + and t, t, d7 + + ldr l, [z, #64] + extr d8, s4, s3, #63 + and d8, d8, #0x1FF + adc d8, l, d8 // Extract the high part h and mask off the low part l = [d8;d7;...;d0] // but stuff d8 with 1 bits at the left to ease a comparison below - lsr h, d8, #9 - orr d8, d8, #~0x1FF + lsr h, d8, #9 + orr d8, d8, #~0x1FF // Decide whether h + l >= p_521 <=> h + l + 1 >= 2^521. Since this can only // happen if digits d7,...d1 are all 1s, we use the AND of them "t" to // condense the carry chain, and since we stuffed 1 bits into d8 we get // the result in CF without an additional comparison. - subs xzr, xzr, xzr - adcs xzr, d0, h - adcs xzr, t, xzr - adcs xzr, d8, xzr + subs xzr, xzr, xzr + adcs xzr, d0, h + adcs xzr, t, xzr + adcs xzr, d8, xzr // Now if CF is set we want (h + l) - p_521 = (h + l + 1) - 2^521 // while otherwise we want just h + l. So mask h + l + CF to 521 bits. // This masking also gets rid of the stuffing with 1s we did above. 
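// (Illustrative aside, not part of this patch: the whole step computes the
// standard reduction
//
//     s = h + l;
//     z = (s >= p_521) ? s - p_521 : s;
//
// using s >= p_521 <=> s + 1 >= 2^521, since p_521 = 2^521 - 1. A carry out
// of d0 + h can ripple past digits d1..d7 only when every one of them is
// 0xFFFFFFFFFFFFFFFF, i.e. exactly when their AND t is all 1s, so a single
// adc on t stands in for seven separate ones.)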
- adcs d0, d0, h - adcs d1, d1, xzr - adcs d2, d2, xzr - adcs d3, d3, xzr - adcs d4, d4, xzr - adcs d5, d5, xzr - adcs d6, d6, xzr - adcs d7, d7, xzr - adc d8, d8, xzr - and d8, d8, #0x1FF + adcs d0, d0, h + adcs d1, d1, xzr + adcs d2, d2, xzr + adcs d3, d3, xzr + adcs d4, d4, xzr + adcs d5, d5, xzr + adcs d6, d6, xzr + adcs d7, d7, xzr + adc d8, d8, xzr + and d8, d8, #0x1FF // Store the final result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] - stp d6, d7, [z, #48] - str d8, [z, #64] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] + stp d6, d7, [z, #48] + str d8, [z, #64] // Restore regs and return - ldp x23, x24, [sp], #16 - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_sqr_p521_alt.S b/arm/p521/bignum_sqr_p521_alt.S index 02faa5f2d0..c2d7bc996b 100644 --- a/arm/p521/bignum_sqr_p521_alt.S +++ b/arm/p521/bignum_sqr_p521_alt.S @@ -65,309 +65,309 @@ _bignum_sqr_p521_alt: // It's convenient to have more registers to play with - stp x19, x20, [sp, #-16]! - stp x21, x22, [sp, #-16]! - stp x23, x24, [sp, #-16]! - stp x25, x26, [sp, #-16]! - stp x27, x29, [sp, #-16]! + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x27, x29, [sp, #-16]! // Load low 8 elements as [a7;a6;a5;a4;a3;a2;a1;a0], set up an initial // window [u8;u7;u6;u5;u4;u3;u2;u1] = 10 + 20 + 30 + 40 + 50 + 60 + 70 - ldp a0, a1, [x] + ldp a0, a1, [x] - mul u1, a0, a1 - umulh u2, a0, a1 + mul u1, a0, a1 + umulh u2, a0, a1 - ldp a2, a3, [x, #16] + ldp a2, a3, [x, #16] - mul l, a0, a2 - umulh u3, a0, a2 - adds u2, u2, l + mul l, a0, a2 + umulh u3, a0, a2 + adds u2, u2, l - ldp a4, a5, [x, #32] + ldp a4, a5, [x, #32] - mul l, a0, a3 - umulh u4, a0, a3 - adcs u3, u3, l + mul l, a0, a3 + umulh u4, a0, a3 + adcs u3, u3, l - ldp a6, a7, [x, #48] + ldp a6, a7, [x, #48] - mul l, a0, a4 - umulh u5, a0, a4 - adcs u4, u4, l + mul l, a0, a4 + umulh u5, a0, a4 + adcs u4, u4, l - mul l, a0, a5 - umulh u6, a0, a5 - adcs u5, u5, l + mul l, a0, a5 + umulh u6, a0, a5 + adcs u5, u5, l - mul l, a0, a6 - umulh u7, a0, a6 - adcs u6, u6, l + mul l, a0, a6 + umulh u7, a0, a6 + adcs u6, u6, l - mul l, a0, a7 - umulh u8, a0, a7 - adcs u7, u7, l + mul l, a0, a7 + umulh u8, a0, a7 + adcs u7, u7, l - adc u8, u8, xzr + adc u8, u8, xzr // Add in the next diagonal = 21 + 31 + 41 + 51 + 61 + 71 + 54 - mul l, a1, a2 - adds u3, u3, l - mul l, a1, a3 - adcs u4, u4, l - mul l, a1, a4 - adcs u5, u5, l - mul l, a1, a5 - adcs u6, u6, l - mul l, a1, a6 - adcs u7, u7, l - mul l, a1, a7 - adcs u8, u8, l - cset u9, cs - - umulh l, a1, a2 - adds u4, u4, l - umulh l, a1, a3 - adcs u5, u5, l - umulh l, a1, a4 - adcs u6, u6, l - umulh l, a1, a5 - adcs u7, u7, l - umulh l, a1, a6 - adcs u8, u8, l - umulh l, a1, a7 - adc u9, u9, l - mul l, a4, a5 - umulh u10, a4, a5 - adds u9, u9, l - adc u10, u10, xzr + mul l, a1, a2 + adds u3, u3, l + mul l, a1, a3 + adcs u4, u4, l + mul l, a1, a4 + adcs u5, u5, l + mul l, a1, a5 + adcs u6, u6, l + mul l, a1, a6 + adcs u7, u7, l + mul l, a1, a7 + adcs u8, u8, l + cset u9, cs + + umulh l, a1, a2 + adds u4, u4, l + umulh l, a1, a3 + adcs u5, u5, l + umulh l, a1, a4 + adcs u6, u6, l + umulh l, a1, a5 + adcs u7, u7, l + umulh l, a1, a6 + adcs u8, u8, l + umulh l, a1, a7 + adc u9, u9, l + mul l, a4, a5 + umulh u10, a4, a5 + adds u9, u9, l + adc u10, 
u10, xzr // And the next one = 32 + 42 + 52 + 62 + 72 + 64 + 65 - mul l, a2, a3 - adds u5, u5, l - mul l, a2, a4 - adcs u6, u6, l - mul l, a2, a5 - adcs u7, u7, l - mul l, a2, a6 - adcs u8, u8, l - mul l, a2, a7 - adcs u9, u9, l - mul l, a4, a6 - adcs u10, u10, l - cset u11, cs - - umulh l, a2, a3 - adds u6, u6, l - umulh l, a2, a4 - adcs u7, u7, l - umulh l, a2, a5 - adcs u8, u8, l - umulh l, a2, a6 - adcs u9, u9, l - umulh l, a2, a7 - adcs u10, u10, l - umulh l, a4, a6 - adc u11, u11, l - mul l, a5, a6 - umulh u12, a5, a6 - adds u11, u11, l - adc u12, u12, xzr + mul l, a2, a3 + adds u5, u5, l + mul l, a2, a4 + adcs u6, u6, l + mul l, a2, a5 + adcs u7, u7, l + mul l, a2, a6 + adcs u8, u8, l + mul l, a2, a7 + adcs u9, u9, l + mul l, a4, a6 + adcs u10, u10, l + cset u11, cs + + umulh l, a2, a3 + adds u6, u6, l + umulh l, a2, a4 + adcs u7, u7, l + umulh l, a2, a5 + adcs u8, u8, l + umulh l, a2, a6 + adcs u9, u9, l + umulh l, a2, a7 + adcs u10, u10, l + umulh l, a4, a6 + adc u11, u11, l + mul l, a5, a6 + umulh u12, a5, a6 + adds u11, u11, l + adc u12, u12, xzr // And the final one = 43 + 53 + 63 + 73 + 74 + 75 + 76 - mul l, a3, a4 - adds u7, u7, l - mul l, a3, a5 - adcs u8, u8, l - mul l, a3, a6 - adcs u9, u9, l - mul l, a3, a7 - adcs u10, u10, l - mul l, a4, a7 - adcs u11, u11, l - mul l, a5, a7 - adcs u12, u12, l - cset u13, cs - - umulh l, a3, a4 - adds u8, u8, l - umulh l, a3, a5 - adcs u9, u9, l - umulh l, a3, a6 - adcs u10, u10, l - umulh l, a3, a7 - adcs u11, u11, l - umulh l, a4, a7 - adcs u12, u12, l - umulh l, a5, a7 - adc u13, u13, l - mul l, a6, a7 - umulh u14, a6, a7 - adds u13, u13, l - adc u14, u14, xzr + mul l, a3, a4 + adds u7, u7, l + mul l, a3, a5 + adcs u8, u8, l + mul l, a3, a6 + adcs u9, u9, l + mul l, a3, a7 + adcs u10, u10, l + mul l, a4, a7 + adcs u11, u11, l + mul l, a5, a7 + adcs u12, u12, l + cset u13, cs + + umulh l, a3, a4 + adds u8, u8, l + umulh l, a3, a5 + adcs u9, u9, l + umulh l, a3, a6 + adcs u10, u10, l + umulh l, a3, a7 + adcs u11, u11, l + umulh l, a4, a7 + adcs u12, u12, l + umulh l, a5, a7 + adc u13, u13, l + mul l, a6, a7 + umulh u14, a6, a7 + adds u13, u13, l + adc u14, u14, xzr // Double that, with u15 holding the top carry - adds u1, u1, u1 - adcs u2, u2, u2 - adcs u3, u3, u3 - adcs u4, u4, u4 - adcs u5, u5, u5 - adcs u6, u6, u6 - adcs u7, u7, u7 - adcs u8, u8, u8 - adcs u9, u9, u9 - adcs u10, u10, u10 - adcs u11, u11, u11 - adcs u12, u12, u12 - adcs u13, u13, u13 - adcs u14, u14, u14 - cset u15, cs + adds u1, u1, u1 + adcs u2, u2, u2 + adcs u3, u3, u3 + adcs u4, u4, u4 + adcs u5, u5, u5 + adcs u6, u6, u6 + adcs u7, u7, u7 + adcs u8, u8, u8 + adcs u9, u9, u9 + adcs u10, u10, u10 + adcs u11, u11, u11 + adcs u12, u12, u12 + adcs u13, u13, u13 + adcs u14, u14, u14 + cset u15, cs // Add the homogeneous terms 00 + 11 + 22 + 33 + 44 + 55 + 66 + 77 - umulh l, a0, a0 - mul u0, a0, a0 - adds u1, u1, l - - mul l, a1, a1 - adcs u2, u2, l - umulh l, a1, a1 - adcs u3, u3, l - - mul l, a2, a2 - adcs u4, u4, l - umulh l, a2, a2 - adcs u5, u5, l - - mul l, a3, a3 - adcs u6, u6, l - umulh l, a3, a3 - adcs u7, u7, l - - mul l, a4, a4 - adcs u8, u8, l - umulh l, a4, a4 - adcs u9, u9, l - - mul l, a5, a5 - adcs u10, u10, l - umulh l, a5, a5 - adcs u11, u11, l - - mul l, a6, a6 - adcs u12, u12, l - umulh l, a6, a6 - adcs u13, u13, l - - mul l, a7, a7 - adcs u14, u14, l - umulh l, a7, a7 - adc u15, u15, l + umulh l, a0, a0 + mul u0, a0, a0 + adds u1, u1, l + + mul l, a1, a1 + adcs u2, u2, l + umulh l, a1, a1 + adcs u3, u3, l + + mul l, a2, a2 + adcs u4, u4, l + umulh l, 
a2, a2 + adcs u5, u5, l + + mul l, a3, a3 + adcs u6, u6, l + umulh l, a3, a3 + adcs u7, u7, l + + mul l, a4, a4 + adcs u8, u8, l + umulh l, a4, a4 + adcs u9, u9, l + + mul l, a5, a5 + adcs u10, u10, l + umulh l, a5, a5 + adcs u11, u11, l + + mul l, a6, a6 + adcs u12, u12, l + umulh l, a6, a6 + adcs u13, u13, l + + mul l, a7, a7 + adcs u14, u14, l + umulh l, a7, a7 + adc u15, u15, l // Now load in the top digit a8, and also set up its double and square - ldr a8, [x, #64] - mul u16, a8, a8 - add a8, a8, a8 + ldr a8, [x, #64] + mul u16, a8, a8 + add a8, a8, a8 // Add a8 * [a7;...;a0] into the top of the buffer - mul l, a8, a0 - adds u8, u8, l - mul l, a8, a1 - adcs u9, u9, l - mul l, a8, a2 - adcs u10, u10, l - mul l, a8, a3 - adcs u11, u11, l - mul l, a8, a4 - adcs u12, u12, l - mul l, a8, a5 - adcs u13, u13, l - mul l, a8, a6 - adcs u14, u14, l - mul l, a8, a7 - adcs u15, u15, l - adc u16, u16, xzr - - umulh l, a8, a0 - adds u9, u9, l - umulh l, a8, a1 - adcs u10, u10, l - umulh l, a8, a2 - adcs u11, u11, l - umulh l, a8, a3 - adcs u12, u12, l - umulh l, a8, a4 - adcs u13, u13, l - umulh l, a8, a5 - adcs u14, u14, l - umulh l, a8, a6 - adcs u15, u15, l - umulh l, a8, a7 - adc u16, u16, l + mul l, a8, a0 + adds u8, u8, l + mul l, a8, a1 + adcs u9, u9, l + mul l, a8, a2 + adcs u10, u10, l + mul l, a8, a3 + adcs u11, u11, l + mul l, a8, a4 + adcs u12, u12, l + mul l, a8, a5 + adcs u13, u13, l + mul l, a8, a6 + adcs u14, u14, l + mul l, a8, a7 + adcs u15, u15, l + adc u16, u16, xzr + + umulh l, a8, a0 + adds u9, u9, l + umulh l, a8, a1 + adcs u10, u10, l + umulh l, a8, a2 + adcs u11, u11, l + umulh l, a8, a3 + adcs u12, u12, l + umulh l, a8, a4 + adcs u13, u13, l + umulh l, a8, a5 + adcs u14, u14, l + umulh l, a8, a6 + adcs u15, u15, l + umulh l, a8, a7 + adc u16, u16, l // Now we have the full product, which we consider as // 2^521 * h + l. Form h + l + 1 - subs xzr, xzr, xzr - extr l, u9, u8, #9 - adcs u0, u0, l - extr l, u10, u9, #9 - adcs u1, u1, l - extr l, u11, u10, #9 - adcs u2, u2, l - extr l, u12, u11, #9 - adcs u3, u3, l - extr l, u13, u12, #9 - adcs u4, u4, l - extr l, u14, u13, #9 - adcs u5, u5, l - extr l, u15, u14, #9 - adcs u6, u6, l - extr l, u16, u15, #9 - adcs u7, u7, l - orr u8, u8, #~0x1FF - lsr l, u16, #9 - adcs u8, u8, l + subs xzr, xzr, xzr + extr l, u9, u8, #9 + adcs u0, u0, l + extr l, u10, u9, #9 + adcs u1, u1, l + extr l, u11, u10, #9 + adcs u2, u2, l + extr l, u12, u11, #9 + adcs u3, u3, l + extr l, u13, u12, #9 + adcs u4, u4, l + extr l, u14, u13, #9 + adcs u5, u5, l + extr l, u15, u14, #9 + adcs u6, u6, l + extr l, u16, u15, #9 + adcs u7, u7, l + orr u8, u8, #~0x1FF + lsr l, u16, #9 + adcs u8, u8, l // Now CF is set if h + l + 1 >= 2^521, which means it's already // the answer, while if ~CF the answer is h + l so we should subtract // 1 (all considered in 521 bits). Hence subtract ~CF and mask. 
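// (Illustrative aside, not part of this patch: with s = h + l + 1 taken
// modulo 2^521 and c the carry out of bit 521, the correction below is
//
//     z = (s - (1 - c)) & (2^521 - 1)     // subtract ~CF, then mask
//
// If c = 1 then h + l >= p_521 and s is already h + l - p_521; if c = 0
// the 1 added when forming s is taken back out, leaving plain h + l.)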
- sbcs u0, u0, xzr - sbcs u1, u1, xzr - sbcs u2, u2, xzr - sbcs u3, u3, xzr - sbcs u4, u4, xzr - sbcs u5, u5, xzr - sbcs u6, u6, xzr - sbcs u7, u7, xzr - sbc u8, u8, xzr - and u8, u8, #0x1FF + sbcs u0, u0, xzr + sbcs u1, u1, xzr + sbcs u2, u2, xzr + sbcs u3, u3, xzr + sbcs u4, u4, xzr + sbcs u5, u5, xzr + sbcs u6, u6, xzr + sbcs u7, u7, xzr + sbc u8, u8, xzr + and u8, u8, #0x1FF // Store back digits of final result - stp u0, u1, [z] - stp u2, u3, [z, #16] - stp u4, u5, [z, #32] - stp u6, u7, [z, #48] - str u8, [z, #64] + stp u0, u1, [z] + stp u2, u3, [z, #16] + stp u4, u5, [z, #32] + stp u6, u7, [z, #48] + str u8, [z, #64] // Restore registers and return - ldp x27, x29, [sp], #16 - ldp x25, x26, [sp], #16 - ldp x23, x24, [sp], #16 - ldp x21, x22, [sp], #16 - ldp x19, x20, [sp], #16 + ldp x27, x29, [sp], #16 + ldp x25, x26, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_sub_p521.S b/arm/p521/bignum_sub_p521.S index b7a78f6973..f1cd34ed4b 100644 --- a/arm/p521/bignum_sub_p521.S +++ b/arm/p521/bignum_sub_p521.S @@ -49,50 +49,50 @@ _bignum_sub_p521: // First just subtract the numbers as [d8;d7;d6;d5;d4;d3;d2;d1;d0] = x - y - ldp d0, d1, [x] - ldp l, h, [y] - subs d0, d0, l - sbcs d1, d1, h - ldp d2, d3, [x, #16] - ldp l, h, [y, #16] - sbcs d2, d2, l - sbcs d3, d3, h - ldp d4, d5, [x, #32] - ldp l, h, [y, #32] - sbcs d4, d4, l - sbcs d5, d5, h - ldp d6, d7, [x, #48] - ldp l, h, [y, #48] - sbcs d6, d6, l - sbcs d7, d7, h - ldr d8, [x, #64] - ldr l, [y, #64] - sbcs d8, d8, l + ldp d0, d1, [x] + ldp l, h, [y] + subs d0, d0, l + sbcs d1, d1, h + ldp d2, d3, [x, #16] + ldp l, h, [y, #16] + sbcs d2, d2, l + sbcs d3, d3, h + ldp d4, d5, [x, #32] + ldp l, h, [y, #32] + sbcs d4, d4, l + sbcs d5, d5, h + ldp d6, d7, [x, #48] + ldp l, h, [y, #48] + sbcs d6, d6, l + sbcs d7, d7, h + ldr d8, [x, #64] + ldr l, [y, #64] + sbcs d8, d8, l // Now if x < y we want (x - y) + p_521 == (x - y) - 1 (mod 2^521) // Otherwise we just want the existing x - y result. So subtract // 1 iff the initial subtraction carried, then mask to 521 bits. 
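// (Illustrative aside, not part of this patch: a minimal C sketch of this
// correction, assuming hypothetical 9-word little-endian digits d[] holding
// x - y modulo 2^576 and a flag b that is 1 exactly when x < y:
//
//     for (int i = 0; i < 9; i++) {
//       unsigned __int128 t = (unsigned __int128) d[i] - b;
//       d[i] = (uint64_t) t;
//       b = (uint64_t) (t >> 64) & 1;     // propagate the borrow
//     }
//     d[8] &= 0x1FF;                      // mask back to 521 bits
//
// Subtracting 1 modulo 2^521 is the same as adding p_521 = 2^521 - 1, which
// is precisely the wraparound wanted when the first subtraction borrowed.)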
- sbcs d0, d0, xzr - sbcs d1, d1, xzr - sbcs d2, d2, xzr - sbcs d3, d3, xzr - sbcs d4, d4, xzr - sbcs d5, d5, xzr - sbcs d6, d6, xzr - sbcs d7, d7, xzr - sbcs d8, d8, xzr - and d8, d8, #0x1FF + sbcs d0, d0, xzr + sbcs d1, d1, xzr + sbcs d2, d2, xzr + sbcs d3, d3, xzr + sbcs d4, d4, xzr + sbcs d5, d5, xzr + sbcs d6, d6, xzr + sbcs d7, d7, xzr + sbcs d8, d8, xzr + and d8, d8, #0x1FF // Store the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] - stp d6, d7, [z, #48] - str d8, [z, #64] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] + stp d6, d7, [z, #48] + str d8, [z, #64] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_tolebytes_p521.S b/arm/p521/bignum_tolebytes_p521.S index 4ab76f0ea9..16f87ec272 100644 --- a/arm/p521/bignum_tolebytes_p521.S +++ b/arm/p521/bignum_tolebytes_p521.S @@ -41,164 +41,164 @@ _bignum_tolebytes_p521: // word 0 - ldr d, [x] - strb dshort, [z] - lsr d, d, #8 - strb dshort, [z, #1] - lsr d, d, #8 - strb dshort, [z, #2] - lsr d, d, #8 - strb dshort, [z, #3] - lsr d, d, #8 - strb dshort, [z, #4] - lsr d, d, #8 - strb dshort, [z, #5] - lsr d, d, #8 - strb dshort, [z, #6] - lsr d, d, #8 - strb dshort, [z, #7] + ldr d, [x] + strb dshort, [z] + lsr d, d, #8 + strb dshort, [z, #1] + lsr d, d, #8 + strb dshort, [z, #2] + lsr d, d, #8 + strb dshort, [z, #3] + lsr d, d, #8 + strb dshort, [z, #4] + lsr d, d, #8 + strb dshort, [z, #5] + lsr d, d, #8 + strb dshort, [z, #6] + lsr d, d, #8 + strb dshort, [z, #7] // word 1 - ldr d, [x, #8] - strb dshort, [z, #8] - lsr d, d, #8 - strb dshort, [z, #9] - lsr d, d, #8 - strb dshort, [z, #10] - lsr d, d, #8 - strb dshort, [z, #11] - lsr d, d, #8 - strb dshort, [z, #12] - lsr d, d, #8 - strb dshort, [z, #13] - lsr d, d, #8 - strb dshort, [z, #14] - lsr d, d, #8 - strb dshort, [z, #15] + ldr d, [x, #8] + strb dshort, [z, #8] + lsr d, d, #8 + strb dshort, [z, #9] + lsr d, d, #8 + strb dshort, [z, #10] + lsr d, d, #8 + strb dshort, [z, #11] + lsr d, d, #8 + strb dshort, [z, #12] + lsr d, d, #8 + strb dshort, [z, #13] + lsr d, d, #8 + strb dshort, [z, #14] + lsr d, d, #8 + strb dshort, [z, #15] // word 2 - ldr d, [x, #16] - strb dshort, [z, #16] - lsr d, d, #8 - strb dshort, [z, #17] - lsr d, d, #8 - strb dshort, [z, #18] - lsr d, d, #8 - strb dshort, [z, #19] - lsr d, d, #8 - strb dshort, [z, #20] - lsr d, d, #8 - strb dshort, [z, #21] - lsr d, d, #8 - strb dshort, [z, #22] - lsr d, d, #8 - strb dshort, [z, #23] + ldr d, [x, #16] + strb dshort, [z, #16] + lsr d, d, #8 + strb dshort, [z, #17] + lsr d, d, #8 + strb dshort, [z, #18] + lsr d, d, #8 + strb dshort, [z, #19] + lsr d, d, #8 + strb dshort, [z, #20] + lsr d, d, #8 + strb dshort, [z, #21] + lsr d, d, #8 + strb dshort, [z, #22] + lsr d, d, #8 + strb dshort, [z, #23] // word 3 - ldr d, [x, #24] - strb dshort, [z, #24] - lsr d, d, #8 - strb dshort, [z, #25] - lsr d, d, #8 - strb dshort, [z, #26] - lsr d, d, #8 - strb dshort, [z, #27] - lsr d, d, #8 - strb dshort, [z, #28] - lsr d, d, #8 - strb dshort, [z, #29] - lsr d, d, #8 - strb dshort, [z, #30] - lsr d, d, #8 - strb dshort, [z, #31] + ldr d, [x, #24] + strb dshort, [z, #24] + lsr d, d, #8 + strb dshort, [z, #25] + lsr d, d, #8 + strb dshort, [z, #26] + lsr d, d, #8 + strb dshort, [z, #27] + lsr d, d, #8 + strb dshort, [z, #28] + lsr d, d, #8 + strb dshort, [z, #29] + lsr d, d, #8 + strb dshort, [z, #30] + lsr d, d, #8 + strb dshort, [z, #31] // word 4 - ldr d, [x, #32] - strb dshort, [z, #32] - lsr d, d, #8 - strb dshort, 
[z, #33] - lsr d, d, #8 - strb dshort, [z, #34] - lsr d, d, #8 - strb dshort, [z, #35] - lsr d, d, #8 - strb dshort, [z, #36] - lsr d, d, #8 - strb dshort, [z, #37] - lsr d, d, #8 - strb dshort, [z, #38] - lsr d, d, #8 - strb dshort, [z, #39] + ldr d, [x, #32] + strb dshort, [z, #32] + lsr d, d, #8 + strb dshort, [z, #33] + lsr d, d, #8 + strb dshort, [z, #34] + lsr d, d, #8 + strb dshort, [z, #35] + lsr d, d, #8 + strb dshort, [z, #36] + lsr d, d, #8 + strb dshort, [z, #37] + lsr d, d, #8 + strb dshort, [z, #38] + lsr d, d, #8 + strb dshort, [z, #39] // word 5 - ldr d, [x, #40] - strb dshort, [z, #40] - lsr d, d, #8 - strb dshort, [z, #41] - lsr d, d, #8 - strb dshort, [z, #42] - lsr d, d, #8 - strb dshort, [z, #43] - lsr d, d, #8 - strb dshort, [z, #44] - lsr d, d, #8 - strb dshort, [z, #45] - lsr d, d, #8 - strb dshort, [z, #46] - lsr d, d, #8 - strb dshort, [z, #47] + ldr d, [x, #40] + strb dshort, [z, #40] + lsr d, d, #8 + strb dshort, [z, #41] + lsr d, d, #8 + strb dshort, [z, #42] + lsr d, d, #8 + strb dshort, [z, #43] + lsr d, d, #8 + strb dshort, [z, #44] + lsr d, d, #8 + strb dshort, [z, #45] + lsr d, d, #8 + strb dshort, [z, #46] + lsr d, d, #8 + strb dshort, [z, #47] // word 6 - ldr d, [x, #48] - strb dshort, [z, #48] - lsr d, d, #8 - strb dshort, [z, #49] - lsr d, d, #8 - strb dshort, [z, #50] - lsr d, d, #8 - strb dshort, [z, #51] - lsr d, d, #8 - strb dshort, [z, #52] - lsr d, d, #8 - strb dshort, [z, #53] - lsr d, d, #8 - strb dshort, [z, #54] - lsr d, d, #8 - strb dshort, [z, #55] + ldr d, [x, #48] + strb dshort, [z, #48] + lsr d, d, #8 + strb dshort, [z, #49] + lsr d, d, #8 + strb dshort, [z, #50] + lsr d, d, #8 + strb dshort, [z, #51] + lsr d, d, #8 + strb dshort, [z, #52] + lsr d, d, #8 + strb dshort, [z, #53] + lsr d, d, #8 + strb dshort, [z, #54] + lsr d, d, #8 + strb dshort, [z, #55] // word 7 - ldr d, [x, #56] - strb dshort, [z, #56] - lsr d, d, #8 - strb dshort, [z, #57] - lsr d, d, #8 - strb dshort, [z, #58] - lsr d, d, #8 - strb dshort, [z, #59] - lsr d, d, #8 - strb dshort, [z, #60] - lsr d, d, #8 - strb dshort, [z, #61] - lsr d, d, #8 - strb dshort, [z, #62] - lsr d, d, #8 - strb dshort, [z, #63] + ldr d, [x, #56] + strb dshort, [z, #56] + lsr d, d, #8 + strb dshort, [z, #57] + lsr d, d, #8 + strb dshort, [z, #58] + lsr d, d, #8 + strb dshort, [z, #59] + lsr d, d, #8 + strb dshort, [z, #60] + lsr d, d, #8 + strb dshort, [z, #61] + lsr d, d, #8 + strb dshort, [z, #62] + lsr d, d, #8 + strb dshort, [z, #63] // word 8 - ldr d, [x, #64] - strb dshort, [z, #64] - lsr d, d, #8 - strb dshort, [z, #65] + ldr d, [x, #64] + strb dshort, [z, #64] + lsr d, d, #8 + strb dshort, [z, #65] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_tomont_p521.S b/arm/p521/bignum_tomont_p521.S index e1e5ce4d51..0aac27f3ba 100644 --- a/arm/p521/bignum_tomont_p521.S +++ b/arm/p521/bignum_tomont_p521.S @@ -50,8 +50,8 @@ _bignum_tomont_p521: // separate out x = 2^521 * H + L with h = H. Now x mod p_521 = // (H + L) mod p_521 = if H + L >= p_521 then H + L - p_521 else H + L. - ldr d8, [x, #64] - lsr h, d8, #9 + ldr d8, [x, #64] + lsr h, d8, #9 // Load in the other digits and decide whether H + L >= p_521. This is // equivalent to H + L + 1 >= 2^521, and since this can only happen if @@ -60,35 +60,35 @@ _bignum_tomont_p521: // This condenses only three pairs; the payoff beyond that seems limited. 
// By stuffing in 1 bits from 521 position upwards, get CF directly - subs xzr, xzr, xzr - ldp d0, d1, [x] - adcs xzr, d0, h - adcs xzr, d1, xzr - ldp d2, d3, [x, #16] - and t, d2, d3 - adcs xzr, t, xzr - ldp d4, d5, [x, #32] - and t, d4, d5 - adcs xzr, t, xzr - ldp d6, d7, [x, #48] - and t, d6, d7 - adcs xzr, t, xzr - orr t, d8, #~0x1FF - adcs t, t, xzr + subs xzr, xzr, xzr + ldp d0, d1, [x] + adcs xzr, d0, h + adcs xzr, d1, xzr + ldp d2, d3, [x, #16] + and t, d2, d3 + adcs xzr, t, xzr + ldp d4, d5, [x, #32] + and t, d4, d5 + adcs xzr, t, xzr + ldp d6, d7, [x, #48] + and t, d6, d7 + adcs xzr, t, xzr + orr t, d8, #~0x1FF + adcs t, t, xzr // Now H + L >= p_521 <=> H + L + 1 >= 2^521 <=> CF from this comparison. // So if CF is set we want (H + L) - p_521 = (H + L + 1) - 2^521 // while otherwise we want just H + L. So mask H + L + CF to 521 bits. - adcs d0, d0, h - adcs d1, d1, xzr - adcs d2, d2, xzr - adcs d3, d3, xzr - adcs d4, d4, xzr - adcs d5, d5, xzr - adcs d6, d6, xzr - adcs d7, d7, xzr - adc d8, d8, xzr + adcs d0, d0, h + adcs d1, d1, xzr + adcs d2, d2, xzr + adcs d3, d3, xzr + adcs d4, d4, xzr + adcs d5, d5, xzr + adcs d6, d6, xzr + adcs d7, d7, xzr + adc d8, d8, xzr // So far, this is just a modular reduction as in bignum_mod_p521_9, // except that the final masking of d8 is skipped since that comes out @@ -98,27 +98,27 @@ _bignum_tomont_p521: // right-to-left fashion, which might blend better with the carry // chain above, the digit register indices themselves get shuffled up. - lsl t, d0, #55 - extr d0, d1, d0, #9 - extr d1, d2, d1, #9 - extr d2, d3, d2, #9 - extr d3, d4, d3, #9 - extr d4, d5, d4, #9 - extr d5, d6, d5, #9 - extr d6, d7, d6, #9 - extr d7, d8, d7, #9 - lsr d8, d7, #9 - orr t, t, d8 - and d7, d7, #0x1FF + lsl t, d0, #55 + extr d0, d1, d0, #9 + extr d1, d2, d1, #9 + extr d2, d3, d2, #9 + extr d3, d4, d3, #9 + extr d4, d5, d4, #9 + extr d5, d6, d5, #9 + extr d6, d7, d6, #9 + extr d7, d8, d7, #9 + lsr d8, d7, #9 + orr t, t, d8 + and d7, d7, #0x1FF // Store the result from the shuffled registers [d7;d6;...;d1;d0;t] - stp t, d0, [z] - stp d1, d2, [z, #16] - stp d3, d4, [z, #32] - stp d5, d6, [z, #48] - str d7, [z, #64] - ret + stp t, d0, [z] + stp d1, d2, [z, #16] + stp d3, d4, [z, #32] + stp d5, d6, [z, #48] + str d7, [z, #64] + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/arm/p521/bignum_triple_p521.S b/arm/p521/bignum_triple_p521.S index a0a8503554..0a51a5322f 100644 --- a/arm/p521/bignum_triple_p521.S +++ b/arm/p521/bignum_triple_p521.S @@ -54,73 +54,73 @@ _bignum_triple_p521_alt: // Pick out top bit to wrap to the zero position in the doubling step - ldr d8, [x, #64] - lsl l, d8, #55 + ldr d8, [x, #64] + lsl l, d8, #55 // Rotate left to get x' == 2 * x (mod p_521) and add to x + 1 (carryin) to get // s = [d8;d7;d6;d5;d4;d3;d2;d1;d0] = x + x' + 1 == 3 * x + 1 (mod p_521) - subs xzr, xzr, xzr + subs xzr, xzr, xzr - ldp d0, d1, [x] - extr l, d0, l, #63 - extr h, d1, d0, #63 - adcs d0, d0, l + ldp d0, d1, [x] + extr l, d0, l, #63 + extr h, d1, d0, #63 + adcs d0, d0, l - ldp d2, d3, [x, #16] - extr l, d2, d1, #63 - adcs d1, d1, h - extr h, d3, d2, #63 - adcs d2, d2, l + ldp d2, d3, [x, #16] + extr l, d2, d1, #63 + adcs d1, d1, h + extr h, d3, d2, #63 + adcs d2, d2, l - ldp d4, d5, [x, #32] - extr l, d4, d3, #63 - adcs d3, d3, h - extr h, d5, d4, #63 - adcs d4, d4, l + ldp d4, d5, [x, #32] + extr l, d4, d3, #63 + adcs d3, d3, h + extr h, d5, d4, #63 + adcs d4, d4, l - ldp d6, d7, [x, #48] - extr l, d6, d5, #63 - adcs d5, 
d5, h - extr h, d7, d6, #63 - adcs d6, d6, l + ldp d6, d7, [x, #48] + extr l, d6, d5, #63 + adcs d5, d5, h + extr h, d7, d6, #63 + adcs d6, d6, l - extr l, d8, d7, #63 - adcs d7, d7, h - and l, l, #0x1FF - adcs d8, d8, l + extr l, d8, d7, #63 + adcs d7, d7, h + and l, l, #0x1FF + adcs d8, d8, l // We know x, x' < p_521 (they are the same bits except for the positions) // so x + x' + 1 <= 2 * (p_521 - 1) + 1 < 2 * p_521. // Note that x + x' >= p_521 <=> s = x + x' + 1 >= 2^521 // Set CF <=> s = x + x' + 1 >= 2^521 and make it a mask in l as well - subs l, d8, #512 - csetm l, cs + subs l, d8, #512 + csetm l, cs // Now if CF is set (and l is all 1s), we want (x + x') - p_521 = s - 2^521 // while otherwise we want x + x' = s - 1 (from existing CF, which is nice) - sbcs d0, d0, xzr - and l, l, #512 - sbcs d1, d1, xzr - sbcs d2, d2, xzr - sbcs d3, d3, xzr - sbcs d4, d4, xzr - sbcs d5, d5, xzr - sbcs d6, d6, xzr - sbcs d7, d7, xzr - sbc d8, d8, l + sbcs d0, d0, xzr + and l, l, #512 + sbcs d1, d1, xzr + sbcs d2, d2, xzr + sbcs d3, d3, xzr + sbcs d4, d4, xzr + sbcs d5, d5, xzr + sbcs d6, d6, xzr + sbcs d7, d7, xzr + sbc d8, d8, l // Store the result - stp d0, d1, [z] - stp d2, d3, [z, #16] - stp d4, d5, [z, #32] - stp d6, d7, [z, #48] - str d8, [z, #64] + stp d0, d1, [z] + stp d2, d3, [z, #16] + stp d4, d5, [z, #32] + stp d6, d7, [z, #48] + str d8, [z, #64] - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/curve25519/bignum_neg_p25519.S b/x86_att/curve25519/bignum_neg_p25519.S index d246dc8cba..7e69d65164 100644 --- a/x86_att/curve25519/bignum_neg_p25519.S +++ b/x86_att/curve25519/bignum_neg_p25519.S @@ -46,39 +46,39 @@ _bignum_neg_p25519: // Load the 4 digits of x and let q be an OR of all the digits - movq (x), n0 - movq n0, q - movq 8(x), n1 - orq n1, q - movq 16(x), n2 - orq n2, q - movq 24(x), n3 - orq n3, q + movq (x), n0 + movq n0, q + movq 8(x), n1 + orq n1, q + movq 16(x), n2 + orq n2, q + movq 24(x), n3 + orq n3, q // Turn q into a strict x <> 0 bitmask, and c into a masked constant [-19] // so that [q;q;q;c] = [2^256 - 19], masked according to nonzeroness of x - negq q - sbbq q, q - movq $-19, c - andq q, c + negq q + sbbq q, q + movq $-19, c + andq q, c // Now just do [2^256 - 19] - x and then mask to 255 bits, // which means in effect the required [2^255 - 19] - x - subq n0, c - movq c, (z) - movq q, c - sbbq n1, c - movq c, 8(z) - movq q, c - sbbq n2, c - movq c, 16(z) - sbbq n3, q - btr $63, q - movq q, 24(z) - - ret + subq n0, c + movq c, (z) + movq q, c + sbbq n1, c + movq c, 8(z) + movq q, c + sbbq n2, c + movq c, 16(z) + sbbq n3, q + btr $63, q + movq q, 24(z) + + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_bigendian_6.S b/x86_att/p384/bignum_bigendian_6.S index 5e33f5b11c..268ed5b90e 100644 --- a/x86_att/p384/bignum_bigendian_6.S +++ b/x86_att/p384/bignum_bigendian_6.S @@ -64,32 +64,32 @@ _bignum_tobebytes_6: // 0 and 5 words - movq (x), a - movq 40(x), b - bswapq a - bswapq b - movq a, 40(z) - movq b, (z) + movq (x), a + movq 40(x), b + bswapq a + bswapq b + movq a, 40(z) + movq b, (z) // 1 and 4 words - movq 8(x), a - movq 32(x), b - bswapq a - bswapq b - movq a, 32(z) - movq b, 8(z) + movq 8(x), a + movq 32(x), b + bswapq a + bswapq b + movq a, 32(z) + movq b, 8(z) // 2 and 3 words - movq 16(x), a - movq 24(x), b - bswapq a - bswapq b - movq a, 24(z) - movq b, 16(z) + movq 16(x), a + movq 24(x), b + bswapq a + bswapq b + movq a, 24(z) + 
movq b, 16(z) - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_cmul_p384.S b/x86_att/p384/bignum_cmul_p384.S index 26b8f307c0..13a7c0085d 100644 --- a/x86_att/p384/bignum_cmul_p384.S +++ b/x86_att/p384/bignum_cmul_p384.S @@ -58,12 +58,12 @@ _bignum_cmul_p384: // We seem to need (just!) one extra register, which we need to save and restore - pushq %r12 + pushq %r12 // Shuffle inputs (since we want multiplier in %rdx) - movq %rdx, x - movq %rsi, m + movq %rdx, x + movq %rsi, m // Multiply, accumulating the result as 2^384 * h + [d5;d4;d3;d2;d1;d0] // but actually immediately producing q = h + 1, our quotient approximation, @@ -71,18 +71,18 @@ _bignum_cmul_p384: // product is <= (2^64 - 1) * (p_384 - 1) and hence h <= 2^64 - 2, meaning // there is no danger this addition of 1 could wrap. - mulxq (x), d0, d1 - mulxq 8(x), a, d2 - addq a, d1 - mulxq 16(x), a, d3 - adcq a, d2 - mulxq 24(x), a, d4 - adcq a, d3 - mulxq 32(x), a, d5 - adcq a, d4 - mulxq 40(x), a, q - adcq a, d5 - adcq $1, q + mulxq (x), d0, d1 + mulxq 8(x), a, d2 + addq a, d1 + mulxq 16(x), a, d3 + adcq a, d2 + mulxq 24(x), a, d4 + adcq a, d3 + mulxq 32(x), a, d5 + adcq a, d4 + mulxq 40(x), a, q + adcq a, d5 + adcq $1, q // It's easy to see -p_384 <= z - q * p_384 < p_384, so we just need to // subtract q * p_384 and then correct if that is negative by adding p_384. @@ -94,24 +94,24 @@ _bignum_cmul_p384: // = 2^384 * (h - q) + (l + q * r) // = 2^384 * (-1) + (l + q * r) - xorq c, c - movq $0xffffffff00000001, a - mulxq a, a, c - adcxq a, d0 - adoxq c, d1 - movl $0x00000000ffffffff, ashort - mulxq a, a, c - adcxq a, d1 - adoxq c, d2 - adcxq q, d2 - movl $0, ashort - movl $0, cshort - adoxq a, a - adcq a, d3 - adcq c, d4 - adcq c, d5 - adcq c, c - subq $1, c + xorq c, c + movq $0xffffffff00000001, a + mulxq a, a, c + adcxq a, d0 + adoxq c, d1 + movl $0x00000000ffffffff, ashort + mulxq a, a, c + adcxq a, d1 + adoxq c, d2 + adcxq q, d2 + movl $0, ashort + movl $0, cshort + adoxq a, a + adcq a, d3 + adcq c, d4 + adcq c, d5 + adcq c, c + subq $1, c // The net c value is now the top word of the 7-word answer, hence will // be -1 if we need a corrective addition, 0 otherwise, usable as a mask. @@ -119,29 +119,29 @@ _bignum_cmul_p384: // fact done by a masked subtraction of 2^384 - p_384, so that we only // have three nonzero digits and so can avoid using another register. - movl $0x00000000ffffffff, qshort - xorq a, a - andq c, q - subq q, a - andq $1, c - - subq a, d0 - movq d0, (z) - sbbq q, d1 - movq d1, 8(z) - sbbq c, d2 - movq d2, 16(z) - sbbq $0, d3 - movq d3, 24(z) - sbbq $0, d4 - movq d4, 32(z) - sbbq $0, d5 - movq d5, 40(z) + movl $0x00000000ffffffff, qshort + xorq a, a + andq c, q + subq q, a + andq $1, c + + subq a, d0 + movq d0, (z) + sbbq q, d1 + movq d1, 8(z) + sbbq c, d2 + movq d2, 16(z) + sbbq $0, d3 + movq d3, 24(z) + sbbq $0, d4 + movq d4, 32(z) + sbbq $0, d5 + movq d5, 40(z) // Return - popq %r12 - ret + popq %r12 + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_cmul_p384_alt.S b/x86_att/p384/bignum_cmul_p384_alt.S index 99bc717b0f..8dd8d76068 100644 --- a/x86_att/p384/bignum_cmul_p384_alt.S +++ b/x86_att/p384/bignum_cmul_p384_alt.S @@ -62,11 +62,11 @@ _bignum_cmul_p384_alt: // We seem to need (just!) 
one extra register, which we need to save and restore - pushq %r12 + pushq %r12 // Shuffle inputs (since we want %rdx for the high parts of products) - movq %rdx, x + movq %rdx, x // Multiply, accumulating the result as 2^384 * h + [d5;d4;d3;d2;d1;d0] // but actually immediately producing q = h + 1, our quotient approximation, @@ -74,41 +74,41 @@ _bignum_cmul_p384_alt: // product is <= (2^64 - 1) * (p_384 - 1) and hence h <= 2^64 - 2, meaning // there is no danger this addition of 1 could wrap. - movq (x), a - mulq m - movq a, d0 - movq d, d1 - - movq 8(x), a - mulq m - xorq d2, d2 - addq a, d1 - adcq d, d2 - - movq 16(x), a - mulq m - xorq d3, d3 - addq a, d2 - adcq d, d3 - - movq 24(x), a - mulq m - xorq d4, d4 - addq a, d3 - adcq d, d4 - - movq 32(x), a - mulq m - addq a, d4 - adcq $0, d - - movq m, a - movq d, d5 - mulq 40(x) - movl $1, qshort - - addq a, d5 - adcq d, q + movq (x), a + mulq m + movq a, d0 + movq d, d1 + + movq 8(x), a + mulq m + xorq d2, d2 + addq a, d1 + adcq d, d2 + + movq 16(x), a + mulq m + xorq d3, d3 + addq a, d2 + adcq d, d3 + + movq 24(x), a + mulq m + xorq d4, d4 + addq a, d3 + adcq d, d4 + + movq 32(x), a + mulq m + addq a, d4 + adcq $0, d + + movq m, a + movq d, d5 + mulq 40(x) + movl $1, qshort + + addq a, d5 + adcq d, q // It's easy to see -p_384 <= z - q * p_384 < p_384, so we just need to // subtract q * p_384 and then correct if that is negative by adding p_384. @@ -120,23 +120,23 @@ _bignum_cmul_p384_alt: // = 2^384 * (h - q) + (l + q * r) // = 2^384 * (-1) + (l + q * r) - movq $0xffffffff00000001, a - mulq q - addq a, d0 - adcq d, d1 - adcq q, d2 - movq q, a - sbbq c, c - movl $0x00000000ffffffff, dshort - negq c - mulq d - addq a, d1 - adcq d, d2 - adcq c, d3 - adcq $0, d4 - adcq $0, d5 - sbbq c, c - notq c + movq $0xffffffff00000001, a + mulq q + addq a, d0 + adcq d, d1 + adcq q, d2 + movq q, a + sbbq c, c + movl $0x00000000ffffffff, dshort + negq c + mulq d + addq a, d1 + adcq d, d2 + adcq c, d3 + adcq $0, d4 + adcq $0, d5 + sbbq c, c + notq c // The net c value is now the top word of the 7-word answer, hence will // be -1 if we need a corrective addition, 0 otherwise, usable as a mask. @@ -144,29 +144,29 @@ _bignum_cmul_p384_alt: // fact done by a masked subtraction of 2^384 - p_384, so that we only // have three nonzero digits and so can avoid using another register. 
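// (Illustrative aside, not part of this patch: the masked addition is cheap
// because
//
//     2^384 - p_384 = 2^128 + 2^96 - 2^32 + 1
//                   = [0; 0; 0; 1; 0x00000000ffffffff; 0xffffffff00000001]
//
// has only three nonzero digits, and for a mask c of 0 or all 1s we have
// z + (c & p_384) == z - (c & (2^384 - p_384)) (mod 2^384), so adding p_384
// under the mask becomes a short borrow chain over six words.)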
- movl $0x00000000ffffffff, dshort - xorq a, a - andq c, d - subq d, a - andq $1, c - - subq a, d0 - movq d0, (z) - sbbq d, d1 - movq d1, 8(z) - sbbq c, d2 - movq d2, 16(z) - sbbq $0, d3 - movq d3, 24(z) - sbbq $0, d4 - movq d4, 32(z) - sbbq $0, d5 - movq d5, 40(z) + movl $0x00000000ffffffff, dshort + xorq a, a + andq c, d + subq d, a + andq $1, c + + subq a, d0 + movq d0, (z) + sbbq d, d1 + movq d1, 8(z) + sbbq c, d2 + movq d2, 16(z) + sbbq $0, d3 + movq d3, 24(z) + sbbq $0, d4 + movq d4, 32(z) + sbbq $0, d5 + movq d5, 40(z) // Return - popq %r12 - ret + popq %r12 + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_deamont_p384.S b/x86_att/p384/bignum_deamont_p384.S index 0e7871ca0b..733be9a487 100644 --- a/x86_att/p384/bignum_deamont_p384.S +++ b/x86_att/p384/bignum_deamont_p384.S @@ -56,28 +56,28 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ - movq d0, %rdx ; \ - shlq $32, %rdx ; \ - addq d0, %rdx ; \ + movq d0, %rdx ; \ + shlq $32, %rdx ; \ + addq d0, %rdx ; \ /* Construct [%rsi;%rcx;%rax;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel so we can re-use d0 */ \ /* as a temp. */ \ - xorq %rsi, %rsi ; \ - movq $0xffffffff00000001, %rax ; \ - mulxq %rax, %rcx, %rax ; \ - movl $0x00000000ffffffff, %ecx ; \ - mulxq %rcx, d0, %rcx ; \ - adcq d0, %rax ; \ - adcq %rdx, %rcx ; \ - adcq $0, %rsi ; \ + xorq %rsi, %rsi ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rcx, %rax ; \ + movl $0x00000000ffffffff, %ecx ; \ + mulxq %rcx, d0, %rcx ; \ + adcq d0, %rax ; \ + adcq %rdx, %rcx ; \ + adcq $0, %rsi ; \ /* Now subtract that and add 2^384 * w */ \ - subq %rax, d1 ; \ - sbbq %rcx, d2 ; \ - sbbq %rsi, d3 ; \ - sbbq $0, d4 ; \ - sbbq $0, d5 ; \ - movq %rdx, d6 ; \ - sbbq $0, d6 + subq %rax, d1 ; \ + sbbq %rcx, d2 ; \ + sbbq %rsi, d3 ; \ + sbbq $0, d4 ; \ + sbbq $0, d5 ; \ + movq %rdx, d6 ; \ + sbbq $0, d6 bignum_deamont_p384: _bignum_deamont_p384: diff --git a/x86_att/p384/bignum_deamont_p384_alt.S b/x86_att/p384/bignum_deamont_p384_alt.S index 04bc5243f4..6c5a606d19 100644 --- a/x86_att/p384/bignum_deamont_p384_alt.S +++ b/x86_att/p384/bignum_deamont_p384_alt.S @@ -55,29 +55,29 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ - movq d0, %rcx ; \ - shlq $32, %rcx ; \ - addq d0, %rcx ; \ + movq d0, %rcx ; \ + shlq $32, %rcx ; \ + addq d0, %rcx ; \ /* Construct [%rax;%rdx;d0;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel so we can re-use d0 */ \ /* and %rcx as temps. 
*/ \ - movq $0xffffffff00000001, %rax ; \ - mulq %rcx; \ - movq %rdx, d0 ; \ - movq $0x00000000ffffffff, %rax ; \ - mulq %rcx; \ - addq %rax, d0 ; \ - movl $0, %eax ; \ - adcq %rcx, %rdx ; \ - adcl %eax, %eax ; \ + movq $0xffffffff00000001, %rax ; \ + mulq %rcx; \ + movq %rdx, d0 ; \ + movq $0x00000000ffffffff, %rax ; \ + mulq %rcx; \ + addq %rax, d0 ; \ + movl $0, %eax ; \ + adcq %rcx, %rdx ; \ + adcl %eax, %eax ; \ /* Now subtract that and add 2^384 * w */ \ - subq d0, d1 ; \ - sbbq %rdx, d2 ; \ - sbbq %rax, d3 ; \ - sbbq $0, d4 ; \ - sbbq $0, d5 ; \ - movq %rcx, d6 ; \ - sbbq $0, d6 + subq d0, d1 ; \ + sbbq %rdx, d2 ; \ + sbbq %rax, d3 ; \ + sbbq $0, d4 ; \ + sbbq $0, d5 ; \ + movq %rcx, d6 ; \ + sbbq $0, d6 bignum_deamont_p384_alt: _bignum_deamont_p384_alt: diff --git a/x86_att/p384/bignum_demont_p384.S b/x86_att/p384/bignum_demont_p384.S index e3865d372b..059993b8ba 100644 --- a/x86_att/p384/bignum_demont_p384.S +++ b/x86_att/p384/bignum_demont_p384.S @@ -48,28 +48,28 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ - movq d0, %rdx ; \ - shlq $32, %rdx ; \ - addq d0, %rdx ; \ + movq d0, %rdx ; \ + shlq $32, %rdx ; \ + addq d0, %rdx ; \ /* Construct [%rsi;%rcx;%rax;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel so we can re-use d0 */ \ /* as a temp. */ \ - xorq %rsi, %rsi ; \ - movq $0xffffffff00000001, %rax ; \ - mulxq %rax, %rcx, %rax ; \ - movl $0x00000000ffffffff, %ecx ; \ - mulxq %rcx, d0, %rcx ; \ - adcq d0, %rax ; \ - adcq %rdx, %rcx ; \ - adcq $0, %rsi ; \ + xorq %rsi, %rsi ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rcx, %rax ; \ + movl $0x00000000ffffffff, %ecx ; \ + mulxq %rcx, d0, %rcx ; \ + adcq d0, %rax ; \ + adcq %rdx, %rcx ; \ + adcq $0, %rsi ; \ /* Now subtract that and add 2^384 * w */ \ - subq %rax, d1 ; \ - sbbq %rcx, d2 ; \ - sbbq %rsi, d3 ; \ - sbbq $0, d4 ; \ - sbbq $0, d5 ; \ - movq %rdx, d6 ; \ - sbbq $0, d6 + subq %rax, d1 ; \ + sbbq %rcx, d2 ; \ + sbbq %rsi, d3 ; \ + sbbq $0, d4 ; \ + sbbq $0, d5 ; \ + movq %rdx, d6 ; \ + sbbq $0, d6 bignum_demont_p384: _bignum_demont_p384: diff --git a/x86_att/p384/bignum_demont_p384_alt.S b/x86_att/p384/bignum_demont_p384_alt.S index f216eb213b..1ca60c55d2 100644 --- a/x86_att/p384/bignum_demont_p384_alt.S +++ b/x86_att/p384/bignum_demont_p384_alt.S @@ -47,29 +47,29 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ - movq d0, %rcx ; \ - shlq $32, %rcx ; \ - addq d0, %rcx ; \ + movq d0, %rcx ; \ + shlq $32, %rcx ; \ + addq d0, %rcx ; \ /* Construct [%rax;%rdx;d0;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel so we can re-use d0 */ \ /* and %rcx as temps. 
*/ \ - movq $0xffffffff00000001, %rax ; \ - mulq %rcx; \ - movq %rdx, d0 ; \ - movq $0x00000000ffffffff, %rax ; \ - mulq %rcx; \ - addq %rax, d0 ; \ - movl $0, %eax ; \ - adcq %rcx, %rdx ; \ - adcl %eax, %eax ; \ + movq $0xffffffff00000001, %rax ; \ + mulq %rcx; \ + movq %rdx, d0 ; \ + movq $0x00000000ffffffff, %rax ; \ + mulq %rcx; \ + addq %rax, d0 ; \ + movl $0, %eax ; \ + adcq %rcx, %rdx ; \ + adcl %eax, %eax ; \ /* Now subtract that and add 2^384 * w */ \ - subq d0, d1 ; \ - sbbq %rdx, d2 ; \ - sbbq %rax, d3 ; \ - sbbq $0, d4 ; \ - sbbq $0, d5 ; \ - movq %rcx, d6 ; \ - sbbq $0, d6 + subq d0, d1 ; \ + sbbq %rdx, d2 ; \ + sbbq %rax, d3 ; \ + sbbq $0, d4 ; \ + sbbq $0, d5 ; \ + movq %rcx, d6 ; \ + sbbq $0, d6 bignum_demont_p384_alt: _bignum_demont_p384_alt: diff --git a/x86_att/p384/bignum_half_p384.S b/x86_att/p384/bignum_half_p384.S index 5d274e7e79..5b58e3f536 100644 --- a/x86_att/p384/bignum_half_p384.S +++ b/x86_att/p384/bignum_half_p384.S @@ -49,52 +49,52 @@ _bignum_half_p384: // Load lowest digit and get a mask for its lowest bit in d3 - movq (x), a - movl $1, d3short - andq a, d3 - negq d3 + movq (x), a + movl $1, d3short + andq a, d3 + negq d3 // Create a masked version of p_384 (top 3 words = the mask itself) - movl $0x00000000ffffffff, d0short - andq d3, d0 - movq d0, d1 - xorq d3, d1 - movq d3, d2 - addq d2, d2 - andq d3, d2 - movq d3, d4 - movq d3, d5 + movl $0x00000000ffffffff, d0short + andq d3, d0 + movq d0, d1 + xorq d3, d1 + movq d3, d2 + addq d2, d2 + andq d3, d2 + movq d3, d4 + movq d3, d5 // Perform addition with masked p_384. Catch the carry in a, as a bitmask // for convenience though we only use its LSB below with SHRD - addq a, d0 - adcq 8(x), d1 - adcq 16(x), d2 - adcq 24(x), d3 - adcq 32(x), d4 - adcq 40(x), d5 - sbbq a, a + addq a, d0 + adcq 8(x), d1 + adcq 16(x), d2 + adcq 24(x), d3 + adcq 32(x), d4 + adcq 40(x), d5 + sbbq a, a // Shift right, pushing the carry back down, and store back - shrdq $1, d1, d0 - movq d0, (z) - shrdq $1, d2, d1 - movq d1, 8(z) - shrdq $1, d3, d2 - movq d2, 16(z) - shrdq $1, d4, d3 - movq d3, 24(z) - shrdq $1, d5, d4 - movq d4, 32(z) - shrdq $1, a, d5 - movq d5, 40(z) + shrdq $1, d1, d0 + movq d0, (z) + shrdq $1, d2, d1 + movq d1, 8(z) + shrdq $1, d3, d2 + movq d2, 16(z) + shrdq $1, d4, d3 + movq d3, 24(z) + shrdq $1, d5, d4 + movq d4, 32(z) + shrdq $1, a, d5 + movq d5, 40(z) // Return - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_littleendian_6.S b/x86_att/p384/bignum_littleendian_6.S index 79a2f9b1e0..314167a7eb 100644 --- a/x86_att/p384/bignum_littleendian_6.S +++ b/x86_att/p384/bignum_littleendian_6.S @@ -54,25 +54,25 @@ _bignum_fromlebytes_6: bignum_tolebytes_6: _bignum_tolebytes_6: - movq (x), a - movq a, (z) + movq (x), a + movq a, (z) - movq 8(x), a - movq a, 8(z) + movq 8(x), a + movq a, 8(z) - movq 16(x), a - movq a, 16(z) + movq 16(x), a + movq a, 16(z) - movq 24(x), a - movq a, 24(z) + movq 24(x), a + movq a, 24(z) - movq 32(x), a - movq a, 32(z) + movq 32(x), a + movq a, 32(z) - movq 40(x), a - movq a, 40(z) + movq 40(x), a + movq a, 40(z) - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_mod_n384.S b/x86_att/p384/bignum_mod_n384.S index c62cf7be5c..1d50e536ec 100644 --- a/x86_att/p384/bignum_mod_n384.S +++ b/x86_att/p384/bignum_mod_n384.S @@ -56,168 +56,168 @@ _bignum_mod_n384: // Save extra registers - pushq %rbx - pushq %r12 - pushq %r13 - pushq %r14 + pushq %rbx 
+ pushq %r12 + pushq %r13 + pushq %r14 // If the input is already <= 5 words long, go to a trivial "copy" path - cmpq $6, k - jc shortinput + cmpq $6, k + jc shortinput // Otherwise load the top 6 digits (top-down) and reduce k by 6 - subq $6, k - movq 40(%rdx,k,8), m5 - movq 32(%rdx,k,8), m4 - movq 24(%rdx,k,8), m3 - movq 16(%rdx,k,8), m2 - movq 8(%rdx,k,8), m1 - movq (%rdx,k,8), m0 + subq $6, k + movq 40(%rdx,k,8), m5 + movq 32(%rdx,k,8), m4 + movq 24(%rdx,k,8), m3 + movq 16(%rdx,k,8), m2 + movq 8(%rdx,k,8), m1 + movq (%rdx,k,8), m0 // Move x into another register to leave %rdx free for multiplies and use of n2 - movq %rdx, x + movq %rdx, x // Reduce the top 6 digits mod n_384 (a conditional subtraction of n_384) - movq $0x1313e695333ad68d, n0 - movq $0xa7e5f24db74f5885, n1 - movq $0x389cb27e0bc8d220, n2 - - addq n0, m0 - adcq n1, m1 - adcq n2, m2 - adcq $0, m3 - adcq $0, m4 - adcq $0, m5 - sbbq d, d - notq d - andq d, n0 - andq d, n1 - andq d, n2 - subq n0, m0 - sbbq n1, m1 - sbbq n2, m2 - sbbq $0, m3 - sbbq $0, m4 - sbbq $0, m5 + movq $0x1313e695333ad68d, n0 + movq $0xa7e5f24db74f5885, n1 + movq $0x389cb27e0bc8d220, n2 + + addq n0, m0 + adcq n1, m1 + adcq n2, m2 + adcq $0, m3 + adcq $0, m4 + adcq $0, m5 + sbbq d, d + notq d + andq d, n0 + andq d, n1 + andq d, n2 + subq n0, m0 + sbbq n1, m1 + sbbq n2, m2 + sbbq $0, m3 + sbbq $0, m4 + sbbq $0, m5 // Now do (k-6) iterations of 7->6 word modular reduction - testq k, k - jz writeback + testq k, k + jz writeback loop: // Compute q = min (m5 + 1) (2^64 - 1) - movl $1, qshort - addq m5, q - sbbq d, d - orq d, q + movl $1, qshort + addq m5, q + sbbq d, d + orq d, q // Load the next digit so current m to reduce = [m5;m4;m3;m2;m1;m0;d] - movq -8(x,k,8), d + movq -8(x,k,8), d // Now form [m5;m4;m3;m2;m1;m0;d] = m - q * n_384 - subq q, m5 - xorq n0, n0 - movq $0x1313e695333ad68d, n0 - mulxq n0, n0, n1 - adcxq n0, d - adoxq n1, m0 - movq $0xa7e5f24db74f5885, n0 - mulxq n0, n0, n1 - adcxq n0, m0 - adoxq n1, m1 - movq $0x389cb27e0bc8d220, n0 - mulxq n0, n0, n1 - adcxq n0, m1 - movl $0, n0short - adoxq n0, n1 - adcxq n1, m2 - adcq $0, m3 - adcq $0, m4 - adcq $0, m5 + subq q, m5 + xorq n0, n0 + movq $0x1313e695333ad68d, n0 + mulxq n0, n0, n1 + adcxq n0, d + adoxq n1, m0 + movq $0xa7e5f24db74f5885, n0 + mulxq n0, n0, n1 + adcxq n0, m0 + adoxq n1, m1 + movq $0x389cb27e0bc8d220, n0 + mulxq n0, n0, n1 + adcxq n0, m1 + movl $0, n0short + adoxq n0, n1 + adcxq n1, m2 + adcq $0, m3 + adcq $0, m4 + adcq $0, m5 // Now our top word m5 is either zero or all 1s. 
Use it for a masked // addition of n_384, which we can do by a *subtraction* of // 2^384 - n_384 from our portion - movq $0x1313e695333ad68d, n0 - andq m5, n0 - movq $0xa7e5f24db74f5885, n1 - andq m5, n1 - movq $0x389cb27e0bc8d220, n2 - andq m5, n2 + movq $0x1313e695333ad68d, n0 + andq m5, n0 + movq $0xa7e5f24db74f5885, n1 + andq m5, n1 + movq $0x389cb27e0bc8d220, n2 + andq m5, n2 - subq n0, d - sbbq n1, m0 - sbbq n2, m1 - sbbq $0, m2 - sbbq $0, m3 - sbbq $0, m4 + subq n0, d + sbbq n1, m0 + sbbq n2, m1 + sbbq $0, m2 + sbbq $0, m3 + sbbq $0, m4 // Now shuffle registers up and loop - movq m4, m5 - movq m3, m4 - movq m2, m3 - movq m1, m2 - movq m0, m1 - movq d, m0 + movq m4, m5 + movq m3, m4 + movq m2, m3 + movq m1, m2 + movq m0, m1 + movq d, m0 - decq k - jnz loop + decq k + jnz loop // Write back writeback: - movq m0, (z) - movq m1, 8(z) - movq m2, 16(z) - movq m3, 24(z) - movq m4, 32(z) - movq m5, 40(z) + movq m0, (z) + movq m1, 8(z) + movq m2, 16(z) + movq m3, 24(z) + movq m4, 32(z) + movq m5, 40(z) // Restore registers and return - popq %r14 - popq %r13 - popq %r12 - popq %rbx - ret + popq %r14 + popq %r13 + popq %r12 + popq %rbx + ret shortinput: - xorq m0, m0 - xorq m1, m1 - xorq m2, m2 - xorq m3, m3 - xorq m4, m4 - xorq m5, m5 - - testq k, k - jz writeback - movq (%rdx), m0 - decq k - jz writeback - movq 8(%rdx), m1 - decq k - jz writeback - movq 16(%rdx), m2 - decq k - jz writeback - movq 24(%rdx), m3 - decq k - jz writeback - movq 32(%rdx), m4 - jmp writeback + xorq m0, m0 + xorq m1, m1 + xorq m2, m2 + xorq m3, m3 + xorq m4, m4 + xorq m5, m5 + + testq k, k + jz writeback + movq (%rdx), m0 + decq k + jz writeback + movq 8(%rdx), m1 + decq k + jz writeback + movq 16(%rdx), m2 + decq k + jz writeback + movq 24(%rdx), m3 + decq k + jz writeback + movq 32(%rdx), m4 + jmp writeback #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_mod_n384_alt.S b/x86_att/p384/bignum_mod_n384_alt.S index b45d110c9c..bdce0a0926 100644 --- a/x86_att/p384/bignum_mod_n384_alt.S +++ b/x86_att/p384/bignum_mod_n384_alt.S @@ -57,171 +57,171 @@ _bignum_mod_n384_alt: // Save extra registers - pushq %rbp - pushq %rbx - pushq %r12 - pushq %r13 - pushq %r14 + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 // If the input is already <= 5 words long, go to a trivial "copy" path - cmpq $6, k - jc shortinput + cmpq $6, k + jc shortinput // Otherwise load the top 6 digits (top-down) and reduce k by 6 - subq $6, k - movq 40(%rdx,k,8), m5 - movq 32(%rdx,k,8), m4 - movq 24(%rdx,k,8), m3 - movq 16(%rdx,k,8), m2 - movq 8(%rdx,k,8), m1 - movq (%rdx,k,8), m0 + subq $6, k + movq 40(%rdx,k,8), m5 + movq 32(%rdx,k,8), m4 + movq 24(%rdx,k,8), m3 + movq 16(%rdx,k,8), m2 + movq 8(%rdx,k,8), m1 + movq (%rdx,k,8), m0 // Move x into another register to leave %rdx free for multiplies and use of n2 - movq %rdx, x + movq %rdx, x // Reduce the top 6 digits mod n_384 (a conditional subtraction of n_384) - movq $0x1313e695333ad68d, n0 - movq $0xa7e5f24db74f5885, n1 - movq $0x389cb27e0bc8d220, n2 - - addq n0, m0 - adcq n1, m1 - adcq n2, m2 - adcq $0, m3 - adcq $0, m4 - adcq $0, m5 - sbbq d, d - notq d - andq d, n0 - andq d, n1 - andq d, n2 - subq n0, m0 - sbbq n1, m1 - sbbq n2, m2 - sbbq $0, m3 - sbbq $0, m4 - sbbq $0, m5 + movq $0x1313e695333ad68d, n0 + movq $0xa7e5f24db74f5885, n1 + movq $0x389cb27e0bc8d220, n2 + + addq n0, m0 + adcq n1, m1 + adcq n2, m2 + adcq $0, m3 + adcq $0, m4 + adcq $0, m5 + sbbq d, d + notq d + andq d, n0 + andq d, n1 + andq d, n2 + subq n0, 
m0 + sbbq n1, m1 + sbbq n2, m2 + sbbq $0, m3 + sbbq $0, m4 + sbbq $0, m5 // Now do (k-6) iterations of 7->6 word modular reduction - testq k, k - jz writeback + testq k, k + jz writeback loop: // Compute q = min (m5 + 1) (2^64 - 1) - movl $1, qshort - addq m5, q - sbbq d, d - orq d, q + movl $1, qshort + addq m5, q + sbbq d, d + orq d, q // Load the next digit so current m to reduce = [m5;m4;m3;m2;m1;m0;d] - movq -8(x,k,8), d + movq -8(x,k,8), d // Now form [m5;m4;m3;m2;m1;m0;d] = m - q * n_384 - subq q, m5 - movq $0x1313e695333ad68d, %rax - mulq q - addq %rax, d - adcq %rdx, m0 - sbbq c, c - movq $0xa7e5f24db74f5885, %rax - mulq q - subq c, %rdx - addq %rax, m0 - adcq %rdx, m1 - sbbq c, c - movq $0x389cb27e0bc8d220, n0 - mulq q - subq c, %rdx - addq %rax, m1 - adcq %rdx, m2 - adcq $0, m3 - adcq $0, m4 - adcq $0, m5 + subq q, m5 + movq $0x1313e695333ad68d, %rax + mulq q + addq %rax, d + adcq %rdx, m0 + sbbq c, c + movq $0xa7e5f24db74f5885, %rax + mulq q + subq c, %rdx + addq %rax, m0 + adcq %rdx, m1 + sbbq c, c + movq $0x389cb27e0bc8d220, n0 + mulq q + subq c, %rdx + addq %rax, m1 + adcq %rdx, m2 + adcq $0, m3 + adcq $0, m4 + adcq $0, m5 // Now our top word m5 is either zero or all 1s. Use it for a masked // addition of n_384, which we can do by a *subtraction* of // 2^384 - n_384 from our portion - movq $0x1313e695333ad68d, n0 - andq m5, n0 - movq $0xa7e5f24db74f5885, n1 - andq m5, n1 - movq $0x389cb27e0bc8d220, n2 - andq m5, n2 + movq $0x1313e695333ad68d, n0 + andq m5, n0 + movq $0xa7e5f24db74f5885, n1 + andq m5, n1 + movq $0x389cb27e0bc8d220, n2 + andq m5, n2 - subq n0, d - sbbq n1, m0 - sbbq n2, m1 - sbbq $0, m2 - sbbq $0, m3 - sbbq $0, m4 + subq n0, d + sbbq n1, m0 + sbbq n2, m1 + sbbq $0, m2 + sbbq $0, m3 + sbbq $0, m4 // Now shuffle registers up and loop - movq m4, m5 - movq m3, m4 - movq m2, m3 - movq m1, m2 - movq m0, m1 - movq d, m0 + movq m4, m5 + movq m3, m4 + movq m2, m3 + movq m1, m2 + movq m0, m1 + movq d, m0 - decq k - jnz loop + decq k + jnz loop // Write back writeback: - movq m0, (z) - movq m1, 8(z) - movq m2, 16(z) - movq m3, 24(z) - movq m4, 32(z) - movq m5, 40(z) + movq m0, (z) + movq m1, 8(z) + movq m2, 16(z) + movq m3, 24(z) + movq m4, 32(z) + movq m5, 40(z) // Restore registers and return - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp - ret + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + ret shortinput: - xorq m0, m0 - xorq m1, m1 - xorq m2, m2 - xorq m3, m3 - xorq m4, m4 - xorq m5, m5 - - testq k, k - jz writeback - movq (%rdx), m0 - decq k - jz writeback - movq 8(%rdx), m1 - decq k - jz writeback - movq 16(%rdx), m2 - decq k - jz writeback - movq 24(%rdx), m3 - decq k - jz writeback - movq 32(%rdx), m4 - jmp writeback + xorq m0, m0 + xorq m1, m1 + xorq m2, m2 + xorq m3, m3 + xorq m4, m4 + xorq m5, m5 + + testq k, k + jz writeback + movq (%rdx), m0 + decq k + jz writeback + movq 8(%rdx), m1 + decq k + jz writeback + movq 16(%rdx), m2 + decq k + jz writeback + movq 24(%rdx), m3 + decq k + jz writeback + movq 32(%rdx), m4 + jmp writeback #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_mod_p384.S b/x86_att/p384/bignum_mod_p384.S index 53cd68e0af..7f3de8c076 100644 --- a/x86_att/p384/bignum_mod_p384.S +++ b/x86_att/p384/bignum_mod_p384.S @@ -55,168 +55,168 @@ _bignum_mod_p384: // Save extra registers - pushq %rbx - pushq %r12 - pushq %r13 - pushq %r14 + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 // If the input is already <= 5 words long, go to a trivial "copy" path - 
cmpq $6, k - jc shortinput + cmpq $6, k + jc shortinput // Otherwise load the top 6 digits (top-down) and reduce k by 6 - subq $6, k - movq 40(%rdx,k,8), m5 - movq 32(%rdx,k,8), m4 - movq 24(%rdx,k,8), m3 - movq 16(%rdx,k,8), m2 - movq 8(%rdx,k,8), m1 - movq (%rdx,k,8), m0 + subq $6, k + movq 40(%rdx,k,8), m5 + movq 32(%rdx,k,8), m4 + movq 24(%rdx,k,8), m3 + movq 16(%rdx,k,8), m2 + movq 8(%rdx,k,8), m1 + movq (%rdx,k,8), m0 // Move x into another register to leave %rdx free for multiplies and use of n2 - movq %rdx, x + movq %rdx, x // Reduce the top 6 digits mod p_384 (a conditional subtraction of p_384) - movl $0x00000000ffffffff, n0short - movq $0xffffffff00000000, n1 - movq $0xfffffffffffffffe, n2 - - subq n0, m0 - sbbq n1, m1 - sbbq n2, m2 - sbbq $-1, m3 - sbbq $-1, m4 - sbbq $-1, m5 - - sbbq d, d - andq d, n0 - andq d, n1 - andq d, n2 - addq n0, m0 - adcq n1, m1 - adcq n2, m2 - adcq d, m3 - adcq d, m4 - adcq d, m5 + movl $0x00000000ffffffff, n0short + movq $0xffffffff00000000, n1 + movq $0xfffffffffffffffe, n2 + + subq n0, m0 + sbbq n1, m1 + sbbq n2, m2 + sbbq $-1, m3 + sbbq $-1, m4 + sbbq $-1, m5 + + sbbq d, d + andq d, n0 + andq d, n1 + andq d, n2 + addq n0, m0 + adcq n1, m1 + adcq n2, m2 + adcq d, m3 + adcq d, m4 + adcq d, m5 // Now do (k-6) iterations of 7->6 word modular reduction - testq k, k - jz writeback + testq k, k + jz writeback loop: // Compute q = min (m5 + 1) (2^64 - 1) - movl $1, qshort - addq m5, q - sbbq d, d - orq d, q + movl $1, qshort + addq m5, q + sbbq d, d + orq d, q // Load the next digit so current m to reduce = [m5;m4;m3;m2;m1;m0;d] - movq -8(x,k,8), d + movq -8(x,k,8), d // Now form [m5;m4;m3;m2;m1;m0;d] = m - q * p_384. To use an addition for // the main calculation we do (m - 2^384 * q) + q * (2^384 - p_384) // where 2^384 - p_384 = [0;0;0;1;0x00000000ffffffff;0xffffffff00000001]. // The extra subtraction of 2^384 * q is the first instruction. - subq q, m5 - xorq n0, n0 - movq $0xffffffff00000001, n0 - mulxq n0, n0, n1 - adcxq n0, d - adoxq n1, m0 - movl $0x00000000ffffffff, n0short - mulxq n0, n0, n1 - adcxq n0, m0 - adoxq n1, m1 - adcxq q, m1 - movl $0, n0short - adoxq n0, n0 - adcxq n0, m2 - adcq $0, m3 - adcq $0, m4 - adcq $0, m5 + subq q, m5 + xorq n0, n0 + movq $0xffffffff00000001, n0 + mulxq n0, n0, n1 + adcxq n0, d + adoxq n1, m0 + movl $0x00000000ffffffff, n0short + mulxq n0, n0, n1 + adcxq n0, m0 + adoxq n1, m1 + adcxq q, m1 + movl $0, n0short + adoxq n0, n0 + adcxq n0, m2 + adcq $0, m3 + adcq $0, m4 + adcq $0, m5 // Now our top word m5 is either zero or all 1s. 
Use it for a masked // addition of p_384, which we can do by a *subtraction* of // 2^384 - p_384 from our portion - movq $0xffffffff00000001, n0 - andq m5, n0 - movl $0x00000000ffffffff, n1short - andq m5, n1 - andq $1, m5 + movq $0xffffffff00000001, n0 + andq m5, n0 + movl $0x00000000ffffffff, n1short + andq m5, n1 + andq $1, m5 - subq n0, d - sbbq n1, m0 - sbbq m5, m1 - sbbq $0, m2 - sbbq $0, m3 - sbbq $0, m4 + subq n0, d + sbbq n1, m0 + sbbq m5, m1 + sbbq $0, m2 + sbbq $0, m3 + sbbq $0, m4 // Now shuffle registers up and loop - movq m4, m5 - movq m3, m4 - movq m2, m3 - movq m1, m2 - movq m0, m1 - movq d, m0 + movq m4, m5 + movq m3, m4 + movq m2, m3 + movq m1, m2 + movq m0, m1 + movq d, m0 - decq k - jnz loop + decq k + jnz loop // Write back writeback: - movq m0, (z) - movq m1, 8(z) - movq m2, 16(z) - movq m3, 24(z) - movq m4, 32(z) - movq m5, 40(z) + movq m0, (z) + movq m1, 8(z) + movq m2, 16(z) + movq m3, 24(z) + movq m4, 32(z) + movq m5, 40(z) // Restore registers and return - popq %r14 - popq %r13 - popq %r12 - popq %rbx - ret + popq %r14 + popq %r13 + popq %r12 + popq %rbx + ret shortinput: - xorq m0, m0 - xorq m1, m1 - xorq m2, m2 - xorq m3, m3 - xorq m4, m4 - xorq m5, m5 - - testq k, k - jz writeback - movq (%rdx), m0 - decq k - jz writeback - movq 8(%rdx), m1 - decq k - jz writeback - movq 16(%rdx), m2 - decq k - jz writeback - movq 24(%rdx), m3 - decq k - jz writeback - movq 32(%rdx), m4 - jmp writeback + xorq m0, m0 + xorq m1, m1 + xorq m2, m2 + xorq m3, m3 + xorq m4, m4 + xorq m5, m5 + + testq k, k + jz writeback + movq (%rdx), m0 + decq k + jz writeback + movq 8(%rdx), m1 + decq k + jz writeback + movq 16(%rdx), m2 + decq k + jz writeback + movq 24(%rdx), m3 + decq k + jz writeback + movq 32(%rdx), m4 + jmp writeback #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_mod_p384_alt.S b/x86_att/p384/bignum_mod_p384_alt.S index c91598bdb9..a4ef191f46 100644 --- a/x86_att/p384/bignum_mod_p384_alt.S +++ b/x86_att/p384/bignum_mod_p384_alt.S @@ -59,168 +59,168 @@ _bignum_mod_p384_alt: // Save extra registers - pushq %rbx - pushq %r12 - pushq %r13 - pushq %r14 + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 // If the input is already <= 5 words long, go to a trivial "copy" path - cmpq $6, k - jc shortinput + cmpq $6, k + jc shortinput // Otherwise load the top 6 digits (top-down) and reduce k by 6 - subq $6, k - movq 40(%rdx,k,8), m5 - movq 32(%rdx,k,8), m4 - movq 24(%rdx,k,8), m3 - movq 16(%rdx,k,8), m2 - movq 8(%rdx,k,8), m1 - movq (%rdx,k,8), m0 + subq $6, k + movq 40(%rdx,k,8), m5 + movq 32(%rdx,k,8), m4 + movq 24(%rdx,k,8), m3 + movq 16(%rdx,k,8), m2 + movq 8(%rdx,k,8), m1 + movq (%rdx,k,8), m0 // Move x into another register to leave %rdx free for multiplies and use of n2 - movq %rdx, x + movq %rdx, x // Reduce the top 6 digits mod p_384 (a conditional subtraction of p_384) - movl $0x00000000ffffffff, n0short - movq $0xffffffff00000000, n1 - movq $0xfffffffffffffffe, n2 - - subq n0, m0 - sbbq n1, m1 - sbbq n2, m2 - sbbq $-1, m3 - sbbq $-1, m4 - sbbq $-1, m5 - - sbbq d, d - andq d, n0 - andq d, n1 - andq d, n2 - addq n0, m0 - adcq n1, m1 - adcq n2, m2 - adcq d, m3 - adcq d, m4 - adcq d, m5 + movl $0x00000000ffffffff, n0short + movq $0xffffffff00000000, n1 + movq $0xfffffffffffffffe, n2 + + subq n0, m0 + sbbq n1, m1 + sbbq n2, m2 + sbbq $-1, m3 + sbbq $-1, m4 + sbbq $-1, m5 + + sbbq d, d + andq d, n0 + andq d, n1 + andq d, n2 + addq n0, m0 + adcq n1, m1 + adcq n2, m2 + adcq d, m3 + adcq d, m4 + adcq d, m5 // Now 
do (k-6) iterations of 7->6 word modular reduction - testq k, k - jz writeback + testq k, k + jz writeback loop: // Compute q = min (m5 + 1) (2^64 - 1) - movl $1, qshort - addq m5, q - sbbq d, d - orq d, q + movl $1, qshort + addq m5, q + sbbq d, d + orq d, q // Load the next digit so current m to reduce = [m5;m4;m3;m2;m1;m0;d] - movq -8(x,k,8), d + movq -8(x,k,8), d // Now form [m5;m4;m3;m2;m1;m0;d] = m - q * p_384. To use an addition for // the main calculation we do (m - 2^384 * q) + q * (2^384 - p_384) // where 2^384 - p_384 = [0;0;0;1;0x00000000ffffffff;0xffffffff00000001]. // The extra subtraction of 2^384 * q is the first instruction. - subq q, m5 - movq $0xffffffff00000001, %rax - mulq q - addq %rax, d - adcq %rdx, m0 - adcq q, m1 - movq q, %rax - sbbq c, c - movl $0x00000000ffffffff, %edx - negq c - mulq %rdx - addq %rax, m0 - adcq %rdx, m1 - adcq c, m2 - adcq $0, m3 - adcq $0, m4 - adcq $0, m5 + subq q, m5 + movq $0xffffffff00000001, %rax + mulq q + addq %rax, d + adcq %rdx, m0 + adcq q, m1 + movq q, %rax + sbbq c, c + movl $0x00000000ffffffff, %edx + negq c + mulq %rdx + addq %rax, m0 + adcq %rdx, m1 + adcq c, m2 + adcq $0, m3 + adcq $0, m4 + adcq $0, m5 // Now our top word m5 is either zero or all 1s. Use it for a masked // addition of p_384, which we can do by a *subtraction* of // 2^384 - p_384 from our portion - movq $0xffffffff00000001, n0 - andq m5, n0 - movl $0x00000000ffffffff, n1short - andq m5, n1 - andq $1, m5 + movq $0xffffffff00000001, n0 + andq m5, n0 + movl $0x00000000ffffffff, n1short + andq m5, n1 + andq $1, m5 - subq n0, d - sbbq n1, m0 - sbbq m5, m1 - sbbq $0, m2 - sbbq $0, m3 - sbbq $0, m4 + subq n0, d + sbbq n1, m0 + sbbq m5, m1 + sbbq $0, m2 + sbbq $0, m3 + sbbq $0, m4 // Now shuffle registers up and loop - movq m4, m5 - movq m3, m4 - movq m2, m3 - movq m1, m2 - movq m0, m1 - movq d, m0 + movq m4, m5 + movq m3, m4 + movq m2, m3 + movq m1, m2 + movq m0, m1 + movq d, m0 - decq k - jnz loop + decq k + jnz loop // Write back writeback: - movq m0, (z) - movq m1, 8(z) - movq m2, 16(z) - movq m3, 24(z) - movq m4, 32(z) - movq m5, 40(z) + movq m0, (z) + movq m1, 8(z) + movq m2, 16(z) + movq m3, 24(z) + movq m4, 32(z) + movq m5, 40(z) // Restore registers and return - popq %r14 - popq %r13 - popq %r12 - popq %rbx - ret + popq %r14 + popq %r13 + popq %r12 + popq %rbx + ret shortinput: - xorq m0, m0 - xorq m1, m1 - xorq m2, m2 - xorq m3, m3 - xorq m4, m4 - xorq m5, m5 - - testq k, k - jz writeback - movq (%rdx), m0 - decq k - jz writeback - movq 8(%rdx), m1 - decq k - jz writeback - movq 16(%rdx), m2 - decq k - jz writeback - movq 24(%rdx), m3 - decq k - jz writeback - movq 32(%rdx), m4 - jmp writeback + xorq m0, m0 + xorq m1, m1 + xorq m2, m2 + xorq m3, m3 + xorq m4, m4 + xorq m5, m5 + + testq k, k + jz writeback + movq (%rdx), m0 + decq k + jz writeback + movq 8(%rdx), m1 + decq k + jz writeback + movq 16(%rdx), m2 + decq k + jz writeback + movq 24(%rdx), m3 + decq k + jz writeback + movq 32(%rdx), m4 + jmp writeback #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_montmul_p384.S b/x86_att/p384/bignum_montmul_p384.S index c4b76d6181..e6b8ba5598 100644 --- a/x86_att/p384/bignum_montmul_p384.S +++ b/x86_att/p384/bignum_montmul_p384.S @@ -71,28 +71,28 @@ #define montredc(d7,d6,d5,d4,d3,d2,d1,d0) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ - movq d0, %rdx ; \ - shlq $32, %rdx ; \ - addq d0, %rdx ; \ + movq d0, %rdx ; \ + shlq $32, %rdx ; \ + addq d0, %rdx ; \ /* Construct 
[%rbp;%rbx;%rax;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel so we can re-use d0 as a temp */ \ - xorl %ebp, %ebp ; \ - movq $0xffffffff00000001, %rax ; \ - mulxq %rax, %rbx, %rax ; \ - movl $0x00000000ffffffff, %ebx ; \ - mulxq %rbx, d0, %rbx ; \ - adcq d0, %rax ; \ - adcq %rdx, %rbx ; \ - adcl %ebp, %ebp ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0x00000000ffffffff, %ebx ; \ + mulxq %rbx, d0, %rbx ; \ + adcq d0, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ /* Now subtract that and add 2^384 * w */ \ - subq %rax, d1 ; \ - sbbq %rbx, d2 ; \ - sbbq %rbp, d3 ; \ - sbbq $0, d4 ; \ - sbbq $0, d5 ; \ - sbbq $0, %rdx ; \ - addq %rdx, d6 ; \ - adcq $0, d7 + subq %rax, d1 ; \ + sbbq %rbx, d2 ; \ + sbbq %rbp, d3 ; \ + sbbq $0, d4 ; \ + sbbq $0, d5 ; \ + sbbq $0, %rdx ; \ + addq %rdx, d6 ; \ + adcq $0, d7 bignum_montmul_p384: _bignum_montmul_p384: diff --git a/x86_att/p384/bignum_montmul_p384_alt.S b/x86_att/p384/bignum_montmul_p384_alt.S index 36ab5b0d2f..b684265236 100644 --- a/x86_att/p384/bignum_montmul_p384_alt.S +++ b/x86_att/p384/bignum_montmul_p384_alt.S @@ -92,29 +92,29 @@ #define montredc(d7,d6,d5,d4,d3,d2,d1,d0) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ - movq d0, %rbx ; \ - shlq $32, %rbx ; \ - addq d0, %rbx ; \ + movq d0, %rbx ; \ + shlq $32, %rbx ; \ + addq d0, %rbx ; \ /* Construct [%rbp;%rdx;%rax;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel so we can re-use d0 as a temp */ \ - xorl %ebp, %ebp ; \ - movq $0xffffffff00000001, %rax ; \ - mulq %rbx; \ - movq %rdx, d0 ; \ - movq $0x00000000ffffffff, %rax ; \ - mulq %rbx; \ - addq d0, %rax ; \ - adcq %rbx, %rdx ; \ - adcl %ebp, %ebp ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulq %rbx; \ + movq %rdx, d0 ; \ + movq $0x00000000ffffffff, %rax ; \ + mulq %rbx; \ + addq d0, %rax ; \ + adcq %rbx, %rdx ; \ + adcl %ebp, %ebp ; \ /* Now subtract that and add 2^384 * w */ \ - subq %rax, d1 ; \ - sbbq %rdx, d2 ; \ - sbbq %rbp, d3 ; \ - sbbq $0, d4 ; \ - sbbq $0, d5 ; \ - sbbq $0, %rbx ; \ - addq %rbx, d6 ; \ - adcq $0, d7 + subq %rax, d1 ; \ + sbbq %rdx, d2 ; \ + sbbq %rbp, d3 ; \ + sbbq $0, d4 ; \ + sbbq $0, d5 ; \ + sbbq $0, %rbx ; \ + addq %rbx, d6 ; \ + adcq $0, d7 bignum_montmul_p384_alt: _bignum_montmul_p384_alt: diff --git a/x86_att/p384/bignum_montsqr_p384.S b/x86_att/p384/bignum_montsqr_p384.S index a6628ad8c3..2e3d964597 100644 --- a/x86_att/p384/bignum_montsqr_p384.S +++ b/x86_att/p384/bignum_montsqr_p384.S @@ -68,28 +68,28 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ - movq d0, %rdx ; \ - shlq $32, %rdx ; \ - addq d0, %rdx ; \ + movq d0, %rdx ; \ + shlq $32, %rdx ; \ + addq d0, %rdx ; \ /* Construct [%rbx;d0;%rax;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel so we can re-use d0 */ \ /* and %rbx as temps. 
*/ \ - movq $0xffffffff00000001, %rax ; \ - mulxq %rax, d0, %rax ; \ - movl $0x00000000ffffffff, %ebx ; \ - mulxq %rbx, %rbx, d0 ; \ - addq %rbx, %rax ; \ - adcq %rdx, d0 ; \ - movl $0, %ebx ; \ - adcq %rbx, %rbx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, d0, %rax ; \ + movl $0x00000000ffffffff, %ebx ; \ + mulxq %rbx, %rbx, d0 ; \ + addq %rbx, %rax ; \ + adcq %rdx, d0 ; \ + movl $0, %ebx ; \ + adcq %rbx, %rbx ; \ /* Now subtract that and add 2^384 * w */ \ - subq %rax, d1 ; \ - sbbq d0, d2 ; \ - sbbq %rbx, d3 ; \ - sbbq $0, d4 ; \ - sbbq $0, d5 ; \ - movq %rdx, d6 ; \ - sbbq $0, d6 + subq %rax, d1 ; \ + sbbq d0, d2 ; \ + sbbq %rbx, d3 ; \ + sbbq $0, d4 ; \ + sbbq $0, d5 ; \ + movq %rdx, d6 ; \ + sbbq $0, d6 bignum_montsqr_p384: _bignum_montsqr_p384: diff --git a/x86_att/p384/bignum_montsqr_p384_alt.S b/x86_att/p384/bignum_montsqr_p384_alt.S index ffe475525d..3a3f9a2715 100644 --- a/x86_att/p384/bignum_montsqr_p384_alt.S +++ b/x86_att/p384/bignum_montsqr_p384_alt.S @@ -89,29 +89,29 @@ #define montreds(d6,d5,d4,d3,d2,d1,d0) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ - movq d0, %rbx ; \ - shlq $32, %rbx ; \ - addq d0, %rbx ; \ + movq d0, %rbx ; \ + shlq $32, %rbx ; \ + addq d0, %rbx ; \ /* Construct [%rax;%rdx;d0;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel so we can re-use d0 */ \ /* and %rbx as temps. */ \ - movq $0xffffffff00000001, %rax ; \ - mulq %rbx; \ - movq %rdx, d0 ; \ - movq $0x00000000ffffffff, %rax ; \ - mulq %rbx; \ - addq %rax, d0 ; \ - movl $0, %eax ; \ - adcq %rbx, %rdx ; \ - adcl %eax, %eax ; \ + movq $0xffffffff00000001, %rax ; \ + mulq %rbx; \ + movq %rdx, d0 ; \ + movq $0x00000000ffffffff, %rax ; \ + mulq %rbx; \ + addq %rax, d0 ; \ + movl $0, %eax ; \ + adcq %rbx, %rdx ; \ + adcl %eax, %eax ; \ /* Now subtract that and add 2^384 * w */ \ - subq d0, d1 ; \ - sbbq %rdx, d2 ; \ - sbbq %rax, d3 ; \ - sbbq $0, d4 ; \ - sbbq $0, d5 ; \ - movq %rbx, d6 ; \ - sbbq $0, d6 + subq d0, d1 ; \ + sbbq %rdx, d2 ; \ + sbbq %rax, d3 ; \ + sbbq $0, d4 ; \ + sbbq $0, d5 ; \ + movq %rbx, d6 ; \ + sbbq $0, d6 bignum_montsqr_p384_alt: _bignum_montsqr_p384_alt: diff --git a/x86_att/p384/bignum_mux_6.S b/x86_att/p384/bignum_mux_6.S index 3305e0f01d..cc484bf9bb 100644 --- a/x86_att/p384/bignum_mux_6.S +++ b/x86_att/p384/bignum_mux_6.S @@ -41,39 +41,39 @@ bignum_mux_6: _bignum_mux_6: - testq p, p + testq p, p - movq (x), a - movq (y), b - cmovzq b, a - movq a, (z) + movq (x), a + movq (y), b + cmovzq b, a + movq a, (z) - movq 8(x), a - movq 8(y), b - cmovzq b, a - movq a, 8(z) + movq 8(x), a + movq 8(y), b + cmovzq b, a + movq a, 8(z) - movq 16(x), a - movq 16(y), b - cmovzq b, a - movq a, 16(z) + movq 16(x), a + movq 16(y), b + cmovzq b, a + movq a, 16(z) - movq 24(x), a - movq 24(y), b - cmovzq b, a - movq a, 24(z) + movq 24(x), a + movq 24(y), b + cmovzq b, a + movq a, 24(z) - movq 32(x), a - movq 32(y), b - cmovzq b, a - movq a, 32(z) + movq 32(x), a + movq 32(y), b + cmovzq b, a + movq a, 32(z) - movq 40(x), a - movq 40(y), b - cmovzq b, a - movq a, 40(z) + movq 40(x), a + movq 40(y), b + cmovzq b, a + movq a, 40(z) - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_neg_p384.S b/x86_att/p384/bignum_neg_p384.S index e80158e1b4..5a6c62ea53 100644 --- a/x86_att/p384/bignum_neg_p384.S +++ b/x86_att/p384/bignum_neg_p384.S @@ -45,47 +45,47 @@ _bignum_neg_p384: // Or together the input digits and create a bitmask q if this is nonzero, so // that we avoid doing -0 = 
p_384 and hence maintain strict modular reduction - movq (x), n0 - orq 8(x), n0 - movq 16(x), n1 - orq 24(x), n1 - movq 32(x), n2 - orq 40(x), n2 - orq n1, n0 - orq n2, n0 - negq n0 - sbbq q, q + movq (x), n0 + orq 8(x), n0 + movq 16(x), n1 + orq 24(x), n1 + movq 32(x), n2 + orq 40(x), n2 + orq n1, n0 + orq n2, n0 + negq n0 + sbbq q, q // Let [q;n4;n3;n2;n1;n0] = if q then p_384 else 0 - movl $0x00000000ffffffff, n0short - andq q, n0 - movq $0xffffffff00000000, n1 - andq q, n1 - movq $0xfffffffffffffffe, n2 - andq q, n2 - movq q, n3 - movq q, n4 + movl $0x00000000ffffffff, n0short + andq q, n0 + movq $0xffffffff00000000, n1 + andq q, n1 + movq $0xfffffffffffffffe, n2 + andq q, n2 + movq q, n3 + movq q, n4 // Do the subtraction - subq (x), n0 - sbbq 8(x), n1 - sbbq 16(x), n2 - sbbq 24(x), n3 - sbbq 32(x), n4 - sbbq 40(x), q + subq (x), n0 + sbbq 8(x), n1 + sbbq 16(x), n2 + sbbq 24(x), n3 + sbbq 32(x), n4 + sbbq 40(x), q // Write back - movq n0, (z) - movq n1, 8(z) - movq n2, 16(z) - movq n3, 24(z) - movq n4, 32(z) - movq q, 40(z) + movq n0, (z) + movq n1, 8(z) + movq n2, 16(z) + movq n3, 24(z) + movq n4, 32(z) + movq q, 40(z) - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_nonzero_6.S b/x86_att/p384/bignum_nonzero_6.S index 49b5ded9a8..585aa1e58b 100644 --- a/x86_att/p384/bignum_nonzero_6.S +++ b/x86_att/p384/bignum_nonzero_6.S @@ -39,20 +39,20 @@ _bignum_nonzero_6: // Generate a = an OR of all the words in the bignum - movq (x), a - movq 8(x), d - orq 16(x), a - orq 24(x), d - orq 32(x), a - orq 40(x), d - orq d, a + movq (x), a + movq 8(x), d + orq 16(x), a + orq 24(x), d + orq 32(x), a + orq 40(x), d + orq d, a // Set a standard C condition based on whether a is nonzero - movl $1, dshort - cmovnzq d, a + movl $1, dshort + cmovnzq d, a - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_optneg_p384.S b/x86_att/p384/bignum_optneg_p384.S index 4cc49044d9..f14f8dc682 100644 --- a/x86_att/p384/bignum_optneg_p384.S +++ b/x86_att/p384/bignum_optneg_p384.S @@ -50,63 +50,63 @@ _bignum_optneg_p384: // This step is redundant if we know a priori that the input is nonzero, which // is the case for the y coordinate of points on the P-384 curve, for example. 
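// (Concretely, "negq n0" sets the carry flag exactly when the OR of the
// digits is nonzero, and "sbbq n0, n0" then leaves n0 = all 0s or all 1s
// accordingly; ANDing that into q switches the negation off for a zero
// input.)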
- movq (x), n0 - orq 8(x), n0 - movq 16(x), n1 - orq 24(x), n1 - movq 32(x), n2 - orq 40(x), n2 - orq n1, n0 - orq n2, n0 - negq n0 - sbbq n0, n0 - andq n0, q + movq (x), n0 + orq 8(x), n0 + movq 16(x), n1 + orq 24(x), n1 + movq 32(x), n2 + orq 40(x), n2 + orq n1, n0 + orq n2, n0 + negq n0 + sbbq n0, n0 + andq n0, q // Turn q into a bitmask, all 1s for q=false, all 0s for q=true - negq q - sbbq q, q - notq q + negq q + sbbq q, q + notq q // Let [n5;n4;n3;n2;n1] = if q then p_384 else -1 - movl $0x00000000ffffffff, n0short - orq q, n0 - movq $0xffffffff00000000, n1 - orq q, n1 - movq $0xfffffffffffffffe, n2 - orq q, n2 - movq $0xffffffffffffffff, n3 - movq n3, n4 - movq n3, n5 + movl $0x00000000ffffffff, n0short + orq q, n0 + movq $0xffffffff00000000, n1 + orq q, n1 + movq $0xfffffffffffffffe, n2 + orq q, n2 + movq $0xffffffffffffffff, n3 + movq n3, n4 + movq n3, n5 // Subtract so [n5;n4;n3;n2;n1;n0] = if q then p_384 - x else -1 - x - subq (x), n0 - sbbq 8(x), n1 - sbbq 16(x), n2 - sbbq 24(x), n3 - sbbq 32(x), n4 - sbbq 40(x), n5 + subq (x), n0 + sbbq 8(x), n1 + sbbq 16(x), n2 + sbbq 24(x), n3 + sbbq 32(x), n4 + sbbq 40(x), n5 // XOR the words with the bitmask, which in the case q = false has the // effect of restoring ~(-1 - x) = -(-1 - x) - 1 = 1 + x - 1 = x // and write back the digits to the output - xorq q, n0 - movq n0, (z) - xorq q, n1 - movq n1, 8(z) - xorq q, n2 - movq n2, 16(z) - xorq q, n3 - movq n3, 24(z) - xorq q, n4 - movq n4, 32(z) - xorq q, n5 - movq n5, 40(z) - - ret + xorq q, n0 + movq n0, (z) + xorq q, n1 + movq n1, 8(z) + xorq q, n2 + movq n2, 16(z) + xorq q, n3 + movq n3, 24(z) + xorq q, n4 + movq n4, 32(z) + xorq q, n5 + movq n5, 40(z) + + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_tomont_p384.S b/x86_att/p384/bignum_tomont_p384.S index cfaf4fc9dd..85105e4667 100644 --- a/x86_att/p384/bignum_tomont_p384.S +++ b/x86_att/p384/bignum_tomont_p384.S @@ -70,28 +70,28 @@ #define montredc(d7,d6,d5,d4,d3,d2,d1,d0) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ - movq d0, %rdx ; \ - shlq $32, %rdx ; \ - addq d0, %rdx ; \ + movq d0, %rdx ; \ + shlq $32, %rdx ; \ + addq d0, %rdx ; \ /* Construct [%rbp;%rcx;%rax;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel so we can re-use d0 as a temp */ \ - xorl %ebp, %ebp ; \ - movq $0xffffffff00000001, %rax ; \ - mulxq %rax, %rcx, %rax ; \ - movl $0x00000000ffffffff, %ecx ; \ - mulxq %rcx, d0, %rcx ; \ - adcq d0, %rax ; \ - adcq %rdx, %rcx ; \ - adcl %ebp, %ebp ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rcx, %rax ; \ + movl $0x00000000ffffffff, %ecx ; \ + mulxq %rcx, d0, %rcx ; \ + adcq d0, %rax ; \ + adcq %rdx, %rcx ; \ + adcl %ebp, %ebp ; \ /* Now subtract that and add 2^384 * w */ \ - subq %rax, d1 ; \ - sbbq %rcx, d2 ; \ - sbbq %rbp, d3 ; \ - sbbq $0, d4 ; \ - sbbq $0, d5 ; \ - sbbq $0, %rdx ; \ - addq %rdx, d6 ; \ - adcq $0, d7 + subq %rax, d1 ; \ + sbbq %rcx, d2 ; \ + sbbq %rbp, d3 ; \ + sbbq $0, d4 ; \ + sbbq $0, d5 ; \ + sbbq $0, %rdx ; \ + addq %rdx, d6 ; \ + adcq $0, d7 bignum_tomont_p384: _bignum_tomont_p384: diff --git a/x86_att/p384/bignum_tomont_p384_alt.S b/x86_att/p384/bignum_tomont_p384_alt.S index 05525e38fb..6686e7804e 100644 --- a/x86_att/p384/bignum_tomont_p384_alt.S +++ b/x86_att/p384/bignum_tomont_p384_alt.S @@ -87,29 +87,29 @@ #define montredc(d7,d6,d5,d4,d3,d2,d1,d0) \ /* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ - movq d0, %rbx ; \ - shlq $32, 
%rbx ; \ - addq d0, %rbx ; \ + movq d0, %rbx ; \ + shlq $32, %rbx ; \ + addq d0, %rbx ; \ /* Construct [%rcx;%rdx;%rax;-] = (2^384 - p_384) * w */ \ /* We know the lowest word will cancel so we can re-use d0 as a temp */ \ - xorl %ecx, %ecx ; \ - movq $0xffffffff00000001, %rax ; \ - mulq %rbx; \ - movq %rdx, d0 ; \ - movq $0x00000000ffffffff, %rax ; \ - mulq %rbx; \ - addq d0, %rax ; \ - adcq %rbx, %rdx ; \ - adcl %ecx, %ecx ; \ + xorl %ecx, %ecx ; \ + movq $0xffffffff00000001, %rax ; \ + mulq %rbx; \ + movq %rdx, d0 ; \ + movq $0x00000000ffffffff, %rax ; \ + mulq %rbx; \ + addq d0, %rax ; \ + adcq %rbx, %rdx ; \ + adcl %ecx, %ecx ; \ /* Now subtract that and add 2^384 * w */ \ - subq %rax, d1 ; \ - sbbq %rdx, d2 ; \ - sbbq %rcx, d3 ; \ - sbbq $0, d4 ; \ - sbbq $0, d5 ; \ - sbbq $0, %rbx ; \ - addq %rbx, d6 ; \ - adcq $0, d7 + subq %rax, d1 ; \ + sbbq %rdx, d2 ; \ + sbbq %rcx, d3 ; \ + sbbq $0, d4 ; \ + sbbq $0, d5 ; \ + sbbq $0, %rbx ; \ + addq %rbx, d6 ; \ + adcq $0, d7 bignum_tomont_p384_alt: _bignum_tomont_p384_alt: diff --git a/x86_att/p384/bignum_triple_p384.S b/x86_att/p384/bignum_triple_p384.S index a60ee35f75..05464aa711 100644 --- a/x86_att/p384/bignum_triple_p384.S +++ b/x86_att/p384/bignum_triple_p384.S @@ -53,90 +53,90 @@ _bignum_triple_p384: // We seem to need (just!) one extra register, which we need to save and restore - pushq %rbx + pushq %rbx // Multiply, accumulating the result as 2^384 * h + [d5;d4;d3;d2;d1;d0] // but actually immediately producing q = h + 1, our quotient approximation, // by adding 1 to it. - xorl ashort, ashort - - movq (x), q - movq q, d0 - adcxq q, q - adoxq q, d0 - movq 8(x), q - movq q, d1 - adcxq q, q - adoxq q, d1 - movq 16(x), q - movq q, d2 - adcxq q, q - adoxq q, d2 - movq 24(x), q - movq q, d3 - adcxq q, q - adoxq q, d3 - movq 32(x), q - movq q, d4 - adcxq q, q - adoxq q, d4 - movq 40(x), q - movq q, d5 - adcxq q, q - adoxq q, d5 - - movl $1, qshort - adcxq a, q - adoxq a, q + xorl ashort, ashort + + movq (x), q + movq q, d0 + adcxq q, q + adoxq q, d0 + movq 8(x), q + movq q, d1 + adcxq q, q + adoxq q, d1 + movq 16(x), q + movq q, d2 + adcxq q, q + adoxq q, d2 + movq 24(x), q + movq q, d3 + adcxq q, q + adoxq q, d3 + movq 32(x), q + movq q, d4 + adcxq q, q + adoxq q, d4 + movq 40(x), q + movq q, d5 + adcxq q, q + adoxq q, d5 + + movl $1, qshort + adcxq a, q + adoxq a, q // Initial subtraction of z - q * p_384, with bitmask c for the carry // Actually done as an addition of (z - 2^384 * h) + q * (2^384 - p_384) // which, because q = h + 1, is exactly 2^384 + (z - q * p_384), and // therefore CF <=> 2^384 + (z - q * p_384) >= 2^384 <=> z >= q * p_384. - movq q, c - shlq $32, c - movq q, a - subq c, a - sbbq $0, c - - addq a, d0 - adcq c, d1 - adcq q, d2 - adcq $0, d3 - adcq $0, d4 - adcq $0, d5 - sbbq c, c - notq c + movq q, c + shlq $32, c + movq q, a + subq c, a + sbbq $0, c + + addq a, d0 + adcq c, d1 + adcq q, d2 + adcq $0, d3 + adcq $0, d4 + adcq $0, d5 + sbbq c, c + notq c // Now use that mask for a masked addition of p_384, which again is in // fact done by a masked subtraction of 2^384 - p_384, so that we only // have three nonzero digits and so can avoid using another register. 
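// (Here 2^384 - p_384 = [0;0;0;1;0x00000000ffffffff;0xffffffff00000001],
// as in the reduction comments above, so the subtrahend has only three
// nonzero digits.)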
- movl $0x00000000ffffffff, qshort - xorl ashort, ashort - andq c, q - subq q, a - negq c - - subq a, d0 - movq d0, (z) - sbbq q, d1 - movq d1, 8(z) - sbbq c, d2 - movq d2, 16(z) - sbbq $0, d3 - movq d3, 24(z) - sbbq $0, d4 - movq d4, 32(z) - sbbq $0, d5 - movq d5, 40(z) + movl $0x00000000ffffffff, qshort + xorl ashort, ashort + andq c, q + subq q, a + negq c + + subq a, d0 + movq d0, (z) + sbbq q, d1 + movq d1, 8(z) + sbbq c, d2 + movq d2, 16(z) + sbbq $0, d3 + movq d3, 24(z) + sbbq $0, d4 + movq d4, 32(z) + sbbq $0, d5 + movq d5, 40(z) // Return - popq %rbx - ret + popq %rbx + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p384/bignum_triple_p384_alt.S b/x86_att/p384/bignum_triple_p384_alt.S index c27f1f6840..9fb4a9df0d 100644 --- a/x86_att/p384/bignum_triple_p384_alt.S +++ b/x86_att/p384/bignum_triple_p384_alt.S @@ -56,97 +56,97 @@ _bignum_triple_p384_alt: // We seem to need (just!) one extra register, which we need to save and restore - pushq %rbx + pushq %rbx // Multiply, accumulating the result as 2^384 * h + [d5;d4;d3;d2;d1;d0] // but actually immediately producing q = h + 1, our quotient approximation, // by adding 1 to it. - movl $3, cshort + movl $3, cshort - movq (x), a - mulq c - movq a, d0 - movq d, d1 + movq (x), a + mulq c + movq a, d0 + movq d, d1 - movq 8(x), a - xorq d2, d2 - mulq c - addq a, d1 - adcq d, d2 + movq 8(x), a + xorq d2, d2 + mulq c + addq a, d1 + adcq d, d2 - movq 16(x), a - xorq d3, d3 - mulq c - addq a, d2 - adcq d, d3 + movq 16(x), a + xorq d3, d3 + mulq c + addq a, d2 + adcq d, d3 - movq 24(x), a - xorq d4, d4 - mulq c - addq a, d3 - adcq d, d4 + movq 24(x), a + xorq d4, d4 + mulq c + addq a, d3 + adcq d, d4 - movq 32(x), a - mulq c - addq a, d4 - adcq $0, d + movq 32(x), a + mulq c + addq a, d4 + adcq $0, d - movq 40(x), a - movq d, d5 - mulq c - addq a, d5 + movq 40(x), a + movq d, d5 + mulq c + addq a, d5 - movl $1, qshort - adcq d, q + movl $1, qshort + adcq d, q // Initial subtraction of z - q * p_384, with bitmask c for the carry // Actually done as an addition of (z - 2^384 * h) + q * (2^384 - p_384) // which, because q = h + 1, is exactly 2^384 + (z - q * p_384), and // therefore CF <=> 2^384 + (z - q * p_384) >= 2^384 <=> z >= q * p_384. - movq q, d - shlq $32, d - movq q, a - subq d, a - sbbq $0, d - - addq a, d0 - adcq d, d1 - adcq q, d2 - adcq $0, d3 - adcq $0, d4 - adcq $0, d5 - sbbq d, d - notq d + movq q, d + shlq $32, d + movq q, a + subq d, a + sbbq $0, d + + addq a, d0 + adcq d, d1 + adcq q, d2 + adcq $0, d3 + adcq $0, d4 + adcq $0, d5 + sbbq d, d + notq d // Now use that mask for a masked addition of p_384, which again is in // fact done by a masked subtraction of 2^384 - p_384, so that we only // have three nonzero digits and so can avoid using another register. 
- movl $0x00000000ffffffff, qshort - xorl ashort, ashort - andq d, q - subq q, a - negq d - - subq a, d0 - movq d0, (z) - sbbq q, d1 - movq d1, 8(z) - sbbq d, d2 - movq d2, 16(z) - sbbq $0, d3 - movq d3, 24(z) - sbbq $0, d4 - movq d4, 32(z) - sbbq $0, d5 - movq d5, 40(z) + movl $0x00000000ffffffff, qshort + xorl ashort, ashort + andq d, q + subq q, a + negq d + + subq a, d0 + movq d0, (z) + sbbq q, d1 + movq d1, 8(z) + sbbq d, d2 + movq d2, 16(z) + sbbq $0, d3 + movq d3, 24(z) + sbbq $0, d4 + movq d4, 32(z) + sbbq $0, d5 + movq d5, 40(z) // Return - popq %rbx - ret + popq %rbx + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p521/bignum_cmul_p521.S b/x86_att/p521/bignum_cmul_p521.S index 02cdb893ba..04601fe680 100644 --- a/x86_att/p521/bignum_cmul_p521.S +++ b/x86_att/p521/bignum_cmul_p521.S @@ -65,36 +65,36 @@ _bignum_cmul_p521: // Save additional registers to use - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 // Shuffle inputs (since we want the multiplier in %rdx) - movq %rdx, x - movq %rsi, c + movq %rdx, x + movq %rsi, c // Multiply as [d9; ...; d0] = c * x. - mulxq (x), d0, d1 - mulxq 8(x), a, d2 - addq a, d1 - mulxq 16(x), a, d3 - adcq a, d2 - mulxq 24(x), a, d4 - adcq a, d3 - mulxq 32(x), a, d5 - adcq a, d4 - mulxq 40(x), a, d6 - adcq a, d5 - mulxq 48(x), a, d7 - adcq a, d6 - mulxq 56(x), a, d8 - adcq a, d7 - mulxq 64(x), a, d9 - adcq a, d8 - adcq $0, d9 + mulxq (x), d0, d1 + mulxq 8(x), a, d2 + addq a, d1 + mulxq 16(x), a, d3 + adcq a, d2 + mulxq 24(x), a, d4 + adcq a, d3 + mulxq 32(x), a, d5 + adcq a, d4 + mulxq 40(x), a, d6 + adcq a, d5 + mulxq 48(x), a, d7 + adcq a, d6 + mulxq 56(x), a, d8 + adcq a, d7 + mulxq 64(x), a, d9 + adcq a, d8 + adcq $0, d9 // Create an AND "dd" of digits d7,...,d1, a computation we hope will // get nicely interleaved with the multiplication chain above. @@ -102,19 +102,19 @@ _bignum_cmul_p521: // bunch it up here since AND destroys the flags and we overwrite the // register used as a stage temporary variable for the multiplications. - movq d1, dd - andq d2, dd - andq d3, dd - andq d4, dd - andq d5, dd - andq d6, dd - andq d7, dd + movq d1, dd + andq d2, dd + andq d3, dd + andq d4, dd + andq d5, dd + andq d6, dd + andq d7, dd // Extract the high part h==d9 and mask off the low part l = [d8;d7;...;d0] // but stuff d8 with 1 bits at the left to ease a comparison below - shldq $55, d8, h - orq $~0x1FF, d8 + shldq $55, d8, h + orq $~0x1FF, d8 // Decide whether h + l >= p_521 <=> h + l + 1 >= 2^521. Since this can only // happen if digits d7,...d1 are all 1s, we use the AND of them "dd" to @@ -123,46 +123,46 @@ _bignum_cmul_p521: // Since x was assumed reduced, h cannot be maximal, so the "lea" is safe, // i.e. does not carry or wrap round. - leaq 1(h), c - addq d0, c - movl $0, cshort - adcq c, dd - movq d8, a - adcq c, a + leaq 1(h), c + addq d0, c + movl $0, cshort + adcq c, dd + movq d8, a + adcq c, a // Now if CF is set we want (h + l) - p_521 = (h + l + 1) - 2^521 // while otherwise we want just h + l. So mask h + l + CF to 521 bits. // This masking also gets rid of the stuffing with 1s we did above. // Write back the digits as they are generated. 
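// (This works because p_521 = 2^521 - 1: subtracting p_521 is the same
// as adding 1 and subtracting 2^521, and the subtracted 2^521 disappears
// in the final masking to 521 bits.)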
- adcq h, d0 - movq d0, (z) - adcq c, d1 - movq d1, 8(z) - adcq c, d2 - movq d2, 16(z) - adcq c, d3 - movq d3, 24(z) - adcq c, d4 - movq d4, 32(z) - adcq c, d5 - movq d5, 40(z) - adcq c, d6 - movq d6, 48(z) - adcq c, d7 - movq d7, 56(z) - adcq c, d8 - andq $0x1FF, d8 - movq d8, 64(z) + adcq h, d0 + movq d0, (z) + adcq c, d1 + movq d1, 8(z) + adcq c, d2 + movq d2, 16(z) + adcq c, d3 + movq d3, 24(z) + adcq c, d4 + movq d4, 32(z) + adcq c, d5 + movq d5, 40(z) + adcq c, d6 + movq d6, 48(z) + adcq c, d7 + movq d7, 56(z) + adcq c, d8 + andq $0x1FF, d8 + movq d8, 64(z) // Restore registers and return - popq %r13 - popq %r12 - popq %rbp - popq %rbx + popq %r13 + popq %r12 + popq %rbp + popq %rbx - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p521/bignum_cmul_p521_alt.S b/x86_att/p521/bignum_cmul_p521_alt.S index dc9815a4e6..8dc1b72f4f 100644 --- a/x86_att/p521/bignum_cmul_p521_alt.S +++ b/x86_att/p521/bignum_cmul_p521_alt.S @@ -69,86 +69,86 @@ _bignum_cmul_p521_alt: // Save additional registers to use - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 // Shuffle inputs (since we want %rdx for the high parts of products) - movq %rdx, x + movq %rdx, x // Multiply as [d9; ...; d0] = c * x. - movq (x), a - mulq m - movq a, d0 - movq d, d1 - - movq 8(x), a - mulq m - xorq d2, d2 - addq a, d1 - adcq d, d2 - - movq 16(x), a - mulq m - xorq d3, d3 - addq a, d2 - adcq d, d3 - - movq 24(x), a - mulq m - xorq d4, d4 - addq a, d3 - adcq d, d4 - - movq 32(x), a - mulq m - xorq d5, d5 - addq a, d4 - adcq d, d5 - - movq 40(x), a - mulq m - xorq d6, d6 - addq a, d5 - adcq d, d6 - - movq 48(x), a - mulq m - xorq d7, d7 - addq a, d6 - adcq d, d7 - - movq 56(x), a - mulq m - addq a, d7 - movq 64(x), a - movq $0, d8 - adcq d, d8 - mulq m - xorq d9, d9 - addq a, d8 - adcq d, d9 + movq (x), a + mulq m + movq a, d0 + movq d, d1 + + movq 8(x), a + mulq m + xorq d2, d2 + addq a, d1 + adcq d, d2 + + movq 16(x), a + mulq m + xorq d3, d3 + addq a, d2 + adcq d, d3 + + movq 24(x), a + mulq m + xorq d4, d4 + addq a, d3 + adcq d, d4 + + movq 32(x), a + mulq m + xorq d5, d5 + addq a, d4 + adcq d, d5 + + movq 40(x), a + mulq m + xorq d6, d6 + addq a, d5 + adcq d, d6 + + movq 48(x), a + mulq m + xorq d7, d7 + addq a, d6 + adcq d, d7 + + movq 56(x), a + mulq m + addq a, d7 + movq 64(x), a + movq $0, d8 + adcq d, d8 + mulq m + xorq d9, d9 + addq a, d8 + adcq d, d9 // Create an AND "dd" of digits d7,...,d1, a computation we hope will // get nicely interleaved with the multiplication chain above, though // we can't do so directly as we are using the same register %rax. - movq d1, dd - andq d2, dd - andq d3, dd - andq d4, dd - andq d5, dd - andq d6, dd - andq d7, dd + movq d1, dd + andq d2, dd + andq d3, dd + andq d4, dd + andq d5, dd + andq d6, dd + andq d7, dd // Extract the high part h==d9 and mask off the low part l = [d8;d7;...;d0] // but stuff d8 with 1 bits at the left to ease a comparison below - shldq $55, d8, h - orq $~0x1FF, d8 + shldq $55, d8, h + orq $~0x1FF, d8 // Decide whether h + l >= p_521 <=> h + l + 1 >= 2^521. Since this can only // happen if digits d7,...d1 are all 1s, we use the AND of them "dd" to @@ -157,46 +157,46 @@ _bignum_cmul_p521_alt: // Since x was assumed reduced, h cannot be maximal, so the "lea" is safe, // i.e. does not carry or wrap round. 
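// (The carry out of the low-digit addition feeds dd, which propagates it
// further only when d1,...,d7 are all 1s; the 1-stuffed d8 then settles
// the comparison against 2^521.)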
- leaq 1(h), c - addq d0, c - movl $0, cshort - adcq c, dd - movq d8, a - adcq c, a + leaq 1(h), c + addq d0, c + movl $0, cshort + adcq c, dd + movq d8, a + adcq c, a // Now if CF is set we want (h + l) - p_521 = (h + l + 1) - 2^521 // while otherwise we want just h + l. So mask h + l + CF to 521 bits. // This masking also gets rid of the stuffing with 1s we did above. // Write back the digits as they are generated. - adcq h, d0 - movq d0, (z) - adcq c, d1 - movq d1, 8(z) - adcq c, d2 - movq d2, 16(z) - adcq c, d3 - movq d3, 24(z) - adcq c, d4 - movq d4, 32(z) - adcq c, d5 - movq d5, 40(z) - adcq c, d6 - movq d6, 48(z) - adcq c, d7 - movq d7, 56(z) - adcq c, d8 - andq $0x1FF, d8 - movq d8, 64(z) + adcq h, d0 + movq d0, (z) + adcq c, d1 + movq d1, 8(z) + adcq c, d2 + movq d2, 16(z) + adcq c, d3 + movq d3, 24(z) + adcq c, d4 + movq d4, 32(z) + adcq c, d5 + movq d5, 40(z) + adcq c, d6 + movq d6, 48(z) + adcq c, d7 + movq d7, 56(z) + adcq c, d8 + andq $0x1FF, d8 + movq d8, 64(z) // Restore registers and return - popq %r13 - popq %r12 - popq %rbp - popq %rbx + popq %r13 + popq %r12 + popq %rbp + popq %rbx - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p521/bignum_fromlebytes_p521.S b/x86_att/p521/bignum_fromlebytes_p521.S index 2d0069c46a..4da552b090 100644 --- a/x86_att/p521/bignum_fromlebytes_p521.S +++ b/x86_att/p521/bignum_fromlebytes_p521.S @@ -39,35 +39,35 @@ bignum_fromlebytes_p521: _bignum_fromlebytes_p521: - movq (x), a - movq a, (z) + movq (x), a + movq a, (z) - movq 8(x), a - movq a, 8(z) + movq 8(x), a + movq a, 8(z) - movq 16(x), a - movq a, 16(z) + movq 16(x), a + movq a, 16(z) - movq 24(x), a - movq a, 24(z) + movq 24(x), a + movq a, 24(z) - movq 32(x), a - movq a, 32(z) + movq 32(x), a + movq a, 32(z) - movq 40(x), a - movq a, 40(z) + movq 40(x), a + movq a, 40(z) - movq 48(x), a - movq a, 48(z) + movq 48(x), a + movq a, 48(z) - movq 56(x), a - movq a, 56(z) + movq 56(x), a + movq a, 56(z) - xorl %eax, %eax - movw 64(x), %ax - movq a, 64(z) + xorl %eax, %eax + movw 64(x), %ax + movq a, 64(z) - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p521/bignum_half_p521.S b/x86_att/p521/bignum_half_p521.S index bdca1839d6..84b244a238 100644 --- a/x86_att/p521/bignum_half_p521.S +++ b/x86_att/p521/bignum_half_p521.S @@ -52,49 +52,49 @@ _bignum_half_p521: // We do a 521-bit rotation one bit right, since 2^521 == 1 (mod p_521) - movq (x), d0 - movl $1, ashort - andq d0, a + movq (x), d0 + movl $1, ashort + andq d0, a - movq 8(x), d1 - shrdq $1, d1, d0 - movq d0, (z) + movq 8(x), d1 + shrdq $1, d1, d0 + movq d0, (z) - movq 16(x), d2 - shrdq $1, d2, d1 - movq d1, 8(z) + movq 16(x), d2 + shrdq $1, d2, d1 + movq d1, 8(z) - movq 24(x), d3 - shrdq $1, d3, d2 - movq d2, 16(z) + movq 24(x), d3 + shrdq $1, d3, d2 + movq d2, 16(z) - movq 32(x), d4 - shrdq $1, d4, d3 - movq d3, 24(z) + movq 32(x), d4 + shrdq $1, d4, d3 + movq d3, 24(z) - movq 40(x), d5 - shrdq $1, d5, d4 - movq d4, 32(z) + movq 40(x), d5 + shrdq $1, d5, d4 + movq d4, 32(z) - movq 48(x), d6 - shrdq $1, d6, d5 - movq d5, 40(z) + movq 48(x), d6 + shrdq $1, d6, d5 + movq d5, 40(z) - movq 56(x), d7 - shrdq $1, d7, d6 - movq d6, 48(z) + movq 56(x), d7 + shrdq $1, d7, d6 + movq d6, 48(z) - movq 64(x), d8 - shrdq $1, d8, d7 - movq d7, 56(z) + movq 64(x), d8 + shrdq $1, d8, d7 + movq d7, 56(z) - shlq $55, d8 - shrdq $56, a, d8 - movq d8, 64(z) + shlq $55, d8 + shrdq $56, a, d8 + movq d8, 64(z) // Return - ret + 
ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p521/bignum_neg_p521.S b/x86_att/p521/bignum_neg_p521.S index d3f4c09a6f..5c865180bc 100644 --- a/x86_att/p521/bignum_neg_p521.S +++ b/x86_att/p521/bignum_neg_p521.S @@ -43,57 +43,57 @@ _bignum_neg_p521: // Load most inputs (into the limited registers) and OR all of them to get p - movq (x), d0 - movq d0, p - movq 8(x), d1 - orq d1, p - movq 16(x), d2 - orq d2, p - movq 24(x), d3 - orq d3, p - movq 32(x), d4 - orq d4, p - movq 40(x), d5 - orq d5, p - orq 48(x), p - orq 56(x), p - orq 64(x), p + movq (x), d0 + movq d0, p + movq 8(x), d1 + orq d1, p + movq 16(x), d2 + orq d2, p + movq 24(x), d3 + orq d3, p + movq 32(x), d4 + orq d4, p + movq 40(x), d5 + orq d5, p + orq 48(x), p + orq 56(x), p + orq 64(x), p // Turn p into a bitmask for "input is nonzero", so that we avoid doing // -0 = p_521 and hence maintain strict modular reduction - negq p - sbbq p, p + negq p + sbbq p, p // Since p_521 is all 1s, the subtraction is just an exclusive-or with p // to give an optional inversion, with a slight fiddle for the top digit. - xorq p, d0 - movq d0, (z) - xorq p, d1 - movq d1, 8(z) - xorq p, d2 - movq d2, 16(z) - xorq p, d3 - movq d3, 24(z) - xorq p, d4 - movq d4, 32(z) - xorq p, d5 - movq d5, 40(z) - movq 48(x), d0 - xorq p, d0 - movq d0, 48(z) - movq 56(x), d1 - xorq p, d1 - movq d1, 56(z) - movq 64(x), d2 - andq $0x1FF, p - xorq p, d2 - movq d2, 64(z) + xorq p, d0 + movq d0, (z) + xorq p, d1 + movq d1, 8(z) + xorq p, d2 + movq d2, 16(z) + xorq p, d3 + movq d3, 24(z) + xorq p, d4 + movq d4, 32(z) + xorq p, d5 + movq d5, 40(z) + movq 48(x), d0 + xorq p, d0 + movq d0, 48(z) + movq 56(x), d1 + xorq p, d1 + movq d1, 56(z) + movq 64(x), d2 + andq $0x1FF, p + xorq p, d2 + movq d2, 64(z) // Return - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p521/bignum_optneg_p521.S b/x86_att/p521/bignum_optneg_p521.S index b4e87a5f70..ebd48f5d2f 100644 --- a/x86_att/p521/bignum_optneg_p521.S +++ b/x86_att/p521/bignum_optneg_p521.S @@ -45,59 +45,59 @@ _bignum_optneg_p521: // Load most inputs (into the limited registers) and OR all of them to get q - movq (x), d0 - movq d0, q - movq 8(x), d1 - orq d1, q - movq 16(x), d2 - orq d2, q - movq 24(x), d3 - orq d3, q - movq 32(x), d4 - orq d4, q - orq 40(x), q - orq 48(x), q - orq 56(x), q - orq 64(x), q + movq (x), d0 + movq d0, q + movq 8(x), d1 + orq d1, q + movq 16(x), d2 + orq d2, q + movq 24(x), d3 + orq d3, q + movq 32(x), d4 + orq d4, q + orq 40(x), q + orq 48(x), q + orq 56(x), q + orq 64(x), q // Turn q into a bitmask for "input is nonzero and p is nonzero", so that // we avoid doing -0 = p_521 and hence maintain strict modular reduction - negq q - sbbq q, q - testq p, p - cmovzq p, q + negq q + sbbq q, q + testq p, p + cmovzq p, q // Since p_521 is all 1s, the subtraction is just an exclusive-or with q // to give an optional inversion, with a slight fiddle for the top digit. 
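// (The fiddle: the top digit carries only 9 significant bits, so the
// mask is first cut down by "andq $0x1FF, q" before the final XOR.)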
- xorq q, d0 - movq d0, (z) - xorq q, d1 - movq d1, 8(z) - xorq q, d2 - movq d2, 16(z) - xorq q, d3 - movq d3, 24(z) - xorq q, d4 - movq d4, 32(z) - movq 40(x), d0 - xorq q, d0 - movq d0, 40(z) - movq 48(x), d1 - xorq q, d1 - movq d1, 48(z) - movq 56(x), d2 - xorq q, d2 - movq d2, 56(z) - movq 64(x), d3 - andq $0x1FF, q - xorq q, d3 - movq d3, 64(z) + xorq q, d0 + movq d0, (z) + xorq q, d1 + movq d1, 8(z) + xorq q, d2 + movq d2, 16(z) + xorq q, d3 + movq d3, 24(z) + xorq q, d4 + movq d4, 32(z) + movq 40(x), d0 + xorq q, d0 + movq d0, 40(z) + movq 48(x), d1 + xorq q, d1 + movq d1, 48(z) + movq 56(x), d2 + xorq q, d2 + movq d2, 56(z) + movq 64(x), d3 + andq $0x1FF, q + xorq q, d3 + movq d3, 64(z) // Return - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p521/bignum_tolebytes_p521.S b/x86_att/p521/bignum_tolebytes_p521.S index a0b13f03f8..b64fc9bbe7 100644 --- a/x86_att/p521/bignum_tolebytes_p521.S +++ b/x86_att/p521/bignum_tolebytes_p521.S @@ -39,34 +39,34 @@ bignum_tolebytes_p521: _bignum_tolebytes_p521: - movq (x), a - movq a, (z) + movq (x), a + movq a, (z) - movq 8(x), a - movq a, 8(z) + movq 8(x), a + movq a, 8(z) - movq 16(x), a - movq a, 16(z) + movq 16(x), a + movq a, 16(z) - movq 24(x), a - movq a, 24(z) + movq 24(x), a + movq a, 24(z) - movq 32(x), a - movq a, 32(z) + movq 32(x), a + movq a, 32(z) - movq 40(x), a - movq a, 40(z) + movq 40(x), a + movq a, 40(z) - movq 48(x), a - movq a, 48(z) + movq 48(x), a + movq a, 48(z) - movq 56(x), a - movq a, 56(z) + movq 56(x), a + movq a, 56(z) - movq 64(x), a - movw %ax, 64(z) + movq 64(x), a + movw %ax, 64(z) - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p521/bignum_triple_p521.S b/x86_att/p521/bignum_triple_p521.S index e509fa5a29..0b1f397cad 100644 --- a/x86_att/p521/bignum_triple_p521.S +++ b/x86_att/p521/bignum_triple_p521.S @@ -53,103 +53,103 @@ _bignum_triple_p521: // Save more registers to play with - pushq %rbx - pushq %r12 + pushq %rbx + pushq %r12 // Load the top (short) word first to compute the initial carry-in // Set OF according to bit 520, but *always* set CF to get a +1 bump - movq 64(x), m - movq m, d8 - shlq $54, m - addq m, m - stc + movq 64(x), m + movq m, d8 + shlq $54, m + addq m, m + stc // Use a double carry chain to compute x' + x + 1 where x' is a // 1-bit left rotation of x; this is then == 3 * x + 1 (mod p_521) // This gives us s = [d8;d7;d6;d5;d4;d3;d2;d1;d0] = x + x' + 1. 
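// (Rotating left by one bit doubles x modulo p_521, since 2^521 == 1
// (mod p_521), so x + x' == 3 * x there; the extra 1 sets up the
// comparison s >= 2^521 made below.)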
- movq (x), m - movq m, d0 - adcxq m, m - adoxq m, d0 - movq 8(x), m - movq m, d1 - adcxq m, m - adoxq m, d1 - movq 16(x), m - movq m, d2 - adcxq m, m - adoxq m, d2 - movq 24(x), m - movq m, d3 - adcxq m, m - adoxq m, d3 - movq 32(x), m - movq m, d4 - adcxq m, m - adoxq m, d4 - movq 40(x), m - movq m, d5 - adcxq m, m - adoxq m, d5 - movq 48(x), m - movq m, d6 - adcxq m, m - adoxq m, d6 - movq 56(x), m - movq m, d7 - adcxq m, m - adoxq m, d7 + movq (x), m + movq m, d0 + adcxq m, m + adoxq m, d0 + movq 8(x), m + movq m, d1 + adcxq m, m + adoxq m, d1 + movq 16(x), m + movq m, d2 + adcxq m, m + adoxq m, d2 + movq 24(x), m + movq m, d3 + adcxq m, m + adoxq m, d3 + movq 32(x), m + movq m, d4 + adcxq m, m + adoxq m, d4 + movq 40(x), m + movq m, d5 + adcxq m, m + adoxq m, d5 + movq 48(x), m + movq m, d6 + adcxq m, m + adoxq m, d6 + movq 56(x), m + movq m, d7 + adcxq m, m + adoxq m, d7 // The last word is slightly more intricate: we naturally end up adding // 2 * top bit when we shouldn't (because it's a rotation and we've already // added it at the LSB position) but then compensate by subtracting it. - movq d8, m - adcxq m, m - adoxq m, d8 - andq $0x200, m - subq m, d8 + movq d8, m + adcxq m, m + adoxq m, d8 + andq $0x200, m + subq m, d8 // Now x + x' >= p_521 <=> s = x + x' + 1 >= 2^521 // Make m = 512 * [x + x' >= p_521] - movl $512, mshort - andq d8, m + movl $512, mshort + andq d8, m // Now if x + x' >= p_521, we want (x + x') - p_521 = s - 2^521 // while otherwise we want x + x' = s - 1 // We use the mask m both as an operand and to generate the dual carry // Write back the results as generated - cmpq $512, m - - sbbq $0, d0 - movq d0, (z) - sbbq $0, d1 - movq d1, 8(z) - sbbq $0, d2 - movq d2, 16(z) - sbbq $0, d3 - movq d3, 24(z) - sbbq $0, d4 - movq d4, 32(z) - sbbq $0, d5 - movq d5, 40(z) - sbbq $0, d6 - movq d6, 48(z) - sbbq $0, d7 - movq d7, 56(z) - sbbq m, d8 - movq d8, 64(z) + cmpq $512, m + + sbbq $0, d0 + movq d0, (z) + sbbq $0, d1 + movq d1, 8(z) + sbbq $0, d2 + movq d2, 16(z) + sbbq $0, d3 + movq d3, 24(z) + sbbq $0, d4 + movq d4, 32(z) + sbbq $0, d5 + movq d5, 40(z) + sbbq $0, d6 + movq d6, 48(z) + sbbq $0, d7 + movq d7, 56(z) + sbbq m, d8 + movq d8, 64(z) // Restore registers and return - popq %r12 - popq %rbx + popq %r12 + popq %rbx - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/x86_att/p521/bignum_triple_p521_alt.S b/x86_att/p521/bignum_triple_p521_alt.S index f1193f28d3..966b42408d 100644 --- a/x86_att/p521/bignum_triple_p521_alt.S +++ b/x86_att/p521/bignum_triple_p521_alt.S @@ -54,9 +54,9 @@ _bignum_triple_p521_alt: // Save additional registers to use - pushq %rbx - pushq %rbp - pushq %r12 + pushq %rbx + pushq %rbp + pushq %r12 // Let [d8;...;d0] = x' + x + 1 where x' is a rotation left by 1 bit // as a 521-bit quantity. This is == 3 * x + 1 (mod p_521) and keeps @@ -65,99 +65,99 @@ _bignum_triple_p521_alt: // at the bottom instead of the top, so the top two digits are a bit // more intricate. 
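// (Concretely, the top word holds 9 significant bits, so the "shrq $8"
// below extracts the bit that the rotation wraps around to the bottom,
// and "incq" then adds the +1 there as well.)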
- movq $3, m - movq 64(x), d0 - shrq $8, d0 - incq d0 - - movq (x), a - mulq m - xorq d1, d1 - addq a, d0 - adcq d, d1 - - movq 8(x), a - mulq m - xorq d2, d2 - addq a, d1 - adcq d, d2 - - movq 16(x), a - mulq m - xorq d3, d3 - addq a, d2 - adcq d, d3 - - movq 24(x), a - mulq m - xorq d4, d4 - addq a, d3 - adcq d, d4 - - movq 32(x), a - mulq m - xorq d5, d5 - addq a, d4 - adcq d, d5 - - movq 40(x), a - mulq m - xorq d6, d6 - addq a, d5 - adcq d, d6 - - movq 48(x), a - mulq m - movq 56(x), d7 - movq 64(x), d8 - addq a, d6 - adcq $0, d - - movq $0xFF, a - andq d8, a - leaq (d8,a,2), d8 - - xorl %eax, %eax - addq d7, d - adcq a, d8 - addq d7, d7 - adcq a, d8 - addq d, d7 - adcq a, d8 + movq $3, m + movq 64(x), d0 + shrq $8, d0 + incq d0 + + movq (x), a + mulq m + xorq d1, d1 + addq a, d0 + adcq d, d1 + + movq 8(x), a + mulq m + xorq d2, d2 + addq a, d1 + adcq d, d2 + + movq 16(x), a + mulq m + xorq d3, d3 + addq a, d2 + adcq d, d3 + + movq 24(x), a + mulq m + xorq d4, d4 + addq a, d3 + adcq d, d4 + + movq 32(x), a + mulq m + xorq d5, d5 + addq a, d4 + adcq d, d5 + + movq 40(x), a + mulq m + xorq d6, d6 + addq a, d5 + adcq d, d6 + + movq 48(x), a + mulq m + movq 56(x), d7 + movq 64(x), d8 + addq a, d6 + adcq $0, d + + movq $0xFF, a + andq d8, a + leaq (d8,a,2), d8 + + xorl %eax, %eax + addq d7, d + adcq a, d8 + addq d7, d7 + adcq a, d8 + addq d, d7 + adcq a, d8 // Now d8 >= 2^9 <=> x' + x + 1 >= 2^521 <=> x' + x >= p_521. // If that is the case we want (x' + x) - p_521 = (x' + x + 1) - 2^521 // while otherwise we want just x' + x = (x' + x + 1) - 1. - cmpq $0x200, d8 - - sbbq a, d0 - movq d0, (z) - sbbq a, d1 - movq d1, 8(z) - sbbq a, d2 - movq d2, 16(z) - sbbq a, d3 - movq d3, 24(z) - sbbq a, d4 - movq d4, 32(z) - sbbq a, d5 - movq d5, 40(z) - sbbq a, d6 - movq d6, 48(z) - sbbq a, d7 - movq d7, 56(z) - sbbq a, d8 - andq $0x1FF, d8 - movq d8, 64(z) + cmpq $0x200, d8 + + sbbq a, d0 + movq d0, (z) + sbbq a, d1 + movq d1, 8(z) + sbbq a, d2 + movq d2, 16(z) + sbbq a, d3 + movq d3, 24(z) + sbbq a, d4 + movq d4, 32(z) + sbbq a, d5 + movq d5, 40(z) + sbbq a, d6 + movq d6, 48(z) + sbbq a, d7 + movq d7, 56(z) + sbbq a, d8 + andq $0x1FF, d8 + movq d8, 64(z) // Restore registers and return - popq %r12 - popq %rbp - popq %rbx + popq %r12 + popq %rbp + popq %rbx - ret + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits From 9630338de16837caf2e52b2593724c8d16cd7098 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 30 Mar 2022 17:44:08 -0700 Subject: [PATCH 03/42] Add Windows ABI wrappers The x86 ABI on Windows differs from the "standard" one used on Linux, Mac OS etc. For our purposes the relevant differences are that RDI and RSI are callee-saved (need to be preserved if modified), and that the input arguments are in different places. Integer return values are still in RAX. Here are the conventions for the first 6 integer arguments (the maximum that s2n-bignum functions rely on so far): Argument number Standard Windows 1 RDI RCX 2 RSI RDX 3 RDX R8 4 RCX R9 5 R8 [RSP+40] 6 R9 [RSP+48] Here we add a wrapper round each s2n-bignum function that optionally (controlled by the WINDOWS_ABI variable in preprocessing) makes the function compatible with the Windows ABI. This is done in a simplistic way simply by pushing the extra callee-saved registers (RSI and RDI), shuffling arguments (only the ones actually used by that function), performing the same computation in exactly the original way, and finally popping back the extra registers. 
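Schematically, the wrapper for a typical two-argument function has the
shape below. This is a sketch only, assuming the WINDOWS_ABI
preprocessor switch described above; the per-function argument shuffles
vary with how many arguments the function actually uses.

#if WINDOWS_ABI
        pushq   %rdi
        pushq   %rsi
        movq    %rcx, %rdi      // Windows argument 1 -> standard argument 1
        movq    %rdx, %rsi      // Windows argument 2 -> standard argument 2
#endif

// ... unchanged function body, written against %rdi and %rsi ...

#if WINDOWS_ABI
        popq    %rsi
        popq    %rdi
#endif
        ret
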
The Makefile currently selects this WINDOWS_ABI mode when `uname -s` gives "CYGWIN_NT-10.0", as on the main test platform, but this can be configured. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/8ac40107b3ac9b57a1027a07a6bac5065be6b81e --- x86_att/curve25519/bignum_neg_p25519.S | 12 ++++++++++++ x86_att/p384/bignum_add_p384.S | 13 +++++++++++++ x86_att/p384/bignum_bigendian_6.S | 12 ++++++++++++ x86_att/p384/bignum_cmul_p384.S | 13 +++++++++++++ x86_att/p384/bignum_cmul_p384_alt.S | 13 +++++++++++++ x86_att/p384/bignum_deamont_p384.S | 12 ++++++++++++ x86_att/p384/bignum_deamont_p384_alt.S | 12 ++++++++++++ x86_att/p384/bignum_demont_p384.S | 12 ++++++++++++ x86_att/p384/bignum_demont_p384_alt.S | 12 ++++++++++++ x86_att/p384/bignum_double_p384.S | 12 ++++++++++++ x86_att/p384/bignum_half_p384.S | 12 ++++++++++++ x86_att/p384/bignum_littleendian_6.S | 12 ++++++++++++ x86_att/p384/bignum_mod_n384.S | 13 +++++++++++++ x86_att/p384/bignum_mod_n384_6.S | 12 ++++++++++++ x86_att/p384/bignum_mod_n384_alt.S | 13 +++++++++++++ x86_att/p384/bignum_mod_p384.S | 13 +++++++++++++ x86_att/p384/bignum_mod_p384_6.S | 12 ++++++++++++ x86_att/p384/bignum_mod_p384_alt.S | 13 +++++++++++++ x86_att/p384/bignum_montmul_p384.S | 13 +++++++++++++ x86_att/p384/bignum_montmul_p384_alt.S | 13 +++++++++++++ x86_att/p384/bignum_montsqr_p384.S | 12 ++++++++++++ x86_att/p384/bignum_montsqr_p384_alt.S | 12 ++++++++++++ x86_att/p384/bignum_mux_6.S | 14 ++++++++++++++ x86_att/p384/bignum_neg_p384.S | 12 ++++++++++++ x86_att/p384/bignum_nonzero_6.S | 11 +++++++++++ x86_att/p384/bignum_optneg_p384.S | 13 +++++++++++++ x86_att/p384/bignum_sub_p384.S | 13 +++++++++++++ x86_att/p384/bignum_tomont_p384.S | 12 ++++++++++++ x86_att/p384/bignum_tomont_p384_alt.S | 12 ++++++++++++ x86_att/p384/bignum_triple_p384.S | 12 ++++++++++++ x86_att/p384/bignum_triple_p384_alt.S | 12 ++++++++++++ x86_att/p521/bignum_add_p521.S | 13 +++++++++++++ x86_att/p521/bignum_cmul_p521.S | 13 +++++++++++++ x86_att/p521/bignum_cmul_p521_alt.S | 13 +++++++++++++ x86_att/p521/bignum_deamont_p521.S | 12 ++++++++++++ x86_att/p521/bignum_demont_p521.S | 12 ++++++++++++ x86_att/p521/bignum_double_p521.S | 12 ++++++++++++ x86_att/p521/bignum_fromlebytes_p521.S | 12 ++++++++++++ x86_att/p521/bignum_half_p521.S | 12 ++++++++++++ x86_att/p521/bignum_mod_n521_9.S | 12 ++++++++++++ x86_att/p521/bignum_mod_n521_9_alt.S | 12 ++++++++++++ x86_att/p521/bignum_mod_p521_9.S | 12 ++++++++++++ x86_att/p521/bignum_montmul_p521.S | 13 +++++++++++++ x86_att/p521/bignum_montmul_p521_alt.S | 13 +++++++++++++ x86_att/p521/bignum_montsqr_p521.S | 12 ++++++++++++ x86_att/p521/bignum_montsqr_p521_alt.S | 12 ++++++++++++ x86_att/p521/bignum_mul_p521.S | 13 +++++++++++++ x86_att/p521/bignum_mul_p521_alt.S | 13 +++++++++++++ x86_att/p521/bignum_neg_p521.S | 12 ++++++++++++ x86_att/p521/bignum_optneg_p521.S | 13 +++++++++++++ x86_att/p521/bignum_sqr_p521.S | 12 ++++++++++++ x86_att/p521/bignum_sqr_p521_alt.S | 12 ++++++++++++ x86_att/p521/bignum_sub_p521.S | 13 +++++++++++++ x86_att/p521/bignum_tolebytes_p521.S | 12 ++++++++++++ x86_att/p521/bignum_tomont_p521.S | 12 ++++++++++++ x86_att/p521/bignum_triple_p521.S | 12 ++++++++++++ x86_att/p521/bignum_triple_p521_alt.S | 12 ++++++++++++ 57 files changed, 705 insertions(+) diff --git a/x86_att/curve25519/bignum_neg_p25519.S b/x86_att/curve25519/bignum_neg_p25519.S index 7e69d65164..958f3a62cb 100644 --- a/x86_att/curve25519/bignum_neg_p25519.S +++ b/x86_att/curve25519/bignum_neg_p25519.S @@ -21,6 +21,7 @@ // 
(uint64_t z[static 4], uint64_t x[static 4]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -44,6 +45,13 @@ bignum_neg_p25519: _bignum_neg_p25519: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Load the 4 digits of x and let q be an OR of all the digits movq (x), n0 @@ -78,6 +86,10 @@ _bignum_neg_p25519: btr $63, q movq q, 24(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_add_p384.S b/x86_att/p384/bignum_add_p384.S index 33f7f45322..82f513c70f 100644 --- a/x86_att/p384/bignum_add_p384.S +++ b/x86_att/p384/bignum_add_p384.S @@ -21,6 +21,7 @@ // (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); // // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y +// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- @@ -52,6 +53,14 @@ bignum_add_p384: _bignum_add_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Add the inputs as 2^384 * c + [d5;d4;d3;d2;d1;d0] = x + y // This could be combined with the next block using ADCX and ADOX. @@ -119,6 +128,10 @@ _bignum_add_p384: sbbq $0, d5 movq d5, 40(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_bigendian_6.S b/x86_att/p384/bignum_bigendian_6.S index 268ed5b90e..7286fec58c 100644 --- a/x86_att/p384/bignum_bigendian_6.S +++ b/x86_att/p384/bignum_bigendian_6.S @@ -33,6 +33,7 @@ // word order, this is simply byte reversal and is implemented as such. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -62,6 +63,13 @@ _bignum_frombebytes_6: bignum_tobebytes_6: _bignum_tobebytes_6: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // 0 and 5 words movq (x), a @@ -89,6 +97,10 @@ _bignum_tobebytes_6: movq a, 24(z) movq b, 16(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_cmul_p384.S b/x86_att/p384/bignum_cmul_p384.S index 13a7c0085d..97618b19b0 100644 --- a/x86_att/p384/bignum_cmul_p384.S +++ b/x86_att/p384/bignum_cmul_p384.S @@ -22,6 +22,7 @@ // (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]); // // Standard x86-64 ABI: RDI = z, RSI = c, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- @@ -56,6 +57,14 @@ bignum_cmul_p384: _bignum_cmul_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // We seem to need (just!) 
one extra register, which we need to save and restore pushq %r12 @@ -141,6 +150,10 @@ _bignum_cmul_p384: // Return popq %r12 +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_cmul_p384_alt.S b/x86_att/p384/bignum_cmul_p384_alt.S index 8dd8d76068..93a54b5d60 100644 --- a/x86_att/p384/bignum_cmul_p384_alt.S +++ b/x86_att/p384/bignum_cmul_p384_alt.S @@ -22,6 +22,7 @@ // (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]); // // Standard x86-64 ABI: RDI = z, RSI = c, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- @@ -60,6 +61,14 @@ bignum_cmul_p384_alt: _bignum_cmul_p384_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // We seem to need (just!) one extra register, which we need to save and restore pushq %r12 @@ -166,6 +175,10 @@ _bignum_cmul_p384_alt: // Return popq %r12 +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_deamont_p384.S b/x86_att/p384/bignum_deamont_p384.S index 733be9a487..18a29f3218 100644 --- a/x86_att/p384/bignum_deamont_p384.S +++ b/x86_att/p384/bignum_deamont_p384.S @@ -24,6 +24,7 @@ // "almost" meaning any 6-digit input will work, with no range restriction. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -82,6 +83,13 @@ bignum_deamont_p384: _bignum_deamont_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save more registers to play with pushq %r12 @@ -171,6 +179,10 @@ _bignum_deamont_p384: popq %r13 popq %r12 +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_deamont_p384_alt.S b/x86_att/p384/bignum_deamont_p384_alt.S index 6c5a606d19..ca2d56e104 100644 --- a/x86_att/p384/bignum_deamont_p384_alt.S +++ b/x86_att/p384/bignum_deamont_p384_alt.S @@ -24,6 +24,7 @@ // "almost" meaning any 6-digit input will work, with no range restriction. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -82,6 +83,13 @@ bignum_deamont_p384_alt: _bignum_deamont_p384_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save more registers to play with pushq %r12 @@ -171,6 +179,10 @@ _bignum_deamont_p384_alt: popq %r13 popq %r12 +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_demont_p384.S b/x86_att/p384/bignum_demont_p384.S index 059993b8ba..004041fe11 100644 --- a/x86_att/p384/bignum_demont_p384.S +++ b/x86_att/p384/bignum_demont_p384.S @@ -24,6 +24,7 @@ // use the variant "bignum_deamont_p384" instead. 
// // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -74,6 +75,13 @@ bignum_demont_p384: _bignum_demont_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save more registers to play with pushq %r12 @@ -126,6 +134,10 @@ _bignum_demont_p384: popq %r13 popq %r12 +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_demont_p384_alt.S b/x86_att/p384/bignum_demont_p384_alt.S index 1ca60c55d2..49997c920c 100644 --- a/x86_att/p384/bignum_demont_p384_alt.S +++ b/x86_att/p384/bignum_demont_p384_alt.S @@ -24,6 +24,7 @@ // use the variant "bignum_deamont_p384" instead. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -74,6 +75,13 @@ bignum_demont_p384_alt: _bignum_demont_p384_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save more registers to play with pushq %r12 @@ -126,6 +134,10 @@ _bignum_demont_p384_alt: popq %r13 popq %r12 +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_double_p384.S b/x86_att/p384/bignum_double_p384.S index dcc0a72629..20b47c5c03 100644 --- a/x86_att/p384/bignum_double_p384.S +++ b/x86_att/p384/bignum_double_p384.S @@ -21,6 +21,7 @@ // (uint64_t z[static 6], uint64_t x[static 6]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -50,6 +51,13 @@ bignum_double_p384: _bignum_double_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Load the input and double it so that 2^384 * c + [d5;d4;d3;d2;d1;d0] = 2 * x // Could also consider using shld to decouple carries *or* combining this // and the next block into a double carry chain with ADCX and ADOX. @@ -118,6 +126,10 @@ _bignum_double_p384: sbbq $0, d5 movq d5, 40(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_half_p384.S b/x86_att/p384/bignum_half_p384.S index 5b58e3f536..d7020baf07 100644 --- a/x86_att/p384/bignum_half_p384.S +++ b/x86_att/p384/bignum_half_p384.S @@ -21,6 +21,7 @@ // (uint64_t z[static 6], uint64_t x[static 6]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -47,6 +48,13 @@ bignum_half_p384: _bignum_half_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Load lowest digit and get a mask for its lowest bit in d3 movq (x), a @@ -94,6 +102,10 @@ _bignum_half_p384: // Return +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_littleendian_6.S b/x86_att/p384/bignum_littleendian_6.S index 314167a7eb..a62797d102 100644 --- a/x86_att/p384/bignum_littleendian_6.S +++ b/x86_att/p384/bignum_littleendian_6.S @@ -32,6 +32,7 @@ // Since x86 is little-endian, this is just copying. 
// // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -54,6 +55,13 @@ _bignum_fromlebytes_6: bignum_tolebytes_6: _bignum_tolebytes_6: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + movq (x), a movq a, (z) @@ -72,6 +80,10 @@ _bignum_tolebytes_6: movq 40(x), a movq a, 40(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_mod_n384.S b/x86_att/p384/bignum_mod_n384.S index 1d50e536ec..50675e9d2a 100644 --- a/x86_att/p384/bignum_mod_n384.S +++ b/x86_att/p384/bignum_mod_n384.S @@ -23,6 +23,7 @@ // Reduction is modulo the group order of the NIST curve P-384. // // Standard x86-64 ABI: RDI = z, RSI = k, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // ---------------------------------------------------------------------------- @@ -54,6 +55,14 @@ bignum_mod_n384: _bignum_mod_n384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save extra registers pushq %rbx @@ -191,6 +200,10 @@ writeback: popq %r13 popq %r12 popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret shortinput: diff --git a/x86_att/p384/bignum_mod_n384_6.S b/x86_att/p384/bignum_mod_n384_6.S index b866e70409..4a0a4ac564 100644 --- a/x86_att/p384/bignum_mod_n384_6.S +++ b/x86_att/p384/bignum_mod_n384_6.S @@ -23,6 +23,7 @@ // Reduction is modulo the group order of the NIST curve P-384. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -51,6 +52,13 @@ bignum_mod_n384_6: _bignum_mod_n384_6: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Load the input and compute x + (2^384 - n_384) movq $0x1313e695333ad68d, a @@ -106,6 +114,10 @@ _bignum_mod_n384_6: sbbq $0, d5 movq d5, 40(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_mod_n384_alt.S b/x86_att/p384/bignum_mod_n384_alt.S index bdce0a0926..6d2dc2b7f8 100644 --- a/x86_att/p384/bignum_mod_n384_alt.S +++ b/x86_att/p384/bignum_mod_n384_alt.S @@ -23,6 +23,7 @@ // Reduction is modulo the group order of the NIST curve P-384. 
// // Standard x86-64 ABI: RDI = z, RSI = k, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // ---------------------------------------------------------------------------- @@ -55,6 +56,14 @@ bignum_mod_n384_alt: _bignum_mod_n384_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save extra registers pushq %rbp @@ -195,6 +204,10 @@ writeback: popq %r12 popq %rbx popq %rbp +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret shortinput: diff --git a/x86_att/p384/bignum_mod_p384.S b/x86_att/p384/bignum_mod_p384.S index 7f3de8c076..c82561d6a6 100644 --- a/x86_att/p384/bignum_mod_p384.S +++ b/x86_att/p384/bignum_mod_p384.S @@ -21,6 +21,7 @@ // (uint64_t z[static 6], uint64_t k, uint64_t *x); // // Standard x86-64 ABI: RDI = z, RSI = k, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // ---------------------------------------------------------------------------- @@ -53,6 +54,14 @@ bignum_mod_p384: _bignum_mod_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save extra registers pushq %rbx @@ -190,6 +199,10 @@ writeback: popq %r13 popq %r12 popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret shortinput: diff --git a/x86_att/p384/bignum_mod_p384_6.S b/x86_att/p384/bignum_mod_p384_6.S index c275e83f9f..bbd12524a2 100644 --- a/x86_att/p384/bignum_mod_p384_6.S +++ b/x86_att/p384/bignum_mod_p384_6.S @@ -21,6 +21,7 @@ // (uint64_t z[static 6], uint64_t x[static 6]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -50,6 +51,13 @@ bignum_mod_p384_6: _bignum_mod_p384_6: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Load the input and subtract p_384 from it movq (x), d0 @@ -104,6 +112,10 @@ _bignum_mod_p384_6: sbbq $0, d5 movq d5, 40(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_mod_p384_alt.S b/x86_att/p384/bignum_mod_p384_alt.S index a4ef191f46..69ee474326 100644 --- a/x86_att/p384/bignum_mod_p384_alt.S +++ b/x86_att/p384/bignum_mod_p384_alt.S @@ -21,6 +21,7 @@ // (uint64_t z[static 6], uint64_t k, uint64_t *x); // // Standard x86-64 ABI: RDI = z, RSI = k, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // ---------------------------------------------------------------------------- @@ -57,6 +58,14 @@ bignum_mod_p384_alt: _bignum_mod_p384_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save extra registers pushq %rbx @@ -194,6 +203,10 @@ writeback: popq %r13 popq %r12 popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret shortinput: diff --git a/x86_att/p384/bignum_montmul_p384.S b/x86_att/p384/bignum_montmul_p384.S index e6b8ba5598..160c293430 100644 --- a/x86_att/p384/bignum_montmul_p384.S +++ b/x86_att/p384/bignum_montmul_p384.S @@ -25,6 +25,7 @@ // the "usual" case x < p_384 and y < p_384). 
// // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y +// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ----------------------------------------------------------------------------- @@ -97,6 +98,14 @@ bignum_montmul_p384: _bignum_montmul_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save more registers to play with pushq %rbx @@ -276,6 +285,10 @@ _bignum_montmul_p384: popq %rbp popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_montmul_p384_alt.S b/x86_att/p384/bignum_montmul_p384_alt.S index b684265236..3d24dc4ccf 100644 --- a/x86_att/p384/bignum_montmul_p384_alt.S +++ b/x86_att/p384/bignum_montmul_p384_alt.S @@ -25,6 +25,7 @@ // the "usual" case x < p_384 and y < p_384). // // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y +// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ----------------------------------------------------------------------------- @@ -119,6 +120,14 @@ bignum_montmul_p384_alt: _bignum_montmul_p384_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save more registers to play with pushq %rbx @@ -301,6 +310,10 @@ _bignum_montmul_p384_alt: popq %rbp popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_montsqr_p384.S b/x86_att/p384/bignum_montsqr_p384.S index 2e3d964597..bd0517b911 100644 --- a/x86_att/p384/bignum_montsqr_p384.S +++ b/x86_att/p384/bignum_montsqr_p384.S @@ -24,6 +24,7 @@ // guaranteed in particular if x < p_384 initially (the "intended" case). // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -94,6 +95,13 @@ bignum_montsqr_p384: _bignum_montsqr_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save more registers to play with pushq %rbx @@ -281,6 +289,10 @@ _bignum_montsqr_p384: popq %r12 popq %rbp popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_montsqr_p384_alt.S b/x86_att/p384/bignum_montsqr_p384_alt.S index 3a3f9a2715..5588835538 100644 --- a/x86_att/p384/bignum_montsqr_p384_alt.S +++ b/x86_att/p384/bignum_montsqr_p384_alt.S @@ -24,6 +24,7 @@ // guaranteed in particular if x < p_384 initially (the "intended" case). // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -116,6 +117,13 @@ bignum_montsqr_p384_alt: _bignum_montsqr_p384_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save more registers to play with pushq %rbx @@ -326,6 +334,10 @@ _bignum_montsqr_p384_alt: popq %r12 popq %rbp popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_mux_6.S b/x86_att/p384/bignum_mux_6.S index cc484bf9bb..92f222c080 100644 --- a/x86_att/p384/bignum_mux_6.S +++ b/x86_att/p384/bignum_mux_6.S @@ -24,6 +24,7 @@ // It is assumed that all numbers x, y and z have the same size 6 digits. 
// // Standard x86-64 ABI: RDI = p, RSI = z, RDX = x, RCX = y +// Microsoft x64 ABI: RCX = p, RDX = z, R8 = x, R9 = y // ---------------------------------------------------------------------------- @@ -41,6 +42,15 @@ bignum_mux_6: _bignum_mux_6: + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx + movq %r9, %rcx +#endif testq p, p movq (x), a @@ -73,6 +83,10 @@ _bignum_mux_6: cmovzq b, a movq a, 40(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_neg_p384.S b/x86_att/p384/bignum_neg_p384.S index 5a6c62ea53..42094e3bc0 100644 --- a/x86_att/p384/bignum_neg_p384.S +++ b/x86_att/p384/bignum_neg_p384.S @@ -20,6 +20,7 @@ // extern void bignum_neg_p384 (uint64_t z[static 6], uint64_t x[static 6]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -42,6 +43,13 @@ bignum_neg_p384: _bignum_neg_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Or together the input digits and create a bitmask q if this is nonzero, so // that we avoid doing -0 = p_384 and hence maintain strict modular reduction @@ -85,6 +93,10 @@ _bignum_neg_p384: movq n4, 32(z) movq q, 40(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_nonzero_6.S b/x86_att/p384/bignum_nonzero_6.S index 585aa1e58b..f75b3ebec2 100644 --- a/x86_att/p384/bignum_nonzero_6.S +++ b/x86_att/p384/bignum_nonzero_6.S @@ -20,6 +20,7 @@ // extern uint64_t bignum_nonzero_6(uint64_t x[static 6]); // // Standard x86-64 ABI: RDI = x, returns RAX +// Microsoft x64 ABI: RCX = x, returns RAX // ---------------------------------------------------------------------------- @@ -37,6 +38,12 @@ bignum_nonzero_6: _bignum_nonzero_6: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi +#endif + // Generate a = an OR of all the words in the bignum movq (x), a @@ -52,6 +59,10 @@ _bignum_nonzero_6: movl $1, dshort cmovnzq d, a +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_optneg_p384.S b/x86_att/p384/bignum_optneg_p384.S index f14f8dc682..1b20bb52e4 100644 --- a/x86_att/p384/bignum_optneg_p384.S +++ b/x86_att/p384/bignum_optneg_p384.S @@ -22,6 +22,7 @@ // (uint64_t z[static 6], uint64_t p, uint64_t x[static 6]); // // Standard x86-64 ABI: RDI = z, RSI = p, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = p, R8 = x // ---------------------------------------------------------------------------- @@ -45,6 +46,14 @@ bignum_optneg_p384: _bignum_optneg_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Adjust q by zeroing it if the input is zero (to avoid giving -0 = p_384, // which is not strictly reduced even though it's correct modulo p_384). 
// This step is redundant if we know a priori that the input is nonzero, which @@ -106,6 +115,10 @@ _bignum_optneg_p384: xorq q, n5 movq n5, 40(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_sub_p384.S b/x86_att/p384/bignum_sub_p384.S index 36322ed9db..2738c1b442 100644 --- a/x86_att/p384/bignum_sub_p384.S +++ b/x86_att/p384/bignum_sub_p384.S @@ -21,6 +21,7 @@ // (uint64_t z[static 6], uint64_t x[static 6], uint64_t y[static 6]); // // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y +// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- @@ -51,6 +52,14 @@ bignum_sub_p384: _bignum_sub_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Subtract the inputs as [d5;d4;d3;d2;d1;d0] = x - y (modulo 2^384) // Capture the top carry as a bitmask for the condition x < y @@ -103,6 +112,10 @@ _bignum_sub_p384: sbbq $0, d5 movq d5, 40(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_tomont_p384.S b/x86_att/p384/bignum_tomont_p384.S index 85105e4667..03e2ca53e4 100644 --- a/x86_att/p384/bignum_tomont_p384.S +++ b/x86_att/p384/bignum_tomont_p384.S @@ -21,6 +21,7 @@ // (uint64_t z[static 6], uint64_t x[static 6]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -96,6 +97,13 @@ bignum_tomont_p384: _bignum_tomont_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // We are essentially just doing a Montgomery multiplication of x and the // precomputed constant y = 2^768 mod p, so the code is almost the same // modulo a few registers and the change from loading y[i] to using constants, @@ -282,6 +290,10 @@ _bignum_tomont_p384: popq %r12 popq %rbp +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_tomont_p384_alt.S b/x86_att/p384/bignum_tomont_p384_alt.S index 6686e7804e..5eab13d194 100644 --- a/x86_att/p384/bignum_tomont_p384_alt.S +++ b/x86_att/p384/bignum_tomont_p384_alt.S @@ -21,6 +21,7 @@ // (uint64_t z[static 6], uint64_t x[static 6]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -114,6 +115,13 @@ bignum_tomont_p384_alt: _bignum_tomont_p384_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // We are essentially just doing a Montgomery multiplication of x and the // precomputed constant y = 2^768 mod p, so the code is almost the same // modulo a few registers and the change from loading y[i] to using constants, @@ -311,6 +319,10 @@ _bignum_tomont_p384_alt: popq %r12 popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_triple_p384.S b/x86_att/p384/bignum_triple_p384.S index 05464aa711..237f10b929 100644 --- a/x86_att/p384/bignum_triple_p384.S +++ b/x86_att/p384/bignum_triple_p384.S @@ -24,6 +24,7 @@ // and the result is always fully reduced, i.e. z = (3 * x) mod p_384. 
// // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -51,6 +52,13 @@ bignum_triple_p384: _bignum_triple_p384: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // We seem to need (just!) one extra register, which we need to save and restore pushq %rbx @@ -136,6 +144,10 @@ _bignum_triple_p384: // Return popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p384/bignum_triple_p384_alt.S b/x86_att/p384/bignum_triple_p384_alt.S index 9fb4a9df0d..7ae907863e 100644 --- a/x86_att/p384/bignum_triple_p384_alt.S +++ b/x86_att/p384/bignum_triple_p384_alt.S @@ -24,6 +24,7 @@ // and the result is always fully reduced, i.e. z = (3 * x) mod p_384. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -54,6 +55,13 @@ bignum_triple_p384_alt: _bignum_triple_p384_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // We seem to need (just!) one extra register, which we need to save and restore pushq %rbx @@ -146,6 +154,10 @@ _bignum_triple_p384_alt: // Return popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_add_p521.S b/x86_att/p521/bignum_add_p521.S index 71913b8108..34ad429c7e 100644 --- a/x86_att/p521/bignum_add_p521.S +++ b/x86_att/p521/bignum_add_p521.S @@ -21,6 +21,7 @@ // (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y +// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- @@ -51,6 +52,14 @@ bignum_add_p521: _bignum_add_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save more registers to play with pushq %rbx @@ -116,6 +125,10 @@ _bignum_add_p521: popq %r12 popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_cmul_p521.S b/x86_att/p521/bignum_cmul_p521.S index 04601fe680..d4381c99b3 100644 --- a/x86_att/p521/bignum_cmul_p521.S +++ b/x86_att/p521/bignum_cmul_p521.S @@ -22,6 +22,7 @@ // (uint64_t z[static 9], uint64_t c, uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = c, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- @@ -63,6 +64,14 @@ bignum_cmul_p521: _bignum_cmul_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save additional registers to use pushq %rbx @@ -162,6 +171,10 @@ _bignum_cmul_p521: popq %rbp popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_cmul_p521_alt.S b/x86_att/p521/bignum_cmul_p521_alt.S index 8dc1b72f4f..8dec8879a7 100644 --- a/x86_att/p521/bignum_cmul_p521_alt.S +++ b/x86_att/p521/bignum_cmul_p521_alt.S @@ -22,6 +22,7 @@ // (uint64_t z[static 9], uint64_t c, uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = c, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- @@ -67,6 
+68,14 @@ bignum_cmul_p521_alt: _bignum_cmul_p521_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save additional registers to use pushq %rbx @@ -196,6 +205,10 @@ _bignum_cmul_p521_alt: popq %rbp popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_deamont_p521.S b/x86_att/p521/bignum_deamont_p521.S index 762a8472e4..2543c0ec86 100644 --- a/x86_att/p521/bignum_deamont_p521.S +++ b/x86_att/p521/bignum_deamont_p521.S @@ -24,6 +24,7 @@ // "almost" meaning any 9-digit input will work, with no range restriction. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -51,6 +52,13 @@ bignum_deamont_p521: _bignum_deamont_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save more registers to play with pushq %rbx @@ -134,6 +142,10 @@ _bignum_deamont_p521: popq %r12 popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_demont_p521.S b/x86_att/p521/bignum_demont_p521.S index 29e699f043..f38cea2194 100644 --- a/x86_att/p521/bignum_demont_p521.S +++ b/x86_att/p521/bignum_demont_p521.S @@ -24,6 +24,7 @@ // use the variant "bignum_deamont_p521" instead. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -51,6 +52,13 @@ bignum_demont_p521: _bignum_demont_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Rotate, as a 521-bit quantity, by 9*64 - 521 = 55 bits right. movq (x), d0 @@ -83,6 +91,10 @@ _bignum_demont_p521: movq d7, 56(z) shrq $55, d8 movq d8, 64(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_double_p521.S b/x86_att/p521/bignum_double_p521.S index b147401855..33b29f8ec5 100644 --- a/x86_att/p521/bignum_double_p521.S +++ b/x86_att/p521/bignum_double_p521.S @@ -21,6 +21,7 @@ // (uint64_t z[static 9], uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -39,6 +40,13 @@ bignum_double_p521: _bignum_double_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // We can decide whether 2 * x >= p_521 just by 2 * x >= 2^521, which // as we assume x < p_521 amounts to looking at bit 8 of the top word @@ -85,6 +93,10 @@ _bignum_double_p521: andq $0x1FF, c movq c, 64(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_fromlebytes_p521.S b/x86_att/p521/bignum_fromlebytes_p521.S index 4da552b090..0fdc34ae07 100644 --- a/x86_att/p521/bignum_fromlebytes_p521.S +++ b/x86_att/p521/bignum_fromlebytes_p521.S @@ -25,6 +25,7 @@ // Since x86 is little-endian, this is just copying. 
// // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -39,6 +40,13 @@ bignum_fromlebytes_p521: _bignum_fromlebytes_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + movq (x), a movq a, (z) @@ -67,6 +75,10 @@ _bignum_fromlebytes_p521: movw 64(x), %ax movq a, 64(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_half_p521.S b/x86_att/p521/bignum_half_p521.S index 84b244a238..5550ba54da 100644 --- a/x86_att/p521/bignum_half_p521.S +++ b/x86_att/p521/bignum_half_p521.S @@ -21,6 +21,7 @@ // (uint64_t z[static 9], uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -50,6 +51,13 @@ bignum_half_p521: _bignum_half_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // We do a 521-bit rotation one bit right, since 2^521 == 1 (mod p_521) movq (x), d0 @@ -94,6 +102,10 @@ _bignum_half_p521: // Return +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_mod_n521_9.S b/x86_att/p521/bignum_mod_n521_9.S index e0a6af1619..eb21672efb 100644 --- a/x86_att/p521/bignum_mod_n521_9.S +++ b/x86_att/p521/bignum_mod_n521_9.S @@ -23,6 +23,7 @@ // Reduction is modulo the group order of the NIST curve P-521. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -51,6 +52,13 @@ bignum_mod_n521_9: _bignum_mod_n521_9: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Load the top digit, putting a bit-stuffed version in output buffer. // The initial quotient estimate is q = h + 1 where x = 2^521 * h + t // The last add also clears the CF and OF flags ready for the carry chain. @@ -139,6 +147,10 @@ _bignum_mod_n521_9: andl $0x1FF, cshort movq c, 64(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_mod_n521_9_alt.S b/x86_att/p521/bignum_mod_n521_9_alt.S index ebf7476c6e..2251304d92 100644 --- a/x86_att/p521/bignum_mod_n521_9_alt.S +++ b/x86_att/p521/bignum_mod_n521_9_alt.S @@ -23,6 +23,7 @@ // Reduction is modulo the group order of the NIST curve P-521. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -51,6 +52,13 @@ bignum_mod_n521_9_alt: _bignum_mod_n521_9_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Load the top digit, putting a bit-stuffed version in output buffer. // The initial quotient estimate is q = h + 1 where x = 2^521 * h + t // The last add also clears the CF and OF flags ready for the carry chain. 
@@ -148,6 +156,10 @@ _bignum_mod_n521_9_alt: andl $0x1FF, cshort movq c, 64(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_mod_p521_9.S b/x86_att/p521/bignum_mod_p521_9.S index 07deac20ea..cb808d9142 100644 --- a/x86_att/p521/bignum_mod_p521_9.S +++ b/x86_att/p521/bignum_mod_p521_9.S @@ -21,6 +21,7 @@ // (uint64_t z[static 9], uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -50,6 +51,13 @@ bignum_mod_p521_9: _bignum_mod_p521_9: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save one more register pushq %rbx @@ -111,6 +119,10 @@ _bignum_mod_p521_9: // Restore register popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_montmul_p521.S b/x86_att/p521/bignum_montmul_p521.S index 98eb07693b..377a6514e0 100644 --- a/x86_att/p521/bignum_montmul_p521.S +++ b/x86_att/p521/bignum_montmul_p521.S @@ -26,6 +26,7 @@ // can be considered a Montgomery operation to base 2^521. // // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y +// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- @@ -52,6 +53,14 @@ bignum_montmul_p521: _bignum_montmul_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save more registers to play with and make temporary space on stack pushq %rbp @@ -403,6 +412,10 @@ _bignum_montmul_p521: popq %rbx popq %rbp +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_montmul_p521_alt.S b/x86_att/p521/bignum_montmul_p521_alt.S index 21a9995e0c..97dbf57d42 100644 --- a/x86_att/p521/bignum_montmul_p521_alt.S +++ b/x86_att/p521/bignum_montmul_p521_alt.S @@ -26,6 +26,7 @@ // can be considered a Montgomery operation to base 2^521. // // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y +// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- @@ -69,6 +70,14 @@ bignum_montmul_p521_alt: _bignum_montmul_p521_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Make more registers available and make temporary space on stack pushq %r12 @@ -333,6 +342,10 @@ _bignum_montmul_p521_alt: popq %r14 popq %r13 popq %r12 +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_montsqr_p521.S b/x86_att/p521/bignum_montsqr_p521.S index 2ef12343d1..8b5cc46d04 100644 --- a/x86_att/p521/bignum_montsqr_p521.S +++ b/x86_att/p521/bignum_montsqr_p521.S @@ -26,6 +26,7 @@ // considered a Montgomery operation to base 2^521. 
// // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -63,6 +64,13 @@ bignum_montsqr_p521: _bignum_montsqr_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save more registers to play with and make temporary space on stack pushq %rbp @@ -313,6 +321,10 @@ _bignum_montsqr_p521: popq %r12 popq %rbp +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_montsqr_p521_alt.S b/x86_att/p521/bignum_montsqr_p521_alt.S index 2dbb5bb046..62b63ea61e 100644 --- a/x86_att/p521/bignum_montsqr_p521_alt.S +++ b/x86_att/p521/bignum_montsqr_p521_alt.S @@ -26,6 +26,7 @@ // considered a Montgomery operation to base 2^521. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -98,6 +99,13 @@ bignum_montsqr_p521_alt: _bignum_montsqr_p521_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Make more registers available and make temporary space on stack pushq %rbx @@ -326,6 +334,10 @@ _bignum_montsqr_p521_alt: popq %r12 popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_mul_p521.S b/x86_att/p521/bignum_mul_p521.S index ec13a859e7..d5656f8e45 100644 --- a/x86_att/p521/bignum_mul_p521.S +++ b/x86_att/p521/bignum_mul_p521.S @@ -21,6 +21,7 @@ // (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y +// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- @@ -47,6 +48,14 @@ bignum_mul_p521: _bignum_mul_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save more registers to play with and make temporary space on stack pushq %rbp @@ -380,6 +389,10 @@ _bignum_mul_p521: popq %rbx popq %rbp +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_mul_p521_alt.S b/x86_att/p521/bignum_mul_p521_alt.S index b6c88348c2..2eee34b99e 100644 --- a/x86_att/p521/bignum_mul_p521_alt.S +++ b/x86_att/p521/bignum_mul_p521_alt.S @@ -21,6 +21,7 @@ // (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y +// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- @@ -64,6 +65,14 @@ bignum_mul_p521_alt: _bignum_mul_p521_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Make more registers available and make temporary space on stack pushq %r12 @@ -310,6 +319,10 @@ _bignum_mul_p521_alt: popq %r14 popq %r13 popq %r12 +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_neg_p521.S b/x86_att/p521/bignum_neg_p521.S index 5c865180bc..453ce2a2df 100644 --- a/x86_att/p521/bignum_neg_p521.S +++ b/x86_att/p521/bignum_neg_p521.S @@ -20,6 +20,7 @@ // extern void bignum_neg_p521 (uint64_t z[static 9], uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // 
---------------------------------------------------------------------------- @@ -41,6 +42,13 @@ bignum_neg_p521: _bignum_neg_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Load most inputs (into the limited registers) and OR all of them to get p movq (x), d0 @@ -93,6 +101,10 @@ _bignum_neg_p521: // Return +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_optneg_p521.S b/x86_att/p521/bignum_optneg_p521.S index ebd48f5d2f..1203a8e735 100644 --- a/x86_att/p521/bignum_optneg_p521.S +++ b/x86_att/p521/bignum_optneg_p521.S @@ -22,6 +22,7 @@ // (uint64_t z[static 9], uint64_t p, uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = p, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = p, R8 = x // ---------------------------------------------------------------------------- @@ -43,6 +44,14 @@ bignum_optneg_p521: _bignum_optneg_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Load most inputs (into the limited registers) and OR all of them to get q movq (x), d0 @@ -97,6 +106,10 @@ _bignum_optneg_p521: // Return +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_sqr_p521.S b/x86_att/p521/bignum_sqr_p521.S index 6f999d0d7c..7f6bb58223 100644 --- a/x86_att/p521/bignum_sqr_p521.S +++ b/x86_att/p521/bignum_sqr_p521.S @@ -20,6 +20,7 @@ // extern void bignum_sqr_p521 (uint64_t z[static 9], uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -57,6 +58,13 @@ bignum_sqr_p521: _bignum_sqr_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save more registers to play with and make temporary space on stack pushq %rbp @@ -289,6 +297,10 @@ _bignum_sqr_p521: popq %r12 popq %rbp +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_sqr_p521_alt.S b/x86_att/p521/bignum_sqr_p521_alt.S index 527313b0c4..03317366de 100644 --- a/x86_att/p521/bignum_sqr_p521_alt.S +++ b/x86_att/p521/bignum_sqr_p521_alt.S @@ -20,6 +20,7 @@ // extern void bignum_sqr_p521_alt (uint64_t z[static 9], uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -92,6 +93,13 @@ bignum_sqr_p521_alt: _bignum_sqr_p521_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Make more registers available and make temporary space on stack pushq %rbx @@ -301,6 +309,10 @@ _bignum_sqr_p521_alt: popq %r13 popq %r12 popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_sub_p521.S b/x86_att/p521/bignum_sub_p521.S index 0d587c9bb4..fd68c98026 100644 --- a/x86_att/p521/bignum_sub_p521.S +++ b/x86_att/p521/bignum_sub_p521.S @@ -21,6 +21,7 @@ // (uint64_t z[static 9], uint64_t x[static 9], uint64_t y[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x, RDX = y +// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- @@ -50,6 +51,14 @@ bignum_sub_p521: _bignum_sub_p521: +#if WINDOWS_ABI + pushq %rdi + pushq 
%rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + // Save more registers to play with pushq %rbx @@ -106,6 +115,10 @@ _bignum_sub_p521: popq %r12 popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_tolebytes_p521.S b/x86_att/p521/bignum_tolebytes_p521.S index b64fc9bbe7..e6f6148406 100644 --- a/x86_att/p521/bignum_tolebytes_p521.S +++ b/x86_att/p521/bignum_tolebytes_p521.S @@ -25,6 +25,7 @@ // Since x86 is little-endian, this is just copying. // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -39,6 +40,13 @@ bignum_tolebytes_p521: _bignum_tolebytes_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + movq (x), a movq a, (z) @@ -66,6 +74,10 @@ _bignum_tolebytes_p521: movq 64(x), a movw %ax, 64(z) +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_tomont_p521.S b/x86_att/p521/bignum_tomont_p521.S index c6aabce855..a289dcff1b 100644 --- a/x86_att/p521/bignum_tomont_p521.S +++ b/x86_att/p521/bignum_tomont_p521.S @@ -21,6 +21,7 @@ // (uint64_t z[static 9], uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -50,6 +51,13 @@ bignum_tomont_p521: _bignum_tomont_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save one more register pushq %rbx @@ -129,6 +137,10 @@ _bignum_tomont_p521: // Restore register popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_triple_p521.S b/x86_att/p521/bignum_triple_p521.S index 0b1f397cad..47c65c9781 100644 --- a/x86_att/p521/bignum_triple_p521.S +++ b/x86_att/p521/bignum_triple_p521.S @@ -21,6 +21,7 @@ // (uint64_t z[static 9], uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -51,6 +52,13 @@ bignum_triple_p521: _bignum_triple_p521: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save more registers to play with pushq %rbx @@ -149,6 +157,10 @@ _bignum_triple_p521: popq %r12 popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) diff --git a/x86_att/p521/bignum_triple_p521_alt.S b/x86_att/p521/bignum_triple_p521_alt.S index 966b42408d..3dfdfb50aa 100644 --- a/x86_att/p521/bignum_triple_p521_alt.S +++ b/x86_att/p521/bignum_triple_p521_alt.S @@ -21,6 +21,7 @@ // (uint64_t z[static 9], uint64_t x[static 9]); // // Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- @@ -52,6 +53,13 @@ bignum_triple_p521_alt: _bignum_triple_p521_alt: +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + // Save additional registers to use pushq %rbx @@ -157,6 +165,10 @@ _bignum_triple_p521_alt: popq %rbp popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif ret #if defined(__linux__) && defined(__ELF__) From 22d065272f057fe8893023288c68b7ec9e719c9b Mon Sep 17 00:00:00 2001 From: Justin Smith Date: Thu, 19 May 
2022 15:25:14 -0400 Subject: [PATCH 04/42] Platform-aware symbol macros s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/e864876633b206f73c949f74b42098e466827545 --- arm/curve25519/bignum_neg_p25519.S | 8 ++++---- arm/fastmul/bignum_emontredc_8n.S | 10 +++++----- arm/fastmul/bignum_kmul_16_32.S | 8 ++++---- arm/fastmul/bignum_kmul_32_64.S | 8 ++++---- arm/fastmul/bignum_ksqr_16_32.S | 8 ++++---- arm/fastmul/bignum_ksqr_32_64.S | 8 ++++---- arm/generic/bignum_ge.S | 8 ++++---- arm/generic/bignum_mul.S | 8 ++++---- arm/generic/bignum_optsub.S | 8 ++++---- arm/generic/bignum_sqr.S | 8 ++++---- arm/p384/bignum_add_p384.S | 8 ++++---- arm/p384/bignum_bigendian_6.S | 24 ++++++++++++------------ arm/p384/bignum_cmul_p384.S | 16 ++++++++-------- arm/p384/bignum_deamont_p384.S | 16 ++++++++-------- arm/p384/bignum_demont_p384.S | 16 ++++++++-------- arm/p384/bignum_double_p384.S | 8 ++++---- arm/p384/bignum_half_p384.S | 8 ++++---- arm/p384/bignum_littleendian_6.S | 24 ++++++++++++------------ arm/p384/bignum_mod_n384.S | 16 ++++++++-------- arm/p384/bignum_mod_n384_6.S | 8 ++++---- arm/p384/bignum_mod_p384.S | 16 ++++++++-------- arm/p384/bignum_mod_p384_6.S | 8 ++++---- arm/p384/bignum_montmul_p384.S | 8 ++++---- arm/p384/bignum_montmul_p384_alt.S | 8 ++++---- arm/p384/bignum_montsqr_p384.S | 8 ++++---- arm/p384/bignum_montsqr_p384_alt.S | 8 ++++---- arm/p384/bignum_mux_6.S | 10 +++++----- arm/p384/bignum_neg_p384.S | 8 ++++---- arm/p384/bignum_nonzero_6.S | 8 ++++---- arm/p384/bignum_optneg_p384.S | 8 ++++---- arm/p384/bignum_sub_p384.S | 8 ++++---- arm/p384/bignum_tomont_p384.S | 16 ++++++++-------- arm/p384/bignum_triple_p384.S | 16 ++++++++-------- arm/p521/bignum_add_p521.S | 8 ++++---- arm/p521/bignum_cmul_p521.S | 16 ++++++++-------- arm/p521/bignum_deamont_p521.S | 8 ++++---- arm/p521/bignum_demont_p521.S | 8 ++++---- arm/p521/bignum_double_p521.S | 8 ++++---- arm/p521/bignum_fromlebytes_p521.S | 8 ++++---- arm/p521/bignum_half_p521.S | 8 ++++---- arm/p521/bignum_mod_n521_9.S | 16 ++++++++-------- arm/p521/bignum_mod_p521_9.S | 8 ++++---- arm/p521/bignum_montmul_p521.S | 8 ++++---- arm/p521/bignum_montmul_p521_alt.S | 8 ++++---- arm/p521/bignum_montsqr_p521.S | 8 ++++---- arm/p521/bignum_montsqr_p521_alt.S | 8 ++++---- arm/p521/bignum_mul_p521.S | 8 ++++---- arm/p521/bignum_mul_p521_alt.S | 8 ++++---- arm/p521/bignum_neg_p521.S | 8 ++++---- arm/p521/bignum_optneg_p521.S | 8 ++++---- arm/p521/bignum_sqr_p521.S | 8 ++++---- arm/p521/bignum_sqr_p521_alt.S | 8 ++++---- arm/p521/bignum_sub_p521.S | 8 ++++---- arm/p521/bignum_tolebytes_p521.S | 8 ++++---- arm/p521/bignum_tomont_p521.S | 8 ++++---- arm/p521/bignum_triple_p521.S | 16 ++++++++-------- include/_internal_s2n_bignum.h | 18 ++++++++++++++++++ x86_att/curve25519/bignum_neg_p25519.S | 8 ++++---- x86_att/p384/bignum_add_p384.S | 9 ++++----- x86_att/p384/bignum_bigendian_6.S | 23 +++++++++++------------ x86_att/p384/bignum_cmul_p384.S | 8 ++++---- x86_att/p384/bignum_cmul_p384_alt.S | 9 ++++----- x86_att/p384/bignum_deamont_p384.S | 10 ++++------ x86_att/p384/bignum_deamont_p384_alt.S | 9 ++++----- x86_att/p384/bignum_demont_p384.S | 9 ++++----- x86_att/p384/bignum_demont_p384_alt.S | 9 ++++----- x86_att/p384/bignum_double_p384.S | 8 ++++---- x86_att/p384/bignum_half_p384.S | 8 ++++---- x86_att/p384/bignum_littleendian_6.S | 23 +++++++++++------------ x86_att/p384/bignum_mod_n384.S | 9 +++++---- x86_att/p384/bignum_mod_n384_6.S | 9 +++++---- x86_att/p384/bignum_mod_n384_alt.S | 9 +++++---- 
x86_att/p384/bignum_mod_p384.S | 9 +++++---- x86_att/p384/bignum_mod_p384_6.S | 9 +++++---- x86_att/p384/bignum_mod_p384_alt.S | 9 +++++---- x86_att/p384/bignum_montmul_p384.S | 9 +++++---- x86_att/p384/bignum_montmul_p384_alt.S | 9 +++++---- x86_att/p384/bignum_montsqr_p384.S | 9 +++++---- x86_att/p384/bignum_montsqr_p384_alt.S | 9 +++++---- x86_att/p384/bignum_mux_6.S | 9 +++++---- x86_att/p384/bignum_neg_p384.S | 9 +++++---- x86_att/p384/bignum_nonzero_6.S | 9 +++++---- x86_att/p384/bignum_optneg_p384.S | 9 +++++---- x86_att/p384/bignum_sub_p384.S | 9 +++++---- x86_att/p384/bignum_tomont_p384.S | 9 +++++---- x86_att/p384/bignum_tomont_p384_alt.S | 9 +++++---- x86_att/p384/bignum_triple_p384.S | 9 +++++---- x86_att/p384/bignum_triple_p384_alt.S | 9 +++++---- x86_att/p521/bignum_add_p521.S | 9 +++++---- x86_att/p521/bignum_cmul_p521.S | 9 +++++---- x86_att/p521/bignum_cmul_p521_alt.S | 9 +++++---- x86_att/p521/bignum_deamont_p521.S | 9 +++++---- x86_att/p521/bignum_demont_p521.S | 9 +++++---- x86_att/p521/bignum_double_p521.S | 9 +++++---- x86_att/p521/bignum_fromlebytes_p521.S | 9 +++++---- x86_att/p521/bignum_half_p521.S | 9 +++++---- x86_att/p521/bignum_mod_n521_9.S | 9 +++++---- x86_att/p521/bignum_mod_n521_9_alt.S | 9 +++++---- x86_att/p521/bignum_mod_p521_9.S | 9 +++++---- x86_att/p521/bignum_montmul_p521.S | 9 +++++---- x86_att/p521/bignum_montmul_p521_alt.S | 9 +++++---- x86_att/p521/bignum_montsqr_p521.S | 9 +++++---- x86_att/p521/bignum_montsqr_p521_alt.S | 9 +++++---- x86_att/p521/bignum_mul_p521.S | 9 +++++---- x86_att/p521/bignum_mul_p521_alt.S | 9 +++++---- x86_att/p521/bignum_neg_p521.S | 9 +++++---- x86_att/p521/bignum_optneg_p521.S | 9 +++++---- x86_att/p521/bignum_sqr_p521.S | 9 +++++---- x86_att/p521/bignum_sqr_p521_alt.S | 9 +++++---- x86_att/p521/bignum_sub_p521.S | 9 +++++---- x86_att/p521/bignum_tolebytes_p521.S | 9 +++++---- x86_att/p521/bignum_tomont_p521.S | 9 +++++---- x86_att/p521/bignum_triple_p521.S | 9 +++++---- x86_att/p521/bignum_triple_p521_alt.S | 9 +++++---- 114 files changed, 587 insertions(+), 533 deletions(-) create mode 100644 include/_internal_s2n_bignum.h diff --git a/arm/curve25519/bignum_neg_p25519.S b/arm/curve25519/bignum_neg_p25519.S index 24b9df2bf6..b88a48a6ee 100644 --- a/arm/curve25519/bignum_neg_p25519.S +++ b/arm/curve25519/bignum_neg_p25519.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_neg_p25519 - .globl _bignum_neg_p25519 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p25519) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p25519) .text .balign 4 @@ -38,8 +39,7 @@ #define c x6 #define d x7 -bignum_neg_p25519: -_bignum_neg_p25519: +S2N_BN_SYMBOL(bignum_neg_p25519): // Load the digits of x and compute [d3;d2;d1;d0] = (2^255 - 19) - x // while also computing c = the OR of the digits of x diff --git a/arm/fastmul/bignum_emontredc_8n.S b/arm/fastmul/bignum_emontredc_8n.S index a2b2360c21..104a0eb924 100644 --- a/arm/fastmul/bignum_emontredc_8n.S +++ b/arm/fastmul/bignum_emontredc_8n.S @@ -25,9 +25,10 @@ // // Standard ARM ABI: X0 = k, X1 = z, X2 = m, X3 = w, returns X0 // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_emontredc_8n - .globl _bignum_emontredc_8n + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_emontredc_8n) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_emontredc_8n) .text .balign 4 @@ -208,10 
+209,9 @@ // Main code // ***************************************************** -bignum_emontredc_8n: -_bignum_emontredc_8n: +S2N_BN_SYMBOL(bignum_emontredc_8n): - stp x19, x20, [sp, #-16]! +stp x19, x20, [sp, #-16]! stp x21, x22, [sp, #-16]! stp x23, x24, [sp, #-16]! stp x25, x26, [sp, #-16]! diff --git a/arm/fastmul/bignum_kmul_16_32.S b/arm/fastmul/bignum_kmul_16_32.S index ecd6b6daf9..9a29b26305 100644 --- a/arm/fastmul/bignum_kmul_16_32.S +++ b/arm/fastmul/bignum_kmul_16_32.S @@ -26,9 +26,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y, X3 = t // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_kmul_16_32 - .globl _bignum_kmul_16_32 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_kmul_16_32) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_kmul_16_32) .text .balign 4 @@ -44,8 +45,7 @@ #define s x29 #define m x19 -bignum_kmul_16_32: -_bignum_kmul_16_32: +S2N_BN_SYMBOL(bignum_kmul_16_32): // Save registers, including return address diff --git a/arm/fastmul/bignum_kmul_32_64.S b/arm/fastmul/bignum_kmul_32_64.S index c657d50dac..8706ac6f2f 100644 --- a/arm/fastmul/bignum_kmul_32_64.S +++ b/arm/fastmul/bignum_kmul_32_64.S @@ -26,9 +26,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y, X3 = t // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_kmul_32_64 - .globl _bignum_kmul_32_64 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_kmul_32_64) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_kmul_32_64) .text .balign 4 @@ -42,8 +43,7 @@ #define c x16 -bignum_kmul_32_64: -_bignum_kmul_32_64: +S2N_BN_SYMBOL(bignum_kmul_32_64): // Save extra registers and return address, store parameters safely diff --git a/arm/fastmul/bignum_ksqr_16_32.S b/arm/fastmul/bignum_ksqr_16_32.S index 25bf314ad8..36cf26d9cd 100644 --- a/arm/fastmul/bignum_ksqr_16_32.S +++ b/arm/fastmul/bignum_ksqr_16_32.S @@ -25,9 +25,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = t // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_ksqr_16_32 - .globl _bignum_ksqr_16_32 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_ksqr_16_32) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_ksqr_16_32) .text .balign 4 @@ -42,8 +43,7 @@ #define s x19 -bignum_ksqr_16_32: -_bignum_ksqr_16_32: +S2N_BN_SYMBOL(bignum_ksqr_16_32): // Save registers, including return address diff --git a/arm/fastmul/bignum_ksqr_32_64.S b/arm/fastmul/bignum_ksqr_32_64.S index eeb94be1c2..37d259e97b 100644 --- a/arm/fastmul/bignum_ksqr_32_64.S +++ b/arm/fastmul/bignum_ksqr_32_64.S @@ -25,9 +25,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = t // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_ksqr_32_64 - .globl _bignum_ksqr_32_64 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_ksqr_32_64) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_ksqr_32_64) .text .balign 4 @@ -41,8 +42,7 @@ #define c x16 -bignum_ksqr_32_64: -_bignum_ksqr_32_64: +S2N_BN_SYMBOL(bignum_ksqr_32_64): // Save extra registers and return address, store parameters safely diff --git a/arm/generic/bignum_ge.S b/arm/generic/bignum_ge.S index 19db769366..badd521b56 100644 --- a/arm/generic/bignum_ge.S +++ b/arm/generic/bignum_ge.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = m, X1 = x, X2 = n, X3 = y, returns X0 // ---------------------------------------------------------------------------- 
+#include "../../include/_internal_s2n_bignum.h" - .globl bignum_ge - .globl _bignum_ge + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_ge) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_ge) .text .balign 4 @@ -37,8 +38,7 @@ #define d x6 -bignum_ge: -_bignum_ge: +S2N_BN_SYMBOL(bignum_ge): // Zero the main index counter for both branches diff --git a/arm/generic/bignum_mul.S b/arm/generic/bignum_mul.S index 055ae3bf7f..fc7227aa62 100644 --- a/arm/generic/bignum_mul.S +++ b/arm/generic/bignum_mul.S @@ -26,9 +26,10 @@ // // Standard ARM ABI: X0 = k, X1 = z, X2 = m, X3 = x, X4 = n, X5 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_mul - .globl _bignum_mul + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul) .text .balign 4 @@ -50,8 +51,7 @@ #define yy x15 -bignum_mul: -_bignum_mul: +S2N_BN_SYMBOL(bignum_mul): // If p = 0 the result is trivial and nothing needs doing diff --git a/arm/generic/bignum_optsub.S b/arm/generic/bignum_optsub.S index fd952d1369..b2c0653df7 100644 --- a/arm/generic/bignum_optsub.S +++ b/arm/generic/bignum_optsub.S @@ -25,9 +25,10 @@ // // Standard ARM ABI: X0 = k, X1 = z, X2 = x, X3 = p, X4 = y, returns X0 // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_optsub - .globl _bignum_optsub + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optsub) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optsub) .text .balign 4 @@ -42,8 +43,7 @@ #define i x7 -bignum_optsub: -_bignum_optsub: +S2N_BN_SYMBOL(bignum_optsub): // if k = 0 do nothing. This is also the right top carry in X0 diff --git a/arm/generic/bignum_sqr.S b/arm/generic/bignum_sqr.S index f6e5a7439f..cff9f6c29d 100644 --- a/arm/generic/bignum_sqr.S +++ b/arm/generic/bignum_sqr.S @@ -25,9 +25,10 @@ // // Standard ARM ABI: X0 = k, X1 = z, X2 = n, X3 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_sqr - .globl _bignum_sqr + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr) .text .balign 4 @@ -49,8 +50,7 @@ #define hh x14 #define ll x15 -bignum_sqr: -_bignum_sqr: +S2N_BN_SYMBOL(bignum_sqr): // If p = 0 the result is trivial and nothing needs doing diff --git a/arm/p384/bignum_add_p384.S b/arm/p384/bignum_add_p384.S index a2558da130..257a5f6a58 100644 --- a/arm/p384/bignum_add_p384.S +++ b/arm/p384/bignum_add_p384.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_add_p384 - .globl _bignum_add_p384 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add_p384) .text .balign 4 @@ -41,8 +42,7 @@ #define d5 x10 -bignum_add_p384: -_bignum_add_p384: +S2N_BN_SYMBOL(bignum_add_p384): // First just add the numbers as c + [d5; d4; d3; d2; d1; d0] diff --git a/arm/p384/bignum_bigendian_6.S b/arm/p384/bignum_bigendian_6.S index 84bbf01425..d7f463aee5 100644 --- a/arm/p384/bignum_bigendian_6.S +++ b/arm/p384/bignum_bigendian_6.S @@ -35,13 +35,15 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_bigendian_6) + 
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_bigendian_6) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_frombebytes_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_frombebytes_6) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tobebytes_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tobebytes_6) - .globl bignum_bigendian_6 - .globl _bignum_bigendian_6 - .globl bignum_frombebytes_6 - .globl _bignum_frombebytes_6 - .globl bignum_tobebytes_6 - .globl _bignum_tobebytes_6 .text .balign 4 @@ -57,12 +59,10 @@ // to allow x and z to point to the same buffer without using more // intermediate registers. -bignum_bigendian_6: -_bignum_bigendian_6: -bignum_frombebytes_6: -_bignum_frombebytes_6: -bignum_tobebytes_6: -_bignum_tobebytes_6: +S2N_BN_SYMBOL(bignum_bignum_bigendian_6): + +S2N_BN_SYMBOL(bignum_bignum_frombebytes_6): +S2N_BN_SYMBOL(bignum_bignum_tobebytes_6): // 0 and 5 words diff --git a/arm/p384/bignum_cmul_p384.S b/arm/p384/bignum_cmul_p384.S index 2c87e82fd5..6b10ad254c 100644 --- a/arm/p384/bignum_cmul_p384.S +++ b/arm/p384/bignum_cmul_p384.S @@ -23,11 +23,12 @@ // // Standard ARM ABI: X0 = z, X1 = c, X2 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_cmul_p384 - .globl _bignum_cmul_p384 - .globl bignum_cmul_p384_alt - .globl _bignum_cmul_p384_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p384) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p384_alt) .text .balign 4 @@ -57,10 +58,9 @@ #define l x9 -bignum_cmul_p384: -_bignum_cmul_p384: -bignum_cmul_p384_alt: -_bignum_cmul_p384_alt: +S2N_BN_SYMBOL(bignum_cmul_p384): + +S2N_BN_SYMBOL(bignum_cmul_p384_alt): // First do the multiply, straightforwardly, getting [h; d5; ...; d0] diff --git a/arm/p384/bignum_deamont_p384.S b/arm/p384/bignum_deamont_p384.S index a322a8c4a2..673cd97d53 100644 --- a/arm/p384/bignum_deamont_p384.S +++ b/arm/p384/bignum_deamont_p384.S @@ -25,11 +25,12 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_deamont_p384 - .globl _bignum_deamont_p384 - .globl bignum_deamont_p384_alt - .globl _bignum_deamont_p384_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p384) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p384_alt) .text .balign 4 @@ -90,10 +91,9 @@ #define v x9 #define w x10 -bignum_deamont_p384: -_bignum_deamont_p384: -bignum_deamont_p384_alt: -_bignum_deamont_p384_alt: +S2N_BN_SYMBOL(bignum_deamont_p384): + +S2N_BN_SYMBOL(bignum_deamont_p384_alt): // Set up an initial window with the input x and an extra leading zero diff --git a/arm/p384/bignum_demont_p384.S b/arm/p384/bignum_demont_p384.S index 00a0ffcacc..4debf61fd8 100644 --- a/arm/p384/bignum_demont_p384.S +++ b/arm/p384/bignum_demont_p384.S @@ -25,11 +25,12 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_demont_p384 - .globl _bignum_demont_p384 - .globl bignum_demont_p384_alt - .globl _bignum_demont_p384_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p384) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p384_alt) + 
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p384_alt) .text .balign 4 @@ -90,10 +91,9 @@ #define v x9 #define w x10 -bignum_demont_p384: -_bignum_demont_p384: -bignum_demont_p384_alt: -_bignum_demont_p384_alt: +S2N_BN_SYMBOL(bignum_demont_p384): + +S2N_BN_SYMBOL(bignum_demont_p384_alt): // Set up an initial window with the input x and an extra leading zero diff --git a/arm/p384/bignum_double_p384.S b/arm/p384/bignum_double_p384.S index 4d1189147a..32088b55a2 100644 --- a/arm/p384/bignum_double_p384.S +++ b/arm/p384/bignum_double_p384.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_double_p384 - .globl _bignum_double_p384 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_double_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_double_p384) .text .balign 4 @@ -45,8 +46,7 @@ #define n5 x14 -bignum_double_p384: -_bignum_double_p384: +S2N_BN_SYMBOL(bignum_double_p384): // Double the input number as 2 * x = c + [d5; d4; d3; d2; d1; d0] // It's worth considering doing this with extr...63 instead diff --git a/arm/p384/bignum_half_p384.S b/arm/p384/bignum_half_p384.S index f0ce12864a..f4f6be189b 100644 --- a/arm/p384/bignum_half_p384.S +++ b/arm/p384/bignum_half_p384.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_half_p384 - .globl _bignum_half_p384 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_half_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_half_p384) .text .balign 4 @@ -43,8 +44,7 @@ #define n x11 -bignum_half_p384: -_bignum_half_p384: +S2N_BN_SYMBOL(bignum_half_p384): // Load the 4 digits of x diff --git a/arm/p384/bignum_littleendian_6.S b/arm/p384/bignum_littleendian_6.S index 30d36a53bf..3c35b397f0 100644 --- a/arm/p384/bignum_littleendian_6.S +++ b/arm/p384/bignum_littleendian_6.S @@ -35,13 +35,15 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_littleendian_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_littleendian_6) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_fromlebytes_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_fromlebytes_6) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tolebytes_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tolebytes_6) - .globl bignum_littleendian_6 - .globl _bignum_littleendian_6 - .globl bignum_fromlebytes_6 - .globl _bignum_fromlebytes_6 - .globl bignum_tolebytes_6 - .globl _bignum_tolebytes_6 .text .balign 4 @@ -52,12 +54,10 @@ #define dshort w2 #define a x3 -bignum_littleendian_6: -_bignum_littleendian_6: -bignum_fromlebytes_6: -_bignum_fromlebytes_6: -bignum_tolebytes_6: -_bignum_tolebytes_6: +S2N_BN_SYMBOL(bignum_bignum_littleendian_6): + +S2N_BN_SYMBOL(bignum_bignum_fromlebytes_6): +S2N_BN_SYMBOL(bignum_bignum_tolebytes_6): // word 0 diff --git a/arm/p384/bignum_mod_n384.S b/arm/p384/bignum_mod_n384.S index b821c69235..8cb4bf6562 100644 --- a/arm/p384/bignum_mod_n384.S +++ b/arm/p384/bignum_mod_n384.S @@ -24,11 +24,12 @@ // // Standard ARM ABI: X0 = z, X1 = k, X2 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_mod_n384 - .globl _bignum_mod_n384 - .globl bignum_mod_n384_alt - .globl _bignum_mod_n384_alt + 
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384_alt) .text .balign 4 @@ -75,10 +76,9 @@ movk nn, n2, lsl #32; \ movk nn, n3, lsl #48 -bignum_mod_n384: -_bignum_mod_n384: -bignum_mod_n384_alt: -_bignum_mod_n384_alt: +S2N_BN_SYMBOL(bignum_mod_n384): + +S2N_BN_SYMBOL(bignum_mod_n384_alt): // If the input is already <= 5 words long, go to a trivial "copy" path diff --git a/arm/p384/bignum_mod_n384_6.S b/arm/p384/bignum_mod_n384_6.S index 4f37566197..b906b30cc1 100644 --- a/arm/p384/bignum_mod_n384_6.S +++ b/arm/p384/bignum_mod_n384_6.S @@ -24,9 +24,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_mod_n384_6 - .globl _bignum_mod_n384_6 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384_6) .text .balign 4 @@ -53,8 +54,7 @@ movk nn, n2, lsl #32; \ movk nn, n3, lsl #48 -bignum_mod_n384_6: -_bignum_mod_n384_6: +S2N_BN_SYMBOL(bignum_mod_n384_6): // Load the complicated lower three words of n_384 diff --git a/arm/p384/bignum_mod_p384.S b/arm/p384/bignum_mod_p384.S index 54c7ffcfa3..48d103a37c 100644 --- a/arm/p384/bignum_mod_p384.S +++ b/arm/p384/bignum_mod_p384.S @@ -22,11 +22,12 @@ // // Standard ARM ABI: X0 = z, X1 = k, X2 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_mod_p384 - .globl _bignum_mod_p384 - .globl bignum_mod_p384_alt - .globl _bignum_mod_p384_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384_alt) .text .balign 4 @@ -53,10 +54,9 @@ #define n2 x17 -bignum_mod_p384: -_bignum_mod_p384: -bignum_mod_p384_alt: -_bignum_mod_p384_alt: +S2N_BN_SYMBOL(bignum_mod_p384): + +S2N_BN_SYMBOL(bignum_mod_p384_alt): // If the input is already <= 5 words long, go to a trivial "copy" path diff --git a/arm/p384/bignum_mod_p384_6.S b/arm/p384/bignum_mod_p384_6.S index f02b401f7a..fdad1cc846 100644 --- a/arm/p384/bignum_mod_p384_6.S +++ b/arm/p384/bignum_mod_p384_6.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_mod_p384_6 - .globl _bignum_mod_p384_6 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384_6) .text .balign 4 @@ -46,8 +47,7 @@ #define d5 x13 -bignum_mod_p384_6: -_bignum_mod_p384_6: +S2N_BN_SYMBOL(bignum_mod_p384_6): // Load the complicated lower three words of p_384 = [-1;-1;-1;n2;n1;n0] diff --git a/arm/p384/bignum_montmul_p384.S b/arm/p384/bignum_montmul_p384.S index 3ffc1cd3da..fce8dfe714 100644 --- a/arm/p384/bignum_montmul_p384.S +++ b/arm/p384/bignum_montmul_p384.S @@ -26,9 +26,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_montmul_p384 - .globl _bignum_montmul_p384 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p384) .text .balign 4 @@ -113,8 +114,7 @@ #define t3 x23 #define t4 x24 
-bignum_montmul_p384: -_bignum_montmul_p384: +S2N_BN_SYMBOL(bignum_montmul_p384): // Save some registers diff --git a/arm/p384/bignum_montmul_p384_alt.S b/arm/p384/bignum_montmul_p384_alt.S index ec03123d0a..c3a3262738 100644 --- a/arm/p384/bignum_montmul_p384_alt.S +++ b/arm/p384/bignum_montmul_p384_alt.S @@ -26,9 +26,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_montmul_p384_alt - .globl _bignum_montmul_p384_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p384_alt) .text .balign 4 @@ -102,8 +103,7 @@ #define u11 x1 // same as x #define h b5 // same as b5 -bignum_montmul_p384_alt: -_bignum_montmul_p384_alt: +S2N_BN_SYMBOL(bignum_montmul_p384_alt): // Save more registers diff --git a/arm/p384/bignum_montsqr_p384.S b/arm/p384/bignum_montsqr_p384.S index 01b52cc000..0ad3906e31 100644 --- a/arm/p384/bignum_montsqr_p384.S +++ b/arm/p384/bignum_montsqr_p384.S @@ -25,9 +25,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_montsqr_p384 - .globl _bignum_montsqr_p384 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p384) .text .balign 4 @@ -104,8 +105,7 @@ #define d3 x16 #define d4 x17 -bignum_montsqr_p384: -_bignum_montsqr_p384: +S2N_BN_SYMBOL(bignum_montsqr_p384): // Load in all words of the input diff --git a/arm/p384/bignum_montsqr_p384_alt.S b/arm/p384/bignum_montsqr_p384_alt.S index 7a0ed8de8e..8600c04890 100644 --- a/arm/p384/bignum_montsqr_p384_alt.S +++ b/arm/p384/bignum_montsqr_p384_alt.S @@ -25,9 +25,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_montsqr_p384_alt - .globl _bignum_montsqr_p384_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p384_alt) .text .balign 4 @@ -90,8 +91,7 @@ #define u11 x20 #define h x6 // same as a4 -bignum_montsqr_p384_alt: -_bignum_montsqr_p384_alt: +S2N_BN_SYMBOL(bignum_montsqr_p384_alt): // It's convenient to have two more registers to play with diff --git a/arm/p384/bignum_mux_6.S b/arm/p384/bignum_mux_6.S index e8f13bc2a8..d8ea1756ac 100644 --- a/arm/p384/bignum_mux_6.S +++ b/arm/p384/bignum_mux_6.S @@ -25,9 +25,10 @@ // // Standard ARM ABI: X0 = p, X1 = z, X2 = x, X3 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_mux_6 - .globl _bignum_mux_6 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mux_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mux_6) .text .balign 4 @@ -38,10 +39,9 @@ #define a x4 -bignum_mux_6: -_bignum_mux_6: +S2N_BN_SYMBOL(bignum_mux_6): - cmp p, #0 // Set condition codes p = 0 +cmp p, #0 // Set condition codes p = 0 ldr a, [x] ldr p, [y] diff --git a/arm/p384/bignum_neg_p384.S b/arm/p384/bignum_neg_p384.S index 94a23c3f11..67f725db6c 100644 --- a/arm/p384/bignum_neg_p384.S +++ b/arm/p384/bignum_neg_p384.S @@ -21,9 +21,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_neg_p384 - .globl _bignum_neg_p384 + 
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p384) .text .balign 4 @@ -41,8 +42,7 @@ #define d5 x9 -bignum_neg_p384: -_bignum_neg_p384: +S2N_BN_SYMBOL(bignum_neg_p384): // Load the 6 digits of x diff --git a/arm/p384/bignum_nonzero_6.S b/arm/p384/bignum_nonzero_6.S index 29e3ca6b31..79e0ba3f37 100644 --- a/arm/p384/bignum_nonzero_6.S +++ b/arm/p384/bignum_nonzero_6.S @@ -21,9 +21,10 @@ // // Standard ARM ABI: X0 = x, returns X0 // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_nonzero_6 - .globl _bignum_nonzero_6 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_nonzero_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_nonzero_6) .text .balign 4 @@ -33,8 +34,7 @@ #define c x3 -bignum_nonzero_6: -_bignum_nonzero_6: +S2N_BN_SYMBOL(bignum_nonzero_6): // Generate a = an OR of all the words in the bignum diff --git a/arm/p384/bignum_optneg_p384.S b/arm/p384/bignum_optneg_p384.S index cc269d28d9..096e583949 100644 --- a/arm/p384/bignum_optneg_p384.S +++ b/arm/p384/bignum_optneg_p384.S @@ -23,9 +23,10 @@ // // Standard ARM ABI: X0 = z, X1 = p, X2 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_optneg_p384 - .globl _bignum_optneg_p384 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optneg_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optneg_p384) .text .balign 4 @@ -47,8 +48,7 @@ #define n5 x14 -bignum_optneg_p384: -_bignum_optneg_p384: +S2N_BN_SYMBOL(bignum_optneg_p384): // Load the 6 digits of x diff --git a/arm/p384/bignum_sub_p384.S b/arm/p384/bignum_sub_p384.S index 5e83fc26a0..14d84dcc02 100644 --- a/arm/p384/bignum_sub_p384.S +++ b/arm/p384/bignum_sub_p384.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_sub_p384 - .globl _bignum_sub_p384 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub_p384) .text .balign 4 @@ -42,8 +43,7 @@ #define d5 x10 -bignum_sub_p384: -_bignum_sub_p384: +S2N_BN_SYMBOL(bignum_sub_p384): // First just subtract the numbers as [d5; d4; d3; d2; d1; d0] // Set a mask based on (inverted) carry indicating x < y = correction is needed diff --git a/arm/p384/bignum_tomont_p384.S b/arm/p384/bignum_tomont_p384.S index a3ca9c3416..0aab015130 100644 --- a/arm/p384/bignum_tomont_p384.S +++ b/arm/p384/bignum_tomont_p384.S @@ -22,11 +22,12 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_tomont_p384 - .globl _bignum_tomont_p384 - .globl bignum_tomont_p384_alt - .globl _bignum_tomont_p384_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p384) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p384_alt) .text .balign 4 @@ -72,10 +73,9 @@ adcs d4, d4, t3; \ adc d5, d5, t3 -bignum_tomont_p384: -_bignum_tomont_p384: -bignum_tomont_p384_alt: -_bignum_tomont_p384_alt: +S2N_BN_SYMBOL(bignum_tomont_p384): + +S2N_BN_SYMBOL(bignum_tomont_p384_alt): #define d0 x2 #define d1 x3 diff --git a/arm/p384/bignum_triple_p384.S b/arm/p384/bignum_triple_p384.S index 83ebcefbcf..00f4bdc05d 100644 --- a/arm/p384/bignum_triple_p384.S +++ 
b/arm/p384/bignum_triple_p384.S @@ -25,11 +25,12 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_triple_p384 - .globl _bignum_triple_p384 - .globl bignum_triple_p384_alt - .globl _bignum_triple_p384_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p384) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p384_alt) .text .balign 4 @@ -64,10 +65,9 @@ #define t1 x10 -bignum_triple_p384: -_bignum_triple_p384: -bignum_triple_p384_alt: -_bignum_triple_p384_alt: +S2N_BN_SYMBOL(bignum_triple_p384): + +S2N_BN_SYMBOL(bignum_triple_p384_alt): // Load the inputs diff --git a/arm/p521/bignum_add_p521.S b/arm/p521/bignum_add_p521.S index 0c47f9387c..d39c60b920 100644 --- a/arm/p521/bignum_add_p521.S +++ b/arm/p521/bignum_add_p521.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_add_p521 - .globl _bignum_add_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add_p521) .text .balign 4 @@ -44,8 +45,7 @@ #define d8 x13 -bignum_add_p521: -_bignum_add_p521: +S2N_BN_SYMBOL(bignum_add_p521): // Force carry-in to get s = [d8;d7;d6;d5;d4;d3;d2;d1;d0] = x + y + 1. // We ignore the carry-out, assuming inputs are reduced so there is none. diff --git a/arm/p521/bignum_cmul_p521.S b/arm/p521/bignum_cmul_p521.S index 0491551b4c..ef2d756449 100644 --- a/arm/p521/bignum_cmul_p521.S +++ b/arm/p521/bignum_cmul_p521.S @@ -23,11 +23,12 @@ // // Standard ARM ABI: X0 = z, X1 = c, X2 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_cmul_p521 - .globl _bignum_cmul_p521 - .globl bignum_cmul_p521_alt - .globl _bignum_cmul_p521_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p521) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p521_alt) .text .balign 4 @@ -63,10 +64,9 @@ #define a8 x14 #define dd x15 -bignum_cmul_p521: -_bignum_cmul_p521: -bignum_cmul_p521_alt: -_bignum_cmul_p521_alt: +S2N_BN_SYMBOL(bignum_cmul_p521): + +S2N_BN_SYMBOL(bignum_cmul_p521_alt): // First do the multiply, getting [d9; ...; d0], and as this is done // accumulate an AND "dd" of digits d7,...,d1 for later use diff --git a/arm/p521/bignum_deamont_p521.S b/arm/p521/bignum_deamont_p521.S index f4c83d9229..2e1e95882e 100644 --- a/arm/p521/bignum_deamont_p521.S +++ b/arm/p521/bignum_deamont_p521.S @@ -25,9 +25,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_deamont_p521 - .globl _bignum_deamont_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p521) .text .balign 4 @@ -55,8 +56,7 @@ #define l x12 #define u x12 -bignum_deamont_p521: -_bignum_deamont_p521: +S2N_BN_SYMBOL(bignum_deamont_p521): // Load all the inputs diff --git a/arm/p521/bignum_demont_p521.S b/arm/p521/bignum_demont_p521.S index bebd89715c..0cd1afbb8f 100644 --- a/arm/p521/bignum_demont_p521.S +++ b/arm/p521/bignum_demont_p521.S @@ -25,9 +25,10 @@ // // Standard 
ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_demont_p521 - .globl _bignum_demont_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p521) .text .balign 4 @@ -49,8 +50,7 @@ #define d8 x2 #define c x6 -bignum_demont_p521: -_bignum_demont_p521: +S2N_BN_SYMBOL(bignum_demont_p521): // Rotate, as a 521-bit quantity, by 9*64 - 521 = 55 bits right. diff --git a/arm/p521/bignum_double_p521.S b/arm/p521/bignum_double_p521.S index 75baf8b540..6331d4fc5e 100644 --- a/arm/p521/bignum_double_p521.S +++ b/arm/p521/bignum_double_p521.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_double_p521 - .globl _bignum_double_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_double_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_double_p521) .text .balign 4 @@ -35,8 +36,7 @@ #define h x3 #define l x4 -bignum_double_p521: -_bignum_double_p521: +S2N_BN_SYMBOL(bignum_double_p521): // We can decide whether 2 * x >= p_521 just by 2 * x >= 2^521, which // amounts to whether the top word is >= 256 diff --git a/arm/p521/bignum_fromlebytes_p521.S b/arm/p521/bignum_fromlebytes_p521.S index c7a7a013ef..abba7a8b20 100644 --- a/arm/p521/bignum_fromlebytes_p521.S +++ b/arm/p521/bignum_fromlebytes_p521.S @@ -24,9 +24,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_fromlebytes_p521 - .globl _bignum_fromlebytes_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_fromlebytes_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_fromlebytes_p521) .text .balign 4 @@ -37,8 +38,7 @@ #define dshort w2 #define a x3 -bignum_fromlebytes_p521: -_bignum_fromlebytes_p521: +S2N_BN_SYMBOL(bignum_fromlebytes_p521): // word 0 diff --git a/arm/p521/bignum_half_p521.S b/arm/p521/bignum_half_p521.S index e7c13c1c52..957abcbbc3 100644 --- a/arm/p521/bignum_half_p521.S +++ b/arm/p521/bignum_half_p521.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_half_p521 - .globl _bignum_half_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_half_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_half_p521) .text .balign 4 @@ -45,8 +46,7 @@ #define a x6 -bignum_half_p521: -_bignum_half_p521: +S2N_BN_SYMBOL(bignum_half_p521): // We do a 521-bit rotation one bit right, since 2^521 == 1 (mod p_521) diff --git a/arm/p521/bignum_mod_n521_9.S b/arm/p521/bignum_mod_n521_9.S index 958f09d1ea..a3b733bcd2 100644 --- a/arm/p521/bignum_mod_n521_9.S +++ b/arm/p521/bignum_mod_n521_9.S @@ -24,11 +24,12 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_mod_n521_9 - .globl _bignum_mod_n521_9 - .globl bignum_mod_n521_9_alt - .globl _bignum_mod_n521_9_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n521_9) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n521_9) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n521_9_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n521_9_alt) .text .balign 4 @@ -63,10 +64,9 @@ movk nn, n2, lsl #32; \ movk nn, n3, lsl #48 -bignum_mod_n521_9: 
-_bignum_mod_n521_9: -bignum_mod_n521_9_alt: -_bignum_mod_n521_9_alt: +S2N_BN_SYMBOL(bignum_mod_n521_9): + +S2N_BN_SYMBOL(bignum_mod_n521_9_alt): // Load the top digit first into d8. // The initial quotient estimate is q = h + 1 where x = 2^521 * h + t diff --git a/arm/p521/bignum_mod_p521_9.S b/arm/p521/bignum_mod_p521_9.S index 2a7d990100..ae0bfdc706 100644 --- a/arm/p521/bignum_mod_p521_9.S +++ b/arm/p521/bignum_mod_p521_9.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_mod_p521_9 - .globl _bignum_mod_p521_9 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p521_9) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p521_9) .text .balign 4 @@ -43,8 +44,7 @@ #define d7 x11 #define d8 x12 -bignum_mod_p521_9: -_bignum_mod_p521_9: +S2N_BN_SYMBOL(bignum_mod_p521_9): // Load top digit first and get its upper bits in h so that we // separate out x = 2^521 * H + L with h = H. Now x mod p_521 = diff --git a/arm/p521/bignum_montmul_p521.S b/arm/p521/bignum_montmul_p521.S index 6543afa732..809cedac34 100644 --- a/arm/p521/bignum_montmul_p521.S +++ b/arm/p521/bignum_montmul_p521.S @@ -27,9 +27,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_montmul_p521 - .globl _bignum_montmul_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p521) .text .balign 4 @@ -158,8 +159,7 @@ adcs s6, s6, c; \ adc s7, s7, c \ -bignum_montmul_p521: -_bignum_montmul_p521: +S2N_BN_SYMBOL(bignum_montmul_p521): // Save registers and make space for the temporary buffer diff --git a/arm/p521/bignum_montmul_p521_alt.S b/arm/p521/bignum_montmul_p521_alt.S index d31e280e22..ed84567a7c 100644 --- a/arm/p521/bignum_montmul_p521_alt.S +++ b/arm/p521/bignum_montmul_p521_alt.S @@ -27,9 +27,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_montmul_p521_alt - .globl _bignum_montmul_p521_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p521_alt) .text .balign 4 @@ -82,8 +83,7 @@ #define u15 x20 #define u16 x21 -bignum_montmul_p521_alt: -_bignum_montmul_p521_alt: +S2N_BN_SYMBOL(bignum_montmul_p521_alt): // Save more registers and make space for the temporary buffer diff --git a/arm/p521/bignum_montsqr_p521.S b/arm/p521/bignum_montsqr_p521.S index fb112ccb3a..6ec3332734 100644 --- a/arm/p521/bignum_montsqr_p521.S +++ b/arm/p521/bignum_montsqr_p521.S @@ -27,9 +27,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_montsqr_p521 - .globl _bignum_montsqr_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p521) .text .balign 4 @@ -73,8 +74,7 @@ #define d7 x9 #define d8 x10 -bignum_montsqr_p521: -_bignum_montsqr_p521: +S2N_BN_SYMBOL(bignum_montsqr_p521): // Save registers diff --git a/arm/p521/bignum_montsqr_p521_alt.S b/arm/p521/bignum_montsqr_p521_alt.S index 20e65296e5..fd30cd81b3 100644 --- a/arm/p521/bignum_montsqr_p521_alt.S +++ b/arm/p521/bignum_montsqr_p521_alt.S @@ -27,9 +27,10 @@ // // Standard ARM ABI: 
X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_montsqr_p521_alt - .globl _bignum_montsqr_p521_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p521_alt) .text .balign 4 @@ -66,8 +67,7 @@ #define u15 x27 #define u16 x29 -bignum_montsqr_p521_alt: -_bignum_montsqr_p521_alt: +S2N_BN_SYMBOL(bignum_montsqr_p521_alt): // It's convenient to have more registers to play with diff --git a/arm/p521/bignum_mul_p521.S b/arm/p521/bignum_mul_p521.S index 832077690c..d4ed4b7e0a 100644 --- a/arm/p521/bignum_mul_p521.S +++ b/arm/p521/bignum_mul_p521.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_mul_p521 - .globl _bignum_mul_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_p521) .text .balign 4 @@ -153,8 +154,7 @@ adcs s6, s6, c; \ adc s7, s7, c \ -bignum_mul_p521: -_bignum_mul_p521: +S2N_BN_SYMBOL(bignum_mul_p521): // Save registers and make space for the temporary buffer diff --git a/arm/p521/bignum_mul_p521_alt.S b/arm/p521/bignum_mul_p521_alt.S index aac74488ed..f0b6ee47a5 100644 --- a/arm/p521/bignum_mul_p521_alt.S +++ b/arm/p521/bignum_mul_p521_alt.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_mul_p521_alt - .globl _bignum_mul_p521_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_p521_alt) .text .balign 4 @@ -77,8 +78,7 @@ #define u15 x20 #define u16 x21 -bignum_mul_p521_alt: -_bignum_mul_p521_alt: +S2N_BN_SYMBOL(bignum_mul_p521_alt): // Save more registers and make temporary space on stack diff --git a/arm/p521/bignum_neg_p521.S b/arm/p521/bignum_neg_p521.S index e13bac25c0..90292e39ed 100644 --- a/arm/p521/bignum_neg_p521.S +++ b/arm/p521/bignum_neg_p521.S @@ -21,9 +21,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_neg_p521 - .globl _bignum_neg_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p521) .text .balign 4 @@ -42,8 +43,7 @@ #define d7 x10 #define d8 x11 -bignum_neg_p521: -_bignum_neg_p521: +S2N_BN_SYMBOL(bignum_neg_p521): // Load the 9 digits of x and generate p = the OR of them all diff --git a/arm/p521/bignum_optneg_p521.S b/arm/p521/bignum_optneg_p521.S index 3834f9abd9..2822ab4c69 100644 --- a/arm/p521/bignum_optneg_p521.S +++ b/arm/p521/bignum_optneg_p521.S @@ -23,9 +23,10 @@ // // Standard ARM ABI: X0 = z, X1 = p, X2 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_optneg_p521 - .globl _bignum_optneg_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optneg_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optneg_p521) .text .balign 4 @@ -44,8 +45,7 @@ #define d7 x11 #define d8 x12 -bignum_optneg_p521: -_bignum_optneg_p521: +S2N_BN_SYMBOL(bignum_optneg_p521): // Load the 9 digits of x and generate q = the OR of them all diff --git a/arm/p521/bignum_sqr_p521.S b/arm/p521/bignum_sqr_p521.S index a3c618b5b6..bb68c3396c 
100644 --- a/arm/p521/bignum_sqr_p521.S +++ b/arm/p521/bignum_sqr_p521.S @@ -21,9 +21,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_sqr_p521 - .globl _bignum_sqr_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_p521) .text .balign 4 @@ -67,8 +68,7 @@ #define d7 x9 #define d8 x10 -bignum_sqr_p521: -_bignum_sqr_p521: +S2N_BN_SYMBOL(bignum_sqr_p521): // Save registers diff --git a/arm/p521/bignum_sqr_p521_alt.S b/arm/p521/bignum_sqr_p521_alt.S index c2d7bc996b..10417d12e1 100644 --- a/arm/p521/bignum_sqr_p521_alt.S +++ b/arm/p521/bignum_sqr_p521_alt.S @@ -21,9 +21,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_sqr_p521_alt - .globl _bignum_sqr_p521_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_p521_alt) .text .balign 4 @@ -60,8 +61,7 @@ #define u15 x27 #define u16 x29 -bignum_sqr_p521_alt: -_bignum_sqr_p521_alt: +S2N_BN_SYMBOL(bignum_sqr_p521_alt): // It's convenient to have more registers to play with diff --git a/arm/p521/bignum_sub_p521.S b/arm/p521/bignum_sub_p521.S index f1cd34ed4b..1a074aed12 100644 --- a/arm/p521/bignum_sub_p521.S +++ b/arm/p521/bignum_sub_p521.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_sub_p521 - .globl _bignum_sub_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub_p521) .text .balign 4 @@ -44,8 +45,7 @@ #define d8 x13 -bignum_sub_p521: -_bignum_sub_p521: +S2N_BN_SYMBOL(bignum_sub_p521): // First just subtract the numbers as [d8;d7;d6;d5;d4;d3;d2;d1;d0] = x - y diff --git a/arm/p521/bignum_tolebytes_p521.S b/arm/p521/bignum_tolebytes_p521.S index 16f87ec272..397d2a73bf 100644 --- a/arm/p521/bignum_tolebytes_p521.S +++ b/arm/p521/bignum_tolebytes_p521.S @@ -24,9 +24,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_tolebytes_p521 - .globl _bignum_tolebytes_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tolebytes_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tolebytes_p521) .text .balign 4 @@ -36,8 +37,7 @@ #define d x2 #define dshort w2 -bignum_tolebytes_p521: -_bignum_tolebytes_p521: +S2N_BN_SYMBOL(bignum_tolebytes_p521): // word 0 diff --git a/arm/p521/bignum_tomont_p521.S b/arm/p521/bignum_tomont_p521.S index 0aac27f3ba..1ca5089f9e 100644 --- a/arm/p521/bignum_tomont_p521.S +++ b/arm/p521/bignum_tomont_p521.S @@ -22,9 +22,10 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_tomont_p521 - .globl _bignum_tomont_p521 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p521) .text .balign 4 @@ -43,8 +44,7 @@ #define d7 x11 #define d8 x12 -bignum_tomont_p521: -_bignum_tomont_p521: +S2N_BN_SYMBOL(bignum_tomont_p521): // Load top digit first and get its upper bits in h so that we // separate out x = 2^521 * H + L with h = H. 
Now x mod p_521 = diff --git a/arm/p521/bignum_triple_p521.S b/arm/p521/bignum_triple_p521.S index 0a51a5322f..7b0caca30a 100644 --- a/arm/p521/bignum_triple_p521.S +++ b/arm/p521/bignum_triple_p521.S @@ -22,11 +22,12 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_triple_p521 - .globl _bignum_triple_p521 - .globl bignum_triple_p521_alt - .globl _bignum_triple_p521_alt + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p521) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p521_alt) .text .balign 4 @@ -47,10 +48,9 @@ #define d8 x12 -bignum_triple_p521: -_bignum_triple_p521: -bignum_triple_p521_alt: -_bignum_triple_p521_alt: +S2N_BN_SYMBOL(bignum_triple_p521): + +S2N_BN_SYMBOL(bignum_triple_p521_alt): // Pick out top bit to wrap to the zero position in the doubling step diff --git a/include/_internal_s2n_bignum.h b/include/_internal_s2n_bignum.h new file mode 100644 index 0000000000..490fe3dc0b --- /dev/null +++ b/include/_internal_s2n_bignum.h @@ -0,0 +1,18 @@ + +#ifdef __APPLE__ +#define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl _##name +#ifdef S2N_BN_HIDE_SYMBOLS +#define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .private_extern _##name +#else +#define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ +#endif +#define S2N_BN_SYMBOL(name) _##name +#else +#define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl name +#ifdef S2N_BN_HIDE_SYMBOLS +#define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .hidden name +#else +#define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ +#endif +#define S2N_BN_SYMBOL(name) name +#endif \ No newline at end of file diff --git a/x86_att/curve25519/bignum_neg_p25519.S b/x86_att/curve25519/bignum_neg_p25519.S index 958f3a62cb..fad953baf4 100644 --- a/x86_att/curve25519/bignum_neg_p25519.S +++ b/x86_att/curve25519/bignum_neg_p25519.S @@ -24,9 +24,10 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_neg_p25519 - .globl _bignum_neg_p25519 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p25519) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p25519) .text #define z %rdi @@ -42,8 +43,7 @@ #define qshort %esi -bignum_neg_p25519: -_bignum_neg_p25519: +S2N_BN_SYMBOL(bignum_neg_p25519): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_add_p384.S b/x86_att/p384/bignum_add_p384.S index 82f513c70f..5467fb94af 100644 --- a/x86_att/p384/bignum_add_p384.S +++ b/x86_att/p384/bignum_add_p384.S @@ -24,10 +24,10 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_add_p384 - .globl _bignum_add_p384 - .text + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add_p384) #define z %rdi #define x %rsi @@ -50,8 +50,7 @@ -bignum_add_p384: -_bignum_add_p384: +S2N_BN_SYMBOL(bignum_add_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_bigendian_6.S b/x86_att/p384/bignum_bigendian_6.S index 7286fec58c..8ac48bc392 100644 --- a/x86_att/p384/bignum_bigendian_6.S +++ b/x86_att/p384/bignum_bigendian_6.S @@ -36,13 +36,15 @@ // Microsoft x64 ABI: RCX = z, RDX = x // 
---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_bigendian_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_bigendian_6) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_frombebytes_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_frombebytes_6) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tobebytes_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tobebytes_6) - .globl bignum_bigendian_6 - .globl _bignum_bigendian_6 - .globl bignum_frombebytes_6 - .globl _bignum_frombebytes_6 - .globl bignum_tobebytes_6 - .globl _bignum_tobebytes_6 .text #define z %rdi @@ -56,12 +58,9 @@ // pairs (0-5, 1-4, 2-3) to allow x and z to point to the same buffer // without using more intermediate registers. -bignum_bigendian_6: -_bignum_bigendian_6: -bignum_frombebytes_6: -_bignum_frombebytes_6: -bignum_tobebytes_6: -_bignum_tobebytes_6: +S2N_BN_SYMBOL(bignum_bigendian_6): +S2N_BN_SYMBOL(bignum_frombebytes_6): +S2N_BN_SYMBOL(bignum_tobebytes_6): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_cmul_p384.S b/x86_att/p384/bignum_cmul_p384.S index 97618b19b0..a186cc556d 100644 --- a/x86_att/p384/bignum_cmul_p384.S +++ b/x86_att/p384/bignum_cmul_p384.S @@ -25,9 +25,10 @@ // Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_cmul_p384 - .globl _bignum_cmul_p384 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p384) .text #define z %rdi @@ -54,8 +55,7 @@ #define qshort %edx -bignum_cmul_p384: -_bignum_cmul_p384: +S2N_BN_SYMBOL(bignum_cmul_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_cmul_p384_alt.S b/x86_att/p384/bignum_cmul_p384_alt.S index 93a54b5d60..1af6b54f03 100644 --- a/x86_att/p384/bignum_cmul_p384_alt.S +++ b/x86_att/p384/bignum_cmul_p384_alt.S @@ -25,10 +25,10 @@ // Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_cmul_p384_alt - .globl _bignum_cmul_p384_alt - .text + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p384_alt) #define z %rdi @@ -58,8 +58,7 @@ #define cshort %ecx #define qshort %ecx -bignum_cmul_p384_alt: -_bignum_cmul_p384_alt: +S2N_BN_SYMBOL(bignum_cmul_p384_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_deamont_p384.S b/x86_att/p384/bignum_deamont_p384.S index 18a29f3218..a0fbde1a95 100644 --- a/x86_att/p384/bignum_deamont_p384.S +++ b/x86_att/p384/bignum_deamont_p384.S @@ -27,11 +27,10 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_deamont_p384 - .globl _bignum_deamont_p384 - .text - + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p384) #define z %rdi #define x %rsi @@ -80,8 +79,7 @@ movq %rdx, d6 ; \ sbbq $0, d6 -bignum_deamont_p384: -_bignum_deamont_p384: +S2N_BN_SYMBOL(bignum_deamont_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_deamont_p384_alt.S b/x86_att/p384/bignum_deamont_p384_alt.S index ca2d56e104..b916b9118e 100644 --- a/x86_att/p384/bignum_deamont_p384_alt.S +++ b/x86_att/p384/bignum_deamont_p384_alt.S @@ -27,10 +27,10 @@ // Microsoft x64 ABI: RCX = z, RDX = x // 
---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_deamont_p384_alt - .globl _bignum_deamont_p384_alt - .text + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p384_alt) #define z %rdi #define x %rsi @@ -80,8 +80,7 @@ movq %rcx, d6 ; \ sbbq $0, d6 -bignum_deamont_p384_alt: -_bignum_deamont_p384_alt: +S2N_BN_SYMBOL(bignum_deamont_p384_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_demont_p384.S b/x86_att/p384/bignum_demont_p384.S index 004041fe11..20dd03a54b 100644 --- a/x86_att/p384/bignum_demont_p384.S +++ b/x86_att/p384/bignum_demont_p384.S @@ -27,10 +27,10 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_demont_p384 - .globl _bignum_demont_p384 - .text + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p384) #define z %rdi #define x %rsi @@ -72,8 +72,7 @@ movq %rdx, d6 ; \ sbbq $0, d6 -bignum_demont_p384: -_bignum_demont_p384: +S2N_BN_SYMBOL(bignum_demont_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_demont_p384_alt.S b/x86_att/p384/bignum_demont_p384_alt.S index 49997c920c..652273498d 100644 --- a/x86_att/p384/bignum_demont_p384_alt.S +++ b/x86_att/p384/bignum_demont_p384_alt.S @@ -27,10 +27,10 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_demont_p384_alt - .globl _bignum_demont_p384_alt - .text + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p384_alt) #define z %rdi #define x %rsi @@ -72,8 +72,7 @@ movq %rcx, d6 ; \ sbbq $0, d6 -bignum_demont_p384_alt: -_bignum_demont_p384_alt: +S2N_BN_SYMBOL(bignum_demont_p384_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_double_p384.S b/x86_att/p384/bignum_double_p384.S index 20b47c5c03..f046ea06eb 100644 --- a/x86_att/p384/bignum_double_p384.S +++ b/x86_att/p384/bignum_double_p384.S @@ -24,9 +24,10 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_double_p384 - .globl _bignum_double_p384 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_double_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_double_p384) .text #define z %rdi @@ -48,8 +49,7 @@ -bignum_double_p384: -_bignum_double_p384: +S2N_BN_SYMBOL(bignum_double_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_half_p384.S b/x86_att/p384/bignum_half_p384.S index d7020baf07..75a18cafb3 100644 --- a/x86_att/p384/bignum_half_p384.S +++ b/x86_att/p384/bignum_half_p384.S @@ -24,9 +24,10 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" - .globl bignum_half_p384 - .globl _bignum_half_p384 + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_half_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_half_p384) .text #define z %rdi @@ -45,8 +46,7 @@ -bignum_half_p384: -_bignum_half_p384: +S2N_BN_SYMBOL(bignum_half_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_littleendian_6.S b/x86_att/p384/bignum_littleendian_6.S index a62797d102..7acfa98303 100644 --- 
a/x86_att/p384/bignum_littleendian_6.S +++ b/x86_att/p384/bignum_littleendian_6.S @@ -35,25 +35,24 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_littleendian_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_littleendian_6) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_fromlebytes_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_fromlebytes_6) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tolebytes_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tolebytes_6) - .globl bignum_littleendian_6 - .globl _bignum_littleendian_6 - .globl bignum_fromlebytes_6 - .globl _bignum_fromlebytes_6 - .globl bignum_tolebytes_6 - .globl _bignum_tolebytes_6 .text #define z %rdi #define x %rsi #define a %rax -bignum_littleendian_6: -_bignum_littleendian_6: -bignum_fromlebytes_6: -_bignum_fromlebytes_6: -bignum_tolebytes_6: -_bignum_tolebytes_6: +S2N_BN_SYMBOL(bignum_littleendian_6): +S2N_BN_SYMBOL(bignum_fromlebytes_6): +S2N_BN_SYMBOL(bignum_tolebytes_6): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_mod_n384.S b/x86_att/p384/bignum_mod_n384.S index 50675e9d2a..18fa903993 100644 --- a/x86_att/p384/bignum_mod_n384.S +++ b/x86_att/p384/bignum_mod_n384.S @@ -26,9 +26,11 @@ // Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384) - .globl bignum_mod_n384 - .globl _bignum_mod_n384 .text #define z %rdi @@ -52,8 +54,7 @@ #define qshort %edx -bignum_mod_n384: -_bignum_mod_n384: +S2N_BN_SYMBOL(bignum_mod_n384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_mod_n384_6.S b/x86_att/p384/bignum_mod_n384_6.S index 4a0a4ac564..b84c68a6ec 100644 --- a/x86_att/p384/bignum_mod_n384_6.S +++ b/x86_att/p384/bignum_mod_n384_6.S @@ -26,9 +26,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384_6) - .globl bignum_mod_n384_6 - .globl _bignum_mod_n384_6 .text #define z %rdi @@ -49,8 +51,7 @@ -bignum_mod_n384_6: -_bignum_mod_n384_6: +S2N_BN_SYMBOL(bignum_mod_n384_6): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_mod_n384_alt.S b/x86_att/p384/bignum_mod_n384_alt.S index 6d2dc2b7f8..3e13a4bd51 100644 --- a/x86_att/p384/bignum_mod_n384_alt.S +++ b/x86_att/p384/bignum_mod_n384_alt.S @@ -26,9 +26,11 @@ // Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384_alt) - .globl bignum_mod_n384_alt - .globl _bignum_mod_n384_alt .text #define z %rdi @@ -53,8 +55,7 @@ #define n0short %eax #define qshort %ebp -bignum_mod_n384_alt: -_bignum_mod_n384_alt: +S2N_BN_SYMBOL(bignum_mod_n384_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_mod_p384.S b/x86_att/p384/bignum_mod_p384.S index c82561d6a6..b558054813 100644 --- a/x86_att/p384/bignum_mod_p384.S +++ b/x86_att/p384/bignum_mod_p384.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // 
---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384) - .globl bignum_mod_p384 - .globl _bignum_mod_p384 .text #define z %rdi @@ -51,8 +53,7 @@ #define qshort %edx -bignum_mod_p384: -_bignum_mod_p384: +S2N_BN_SYMBOL(bignum_mod_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_mod_p384_6.S b/x86_att/p384/bignum_mod_p384_6.S index bbd12524a2..cca2ff0f7f 100644 --- a/x86_att/p384/bignum_mod_p384_6.S +++ b/x86_att/p384/bignum_mod_p384_6.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384_6) - .globl bignum_mod_p384_6 - .globl _bignum_mod_p384_6 .text #define z %rdi @@ -48,8 +50,7 @@ -bignum_mod_p384_6: -_bignum_mod_p384_6: +S2N_BN_SYMBOL(bignum_mod_p384_6): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_mod_p384_alt.S b/x86_att/p384/bignum_mod_p384_alt.S index 69ee474326..926a2bbd08 100644 --- a/x86_att/p384/bignum_mod_p384_alt.S +++ b/x86_att/p384/bignum_mod_p384_alt.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384_alt) - .globl bignum_mod_p384_alt - .globl _bignum_mod_p384_alt .text #define z %rdi @@ -55,8 +57,7 @@ #define qshort %ebx -bignum_mod_p384_alt: -_bignum_mod_p384_alt: +S2N_BN_SYMBOL(bignum_mod_p384_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_montmul_p384.S b/x86_att/p384/bignum_montmul_p384.S index 160c293430..ec40ebafce 100644 --- a/x86_att/p384/bignum_montmul_p384.S +++ b/x86_att/p384/bignum_montmul_p384.S @@ -28,9 +28,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ----------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p384) - .globl bignum_montmul_p384 - .globl _bignum_montmul_p384 .text #define z %rdi @@ -95,8 +97,7 @@ addq %rdx, d6 ; \ adcq $0, d7 -bignum_montmul_p384: -_bignum_montmul_p384: +S2N_BN_SYMBOL(bignum_montmul_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_montmul_p384_alt.S b/x86_att/p384/bignum_montmul_p384_alt.S index 3d24dc4ccf..8a15b4995e 100644 --- a/x86_att/p384/bignum_montmul_p384_alt.S +++ b/x86_att/p384/bignum_montmul_p384_alt.S @@ -28,9 +28,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ----------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p384_alt) - .globl bignum_montmul_p384_alt - .globl _bignum_montmul_p384_alt .text #define z %rdi @@ -117,8 +119,7 @@ addq %rbx, d6 ; \ adcq $0, d7 -bignum_montmul_p384_alt: -_bignum_montmul_p384_alt: +S2N_BN_SYMBOL(bignum_montmul_p384_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_montsqr_p384.S b/x86_att/p384/bignum_montsqr_p384.S index bd0517b911..af245ed91a 100644 --- a/x86_att/p384/bignum_montsqr_p384.S +++ 
b/x86_att/p384/bignum_montsqr_p384.S @@ -27,9 +27,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p384) - .globl bignum_montsqr_p384 - .globl _bignum_montsqr_p384 .text #define z %rdi @@ -92,8 +94,7 @@ movq %rdx, d6 ; \ sbbq $0, d6 -bignum_montsqr_p384: -_bignum_montsqr_p384: +S2N_BN_SYMBOL(bignum_montsqr_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_montsqr_p384_alt.S b/x86_att/p384/bignum_montsqr_p384_alt.S index 5588835538..9b4c9374fe 100644 --- a/x86_att/p384/bignum_montsqr_p384_alt.S +++ b/x86_att/p384/bignum_montsqr_p384_alt.S @@ -27,9 +27,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p384_alt) - .globl bignum_montsqr_p384_alt - .globl _bignum_montsqr_p384_alt .text #define z %rdi @@ -114,8 +116,7 @@ movq %rbx, d6 ; \ sbbq $0, d6 -bignum_montsqr_p384_alt: -_bignum_montsqr_p384_alt: +S2N_BN_SYMBOL(bignum_montsqr_p384_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_mux_6.S b/x86_att/p384/bignum_mux_6.S index 92f222c080..78e055f314 100644 --- a/x86_att/p384/bignum_mux_6.S +++ b/x86_att/p384/bignum_mux_6.S @@ -27,9 +27,11 @@ // Microsoft x64 ABI: RCX = p, RDX = z, R8 = x, R9 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mux_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mux_6) - .globl bignum_mux_6 - .globl _bignum_mux_6 .text #define p %rdi @@ -40,8 +42,7 @@ #define b %r8 -bignum_mux_6: -_bignum_mux_6: +S2N_BN_SYMBOL(bignum_mux_6): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_neg_p384.S b/x86_att/p384/bignum_neg_p384.S index 42094e3bc0..5522bd22e9 100644 --- a/x86_att/p384/bignum_neg_p384.S +++ b/x86_att/p384/bignum_neg_p384.S @@ -23,9 +23,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p384) - .globl bignum_neg_p384 - .globl _bignum_neg_p384 .text #define z %rdi @@ -40,8 +42,7 @@ #define n0short %eax -bignum_neg_p384: -_bignum_neg_p384: +S2N_BN_SYMBOL(bignum_neg_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_nonzero_6.S b/x86_att/p384/bignum_nonzero_6.S index f75b3ebec2..9c906b34d4 100644 --- a/x86_att/p384/bignum_nonzero_6.S +++ b/x86_att/p384/bignum_nonzero_6.S @@ -23,9 +23,11 @@ // Microsoft x64 ABI: RCX = x, returns RAX // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_nonzero_6) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_nonzero_6) - .globl bignum_nonzero_6 - .globl _bignum_nonzero_6 .text #define x %rdi @@ -35,8 +37,7 @@ -bignum_nonzero_6: -_bignum_nonzero_6: +S2N_BN_SYMBOL(bignum_nonzero_6): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_optneg_p384.S b/x86_att/p384/bignum_optneg_p384.S index 1b20bb52e4..ca72064e49 100644 --- a/x86_att/p384/bignum_optneg_p384.S +++ 
b/x86_att/p384/bignum_optneg_p384.S @@ -25,9 +25,11 @@ // Microsoft x64 ABI: RCX = z, RDX = p, R8 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optneg_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optneg_p384) - .globl bignum_optneg_p384 - .globl _bignum_optneg_p384 .text #define z %rdi @@ -43,8 +45,7 @@ #define n0short %eax -bignum_optneg_p384: -_bignum_optneg_p384: +S2N_BN_SYMBOL(bignum_optneg_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_sub_p384.S b/x86_att/p384/bignum_sub_p384.S index 2738c1b442..4a687111e0 100644 --- a/x86_att/p384/bignum_sub_p384.S +++ b/x86_att/p384/bignum_sub_p384.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub_p384) - .globl bignum_sub_p384 - .globl _bignum_sub_p384 .text #define z %rdi @@ -49,8 +51,7 @@ -bignum_sub_p384: -_bignum_sub_p384: +S2N_BN_SYMBOL(bignum_sub_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_tomont_p384.S b/x86_att/p384/bignum_tomont_p384.S index 03e2ca53e4..e5c16a05bf 100644 --- a/x86_att/p384/bignum_tomont_p384.S +++ b/x86_att/p384/bignum_tomont_p384.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p384) - .globl bignum_tomont_p384 - .globl _bignum_tomont_p384 .text #define z %rdi @@ -94,8 +96,7 @@ addq %rdx, d6 ; \ adcq $0, d7 -bignum_tomont_p384: -_bignum_tomont_p384: +S2N_BN_SYMBOL(bignum_tomont_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_tomont_p384_alt.S b/x86_att/p384/bignum_tomont_p384_alt.S index 5eab13d194..434d5b6115 100644 --- a/x86_att/p384/bignum_tomont_p384_alt.S +++ b/x86_att/p384/bignum_tomont_p384_alt.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p384_alt) - .globl bignum_tomont_p384_alt - .globl _bignum_tomont_p384_alt .text #define z %rdi @@ -112,8 +114,7 @@ addq %rbx, d6 ; \ adcq $0, d7 -bignum_tomont_p384_alt: -_bignum_tomont_p384_alt: +S2N_BN_SYMBOL(bignum_tomont_p384_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_triple_p384.S b/x86_att/p384/bignum_triple_p384.S index 237f10b929..c0a20bc5b9 100644 --- a/x86_att/p384/bignum_triple_p384.S +++ b/x86_att/p384/bignum_triple_p384.S @@ -27,9 +27,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p384) - .globl bignum_triple_p384 - .globl _bignum_triple_p384 .text #define z %rdi @@ -49,8 +51,7 @@ #define ashort %eax #define qshort %edx -bignum_triple_p384: -_bignum_triple_p384: +S2N_BN_SYMBOL(bignum_triple_p384): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p384/bignum_triple_p384_alt.S b/x86_att/p384/bignum_triple_p384_alt.S 
index 7ae907863e..276fd8b2c3 100644 --- a/x86_att/p384/bignum_triple_p384_alt.S +++ b/x86_att/p384/bignum_triple_p384_alt.S @@ -27,9 +27,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p384_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p384_alt) - .globl bignum_triple_p384_alt - .globl _bignum_triple_p384_alt .text #define z %rdi @@ -52,8 +54,7 @@ #define qshort %ecx #define dshort %edx -bignum_triple_p384_alt: -_bignum_triple_p384_alt: +S2N_BN_SYMBOL(bignum_triple_p384_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_add_p521.S b/x86_att/p521/bignum_add_p521.S index 34ad429c7e..9b636f7bb7 100644 --- a/x86_att/p521/bignum_add_p521.S +++ b/x86_att/p521/bignum_add_p521.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add_p521) - .globl bignum_add_p521 - .globl _bignum_add_p521 .text #define z %rdi @@ -49,8 +51,7 @@ -bignum_add_p521: -_bignum_add_p521: +S2N_BN_SYMBOL(bignum_add_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_cmul_p521.S b/x86_att/p521/bignum_cmul_p521.S index d4381c99b3..1811760bab 100644 --- a/x86_att/p521/bignum_cmul_p521.S +++ b/x86_att/p521/bignum_cmul_p521.S @@ -25,9 +25,11 @@ // Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p521) - .globl bignum_cmul_p521 - .globl _bignum_cmul_p521 .text #define z %rdi @@ -61,8 +63,7 @@ #define h d9 -bignum_cmul_p521: -_bignum_cmul_p521: +S2N_BN_SYMBOL(bignum_cmul_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_cmul_p521_alt.S b/x86_att/p521/bignum_cmul_p521_alt.S index 8dec8879a7..4c0fc86a10 100644 --- a/x86_att/p521/bignum_cmul_p521_alt.S +++ b/x86_att/p521/bignum_cmul_p521_alt.S @@ -25,9 +25,11 @@ // Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p521_alt) - .globl bignum_cmul_p521_alt - .globl _bignum_cmul_p521_alt .text #define z %rdi @@ -65,8 +67,7 @@ #define h d9 -bignum_cmul_p521_alt: -_bignum_cmul_p521_alt: +S2N_BN_SYMBOL(bignum_cmul_p521_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_deamont_p521.S b/x86_att/p521/bignum_deamont_p521.S index 2543c0ec86..a5b38a7b91 100644 --- a/x86_att/p521/bignum_deamont_p521.S +++ b/x86_att/p521/bignum_deamont_p521.S @@ -27,9 +27,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p521) - .globl bignum_deamont_p521 - .globl _bignum_deamont_p521 .text #define z %rdi @@ -49,8 +51,7 @@ #define d7 %r13 #define d8 %rbp -bignum_deamont_p521: -_bignum_deamont_p521: +S2N_BN_SYMBOL(bignum_deamont_p521): #if WINDOWS_ABI pushq %rdi diff --git 
a/x86_att/p521/bignum_demont_p521.S b/x86_att/p521/bignum_demont_p521.S index f38cea2194..87d9fefc9a 100644 --- a/x86_att/p521/bignum_demont_p521.S +++ b/x86_att/p521/bignum_demont_p521.S @@ -27,9 +27,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p521) - .globl bignum_demont_p521 - .globl _bignum_demont_p521 .text #define z %rdi @@ -49,8 +51,7 @@ #define d7 %rcx #define d8 %rdx -bignum_demont_p521: -_bignum_demont_p521: +S2N_BN_SYMBOL(bignum_demont_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_double_p521.S b/x86_att/p521/bignum_double_p521.S index 33b29f8ec5..eb7e3355ed 100644 --- a/x86_att/p521/bignum_double_p521.S +++ b/x86_att/p521/bignum_double_p521.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_double_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_double_p521) - .globl bignum_double_p521 - .globl _bignum_double_p521 .text #define z %rdi @@ -37,8 +39,7 @@ -bignum_double_p521: -_bignum_double_p521: +S2N_BN_SYMBOL(bignum_double_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_fromlebytes_p521.S b/x86_att/p521/bignum_fromlebytes_p521.S index 0fdc34ae07..6429824064 100644 --- a/x86_att/p521/bignum_fromlebytes_p521.S +++ b/x86_att/p521/bignum_fromlebytes_p521.S @@ -28,17 +28,18 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_fromlebytes_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_fromlebytes_p521) - .globl bignum_fromlebytes_p521 - .globl _bignum_fromlebytes_p521 .text #define z %rdi #define x %rsi #define a %rax -bignum_fromlebytes_p521: -_bignum_fromlebytes_p521: +S2N_BN_SYMBOL(bignum_fromlebytes_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_half_p521.S b/x86_att/p521/bignum_half_p521.S index 5550ba54da..a637153eaa 100644 --- a/x86_att/p521/bignum_half_p521.S +++ b/x86_att/p521/bignum_half_p521.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_half_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_half_p521) - .globl bignum_half_p521 - .globl _bignum_half_p521 .text #define z %rdi @@ -48,8 +50,7 @@ -bignum_half_p521: -_bignum_half_p521: +S2N_BN_SYMBOL(bignum_half_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_mod_n521_9.S b/x86_att/p521/bignum_mod_n521_9.S index eb21672efb..5d8bb8ea22 100644 --- a/x86_att/p521/bignum_mod_n521_9.S +++ b/x86_att/p521/bignum_mod_n521_9.S @@ -26,9 +26,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n521_9) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n521_9) - .globl bignum_mod_n521_9 - .globl _bignum_mod_n521_9 .text #define z %rdi @@ -49,8 +51,7 @@ #define cshort %ecx #define qshort %edx -bignum_mod_n521_9: -_bignum_mod_n521_9: +S2N_BN_SYMBOL(bignum_mod_n521_9): #if 
WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_mod_n521_9_alt.S b/x86_att/p521/bignum_mod_n521_9_alt.S index 2251304d92..555930bf4d 100644 --- a/x86_att/p521/bignum_mod_n521_9_alt.S +++ b/x86_att/p521/bignum_mod_n521_9_alt.S @@ -26,9 +26,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n521_9_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n521_9_alt) - .globl bignum_mod_n521_9_alt - .globl _bignum_mod_n521_9_alt .text #define z %rdi @@ -49,8 +51,7 @@ #define cshort %ecx #define qshort %edx -bignum_mod_n521_9_alt: -_bignum_mod_n521_9_alt: +S2N_BN_SYMBOL(bignum_mod_n521_9_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_mod_p521_9.S b/x86_att/p521/bignum_mod_p521_9.S index cb808d9142..dfe3b20781 100644 --- a/x86_att/p521/bignum_mod_p521_9.S +++ b/x86_att/p521/bignum_mod_p521_9.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p521_9) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p521_9) - .globl bignum_mod_p521_9 - .globl _bignum_mod_p521_9 .text #define z %rdi @@ -48,8 +50,7 @@ #define d7 %rsi -bignum_mod_p521_9: -_bignum_mod_p521_9: +S2N_BN_SYMBOL(bignum_mod_p521_9): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_montmul_p521.S b/x86_att/p521/bignum_montmul_p521.S index 377a6514e0..498422eeb8 100644 --- a/x86_att/p521/bignum_montmul_p521.S +++ b/x86_att/p521/bignum_montmul_p521.S @@ -29,9 +29,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p521) - .globl bignum_montmul_p521 - .globl _bignum_montmul_p521 .text #define z %rdi @@ -50,8 +52,7 @@ adcxq %rax, low ; \ adoxq %rbx, high -bignum_montmul_p521: -_bignum_montmul_p521: +S2N_BN_SYMBOL(bignum_montmul_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_montmul_p521_alt.S b/x86_att/p521/bignum_montmul_p521_alt.S index 97dbf57d42..5fa8290e7d 100644 --- a/x86_att/p521/bignum_montmul_p521_alt.S +++ b/x86_att/p521/bignum_montmul_p521_alt.S @@ -29,9 +29,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p521_alt) - .globl bignum_montmul_p521_alt - .globl _bignum_montmul_p521_alt .text #define z %rdi @@ -67,8 +69,7 @@ addq %rax, l ; \ adcq %rdx, h -bignum_montmul_p521_alt: -_bignum_montmul_p521_alt: +S2N_BN_SYMBOL(bignum_montmul_p521_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_montsqr_p521.S b/x86_att/p521/bignum_montsqr_p521.S index 8b5cc46d04..b6fbcb8069 100644 --- a/x86_att/p521/bignum_montsqr_p521.S +++ b/x86_att/p521/bignum_montsqr_p521.S @@ -29,9 +29,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p521) - .globl 
bignum_montsqr_p521 - .globl _bignum_montsqr_p521 .text #define z %rdi @@ -61,8 +63,7 @@ adcxq %rax, low ; \ adoxq zero, high -bignum_montsqr_p521: -_bignum_montsqr_p521: +S2N_BN_SYMBOL(bignum_montsqr_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_montsqr_p521_alt.S b/x86_att/p521/bignum_montsqr_p521_alt.S index 62b63ea61e..e710af80b5 100644 --- a/x86_att/p521/bignum_montsqr_p521_alt.S +++ b/x86_att/p521/bignum_montsqr_p521_alt.S @@ -29,9 +29,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p521_alt) - .globl bignum_montsqr_p521_alt - .globl _bignum_montsqr_p521_alt .text // Input arguments @@ -96,8 +98,7 @@ adcq %rdx, h ; \ adcq $0, c -bignum_montsqr_p521_alt: -_bignum_montsqr_p521_alt: +S2N_BN_SYMBOL(bignum_montsqr_p521_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_mul_p521.S b/x86_att/p521/bignum_mul_p521.S index d5656f8e45..c4ee6b8e8f 100644 --- a/x86_att/p521/bignum_mul_p521.S +++ b/x86_att/p521/bignum_mul_p521.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_p521) - .globl bignum_mul_p521 - .globl _bignum_mul_p521 .text #define z %rdi @@ -45,8 +47,7 @@ adcxq %rax, low ; \ adoxq %rbx, high -bignum_mul_p521: -_bignum_mul_p521: +S2N_BN_SYMBOL(bignum_mul_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_mul_p521_alt.S b/x86_att/p521/bignum_mul_p521_alt.S index 2eee34b99e..c24decbb60 100644 --- a/x86_att/p521/bignum_mul_p521_alt.S +++ b/x86_att/p521/bignum_mul_p521_alt.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_p521_alt) - .globl bignum_mul_p521_alt - .globl _bignum_mul_p521_alt .text #define z %rdi @@ -62,8 +64,7 @@ addq %rax, l ; \ adcq %rdx, h -bignum_mul_p521_alt: -_bignum_mul_p521_alt: +S2N_BN_SYMBOL(bignum_mul_p521_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_neg_p521.S b/x86_att/p521/bignum_neg_p521.S index 453ce2a2df..a69298bb0c 100644 --- a/x86_att/p521/bignum_neg_p521.S +++ b/x86_att/p521/bignum_neg_p521.S @@ -23,9 +23,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p521) - .globl bignum_neg_p521 - .globl _bignum_neg_p521 .text #define z %rdi @@ -39,8 +41,7 @@ #define d4 %r10 #define d5 %r11 -bignum_neg_p521: -_bignum_neg_p521: +S2N_BN_SYMBOL(bignum_neg_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_optneg_p521.S b/x86_att/p521/bignum_optneg_p521.S index 1203a8e735..1a31081950 100644 --- a/x86_att/p521/bignum_optneg_p521.S +++ b/x86_att/p521/bignum_optneg_p521.S @@ -25,9 +25,11 @@ // Microsoft x64 ABI: RCX = z, RDX = p, R8 = x // ---------------------------------------------------------------------------- +#include 
"../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optneg_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optneg_p521) - .globl bignum_optneg_p521 - .globl _bignum_optneg_p521 .text #define z %rdi @@ -41,8 +43,7 @@ #define d3 %r10 #define d4 %r11 -bignum_optneg_p521: -_bignum_optneg_p521: +S2N_BN_SYMBOL(bignum_optneg_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_sqr_p521.S b/x86_att/p521/bignum_sqr_p521.S index 7f6bb58223..423b9058c8 100644 --- a/x86_att/p521/bignum_sqr_p521.S +++ b/x86_att/p521/bignum_sqr_p521.S @@ -23,9 +23,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_p521) - .globl bignum_sqr_p521 - .globl _bignum_sqr_p521 .text #define z %rdi @@ -55,8 +57,7 @@ adcxq %rax, low ; \ adoxq zero, high -bignum_sqr_p521: -_bignum_sqr_p521: +S2N_BN_SYMBOL(bignum_sqr_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_sqr_p521_alt.S b/x86_att/p521/bignum_sqr_p521_alt.S index 03317366de..8a1e56f76a 100644 --- a/x86_att/p521/bignum_sqr_p521_alt.S +++ b/x86_att/p521/bignum_sqr_p521_alt.S @@ -23,9 +23,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_p521_alt) - .globl bignum_sqr_p521_alt - .globl _bignum_sqr_p521_alt .text // Input arguments @@ -90,8 +92,7 @@ adcq %rdx, h ; \ adcq $0, c -bignum_sqr_p521_alt: -_bignum_sqr_p521_alt: +S2N_BN_SYMBOL(bignum_sqr_p521_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_sub_p521.S b/x86_att/p521/bignum_sub_p521.S index fd68c98026..a41965e4ae 100644 --- a/x86_att/p521/bignum_sub_p521.S +++ b/x86_att/p521/bignum_sub_p521.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub_p521) - .globl bignum_sub_p521 - .globl _bignum_sub_p521 .text #define z %rdi @@ -48,8 +50,7 @@ -bignum_sub_p521: -_bignum_sub_p521: +S2N_BN_SYMBOL(bignum_sub_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_tolebytes_p521.S b/x86_att/p521/bignum_tolebytes_p521.S index e6f6148406..72c13e2c9b 100644 --- a/x86_att/p521/bignum_tolebytes_p521.S +++ b/x86_att/p521/bignum_tolebytes_p521.S @@ -28,17 +28,18 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tolebytes_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tolebytes_p521) - .globl bignum_tolebytes_p521 - .globl _bignum_tolebytes_p521 .text #define z %rdi #define x %rsi #define a %rax -bignum_tolebytes_p521: -_bignum_tolebytes_p521: +S2N_BN_SYMBOL(bignum_tolebytes_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_tomont_p521.S b/x86_att/p521/bignum_tomont_p521.S index a289dcff1b..4291c44cdd 100644 --- a/x86_att/p521/bignum_tomont_p521.S +++ b/x86_att/p521/bignum_tomont_p521.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // 
---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p521) - .globl bignum_tomont_p521 - .globl _bignum_tomont_p521 .text #define z %rdi @@ -48,8 +50,7 @@ #define d7 %rsi -bignum_tomont_p521: -_bignum_tomont_p521: +S2N_BN_SYMBOL(bignum_tomont_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_triple_p521.S b/x86_att/p521/bignum_triple_p521.S index 47c65c9781..3c02c2c4f5 100644 --- a/x86_att/p521/bignum_triple_p521.S +++ b/x86_att/p521/bignum_triple_p521.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p521) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p521) - .globl bignum_triple_p521 - .globl _bignum_triple_p521 .text #define z %rdi @@ -49,8 +51,7 @@ -bignum_triple_p521: -_bignum_triple_p521: +S2N_BN_SYMBOL(bignum_triple_p521): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/p521/bignum_triple_p521_alt.S b/x86_att/p521/bignum_triple_p521_alt.S index 3dfdfb50aa..4e2283e825 100644 --- a/x86_att/p521/bignum_triple_p521_alt.S +++ b/x86_att/p521/bignum_triple_p521_alt.S @@ -24,9 +24,11 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- +#include "../../include/_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p521_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p521_alt) - .globl bignum_triple_p521_alt - .globl _bignum_triple_p521_alt .text #define z %rdi @@ -50,8 +52,7 @@ #define a %rax #define d %rdx -bignum_triple_p521_alt: -_bignum_triple_p521_alt: +S2N_BN_SYMBOL(bignum_triple_p521_alt): #if WINDOWS_ABI pushq %rdi From ed031d383aa731073d8db451b3186db8514aed77 Mon Sep 17 00:00:00 2001 From: Justin Smith Date: Fri, 20 May 2022 12:23:37 -0400 Subject: [PATCH 05/42] Cleanup and indentation s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/f96fbcdc5a2ac4a4215bb08b0eeadd7bdbf87819 --- arm/p384/bignum_bigendian_6.S | 7 +++---- arm/p384/bignum_littleendian_6.S | 7 +++---- include/_internal_s2n_bignum.h | 28 ++++++++++++++-------------- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/arm/p384/bignum_bigendian_6.S b/arm/p384/bignum_bigendian_6.S index d7f463aee5..e0a85e7ccb 100644 --- a/arm/p384/bignum_bigendian_6.S +++ b/arm/p384/bignum_bigendian_6.S @@ -59,10 +59,9 @@ // to allow x and z to point to the same buffer without using more // intermediate registers. 
-S2N_BN_SYMBOL(bignum_bignum_bigendian_6): - -S2N_BN_SYMBOL(bignum_bignum_frombebytes_6): -S2N_BN_SYMBOL(bignum_bignum_tobebytes_6): +S2N_BN_SYMBOL(bignum_bigendian_6): +S2N_BN_SYMBOL(bignum_frombebytes_6): +S2N_BN_SYMBOL(bignum_tobebytes_6): // 0 and 5 words diff --git a/arm/p384/bignum_littleendian_6.S b/arm/p384/bignum_littleendian_6.S index 3c35b397f0..19748ecacb 100644 --- a/arm/p384/bignum_littleendian_6.S +++ b/arm/p384/bignum_littleendian_6.S @@ -54,10 +54,9 @@ #define dshort w2 #define a x3 -S2N_BN_SYMBOL(bignum_bignum_littleendian_6): - -S2N_BN_SYMBOL(bignum_bignum_fromlebytes_6): -S2N_BN_SYMBOL(bignum_bignum_tolebytes_6): +S2N_BN_SYMBOL(bignum_littleendian_6): +S2N_BN_SYMBOL(bignum_fromlebytes_6): +S2N_BN_SYMBOL(bignum_tolebytes_6): // word 0 diff --git a/include/_internal_s2n_bignum.h b/include/_internal_s2n_bignum.h index 490fe3dc0b..2557591614 100644 --- a/include/_internal_s2n_bignum.h +++ b/include/_internal_s2n_bignum.h @@ -1,18 +1,18 @@ #ifdef __APPLE__ -#define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl _##name -#ifdef S2N_BN_HIDE_SYMBOLS -#define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .private_extern _##name +# define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl _##name +# ifdef S2N_BN_HIDE_SYMBOLS +# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .private_extern _##name +# else +# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ +# endif +# define S2N_BN_SYMBOL(name) _##name #else -#define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ -#endif -#define S2N_BN_SYMBOL(name) _##name -#else -#define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl name -#ifdef S2N_BN_HIDE_SYMBOLS -#define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .hidden name -#else -#define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ -#endif -#define S2N_BN_SYMBOL(name) name +# define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl name +# ifdef S2N_BN_HIDE_SYMBOLS +# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .hidden name +# else +# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ +# endif +# define S2N_BN_SYMBOL(name) name #endif \ No newline at end of file From 4cc5280bc0d8972932d8bfd3bef593cd542c9765 Mon Sep 17 00:00:00 2001 From: Justin Smith Date: Fri, 20 May 2022 15:42:34 -0400 Subject: [PATCH 06/42] #include "_internal_s2n_bignum.h" s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/3d6a77435bb3fe2bfcc03680f9637bd2d5d3f2c4 --- arm/curve25519/bignum_neg_p25519.S | 2 +- arm/fastmul/bignum_emontredc_8n.S | 2 +- arm/fastmul/bignum_kmul_16_32.S | 2 +- arm/fastmul/bignum_kmul_32_64.S | 2 +- arm/fastmul/bignum_ksqr_16_32.S | 2 +- arm/fastmul/bignum_ksqr_32_64.S | 2 +- arm/generic/bignum_ge.S | 2 +- arm/generic/bignum_mul.S | 2 +- arm/generic/bignum_optsub.S | 2 +- arm/generic/bignum_sqr.S | 2 +- arm/p384/bignum_add_p384.S | 2 +- arm/p384/bignum_bigendian_6.S | 2 +- arm/p384/bignum_cmul_p384.S | 2 +- arm/p384/bignum_deamont_p384.S | 2 +- arm/p384/bignum_demont_p384.S | 2 +- arm/p384/bignum_double_p384.S | 2 +- arm/p384/bignum_half_p384.S | 2 +- arm/p384/bignum_littleendian_6.S | 2 +- arm/p384/bignum_mod_n384.S | 2 +- arm/p384/bignum_mod_n384_6.S | 2 +- arm/p384/bignum_mod_p384.S | 2 +- arm/p384/bignum_mod_p384_6.S | 2 +- arm/p384/bignum_montmul_p384.S | 2 +- arm/p384/bignum_montmul_p384_alt.S | 2 +- arm/p384/bignum_montsqr_p384.S | 2 +- arm/p384/bignum_montsqr_p384_alt.S | 2 +- arm/p384/bignum_mux_6.S | 2 +- arm/p384/bignum_neg_p384.S | 2 +- arm/p384/bignum_nonzero_6.S | 2 
+- arm/p384/bignum_optneg_p384.S | 2 +- arm/p384/bignum_sub_p384.S | 2 +- arm/p384/bignum_tomont_p384.S | 2 +- arm/p384/bignum_triple_p384.S | 2 +- arm/p521/bignum_add_p521.S | 2 +- arm/p521/bignum_cmul_p521.S | 2 +- arm/p521/bignum_deamont_p521.S | 2 +- arm/p521/bignum_demont_p521.S | 2 +- arm/p521/bignum_double_p521.S | 2 +- arm/p521/bignum_fromlebytes_p521.S | 2 +- arm/p521/bignum_half_p521.S | 2 +- arm/p521/bignum_mod_n521_9.S | 2 +- arm/p521/bignum_mod_p521_9.S | 2 +- arm/p521/bignum_montmul_p521.S | 2 +- arm/p521/bignum_montmul_p521_alt.S | 2 +- arm/p521/bignum_montsqr_p521.S | 2 +- arm/p521/bignum_montsqr_p521_alt.S | 2 +- arm/p521/bignum_mul_p521.S | 2 +- arm/p521/bignum_mul_p521_alt.S | 2 +- arm/p521/bignum_neg_p521.S | 2 +- arm/p521/bignum_optneg_p521.S | 2 +- arm/p521/bignum_sqr_p521.S | 2 +- arm/p521/bignum_sqr_p521_alt.S | 2 +- arm/p521/bignum_sub_p521.S | 2 +- arm/p521/bignum_tolebytes_p521.S | 2 +- arm/p521/bignum_tomont_p521.S | 2 +- arm/p521/bignum_triple_p521.S | 2 +- x86_att/curve25519/bignum_neg_p25519.S | 2 +- x86_att/p384/bignum_add_p384.S | 2 +- x86_att/p384/bignum_bigendian_6.S | 2 +- x86_att/p384/bignum_cmul_p384.S | 2 +- x86_att/p384/bignum_cmul_p384_alt.S | 2 +- x86_att/p384/bignum_deamont_p384.S | 2 +- x86_att/p384/bignum_deamont_p384_alt.S | 2 +- x86_att/p384/bignum_demont_p384.S | 2 +- x86_att/p384/bignum_demont_p384_alt.S | 2 +- x86_att/p384/bignum_double_p384.S | 2 +- x86_att/p384/bignum_half_p384.S | 2 +- x86_att/p384/bignum_littleendian_6.S | 2 +- x86_att/p384/bignum_mod_n384.S | 2 +- x86_att/p384/bignum_mod_n384_6.S | 2 +- x86_att/p384/bignum_mod_n384_alt.S | 2 +- x86_att/p384/bignum_mod_p384.S | 2 +- x86_att/p384/bignum_mod_p384_6.S | 2 +- x86_att/p384/bignum_mod_p384_alt.S | 2 +- x86_att/p384/bignum_montmul_p384.S | 2 +- x86_att/p384/bignum_montmul_p384_alt.S | 2 +- x86_att/p384/bignum_montsqr_p384.S | 2 +- x86_att/p384/bignum_montsqr_p384_alt.S | 2 +- x86_att/p384/bignum_mux_6.S | 2 +- x86_att/p384/bignum_neg_p384.S | 2 +- x86_att/p384/bignum_nonzero_6.S | 2 +- x86_att/p384/bignum_optneg_p384.S | 2 +- x86_att/p384/bignum_sub_p384.S | 2 +- x86_att/p384/bignum_tomont_p384.S | 2 +- x86_att/p384/bignum_tomont_p384_alt.S | 2 +- x86_att/p384/bignum_triple_p384.S | 2 +- x86_att/p384/bignum_triple_p384_alt.S | 2 +- x86_att/p521/bignum_add_p521.S | 2 +- x86_att/p521/bignum_cmul_p521.S | 2 +- x86_att/p521/bignum_cmul_p521_alt.S | 2 +- x86_att/p521/bignum_deamont_p521.S | 2 +- x86_att/p521/bignum_demont_p521.S | 2 +- x86_att/p521/bignum_double_p521.S | 2 +- x86_att/p521/bignum_fromlebytes_p521.S | 2 +- x86_att/p521/bignum_half_p521.S | 2 +- x86_att/p521/bignum_mod_n521_9.S | 2 +- x86_att/p521/bignum_mod_n521_9_alt.S | 2 +- x86_att/p521/bignum_mod_p521_9.S | 2 +- x86_att/p521/bignum_montmul_p521.S | 2 +- x86_att/p521/bignum_montmul_p521_alt.S | 2 +- x86_att/p521/bignum_montsqr_p521.S | 2 +- x86_att/p521/bignum_montsqr_p521_alt.S | 2 +- x86_att/p521/bignum_mul_p521.S | 2 +- x86_att/p521/bignum_mul_p521_alt.S | 2 +- x86_att/p521/bignum_neg_p521.S | 2 +- x86_att/p521/bignum_optneg_p521.S | 2 +- x86_att/p521/bignum_sqr_p521.S | 2 +- x86_att/p521/bignum_sqr_p521_alt.S | 2 +- x86_att/p521/bignum_sub_p521.S | 2 +- x86_att/p521/bignum_tolebytes_p521.S | 2 +- x86_att/p521/bignum_tomont_p521.S | 2 +- x86_att/p521/bignum_triple_p521.S | 2 +- x86_att/p521/bignum_triple_p521_alt.S | 2 +- 113 files changed, 113 insertions(+), 113 deletions(-) diff --git a/arm/curve25519/bignum_neg_p25519.S b/arm/curve25519/bignum_neg_p25519.S index b88a48a6ee..3d333ae42a 100644 --- 
a/arm/curve25519/bignum_neg_p25519.S +++ b/arm/curve25519/bignum_neg_p25519.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p25519) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p25519) diff --git a/arm/fastmul/bignum_emontredc_8n.S b/arm/fastmul/bignum_emontredc_8n.S index 104a0eb924..02fcff72a5 100644 --- a/arm/fastmul/bignum_emontredc_8n.S +++ b/arm/fastmul/bignum_emontredc_8n.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = k, X1 = z, X2 = m, X3 = w, returns X0 // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_emontredc_8n) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_emontredc_8n) diff --git a/arm/fastmul/bignum_kmul_16_32.S b/arm/fastmul/bignum_kmul_16_32.S index 9a29b26305..3234fa904d 100644 --- a/arm/fastmul/bignum_kmul_16_32.S +++ b/arm/fastmul/bignum_kmul_16_32.S @@ -26,7 +26,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y, X3 = t // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_kmul_16_32) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_kmul_16_32) diff --git a/arm/fastmul/bignum_kmul_32_64.S b/arm/fastmul/bignum_kmul_32_64.S index 8706ac6f2f..781b27bafe 100644 --- a/arm/fastmul/bignum_kmul_32_64.S +++ b/arm/fastmul/bignum_kmul_32_64.S @@ -26,7 +26,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y, X3 = t // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_kmul_32_64) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_kmul_32_64) diff --git a/arm/fastmul/bignum_ksqr_16_32.S b/arm/fastmul/bignum_ksqr_16_32.S index 36cf26d9cd..36c39c5630 100644 --- a/arm/fastmul/bignum_ksqr_16_32.S +++ b/arm/fastmul/bignum_ksqr_16_32.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = t // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_ksqr_16_32) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_ksqr_16_32) diff --git a/arm/fastmul/bignum_ksqr_32_64.S b/arm/fastmul/bignum_ksqr_32_64.S index 37d259e97b..7d42af67a3 100644 --- a/arm/fastmul/bignum_ksqr_32_64.S +++ b/arm/fastmul/bignum_ksqr_32_64.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = t // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_ksqr_32_64) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_ksqr_32_64) diff --git a/arm/generic/bignum_ge.S b/arm/generic/bignum_ge.S index badd521b56..a43105b9f8 100644 --- a/arm/generic/bignum_ge.S +++ b/arm/generic/bignum_ge.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = m, X1 = x, X2 = n, X3 = y, returns X0 // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_ge) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_ge) diff --git 
a/arm/generic/bignum_mul.S b/arm/generic/bignum_mul.S index fc7227aa62..1563e408d7 100644 --- a/arm/generic/bignum_mul.S +++ b/arm/generic/bignum_mul.S @@ -26,7 +26,7 @@ // // Standard ARM ABI: X0 = k, X1 = z, X2 = m, X3 = x, X4 = n, X5 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul) diff --git a/arm/generic/bignum_optsub.S b/arm/generic/bignum_optsub.S index b2c0653df7..9f0a5f494c 100644 --- a/arm/generic/bignum_optsub.S +++ b/arm/generic/bignum_optsub.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = k, X1 = z, X2 = x, X3 = p, X4 = y, returns X0 // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optsub) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optsub) diff --git a/arm/generic/bignum_sqr.S b/arm/generic/bignum_sqr.S index cff9f6c29d..ec2f80b266 100644 --- a/arm/generic/bignum_sqr.S +++ b/arm/generic/bignum_sqr.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = k, X1 = z, X2 = n, X3 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr) diff --git a/arm/p384/bignum_add_p384.S b/arm/p384/bignum_add_p384.S index 257a5f6a58..dd7fc36a38 100644 --- a/arm/p384/bignum_add_p384.S +++ b/arm/p384/bignum_add_p384.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add_p384) diff --git a/arm/p384/bignum_bigendian_6.S b/arm/p384/bignum_bigendian_6.S index e0a85e7ccb..7d5145453e 100644 --- a/arm/p384/bignum_bigendian_6.S +++ b/arm/p384/bignum_bigendian_6.S @@ -35,7 +35,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_bigendian_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_bigendian_6) diff --git a/arm/p384/bignum_cmul_p384.S b/arm/p384/bignum_cmul_p384.S index 6b10ad254c..3ac32252f1 100644 --- a/arm/p384/bignum_cmul_p384.S +++ b/arm/p384/bignum_cmul_p384.S @@ -23,7 +23,7 @@ // // Standard ARM ABI: X0 = z, X1 = c, X2 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p384) diff --git a/arm/p384/bignum_deamont_p384.S b/arm/p384/bignum_deamont_p384.S index 673cd97d53..932dfcc6e8 100644 --- a/arm/p384/bignum_deamont_p384.S +++ b/arm/p384/bignum_deamont_p384.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p384) diff --git a/arm/p384/bignum_demont_p384.S 
b/arm/p384/bignum_demont_p384.S index 4debf61fd8..4b4b7e94a8 100644 --- a/arm/p384/bignum_demont_p384.S +++ b/arm/p384/bignum_demont_p384.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p384) diff --git a/arm/p384/bignum_double_p384.S b/arm/p384/bignum_double_p384.S index 32088b55a2..cdcbc6aa67 100644 --- a/arm/p384/bignum_double_p384.S +++ b/arm/p384/bignum_double_p384.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_double_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_double_p384) diff --git a/arm/p384/bignum_half_p384.S b/arm/p384/bignum_half_p384.S index f4f6be189b..c9b3c87665 100644 --- a/arm/p384/bignum_half_p384.S +++ b/arm/p384/bignum_half_p384.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_half_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_half_p384) diff --git a/arm/p384/bignum_littleendian_6.S b/arm/p384/bignum_littleendian_6.S index 19748ecacb..3144fc18d4 100644 --- a/arm/p384/bignum_littleendian_6.S +++ b/arm/p384/bignum_littleendian_6.S @@ -35,7 +35,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_littleendian_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_littleendian_6) diff --git a/arm/p384/bignum_mod_n384.S b/arm/p384/bignum_mod_n384.S index 8cb4bf6562..a02771cbfe 100644 --- a/arm/p384/bignum_mod_n384.S +++ b/arm/p384/bignum_mod_n384.S @@ -24,7 +24,7 @@ // // Standard ARM ABI: X0 = z, X1 = k, X2 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384) diff --git a/arm/p384/bignum_mod_n384_6.S b/arm/p384/bignum_mod_n384_6.S index b906b30cc1..4ff06ce728 100644 --- a/arm/p384/bignum_mod_n384_6.S +++ b/arm/p384/bignum_mod_n384_6.S @@ -24,7 +24,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384_6) diff --git a/arm/p384/bignum_mod_p384.S b/arm/p384/bignum_mod_p384.S index 48d103a37c..35771e7d8f 100644 --- a/arm/p384/bignum_mod_p384.S +++ b/arm/p384/bignum_mod_p384.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = k, X2 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384) diff --git a/arm/p384/bignum_mod_p384_6.S b/arm/p384/bignum_mod_p384_6.S index 
fdad1cc846..b198b5c603 100644 --- a/arm/p384/bignum_mod_p384_6.S +++ b/arm/p384/bignum_mod_p384_6.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384_6) diff --git a/arm/p384/bignum_montmul_p384.S b/arm/p384/bignum_montmul_p384.S index fce8dfe714..48ca4c3218 100644 --- a/arm/p384/bignum_montmul_p384.S +++ b/arm/p384/bignum_montmul_p384.S @@ -26,7 +26,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p384) diff --git a/arm/p384/bignum_montmul_p384_alt.S b/arm/p384/bignum_montmul_p384_alt.S index c3a3262738..72883df225 100644 --- a/arm/p384/bignum_montmul_p384_alt.S +++ b/arm/p384/bignum_montmul_p384_alt.S @@ -26,7 +26,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p384_alt) diff --git a/arm/p384/bignum_montsqr_p384.S b/arm/p384/bignum_montsqr_p384.S index 0ad3906e31..0446ad74cd 100644 --- a/arm/p384/bignum_montsqr_p384.S +++ b/arm/p384/bignum_montsqr_p384.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p384) diff --git a/arm/p384/bignum_montsqr_p384_alt.S b/arm/p384/bignum_montsqr_p384_alt.S index 8600c04890..35dba1ca50 100644 --- a/arm/p384/bignum_montsqr_p384_alt.S +++ b/arm/p384/bignum_montsqr_p384_alt.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p384_alt) diff --git a/arm/p384/bignum_mux_6.S b/arm/p384/bignum_mux_6.S index d8ea1756ac..30dab886ae 100644 --- a/arm/p384/bignum_mux_6.S +++ b/arm/p384/bignum_mux_6.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = p, X1 = z, X2 = x, X3 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mux_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mux_6) diff --git a/arm/p384/bignum_neg_p384.S b/arm/p384/bignum_neg_p384.S index 67f725db6c..6d661a716d 100644 --- a/arm/p384/bignum_neg_p384.S +++ b/arm/p384/bignum_neg_p384.S @@ -21,7 +21,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p384) diff --git a/arm/p384/bignum_nonzero_6.S 
b/arm/p384/bignum_nonzero_6.S index 79e0ba3f37..bdc6232eee 100644 --- a/arm/p384/bignum_nonzero_6.S +++ b/arm/p384/bignum_nonzero_6.S @@ -21,7 +21,7 @@ // // Standard ARM ABI: X0 = x, returns X0 // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_nonzero_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_nonzero_6) diff --git a/arm/p384/bignum_optneg_p384.S b/arm/p384/bignum_optneg_p384.S index 096e583949..eec01642d1 100644 --- a/arm/p384/bignum_optneg_p384.S +++ b/arm/p384/bignum_optneg_p384.S @@ -23,7 +23,7 @@ // // Standard ARM ABI: X0 = z, X1 = p, X2 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optneg_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optneg_p384) diff --git a/arm/p384/bignum_sub_p384.S b/arm/p384/bignum_sub_p384.S index 14d84dcc02..0e8af143f3 100644 --- a/arm/p384/bignum_sub_p384.S +++ b/arm/p384/bignum_sub_p384.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub_p384) diff --git a/arm/p384/bignum_tomont_p384.S b/arm/p384/bignum_tomont_p384.S index 0aab015130..bc240cffdd 100644 --- a/arm/p384/bignum_tomont_p384.S +++ b/arm/p384/bignum_tomont_p384.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p384) diff --git a/arm/p384/bignum_triple_p384.S b/arm/p384/bignum_triple_p384.S index 00f4bdc05d..0786f21fab 100644 --- a/arm/p384/bignum_triple_p384.S +++ b/arm/p384/bignum_triple_p384.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p384) diff --git a/arm/p521/bignum_add_p521.S b/arm/p521/bignum_add_p521.S index d39c60b920..7576cbc741 100644 --- a/arm/p521/bignum_add_p521.S +++ b/arm/p521/bignum_add_p521.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add_p521) diff --git a/arm/p521/bignum_cmul_p521.S b/arm/p521/bignum_cmul_p521.S index ef2d756449..f6b4face41 100644 --- a/arm/p521/bignum_cmul_p521.S +++ b/arm/p521/bignum_cmul_p521.S @@ -23,7 +23,7 @@ // // Standard ARM ABI: X0 = z, X1 = c, X2 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p521) diff --git a/arm/p521/bignum_deamont_p521.S b/arm/p521/bignum_deamont_p521.S index 
2e1e95882e..c3eab0a481 100644 --- a/arm/p521/bignum_deamont_p521.S +++ b/arm/p521/bignum_deamont_p521.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p521) diff --git a/arm/p521/bignum_demont_p521.S b/arm/p521/bignum_demont_p521.S index 0cd1afbb8f..1756f86e10 100644 --- a/arm/p521/bignum_demont_p521.S +++ b/arm/p521/bignum_demont_p521.S @@ -25,7 +25,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p521) diff --git a/arm/p521/bignum_double_p521.S b/arm/p521/bignum_double_p521.S index 6331d4fc5e..df3cfee500 100644 --- a/arm/p521/bignum_double_p521.S +++ b/arm/p521/bignum_double_p521.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_double_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_double_p521) diff --git a/arm/p521/bignum_fromlebytes_p521.S b/arm/p521/bignum_fromlebytes_p521.S index abba7a8b20..c348d5faa7 100644 --- a/arm/p521/bignum_fromlebytes_p521.S +++ b/arm/p521/bignum_fromlebytes_p521.S @@ -24,7 +24,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_fromlebytes_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_fromlebytes_p521) diff --git a/arm/p521/bignum_half_p521.S b/arm/p521/bignum_half_p521.S index 957abcbbc3..bfc489e3bc 100644 --- a/arm/p521/bignum_half_p521.S +++ b/arm/p521/bignum_half_p521.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_half_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_half_p521) diff --git a/arm/p521/bignum_mod_n521_9.S b/arm/p521/bignum_mod_n521_9.S index a3b733bcd2..cfae5787a2 100644 --- a/arm/p521/bignum_mod_n521_9.S +++ b/arm/p521/bignum_mod_n521_9.S @@ -24,7 +24,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n521_9) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n521_9) diff --git a/arm/p521/bignum_mod_p521_9.S b/arm/p521/bignum_mod_p521_9.S index ae0bfdc706..666caca4b5 100644 --- a/arm/p521/bignum_mod_p521_9.S +++ b/arm/p521/bignum_mod_p521_9.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p521_9) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p521_9) diff --git a/arm/p521/bignum_montmul_p521.S b/arm/p521/bignum_montmul_p521.S index 
809cedac34..5d13ca650a 100644 --- a/arm/p521/bignum_montmul_p521.S +++ b/arm/p521/bignum_montmul_p521.S @@ -27,7 +27,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p521) diff --git a/arm/p521/bignum_montmul_p521_alt.S b/arm/p521/bignum_montmul_p521_alt.S index ed84567a7c..4a26ae6ed2 100644 --- a/arm/p521/bignum_montmul_p521_alt.S +++ b/arm/p521/bignum_montmul_p521_alt.S @@ -27,7 +27,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p521_alt) diff --git a/arm/p521/bignum_montsqr_p521.S b/arm/p521/bignum_montsqr_p521.S index 6ec3332734..9569487a38 100644 --- a/arm/p521/bignum_montsqr_p521.S +++ b/arm/p521/bignum_montsqr_p521.S @@ -27,7 +27,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p521) diff --git a/arm/p521/bignum_montsqr_p521_alt.S b/arm/p521/bignum_montsqr_p521_alt.S index fd30cd81b3..f15b9fc7d1 100644 --- a/arm/p521/bignum_montsqr_p521_alt.S +++ b/arm/p521/bignum_montsqr_p521_alt.S @@ -27,7 +27,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p521_alt) diff --git a/arm/p521/bignum_mul_p521.S b/arm/p521/bignum_mul_p521.S index d4ed4b7e0a..26824c472c 100644 --- a/arm/p521/bignum_mul_p521.S +++ b/arm/p521/bignum_mul_p521.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_p521) diff --git a/arm/p521/bignum_mul_p521_alt.S b/arm/p521/bignum_mul_p521_alt.S index f0b6ee47a5..089da6d70f 100644 --- a/arm/p521/bignum_mul_p521_alt.S +++ b/arm/p521/bignum_mul_p521_alt.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_p521_alt) diff --git a/arm/p521/bignum_neg_p521.S b/arm/p521/bignum_neg_p521.S index 90292e39ed..8be4de4bc0 100644 --- a/arm/p521/bignum_neg_p521.S +++ b/arm/p521/bignum_neg_p521.S @@ -21,7 +21,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p521) diff --git 
a/arm/p521/bignum_optneg_p521.S b/arm/p521/bignum_optneg_p521.S index 2822ab4c69..03dbfc666c 100644 --- a/arm/p521/bignum_optneg_p521.S +++ b/arm/p521/bignum_optneg_p521.S @@ -23,7 +23,7 @@ // // Standard ARM ABI: X0 = z, X1 = p, X2 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optneg_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optneg_p521) diff --git a/arm/p521/bignum_sqr_p521.S b/arm/p521/bignum_sqr_p521.S index bb68c3396c..d8fb453e36 100644 --- a/arm/p521/bignum_sqr_p521.S +++ b/arm/p521/bignum_sqr_p521.S @@ -21,7 +21,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_p521) diff --git a/arm/p521/bignum_sqr_p521_alt.S b/arm/p521/bignum_sqr_p521_alt.S index 10417d12e1..6ba447f390 100644 --- a/arm/p521/bignum_sqr_p521_alt.S +++ b/arm/p521/bignum_sqr_p521_alt.S @@ -21,7 +21,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_p521_alt) diff --git a/arm/p521/bignum_sub_p521.S b/arm/p521/bignum_sub_p521.S index 1a074aed12..759e158039 100644 --- a/arm/p521/bignum_sub_p521.S +++ b/arm/p521/bignum_sub_p521.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x, X2 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub_p521) diff --git a/arm/p521/bignum_tolebytes_p521.S b/arm/p521/bignum_tolebytes_p521.S index 397d2a73bf..e798a86e8b 100644 --- a/arm/p521/bignum_tolebytes_p521.S +++ b/arm/p521/bignum_tolebytes_p521.S @@ -24,7 +24,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tolebytes_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tolebytes_p521) diff --git a/arm/p521/bignum_tomont_p521.S b/arm/p521/bignum_tomont_p521.S index 1ca5089f9e..f3b114b893 100644 --- a/arm/p521/bignum_tomont_p521.S +++ b/arm/p521/bignum_tomont_p521.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p521) diff --git a/arm/p521/bignum_triple_p521.S b/arm/p521/bignum_triple_p521.S index 7b0caca30a..b28ccd419e 100644 --- a/arm/p521/bignum_triple_p521.S +++ b/arm/p521/bignum_triple_p521.S @@ -22,7 +22,7 @@ // // Standard ARM ABI: X0 = z, X1 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p521) diff --git 
a/x86_att/curve25519/bignum_neg_p25519.S b/x86_att/curve25519/bignum_neg_p25519.S index fad953baf4..bbe2dc014c 100644 --- a/x86_att/curve25519/bignum_neg_p25519.S +++ b/x86_att/curve25519/bignum_neg_p25519.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p25519) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p25519) diff --git a/x86_att/p384/bignum_add_p384.S b/x86_att/p384/bignum_add_p384.S index 5467fb94af..54341cd8da 100644 --- a/x86_att/p384/bignum_add_p384.S +++ b/x86_att/p384/bignum_add_p384.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add_p384) diff --git a/x86_att/p384/bignum_bigendian_6.S b/x86_att/p384/bignum_bigendian_6.S index 8ac48bc392..44da1b75bd 100644 --- a/x86_att/p384/bignum_bigendian_6.S +++ b/x86_att/p384/bignum_bigendian_6.S @@ -36,7 +36,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_bigendian_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_bigendian_6) diff --git a/x86_att/p384/bignum_cmul_p384.S b/x86_att/p384/bignum_cmul_p384.S index a186cc556d..bd4afcd54e 100644 --- a/x86_att/p384/bignum_cmul_p384.S +++ b/x86_att/p384/bignum_cmul_p384.S @@ -25,7 +25,7 @@ // Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p384) diff --git a/x86_att/p384/bignum_cmul_p384_alt.S b/x86_att/p384/bignum_cmul_p384_alt.S index 1af6b54f03..67a8748bb0 100644 --- a/x86_att/p384/bignum_cmul_p384_alt.S +++ b/x86_att/p384/bignum_cmul_p384_alt.S @@ -25,7 +25,7 @@ // Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p384_alt) diff --git a/x86_att/p384/bignum_deamont_p384.S b/x86_att/p384/bignum_deamont_p384.S index a0fbde1a95..6561abd79e 100644 --- a/x86_att/p384/bignum_deamont_p384.S +++ b/x86_att/p384/bignum_deamont_p384.S @@ -27,7 +27,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p384) diff --git a/x86_att/p384/bignum_deamont_p384_alt.S b/x86_att/p384/bignum_deamont_p384_alt.S index b916b9118e..428aa764a5 100644 --- a/x86_att/p384/bignum_deamont_p384_alt.S +++ b/x86_att/p384/bignum_deamont_p384_alt.S @@ -27,7 +27,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" 
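// Note on the shortened include: it relies on the preprocessor being passed
// -I../../include on the command line (as in the Makefile rules updated in
// [PATCH 07/42] later in this series), so "_internal_s2n_bignum.h" still
// resolves to the same include/_internal_s2n_bignum.h header that the old
// relative path reached directly.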
+#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p384_alt) diff --git a/x86_att/p384/bignum_demont_p384.S b/x86_att/p384/bignum_demont_p384.S index 20dd03a54b..bf621dcc75 100644 --- a/x86_att/p384/bignum_demont_p384.S +++ b/x86_att/p384/bignum_demont_p384.S @@ -27,7 +27,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p384) diff --git a/x86_att/p384/bignum_demont_p384_alt.S b/x86_att/p384/bignum_demont_p384_alt.S index 652273498d..9009a22369 100644 --- a/x86_att/p384/bignum_demont_p384_alt.S +++ b/x86_att/p384/bignum_demont_p384_alt.S @@ -27,7 +27,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p384_alt) diff --git a/x86_att/p384/bignum_double_p384.S b/x86_att/p384/bignum_double_p384.S index f046ea06eb..cb83e6e021 100644 --- a/x86_att/p384/bignum_double_p384.S +++ b/x86_att/p384/bignum_double_p384.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_double_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_double_p384) diff --git a/x86_att/p384/bignum_half_p384.S b/x86_att/p384/bignum_half_p384.S index 75a18cafb3..7e353c356c 100644 --- a/x86_att/p384/bignum_half_p384.S +++ b/x86_att/p384/bignum_half_p384.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_half_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_half_p384) diff --git a/x86_att/p384/bignum_littleendian_6.S b/x86_att/p384/bignum_littleendian_6.S index 7acfa98303..e632a38491 100644 --- a/x86_att/p384/bignum_littleendian_6.S +++ b/x86_att/p384/bignum_littleendian_6.S @@ -35,7 +35,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_littleendian_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_littleendian_6) diff --git a/x86_att/p384/bignum_mod_n384.S b/x86_att/p384/bignum_mod_n384.S index 18fa903993..fcb8123352 100644 --- a/x86_att/p384/bignum_mod_n384.S +++ b/x86_att/p384/bignum_mod_n384.S @@ -26,7 +26,7 @@ // Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384) diff --git a/x86_att/p384/bignum_mod_n384_6.S b/x86_att/p384/bignum_mod_n384_6.S index b84c68a6ec..d3048f01b1 100644 --- a/x86_att/p384/bignum_mod_n384_6.S +++ b/x86_att/p384/bignum_mod_n384_6.S @@ -26,7 +26,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // 
---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384_6) diff --git a/x86_att/p384/bignum_mod_n384_alt.S b/x86_att/p384/bignum_mod_n384_alt.S index 3e13a4bd51..d4cf45f54a 100644 --- a/x86_att/p384/bignum_mod_n384_alt.S +++ b/x86_att/p384/bignum_mod_n384_alt.S @@ -26,7 +26,7 @@ // Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384_alt) diff --git a/x86_att/p384/bignum_mod_p384.S b/x86_att/p384/bignum_mod_p384.S index b558054813..3ee2ee3169 100644 --- a/x86_att/p384/bignum_mod_p384.S +++ b/x86_att/p384/bignum_mod_p384.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384) diff --git a/x86_att/p384/bignum_mod_p384_6.S b/x86_att/p384/bignum_mod_p384_6.S index cca2ff0f7f..03955a2346 100644 --- a/x86_att/p384/bignum_mod_p384_6.S +++ b/x86_att/p384/bignum_mod_p384_6.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384_6) diff --git a/x86_att/p384/bignum_mod_p384_alt.S b/x86_att/p384/bignum_mod_p384_alt.S index 926a2bbd08..d1e8ccb9b5 100644 --- a/x86_att/p384/bignum_mod_p384_alt.S +++ b/x86_att/p384/bignum_mod_p384_alt.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = k, R8 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384_alt) diff --git a/x86_att/p384/bignum_montmul_p384.S b/x86_att/p384/bignum_montmul_p384.S index ec40ebafce..5744e71262 100644 --- a/x86_att/p384/bignum_montmul_p384.S +++ b/x86_att/p384/bignum_montmul_p384.S @@ -28,7 +28,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ----------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p384) diff --git a/x86_att/p384/bignum_montmul_p384_alt.S b/x86_att/p384/bignum_montmul_p384_alt.S index 8a15b4995e..cadb5d3457 100644 --- a/x86_att/p384/bignum_montmul_p384_alt.S +++ b/x86_att/p384/bignum_montmul_p384_alt.S @@ -28,7 +28,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ----------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p384_alt) diff --git a/x86_att/p384/bignum_montsqr_p384.S b/x86_att/p384/bignum_montsqr_p384.S index 
af245ed91a..05b9681f71 100644 --- a/x86_att/p384/bignum_montsqr_p384.S +++ b/x86_att/p384/bignum_montsqr_p384.S @@ -27,7 +27,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p384) diff --git a/x86_att/p384/bignum_montsqr_p384_alt.S b/x86_att/p384/bignum_montsqr_p384_alt.S index 9b4c9374fe..08d421ecb9 100644 --- a/x86_att/p384/bignum_montsqr_p384_alt.S +++ b/x86_att/p384/bignum_montsqr_p384_alt.S @@ -27,7 +27,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p384_alt) diff --git a/x86_att/p384/bignum_mux_6.S b/x86_att/p384/bignum_mux_6.S index 78e055f314..29ac9e24f9 100644 --- a/x86_att/p384/bignum_mux_6.S +++ b/x86_att/p384/bignum_mux_6.S @@ -27,7 +27,7 @@ // Microsoft x64 ABI: RCX = p, RDX = z, R8 = x, R9 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mux_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mux_6) diff --git a/x86_att/p384/bignum_neg_p384.S b/x86_att/p384/bignum_neg_p384.S index 5522bd22e9..b1941b3a11 100644 --- a/x86_att/p384/bignum_neg_p384.S +++ b/x86_att/p384/bignum_neg_p384.S @@ -23,7 +23,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p384) diff --git a/x86_att/p384/bignum_nonzero_6.S b/x86_att/p384/bignum_nonzero_6.S index 9c906b34d4..4888fa518d 100644 --- a/x86_att/p384/bignum_nonzero_6.S +++ b/x86_att/p384/bignum_nonzero_6.S @@ -23,7 +23,7 @@ // Microsoft x64 ABI: RCX = x, returns RAX // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_nonzero_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_nonzero_6) diff --git a/x86_att/p384/bignum_optneg_p384.S b/x86_att/p384/bignum_optneg_p384.S index ca72064e49..aafce2b72f 100644 --- a/x86_att/p384/bignum_optneg_p384.S +++ b/x86_att/p384/bignum_optneg_p384.S @@ -25,7 +25,7 @@ // Microsoft x64 ABI: RCX = z, RDX = p, R8 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optneg_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optneg_p384) diff --git a/x86_att/p384/bignum_sub_p384.S b/x86_att/p384/bignum_sub_p384.S index 4a687111e0..e2c4776890 100644 --- a/x86_att/p384/bignum_sub_p384.S +++ b/x86_att/p384/bignum_sub_p384.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub_p384) diff --git 
a/x86_att/p384/bignum_tomont_p384.S b/x86_att/p384/bignum_tomont_p384.S index e5c16a05bf..73351c221e 100644 --- a/x86_att/p384/bignum_tomont_p384.S +++ b/x86_att/p384/bignum_tomont_p384.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p384) diff --git a/x86_att/p384/bignum_tomont_p384_alt.S b/x86_att/p384/bignum_tomont_p384_alt.S index 434d5b6115..eccb5ba163 100644 --- a/x86_att/p384/bignum_tomont_p384_alt.S +++ b/x86_att/p384/bignum_tomont_p384_alt.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p384_alt) diff --git a/x86_att/p384/bignum_triple_p384.S b/x86_att/p384/bignum_triple_p384.S index c0a20bc5b9..7cdf2399b9 100644 --- a/x86_att/p384/bignum_triple_p384.S +++ b/x86_att/p384/bignum_triple_p384.S @@ -27,7 +27,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p384) diff --git a/x86_att/p384/bignum_triple_p384_alt.S b/x86_att/p384/bignum_triple_p384_alt.S index 276fd8b2c3..52e6614a80 100644 --- a/x86_att/p384/bignum_triple_p384_alt.S +++ b/x86_att/p384/bignum_triple_p384_alt.S @@ -27,7 +27,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p384_alt) diff --git a/x86_att/p521/bignum_add_p521.S b/x86_att/p521/bignum_add_p521.S index 9b636f7bb7..149d0df0a5 100644 --- a/x86_att/p521/bignum_add_p521.S +++ b/x86_att/p521/bignum_add_p521.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add_p521) diff --git a/x86_att/p521/bignum_cmul_p521.S b/x86_att/p521/bignum_cmul_p521.S index 1811760bab..118910535b 100644 --- a/x86_att/p521/bignum_cmul_p521.S +++ b/x86_att/p521/bignum_cmul_p521.S @@ -25,7 +25,7 @@ // Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p521) diff --git a/x86_att/p521/bignum_cmul_p521_alt.S b/x86_att/p521/bignum_cmul_p521_alt.S index 4c0fc86a10..640a1de89c 100644 --- a/x86_att/p521/bignum_cmul_p521_alt.S +++ b/x86_att/p521/bignum_cmul_p521_alt.S @@ -25,7 +25,7 @@ // Microsoft x64 ABI: RCX = z, RDX = c, R8 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" 
+#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p521_alt) diff --git a/x86_att/p521/bignum_deamont_p521.S b/x86_att/p521/bignum_deamont_p521.S index a5b38a7b91..a118267d02 100644 --- a/x86_att/p521/bignum_deamont_p521.S +++ b/x86_att/p521/bignum_deamont_p521.S @@ -27,7 +27,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p521) diff --git a/x86_att/p521/bignum_demont_p521.S b/x86_att/p521/bignum_demont_p521.S index 87d9fefc9a..b99c6298ce 100644 --- a/x86_att/p521/bignum_demont_p521.S +++ b/x86_att/p521/bignum_demont_p521.S @@ -27,7 +27,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p521) diff --git a/x86_att/p521/bignum_double_p521.S b/x86_att/p521/bignum_double_p521.S index eb7e3355ed..33fe1e5a90 100644 --- a/x86_att/p521/bignum_double_p521.S +++ b/x86_att/p521/bignum_double_p521.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_double_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_double_p521) diff --git a/x86_att/p521/bignum_fromlebytes_p521.S b/x86_att/p521/bignum_fromlebytes_p521.S index 6429824064..3226f43839 100644 --- a/x86_att/p521/bignum_fromlebytes_p521.S +++ b/x86_att/p521/bignum_fromlebytes_p521.S @@ -28,7 +28,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_fromlebytes_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_fromlebytes_p521) diff --git a/x86_att/p521/bignum_half_p521.S b/x86_att/p521/bignum_half_p521.S index a637153eaa..505474f5f4 100644 --- a/x86_att/p521/bignum_half_p521.S +++ b/x86_att/p521/bignum_half_p521.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_half_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_half_p521) diff --git a/x86_att/p521/bignum_mod_n521_9.S b/x86_att/p521/bignum_mod_n521_9.S index 5d8bb8ea22..1bb43aa316 100644 --- a/x86_att/p521/bignum_mod_n521_9.S +++ b/x86_att/p521/bignum_mod_n521_9.S @@ -26,7 +26,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n521_9) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n521_9) diff --git a/x86_att/p521/bignum_mod_n521_9_alt.S b/x86_att/p521/bignum_mod_n521_9_alt.S index 555930bf4d..9421f4ab08 100644 --- a/x86_att/p521/bignum_mod_n521_9_alt.S +++ b/x86_att/p521/bignum_mod_n521_9_alt.S @@ -26,7 +26,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // 
---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n521_9_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n521_9_alt) diff --git a/x86_att/p521/bignum_mod_p521_9.S b/x86_att/p521/bignum_mod_p521_9.S index dfe3b20781..ef8cc7d5fc 100644 --- a/x86_att/p521/bignum_mod_p521_9.S +++ b/x86_att/p521/bignum_mod_p521_9.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p521_9) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p521_9) diff --git a/x86_att/p521/bignum_montmul_p521.S b/x86_att/p521/bignum_montmul_p521.S index 498422eeb8..f6a74b3783 100644 --- a/x86_att/p521/bignum_montmul_p521.S +++ b/x86_att/p521/bignum_montmul_p521.S @@ -29,7 +29,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p521) diff --git a/x86_att/p521/bignum_montmul_p521_alt.S b/x86_att/p521/bignum_montmul_p521_alt.S index 5fa8290e7d..05caf63424 100644 --- a/x86_att/p521/bignum_montmul_p521_alt.S +++ b/x86_att/p521/bignum_montmul_p521_alt.S @@ -29,7 +29,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p521_alt) diff --git a/x86_att/p521/bignum_montsqr_p521.S b/x86_att/p521/bignum_montsqr_p521.S index b6fbcb8069..689472385a 100644 --- a/x86_att/p521/bignum_montsqr_p521.S +++ b/x86_att/p521/bignum_montsqr_p521.S @@ -29,7 +29,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p521) diff --git a/x86_att/p521/bignum_montsqr_p521_alt.S b/x86_att/p521/bignum_montsqr_p521_alt.S index e710af80b5..84adf6bdb3 100644 --- a/x86_att/p521/bignum_montsqr_p521_alt.S +++ b/x86_att/p521/bignum_montsqr_p521_alt.S @@ -29,7 +29,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p521_alt) diff --git a/x86_att/p521/bignum_mul_p521.S b/x86_att/p521/bignum_mul_p521.S index c4ee6b8e8f..3c8fa48eae 100644 --- a/x86_att/p521/bignum_mul_p521.S +++ b/x86_att/p521/bignum_mul_p521.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_p521) diff --git a/x86_att/p521/bignum_mul_p521_alt.S b/x86_att/p521/bignum_mul_p521_alt.S 
index c24decbb60..bc986e4a87 100644 --- a/x86_att/p521/bignum_mul_p521_alt.S +++ b/x86_att/p521/bignum_mul_p521_alt.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_p521_alt) diff --git a/x86_att/p521/bignum_neg_p521.S b/x86_att/p521/bignum_neg_p521.S index a69298bb0c..eed3614dec 100644 --- a/x86_att/p521/bignum_neg_p521.S +++ b/x86_att/p521/bignum_neg_p521.S @@ -23,7 +23,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p521) diff --git a/x86_att/p521/bignum_optneg_p521.S b/x86_att/p521/bignum_optneg_p521.S index 1a31081950..80bb33ffb5 100644 --- a/x86_att/p521/bignum_optneg_p521.S +++ b/x86_att/p521/bignum_optneg_p521.S @@ -25,7 +25,7 @@ // Microsoft x64 ABI: RCX = z, RDX = p, R8 = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optneg_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optneg_p521) diff --git a/x86_att/p521/bignum_sqr_p521.S b/x86_att/p521/bignum_sqr_p521.S index 423b9058c8..74b88a28e8 100644 --- a/x86_att/p521/bignum_sqr_p521.S +++ b/x86_att/p521/bignum_sqr_p521.S @@ -23,7 +23,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_p521) diff --git a/x86_att/p521/bignum_sqr_p521_alt.S b/x86_att/p521/bignum_sqr_p521_alt.S index 8a1e56f76a..44cf65b544 100644 --- a/x86_att/p521/bignum_sqr_p521_alt.S +++ b/x86_att/p521/bignum_sqr_p521_alt.S @@ -23,7 +23,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_p521_alt) diff --git a/x86_att/p521/bignum_sub_p521.S b/x86_att/p521/bignum_sub_p521.S index a41965e4ae..3ce8104b39 100644 --- a/x86_att/p521/bignum_sub_p521.S +++ b/x86_att/p521/bignum_sub_p521.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x, R8 = y // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub_p521) diff --git a/x86_att/p521/bignum_tolebytes_p521.S b/x86_att/p521/bignum_tolebytes_p521.S index 72c13e2c9b..5d2bc9a654 100644 --- a/x86_att/p521/bignum_tolebytes_p521.S +++ b/x86_att/p521/bignum_tolebytes_p521.S @@ -28,7 +28,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tolebytes_p521) 
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tolebytes_p521) diff --git a/x86_att/p521/bignum_tomont_p521.S b/x86_att/p521/bignum_tomont_p521.S index 4291c44cdd..fa05076593 100644 --- a/x86_att/p521/bignum_tomont_p521.S +++ b/x86_att/p521/bignum_tomont_p521.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p521) diff --git a/x86_att/p521/bignum_triple_p521.S b/x86_att/p521/bignum_triple_p521.S index 3c02c2c4f5..9380372cea 100644 --- a/x86_att/p521/bignum_triple_p521.S +++ b/x86_att/p521/bignum_triple_p521.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p521) diff --git a/x86_att/p521/bignum_triple_p521_alt.S b/x86_att/p521/bignum_triple_p521_alt.S index 4e2283e825..5f23a3a440 100644 --- a/x86_att/p521/bignum_triple_p521_alt.S +++ b/x86_att/p521/bignum_triple_p521_alt.S @@ -24,7 +24,7 @@ // Microsoft x64 ABI: RCX = z, RDX = x // ---------------------------------------------------------------------------- -#include "../../include/_internal_s2n_bignum.h" +#include "_internal_s2n_bignum.h" S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p521_alt) From 1ba86f2cc6e4b1d7854a109d072b971c2716b84f Mon Sep 17 00:00:00 2001 From: Justin Smith Date: Mon, 23 May 2022 09:40:41 -0400 Subject: [PATCH 07/42] Invoke cpp as '$(CXX) -E' s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/7685298a7129ba75ee774fb272a8b94fb35fdcb5 --- arm/p384/Makefile | 2 +- arm/p521/Makefile | 2 +- include/_internal_s2n_bignum.h | 21 ++++++++------------- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/arm/p384/Makefile b/arm/p384/Makefile index 354898b28a..d3859296e4 100644 --- a/arm/p384/Makefile +++ b/arm/p384/Makefile @@ -55,7 +55,7 @@ OBJ = bignum_add_p384.o \ bignum_tomont_p384.o \ bignum_triple_p384.o -%.o : %.S ; cpp $< | $(GAS) -o $@ - +%.o : %.S ; $(CXX) -E -I../../include $< | $(GAS) -o $@ - default: $(OBJ); diff --git a/arm/p521/Makefile b/arm/p521/Makefile index ac9617fad0..70fc7aff28 100644 --- a/arm/p521/Makefile +++ b/arm/p521/Makefile @@ -55,7 +55,7 @@ OBJ = bignum_add_p521.o \ bignum_tomont_p521.o \ bignum_triple_p521.o -%.o : %.S ; cpp $< | $(GAS) -o $@ - +%.o : %.S ; $(CXX) -E -I../../include $< | $(GAS) -o $@ - default: $(OBJ); diff --git a/include/_internal_s2n_bignum.h b/include/_internal_s2n_bignum.h index 2557591614..c7bdf160a7 100644 --- a/include/_internal_s2n_bignum.h +++ b/include/_internal_s2n_bignum.h @@ -1,18 +1,13 @@ #ifdef __APPLE__ -# define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl _##name -# ifdef S2N_BN_HIDE_SYMBOLS -# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .private_extern _##name -# else -# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ -# endif -# define S2N_BN_SYMBOL(name) _##name +# define S2N_BN_SYMBOL(NAME) _##NAME #else -# define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl name -# ifdef S2N_BN_HIDE_SYMBOLS -# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .hidden name -# else -# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: 
S2N_BN_SYM_PRIVACY_DIRECTIVE */ -# endif # define S2N_BN_SYMBOL(name) name +#endif + +#define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl S2N_BN_SYMBOL(name) +#ifdef S2N_BN_HIDE_SYMBOLS +# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .private_extern S2N_BN_SYMBOL(name) +#else +# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ #endif \ No newline at end of file From 02cb77f59c03c35ca83dcdd09ec19220a223c636 Mon Sep 17 00:00:00 2001 From: Justin Smith Date: Mon, 23 May 2022 10:01:59 -0400 Subject: [PATCH 08/42] Use .hidden directive on non-Apple platforms s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/02b7710be1e1552c5135b9a9affeaba280491051 --- include/_internal_s2n_bignum.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/_internal_s2n_bignum.h b/include/_internal_s2n_bignum.h index c7bdf160a7..c7cedb633a 100644 --- a/include/_internal_s2n_bignum.h +++ b/include/_internal_s2n_bignum.h @@ -7,7 +7,11 @@ #define S2N_BN_SYM_VISIBILITY_DIRECTIVE(name) .globl S2N_BN_SYMBOL(name) #ifdef S2N_BN_HIDE_SYMBOLS -# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .private_extern S2N_BN_SYMBOL(name) +# ifdef __APPLE__ +# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .private_extern S2N_BN_SYMBOL(name) +# else +# define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) .hidden S2N_BN_SYMBOL(name) +# endif #else # define S2N_BN_SYM_PRIVACY_DIRECTIVE(name) /* NO-OP: S2N_BN_SYM_PRIVACY_DIRECTIVE */ #endif \ No newline at end of file From e7f49a90199aadea3caa5c25f54eea1f82fe7fc0 Mon Sep 17 00:00:00 2001 From: Justin Smith Date: Tue, 24 May 2022 13:25:40 -0400 Subject: [PATCH 09/42] Use '$(CC) -E' for preprocessor s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/0fd68eb4af6a4f6dafd85b147f4df4c3549919f9 --- arm/p384/Makefile | 2 +- arm/p521/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arm/p384/Makefile b/arm/p384/Makefile index d3859296e4..469a20ff12 100644 --- a/arm/p384/Makefile +++ b/arm/p384/Makefile @@ -55,7 +55,7 @@ OBJ = bignum_add_p384.o \ bignum_tomont_p384.o \ bignum_triple_p384.o -%.o : %.S ; $(CXX) -E -I../../include $< | $(GAS) -o $@ - +%.o : %.S ; $(CC) -E -I../../include $< | $(GAS) -o $@ - default: $(OBJ); diff --git a/arm/p521/Makefile b/arm/p521/Makefile index 70fc7aff28..921016115f 100644 --- a/arm/p521/Makefile +++ b/arm/p521/Makefile @@ -55,7 +55,7 @@ OBJ = bignum_add_p521.o \ bignum_tomont_p521.o \ bignum_triple_p521.o -%.o : %.S ; $(CXX) -E -I../../include $< | $(GAS) -o $@ - +%.o : %.S ; $(CC) -E -I../../include $< | $(GAS) -o $@ - default: $(OBJ); From 3c8db851bfbfc779943ad9ae6e41fabc57c00782 Mon Sep 17 00:00:00 2001 From: Justin Smith Date: Wed, 8 Jun 2022 12:12:13 -0400 Subject: [PATCH 10/42] Several files missing .text directive s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/5df01541f97b68708e2678249f66e2b1bf1cbd1e --- x86_att/p384/bignum_add_p384.S | 1 + x86_att/p384/bignum_cmul_p384_alt.S | 1 + x86_att/p384/bignum_deamont_p384.S | 2 ++ x86_att/p384/bignum_deamont_p384_alt.S | 1 + x86_att/p384/bignum_demont_p384.S | 1 + x86_att/p384/bignum_demont_p384_alt.S | 1 + 6 files changed, 7 insertions(+) diff --git a/x86_att/p384/bignum_add_p384.S b/x86_att/p384/bignum_add_p384.S index 54341cd8da..3a2be9c5e6 100644 --- a/x86_att/p384/bignum_add_p384.S +++ b/x86_att/p384/bignum_add_p384.S @@ -28,6 +28,7 @@ S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add_p384) + .text #define z %rdi #define x %rsi diff --git 
a/x86_att/p384/bignum_cmul_p384_alt.S b/x86_att/p384/bignum_cmul_p384_alt.S index 67a8748bb0..f390c461f3 100644 --- a/x86_att/p384/bignum_cmul_p384_alt.S +++ b/x86_att/p384/bignum_cmul_p384_alt.S @@ -29,6 +29,7 @@ S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p384_alt) + .text #define z %rdi diff --git a/x86_att/p384/bignum_deamont_p384.S b/x86_att/p384/bignum_deamont_p384.S index 6561abd79e..39648d3291 100644 --- a/x86_att/p384/bignum_deamont_p384.S +++ b/x86_att/p384/bignum_deamont_p384.S @@ -31,6 +31,8 @@ S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p384) + .text + #define z %rdi #define x %rsi diff --git a/x86_att/p384/bignum_deamont_p384_alt.S b/x86_att/p384/bignum_deamont_p384_alt.S index 428aa764a5..e6aa8f5374 100644 --- a/x86_att/p384/bignum_deamont_p384_alt.S +++ b/x86_att/p384/bignum_deamont_p384_alt.S @@ -31,6 +31,7 @@ S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p384_alt) + .text #define z %rdi #define x %rsi diff --git a/x86_att/p384/bignum_demont_p384.S b/x86_att/p384/bignum_demont_p384.S index bf621dcc75..9ccaece15e 100644 --- a/x86_att/p384/bignum_demont_p384.S +++ b/x86_att/p384/bignum_demont_p384.S @@ -31,6 +31,7 @@ S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p384) + .text #define z %rdi #define x %rsi diff --git a/x86_att/p384/bignum_demont_p384_alt.S b/x86_att/p384/bignum_demont_p384_alt.S index 9009a22369..b4084025d2 100644 --- a/x86_att/p384/bignum_demont_p384_alt.S +++ b/x86_att/p384/bignum_demont_p384_alt.S @@ -31,6 +31,7 @@ S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p384_alt) + .text #define z %rdi #define x %rsi From b5347001de80badb4e7da3574cf86337515cb4b3 Mon Sep 17 00:00:00 2001 From: Justin W Smith Date: Wed, 8 Jun 2022 18:25:39 +0000 Subject: [PATCH 11/42] Re-make x86_att s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/e5d0f812eddbbaf6036a3fb5768ada768cdaac38 --- x86_att/curve25519/bignum_neg_p25519.S | 1 + x86_att/p384/bignum_add_p384.S | 1 + x86_att/p384/bignum_bigendian_6.S | 1 + x86_att/p384/bignum_cmul_p384.S | 1 + x86_att/p384/bignum_cmul_p384_alt.S | 1 + x86_att/p384/bignum_deamont_p384.S | 1 + x86_att/p384/bignum_deamont_p384_alt.S | 1 + x86_att/p384/bignum_demont_p384.S | 1 + x86_att/p384/bignum_demont_p384_alt.S | 1 + x86_att/p384/bignum_double_p384.S | 1 + x86_att/p384/bignum_half_p384.S | 1 + x86_att/p384/bignum_littleendian_6.S | 1 + x86_att/p384/bignum_mod_n384.S | 2 +- x86_att/p384/bignum_mod_n384_6.S | 2 +- x86_att/p384/bignum_mod_n384_alt.S | 2 +- x86_att/p384/bignum_mod_p384.S | 2 +- x86_att/p384/bignum_mod_p384_6.S | 2 +- x86_att/p384/bignum_mod_p384_alt.S | 2 +- x86_att/p384/bignum_montmul_p384.S | 2 +- x86_att/p384/bignum_montmul_p384_alt.S | 2 +- x86_att/p384/bignum_montsqr_p384.S | 2 +- x86_att/p384/bignum_montsqr_p384_alt.S | 2 +- x86_att/p384/bignum_mux_6.S | 2 +- x86_att/p384/bignum_neg_p384.S | 2 +- x86_att/p384/bignum_nonzero_6.S | 2 +- x86_att/p384/bignum_optneg_p384.S | 2 +- x86_att/p384/bignum_sub_p384.S | 2 +- x86_att/p384/bignum_tomont_p384.S | 2 +- x86_att/p384/bignum_tomont_p384_alt.S | 2 +- x86_att/p384/bignum_triple_p384.S | 2 +- x86_att/p384/bignum_triple_p384_alt.S | 2 +- x86_att/p521/bignum_add_p521.S | 2 +- x86_att/p521/bignum_cmul_p521.S | 2 +- x86_att/p521/bignum_cmul_p521_alt.S | 2 +- x86_att/p521/bignum_deamont_p521.S | 2 +- 
x86_att/p521/bignum_demont_p521.S | 2 +- x86_att/p521/bignum_double_p521.S | 2 +- x86_att/p521/bignum_fromlebytes_p521.S | 2 +- x86_att/p521/bignum_half_p521.S | 2 +- x86_att/p521/bignum_mod_n521_9.S | 2 +- x86_att/p521/bignum_mod_n521_9_alt.S | 2 +- x86_att/p521/bignum_mod_p521_9.S | 2 +- x86_att/p521/bignum_montmul_p521.S | 2 +- x86_att/p521/bignum_montmul_p521_alt.S | 2 +- x86_att/p521/bignum_montsqr_p521.S | 2 +- x86_att/p521/bignum_montsqr_p521_alt.S | 2 +- x86_att/p521/bignum_mul_p521.S | 2 +- x86_att/p521/bignum_mul_p521_alt.S | 2 +- x86_att/p521/bignum_neg_p521.S | 2 +- x86_att/p521/bignum_optneg_p521.S | 2 +- x86_att/p521/bignum_sqr_p521.S | 2 +- x86_att/p521/bignum_sqr_p521_alt.S | 2 +- x86_att/p521/bignum_sub_p521.S | 2 +- x86_att/p521/bignum_tolebytes_p521.S | 2 +- x86_att/p521/bignum_tomont_p521.S | 2 +- x86_att/p521/bignum_triple_p521.S | 2 +- x86_att/p521/bignum_triple_p521_alt.S | 2 +- 57 files changed, 57 insertions(+), 45 deletions(-) diff --git a/x86_att/curve25519/bignum_neg_p25519.S b/x86_att/curve25519/bignum_neg_p25519.S index bbe2dc014c..f0b7392f1f 100644 --- a/x86_att/curve25519/bignum_neg_p25519.S +++ b/x86_att/curve25519/bignum_neg_p25519.S @@ -26,6 +26,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p25519) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p25519) .text diff --git a/x86_att/p384/bignum_add_p384.S b/x86_att/p384/bignum_add_p384.S index 3a2be9c5e6..e527de6fba 100644 --- a/x86_att/p384/bignum_add_p384.S +++ b/x86_att/p384/bignum_add_p384.S @@ -26,6 +26,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add_p384) .text diff --git a/x86_att/p384/bignum_bigendian_6.S b/x86_att/p384/bignum_bigendian_6.S index 44da1b75bd..287e1bd392 100644 --- a/x86_att/p384/bignum_bigendian_6.S +++ b/x86_att/p384/bignum_bigendian_6.S @@ -38,6 +38,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_bigendian_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_bigendian_6) S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_frombebytes_6) diff --git a/x86_att/p384/bignum_cmul_p384.S b/x86_att/p384/bignum_cmul_p384.S index bd4afcd54e..4f03a1bd18 100644 --- a/x86_att/p384/bignum_cmul_p384.S +++ b/x86_att/p384/bignum_cmul_p384.S @@ -27,6 +27,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p384) .text diff --git a/x86_att/p384/bignum_cmul_p384_alt.S b/x86_att/p384/bignum_cmul_p384_alt.S index f390c461f3..ff4b38658a 100644 --- a/x86_att/p384/bignum_cmul_p384_alt.S +++ b/x86_att/p384/bignum_cmul_p384_alt.S @@ -27,6 +27,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p384_alt) .text diff --git a/x86_att/p384/bignum_deamont_p384.S b/x86_att/p384/bignum_deamont_p384.S index 39648d3291..f029a18f4b 100644 --- a/x86_att/p384/bignum_deamont_p384.S +++ b/x86_att/p384/bignum_deamont_p384.S @@ -29,6 +29,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p384) .text diff --git a/x86_att/p384/bignum_deamont_p384_alt.S b/x86_att/p384/bignum_deamont_p384_alt.S index e6aa8f5374..63b660f707 100644 --- a/x86_att/p384/bignum_deamont_p384_alt.S +++ b/x86_att/p384/bignum_deamont_p384_alt.S @@ -29,6 +29,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p384_alt) 
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p384_alt) .text diff --git a/x86_att/p384/bignum_demont_p384.S b/x86_att/p384/bignum_demont_p384.S index 9ccaece15e..75124a9e29 100644 --- a/x86_att/p384/bignum_demont_p384.S +++ b/x86_att/p384/bignum_demont_p384.S @@ -29,6 +29,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p384) .text diff --git a/x86_att/p384/bignum_demont_p384_alt.S b/x86_att/p384/bignum_demont_p384_alt.S index b4084025d2..1b4832bf03 100644 --- a/x86_att/p384/bignum_demont_p384_alt.S +++ b/x86_att/p384/bignum_demont_p384_alt.S @@ -29,6 +29,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p384_alt) .text diff --git a/x86_att/p384/bignum_double_p384.S b/x86_att/p384/bignum_double_p384.S index cb83e6e021..b7b2e5795d 100644 --- a/x86_att/p384/bignum_double_p384.S +++ b/x86_att/p384/bignum_double_p384.S @@ -26,6 +26,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_double_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_double_p384) .text diff --git a/x86_att/p384/bignum_half_p384.S b/x86_att/p384/bignum_half_p384.S index 7e353c356c..eba7322510 100644 --- a/x86_att/p384/bignum_half_p384.S +++ b/x86_att/p384/bignum_half_p384.S @@ -26,6 +26,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_half_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_half_p384) .text diff --git a/x86_att/p384/bignum_littleendian_6.S b/x86_att/p384/bignum_littleendian_6.S index e632a38491..670bca9136 100644 --- a/x86_att/p384/bignum_littleendian_6.S +++ b/x86_att/p384/bignum_littleendian_6.S @@ -37,6 +37,7 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_littleendian_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_littleendian_6) S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_fromlebytes_6) diff --git a/x86_att/p384/bignum_mod_n384.S b/x86_att/p384/bignum_mod_n384.S index fcb8123352..a03c07c76b 100644 --- a/x86_att/p384/bignum_mod_n384.S +++ b/x86_att/p384/bignum_mod_n384.S @@ -28,9 +28,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384) - .text #define z %rdi diff --git a/x86_att/p384/bignum_mod_n384_6.S b/x86_att/p384/bignum_mod_n384_6.S index d3048f01b1..8a0d199b2a 100644 --- a/x86_att/p384/bignum_mod_n384_6.S +++ b/x86_att/p384/bignum_mod_n384_6.S @@ -28,9 +28,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384_6) - .text #define z %rdi diff --git a/x86_att/p384/bignum_mod_n384_alt.S b/x86_att/p384/bignum_mod_n384_alt.S index d4cf45f54a..0bb52c3983 100644 --- a/x86_att/p384/bignum_mod_n384_alt.S +++ b/x86_att/p384/bignum_mod_n384_alt.S @@ -28,9 +28,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n384_alt) - .text #define z %rdi diff --git a/x86_att/p384/bignum_mod_p384.S b/x86_att/p384/bignum_mod_p384.S index 3ee2ee3169..934275f270 100644 --- a/x86_att/p384/bignum_mod_p384.S +++ b/x86_att/p384/bignum_mod_p384.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384) - .text #define z %rdi diff --git a/x86_att/p384/bignum_mod_p384_6.S b/x86_att/p384/bignum_mod_p384_6.S index 03955a2346..f898e0b00b 100644 --- 
a/x86_att/p384/bignum_mod_p384_6.S +++ b/x86_att/p384/bignum_mod_p384_6.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384_6) - .text #define z %rdi diff --git a/x86_att/p384/bignum_mod_p384_alt.S b/x86_att/p384/bignum_mod_p384_alt.S index d1e8ccb9b5..e3ec6346b4 100644 --- a/x86_att/p384/bignum_mod_p384_alt.S +++ b/x86_att/p384/bignum_mod_p384_alt.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p384_alt) - .text #define z %rdi diff --git a/x86_att/p384/bignum_montmul_p384.S b/x86_att/p384/bignum_montmul_p384.S index 5744e71262..3ea3100dd9 100644 --- a/x86_att/p384/bignum_montmul_p384.S +++ b/x86_att/p384/bignum_montmul_p384.S @@ -30,9 +30,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p384) - .text #define z %rdi diff --git a/x86_att/p384/bignum_montmul_p384_alt.S b/x86_att/p384/bignum_montmul_p384_alt.S index cadb5d3457..3efcaced8c 100644 --- a/x86_att/p384/bignum_montmul_p384_alt.S +++ b/x86_att/p384/bignum_montmul_p384_alt.S @@ -30,9 +30,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p384_alt) - .text #define z %rdi diff --git a/x86_att/p384/bignum_montsqr_p384.S b/x86_att/p384/bignum_montsqr_p384.S index 05b9681f71..71901599aa 100644 --- a/x86_att/p384/bignum_montsqr_p384.S +++ b/x86_att/p384/bignum_montsqr_p384.S @@ -29,9 +29,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p384) - .text #define z %rdi diff --git a/x86_att/p384/bignum_montsqr_p384_alt.S b/x86_att/p384/bignum_montsqr_p384_alt.S index 08d421ecb9..652184c691 100644 --- a/x86_att/p384/bignum_montsqr_p384_alt.S +++ b/x86_att/p384/bignum_montsqr_p384_alt.S @@ -29,9 +29,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p384_alt) - .text #define z %rdi diff --git a/x86_att/p384/bignum_mux_6.S b/x86_att/p384/bignum_mux_6.S index 29ac9e24f9..0bf97ff129 100644 --- a/x86_att/p384/bignum_mux_6.S +++ b/x86_att/p384/bignum_mux_6.S @@ -29,9 +29,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mux_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mux_6) - .text #define p %rdi diff --git a/x86_att/p384/bignum_neg_p384.S b/x86_att/p384/bignum_neg_p384.S index b1941b3a11..8be796ba4e 100644 --- a/x86_att/p384/bignum_neg_p384.S +++ b/x86_att/p384/bignum_neg_p384.S @@ -25,9 +25,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p384) - .text #define z %rdi diff --git a/x86_att/p384/bignum_nonzero_6.S b/x86_att/p384/bignum_nonzero_6.S index 4888fa518d..d03e82bbae 100644 --- a/x86_att/p384/bignum_nonzero_6.S +++ b/x86_att/p384/bignum_nonzero_6.S @@ -25,9 +25,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_nonzero_6) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_nonzero_6) - .text #define x %rdi diff --git a/x86_att/p384/bignum_optneg_p384.S b/x86_att/p384/bignum_optneg_p384.S index aafce2b72f..77cf1f6499 100644 --- a/x86_att/p384/bignum_optneg_p384.S +++ b/x86_att/p384/bignum_optneg_p384.S @@ -27,9 +27,9 @@ #include "_internal_s2n_bignum.h" + 
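// As a concrete illustration of the two directives below, using the macro
// definitions from include/_internal_s2n_bignum.h as revised in patches 07
// and 08: for bignum_optneg_p384 they expand on Apple platforms to
//
//         .globl _bignum_optneg_p384
//         .private_extern _bignum_optneg_p384
//
// and on other (ELF) platforms to
//
//         .globl bignum_optneg_p384
//         .hidden bignum_optneg_p384
//
// where the privacy directive is instead a no-op comment unless
// S2N_BN_HIDE_SYMBOLS is defined.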
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optneg_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optneg_p384) - .text #define z %rdi diff --git a/x86_att/p384/bignum_sub_p384.S b/x86_att/p384/bignum_sub_p384.S index e2c4776890..bfd4fa2b62 100644 --- a/x86_att/p384/bignum_sub_p384.S +++ b/x86_att/p384/bignum_sub_p384.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub_p384) - .text #define z %rdi diff --git a/x86_att/p384/bignum_tomont_p384.S b/x86_att/p384/bignum_tomont_p384.S index 73351c221e..e17ef46dcf 100644 --- a/x86_att/p384/bignum_tomont_p384.S +++ b/x86_att/p384/bignum_tomont_p384.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p384) - .text #define z %rdi diff --git a/x86_att/p384/bignum_tomont_p384_alt.S b/x86_att/p384/bignum_tomont_p384_alt.S index eccb5ba163..b81d583508 100644 --- a/x86_att/p384/bignum_tomont_p384_alt.S +++ b/x86_att/p384/bignum_tomont_p384_alt.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p384_alt) - .text #define z %rdi diff --git a/x86_att/p384/bignum_triple_p384.S b/x86_att/p384/bignum_triple_p384.S index 7cdf2399b9..356e20b316 100644 --- a/x86_att/p384/bignum_triple_p384.S +++ b/x86_att/p384/bignum_triple_p384.S @@ -29,9 +29,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p384) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p384) - .text #define z %rdi diff --git a/x86_att/p384/bignum_triple_p384_alt.S b/x86_att/p384/bignum_triple_p384_alt.S index 52e6614a80..d8e0506a60 100644 --- a/x86_att/p384/bignum_triple_p384_alt.S +++ b/x86_att/p384/bignum_triple_p384_alt.S @@ -29,9 +29,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p384_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p384_alt) - .text #define z %rdi diff --git a/x86_att/p521/bignum_add_p521.S b/x86_att/p521/bignum_add_p521.S index 149d0df0a5..080e98807d 100644 --- a/x86_att/p521/bignum_add_p521.S +++ b/x86_att/p521/bignum_add_p521.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_cmul_p521.S b/x86_att/p521/bignum_cmul_p521.S index 118910535b..ce8dded8dd 100644 --- a/x86_att/p521/bignum_cmul_p521.S +++ b/x86_att/p521/bignum_cmul_p521.S @@ -27,9 +27,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_cmul_p521_alt.S b/x86_att/p521/bignum_cmul_p521_alt.S index 640a1de89c..335d8e5cf7 100644 --- a/x86_att/p521/bignum_cmul_p521_alt.S +++ b/x86_att/p521/bignum_cmul_p521_alt.S @@ -27,9 +27,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul_p521_alt) - .text #define z %rdi diff --git a/x86_att/p521/bignum_deamont_p521.S b/x86_att/p521/bignum_deamont_p521.S index a118267d02..de4f767fb1 100644 --- a/x86_att/p521/bignum_deamont_p521.S +++ b/x86_att/p521/bignum_deamont_p521.S @@ -29,9 +29,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_deamont_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_deamont_p521) - .text #define z %rdi diff --git 
a/x86_att/p521/bignum_demont_p521.S b/x86_att/p521/bignum_demont_p521.S index b99c6298ce..5e8c2c7406 100644 --- a/x86_att/p521/bignum_demont_p521.S +++ b/x86_att/p521/bignum_demont_p521.S @@ -29,9 +29,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_demont_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_demont_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_double_p521.S b/x86_att/p521/bignum_double_p521.S index 33fe1e5a90..ea904c8bc4 100644 --- a/x86_att/p521/bignum_double_p521.S +++ b/x86_att/p521/bignum_double_p521.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_double_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_double_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_fromlebytes_p521.S b/x86_att/p521/bignum_fromlebytes_p521.S index 3226f43839..1c28af57a2 100644 --- a/x86_att/p521/bignum_fromlebytes_p521.S +++ b/x86_att/p521/bignum_fromlebytes_p521.S @@ -30,9 +30,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_fromlebytes_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_fromlebytes_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_half_p521.S b/x86_att/p521/bignum_half_p521.S index 505474f5f4..47fa3e6f49 100644 --- a/x86_att/p521/bignum_half_p521.S +++ b/x86_att/p521/bignum_half_p521.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_half_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_half_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_mod_n521_9.S b/x86_att/p521/bignum_mod_n521_9.S index 1bb43aa316..d17c196747 100644 --- a/x86_att/p521/bignum_mod_n521_9.S +++ b/x86_att/p521/bignum_mod_n521_9.S @@ -28,9 +28,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n521_9) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n521_9) - .text #define z %rdi diff --git a/x86_att/p521/bignum_mod_n521_9_alt.S b/x86_att/p521/bignum_mod_n521_9_alt.S index 9421f4ab08..3da8d997be 100644 --- a/x86_att/p521/bignum_mod_n521_9_alt.S +++ b/x86_att/p521/bignum_mod_n521_9_alt.S @@ -28,9 +28,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n521_9_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n521_9_alt) - .text #define z %rdi diff --git a/x86_att/p521/bignum_mod_p521_9.S b/x86_att/p521/bignum_mod_p521_9.S index ef8cc7d5fc..96d218065e 100644 --- a/x86_att/p521/bignum_mod_p521_9.S +++ b/x86_att/p521/bignum_mod_p521_9.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p521_9) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p521_9) - .text #define z %rdi diff --git a/x86_att/p521/bignum_montmul_p521.S b/x86_att/p521/bignum_montmul_p521.S index f6a74b3783..be232ea29c 100644 --- a/x86_att/p521/bignum_montmul_p521.S +++ b/x86_att/p521/bignum_montmul_p521.S @@ -31,9 +31,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_montmul_p521_alt.S b/x86_att/p521/bignum_montmul_p521_alt.S index 05caf63424..7631458755 100644 --- a/x86_att/p521/bignum_montmul_p521_alt.S +++ b/x86_att/p521/bignum_montmul_p521_alt.S @@ -31,9 +31,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montmul_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montmul_p521_alt) - .text #define z %rdi diff --git a/x86_att/p521/bignum_montsqr_p521.S b/x86_att/p521/bignum_montsqr_p521.S index 
689472385a..2d9acc7529 100644 --- a/x86_att/p521/bignum_montsqr_p521.S +++ b/x86_att/p521/bignum_montsqr_p521.S @@ -31,9 +31,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_montsqr_p521_alt.S b/x86_att/p521/bignum_montsqr_p521_alt.S index 84adf6bdb3..84f1af05b6 100644 --- a/x86_att/p521/bignum_montsqr_p521_alt.S +++ b/x86_att/p521/bignum_montsqr_p521_alt.S @@ -31,9 +31,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montsqr_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montsqr_p521_alt) - .text // Input arguments diff --git a/x86_att/p521/bignum_mul_p521.S b/x86_att/p521/bignum_mul_p521.S index 3c8fa48eae..e25129c059 100644 --- a/x86_att/p521/bignum_mul_p521.S +++ b/x86_att/p521/bignum_mul_p521.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_mul_p521_alt.S b/x86_att/p521/bignum_mul_p521_alt.S index bc986e4a87..d05a7cb0b0 100644 --- a/x86_att/p521/bignum_mul_p521_alt.S +++ b/x86_att/p521/bignum_mul_p521_alt.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_p521_alt) - .text #define z %rdi diff --git a/x86_att/p521/bignum_neg_p521.S b/x86_att/p521/bignum_neg_p521.S index eed3614dec..d6eb825754 100644 --- a/x86_att/p521/bignum_neg_p521.S +++ b/x86_att/p521/bignum_neg_p521.S @@ -25,9 +25,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_neg_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_neg_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_optneg_p521.S b/x86_att/p521/bignum_optneg_p521.S index 80bb33ffb5..e910140ad8 100644 --- a/x86_att/p521/bignum_optneg_p521.S +++ b/x86_att/p521/bignum_optneg_p521.S @@ -27,9 +27,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_optneg_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_optneg_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_sqr_p521.S b/x86_att/p521/bignum_sqr_p521.S index 74b88a28e8..6e2d9b5562 100644 --- a/x86_att/p521/bignum_sqr_p521.S +++ b/x86_att/p521/bignum_sqr_p521.S @@ -25,9 +25,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_sqr_p521_alt.S b/x86_att/p521/bignum_sqr_p521_alt.S index 44cf65b544..c2fae3c25f 100644 --- a/x86_att/p521/bignum_sqr_p521_alt.S +++ b/x86_att/p521/bignum_sqr_p521_alt.S @@ -25,9 +25,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_p521_alt) - .text // Input arguments diff --git a/x86_att/p521/bignum_sub_p521.S b/x86_att/p521/bignum_sub_p521.S index 3ce8104b39..bcd9ed4a4b 100644 --- a/x86_att/p521/bignum_sub_p521.S +++ b/x86_att/p521/bignum_sub_p521.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_tolebytes_p521.S b/x86_att/p521/bignum_tolebytes_p521.S index 5d2bc9a654..7fb403239e 100644 --- a/x86_att/p521/bignum_tolebytes_p521.S +++ b/x86_att/p521/bignum_tolebytes_p521.S @@ -30,9 +30,9 @@ #include "_internal_s2n_bignum.h" + 
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tolebytes_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tolebytes_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_tomont_p521.S b/x86_att/p521/bignum_tomont_p521.S index fa05076593..df8f277345 100644 --- a/x86_att/p521/bignum_tomont_p521.S +++ b/x86_att/p521/bignum_tomont_p521.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_tomont_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_tomont_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_triple_p521.S b/x86_att/p521/bignum_triple_p521.S index 9380372cea..484dab04c8 100644 --- a/x86_att/p521/bignum_triple_p521.S +++ b/x86_att/p521/bignum_triple_p521.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p521) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p521) - .text #define z %rdi diff --git a/x86_att/p521/bignum_triple_p521_alt.S b/x86_att/p521/bignum_triple_p521_alt.S index 5f23a3a440..9e25a2c853 100644 --- a/x86_att/p521/bignum_triple_p521_alt.S +++ b/x86_att/p521/bignum_triple_p521_alt.S @@ -26,9 +26,9 @@ #include "_internal_s2n_bignum.h" + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_triple_p521_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_triple_p521_alt) - .text #define z %rdi From de00f966e8e77daf77cbc5bc030a9e84681979e7 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 1 Jul 2022 20:50:08 -0500 Subject: [PATCH 12/42] Add basic NIST P-384 point operations A point doubling function, point addition function, and point mixed addition function for the P-384 curve, all using Jacobian coordinates in a Montgomery representation, with input nondegeneracy assumed. Once again, the addition and mixed addition functions offer only marginal efficiency gains over just calling a sequence of basic field operations, but the doubling has some beneficial mathematically equivalent short-cutting of the intermediate modular reductions. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/df8e913c542e5392a9f9cb6cd42fc90c5a02f72e --- arm/p384/Makefile | 5 +- arm/p384/p384_montjadd.S | 893 +++++++++++++++++++++++++++ arm/p384/p384_montjdouble.S | 963 +++++++++++++++++++++++++++++ arm/p384/p384_montjmixadd.S | 884 +++++++++++++++++++++++++++ x86_att/p384/p384_montjadd.S | 955 +++++++++++++++++++++++++++++ x86_att/p384/p384_montjdouble.S | 1014 +++++++++++++++++++++++++++++++ x86_att/p384/p384_montjmixadd.S | 941 ++++++++++++++++++++++++++++ 7 files changed, 5654 insertions(+), 1 deletion(-) create mode 100644 arm/p384/p384_montjadd.S create mode 100644 arm/p384/p384_montjdouble.S create mode 100644 arm/p384/p384_montjmixadd.S create mode 100644 x86_att/p384/p384_montjadd.S create mode 100644 x86_att/p384/p384_montjdouble.S create mode 100644 x86_att/p384/p384_montjmixadd.S diff --git a/arm/p384/Makefile b/arm/p384/Makefile index 469a20ff12..11a5605504 100644 --- a/arm/p384/Makefile +++ b/arm/p384/Makefile @@ -53,7 +53,10 @@ OBJ = bignum_add_p384.o \ bignum_optneg_p384.o \ bignum_sub_p384.o \ bignum_tomont_p384.o \ - bignum_triple_p384.o + bignum_triple_p384.o \ + p384_montjadd.o \ + p384_montjdouble.o \ + p384_montjmixadd.o %.o : %.S ; $(CC) -E -I../../include $< | $(GAS) -o $@ - diff --git a/arm/p384/p384_montjadd.S b/arm/p384/p384_montjadd.S new file mode 100644 index 0000000000..138afa9dc3 --- /dev/null +++ b/arm/p384/p384_montjadd.S @@ -0,0 +1,893 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). 
+ * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates +// +// extern void p384_montjadd +// (uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 18]); +// +// Does p3 := p1 + p2 where all points are regarded as Jacobian triples with +// each coordinate in the Montgomery domain, i.e. x' = (2^384 * x) mod p_384. +// A Jacobian triple (x',y',z') represents affine point (x/z^2,y/z^3). +// +// Standard ARM ABI: X0 = p3, X1 = p1, X2 = p2 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjadd) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjadd) + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 + +// Stable homes for input arguments during main code sequence + +#define input_z x24 +#define input_x x25 +#define input_y x26 + +// Pointer-offset pairs for inputs and outputs + +#define x_1 input_x, #0 +#define y_1 input_x, #NUMSIZE +#define z_1 input_x, #(2*NUMSIZE) + +#define x_2 input_y, #0 +#define y_2 input_y, #NUMSIZE +#define z_2 input_y, #(2*NUMSIZE) + +#define x_3 input_z, #0 +#define y_3 input_z, #NUMSIZE +#define z_3 input_z, #(2*NUMSIZE) + +// Pointer-offset pairs for temporaries, with some aliasing +// NSPACE is the total stack needed for these temporaries + +#define z1sq sp, #(NUMSIZE*0) +#define ww sp, #(NUMSIZE*0) + +#define yd sp, #(NUMSIZE*1) +#define y2a sp, #(NUMSIZE*1) + +#define x2a sp, #(NUMSIZE*2) +#define zzx2 sp, #(NUMSIZE*2) + +#define zz sp, #(NUMSIZE*3) +#define t1 sp, #(NUMSIZE*3) + +#define t2 sp, #(NUMSIZE*4) +#define x1a sp, #(NUMSIZE*4) +#define zzx1 sp, #(NUMSIZE*4) + +#define xd sp, #(NUMSIZE*5) +#define z2sq sp, #(NUMSIZE*5) + +#define y1a sp, #(NUMSIZE*6) + +#define NSPACE (NUMSIZE*7) + +// Corresponds exactly to bignum_montmul_p384_alt + +#define montmul_p384(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + mul x12, x3, x5; \ + umulh x13, x3, x5; \ + mul x11, x3, x6; \ + umulh x14, x3, x6; \ + adds x13, x13, x11; \ + ldp x7, x8, [P2+16]; \ + mul x11, x3, x7; \ + umulh x15, x3, x7; \ + adcs x14, x14, x11; \ + mul x11, x3, x8; \ + umulh x16, x3, x8; \ + adcs x15, x15, x11; \ + ldp x9, x10, [P2+32]; \ + mul x11, x3, x9; \ + umulh x17, x3, x9; \ + adcs x16, x16, x11; \ + mul x11, x3, x10; \ + umulh x19, x3, x10; \ + adcs x17, x17, x11; \ + adc x19, x19, xzr; \ + mul x11, x4, x5; \ + adds x13, x13, x11; \ + mul x11, x4, x6; \ + adcs x14, x14, x11; \ + mul x11, x4, x7; \ + adcs x15, x15, x11; \ + mul x11, x4, x8; \ + adcs x16, x16, x11; \ + mul x11, x4, x9; \ + adcs x17, x17, x11; \ + mul x11, x4, x10; \ + adcs x19, x19, x11; \ + cset x20, cs; \ + umulh x11, x4, x5; \ + adds x14, x14, x11; \ + umulh x11, x4, x6; \ + adcs x15, x15, x11; \ + umulh x11, x4, x7; \ + adcs x16, x16, x11; \ + umulh x11, x4, x8; \ + adcs x17, x17, x11; \ + umulh x11, x4, x9; \ + adcs x19, x19, x11; \ + umulh x11, x4, x10; \ + adc x20, x20, x11; \ + ldp x3, x4, [P1+16]; \ + mul x11, x3, x5; \ + adds x14, 
x14, x11; \ + mul x11, x3, x6; \ + adcs x15, x15, x11; \ + mul x11, x3, x7; \ + adcs x16, x16, x11; \ + mul x11, x3, x8; \ + adcs x17, x17, x11; \ + mul x11, x3, x9; \ + adcs x19, x19, x11; \ + mul x11, x3, x10; \ + adcs x20, x20, x11; \ + cset x21, cs; \ + umulh x11, x3, x5; \ + adds x15, x15, x11; \ + umulh x11, x3, x6; \ + adcs x16, x16, x11; \ + umulh x11, x3, x7; \ + adcs x17, x17, x11; \ + umulh x11, x3, x8; \ + adcs x19, x19, x11; \ + umulh x11, x3, x9; \ + adcs x20, x20, x11; \ + umulh x11, x3, x10; \ + adc x21, x21, x11; \ + mul x11, x4, x5; \ + adds x15, x15, x11; \ + mul x11, x4, x6; \ + adcs x16, x16, x11; \ + mul x11, x4, x7; \ + adcs x17, x17, x11; \ + mul x11, x4, x8; \ + adcs x19, x19, x11; \ + mul x11, x4, x9; \ + adcs x20, x20, x11; \ + mul x11, x4, x10; \ + adcs x21, x21, x11; \ + cset x22, cs; \ + umulh x11, x4, x5; \ + adds x16, x16, x11; \ + umulh x11, x4, x6; \ + adcs x17, x17, x11; \ + umulh x11, x4, x7; \ + adcs x19, x19, x11; \ + umulh x11, x4, x8; \ + adcs x20, x20, x11; \ + umulh x11, x4, x9; \ + adcs x21, x21, x11; \ + umulh x11, x4, x10; \ + adc x22, x22, x11; \ + ldp x3, x4, [P1+32]; \ + mul x11, x3, x5; \ + adds x16, x16, x11; \ + mul x11, x3, x6; \ + adcs x17, x17, x11; \ + mul x11, x3, x7; \ + adcs x19, x19, x11; \ + mul x11, x3, x8; \ + adcs x20, x20, x11; \ + mul x11, x3, x9; \ + adcs x21, x21, x11; \ + mul x11, x3, x10; \ + adcs x22, x22, x11; \ + cset x2, cs; \ + umulh x11, x3, x5; \ + adds x17, x17, x11; \ + umulh x11, x3, x6; \ + adcs x19, x19, x11; \ + umulh x11, x3, x7; \ + adcs x20, x20, x11; \ + umulh x11, x3, x8; \ + adcs x21, x21, x11; \ + umulh x11, x3, x9; \ + adcs x22, x22, x11; \ + umulh x11, x3, x10; \ + adc x2, x2, x11; \ + mul x11, x4, x5; \ + adds x17, x17, x11; \ + mul x11, x4, x6; \ + adcs x19, x19, x11; \ + mul x11, x4, x7; \ + adcs x20, x20, x11; \ + mul x11, x4, x8; \ + adcs x21, x21, x11; \ + mul x11, x4, x9; \ + adcs x22, x22, x11; \ + mul x11, x4, x10; \ + adcs x2, x2, x11; \ + cset x1, cs; \ + umulh x11, x4, x5; \ + adds x19, x19, x11; \ + umulh x11, x4, x6; \ + adcs x20, x20, x11; \ + umulh x11, x4, x7; \ + adcs x21, x21, x11; \ + umulh x11, x4, x8; \ + adcs x22, x22, x11; \ + umulh x11, x4, x9; \ + adcs x2, x2, x11; \ + umulh x11, x4, x10; \ + adc x1, x1, x11; \ + lsl x7, x12, #32; \ + add x12, x7, x12; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x12; \ + mov x6, #0xffffffff; \ + mul x5, x6, x12; \ + umulh x6, x6, x12; \ + adds x7, x7, x5; \ + adcs x6, x6, x12; \ + adc x5, xzr, xzr; \ + subs x13, x13, x7; \ + sbcs x14, x14, x6; \ + sbcs x15, x15, x5; \ + sbcs x16, x16, xzr; \ + sbcs x17, x17, xzr; \ + sbc x12, x12, xzr; \ + lsl x7, x13, #32; \ + add x13, x7, x13; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x13; \ + mov x6, #0xffffffff; \ + mul x5, x6, x13; \ + umulh x6, x6, x13; \ + adds x7, x7, x5; \ + adcs x6, x6, x13; \ + adc x5, xzr, xzr; \ + subs x14, x14, x7; \ + sbcs x15, x15, x6; \ + sbcs x16, x16, x5; \ + sbcs x17, x17, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, xzr; \ + lsl x7, x14, #32; \ + add x14, x7, x14; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x14; \ + mov x6, #0xffffffff; \ + mul x5, x6, x14; \ + umulh x6, x6, x14; \ + adds x7, x7, x5; \ + adcs x6, x6, x14; \ + adc x5, xzr, xzr; \ + subs x15, x15, x7; \ + sbcs x16, x16, x6; \ + sbcs x17, x17, x5; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + sbc x14, x14, xzr; \ + lsl x7, x15, #32; \ + add x15, x7, x15; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x15; \ + mov x6, #0xffffffff; \ + mul x5, x6, x15; \ + umulh x6, x6, x15; 
\ + adds x7, x7, x5; \ + adcs x6, x6, x15; \ + adc x5, xzr, xzr; \ + subs x16, x16, x7; \ + sbcs x17, x17, x6; \ + sbcs x12, x12, x5; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbc x15, x15, xzr; \ + lsl x7, x16, #32; \ + add x16, x7, x16; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x16; \ + mov x6, #0xffffffff; \ + mul x5, x6, x16; \ + umulh x6, x6, x16; \ + adds x7, x7, x5; \ + adcs x6, x6, x16; \ + adc x5, xzr, xzr; \ + subs x17, x17, x7; \ + sbcs x12, x12, x6; \ + sbcs x13, x13, x5; \ + sbcs x14, x14, xzr; \ + sbcs x15, x15, xzr; \ + sbc x16, x16, xzr; \ + lsl x7, x17, #32; \ + add x17, x7, x17; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x17; \ + mov x6, #0xffffffff; \ + mul x5, x6, x17; \ + umulh x6, x6, x17; \ + adds x7, x7, x5; \ + adcs x6, x6, x17; \ + adc x5, xzr, xzr; \ + subs x12, x12, x7; \ + sbcs x13, x13, x6; \ + sbcs x14, x14, x5; \ + sbcs x15, x15, xzr; \ + sbcs x16, x16, xzr; \ + sbc x17, x17, xzr; \ + adds x12, x12, x19; \ + adcs x13, x13, x20; \ + adcs x14, x14, x21; \ + adcs x15, x15, x22; \ + adcs x16, x16, x2; \ + adcs x17, x17, x1; \ + adc x10, xzr, xzr; \ + mov x11, #0xffffffff00000001; \ + adds x19, x12, x11; \ + mov x11, #0xffffffff; \ + adcs x20, x13, x11; \ + mov x11, #0x1; \ + adcs x21, x14, x11; \ + adcs x22, x15, xzr; \ + adcs x2, x16, xzr; \ + adcs x1, x17, xzr; \ + adcs x10, x10, xzr; \ + csel x12, x12, x19, eq; \ + csel x13, x13, x20, eq; \ + csel x14, x14, x21, eq; \ + csel x15, x15, x22, eq; \ + csel x16, x16, x2, eq; \ + csel x17, x17, x1, eq; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16]; \ + stp x16, x17, [P0+32] + +// Corresponds exactly to bignum_montsqr_p384_alt + +#define montsqr_p384(P0,P1) \ + ldp x2, x3, [P1]; \ + mul x9, x2, x3; \ + umulh x10, x2, x3; \ + ldp x4, x5, [P1+16]; \ + mul x8, x2, x4; \ + adds x10, x10, x8; \ + mul x11, x2, x5; \ + mul x8, x3, x4; \ + adcs x11, x11, x8; \ + umulh x12, x2, x5; \ + mul x8, x3, x5; \ + adcs x12, x12, x8; \ + ldp x6, x7, [P1+32]; \ + mul x13, x2, x7; \ + mul x8, x3, x6; \ + adcs x13, x13, x8; \ + umulh x14, x2, x7; \ + mul x8, x3, x7; \ + adcs x14, x14, x8; \ + mul x15, x5, x6; \ + adcs x15, x15, xzr; \ + umulh x16, x5, x6; \ + adc x16, x16, xzr; \ + umulh x8, x2, x4; \ + adds x11, x11, x8; \ + umulh x8, x3, x4; \ + adcs x12, x12, x8; \ + umulh x8, x3, x5; \ + adcs x13, x13, x8; \ + umulh x8, x3, x6; \ + adcs x14, x14, x8; \ + umulh x8, x3, x7; \ + adcs x15, x15, x8; \ + adc x16, x16, xzr; \ + mul x8, x2, x6; \ + adds x12, x12, x8; \ + mul x8, x4, x5; \ + adcs x13, x13, x8; \ + mul x8, x4, x6; \ + adcs x14, x14, x8; \ + mul x8, x4, x7; \ + adcs x15, x15, x8; \ + mul x8, x5, x7; \ + adcs x16, x16, x8; \ + mul x17, x6, x7; \ + adcs x17, x17, xzr; \ + umulh x19, x6, x7; \ + adc x19, x19, xzr; \ + umulh x8, x2, x6; \ + adds x13, x13, x8; \ + umulh x8, x4, x5; \ + adcs x14, x14, x8; \ + umulh x8, x4, x6; \ + adcs x15, x15, x8; \ + umulh x8, x4, x7; \ + adcs x16, x16, x8; \ + umulh x8, x5, x7; \ + adcs x17, x17, x8; \ + adc x19, x19, xzr; \ + adds x9, x9, x9; \ + adcs x10, x10, x10; \ + adcs x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + adcs x15, x15, x15; \ + adcs x16, x16, x16; \ + adcs x17, x17, x17; \ + adcs x19, x19, x19; \ + cset x20, hs; \ + umulh x8, x2, x2; \ + mul x2, x2, x2; \ + adds x9, x9, x8; \ + mul x8, x3, x3; \ + adcs x10, x10, x8; \ + umulh x8, x3, x3; \ + adcs x11, x11, x8; \ + mul x8, x4, x4; \ + adcs x12, x12, x8; \ + umulh x8, x4, x4; \ + adcs x13, x13, x8; \ + mul x8, x5, x5; \ + adcs x14, x14, x8; \ + umulh x8, 
x5, x5; \ + adcs x15, x15, x8; \ + mul x8, x6, x6; \ + adcs x16, x16, x8; \ + umulh x8, x6, x6; \ + adcs x17, x17, x8; \ + mul x8, x7, x7; \ + adcs x19, x19, x8; \ + umulh x8, x7, x7; \ + adc x20, x20, x8; \ + lsl x5, x2, #32; \ + add x2, x5, x2; \ + mov x5, #-4294967295; \ + umulh x5, x5, x2; \ + mov x4, #4294967295; \ + mul x3, x4, x2; \ + umulh x4, x4, x2; \ + adds x5, x5, x3; \ + adcs x4, x4, x2; \ + adc x3, xzr, xzr; \ + subs x9, x9, x5; \ + sbcs x10, x10, x4; \ + sbcs x11, x11, x3; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + sbc x2, x2, xzr; \ + lsl x5, x9, #32; \ + add x9, x5, x9; \ + mov x5, #-4294967295; \ + umulh x5, x5, x9; \ + mov x4, #4294967295; \ + mul x3, x4, x9; \ + umulh x4, x4, x9; \ + adds x5, x5, x3; \ + adcs x4, x4, x9; \ + adc x3, xzr, xzr; \ + subs x10, x10, x5; \ + sbcs x11, x11, x4; \ + sbcs x12, x12, x3; \ + sbcs x13, x13, xzr; \ + sbcs x2, x2, xzr; \ + sbc x9, x9, xzr; \ + lsl x5, x10, #32; \ + add x10, x5, x10; \ + mov x5, #-4294967295; \ + umulh x5, x5, x10; \ + mov x4, #4294967295; \ + mul x3, x4, x10; \ + umulh x4, x4, x10; \ + adds x5, x5, x3; \ + adcs x4, x4, x10; \ + adc x3, xzr, xzr; \ + subs x11, x11, x5; \ + sbcs x12, x12, x4; \ + sbcs x13, x13, x3; \ + sbcs x2, x2, xzr; \ + sbcs x9, x9, xzr; \ + sbc x10, x10, xzr; \ + lsl x5, x11, #32; \ + add x11, x5, x11; \ + mov x5, #-4294967295; \ + umulh x5, x5, x11; \ + mov x4, #4294967295; \ + mul x3, x4, x11; \ + umulh x4, x4, x11; \ + adds x5, x5, x3; \ + adcs x4, x4, x11; \ + adc x3, xzr, xzr; \ + subs x12, x12, x5; \ + sbcs x13, x13, x4; \ + sbcs x2, x2, x3; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbc x11, x11, xzr; \ + lsl x5, x12, #32; \ + add x12, x5, x12; \ + mov x5, #-4294967295; \ + umulh x5, x5, x12; \ + mov x4, #4294967295; \ + mul x3, x4, x12; \ + umulh x4, x4, x12; \ + adds x5, x5, x3; \ + adcs x4, x4, x12; \ + adc x3, xzr, xzr; \ + subs x13, x13, x5; \ + sbcs x2, x2, x4; \ + sbcs x9, x9, x3; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbc x12, x12, xzr; \ + lsl x5, x13, #32; \ + add x13, x5, x13; \ + mov x5, #-4294967295; \ + umulh x5, x5, x13; \ + mov x4, #4294967295; \ + mul x3, x4, x13; \ + umulh x4, x4, x13; \ + adds x5, x5, x3; \ + adcs x4, x4, x13; \ + adc x3, xzr, xzr; \ + subs x2, x2, x5; \ + sbcs x9, x9, x4; \ + sbcs x10, x10, x3; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, xzr; \ + adds x2, x2, x14; \ + adcs x9, x9, x15; \ + adcs x10, x10, x16; \ + adcs x11, x11, x17; \ + adcs x12, x12, x19; \ + adcs x13, x13, x20; \ + adc x6, xzr, xzr; \ + mov x8, #-4294967295; \ + adds x14, x2, x8; \ + mov x8, #4294967295; \ + adcs x15, x9, x8; \ + mov x8, #1; \ + adcs x16, x10, x8; \ + adcs x17, x11, xzr; \ + adcs x19, x12, xzr; \ + adcs x20, x13, xzr; \ + adcs x6, x6, xzr; \ + csel x2, x2, x14, eq; \ + csel x9, x9, x15, eq; \ + csel x10, x10, x16, eq; \ + csel x11, x11, x17, eq; \ + csel x12, x12, x19, eq; \ + csel x13, x13, x20, eq; \ + stp x2, x9, [P0]; \ + stp x10, x11, [P0+16]; \ + stp x12, x13, [P0+32] + +// Almost-Montgomery variant which we use when an input to other muls +// with the other argument fully reduced (which is always safe). In +// fact, with the Karatsuba-based Montgomery mul here, we don't even +// *need* the restriction that the other argument is reduced. 
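
To pin down what "almost" means here, the following is a small Python reference sketch (an illustration added for this walkthrough, not part of s2n-bignum; the names montred, montsqr and amontsqr are made up). montred mirrors, in spirit, the six lsl/umulh reduction rounds in the macros above, using the fact that -1/p_384 mod 2^64 = 2^32 + 1, which is why each round forms its quotient word with the lsl #32 / add pair. Full Montgomery squaring then reduces into [0, p_384) via the final csel chain, while the almost-Montgomery variant only guarantees a result below 2^384 - exactly the weaker property the comment above relies on.

    # Reference sketch only; not s2n-bignum functions.
    P384 = 2**384 - 2**128 - 2**96 + 2**32 - 1
    R = 2**384                      # Montgomery radix for six 64-bit limbs

    def montred(t):
        # One limb of reduction per round: the quotient word is
        # t0 * (2**32 + 1) mod 2**64 because -1/p_384 mod 2**64 == 2**32 + 1;
        # adding q * p_384 zeroes the bottom limb, so shift down 64 bits.
        for _ in range(6):
            q = (t % 2**64) * (2**32 + 1) % 2**64
            t = (t + q * P384) >> 64
        return t                    # bounded by R + P384

    def montsqr(x):
        # Fully reduced: for x < p_384 the raw result lands in [0, 2*p_384),
        # so one conditional subtraction suffices.
        t = montred(x * x)
        return t - P384 if t >= P384 else t

    def amontsqr(x):
        # Almost-Montgomery: for any x < 2**384 the result is < 2**384,
        # though possibly still >= p_384.
        t = montred(x * x)
        return t - P384 if t >= R else t

Both return x^2 / 2^384 mod p_384 up to the stated bound; for reduced x one can check montsqr(x) == x * x * pow(R, -1, P384) % P384.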
+ +#define amontsqr_p384(P0,P1) \ + ldp x2, x3, [P1]; \ + mul x9, x2, x3; \ + umulh x10, x2, x3; \ + ldp x4, x5, [P1+16]; \ + mul x8, x2, x4; \ + adds x10, x10, x8; \ + mul x11, x2, x5; \ + mul x8, x3, x4; \ + adcs x11, x11, x8; \ + umulh x12, x2, x5; \ + mul x8, x3, x5; \ + adcs x12, x12, x8; \ + ldp x6, x7, [P1+32]; \ + mul x13, x2, x7; \ + mul x8, x3, x6; \ + adcs x13, x13, x8; \ + umulh x14, x2, x7; \ + mul x8, x3, x7; \ + adcs x14, x14, x8; \ + mul x15, x5, x6; \ + adcs x15, x15, xzr; \ + umulh x16, x5, x6; \ + adc x16, x16, xzr; \ + umulh x8, x2, x4; \ + adds x11, x11, x8; \ + umulh x8, x3, x4; \ + adcs x12, x12, x8; \ + umulh x8, x3, x5; \ + adcs x13, x13, x8; \ + umulh x8, x3, x6; \ + adcs x14, x14, x8; \ + umulh x8, x3, x7; \ + adcs x15, x15, x8; \ + adc x16, x16, xzr; \ + mul x8, x2, x6; \ + adds x12, x12, x8; \ + mul x8, x4, x5; \ + adcs x13, x13, x8; \ + mul x8, x4, x6; \ + adcs x14, x14, x8; \ + mul x8, x4, x7; \ + adcs x15, x15, x8; \ + mul x8, x5, x7; \ + adcs x16, x16, x8; \ + mul x17, x6, x7; \ + adcs x17, x17, xzr; \ + umulh x19, x6, x7; \ + adc x19, x19, xzr; \ + umulh x8, x2, x6; \ + adds x13, x13, x8; \ + umulh x8, x4, x5; \ + adcs x14, x14, x8; \ + umulh x8, x4, x6; \ + adcs x15, x15, x8; \ + umulh x8, x4, x7; \ + adcs x16, x16, x8; \ + umulh x8, x5, x7; \ + adcs x17, x17, x8; \ + adc x19, x19, xzr; \ + adds x9, x9, x9; \ + adcs x10, x10, x10; \ + adcs x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + adcs x15, x15, x15; \ + adcs x16, x16, x16; \ + adcs x17, x17, x17; \ + adcs x19, x19, x19; \ + cset x20, hs; \ + umulh x8, x2, x2; \ + mul x2, x2, x2; \ + adds x9, x9, x8; \ + mul x8, x3, x3; \ + adcs x10, x10, x8; \ + umulh x8, x3, x3; \ + adcs x11, x11, x8; \ + mul x8, x4, x4; \ + adcs x12, x12, x8; \ + umulh x8, x4, x4; \ + adcs x13, x13, x8; \ + mul x8, x5, x5; \ + adcs x14, x14, x8; \ + umulh x8, x5, x5; \ + adcs x15, x15, x8; \ + mul x8, x6, x6; \ + adcs x16, x16, x8; \ + umulh x8, x6, x6; \ + adcs x17, x17, x8; \ + mul x8, x7, x7; \ + adcs x19, x19, x8; \ + umulh x8, x7, x7; \ + adc x20, x20, x8; \ + lsl x5, x2, #32; \ + add x2, x5, x2; \ + mov x5, #-4294967295; \ + umulh x5, x5, x2; \ + mov x4, #4294967295; \ + mul x3, x4, x2; \ + umulh x4, x4, x2; \ + adds x5, x5, x3; \ + adcs x4, x4, x2; \ + adc x3, xzr, xzr; \ + subs x9, x9, x5; \ + sbcs x10, x10, x4; \ + sbcs x11, x11, x3; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + sbc x2, x2, xzr; \ + lsl x5, x9, #32; \ + add x9, x5, x9; \ + mov x5, #-4294967295; \ + umulh x5, x5, x9; \ + mov x4, #4294967295; \ + mul x3, x4, x9; \ + umulh x4, x4, x9; \ + adds x5, x5, x3; \ + adcs x4, x4, x9; \ + adc x3, xzr, xzr; \ + subs x10, x10, x5; \ + sbcs x11, x11, x4; \ + sbcs x12, x12, x3; \ + sbcs x13, x13, xzr; \ + sbcs x2, x2, xzr; \ + sbc x9, x9, xzr; \ + lsl x5, x10, #32; \ + add x10, x5, x10; \ + mov x5, #-4294967295; \ + umulh x5, x5, x10; \ + mov x4, #4294967295; \ + mul x3, x4, x10; \ + umulh x4, x4, x10; \ + adds x5, x5, x3; \ + adcs x4, x4, x10; \ + adc x3, xzr, xzr; \ + subs x11, x11, x5; \ + sbcs x12, x12, x4; \ + sbcs x13, x13, x3; \ + sbcs x2, x2, xzr; \ + sbcs x9, x9, xzr; \ + sbc x10, x10, xzr; \ + lsl x5, x11, #32; \ + add x11, x5, x11; \ + mov x5, #-4294967295; \ + umulh x5, x5, x11; \ + mov x4, #4294967295; \ + mul x3, x4, x11; \ + umulh x4, x4, x11; \ + adds x5, x5, x3; \ + adcs x4, x4, x11; \ + adc x3, xzr, xzr; \ + subs x12, x12, x5; \ + sbcs x13, x13, x4; \ + sbcs x2, x2, x3; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbc x11, x11, xzr; \ + lsl x5, x12, 
#32; \ + add x12, x5, x12; \ + mov x5, #-4294967295; \ + umulh x5, x5, x12; \ + mov x4, #4294967295; \ + mul x3, x4, x12; \ + umulh x4, x4, x12; \ + adds x5, x5, x3; \ + adcs x4, x4, x12; \ + adc x3, xzr, xzr; \ + subs x13, x13, x5; \ + sbcs x2, x2, x4; \ + sbcs x9, x9, x3; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbc x12, x12, xzr; \ + lsl x5, x13, #32; \ + add x13, x5, x13; \ + mov x5, #-4294967295; \ + umulh x5, x5, x13; \ + mov x4, #4294967295; \ + mul x3, x4, x13; \ + umulh x4, x4, x13; \ + adds x5, x5, x3; \ + adcs x4, x4, x13; \ + adc x3, xzr, xzr; \ + subs x2, x2, x5; \ + sbcs x9, x9, x4; \ + sbcs x10, x10, x3; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, xzr; \ + adds x2, x2, x14; \ + adcs x9, x9, x15; \ + adcs x10, x10, x16; \ + adcs x11, x11, x17; \ + adcs x12, x12, x19; \ + adcs x13, x13, x20; \ + mov x14, #-4294967295; \ + mov x15, #4294967295; \ + csel x14, x14, xzr, cs; \ + csel x15, x15, xzr, cs; \ + cset x16, cs; \ + adds x2, x2, x14; \ + adcs x9, x9, x15; \ + adcs x10, x10, x16; \ + adcs x11, x11, xzr; \ + adcs x12, x12, xzr; \ + adc x13, x13, xzr; \ + stp x2, x9, [P0]; \ + stp x10, x11, [P0+16]; \ + stp x12, x13, [P0+32] + +// Corresponds exactly to bignum_sub_p384 + +#define sub_p384(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + ldp x9, x10, [P1+32]; \ + ldp x4, x3, [P2+32]; \ + sbcs x9, x9, x4; \ + sbcs x10, x10, x3; \ + csetm x3, lo; \ + mov x4, #4294967295; \ + and x4, x4, x3; \ + adds x5, x5, x4; \ + eor x4, x4, x3; \ + adcs x6, x6, x4; \ + mov x4, #-2; \ + and x4, x4, x3; \ + adcs x7, x7, x4; \ + adcs x8, x8, x3; \ + adcs x9, x9, x3; \ + adc x10, x10, x3; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32] + +S2N_BN_SYMBOL(p384_montjadd): + +// Save regs and make room on stack for temporary variables + + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + sub sp, sp, NSPACE + +// Move the input arguments to stable places + + mov input_z, x0 + mov input_x, x1 + mov input_y, x2 + +// Main code, just a sequence of basic field operations +// 8 * multiply + 3 * square + 7 * subtract + + amontsqr_p384(z1sq,z_1) + amontsqr_p384(z2sq,z_2) + + montmul_p384(y1a,z_2,y_1) + montmul_p384(y2a,z_1,y_2) + + montmul_p384(x2a,z1sq,x_2) + montmul_p384(x1a,z2sq,x_1) + montmul_p384(y2a,z1sq,y2a) + montmul_p384(y1a,z2sq,y1a) + + sub_p384(xd,x2a,x1a) + sub_p384(yd,y2a,y1a) + + amontsqr_p384(zz,xd) + montsqr_p384(ww,yd) + + montmul_p384(zzx1,zz,x1a) + montmul_p384(zzx2,zz,x2a) + + sub_p384(x_3,ww,zzx1) + sub_p384(t1,zzx2,zzx1) + + montmul_p384(xd,xd,z_1) + + sub_p384(x_3,x_3,zzx2) + + sub_p384(t2,zzx1,x_3) + + montmul_p384(t1,t1,y1a) + montmul_p384(z_3,xd,z_2) + montmul_p384(t2,yd,t2) + + sub_p384(y_3,t2,t1) + +// Restore stack and registers + + add sp, sp, NSPACE + + ldp x25, x26, [sp], 16 + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/p384/p384_montjdouble.S b/arm/p384/p384_montjdouble.S new file mode 100644 index 0000000000..8fa2ad3234 --- /dev/null +++ b/arm/p384/p384_montjdouble.S @@ -0,0 +1,963 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). 
+ * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates +// +// extern void p384_montjdouble +// (uint64_t p3[static 18],uint64_t p1[static 18]); +// +// Does p3 := 2 * p1 where all points are regarded as Jacobian triples with +// each coordinate in the Montgomery domain, i.e. x' = (2^384 * x) mod p_384. +// A Jacobian triple (x',y',z') represents affine point (x/z^2,y/z^3). +// +// Standard ARM ABI: X0 = p3, X1 = p1 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjdouble) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjdouble) + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 + +// Stable homes for input arguments during main code sequence + +#define input_z x23 +#define input_x x24 + +// Pointer-offset pairs for inputs and outputs + +#define x_1 input_x, #0 +#define y_1 input_x, #NUMSIZE +#define z_1 input_x, #(2*NUMSIZE) + +#define x_3 input_z, #0 +#define y_3 input_z, #NUMSIZE +#define z_3 input_z, #(2*NUMSIZE) + +// Pointer-offset pairs for temporaries, with some aliasing +// NSPACE is the total stack needed for these temporaries + +#define z2 sp, #(NUMSIZE*0) +#define y2 sp, #(NUMSIZE*1) +#define x2p sp, #(NUMSIZE*2) +#define xy2 sp, #(NUMSIZE*3) + +#define y4 sp, #(NUMSIZE*4) +#define t2 sp, #(NUMSIZE*4) + +#define dx2 sp, #(NUMSIZE*5) +#define t1 sp, #(NUMSIZE*5) + +#define d sp, #(NUMSIZE*6) +#define x4p sp, #(NUMSIZE*6) + +#define NSPACE (NUMSIZE*7) + +// Corresponds exactly to bignum_montmul_p384_alt + +#define montmul_p384(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + mul x12, x3, x5; \ + umulh x13, x3, x5; \ + mul x11, x3, x6; \ + umulh x14, x3, x6; \ + adds x13, x13, x11; \ + ldp x7, x8, [P2+16]; \ + mul x11, x3, x7; \ + umulh x15, x3, x7; \ + adcs x14, x14, x11; \ + mul x11, x3, x8; \ + umulh x16, x3, x8; \ + adcs x15, x15, x11; \ + ldp x9, x10, [P2+32]; \ + mul x11, x3, x9; \ + umulh x17, x3, x9; \ + adcs x16, x16, x11; \ + mul x11, x3, x10; \ + umulh x19, x3, x10; \ + adcs x17, x17, x11; \ + adc x19, x19, xzr; \ + mul x11, x4, x5; \ + adds x13, x13, x11; \ + mul x11, x4, x6; \ + adcs x14, x14, x11; \ + mul x11, x4, x7; \ + adcs x15, x15, x11; \ + mul x11, x4, x8; \ + adcs x16, x16, x11; \ + mul x11, x4, x9; \ + adcs x17, x17, x11; \ + mul x11, x4, x10; \ + adcs x19, x19, x11; \ + cset x20, cs; \ + umulh x11, x4, x5; \ + adds x14, x14, x11; \ + umulh x11, x4, x6; \ + adcs x15, x15, x11; \ + umulh x11, x4, x7; \ + adcs x16, x16, x11; \ + umulh x11, x4, x8; \ + adcs x17, x17, x11; \ + umulh x11, x4, x9; \ + adcs x19, x19, x11; \ + umulh x11, x4, x10; \ + adc x20, x20, x11; \ + ldp x3, x4, [P1+16]; \ + mul x11, x3, x5; \ + adds x14, x14, x11; \ + mul x11, x3, x6; \ + adcs x15, x15, x11; \ + mul x11, x3, x7; \ + adcs x16, x16, x11; \ + mul x11, x3, x8; \ + adcs x17, x17, x11; \ + mul x11, x3, x9; \ + adcs x19, x19, x11; \ + mul x11, x3, x10; \ + adcs x20, x20, x11; \ + cset x21, cs; \ + umulh x11, 
x3, x5; \ + adds x15, x15, x11; \ + umulh x11, x3, x6; \ + adcs x16, x16, x11; \ + umulh x11, x3, x7; \ + adcs x17, x17, x11; \ + umulh x11, x3, x8; \ + adcs x19, x19, x11; \ + umulh x11, x3, x9; \ + adcs x20, x20, x11; \ + umulh x11, x3, x10; \ + adc x21, x21, x11; \ + mul x11, x4, x5; \ + adds x15, x15, x11; \ + mul x11, x4, x6; \ + adcs x16, x16, x11; \ + mul x11, x4, x7; \ + adcs x17, x17, x11; \ + mul x11, x4, x8; \ + adcs x19, x19, x11; \ + mul x11, x4, x9; \ + adcs x20, x20, x11; \ + mul x11, x4, x10; \ + adcs x21, x21, x11; \ + cset x22, cs; \ + umulh x11, x4, x5; \ + adds x16, x16, x11; \ + umulh x11, x4, x6; \ + adcs x17, x17, x11; \ + umulh x11, x4, x7; \ + adcs x19, x19, x11; \ + umulh x11, x4, x8; \ + adcs x20, x20, x11; \ + umulh x11, x4, x9; \ + adcs x21, x21, x11; \ + umulh x11, x4, x10; \ + adc x22, x22, x11; \ + ldp x3, x4, [P1+32]; \ + mul x11, x3, x5; \ + adds x16, x16, x11; \ + mul x11, x3, x6; \ + adcs x17, x17, x11; \ + mul x11, x3, x7; \ + adcs x19, x19, x11; \ + mul x11, x3, x8; \ + adcs x20, x20, x11; \ + mul x11, x3, x9; \ + adcs x21, x21, x11; \ + mul x11, x3, x10; \ + adcs x22, x22, x11; \ + cset x2, cs; \ + umulh x11, x3, x5; \ + adds x17, x17, x11; \ + umulh x11, x3, x6; \ + adcs x19, x19, x11; \ + umulh x11, x3, x7; \ + adcs x20, x20, x11; \ + umulh x11, x3, x8; \ + adcs x21, x21, x11; \ + umulh x11, x3, x9; \ + adcs x22, x22, x11; \ + umulh x11, x3, x10; \ + adc x2, x2, x11; \ + mul x11, x4, x5; \ + adds x17, x17, x11; \ + mul x11, x4, x6; \ + adcs x19, x19, x11; \ + mul x11, x4, x7; \ + adcs x20, x20, x11; \ + mul x11, x4, x8; \ + adcs x21, x21, x11; \ + mul x11, x4, x9; \ + adcs x22, x22, x11; \ + mul x11, x4, x10; \ + adcs x2, x2, x11; \ + cset x1, cs; \ + umulh x11, x4, x5; \ + adds x19, x19, x11; \ + umulh x11, x4, x6; \ + adcs x20, x20, x11; \ + umulh x11, x4, x7; \ + adcs x21, x21, x11; \ + umulh x11, x4, x8; \ + adcs x22, x22, x11; \ + umulh x11, x4, x9; \ + adcs x2, x2, x11; \ + umulh x11, x4, x10; \ + adc x1, x1, x11; \ + lsl x7, x12, #32; \ + add x12, x7, x12; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x12; \ + mov x6, #0xffffffff; \ + mul x5, x6, x12; \ + umulh x6, x6, x12; \ + adds x7, x7, x5; \ + adcs x6, x6, x12; \ + adc x5, xzr, xzr; \ + subs x13, x13, x7; \ + sbcs x14, x14, x6; \ + sbcs x15, x15, x5; \ + sbcs x16, x16, xzr; \ + sbcs x17, x17, xzr; \ + sbc x12, x12, xzr; \ + lsl x7, x13, #32; \ + add x13, x7, x13; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x13; \ + mov x6, #0xffffffff; \ + mul x5, x6, x13; \ + umulh x6, x6, x13; \ + adds x7, x7, x5; \ + adcs x6, x6, x13; \ + adc x5, xzr, xzr; \ + subs x14, x14, x7; \ + sbcs x15, x15, x6; \ + sbcs x16, x16, x5; \ + sbcs x17, x17, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, xzr; \ + lsl x7, x14, #32; \ + add x14, x7, x14; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x14; \ + mov x6, #0xffffffff; \ + mul x5, x6, x14; \ + umulh x6, x6, x14; \ + adds x7, x7, x5; \ + adcs x6, x6, x14; \ + adc x5, xzr, xzr; \ + subs x15, x15, x7; \ + sbcs x16, x16, x6; \ + sbcs x17, x17, x5; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + sbc x14, x14, xzr; \ + lsl x7, x15, #32; \ + add x15, x7, x15; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x15; \ + mov x6, #0xffffffff; \ + mul x5, x6, x15; \ + umulh x6, x6, x15; \ + adds x7, x7, x5; \ + adcs x6, x6, x15; \ + adc x5, xzr, xzr; \ + subs x16, x16, x7; \ + sbcs x17, x17, x6; \ + sbcs x12, x12, x5; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbc x15, x15, xzr; \ + lsl x7, x16, #32; \ + add x16, x7, x16; \ + mov x7, 
#0xffffffff00000001; \ + umulh x7, x7, x16; \ + mov x6, #0xffffffff; \ + mul x5, x6, x16; \ + umulh x6, x6, x16; \ + adds x7, x7, x5; \ + adcs x6, x6, x16; \ + adc x5, xzr, xzr; \ + subs x17, x17, x7; \ + sbcs x12, x12, x6; \ + sbcs x13, x13, x5; \ + sbcs x14, x14, xzr; \ + sbcs x15, x15, xzr; \ + sbc x16, x16, xzr; \ + lsl x7, x17, #32; \ + add x17, x7, x17; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x17; \ + mov x6, #0xffffffff; \ + mul x5, x6, x17; \ + umulh x6, x6, x17; \ + adds x7, x7, x5; \ + adcs x6, x6, x17; \ + adc x5, xzr, xzr; \ + subs x12, x12, x7; \ + sbcs x13, x13, x6; \ + sbcs x14, x14, x5; \ + sbcs x15, x15, xzr; \ + sbcs x16, x16, xzr; \ + sbc x17, x17, xzr; \ + adds x12, x12, x19; \ + adcs x13, x13, x20; \ + adcs x14, x14, x21; \ + adcs x15, x15, x22; \ + adcs x16, x16, x2; \ + adcs x17, x17, x1; \ + adc x10, xzr, xzr; \ + mov x11, #0xffffffff00000001; \ + adds x19, x12, x11; \ + mov x11, #0xffffffff; \ + adcs x20, x13, x11; \ + mov x11, #0x1; \ + adcs x21, x14, x11; \ + adcs x22, x15, xzr; \ + adcs x2, x16, xzr; \ + adcs x1, x17, xzr; \ + adcs x10, x10, xzr; \ + csel x12, x12, x19, eq; \ + csel x13, x13, x20, eq; \ + csel x14, x14, x21, eq; \ + csel x15, x15, x22, eq; \ + csel x16, x16, x2, eq; \ + csel x17, x17, x1, eq; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16]; \ + stp x16, x17, [P0+32] + +// Corresponds exactly to bignum_montsqr_p384_alt + +#define montsqr_p384(P0,P1) \ + ldp x2, x3, [P1]; \ + mul x9, x2, x3; \ + umulh x10, x2, x3; \ + ldp x4, x5, [P1+16]; \ + mul x8, x2, x4; \ + adds x10, x10, x8; \ + mul x11, x2, x5; \ + mul x8, x3, x4; \ + adcs x11, x11, x8; \ + umulh x12, x2, x5; \ + mul x8, x3, x5; \ + adcs x12, x12, x8; \ + ldp x6, x7, [P1+32]; \ + mul x13, x2, x7; \ + mul x8, x3, x6; \ + adcs x13, x13, x8; \ + umulh x14, x2, x7; \ + mul x8, x3, x7; \ + adcs x14, x14, x8; \ + mul x15, x5, x6; \ + adcs x15, x15, xzr; \ + umulh x16, x5, x6; \ + adc x16, x16, xzr; \ + umulh x8, x2, x4; \ + adds x11, x11, x8; \ + umulh x8, x3, x4; \ + adcs x12, x12, x8; \ + umulh x8, x3, x5; \ + adcs x13, x13, x8; \ + umulh x8, x3, x6; \ + adcs x14, x14, x8; \ + umulh x8, x3, x7; \ + adcs x15, x15, x8; \ + adc x16, x16, xzr; \ + mul x8, x2, x6; \ + adds x12, x12, x8; \ + mul x8, x4, x5; \ + adcs x13, x13, x8; \ + mul x8, x4, x6; \ + adcs x14, x14, x8; \ + mul x8, x4, x7; \ + adcs x15, x15, x8; \ + mul x8, x5, x7; \ + adcs x16, x16, x8; \ + mul x17, x6, x7; \ + adcs x17, x17, xzr; \ + umulh x19, x6, x7; \ + adc x19, x19, xzr; \ + umulh x8, x2, x6; \ + adds x13, x13, x8; \ + umulh x8, x4, x5; \ + adcs x14, x14, x8; \ + umulh x8, x4, x6; \ + adcs x15, x15, x8; \ + umulh x8, x4, x7; \ + adcs x16, x16, x8; \ + umulh x8, x5, x7; \ + adcs x17, x17, x8; \ + adc x19, x19, xzr; \ + adds x9, x9, x9; \ + adcs x10, x10, x10; \ + adcs x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + adcs x15, x15, x15; \ + adcs x16, x16, x16; \ + adcs x17, x17, x17; \ + adcs x19, x19, x19; \ + cset x20, hs; \ + umulh x8, x2, x2; \ + mul x2, x2, x2; \ + adds x9, x9, x8; \ + mul x8, x3, x3; \ + adcs x10, x10, x8; \ + umulh x8, x3, x3; \ + adcs x11, x11, x8; \ + mul x8, x4, x4; \ + adcs x12, x12, x8; \ + umulh x8, x4, x4; \ + adcs x13, x13, x8; \ + mul x8, x5, x5; \ + adcs x14, x14, x8; \ + umulh x8, x5, x5; \ + adcs x15, x15, x8; \ + mul x8, x6, x6; \ + adcs x16, x16, x8; \ + umulh x8, x6, x6; \ + adcs x17, x17, x8; \ + mul x8, x7, x7; \ + adcs x19, x19, x8; \ + umulh x8, x7, x7; \ + adc x20, x20, x8; \ + lsl x5, x2, #32; \ + add x2, x5, x2; \ + mov x5, 
#-4294967295; \ + umulh x5, x5, x2; \ + mov x4, #4294967295; \ + mul x3, x4, x2; \ + umulh x4, x4, x2; \ + adds x5, x5, x3; \ + adcs x4, x4, x2; \ + adc x3, xzr, xzr; \ + subs x9, x9, x5; \ + sbcs x10, x10, x4; \ + sbcs x11, x11, x3; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + sbc x2, x2, xzr; \ + lsl x5, x9, #32; \ + add x9, x5, x9; \ + mov x5, #-4294967295; \ + umulh x5, x5, x9; \ + mov x4, #4294967295; \ + mul x3, x4, x9; \ + umulh x4, x4, x9; \ + adds x5, x5, x3; \ + adcs x4, x4, x9; \ + adc x3, xzr, xzr; \ + subs x10, x10, x5; \ + sbcs x11, x11, x4; \ + sbcs x12, x12, x3; \ + sbcs x13, x13, xzr; \ + sbcs x2, x2, xzr; \ + sbc x9, x9, xzr; \ + lsl x5, x10, #32; \ + add x10, x5, x10; \ + mov x5, #-4294967295; \ + umulh x5, x5, x10; \ + mov x4, #4294967295; \ + mul x3, x4, x10; \ + umulh x4, x4, x10; \ + adds x5, x5, x3; \ + adcs x4, x4, x10; \ + adc x3, xzr, xzr; \ + subs x11, x11, x5; \ + sbcs x12, x12, x4; \ + sbcs x13, x13, x3; \ + sbcs x2, x2, xzr; \ + sbcs x9, x9, xzr; \ + sbc x10, x10, xzr; \ + lsl x5, x11, #32; \ + add x11, x5, x11; \ + mov x5, #-4294967295; \ + umulh x5, x5, x11; \ + mov x4, #4294967295; \ + mul x3, x4, x11; \ + umulh x4, x4, x11; \ + adds x5, x5, x3; \ + adcs x4, x4, x11; \ + adc x3, xzr, xzr; \ + subs x12, x12, x5; \ + sbcs x13, x13, x4; \ + sbcs x2, x2, x3; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbc x11, x11, xzr; \ + lsl x5, x12, #32; \ + add x12, x5, x12; \ + mov x5, #-4294967295; \ + umulh x5, x5, x12; \ + mov x4, #4294967295; \ + mul x3, x4, x12; \ + umulh x4, x4, x12; \ + adds x5, x5, x3; \ + adcs x4, x4, x12; \ + adc x3, xzr, xzr; \ + subs x13, x13, x5; \ + sbcs x2, x2, x4; \ + sbcs x9, x9, x3; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbc x12, x12, xzr; \ + lsl x5, x13, #32; \ + add x13, x5, x13; \ + mov x5, #-4294967295; \ + umulh x5, x5, x13; \ + mov x4, #4294967295; \ + mul x3, x4, x13; \ + umulh x4, x4, x13; \ + adds x5, x5, x3; \ + adcs x4, x4, x13; \ + adc x3, xzr, xzr; \ + subs x2, x2, x5; \ + sbcs x9, x9, x4; \ + sbcs x10, x10, x3; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, xzr; \ + adds x2, x2, x14; \ + adcs x9, x9, x15; \ + adcs x10, x10, x16; \ + adcs x11, x11, x17; \ + adcs x12, x12, x19; \ + adcs x13, x13, x20; \ + adc x6, xzr, xzr; \ + mov x8, #-4294967295; \ + adds x14, x2, x8; \ + mov x8, #4294967295; \ + adcs x15, x9, x8; \ + mov x8, #1; \ + adcs x16, x10, x8; \ + adcs x17, x11, xzr; \ + adcs x19, x12, xzr; \ + adcs x20, x13, xzr; \ + adcs x6, x6, xzr; \ + csel x2, x2, x14, eq; \ + csel x9, x9, x15, eq; \ + csel x10, x10, x16, eq; \ + csel x11, x11, x17, eq; \ + csel x12, x12, x19, eq; \ + csel x13, x13, x20, eq; \ + stp x2, x9, [P0]; \ + stp x10, x11, [P0+16]; \ + stp x12, x13, [P0+32] + +// Corresponds exactly to bignum_sub_p384 + +#define sub_p384(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + ldp x9, x10, [P1+32]; \ + ldp x4, x3, [P2+32]; \ + sbcs x9, x9, x4; \ + sbcs x10, x10, x3; \ + csetm x3, lo; \ + mov x4, #4294967295; \ + and x4, x4, x3; \ + adds x5, x5, x4; \ + eor x4, x4, x3; \ + adcs x6, x6, x4; \ + mov x4, #-2; \ + and x4, x4, x3; \ + adcs x7, x7, x4; \ + adcs x8, x8, x3; \ + adcs x9, x9, x3; \ + adc x10, x10, x3; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32] + +// Corresponds exactly to bignum_add_p384 + +#define add_p384(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + adds x5, x5, x4; \ + 
adcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + adcs x7, x7, x4; \ + adcs x8, x8, x3; \ + ldp x9, x10, [P1+32]; \ + ldp x4, x3, [P2+32]; \ + adcs x9, x9, x4; \ + adcs x10, x10, x3; \ + adc x3, xzr, xzr; \ + mov x4, #0xffffffff; \ + cmp x5, x4; \ + mov x4, #0xffffffff00000000; \ + sbcs xzr, x6, x4; \ + mov x4, #0xfffffffffffffffe; \ + sbcs xzr, x7, x4; \ + adcs xzr, x8, xzr; \ + adcs xzr, x9, xzr; \ + adcs xzr, x10, xzr; \ + adcs x3, x3, xzr; \ + csetm x3, ne; \ + mov x4, #0xffffffff; \ + and x4, x4, x3; \ + subs x5, x5, x4; \ + eor x4, x4, x3; \ + sbcs x6, x6, x4; \ + mov x4, #0xfffffffffffffffe; \ + and x4, x4, x3; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + sbcs x9, x9, x3; \ + sbc x10, x10, x3; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32] + +// P0 = 4 * P1 - P2 + +#define cmsub41_p384(P0,P1,P2) \ + ldp x1, x2, [P1]; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P1+32]; \ + lsl x0, x1, #2; \ + ldp x7, x8, [P2]; \ + subs x0, x0, x7; \ + extr x1, x2, x1, #62; \ + sbcs x1, x1, x8; \ + ldp x7, x8, [P2+16]; \ + extr x2, x3, x2, #62; \ + sbcs x2, x2, x7; \ + extr x3, x4, x3, #62; \ + sbcs x3, x3, x8; \ + extr x4, x5, x4, #62; \ + ldp x7, x8, [P2+32]; \ + sbcs x4, x4, x7; \ + extr x5, x6, x5, #62; \ + sbcs x5, x5, x8; \ + lsr x6, x6, #62; \ + adc x6, x6, xzr; \ + lsl x7, x6, #32; \ + subs x8, x6, x7; \ + sbc x7, x7, xzr; \ + adds x0, x0, x8; \ + adcs x1, x1, x7; \ + adcs x2, x2, x6; \ + adcs x3, x3, xzr; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + csetm x8, cc; \ + mov x9, #0xffffffff; \ + and x9, x9, x8; \ + adds x0, x0, x9; \ + eor x9, x9, x8; \ + adcs x1, x1, x9; \ + mov x9, #0xfffffffffffffffe; \ + and x9, x9, x8; \ + adcs x2, x2, x9; \ + adcs x3, x3, x8; \ + adcs x4, x4, x8; \ + adc x5, x5, x8; \ + stp x0, x1, [P0]; \ + stp x2, x3, [P0+16]; \ + stp x4, x5, [P0+32] + +// P0 = C * P1 - D * P2 + +#define cmsub_p384(P0,C,P1,D,P2) \ + ldp x0, x1, [P2]; \ + mov x6, #0x00000000ffffffff; \ + subs x6, x6, x0; \ + mov x7, #0xffffffff00000000; \ + sbcs x7, x7, x1; \ + ldp x0, x1, [P2+16]; \ + mov x8, #0xfffffffffffffffe; \ + sbcs x8, x8, x0; \ + mov x13, #0xffffffffffffffff; \ + sbcs x9, x13, x1; \ + ldp x0, x1, [P2+32]; \ + sbcs x10, x13, x0; \ + sbc x11, x13, x1; \ + mov x12, D; \ + mul x0, x12, x6; \ + mul x1, x12, x7; \ + mul x2, x12, x8; \ + mul x3, x12, x9; \ + mul x4, x12, x10; \ + mul x5, x12, x11; \ + umulh x6, x12, x6; \ + umulh x7, x12, x7; \ + umulh x8, x12, x8; \ + umulh x9, x12, x9; \ + umulh x10, x12, x10; \ + umulh x12, x12, x11; \ + adds x1, x1, x6; \ + adcs x2, x2, x7; \ + adcs x3, x3, x8; \ + adcs x4, x4, x9; \ + adcs x5, x5, x10; \ + mov x6, #1; \ + adc x6, x12, x6; \ + ldp x8, x9, [P1]; \ + ldp x10, x11, [P1+16]; \ + ldp x12, x13, [P1+32]; \ + mov x14, C; \ + mul x15, x14, x8; \ + umulh x8, x14, x8; \ + adds x0, x0, x15; \ + mul x15, x14, x9; \ + umulh x9, x14, x9; \ + adcs x1, x1, x15; \ + mul x15, x14, x10; \ + umulh x10, x14, x10; \ + adcs x2, x2, x15; \ + mul x15, x14, x11; \ + umulh x11, x14, x11; \ + adcs x3, x3, x15; \ + mul x15, x14, x12; \ + umulh x12, x14, x12; \ + adcs x4, x4, x15; \ + mul x15, x14, x13; \ + umulh x13, x14, x13; \ + adcs x5, x5, x15; \ + adc x6, x6, xzr; \ + adds x1, x1, x8; \ + adcs x2, x2, x9; \ + adcs x3, x3, x10; \ + adcs x4, x4, x11; \ + adcs x5, x5, x12; \ + adcs x6, x6, x13; \ + lsl x7, x6, #32; \ + subs x8, x6, x7; \ + sbc x7, x7, xzr; \ + adds x0, x0, x8; \ + adcs x1, x1, x7; \ + adcs x2, x2, x6; \ + adcs x3, x3, xzr; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + csetm x6, cc; \ + mov x7, 
#0xffffffff; \ + and x7, x7, x6; \ + adds x0, x0, x7; \ + eor x7, x7, x6; \ + adcs x1, x1, x7; \ + mov x7, #0xfffffffffffffffe; \ + and x7, x7, x6; \ + adcs x2, x2, x7; \ + adcs x3, x3, x6; \ + adcs x4, x4, x6; \ + adc x5, x5, x6; \ + stp x0, x1, [P0]; \ + stp x2, x3, [P0+16]; \ + stp x4, x5, [P0+32] + +// A weak version of add that only guarantees sum in 6 digits + +#define weakadd_p384(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + adds x5, x5, x4; \ + adcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + adcs x7, x7, x4; \ + adcs x8, x8, x3; \ + ldp x9, x10, [P1+32]; \ + ldp x4, x3, [P2+32]; \ + adcs x9, x9, x4; \ + adcs x10, x10, x3; \ + csetm x3, cs; \ + mov x4, #0xffffffff; \ + and x4, x4, x3; \ + subs x5, x5, x4; \ + eor x4, x4, x3; \ + sbcs x6, x6, x4; \ + mov x4, #0xfffffffffffffffe; \ + and x4, x4, x3; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + sbcs x9, x9, x3; \ + sbc x10, x10, x3; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32] + +// P0 = 3 * P1 - 8 * P2 + +#define cmsub38_p384(P0,P1,P2) \ + ldp x0, x1, [P2]; \ + mov x6, #0x00000000ffffffff; \ + subs x6, x6, x0; \ + mov x7, #0xffffffff00000000; \ + sbcs x7, x7, x1; \ + ldp x0, x1, [P2+16]; \ + mov x8, #0xfffffffffffffffe; \ + sbcs x8, x8, x0; \ + mov x13, #0xffffffffffffffff; \ + sbcs x9, x13, x1; \ + ldp x0, x1, [P2+32]; \ + sbcs x10, x13, x0; \ + sbc x11, x13, x1; \ + lsl x0, x6, #3; \ + extr x1, x7, x6, #61; \ + extr x2, x8, x7, #61; \ + extr x3, x9, x8, #61; \ + extr x4, x10, x9, #61; \ + extr x5, x11, x10, #61; \ + lsr x6, x11, #61; \ + add x6, x6, #1; \ + ldp x8, x9, [P1]; \ + ldp x10, x11, [P1+16]; \ + ldp x12, x13, [P1+32]; \ + mov x14, 3; \ + mul x15, x14, x8; \ + umulh x8, x14, x8; \ + adds x0, x0, x15; \ + mul x15, x14, x9; \ + umulh x9, x14, x9; \ + adcs x1, x1, x15; \ + mul x15, x14, x10; \ + umulh x10, x14, x10; \ + adcs x2, x2, x15; \ + mul x15, x14, x11; \ + umulh x11, x14, x11; \ + adcs x3, x3, x15; \ + mul x15, x14, x12; \ + umulh x12, x14, x12; \ + adcs x4, x4, x15; \ + mul x15, x14, x13; \ + umulh x13, x14, x13; \ + adcs x5, x5, x15; \ + adc x6, x6, xzr; \ + adds x1, x1, x8; \ + adcs x2, x2, x9; \ + adcs x3, x3, x10; \ + adcs x4, x4, x11; \ + adcs x5, x5, x12; \ + adcs x6, x6, x13; \ + lsl x7, x6, #32; \ + subs x8, x6, x7; \ + sbc x7, x7, xzr; \ + adds x0, x0, x8; \ + adcs x1, x1, x7; \ + adcs x2, x2, x6; \ + adcs x3, x3, xzr; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + csetm x6, cc; \ + mov x7, #0xffffffff; \ + and x7, x7, x6; \ + adds x0, x0, x7; \ + eor x7, x7, x6; \ + adcs x1, x1, x7; \ + mov x7, #0xfffffffffffffffe; \ + and x7, x7, x6; \ + adcs x2, x2, x7; \ + adcs x3, x3, x6; \ + adcs x4, x4, x6; \ + adc x5, x5, x6; \ + stp x0, x1, [P0]; \ + stp x2, x3, [P0+16]; \ + stp x4, x5, [P0+32] + +S2N_BN_SYMBOL(p384_montjdouble): + +// Save regs and make room on stack for temporary variables + + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! 
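+
+// Informal cross-check of the constants used below (annotation added for
+// this walkthrough, not from the original source): with
+// m = 3*(x + z^2)*(x - z^2) = 3*x2p, the classical a = -3 doubling
+// formulas are x' = m^2 - 8*x*y^2, y' = m*(4*x*y^2 - x') - 8*y^4 and
+// z' = 2*y*z. Since x4p = x2p^2 and xy2 = x*y^2, taking
+// d = 12*xy2 - 9*x4p gives x' = 4*xy2 - d = 9*x4p - 8*xy2 = m^2 - 8*x*y^2,
+// and then 4*xy2 - x' = d, so y' = m*d - 8*y4 = 3*dx2 - 8*y4
+// with dx2 = d*x2p.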
+ sub sp, sp, NSPACE + +// Move the input arguments to stable places + + mov input_z, x0 + mov input_x, x1 + +// Main code, just a sequence of basic field operations + +// z2 = z^2 +// y2 = y^2 + + montsqr_p384(z2,z_1) + montsqr_p384(y2,y_1) + +// x2p = x^2 - z^4 = (x + z^2) * (x - z^2) + + weakadd_p384(t1,x_1,z2) + sub_p384(t2,x_1,z2) + montmul_p384(x2p,t1,t2) + +// t1 = y + z +// x4p = x2p^2 +// xy2 = x * y^2 + + add_p384(t1,y_1,z_1) + montsqr_p384(x4p,x2p) + montmul_p384(xy2,x_1,y2) + +// t2 = (y + z)^2 + + montsqr_p384(t2,t1) + +// d = 12 * xy2 - 9 * x4p +// t1 = y^2 + 2 * y * z + + cmsub_p384(d,12,xy2,9,x4p) + sub_p384(t1,t2,z2) + +// y4 = y^4 + + montsqr_p384(y4,y2) + +// z_3' = 2 * y * z +// dx2 = d * x2p + + sub_p384(z_3,t1,y2) + montmul_p384(dx2,d,x2p) + +// x' = 4 * xy2 - d + + cmsub41_p384(x_3,xy2,d) + +// y' = 3 * dx2 - 8 * y4 + + cmsub38_p384(y_3,dx2,y4) + +// Restore stack and registers + + add sp, sp, NSPACE + + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/p384/p384_montjmixadd.S b/arm/p384/p384_montjmixadd.S new file mode 100644 index 0000000000..f7467be289 --- /dev/null +++ b/arm/p384/p384_montjmixadd.S @@ -0,0 +1,884 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Point mixed addition on NIST curve P-384 in Montgomery-Jacobian coordinates +// +// extern void p384_montjmixadd +// (uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 12]); +// +// Does p3 := p1 + p2 where all points are regarded as Jacobian triples with +// each coordinate in the Montgomery domain, i.e. x' = (2^384 * x) mod p_384. +// A Jacobian triple (x',y',z') represents affine point (x/z^2,y/z^3). +// The "mixed" part means that p2 only has x and y coordinates, with the +// implicit z coordinate assumed to be the identity. 
+// +// Standard ARM ABI: X0 = p3, X1 = p1, X2 = p2 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjmixadd) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjmixadd) + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 + +// Stable homes for input arguments during main code sequence + +#define input_z x24 +#define input_x x25 +#define input_y x26 + +// Pointer-offset pairs for inputs and outputs + +#define x_1 input_x, #0 +#define y_1 input_x, #NUMSIZE +#define z_1 input_x, #(2*NUMSIZE) + +#define x_2 input_y, #0 +#define y_2 input_y, #NUMSIZE + +#define x_3 input_z, #0 +#define y_3 input_z, #NUMSIZE +#define z_3 input_z, #(2*NUMSIZE) + +// Pointer-offset pairs for temporaries, with some aliasing +// NSPACE is the total stack needed for these temporaries + +#define zp2 sp, #(NUMSIZE*0) +#define ww sp, #(NUMSIZE*0) + +#define yd sp, #(NUMSIZE*1) +#define y2a sp, #(NUMSIZE*1) + +#define x2a sp, #(NUMSIZE*2) +#define zzx2 sp, #(NUMSIZE*2) + +#define zz sp, #(NUMSIZE*3) +#define t1 sp, #(NUMSIZE*3) + +#define t2 sp, #(NUMSIZE*4) +#define zzx1 sp, #(NUMSIZE*4) + +#define xd sp, #(NUMSIZE*5) + +#define NSPACE (NUMSIZE*6) + +// Corresponds exactly to bignum_montmul_p384_alt + +#define montmul_p384(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + mul x12, x3, x5; \ + umulh x13, x3, x5; \ + mul x11, x3, x6; \ + umulh x14, x3, x6; \ + adds x13, x13, x11; \ + ldp x7, x8, [P2+16]; \ + mul x11, x3, x7; \ + umulh x15, x3, x7; \ + adcs x14, x14, x11; \ + mul x11, x3, x8; \ + umulh x16, x3, x8; \ + adcs x15, x15, x11; \ + ldp x9, x10, [P2+32]; \ + mul x11, x3, x9; \ + umulh x17, x3, x9; \ + adcs x16, x16, x11; \ + mul x11, x3, x10; \ + umulh x19, x3, x10; \ + adcs x17, x17, x11; \ + adc x19, x19, xzr; \ + mul x11, x4, x5; \ + adds x13, x13, x11; \ + mul x11, x4, x6; \ + adcs x14, x14, x11; \ + mul x11, x4, x7; \ + adcs x15, x15, x11; \ + mul x11, x4, x8; \ + adcs x16, x16, x11; \ + mul x11, x4, x9; \ + adcs x17, x17, x11; \ + mul x11, x4, x10; \ + adcs x19, x19, x11; \ + cset x20, cs; \ + umulh x11, x4, x5; \ + adds x14, x14, x11; \ + umulh x11, x4, x6; \ + adcs x15, x15, x11; \ + umulh x11, x4, x7; \ + adcs x16, x16, x11; \ + umulh x11, x4, x8; \ + adcs x17, x17, x11; \ + umulh x11, x4, x9; \ + adcs x19, x19, x11; \ + umulh x11, x4, x10; \ + adc x20, x20, x11; \ + ldp x3, x4, [P1+16]; \ + mul x11, x3, x5; \ + adds x14, x14, x11; \ + mul x11, x3, x6; \ + adcs x15, x15, x11; \ + mul x11, x3, x7; \ + adcs x16, x16, x11; \ + mul x11, x3, x8; \ + adcs x17, x17, x11; \ + mul x11, x3, x9; \ + adcs x19, x19, x11; \ + mul x11, x3, x10; \ + adcs x20, x20, x11; \ + cset x21, cs; \ + umulh x11, x3, x5; \ + adds x15, x15, x11; \ + umulh x11, x3, x6; \ + adcs x16, x16, x11; \ + umulh x11, x3, x7; \ + adcs x17, x17, x11; \ + umulh x11, x3, x8; \ + adcs x19, x19, x11; \ + umulh x11, x3, x9; \ + adcs x20, x20, x11; \ + umulh x11, x3, x10; \ + adc x21, x21, x11; \ + mul x11, x4, x5; \ + adds x15, x15, x11; \ + mul x11, x4, x6; \ + adcs x16, x16, x11; \ + mul x11, x4, x7; \ + adcs x17, x17, x11; \ + mul x11, x4, x8; \ + adcs x19, x19, x11; \ + mul x11, x4, x9; \ + adcs x20, x20, x11; \ + mul x11, x4, x10; \ + adcs x21, x21, x11; \ + cset x22, cs; \ + umulh x11, x4, x5; \ + adds x16, x16, x11; \ + umulh x11, x4, x6; \ + adcs x17, x17, x11; \ + umulh x11, x4, x7; \ + adcs x19, x19, x11; \ + umulh x11, x4, x8; \ + adcs x20, x20, x11; \ + umulh x11, x4, x9; \ + adcs x21, x21, x11; \ + 
umulh x11, x4, x10; \ + adc x22, x22, x11; \ + ldp x3, x4, [P1+32]; \ + mul x11, x3, x5; \ + adds x16, x16, x11; \ + mul x11, x3, x6; \ + adcs x17, x17, x11; \ + mul x11, x3, x7; \ + adcs x19, x19, x11; \ + mul x11, x3, x8; \ + adcs x20, x20, x11; \ + mul x11, x3, x9; \ + adcs x21, x21, x11; \ + mul x11, x3, x10; \ + adcs x22, x22, x11; \ + cset x2, cs; \ + umulh x11, x3, x5; \ + adds x17, x17, x11; \ + umulh x11, x3, x6; \ + adcs x19, x19, x11; \ + umulh x11, x3, x7; \ + adcs x20, x20, x11; \ + umulh x11, x3, x8; \ + adcs x21, x21, x11; \ + umulh x11, x3, x9; \ + adcs x22, x22, x11; \ + umulh x11, x3, x10; \ + adc x2, x2, x11; \ + mul x11, x4, x5; \ + adds x17, x17, x11; \ + mul x11, x4, x6; \ + adcs x19, x19, x11; \ + mul x11, x4, x7; \ + adcs x20, x20, x11; \ + mul x11, x4, x8; \ + adcs x21, x21, x11; \ + mul x11, x4, x9; \ + adcs x22, x22, x11; \ + mul x11, x4, x10; \ + adcs x2, x2, x11; \ + cset x1, cs; \ + umulh x11, x4, x5; \ + adds x19, x19, x11; \ + umulh x11, x4, x6; \ + adcs x20, x20, x11; \ + umulh x11, x4, x7; \ + adcs x21, x21, x11; \ + umulh x11, x4, x8; \ + adcs x22, x22, x11; \ + umulh x11, x4, x9; \ + adcs x2, x2, x11; \ + umulh x11, x4, x10; \ + adc x1, x1, x11; \ + lsl x7, x12, #32; \ + add x12, x7, x12; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x12; \ + mov x6, #0xffffffff; \ + mul x5, x6, x12; \ + umulh x6, x6, x12; \ + adds x7, x7, x5; \ + adcs x6, x6, x12; \ + adc x5, xzr, xzr; \ + subs x13, x13, x7; \ + sbcs x14, x14, x6; \ + sbcs x15, x15, x5; \ + sbcs x16, x16, xzr; \ + sbcs x17, x17, xzr; \ + sbc x12, x12, xzr; \ + lsl x7, x13, #32; \ + add x13, x7, x13; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x13; \ + mov x6, #0xffffffff; \ + mul x5, x6, x13; \ + umulh x6, x6, x13; \ + adds x7, x7, x5; \ + adcs x6, x6, x13; \ + adc x5, xzr, xzr; \ + subs x14, x14, x7; \ + sbcs x15, x15, x6; \ + sbcs x16, x16, x5; \ + sbcs x17, x17, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, xzr; \ + lsl x7, x14, #32; \ + add x14, x7, x14; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x14; \ + mov x6, #0xffffffff; \ + mul x5, x6, x14; \ + umulh x6, x6, x14; \ + adds x7, x7, x5; \ + adcs x6, x6, x14; \ + adc x5, xzr, xzr; \ + subs x15, x15, x7; \ + sbcs x16, x16, x6; \ + sbcs x17, x17, x5; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + sbc x14, x14, xzr; \ + lsl x7, x15, #32; \ + add x15, x7, x15; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x15; \ + mov x6, #0xffffffff; \ + mul x5, x6, x15; \ + umulh x6, x6, x15; \ + adds x7, x7, x5; \ + adcs x6, x6, x15; \ + adc x5, xzr, xzr; \ + subs x16, x16, x7; \ + sbcs x17, x17, x6; \ + sbcs x12, x12, x5; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbc x15, x15, xzr; \ + lsl x7, x16, #32; \ + add x16, x7, x16; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x16; \ + mov x6, #0xffffffff; \ + mul x5, x6, x16; \ + umulh x6, x6, x16; \ + adds x7, x7, x5; \ + adcs x6, x6, x16; \ + adc x5, xzr, xzr; \ + subs x17, x17, x7; \ + sbcs x12, x12, x6; \ + sbcs x13, x13, x5; \ + sbcs x14, x14, xzr; \ + sbcs x15, x15, xzr; \ + sbc x16, x16, xzr; \ + lsl x7, x17, #32; \ + add x17, x7, x17; \ + mov x7, #0xffffffff00000001; \ + umulh x7, x7, x17; \ + mov x6, #0xffffffff; \ + mul x5, x6, x17; \ + umulh x6, x6, x17; \ + adds x7, x7, x5; \ + adcs x6, x6, x17; \ + adc x5, xzr, xzr; \ + subs x12, x12, x7; \ + sbcs x13, x13, x6; \ + sbcs x14, x14, x5; \ + sbcs x15, x15, xzr; \ + sbcs x16, x16, xzr; \ + sbc x17, x17, xzr; \ + adds x12, x12, x19; \ + adcs x13, x13, x20; \ + adcs x14, x14, x21; \ + adcs x15, x15, x22; \ + adcs x16, 
x16, x2; \ + adcs x17, x17, x1; \ + adc x10, xzr, xzr; \ + mov x11, #0xffffffff00000001; \ + adds x19, x12, x11; \ + mov x11, #0xffffffff; \ + adcs x20, x13, x11; \ + mov x11, #0x1; \ + adcs x21, x14, x11; \ + adcs x22, x15, xzr; \ + adcs x2, x16, xzr; \ + adcs x1, x17, xzr; \ + adcs x10, x10, xzr; \ + csel x12, x12, x19, eq; \ + csel x13, x13, x20, eq; \ + csel x14, x14, x21, eq; \ + csel x15, x15, x22, eq; \ + csel x16, x16, x2, eq; \ + csel x17, x17, x1, eq; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16]; \ + stp x16, x17, [P0+32] + +// Corresponds exactly to bignum_montsqr_p384_alt + +#define montsqr_p384(P0,P1) \ + ldp x2, x3, [P1]; \ + mul x9, x2, x3; \ + umulh x10, x2, x3; \ + ldp x4, x5, [P1+16]; \ + mul x8, x2, x4; \ + adds x10, x10, x8; \ + mul x11, x2, x5; \ + mul x8, x3, x4; \ + adcs x11, x11, x8; \ + umulh x12, x2, x5; \ + mul x8, x3, x5; \ + adcs x12, x12, x8; \ + ldp x6, x7, [P1+32]; \ + mul x13, x2, x7; \ + mul x8, x3, x6; \ + adcs x13, x13, x8; \ + umulh x14, x2, x7; \ + mul x8, x3, x7; \ + adcs x14, x14, x8; \ + mul x15, x5, x6; \ + adcs x15, x15, xzr; \ + umulh x16, x5, x6; \ + adc x16, x16, xzr; \ + umulh x8, x2, x4; \ + adds x11, x11, x8; \ + umulh x8, x3, x4; \ + adcs x12, x12, x8; \ + umulh x8, x3, x5; \ + adcs x13, x13, x8; \ + umulh x8, x3, x6; \ + adcs x14, x14, x8; \ + umulh x8, x3, x7; \ + adcs x15, x15, x8; \ + adc x16, x16, xzr; \ + mul x8, x2, x6; \ + adds x12, x12, x8; \ + mul x8, x4, x5; \ + adcs x13, x13, x8; \ + mul x8, x4, x6; \ + adcs x14, x14, x8; \ + mul x8, x4, x7; \ + adcs x15, x15, x8; \ + mul x8, x5, x7; \ + adcs x16, x16, x8; \ + mul x17, x6, x7; \ + adcs x17, x17, xzr; \ + umulh x19, x6, x7; \ + adc x19, x19, xzr; \ + umulh x8, x2, x6; \ + adds x13, x13, x8; \ + umulh x8, x4, x5; \ + adcs x14, x14, x8; \ + umulh x8, x4, x6; \ + adcs x15, x15, x8; \ + umulh x8, x4, x7; \ + adcs x16, x16, x8; \ + umulh x8, x5, x7; \ + adcs x17, x17, x8; \ + adc x19, x19, xzr; \ + adds x9, x9, x9; \ + adcs x10, x10, x10; \ + adcs x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + adcs x15, x15, x15; \ + adcs x16, x16, x16; \ + adcs x17, x17, x17; \ + adcs x19, x19, x19; \ + cset x20, hs; \ + umulh x8, x2, x2; \ + mul x2, x2, x2; \ + adds x9, x9, x8; \ + mul x8, x3, x3; \ + adcs x10, x10, x8; \ + umulh x8, x3, x3; \ + adcs x11, x11, x8; \ + mul x8, x4, x4; \ + adcs x12, x12, x8; \ + umulh x8, x4, x4; \ + adcs x13, x13, x8; \ + mul x8, x5, x5; \ + adcs x14, x14, x8; \ + umulh x8, x5, x5; \ + adcs x15, x15, x8; \ + mul x8, x6, x6; \ + adcs x16, x16, x8; \ + umulh x8, x6, x6; \ + adcs x17, x17, x8; \ + mul x8, x7, x7; \ + adcs x19, x19, x8; \ + umulh x8, x7, x7; \ + adc x20, x20, x8; \ + lsl x5, x2, #32; \ + add x2, x5, x2; \ + mov x5, #-4294967295; \ + umulh x5, x5, x2; \ + mov x4, #4294967295; \ + mul x3, x4, x2; \ + umulh x4, x4, x2; \ + adds x5, x5, x3; \ + adcs x4, x4, x2; \ + adc x3, xzr, xzr; \ + subs x9, x9, x5; \ + sbcs x10, x10, x4; \ + sbcs x11, x11, x3; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + sbc x2, x2, xzr; \ + lsl x5, x9, #32; \ + add x9, x5, x9; \ + mov x5, #-4294967295; \ + umulh x5, x5, x9; \ + mov x4, #4294967295; \ + mul x3, x4, x9; \ + umulh x4, x4, x9; \ + adds x5, x5, x3; \ + adcs x4, x4, x9; \ + adc x3, xzr, xzr; \ + subs x10, x10, x5; \ + sbcs x11, x11, x4; \ + sbcs x12, x12, x3; \ + sbcs x13, x13, xzr; \ + sbcs x2, x2, xzr; \ + sbc x9, x9, xzr; \ + lsl x5, x10, #32; \ + add x10, x5, x10; \ + mov x5, #-4294967295; \ + umulh x5, x5, x10; \ + mov x4, #4294967295; \ + mul x3, x4, x10; \ + 
umulh x4, x4, x10; \ + adds x5, x5, x3; \ + adcs x4, x4, x10; \ + adc x3, xzr, xzr; \ + subs x11, x11, x5; \ + sbcs x12, x12, x4; \ + sbcs x13, x13, x3; \ + sbcs x2, x2, xzr; \ + sbcs x9, x9, xzr; \ + sbc x10, x10, xzr; \ + lsl x5, x11, #32; \ + add x11, x5, x11; \ + mov x5, #-4294967295; \ + umulh x5, x5, x11; \ + mov x4, #4294967295; \ + mul x3, x4, x11; \ + umulh x4, x4, x11; \ + adds x5, x5, x3; \ + adcs x4, x4, x11; \ + adc x3, xzr, xzr; \ + subs x12, x12, x5; \ + sbcs x13, x13, x4; \ + sbcs x2, x2, x3; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbc x11, x11, xzr; \ + lsl x5, x12, #32; \ + add x12, x5, x12; \ + mov x5, #-4294967295; \ + umulh x5, x5, x12; \ + mov x4, #4294967295; \ + mul x3, x4, x12; \ + umulh x4, x4, x12; \ + adds x5, x5, x3; \ + adcs x4, x4, x12; \ + adc x3, xzr, xzr; \ + subs x13, x13, x5; \ + sbcs x2, x2, x4; \ + sbcs x9, x9, x3; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbc x12, x12, xzr; \ + lsl x5, x13, #32; \ + add x13, x5, x13; \ + mov x5, #-4294967295; \ + umulh x5, x5, x13; \ + mov x4, #4294967295; \ + mul x3, x4, x13; \ + umulh x4, x4, x13; \ + adds x5, x5, x3; \ + adcs x4, x4, x13; \ + adc x3, xzr, xzr; \ + subs x2, x2, x5; \ + sbcs x9, x9, x4; \ + sbcs x10, x10, x3; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, xzr; \ + adds x2, x2, x14; \ + adcs x9, x9, x15; \ + adcs x10, x10, x16; \ + adcs x11, x11, x17; \ + adcs x12, x12, x19; \ + adcs x13, x13, x20; \ + adc x6, xzr, xzr; \ + mov x8, #-4294967295; \ + adds x14, x2, x8; \ + mov x8, #4294967295; \ + adcs x15, x9, x8; \ + mov x8, #1; \ + adcs x16, x10, x8; \ + adcs x17, x11, xzr; \ + adcs x19, x12, xzr; \ + adcs x20, x13, xzr; \ + adcs x6, x6, xzr; \ + csel x2, x2, x14, eq; \ + csel x9, x9, x15, eq; \ + csel x10, x10, x16, eq; \ + csel x11, x11, x17, eq; \ + csel x12, x12, x19, eq; \ + csel x13, x13, x20, eq; \ + stp x2, x9, [P0]; \ + stp x10, x11, [P0+16]; \ + stp x12, x13, [P0+32] + +// Almost-Montgomery variant which we use when an input to other muls +// with the other argument fully reduced (which is always safe). In +// fact, with the Karatsuba-based Montgomery mul here, we don't even +// *need* the restriction that the other argument is reduced. 
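+//
+// Concretely, "almost-Montgomery" means the result z satisfies
+// z == x^2 / 2^384 (mod p_384) and z < 2^384, but z need not be fully
+// reduced below p_384. Multiplying such a z by a fully reduced operand
+// keeps the product within the range the Montgomery multiplication
+// reduces completely, which is the sense in which this is always safe.
+//
+// All the Montgomery macros in this file share one quotient trick:
+// p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1, so p_384 == 2^32 - 1
+// (mod 2^64), and (2^32 - 1) * (2^32 + 1) = 2^64 - 1 == -1 (mod 2^64),
+// making 2^32 + 1 the negated word-level inverse of p_384. Each
+// reduction round therefore forms its quotient estimate from the low
+// digit d as q = (d << 32) + d, which is the recurring lsl/add pair.
+// A one-line sanity check (illustrative Python, not part of the build):
+//
+//     p = 2**384 - 2**128 - 2**96 + 2**32 - 1
+//     assert (p * (2**32 + 1)) % 2**64 == 2**64 - 1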
+ +#define amontsqr_p384(P0,P1) \ + ldp x2, x3, [P1]; \ + mul x9, x2, x3; \ + umulh x10, x2, x3; \ + ldp x4, x5, [P1+16]; \ + mul x8, x2, x4; \ + adds x10, x10, x8; \ + mul x11, x2, x5; \ + mul x8, x3, x4; \ + adcs x11, x11, x8; \ + umulh x12, x2, x5; \ + mul x8, x3, x5; \ + adcs x12, x12, x8; \ + ldp x6, x7, [P1+32]; \ + mul x13, x2, x7; \ + mul x8, x3, x6; \ + adcs x13, x13, x8; \ + umulh x14, x2, x7; \ + mul x8, x3, x7; \ + adcs x14, x14, x8; \ + mul x15, x5, x6; \ + adcs x15, x15, xzr; \ + umulh x16, x5, x6; \ + adc x16, x16, xzr; \ + umulh x8, x2, x4; \ + adds x11, x11, x8; \ + umulh x8, x3, x4; \ + adcs x12, x12, x8; \ + umulh x8, x3, x5; \ + adcs x13, x13, x8; \ + umulh x8, x3, x6; \ + adcs x14, x14, x8; \ + umulh x8, x3, x7; \ + adcs x15, x15, x8; \ + adc x16, x16, xzr; \ + mul x8, x2, x6; \ + adds x12, x12, x8; \ + mul x8, x4, x5; \ + adcs x13, x13, x8; \ + mul x8, x4, x6; \ + adcs x14, x14, x8; \ + mul x8, x4, x7; \ + adcs x15, x15, x8; \ + mul x8, x5, x7; \ + adcs x16, x16, x8; \ + mul x17, x6, x7; \ + adcs x17, x17, xzr; \ + umulh x19, x6, x7; \ + adc x19, x19, xzr; \ + umulh x8, x2, x6; \ + adds x13, x13, x8; \ + umulh x8, x4, x5; \ + adcs x14, x14, x8; \ + umulh x8, x4, x6; \ + adcs x15, x15, x8; \ + umulh x8, x4, x7; \ + adcs x16, x16, x8; \ + umulh x8, x5, x7; \ + adcs x17, x17, x8; \ + adc x19, x19, xzr; \ + adds x9, x9, x9; \ + adcs x10, x10, x10; \ + adcs x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + adcs x15, x15, x15; \ + adcs x16, x16, x16; \ + adcs x17, x17, x17; \ + adcs x19, x19, x19; \ + cset x20, hs; \ + umulh x8, x2, x2; \ + mul x2, x2, x2; \ + adds x9, x9, x8; \ + mul x8, x3, x3; \ + adcs x10, x10, x8; \ + umulh x8, x3, x3; \ + adcs x11, x11, x8; \ + mul x8, x4, x4; \ + adcs x12, x12, x8; \ + umulh x8, x4, x4; \ + adcs x13, x13, x8; \ + mul x8, x5, x5; \ + adcs x14, x14, x8; \ + umulh x8, x5, x5; \ + adcs x15, x15, x8; \ + mul x8, x6, x6; \ + adcs x16, x16, x8; \ + umulh x8, x6, x6; \ + adcs x17, x17, x8; \ + mul x8, x7, x7; \ + adcs x19, x19, x8; \ + umulh x8, x7, x7; \ + adc x20, x20, x8; \ + lsl x5, x2, #32; \ + add x2, x5, x2; \ + mov x5, #-4294967295; \ + umulh x5, x5, x2; \ + mov x4, #4294967295; \ + mul x3, x4, x2; \ + umulh x4, x4, x2; \ + adds x5, x5, x3; \ + adcs x4, x4, x2; \ + adc x3, xzr, xzr; \ + subs x9, x9, x5; \ + sbcs x10, x10, x4; \ + sbcs x11, x11, x3; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + sbc x2, x2, xzr; \ + lsl x5, x9, #32; \ + add x9, x5, x9; \ + mov x5, #-4294967295; \ + umulh x5, x5, x9; \ + mov x4, #4294967295; \ + mul x3, x4, x9; \ + umulh x4, x4, x9; \ + adds x5, x5, x3; \ + adcs x4, x4, x9; \ + adc x3, xzr, xzr; \ + subs x10, x10, x5; \ + sbcs x11, x11, x4; \ + sbcs x12, x12, x3; \ + sbcs x13, x13, xzr; \ + sbcs x2, x2, xzr; \ + sbc x9, x9, xzr; \ + lsl x5, x10, #32; \ + add x10, x5, x10; \ + mov x5, #-4294967295; \ + umulh x5, x5, x10; \ + mov x4, #4294967295; \ + mul x3, x4, x10; \ + umulh x4, x4, x10; \ + adds x5, x5, x3; \ + adcs x4, x4, x10; \ + adc x3, xzr, xzr; \ + subs x11, x11, x5; \ + sbcs x12, x12, x4; \ + sbcs x13, x13, x3; \ + sbcs x2, x2, xzr; \ + sbcs x9, x9, xzr; \ + sbc x10, x10, xzr; \ + lsl x5, x11, #32; \ + add x11, x5, x11; \ + mov x5, #-4294967295; \ + umulh x5, x5, x11; \ + mov x4, #4294967295; \ + mul x3, x4, x11; \ + umulh x4, x4, x11; \ + adds x5, x5, x3; \ + adcs x4, x4, x11; \ + adc x3, xzr, xzr; \ + subs x12, x12, x5; \ + sbcs x13, x13, x4; \ + sbcs x2, x2, x3; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbc x11, x11, xzr; \ + lsl x5, x12, 
#32; \ + add x12, x5, x12; \ + mov x5, #-4294967295; \ + umulh x5, x5, x12; \ + mov x4, #4294967295; \ + mul x3, x4, x12; \ + umulh x4, x4, x12; \ + adds x5, x5, x3; \ + adcs x4, x4, x12; \ + adc x3, xzr, xzr; \ + subs x13, x13, x5; \ + sbcs x2, x2, x4; \ + sbcs x9, x9, x3; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbc x12, x12, xzr; \ + lsl x5, x13, #32; \ + add x13, x5, x13; \ + mov x5, #-4294967295; \ + umulh x5, x5, x13; \ + mov x4, #4294967295; \ + mul x3, x4, x13; \ + umulh x4, x4, x13; \ + adds x5, x5, x3; \ + adcs x4, x4, x13; \ + adc x3, xzr, xzr; \ + subs x2, x2, x5; \ + sbcs x9, x9, x4; \ + sbcs x10, x10, x3; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, xzr; \ + adds x2, x2, x14; \ + adcs x9, x9, x15; \ + adcs x10, x10, x16; \ + adcs x11, x11, x17; \ + adcs x12, x12, x19; \ + adcs x13, x13, x20; \ + mov x14, #-4294967295; \ + mov x15, #4294967295; \ + csel x14, x14, xzr, cs; \ + csel x15, x15, xzr, cs; \ + cset x16, cs; \ + adds x2, x2, x14; \ + adcs x9, x9, x15; \ + adcs x10, x10, x16; \ + adcs x11, x11, xzr; \ + adcs x12, x12, xzr; \ + adc x13, x13, xzr; \ + stp x2, x9, [P0]; \ + stp x10, x11, [P0+16]; \ + stp x12, x13, [P0+32] + +// Corresponds exactly to bignum_sub_p384 + +#define sub_p384(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + ldp x9, x10, [P1+32]; \ + ldp x4, x3, [P2+32]; \ + sbcs x9, x9, x4; \ + sbcs x10, x10, x3; \ + csetm x3, lo; \ + mov x4, #4294967295; \ + and x4, x4, x3; \ + adds x5, x5, x4; \ + eor x4, x4, x3; \ + adcs x6, x6, x4; \ + mov x4, #-2; \ + and x4, x4, x3; \ + adcs x7, x7, x4; \ + adcs x8, x8, x3; \ + adcs x9, x9, x3; \ + adc x10, x10, x3; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32] + +S2N_BN_SYMBOL(p384_montjmixadd): + +// Save regs and make room on stack for temporary variables + + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + sub sp, sp, NSPACE + +// Move the input arguments to stable places + + mov input_z, x0 + mov input_x, x1 + mov input_y, x2 + +// Main code, just a sequence of basic field operations +// 8 * multiply + 3 * square + 7 * subtract + + amontsqr_p384(zp2,z_1) + montmul_p384(y2a,z_1,y_2) + + montmul_p384(x2a,zp2,x_2) + montmul_p384(y2a,zp2,y2a) + + sub_p384(xd,x2a,x_1) + sub_p384(yd,y2a,y_1) + + amontsqr_p384(zz,xd) + montsqr_p384(ww,yd) + + montmul_p384(zzx1,zz,x_1) + montmul_p384(zzx2,zz,x2a) + + sub_p384(x_3,ww,zzx1) + sub_p384(t1,zzx2,zzx1) + + montmul_p384(z_3,xd,z_1) + + sub_p384(x_3,x_3,zzx2) + + sub_p384(t2,zzx1,x_3) + + montmul_p384(t1,t1,y_1) + montmul_p384(t2,yd,t2) + + sub_p384(y_3,t2,t1) + +// Restore stack and registers + + add sp, sp, NSPACE + + ldp x25, x26, [sp], 16 + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/p384/p384_montjadd.S b/x86_att/p384/p384_montjadd.S new file mode 100644 index 0000000000..e550f38609 --- /dev/null +++ b/x86_att/p384/p384_montjadd.S @@ -0,0 +1,955 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates +// +// extern void p384_montjadd +// (uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 18]); +// +// Does p3 := p1 + p2 where all points are regarded as Jacobian triples with +// each coordinate in the Montgomery domain, i.e. x' = (2^384 * x) mod p_384. +// A Jacobian triple (x',y',z') represents affine point (x/z^2,y/z^3). +// +// Standard x86-64 ABI: RDI = p3, RSI = p1, RDX = p2 +// Microsoft x64 ABI: RCX = p3, RDX = p1, R8 = p2 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjadd) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjadd) + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 + +// Pointer-offset pairs for inputs and outputs +// These assume %rdi = p3, %rsi = p1 and %rcx = p2, +// which needs to be set up explicitly before use + +#define x_1 0(%rsi) +#define y_1 NUMSIZE(%rsi) +#define z_1 (2*NUMSIZE)(%rsi) + +#define x_2 0(%rcx) +#define y_2 NUMSIZE(%rcx) +#define z_2 (2*NUMSIZE)(%rcx) + +#define x_3 0(%rdi) +#define y_3 NUMSIZE(%rdi) +#define z_3 (2*NUMSIZE)(%rdi) + +// In one place it's convenient to use another register +// since the squaring function overwrites %rcx + +#define z_2_alt (2*NUMSIZE)(%rsi) + +// Pointer-offset pairs for temporaries, with some aliasing +// NSPACE is the total stack needed for these temporaries + +#define z1sq (NUMSIZE*0)(%rsp) +#define ww (NUMSIZE*0)(%rsp) + +#define yd (NUMSIZE*1)(%rsp) +#define y2a (NUMSIZE*1)(%rsp) + +#define x2a (NUMSIZE*2)(%rsp) +#define zzx2 (NUMSIZE*2)(%rsp) + +#define zz (NUMSIZE*3)(%rsp) +#define t1 (NUMSIZE*3)(%rsp) + +#define t2 (NUMSIZE*4)(%rsp) +#define x1a (NUMSIZE*4)(%rsp) +#define zzx1 (NUMSIZE*4)(%rsp) + +#define xd (NUMSIZE*5)(%rsp) +#define z2sq (NUMSIZE*5)(%rsp) + +#define y1a (NUMSIZE*6)(%rsp) + +// Temporaries for the actual input pointers + +#define input_x (NUMSIZE*7)(%rsp) +#define input_y (NUMSIZE*7+8)(%rsp) +#define input_z (NUMSIZE*7+16)(%rsp) + +#define NSPACE (NUMSIZE*7+24) + +// Corresponds exactly to bignum_montmul_p384 + +#define montmul_p384(P0,P1,P2) \ + movq P2, %rdx ; \ + xorl %r15d, %r15d ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rbx, %r10 ; \ + addq %rbx, %r9 ; \ + mulxq 0x10+P1, %rbx, %r11 ; \ + adcq %rbx, %r10 ; \ + mulxq 0x18+P1, %rbx, %r12 ; \ + adcq %rbx, %r11 ; \ + mulxq 0x20+P1, %rbx, %r13 ; \ + adcq %rbx, %r12 ; \ + mulxq 0x28+P1, %rbx, %r14 ; \ + adcq %rbx, %r13 ; \ + adcq %r15, %r14 ; \ + movq %r8, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r8, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r8, %rbx ; \ + adcq %r8, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbp, %r11 ; \ + sbbq $0x0, %r12 ; \ + sbbq $0x0, %r13 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x8+P2, %rdx ; \ + xorl %r8d, %r8d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq 
%rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + adoxq %r8, %r15 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r14 ; \ + adcq %rbx, %r15 ; \ + adcq %r8, %r8 ; \ + movq %r9, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r9, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r9, %rbx ; \ + adcq %r9, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r10 ; \ + sbbq %rbx, %r11 ; \ + sbbq %rbp, %r12 ; \ + sbbq $0x0, %r13 ; \ + sbbq $0x0, %r14 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r15 ; \ + adcq $0x0, %r8 ; \ + movq 0x10+P2, %rdx ; \ + xorl %r9d, %r9d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + adoxq %r9, %r8 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r15 ; \ + adcq %rbx, %r8 ; \ + adcq %r9, %r9 ; \ + movq %r10, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r10, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r10, %rbx ; \ + adcq %r10, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r11 ; \ + sbbq %rbx, %r12 ; \ + sbbq %rbp, %r13 ; \ + sbbq $0x0, %r14 ; \ + sbbq $0x0, %r15 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r8 ; \ + adcq $0x0, %r9 ; \ + movq 0x18+P2, %rdx ; \ + xorl %r10d, %r10d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + adoxq %r10, %r9 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %r10, %r10 ; \ + movq %r11, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r11, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r11, %rbx ; \ + adcq %r11, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r12 ; \ + sbbq %rbx, %r13 ; \ + sbbq %rbp, %r14 ; \ + sbbq $0x0, %r15 ; \ + sbbq $0x0, %r8 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r9 ; \ + adcq $0x0, %r10 ; \ + movq 0x20+P2, %rdx ; \ + xorl %r11d, %r11d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + adoxq %r11, %r10 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r9 ; \ + adcq %rbx, %r10 ; \ + adcq %r11, %r11 ; \ + movq %r12, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r12, %rdx ; \ + xorl %ebp, %ebp 
; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r12, %rbx ; \ + adcq %r12, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r13 ; \ + sbbq %rbx, %r14 ; \ + sbbq %rbp, %r15 ; \ + sbbq $0x0, %r8 ; \ + sbbq $0x0, %r9 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + movq 0x28+P2, %rdx ; \ + xorl %r12d, %r12d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + adoxq %r12, %r11 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r10 ; \ + adcq %rbx, %r11 ; \ + adcq %r12, %r12 ; \ + movq %r13, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r13, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r13, %rbx ; \ + adcq %r13, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r14 ; \ + sbbq %rbx, %r15 ; \ + sbbq %rbp, %r8 ; \ + sbbq $0x0, %r9 ; \ + sbbq $0x0, %r10 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorl %edx, %edx ; \ + xorl %ebp, %ebp ; \ + xorl %r13d, %r13d ; \ + movq $0xffffffff00000001, %rax ; \ + addq %r14, %rax ; \ + movl $0xffffffff, %ebx ; \ + adcq %r15, %rbx ; \ + movl $0x1, %ecx ; \ + adcq %r8, %rcx ; \ + adcq %r9, %rdx ; \ + adcq %r10, %rbp ; \ + adcq %r11, %r13 ; \ + adcq $0x0, %r12 ; \ + cmovne %rax, %r14 ; \ + cmovne %rbx, %r15 ; \ + cmovne %rcx, %r8 ; \ + cmovne %rdx, %r9 ; \ + cmovne %rbp, %r10 ; \ + cmovne %r13, %r11 ; \ + movq %r14, P0 ; \ + movq %r15, 0x8+P0 ; \ + movq %r8, 0x10+P0 ; \ + movq %r9, 0x18+P0 ; \ + movq %r10, 0x20+P0 ; \ + movq %r11, 0x28+P0 + +// Corresponds exactly to bignum_montsqr_p384 + +#define montsqr_p384(P0,P1) \ + movq P1, %rdx ; \ + mulxq 0x8+P1, %r9, %r10 ; \ + mulxq 0x18+P1, %r11, %r12 ; \ + mulxq 0x28+P1, %r13, %r14 ; \ + movq 0x18+P1, %rdx ; \ + mulxq 0x20+P1, %r15, %rcx ; \ + xorl %ebp, %ebp ; \ + movq 0x10+P1, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq 0x8+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + adcxq %rbp, %r15 ; \ + adoxq %rbp, %rcx ; \ + adcq %rbp, %rcx ; \ + xorl %ebp, %ebp ; \ + movq 0x20+P1, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq 0x10+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x28+P1, %rax, %rdx ; \ + adcxq %rax, %r15 ; \ + adoxq %rdx, %rcx ; \ + movq 0x28+P1, %rdx ; \ + mulxq 0x20+P1, %rbx, %rbp ; \ + mulxq 0x18+P1, %rax, %rdx ; \ + adcxq %rax, %rcx ; \ + adoxq %rdx, %rbx ; \ + movl $0x0, %eax ; \ + adcxq %rax, %rbx ; \ + adoxq %rax, %rbp ; \ + adcq %rax, %rbp ; \ + xorq %rax, %rax ; \ + movq P1, %rdx ; \ + mulxq P1, %r8, %rax ; \ + adcxq %r9, %r9 ; \ + adoxq %rax, %r9 ; \ + movq 0x8+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r10, %r10 ; \ + adoxq %rax, %r10 ; \ + 
adcxq %r11, %r11 ; \ + adoxq %rdx, %r11 ; \ + movq 0x10+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r12, %r12 ; \ + adoxq %rax, %r12 ; \ + adcxq %r13, %r13 ; \ + adoxq %rdx, %r13 ; \ + movq 0x18+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r14, %r14 ; \ + adoxq %rax, %r14 ; \ + adcxq %r15, %r15 ; \ + adoxq %rdx, %r15 ; \ + movq 0x20+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %rcx, %rcx ; \ + adoxq %rax, %rcx ; \ + adcxq %rbx, %rbx ; \ + adoxq %rdx, %rbx ; \ + movq 0x28+P1, %rdx ; \ + mulxq %rdx, %rax, %rsi ; \ + adcxq %rbp, %rbp ; \ + adoxq %rax, %rbp ; \ + movl $0x0, %eax ; \ + adcxq %rax, %rsi ; \ + adoxq %rax, %rsi ; \ + movq %rbx, P0 ; \ + movq %r8, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r8, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r8, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r8 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r8 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r9 ; \ + sbbq %r8, %r10 ; \ + sbbq %rbx, %r11 ; \ + sbbq $0x0, %r12 ; \ + sbbq $0x0, %r13 ; \ + movq %rdx, %r8 ; \ + sbbq $0x0, %r8 ; \ + movq %r9, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r9, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r9, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r9 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r9 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r10 ; \ + sbbq %r9, %r11 ; \ + sbbq %rbx, %r12 ; \ + sbbq $0x0, %r13 ; \ + sbbq $0x0, %r8 ; \ + movq %rdx, %r9 ; \ + sbbq $0x0, %r9 ; \ + movq %r10, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r10, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r10, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r10 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r10 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r11 ; \ + sbbq %r10, %r12 ; \ + sbbq %rbx, %r13 ; \ + sbbq $0x0, %r8 ; \ + sbbq $0x0, %r9 ; \ + movq %rdx, %r10 ; \ + sbbq $0x0, %r10 ; \ + movq %r11, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r11, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r11, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r11 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r11 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r12 ; \ + sbbq %r11, %r13 ; \ + sbbq %rbx, %r8 ; \ + sbbq $0x0, %r9 ; \ + sbbq $0x0, %r10 ; \ + movq %rdx, %r11 ; \ + sbbq $0x0, %r11 ; \ + movq %r12, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r12, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r12, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r12 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r12 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r13 ; \ + sbbq %r12, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq $0x0, %r10 ; \ + sbbq $0x0, %r11 ; \ + movq %rdx, %r12 ; \ + sbbq $0x0, %r12 ; \ + movq %r13, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r13, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r13, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r13 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r13 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r8 ; \ + sbbq %r13, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq $0x0, %r11 ; \ + sbbq $0x0, %r12 ; \ + movq %rdx, %r13 ; \ + sbbq $0x0, %r13 ; \ + movq P0, %rbx ; \ + addq %r8, %r14 ; \ + adcq %r9, %r15 ; \ + adcq %r10, %rcx ; \ + adcq %r11, %rbx ; \ + adcq %r12, %rbp ; \ + adcq %r13, %rsi ; \ + movl $0x0, %r8d ; \ + adcq %r8, %r8 ; \ + xorq %r11, %r11 ; \ + xorq %r12, %r12 ; \ + xorq %r13, %r13 ; \ + movq $0xffffffff00000001, %rax ; \ + addq %r14, %rax 
; \ + movl $0xffffffff, %r9d ; \ + adcq %r15, %r9 ; \ + movl $0x1, %r10d ; \ + adcq %rcx, %r10 ; \ + adcq %rbx, %r11 ; \ + adcq %rbp, %r12 ; \ + adcq %rsi, %r13 ; \ + adcq $0x0, %r8 ; \ + cmovne %rax, %r14 ; \ + cmovne %r9, %r15 ; \ + cmovne %r10, %rcx ; \ + cmovne %r11, %rbx ; \ + cmovne %r12, %rbp ; \ + cmovne %r13, %rsi ; \ + movq %r14, P0 ; \ + movq %r15, 0x8+P0 ; \ + movq %rcx, 0x10+P0 ; \ + movq %rbx, 0x18+P0 ; \ + movq %rbp, 0x20+P0 ; \ + movq %rsi, 0x28+P0 + +// Almost-Montgomery variant which we use when an input to other muls +// with the other argument fully reduced (which is always safe). + +#define amontsqr_p384(P0,P1) \ + movq P1, %rdx ; \ + mulxq 0x8+P1, %r9, %r10 ; \ + mulxq 0x18+P1, %r11, %r12 ; \ + mulxq 0x28+P1, %r13, %r14 ; \ + movq 0x18+P1, %rdx ; \ + mulxq 0x20+P1, %r15, %rcx ; \ + xorl %ebp, %ebp ; \ + movq 0x10+P1, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq 0x8+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + adcxq %rbp, %r15 ; \ + adoxq %rbp, %rcx ; \ + adcq %rbp, %rcx ; \ + xorl %ebp, %ebp ; \ + movq 0x20+P1, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq 0x10+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x28+P1, %rax, %rdx ; \ + adcxq %rax, %r15 ; \ + adoxq %rdx, %rcx ; \ + movq 0x28+P1, %rdx ; \ + mulxq 0x20+P1, %rbx, %rbp ; \ + mulxq 0x18+P1, %rax, %rdx ; \ + adcxq %rax, %rcx ; \ + adoxq %rdx, %rbx ; \ + movl $0x0, %eax ; \ + adcxq %rax, %rbx ; \ + adoxq %rax, %rbp ; \ + adcq %rax, %rbp ; \ + xorq %rax, %rax ; \ + movq P1, %rdx ; \ + mulxq P1, %r8, %rax ; \ + adcxq %r9, %r9 ; \ + adoxq %rax, %r9 ; \ + movq 0x8+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r10, %r10 ; \ + adoxq %rax, %r10 ; \ + adcxq %r11, %r11 ; \ + adoxq %rdx, %r11 ; \ + movq 0x10+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r12, %r12 ; \ + adoxq %rax, %r12 ; \ + adcxq %r13, %r13 ; \ + adoxq %rdx, %r13 ; \ + movq 0x18+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r14, %r14 ; \ + adoxq %rax, %r14 ; \ + adcxq %r15, %r15 ; \ + adoxq %rdx, %r15 ; \ + movq 0x20+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %rcx, %rcx ; \ + adoxq %rax, %rcx ; \ + adcxq %rbx, %rbx ; \ + adoxq %rdx, %rbx ; \ + movq 0x28+P1, %rdx ; \ + mulxq %rdx, %rax, %rsi ; \ + adcxq %rbp, %rbp ; \ + adoxq %rax, %rbp ; \ + movl $0x0, %eax ; \ + adcxq %rax, %rsi ; \ + adoxq %rax, %rsi ; \ + movq %rbx, P0 ; \ + movq %r8, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r8, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r8, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r8 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r8 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r9 ; \ + sbbq %r8, %r10 ; \ + sbbq %rbx, %r11 ; \ + sbbq $0x0, %r12 ; \ + sbbq $0x0, %r13 ; \ + movq %rdx, %r8 ; \ + sbbq $0x0, %r8 ; \ + movq %r9, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r9, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r9, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r9 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r9 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r10 ; \ + sbbq %r9, 
%r11 ; \ + sbbq %rbx, %r12 ; \ + sbbq $0x0, %r13 ; \ + sbbq $0x0, %r8 ; \ + movq %rdx, %r9 ; \ + sbbq $0x0, %r9 ; \ + movq %r10, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r10, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r10, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r10 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r10 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r11 ; \ + sbbq %r10, %r12 ; \ + sbbq %rbx, %r13 ; \ + sbbq $0x0, %r8 ; \ + sbbq $0x0, %r9 ; \ + movq %rdx, %r10 ; \ + sbbq $0x0, %r10 ; \ + movq %r11, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r11, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r11, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r11 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r11 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r12 ; \ + sbbq %r11, %r13 ; \ + sbbq %rbx, %r8 ; \ + sbbq $0x0, %r9 ; \ + sbbq $0x0, %r10 ; \ + movq %rdx, %r11 ; \ + sbbq $0x0, %r11 ; \ + movq %r12, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r12, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r12, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r12 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r12 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r13 ; \ + sbbq %r12, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq $0x0, %r10 ; \ + sbbq $0x0, %r11 ; \ + movq %rdx, %r12 ; \ + sbbq $0x0, %r12 ; \ + movq %r13, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r13, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r13, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r13 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r13 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r8 ; \ + sbbq %r13, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq $0x0, %r11 ; \ + sbbq $0x0, %r12 ; \ + movq %rdx, %r13 ; \ + sbbq $0x0, %r13 ; \ + movq P0, %rbx ; \ + addq %r8, %r14 ; \ + adcq %r9, %r15 ; \ + adcq %r10, %rcx ; \ + adcq %r11, %rbx ; \ + adcq %r12, %rbp ; \ + adcq %r13, %rsi ; \ + movl $0x0, %r8d ; \ + movq $0xffffffff00000001, %rax ; \ + movl $0xffffffff, %r9d ; \ + movl $0x1, %r10d ; \ + cmovnc %r8, %rax ; \ + cmovnc %r8, %r9 ; \ + cmovnc %r8, %r10 ; \ + addq %rax, %r14 ; \ + adcq %r9, %r15 ; \ + adcq %r10, %rcx ; \ + adcq %r8, %rbx ; \ + adcq %r8, %rbp ; \ + adcq %r8, %rsi ; \ + movq %r14, P0 ; \ + movq %r15, 0x8+P0 ; \ + movq %rcx, 0x10+P0 ; \ + movq %rbx, 0x18+P0 ; \ + movq %rbp, 0x20+P0 ; \ + movq %rsi, 0x28+P0 + +// Corresponds exactly to bignum_sub_p384 + +#define sub_p384(P0,P1,P2) \ + movq P1, %rax ; \ + subq P2, %rax ; \ + movq 0x8+P1, %rdx ; \ + sbbq 0x8+P2, %rdx ; \ + movq 0x10+P1, %r8 ; \ + sbbq 0x10+P2, %r8 ; \ + movq 0x18+P1, %r9 ; \ + sbbq 0x18+P2, %r9 ; \ + movq 0x20+P1, %r10 ; \ + sbbq 0x20+P2, %r10 ; \ + movq 0x28+P1, %r11 ; \ + sbbq 0x28+P2, %r11 ; \ + sbbq %rcx, %rcx ; \ + movl $0xffffffff, %esi ; \ + andq %rsi, %rcx ; \ + xorq %rsi, %rsi ; \ + subq %rcx, %rsi ; \ + subq %rsi, %rax ; \ + movq %rax, P0 ; \ + sbbq %rcx, %rdx ; \ + movq %rdx, 0x8+P0 ; \ + sbbq %rax, %rax ; \ + andq %rsi, %rcx ; \ + negq %rax; \ + sbbq %rcx, %r8 ; \ + movq %r8, 0x10+P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x18+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x20+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x28+P0 + +S2N_BN_SYMBOL(p384_montjadd): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + +// Save registers and make room on stack for temporary variables +// Put the input arguments in non-volatile places on the stack + + pushq %rbx + pushq %rbp + pushq %r12 
+ pushq %r13 + pushq %r14 + pushq %r15 + + subq $NSPACE, %rsp + + movq %rdi, input_z + movq %rsi, input_x + movq %rdx, input_y + +// Main code, just a sequence of basic field operations +// 8 * multiply + 3 * square + 7 * subtract + + amontsqr_p384(z1sq,z_1) + movq input_y, %rsi + amontsqr_p384(z2sq,z_2_alt) + + movq input_x, %rsi + movq input_y, %rcx + montmul_p384(y1a,z_2,y_1) + movq input_x, %rsi + movq input_y, %rcx + montmul_p384(y2a,z_1,y_2) + + movq input_y, %rcx + montmul_p384(x2a,z1sq,x_2) + movq input_x, %rsi + montmul_p384(x1a,z2sq,x_1) + montmul_p384(y2a,z1sq,y2a) + montmul_p384(y1a,z2sq,y1a) + + sub_p384(xd,x2a,x1a) + sub_p384(yd,y2a,y1a) + + amontsqr_p384(zz,xd) + montsqr_p384(ww,yd) + + montmul_p384(zzx1,zz,x1a) + montmul_p384(zzx2,zz,x2a) + + movq input_z, %rdi + sub_p384(x_3,ww,zzx1) + sub_p384(t1,zzx2,zzx1) + + movq input_x, %rsi + montmul_p384(xd,xd,z_1) + + movq input_z, %rdi + sub_p384(x_3,x_3,zzx2) + + movq input_z, %rdi + sub_p384(t2,zzx1,x_3) + + montmul_p384(t1,t1,y1a) + + movq input_z, %rdi + movq input_y, %rcx + montmul_p384(z_3,xd,z_2) + montmul_p384(t2,yd,t2) + + movq input_z, %rdi + sub_p384(y_3,t2,t1) + +// Restore stack and registers + + addq $NSPACE, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/p384/p384_montjdouble.S b/x86_att/p384/p384_montjdouble.S new file mode 100644 index 0000000000..d7de785797 --- /dev/null +++ b/x86_att/p384/p384_montjdouble.S @@ -0,0 +1,1014 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates +// +// extern void p384_montjdouble +// (uint64_t p3[static 18],uint64_t p1[static 18]); +// +// Does p3 := 2 * p1 where all points are regarded as Jacobian triples with +// each coordinate in the Montgomery domain, i.e. x' = (2^384 * x) mod p_384. +// A Jacobian triple (x',y',z') represents affine point (x/z^2,y/z^3). +// +// Standard x86-64 ABI: RDI = p3, RSI = p1 +// Microsoft x64 ABI: RCX = p3, RDX = p1 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjdouble) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjdouble) + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 + +// Pointer-offset pairs for inputs and outputs +// These assume %rdi = p3, %rsi = p1. The latter stays true +// but montsqr below modifies %rdi as well. Thus, we need +// to save %rdi and restore it before the writes to outputs. 
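+// (Specifically, the montsqr_p384 macro in this file uses %rdi as a
+// scratch register for the top product word, where the p384_montjadd
+// version used %rsi for that job; here %rsi must keep pointing at the
+// input point, so the output pointer is parked in input_z on the stack
+// and reloaded just before x_3, y_3 and z_3 are written.)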
+ +#define x_1 0(%rsi) +#define y_1 NUMSIZE(%rsi) +#define z_1 (2*NUMSIZE)(%rsi) + +#define x_3 0(%rdi) +#define y_3 NUMSIZE(%rdi) +#define z_3 (2*NUMSIZE)(%rdi) + +// Pointer-offset pairs for temporaries, with some aliasing +// NSPACE is the total stack needed for these temporaries + +#define z2 (NUMSIZE*0)(%rsp) +#define y2 (NUMSIZE*1)(%rsp) +#define x2p (NUMSIZE*2)(%rsp) +#define xy2 (NUMSIZE*3)(%rsp) + +#define y4 (NUMSIZE*4)(%rsp) +#define t2 (NUMSIZE*4)(%rsp) + +#define dx2 (NUMSIZE*5)(%rsp) +#define t1 (NUMSIZE*5)(%rsp) + +#define d (NUMSIZE*6)(%rsp) +#define x4p (NUMSIZE*6)(%rsp) + +// Safe place for pointer to the output + +#define input_z (NUMSIZE*7)(%rsp) + +#define NSPACE (NUMSIZE*7+8) + +// Corresponds exactly to bignum_montmul_p384 + +#define montmul_p384(P0,P1,P2) \ + movq P2, %rdx ; \ + xorl %r15d, %r15d ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rbx, %r10 ; \ + addq %rbx, %r9 ; \ + mulxq 0x10+P1, %rbx, %r11 ; \ + adcq %rbx, %r10 ; \ + mulxq 0x18+P1, %rbx, %r12 ; \ + adcq %rbx, %r11 ; \ + mulxq 0x20+P1, %rbx, %r13 ; \ + adcq %rbx, %r12 ; \ + mulxq 0x28+P1, %rbx, %r14 ; \ + adcq %rbx, %r13 ; \ + adcq %r15, %r14 ; \ + movq %r8, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r8, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r8, %rbx ; \ + adcq %r8, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbp, %r11 ; \ + sbbq $0x0, %r12 ; \ + sbbq $0x0, %r13 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x8+P2, %rdx ; \ + xorl %r8d, %r8d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + adoxq %r8, %r15 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r14 ; \ + adcq %rbx, %r15 ; \ + adcq %r8, %r8 ; \ + movq %r9, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r9, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r9, %rbx ; \ + adcq %r9, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r10 ; \ + sbbq %rbx, %r11 ; \ + sbbq %rbp, %r12 ; \ + sbbq $0x0, %r13 ; \ + sbbq $0x0, %r14 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r15 ; \ + adcq $0x0, %r8 ; \ + movq 0x10+P2, %rdx ; \ + xorl %r9d, %r9d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + adoxq %r9, %r8 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r15 ; \ + adcq %rbx, %r8 ; \ + adcq %r9, %r9 ; \ + movq %r10, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r10, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r10, %rbx ; \ + adcq %r10, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r11 ; \ + sbbq %rbx, %r12 ; \ + sbbq %rbp, %r13 ; \ + sbbq $0x0, %r14 ; \ + sbbq $0x0, %r15 ; \ + sbbq $0x0, %rdx ; 
\ + addq %rdx, %r8 ; \ + adcq $0x0, %r9 ; \ + movq 0x18+P2, %rdx ; \ + xorl %r10d, %r10d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + adoxq %r10, %r9 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %r10, %r10 ; \ + movq %r11, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r11, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r11, %rbx ; \ + adcq %r11, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r12 ; \ + sbbq %rbx, %r13 ; \ + sbbq %rbp, %r14 ; \ + sbbq $0x0, %r15 ; \ + sbbq $0x0, %r8 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r9 ; \ + adcq $0x0, %r10 ; \ + movq 0x20+P2, %rdx ; \ + xorl %r11d, %r11d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + adoxq %r11, %r10 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r9 ; \ + adcq %rbx, %r10 ; \ + adcq %r11, %r11 ; \ + movq %r12, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r12, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r12, %rbx ; \ + adcq %r12, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r13 ; \ + sbbq %rbx, %r14 ; \ + sbbq %rbp, %r15 ; \ + sbbq $0x0, %r8 ; \ + sbbq $0x0, %r9 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + movq 0x28+P2, %rdx ; \ + xorl %r12d, %r12d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + adoxq %r12, %r11 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r10 ; \ + adcq %rbx, %r11 ; \ + adcq %r12, %r12 ; \ + movq %r13, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r13, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r13, %rbx ; \ + adcq %r13, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r14 ; \ + sbbq %rbx, %r15 ; \ + sbbq %rbp, %r8 ; \ + sbbq $0x0, %r9 ; \ + sbbq $0x0, %r10 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorl %edx, %edx ; \ + xorl %ebp, %ebp ; \ + xorl %r13d, %r13d ; \ + movq $0xffffffff00000001, %rax ; \ + addq %r14, %rax ; \ + movl $0xffffffff, %ebx ; \ + adcq %r15, %rbx ; \ + movl $0x1, %ecx ; \ + adcq %r8, %rcx ; \ + adcq %r9, %rdx ; \ + adcq %r10, %rbp ; \ + adcq %r11, %r13 ; \ + adcq $0x0, %r12 ; \ + cmovne %rax, %r14 ; \ + cmovne %rbx, %r15 ; \ + cmovne %rcx, %r8 ; \ + cmovne %rdx, %r9 ; \ + cmovne %rbp, %r10 ; \ + cmovne %r13, %r11 ; \ + movq %r14, P0 ; \ + movq %r15, 0x8+P0 ; \ + movq 
%r8, 0x10+P0 ; \ + movq %r9, 0x18+P0 ; \ + movq %r10, 0x20+P0 ; \ + movq %r11, 0x28+P0 + +// Corresponds exactly to bignum_montsqr_p384 + +#define montsqr_p384(P0,P1) \ + movq P1, %rdx ; \ + mulxq 0x8+P1, %r9, %r10 ; \ + mulxq 0x18+P1, %r11, %r12 ; \ + mulxq 0x28+P1, %r13, %r14 ; \ + movq 0x18+P1, %rdx ; \ + mulxq 0x20+P1, %r15, %rcx ; \ + xorl %ebp, %ebp ; \ + movq 0x10+P1, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq 0x8+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + adcxq %rbp, %r15 ; \ + adoxq %rbp, %rcx ; \ + adcq %rbp, %rcx ; \ + xorl %ebp, %ebp ; \ + movq 0x20+P1, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq 0x10+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x28+P1, %rax, %rdx ; \ + adcxq %rax, %r15 ; \ + adoxq %rdx, %rcx ; \ + movq 0x28+P1, %rdx ; \ + mulxq 0x20+P1, %rbx, %rbp ; \ + mulxq 0x18+P1, %rax, %rdx ; \ + adcxq %rax, %rcx ; \ + adoxq %rdx, %rbx ; \ + movl $0x0, %eax ; \ + adcxq %rax, %rbx ; \ + adoxq %rax, %rbp ; \ + adcq %rax, %rbp ; \ + xorq %rax, %rax ; \ + movq P1, %rdx ; \ + mulxq P1, %r8, %rax ; \ + adcxq %r9, %r9 ; \ + adoxq %rax, %r9 ; \ + movq 0x8+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r10, %r10 ; \ + adoxq %rax, %r10 ; \ + adcxq %r11, %r11 ; \ + adoxq %rdx, %r11 ; \ + movq 0x10+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r12, %r12 ; \ + adoxq %rax, %r12 ; \ + adcxq %r13, %r13 ; \ + adoxq %rdx, %r13 ; \ + movq 0x18+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r14, %r14 ; \ + adoxq %rax, %r14 ; \ + adcxq %r15, %r15 ; \ + adoxq %rdx, %r15 ; \ + movq 0x20+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %rcx, %rcx ; \ + adoxq %rax, %rcx ; \ + adcxq %rbx, %rbx ; \ + adoxq %rdx, %rbx ; \ + movq 0x28+P1, %rdx ; \ + mulxq %rdx, %rax, %rdi ; \ + adcxq %rbp, %rbp ; \ + adoxq %rax, %rbp ; \ + movl $0x0, %eax ; \ + adcxq %rax, %rdi ; \ + adoxq %rax, %rdi ; \ + movq %rbx, P0 ; \ + movq %r8, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r8, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r8, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r8 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r8 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r9 ; \ + sbbq %r8, %r10 ; \ + sbbq %rbx, %r11 ; \ + sbbq $0x0, %r12 ; \ + sbbq $0x0, %r13 ; \ + movq %rdx, %r8 ; \ + sbbq $0x0, %r8 ; \ + movq %r9, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r9, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r9, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r9 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r9 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r10 ; \ + sbbq %r9, %r11 ; \ + sbbq %rbx, %r12 ; \ + sbbq $0x0, %r13 ; \ + sbbq $0x0, %r8 ; \ + movq %rdx, %r9 ; \ + sbbq $0x0, %r9 ; \ + movq %r10, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r10, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r10, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r10 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r10 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r11 ; \ + sbbq %r10, %r12 ; \ + sbbq %rbx, %r13 ; \ + sbbq $0x0, 
%r8 ; \ + sbbq $0x0, %r9 ; \ + movq %rdx, %r10 ; \ + sbbq $0x0, %r10 ; \ + movq %r11, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r11, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r11, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r11 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r11 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r12 ; \ + sbbq %r11, %r13 ; \ + sbbq %rbx, %r8 ; \ + sbbq $0x0, %r9 ; \ + sbbq $0x0, %r10 ; \ + movq %rdx, %r11 ; \ + sbbq $0x0, %r11 ; \ + movq %r12, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r12, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r12, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r12 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r12 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r13 ; \ + sbbq %r12, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq $0x0, %r10 ; \ + sbbq $0x0, %r11 ; \ + movq %rdx, %r12 ; \ + sbbq $0x0, %r12 ; \ + movq %r13, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r13, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r13, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r13 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r13 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r8 ; \ + sbbq %r13, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq $0x0, %r11 ; \ + sbbq $0x0, %r12 ; \ + movq %rdx, %r13 ; \ + sbbq $0x0, %r13 ; \ + movq P0, %rbx ; \ + addq %r8, %r14 ; \ + adcq %r9, %r15 ; \ + adcq %r10, %rcx ; \ + adcq %r11, %rbx ; \ + adcq %r12, %rbp ; \ + adcq %r13, %rdi ; \ + movl $0x0, %r8d ; \ + adcq %r8, %r8 ; \ + xorq %r11, %r11 ; \ + xorq %r12, %r12 ; \ + xorq %r13, %r13 ; \ + movq $0xffffffff00000001, %rax ; \ + addq %r14, %rax ; \ + movl $0xffffffff, %r9d ; \ + adcq %r15, %r9 ; \ + movl $0x1, %r10d ; \ + adcq %rcx, %r10 ; \ + adcq %rbx, %r11 ; \ + adcq %rbp, %r12 ; \ + adcq %rdi, %r13 ; \ + adcq $0x0, %r8 ; \ + cmovne %rax, %r14 ; \ + cmovne %r9, %r15 ; \ + cmovne %r10, %rcx ; \ + cmovne %r11, %rbx ; \ + cmovne %r12, %rbp ; \ + cmovne %r13, %rdi ; \ + movq %r14, P0 ; \ + movq %r15, 0x8+P0 ; \ + movq %rcx, 0x10+P0 ; \ + movq %rbx, 0x18+P0 ; \ + movq %rbp, 0x20+P0 ; \ + movq %rdi, 0x28+P0 + +#define sub_p384(P0,P1,P2) \ + movq P1, %rax ; \ + subq P2, %rax ; \ + movq 0x8+P1, %rdx ; \ + sbbq 0x8+P2, %rdx ; \ + movq 0x10+P1, %r8 ; \ + sbbq 0x10+P2, %r8 ; \ + movq 0x18+P1, %r9 ; \ + sbbq 0x18+P2, %r9 ; \ + movq 0x20+P1, %r10 ; \ + sbbq 0x20+P2, %r10 ; \ + movq 0x28+P1, %r11 ; \ + sbbq 0x28+P2, %r11 ; \ + sbbq %rcx, %rcx ; \ + movl $0xffffffff, %ebx ; \ + andq %rbx, %rcx ; \ + xorq %rbx, %rbx ; \ + subq %rcx, %rbx ; \ + subq %rbx, %rax ; \ + movq %rax, P0 ; \ + sbbq %rcx, %rdx ; \ + movq %rdx, 0x8+P0 ; \ + sbbq %rax, %rax ; \ + andq %rbx, %rcx ; \ + negq %rax; \ + sbbq %rcx, %r8 ; \ + movq %r8, 0x10+P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x18+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x20+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x28+P0 + +// Simplified bignum_add_p384, without carry chain suspension + +#define add_p384(P0,P1,P2) \ + movq P1, %rax ; \ + addq P2, %rax ; \ + movq 0x8+P1, %rcx ; \ + adcq 0x8+P2, %rcx ; \ + movq 0x10+P1, %r8 ; \ + adcq 0x10+P2, %r8 ; \ + movq 0x18+P1, %r9 ; \ + adcq 0x18+P2, %r9 ; \ + movq 0x20+P1, %r10 ; \ + adcq 0x20+P2, %r10 ; \ + movq 0x28+P1, %r11 ; \ + adcq 0x28+P2, %r11 ; \ + movl $0x0, %edx ; \ + adcq %rdx, %rdx ; \ + movq $0xffffffff00000001, %rbp ; \ + addq %rbp, %rax ; \ + movl $0xffffffff, %ebp ; \ + adcq %rbp, %rcx ; \ + adcq $0x1, %r8 ; \ + adcq $0x0, %r9 ; \ + adcq $0x0, %r10 ; \ + adcq $0x0, %r11 ; \ + adcq 
$0xffffffffffffffff, %rdx ; \ + movl $1, %ebx ; \ + andq %rdx, %rbx ; \ + andq %rbp, %rdx ; \ + xorq %rbp, %rbp ; \ + subq %rdx, %rbp ; \ + subq %rbp, %rax ; \ + movq %rax, P0 ; \ + sbbq %rdx, %rcx ; \ + movq %rcx, 0x8+P0 ; \ + sbbq %rbx, %r8 ; \ + movq %r8, 0x10+P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x18+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x20+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x28+P0 + +// P0 = 4 * P1 - P2 + +#define cmsub41_p384(P0,P1,P2) \ + movq 40+P1, %rdx ; \ + movq %rdx, %r13 ; \ + shrq $62, %rdx ; \ + movq 32+P1, %r12 ; \ + shldq $2, %r12, %r13 ; \ + movq 24+P1, %r11 ; \ + shldq $2, %r11, %r12 ; \ + movq 16+P1, %r10 ; \ + shldq $2, %r10, %r11 ; \ + movq 8+P1, %r9 ; \ + shldq $2, %r9, %r10 ; \ + movq P1, %r8 ; \ + shldq $2, %r8, %r9 ; \ + shlq $2, %r8 ; \ + addq $1, %rdx ; \ + subq P2, %r8 ; \ + sbbq 0x8+P2, %r9 ; \ + sbbq 0x10+P2, %r10 ; \ + sbbq 0x18+P2, %r11 ; \ + sbbq 0x20+P2, %r12 ; \ + sbbq 0x28+P2, %r13 ; \ + sbbq $0, %rdx ; \ + xorq %rcx, %rcx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rax, %rcx ; \ + adcxq %rax, %r8 ; \ + adoxq %rcx, %r9 ; \ + movl $0xffffffff, %eax ; \ + mulxq %rax, %rax, %rcx ; \ + adcxq %rax, %r9 ; \ + adoxq %rcx, %r10 ; \ + adcxq %rdx, %r10 ; \ + movl $0x0, %eax ; \ + movl $0x0, %ecx ; \ + adoxq %rax, %rax ; \ + adcq %rax, %r11 ; \ + adcq %rcx, %r12 ; \ + adcq %rcx, %r13 ; \ + adcq %rcx, %rcx ; \ + subq $0x1, %rcx ; \ + movl $0xffffffff, %edx ; \ + xorq %rax, %rax ; \ + andq %rcx, %rdx ; \ + subq %rdx, %rax ; \ + andq $0x1, %rcx ; \ + subq %rax, %r8 ; \ + movq %r8, P0 ; \ + sbbq %rdx, %r9 ; \ + movq %r9, 0x8+P0 ; \ + sbbq %rcx, %r10 ; \ + movq %r10, 0x10+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x18+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x20+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x28+P0 + +// P0 = C * P1 - D * P2 + +#define cmsub_p384(P0,C,P1,D,P2) \ + movq $0x00000000ffffffff, %r8 ; \ + subq P2, %r8 ; \ + movq $0xffffffff00000000, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movq $0xfffffffffffffffe, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq $0xffffffffffffffff, %r11 ; \ + sbbq 24+P2, %r11 ; \ + movq $0xffffffffffffffff, %r12 ; \ + sbbq 32+P2, %r12 ; \ + movq $0xffffffffffffffff, %r13 ; \ + sbbq 40+P2, %r13 ; \ + movq $D, %rdx ; \ + mulxq %r8, %r8, %rax ; \ + mulxq %r9, %r9, %rcx ; \ + addq %rax, %r9 ; \ + mulxq %r10, %r10, %rax ; \ + adcq %rcx, %r10 ; \ + mulxq %r11, %r11, %rcx ; \ + adcq %rax, %r11 ; \ + mulxq %r12, %r12, %rax ; \ + adcq %rcx, %r12 ; \ + mulxq %r13, %r13, %r14 ; \ + adcq %rax, %r13 ; \ + adcq $1, %r14 ; \ + xorl %ecx, %ecx ; \ + movq $C, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 8+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 16+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 24+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 32+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 40+P1, %rax, %rdx ; \ + adcxq %rax, %r13 ; \ + adoxq %r14, %rdx ; \ + adcxq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rax, %rcx ; \ + adcxq %rax, %r8 ; \ + adoxq %rcx, %r9 ; \ + movl $0xffffffff, %eax ; \ + mulxq %rax, %rax, %rcx ; \ + adcxq %rax, %r9 ; \ + adoxq %rcx, %r10 ; \ + adcxq %rdx, %r10 ; \ + movl $0x0, %eax ; \ + movl $0x0, %ecx ; \ + adoxq %rax, %rax ; \ + adcq %rax, %r11 ; \ + adcq %rcx, %r12 ; \ + adcq %rcx, %r13 ; \ + adcq %rcx, %rcx ; \ + subq $0x1, %rcx ; \ + movl $0xffffffff, %edx ; \ + xorq %rax, %rax ; \ + andq %rcx, %rdx ; \ + 
subq %rdx, %rax ; \ + andq $0x1, %rcx ; \ + subq %rax, %r8 ; \ + movq %r8, P0 ; \ + sbbq %rdx, %r9 ; \ + movq %r9, 0x8+P0 ; \ + sbbq %rcx, %r10 ; \ + movq %r10, 0x10+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x18+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x20+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x28+P0 + +// A weak version of add that only guarantees sum in 6 digits + +#define weakadd_p384(P0,P1,P2) \ + movq P1, %rax ; \ + addq P2, %rax ; \ + movq 0x8+P1, %rcx ; \ + adcq 0x8+P2, %rcx ; \ + movq 0x10+P1, %r8 ; \ + adcq 0x10+P2, %r8 ; \ + movq 0x18+P1, %r9 ; \ + adcq 0x18+P2, %r9 ; \ + movq 0x20+P1, %r10 ; \ + adcq 0x20+P2, %r10 ; \ + movq 0x28+P1, %r11 ; \ + adcq 0x28+P2, %r11 ; \ + sbbq %rdx, %rdx ; \ + movl $1, %ebx ; \ + andq %rdx, %rbx ; \ + movl $0xffffffff, %ebp ; \ + andq %rbp, %rdx ; \ + xorq %rbp, %rbp ; \ + subq %rdx, %rbp ; \ + addq %rbp, %rax ; \ + movq %rax, P0 ; \ + adcq %rdx, %rcx ; \ + movq %rcx, 0x8+P0 ; \ + adcq %rbx, %r8 ; \ + movq %r8, 0x10+P0 ; \ + adcq $0x0, %r9 ; \ + movq %r9, 0x18+P0 ; \ + adcq $0x0, %r10 ; \ + movq %r10, 0x20+P0 ; \ + adcq $0x0, %r11 ; \ + movq %r11, 0x28+P0 + +// P0 = 3 * P1 - 8 * P2 + +#define cmsub38_p384(P0,P1,P2) \ + movq $0x00000000ffffffff, %r8 ; \ + subq P2, %r8 ; \ + movq $0xffffffff00000000, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movq $0xfffffffffffffffe, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq $0xffffffffffffffff, %r11 ; \ + sbbq 24+P2, %r11 ; \ + movq $0xffffffffffffffff, %r12 ; \ + sbbq 32+P2, %r12 ; \ + movq $0xffffffffffffffff, %r13 ; \ + sbbq 40+P2, %r13 ; \ + movq %r13, %r14 ; \ + shrq $61, %r14 ; \ + shldq $3, %r12, %r13 ; \ + shldq $3, %r11, %r12 ; \ + shldq $3, %r10, %r11 ; \ + shldq $3, %r9, %r10 ; \ + shldq $3, %r8, %r9 ; \ + shlq $3, %r8 ; \ + addq $1, %r14 ; \ + xorl %ecx, %ecx ; \ + movq $3, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 8+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 16+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 24+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 32+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 40+P1, %rax, %rdx ; \ + adcxq %rax, %r13 ; \ + adoxq %r14, %rdx ; \ + adcxq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rax, %rcx ; \ + adcxq %rax, %r8 ; \ + adoxq %rcx, %r9 ; \ + movl $0xffffffff, %eax ; \ + mulxq %rax, %rax, %rcx ; \ + adcxq %rax, %r9 ; \ + adoxq %rcx, %r10 ; \ + adcxq %rdx, %r10 ; \ + movl $0x0, %eax ; \ + movl $0x0, %ecx ; \ + adoxq %rax, %rax ; \ + adcq %rax, %r11 ; \ + adcq %rcx, %r12 ; \ + adcq %rcx, %r13 ; \ + adcq %rcx, %rcx ; \ + subq $0x1, %rcx ; \ + movl $0xffffffff, %edx ; \ + xorq %rax, %rax ; \ + andq %rcx, %rdx ; \ + subq %rdx, %rax ; \ + andq $0x1, %rcx ; \ + subq %rax, %r8 ; \ + movq %r8, P0 ; \ + sbbq %rdx, %r9 ; \ + movq %r9, 0x8+P0 ; \ + sbbq %rcx, %r10 ; \ + movq %r10, 0x10+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x18+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x20+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x28+P0 + +S2N_BN_SYMBOL(p384_montjdouble): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + +// Save registers and make room on stack for temporary variables +// Save the outpuy pointer %rdi which gets overwritten in earlier +// operations before it is used. 
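+//
+// The field-operation sequence below evaluates the standard Jacobian
+// doubling formulas, using a = -3 for P-384 so that the tangent-slope
+// numerator factors as m = 3*x^2 + a*z^4 = 3*(x - z^2)*(x + z^2):
+//
+//      x' = m^2 - 8*x*y^2
+//      y' = m * (4*x*y^2 - x') - 8*y^4
+//      z' = (y + z)^2 - y^2 - z^2 = 2*y*z
+//
+// where d = 12*x*y^2 - 9*(x^2 - z^4)^2 below plays the role of
+// 4*x*y^2 - x'. A cross-check of that bookkeeping (illustrative
+// Python/SymPy, not part of the build):
+//
+//     from sympy import symbols, expand
+//     x, y, z = symbols('x y z')
+//     x2p = x**2 - z**4; xy2 = x*y**2; m = 3*x2p
+//     d = 12*xy2 - 9*x2p**2
+//     xp = m**2 - 8*xy2
+//     assert expand(d) == expand(4*xy2 - xp)
+//     assert expand(3*(d*x2p) - 8*y**4) == expand(m*(4*xy2 - xp) - 8*y**4)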
+ + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $NSPACE, %rsp + + movq %rdi, input_z + +// Main code, just a sequence of basic field operations + +// z2 = z^2 +// y2 = y^2 + + montsqr_p384(z2,z_1) + montsqr_p384(y2,y_1) + +// x2p = x^2 - z^4 = (x + z^2) * (x - z^2) + + weakadd_p384(t1,x_1,z2) + sub_p384(t2,x_1,z2) + montmul_p384(x2p,t1,t2) + +// t1 = y + z +// x4p = x2p^2 +// xy2 = x * y^2 + + add_p384(t1,y_1,z_1) + montsqr_p384(x4p,x2p) + montmul_p384(xy2,x_1,y2) + +// t2 = (y + z)^2 + + montsqr_p384(t2,t1) + +// d = 12 * xy2 - 9 * x4p +// t1 = y^2 + 2 * y * z + + cmsub_p384(d,12,xy2,9,x4p) + sub_p384(t1,t2,z2) + +// y4 = y^4 + + montsqr_p384(y4,y2) + +// Restore the output pointer to write to x_3, y_3 and z_3. + + movq input_z, %rdi + +// z_3' = 2 * y * z +// dx2 = d * x2p + + sub_p384(z_3,t1,y2) + montmul_p384(dx2,d,x2p) + +// x' = 4 * xy2 - d + + cmsub41_p384(x_3,xy2,d) + +// y' = 3 * dx2 - 8 * y4 + + cmsub38_p384(y_3,dx2,y4) + +// Restore stack and registers + + addq $NSPACE, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/p384/p384_montjmixadd.S b/x86_att/p384/p384_montjmixadd.S new file mode 100644 index 0000000000..6749209eb4 --- /dev/null +++ b/x86_att/p384/p384_montjmixadd.S @@ -0,0 +1,941 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Point mixed addition on NIST curve P-384 in Montgomery-Jacobian coordinates +// +// extern void p384_montjmixadd +// (uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 12]); +// +// Does p3 := p1 + p2 where all points are regarded as Jacobian triples with +// each coordinate in the Montgomery domain, i.e. x' = (2^384 * x) mod p_384. +// A Jacobian triple (x',y',z') represents affine point (x/z^2,y/z^3). +// The "mixed" part means that p2 only has x and y coordinates, with the +// implicit z coordinate assumed to be the identity. 
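+// With z_2 implicitly 1, all the field operations involving z_2 are
+// saved: the main sequence below needs only 8 multiplications and
+// 3 squarings, versus the 12 multiplications and 4 squarings of a
+// full Jacobian addition.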
+// +// Standard x86-64 ABI: RDI = p3, RSI = p1, RDX = p2 +// Microsoft x64 ABI: RCX = p3, RDX = p1, R8 = p2 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjmixadd) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjmixadd) + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 + +// Pointer-offset pairs for inputs and outputs +// These assume %rdi = p3, %rsi = p1 and %rcx = p2, +// which needs to be set up explicitly before use + +#define x_1 0(%rsi) +#define y_1 NUMSIZE(%rsi) +#define z_1 (2*NUMSIZE)(%rsi) + +#define x_2 0(%rcx) +#define y_2 NUMSIZE(%rcx) + +#define x_3 0(%rdi) +#define y_3 NUMSIZE(%rdi) +#define z_3 (2*NUMSIZE)(%rdi) + +// Pointer-offset pairs for temporaries, with some aliasing +// NSPACE is the total stack needed for these temporaries + +#define zp2 (NUMSIZE*0)(%rsp) +#define ww (NUMSIZE*0)(%rsp) + +#define yd (NUMSIZE*1)(%rsp) +#define y2a (NUMSIZE*1)(%rsp) + +#define x2a (NUMSIZE*2)(%rsp) +#define zzx2 (NUMSIZE*2)(%rsp) + +#define zz (NUMSIZE*3)(%rsp) +#define t1 (NUMSIZE*3)(%rsp) + +#define t2 (NUMSIZE*4)(%rsp) +#define zzx1 (NUMSIZE*4)(%rsp) + +#define xd (NUMSIZE*5)(%rsp) + +// Temporaries for the actual input pointers + +#define input_x (NUMSIZE*6)(%rsp) +#define input_y (NUMSIZE*6+8)(%rsp) +#define input_z (NUMSIZE*6+16)(%rsp) + +#define NSPACE (NUMSIZE*6+24) + +// Corresponds exactly to bignum_montmul_p384 + +#define montmul_p384(P0,P1,P2) \ + movq P2, %rdx ; \ + xorl %r15d, %r15d ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rbx, %r10 ; \ + addq %rbx, %r9 ; \ + mulxq 0x10+P1, %rbx, %r11 ; \ + adcq %rbx, %r10 ; \ + mulxq 0x18+P1, %rbx, %r12 ; \ + adcq %rbx, %r11 ; \ + mulxq 0x20+P1, %rbx, %r13 ; \ + adcq %rbx, %r12 ; \ + mulxq 0x28+P1, %rbx, %r14 ; \ + adcq %rbx, %r13 ; \ + adcq %r15, %r14 ; \ + movq %r8, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r8, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r8, %rbx ; \ + adcq %r8, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbp, %r11 ; \ + sbbq $0x0, %r12 ; \ + sbbq $0x0, %r13 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x8+P2, %rdx ; \ + xorl %r8d, %r8d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + adoxq %r8, %r15 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r14 ; \ + adcq %rbx, %r15 ; \ + adcq %r8, %r8 ; \ + movq %r9, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r9, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r9, %rbx ; \ + adcq %r9, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r10 ; \ + sbbq %rbx, %r11 ; \ + sbbq %rbp, %r12 ; \ + sbbq $0x0, %r13 ; \ + sbbq $0x0, %r14 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r15 ; \ + adcq $0x0, %r8 ; \ + movq 0x10+P2, %rdx ; \ + xorl %r9d, %r9d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, 
%rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + adoxq %r9, %r8 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r15 ; \ + adcq %rbx, %r8 ; \ + adcq %r9, %r9 ; \ + movq %r10, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r10, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r10, %rbx ; \ + adcq %r10, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r11 ; \ + sbbq %rbx, %r12 ; \ + sbbq %rbp, %r13 ; \ + sbbq $0x0, %r14 ; \ + sbbq $0x0, %r15 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r8 ; \ + adcq $0x0, %r9 ; \ + movq 0x18+P2, %rdx ; \ + xorl %r10d, %r10d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + adoxq %r10, %r9 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %r10, %r10 ; \ + movq %r11, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r11, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r11, %rbx ; \ + adcq %r11, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r12 ; \ + sbbq %rbx, %r13 ; \ + sbbq %rbp, %r14 ; \ + sbbq $0x0, %r15 ; \ + sbbq $0x0, %r8 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r9 ; \ + adcq $0x0, %r10 ; \ + movq 0x20+P2, %rdx ; \ + xorl %r11d, %r11d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + adoxq %r11, %r10 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r9 ; \ + adcq %rbx, %r10 ; \ + adcq %r11, %r11 ; \ + movq %r12, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r12, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r12, %rbx ; \ + adcq %r12, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r13 ; \ + sbbq %rbx, %r14 ; \ + sbbq %rbp, %r15 ; \ + sbbq $0x0, %r8 ; \ + sbbq $0x0, %r9 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + movq 0x28+P2, %rdx ; \ + xorl %r12d, %r12d ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + adoxq %r12, %r11 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcq %rax, %r10 ; \ + adcq %rbx, %r11 ; \ + adcq %r12, %r12 ; \ + movq %r13, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r13, %rdx ; \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %rbx, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %r13, %rbx ; \ + 
adcq %r13, %rax ; \ + adcq %rdx, %rbx ; \ + adcl %ebp, %ebp ; \ + subq %rax, %r14 ; \ + sbbq %rbx, %r15 ; \ + sbbq %rbp, %r8 ; \ + sbbq $0x0, %r9 ; \ + sbbq $0x0, %r10 ; \ + sbbq $0x0, %rdx ; \ + addq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorl %edx, %edx ; \ + xorl %ebp, %ebp ; \ + xorl %r13d, %r13d ; \ + movq $0xffffffff00000001, %rax ; \ + addq %r14, %rax ; \ + movl $0xffffffff, %ebx ; \ + adcq %r15, %rbx ; \ + movl $0x1, %ecx ; \ + adcq %r8, %rcx ; \ + adcq %r9, %rdx ; \ + adcq %r10, %rbp ; \ + adcq %r11, %r13 ; \ + adcq $0x0, %r12 ; \ + cmovne %rax, %r14 ; \ + cmovne %rbx, %r15 ; \ + cmovne %rcx, %r8 ; \ + cmovne %rdx, %r9 ; \ + cmovne %rbp, %r10 ; \ + cmovne %r13, %r11 ; \ + movq %r14, P0 ; \ + movq %r15, 0x8+P0 ; \ + movq %r8, 0x10+P0 ; \ + movq %r9, 0x18+P0 ; \ + movq %r10, 0x20+P0 ; \ + movq %r11, 0x28+P0 + +// Corresponds exactly to bignum_montsqr_p384 + +#define montsqr_p384(P0,P1) \ + movq P1, %rdx ; \ + mulxq 0x8+P1, %r9, %r10 ; \ + mulxq 0x18+P1, %r11, %r12 ; \ + mulxq 0x28+P1, %r13, %r14 ; \ + movq 0x18+P1, %rdx ; \ + mulxq 0x20+P1, %r15, %rcx ; \ + xorl %ebp, %ebp ; \ + movq 0x10+P1, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq 0x8+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + adcxq %rbp, %r15 ; \ + adoxq %rbp, %rcx ; \ + adcq %rbp, %rcx ; \ + xorl %ebp, %ebp ; \ + movq 0x20+P1, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq 0x10+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x28+P1, %rax, %rdx ; \ + adcxq %rax, %r15 ; \ + adoxq %rdx, %rcx ; \ + movq 0x28+P1, %rdx ; \ + mulxq 0x20+P1, %rbx, %rbp ; \ + mulxq 0x18+P1, %rax, %rdx ; \ + adcxq %rax, %rcx ; \ + adoxq %rdx, %rbx ; \ + movl $0x0, %eax ; \ + adcxq %rax, %rbx ; \ + adoxq %rax, %rbp ; \ + adcq %rax, %rbp ; \ + xorq %rax, %rax ; \ + movq P1, %rdx ; \ + mulxq P1, %r8, %rax ; \ + adcxq %r9, %r9 ; \ + adoxq %rax, %r9 ; \ + movq 0x8+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r10, %r10 ; \ + adoxq %rax, %r10 ; \ + adcxq %r11, %r11 ; \ + adoxq %rdx, %r11 ; \ + movq 0x10+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r12, %r12 ; \ + adoxq %rax, %r12 ; \ + adcxq %r13, %r13 ; \ + adoxq %rdx, %r13 ; \ + movq 0x18+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r14, %r14 ; \ + adoxq %rax, %r14 ; \ + adcxq %r15, %r15 ; \ + adoxq %rdx, %r15 ; \ + movq 0x20+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %rcx, %rcx ; \ + adoxq %rax, %rcx ; \ + adcxq %rbx, %rbx ; \ + adoxq %rdx, %rbx ; \ + movq 0x28+P1, %rdx ; \ + mulxq %rdx, %rax, %rsi ; \ + adcxq %rbp, %rbp ; \ + adoxq %rax, %rbp ; \ + movl $0x0, %eax ; \ + adcxq %rax, %rsi ; \ + adoxq %rax, %rsi ; \ + movq %rbx, P0 ; \ + movq %r8, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r8, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r8, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r8 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r8 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r9 ; \ + sbbq %r8, %r10 ; \ + sbbq %rbx, %r11 ; \ + sbbq $0x0, %r12 ; \ + sbbq $0x0, %r13 ; \ + movq %rdx, %r8 ; \ + sbbq $0x0, %r8 ; \ + movq %r9, %rdx ; \ + shlq $0x20, %rdx ; 
\ + addq %r9, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r9, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r9 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r9 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r10 ; \ + sbbq %r9, %r11 ; \ + sbbq %rbx, %r12 ; \ + sbbq $0x0, %r13 ; \ + sbbq $0x0, %r8 ; \ + movq %rdx, %r9 ; \ + sbbq $0x0, %r9 ; \ + movq %r10, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r10, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r10, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r10 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r10 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r11 ; \ + sbbq %r10, %r12 ; \ + sbbq %rbx, %r13 ; \ + sbbq $0x0, %r8 ; \ + sbbq $0x0, %r9 ; \ + movq %rdx, %r10 ; \ + sbbq $0x0, %r10 ; \ + movq %r11, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r11, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r11, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r11 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r11 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r12 ; \ + sbbq %r11, %r13 ; \ + sbbq %rbx, %r8 ; \ + sbbq $0x0, %r9 ; \ + sbbq $0x0, %r10 ; \ + movq %rdx, %r11 ; \ + sbbq $0x0, %r11 ; \ + movq %r12, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r12, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r12, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r12 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r12 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r13 ; \ + sbbq %r12, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq $0x0, %r10 ; \ + sbbq $0x0, %r11 ; \ + movq %rdx, %r12 ; \ + sbbq $0x0, %r12 ; \ + movq %r13, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r13, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r13, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r13 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r13 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r8 ; \ + sbbq %r13, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq $0x0, %r11 ; \ + sbbq $0x0, %r12 ; \ + movq %rdx, %r13 ; \ + sbbq $0x0, %r13 ; \ + movq P0, %rbx ; \ + addq %r8, %r14 ; \ + adcq %r9, %r15 ; \ + adcq %r10, %rcx ; \ + adcq %r11, %rbx ; \ + adcq %r12, %rbp ; \ + adcq %r13, %rsi ; \ + movl $0x0, %r8d ; \ + adcq %r8, %r8 ; \ + xorq %r11, %r11 ; \ + xorq %r12, %r12 ; \ + xorq %r13, %r13 ; \ + movq $0xffffffff00000001, %rax ; \ + addq %r14, %rax ; \ + movl $0xffffffff, %r9d ; \ + adcq %r15, %r9 ; \ + movl $0x1, %r10d ; \ + adcq %rcx, %r10 ; \ + adcq %rbx, %r11 ; \ + adcq %rbp, %r12 ; \ + adcq %rsi, %r13 ; \ + adcq $0x0, %r8 ; \ + cmovne %rax, %r14 ; \ + cmovne %r9, %r15 ; \ + cmovne %r10, %rcx ; \ + cmovne %r11, %rbx ; \ + cmovne %r12, %rbp ; \ + cmovne %r13, %rsi ; \ + movq %r14, P0 ; \ + movq %r15, 0x8+P0 ; \ + movq %rcx, 0x10+P0 ; \ + movq %rbx, 0x18+P0 ; \ + movq %rbp, 0x20+P0 ; \ + movq %rsi, 0x28+P0 + +// Almost-Montgomery variant which we use when an input to other muls +// with the other argument fully reduced (which is always safe). 
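+// Here "almost" means the result is guaranteed to fit in 6 digits and to
+// be congruent to the true Montgomery square mod p_384, but it is not
+// necessarily fully reduced; that is harmless as long as the result only
+// feeds multiplications whose other operand is fully reduced.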
+ +#define amontsqr_p384(P0,P1) \ + movq P1, %rdx ; \ + mulxq 0x8+P1, %r9, %r10 ; \ + mulxq 0x18+P1, %r11, %r12 ; \ + mulxq 0x28+P1, %r13, %r14 ; \ + movq 0x18+P1, %rdx ; \ + mulxq 0x20+P1, %r15, %rcx ; \ + xorl %ebp, %ebp ; \ + movq 0x10+P1, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq 0x8+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + adcxq %rbp, %r15 ; \ + adoxq %rbp, %rcx ; \ + adcq %rbp, %rcx ; \ + xorl %ebp, %ebp ; \ + movq 0x20+P1, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq 0x10+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x28+P1, %rax, %rdx ; \ + adcxq %rax, %r15 ; \ + adoxq %rdx, %rcx ; \ + movq 0x28+P1, %rdx ; \ + mulxq 0x20+P1, %rbx, %rbp ; \ + mulxq 0x18+P1, %rax, %rdx ; \ + adcxq %rax, %rcx ; \ + adoxq %rdx, %rbx ; \ + movl $0x0, %eax ; \ + adcxq %rax, %rbx ; \ + adoxq %rax, %rbp ; \ + adcq %rax, %rbp ; \ + xorq %rax, %rax ; \ + movq P1, %rdx ; \ + mulxq P1, %r8, %rax ; \ + adcxq %r9, %r9 ; \ + adoxq %rax, %r9 ; \ + movq 0x8+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r10, %r10 ; \ + adoxq %rax, %r10 ; \ + adcxq %r11, %r11 ; \ + adoxq %rdx, %r11 ; \ + movq 0x10+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r12, %r12 ; \ + adoxq %rax, %r12 ; \ + adcxq %r13, %r13 ; \ + adoxq %rdx, %r13 ; \ + movq 0x18+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r14, %r14 ; \ + adoxq %rax, %r14 ; \ + adcxq %r15, %r15 ; \ + adoxq %rdx, %r15 ; \ + movq 0x20+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %rcx, %rcx ; \ + adoxq %rax, %rcx ; \ + adcxq %rbx, %rbx ; \ + adoxq %rdx, %rbx ; \ + movq 0x28+P1, %rdx ; \ + mulxq %rdx, %rax, %rsi ; \ + adcxq %rbp, %rbp ; \ + adoxq %rax, %rbp ; \ + movl $0x0, %eax ; \ + adcxq %rax, %rsi ; \ + adoxq %rax, %rsi ; \ + movq %rbx, P0 ; \ + movq %r8, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r8, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r8, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r8 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r8 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r9 ; \ + sbbq %r8, %r10 ; \ + sbbq %rbx, %r11 ; \ + sbbq $0x0, %r12 ; \ + sbbq $0x0, %r13 ; \ + movq %rdx, %r8 ; \ + sbbq $0x0, %r8 ; \ + movq %r9, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r9, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r9, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r9 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r9 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r10 ; \ + sbbq %r9, %r11 ; \ + sbbq %rbx, %r12 ; \ + sbbq $0x0, %r13 ; \ + sbbq $0x0, %r8 ; \ + movq %rdx, %r9 ; \ + sbbq $0x0, %r9 ; \ + movq %r10, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r10, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r10, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r10 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r10 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r11 ; \ + sbbq %r10, %r12 ; \ + sbbq %rbx, %r13 ; \ + sbbq $0x0, %r8 ; \ + sbbq $0x0, %r9 ; \ + movq %rdx, %r10 ; \ + sbbq $0x0, %r10 ; \ + movq %r11, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r11, 
%rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r11, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r11 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r11 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r12 ; \ + sbbq %r11, %r13 ; \ + sbbq %rbx, %r8 ; \ + sbbq $0x0, %r9 ; \ + sbbq $0x0, %r10 ; \ + movq %rdx, %r11 ; \ + sbbq $0x0, %r11 ; \ + movq %r12, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r12, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r12, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r12 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r12 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r13 ; \ + sbbq %r12, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq $0x0, %r10 ; \ + sbbq $0x0, %r11 ; \ + movq %rdx, %r12 ; \ + sbbq $0x0, %r12 ; \ + movq %r13, %rdx ; \ + shlq $0x20, %rdx ; \ + addq %r13, %rdx ; \ + movq $0xffffffff00000001, %rax ; \ + mulxq %rax, %r13, %rax ; \ + movl $0xffffffff, %ebx ; \ + mulxq %rbx, %rbx, %r13 ; \ + addq %rbx, %rax ; \ + adcq %rdx, %r13 ; \ + movl $0x0, %ebx ; \ + adcq %rbx, %rbx ; \ + subq %rax, %r8 ; \ + sbbq %r13, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq $0x0, %r11 ; \ + sbbq $0x0, %r12 ; \ + movq %rdx, %r13 ; \ + sbbq $0x0, %r13 ; \ + movq P0, %rbx ; \ + addq %r8, %r14 ; \ + adcq %r9, %r15 ; \ + adcq %r10, %rcx ; \ + adcq %r11, %rbx ; \ + adcq %r12, %rbp ; \ + adcq %r13, %rsi ; \ + movl $0x0, %r8d ; \ + movq $0xffffffff00000001, %rax ; \ + movl $0xffffffff, %r9d ; \ + movl $0x1, %r10d ; \ + cmovnc %r8, %rax ; \ + cmovnc %r8, %r9 ; \ + cmovnc %r8, %r10 ; \ + addq %rax, %r14 ; \ + adcq %r9, %r15 ; \ + adcq %r10, %rcx ; \ + adcq %r8, %rbx ; \ + adcq %r8, %rbp ; \ + adcq %r8, %rsi ; \ + movq %r14, P0 ; \ + movq %r15, 0x8+P0 ; \ + movq %rcx, 0x10+P0 ; \ + movq %rbx, 0x18+P0 ; \ + movq %rbp, 0x20+P0 ; \ + movq %rsi, 0x28+P0 + +// Corresponds exactly to bignum_sub_p384 + +#define sub_p384(P0,P1,P2) \ + movq P1, %rax ; \ + subq P2, %rax ; \ + movq 0x8+P1, %rdx ; \ + sbbq 0x8+P2, %rdx ; \ + movq 0x10+P1, %r8 ; \ + sbbq 0x10+P2, %r8 ; \ + movq 0x18+P1, %r9 ; \ + sbbq 0x18+P2, %r9 ; \ + movq 0x20+P1, %r10 ; \ + sbbq 0x20+P2, %r10 ; \ + movq 0x28+P1, %r11 ; \ + sbbq 0x28+P2, %r11 ; \ + sbbq %rcx, %rcx ; \ + movl $0xffffffff, %esi ; \ + andq %rsi, %rcx ; \ + xorq %rsi, %rsi ; \ + subq %rcx, %rsi ; \ + subq %rsi, %rax ; \ + movq %rax, P0 ; \ + sbbq %rcx, %rdx ; \ + movq %rdx, 0x8+P0 ; \ + sbbq %rax, %rax ; \ + andq %rsi, %rcx ; \ + negq %rax; \ + sbbq %rcx, %r8 ; \ + movq %r8, 0x10+P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x18+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x20+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x28+P0 + +S2N_BN_SYMBOL(p384_montjmixadd): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + +// Save registers and make room on stack for temporary variables +// Put the input arguments in non-volatile places on the stack + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $NSPACE, %rsp + + movq %rdi, input_z + movq %rsi, input_x + movq %rdx, input_y + +// Main code, just a sequence of basic field operations +// 8 * multiply + 3 * square + 7 * subtract + + amontsqr_p384(zp2,z_1) + + movq input_x, %rsi + movq input_y, %rcx + montmul_p384(y2a,z_1,y_2) + + movq input_y, %rcx + montmul_p384(x2a,zp2,x_2) + + montmul_p384(y2a,zp2,y2a) + + movq input_x, %rsi + sub_p384(xd,x2a,x_1) + movq input_x, %rsi + sub_p384(yd,y2a,y_1) + + amontsqr_p384(zz,xd) + montsqr_p384(ww,yd) + + movq input_x, %rsi + montmul_p384(zzx1,zz,x_1) + 
montmul_p384(zzx2,zz,x2a) + + movq input_z, %rdi + sub_p384(x_3,ww,zzx1) + sub_p384(t1,zzx2,zzx1) + + movq input_z, %rdi + movq input_x, %rsi + montmul_p384(z_3,xd,z_1) + + movq input_z, %rdi + sub_p384(x_3,x_3,zzx2) + + movq input_z, %rdi + sub_p384(t2,zzx1,x_3) + + movq input_x, %rsi + montmul_p384(t1,t1,y_1) + montmul_p384(t2,yd,t2) + + movq input_z, %rdi + sub_p384(y_3,t2,t1) + +// Restore stack and registers + + addq $NSPACE, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif From 6fec512d151fcb73f20593881bfc9ba543b6f162 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 15 Jul 2022 21:36:04 -0700 Subject: [PATCH 13/42] Tweak ARM bignum_sqr_p521_alt to use fewer registers s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/57a43a3c6f4d29c822b1c226557ced539be575ef --- arm/p521/bignum_sqr_p521_alt.S | 68 ++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/arm/p521/bignum_sqr_p521_alt.S b/arm/p521/bignum_sqr_p521_alt.S index 6ba447f390..fe2201c00e 100644 --- a/arm/p521/bignum_sqr_p521_alt.S +++ b/arm/p521/bignum_sqr_p521_alt.S @@ -43,23 +43,23 @@ #define l x10 -#define u0 x11 -#define u1 x12 -#define u2 x13 -#define u3 x14 -#define u4 x15 -#define u5 x16 -#define u6 x17 -#define u7 x19 -#define u8 x20 -#define u9 x21 -#define u10 x22 -#define u11 x23 -#define u12 x24 -#define u13 x25 -#define u14 x26 -#define u15 x27 -#define u16 x29 +#define u0 x2 // The same as a0 +#define u1 x11 +#define u2 x12 +#define u3 x13 +#define u4 x14 +#define u5 x15 +#define u6 x16 +#define u7 x17 +#define u8 x19 +#define u9 x20 +#define u10 x21 +#define u11 x22 +#define u12 x23 +#define u13 x24 +#define u14 x25 +#define u15 x26 +#define u16 x4 // The same as a2 S2N_BN_SYMBOL(bignum_sqr_p521_alt): @@ -69,7 +69,6 @@ S2N_BN_SYMBOL(bignum_sqr_p521_alt): stp x21, x22, [sp, #-16]! stp x23, x24, [sp, #-16]! stp x25, x26, [sp, #-16]! - stp x27, x29, [sp, #-16]! // Load low 8 elements as [a7;a6;a5;a4;a3;a2;a1;a0], set up an initial // window [u8;u7;u6;u5;u4;u3;u2;u1] = 10 + 20 + 30 + 40 + 50 + 60 + 70 @@ -231,7 +230,6 @@ S2N_BN_SYMBOL(bignum_sqr_p521_alt): // Add the homogeneous terms 00 + 11 + 22 + 33 + 44 + 55 + 66 + 77 umulh l, a0, a0 - mul u0, a0, a0 adds u1, u1, l mul l, a1, a1 @@ -269,49 +267,58 @@ S2N_BN_SYMBOL(bignum_sqr_p521_alt): umulh l, a7, a7 adc u15, u15, l -// Now load in the top digit a8, and also set up its double and square +// Now load in the top digit a8, and immediately double the register ldr a8, [x, #64] - mul u16, a8, a8 add a8, a8, a8 -// Add a8 * [a7;...;a0] into the top of the buffer +// Add (2 * a8) * [a7;...;a0] into the top of the buffer +// At the end of the first chain we form u16 = a8 ^ 2. +// This needs us to shift right the modified a8 again but it saves a +// register, and the overall performance impact seems slightly positive. 
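+// Concretely, a8 now holds twice the original top digit (at most
+// 2^9 - 1 for a reduced input, so the doubling cannot overflow), and
+// the lsr/mul pair below computes u16 = (a8 >> 1)^2, the square of
+// the original digit.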
mul l, a8, a0 adds u8, u8, l - mul l, a8, a1 + umulh l, a8, a0 adcs u9, u9, l mul l, a8, a2 adcs u10, u10, l - mul l, a8, a3 + umulh l, a8, a2 adcs u11, u11, l mul l, a8, a4 adcs u12, u12, l - mul l, a8, a5 + umulh l, a8, a4 adcs u13, u13, l mul l, a8, a6 adcs u14, u14, l - mul l, a8, a7 + umulh l, a8, a6 adcs u15, u15, l + lsr u16, a8, #1 + mul u16, u16, u16 adc u16, u16, xzr - umulh l, a8, a0 + mul l, a8, a1 adds u9, u9, l umulh l, a8, a1 adcs u10, u10, l - umulh l, a8, a2 + mul l, a8, a3 adcs u11, u11, l umulh l, a8, a3 adcs u12, u12, l - umulh l, a8, a4 + mul l, a8, a5 adcs u13, u13, l umulh l, a8, a5 adcs u14, u14, l - umulh l, a8, a6 + mul l, a8, a7 adcs u15, u15, l umulh l, a8, a7 adc u16, u16, l +// Finally squeeze in the lowest mul. This didn't need to be involved +// in the addition chains and moreover lets us re-use u0 == a0 + + mul u0, a0, a0 + // Now we have the full product, which we consider as // 2^521 * h + l. Form h + l + 1 @@ -361,7 +368,6 @@ S2N_BN_SYMBOL(bignum_sqr_p521_alt): // Restore registers and return - ldp x27, x29, [sp], #16 ldp x25, x26, [sp], #16 ldp x23, x24, [sp], #16 ldp x21, x22, [sp], #16 From 7b51bdfd435c31d1c00b3100c567c0f912ebd509 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 21 Jul 2022 06:35:02 -0700 Subject: [PATCH 14/42] Add basic NIST P-521 point operations The same trio of a point doubling function, point addition function and point mixed addition function, this time for the P-521 curve, all using Jacobian coordinates, with input nondegeneracy assumed. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/047c0b1401610f9933a60ce0836143f9217ffa34 --- arm/p521/Makefile | 5 +- arm/p521/p521_jadd.S | 808 +++++++++++++++++++ arm/p521/p521_jdouble.S | 1470 +++++++++++++++++++++++++++++++++++ arm/p521/p521_jmixadd.S | 797 +++++++++++++++++++ x86_att/p521/p521_jadd.S | 765 ++++++++++++++++++ x86_att/p521/p521_jdouble.S | 1386 +++++++++++++++++++++++++++++++++ x86_att/p521/p521_jmixadd.S | 756 ++++++++++++++++++ 7 files changed, 5986 insertions(+), 1 deletion(-) create mode 100644 arm/p521/p521_jadd.S create mode 100644 arm/p521/p521_jdouble.S create mode 100644 arm/p521/p521_jmixadd.S create mode 100644 x86_att/p521/p521_jadd.S create mode 100644 x86_att/p521/p521_jdouble.S create mode 100644 x86_att/p521/p521_jmixadd.S diff --git a/arm/p521/Makefile b/arm/p521/Makefile index 921016115f..9ea36beb96 100644 --- a/arm/p521/Makefile +++ b/arm/p521/Makefile @@ -53,7 +53,10 @@ OBJ = bignum_add_p521.o \ bignum_sub_p521.o \ bignum_tolebytes_p521.o \ bignum_tomont_p521.o \ - bignum_triple_p521.o + bignum_triple_p521.o \ + p521_jadd.o \ + p521_jdouble.o \ + p521_jmixadd.o %.o : %.S ; $(CC) -E -I../../include $< | $(GAS) -o $@ - diff --git a/arm/p521/p521_jadd.S b/arm/p521/p521_jadd.S new file mode 100644 index 0000000000..85e62e01e7 --- /dev/null +++ b/arm/p521/p521_jadd.S @@ -0,0 +1,808 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */
+
+// ----------------------------------------------------------------------------
+// Point addition on NIST curve P-521 in Jacobian coordinates
+//
+// extern void p521_jadd
+// (uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 27]);
+//
+// Does p3 := p1 + p2 where all points are regarded as Jacobian triples.
+// A Jacobian triple (x,y,z) represents affine point (x/z^2,y/z^3).
+// It is assumed that all coordinates of the input points p1 and p2 are
+// fully reduced mod p_521, that both z coordinates are nonzero and
+// that neither p1 =~= p2 nor p1 =~= -p2, where "=~=" means "represents
+// the same affine point as".
+//
+// Standard ARM ABI: X0 = p3, X1 = p1, X2 = p2
+// ----------------------------------------------------------------------------
+#include "_internal_s2n_bignum.h"
+
+        S2N_BN_SYM_VISIBILITY_DIRECTIVE(p521_jadd)
+        S2N_BN_SYM_PRIVACY_DIRECTIVE(p521_jadd)
+
+        .text
+        .balign 4
+
+// Size of individual field elements
+
+#define NUMSIZE 72
+
+// Stable homes for input arguments during main code sequence
+
+#define input_z x26
+#define input_x x27
+#define input_y x28
+
+// Pointer-offset pairs for inputs and outputs
+
+#define x_1 input_x, #0
+#define y_1 input_x, #NUMSIZE
+#define z_1 input_x, #(2*NUMSIZE)
+
+#define x_2 input_y, #0
+#define y_2 input_y, #NUMSIZE
+#define z_2 input_y, #(2*NUMSIZE)
+
+#define x_3 input_z, #0
+#define y_3 input_z, #NUMSIZE
+#define z_3 input_z, #(2*NUMSIZE)
+
+// Pointer-offset pairs for temporaries, with some aliasing
+// NSPACE is the total stack needed for these temporaries
+
+#define z1sq sp, #(NUMSIZE*0)
+#define ww sp, #(NUMSIZE*0)
+
+#define yd sp, #(NUMSIZE*1)
+#define y2a sp, #(NUMSIZE*1)
+
+#define x2a sp, #(NUMSIZE*2)
+#define zzx2 sp, #(NUMSIZE*2)
+
+#define zz sp, #(NUMSIZE*3)
+#define t1 sp, #(NUMSIZE*3)
+
+#define t2 sp, #(NUMSIZE*4)
+#define x1a sp, #(NUMSIZE*4)
+#define zzx1 sp, #(NUMSIZE*4)
+
+#define xd sp, #(NUMSIZE*5)
+#define z2sq sp, #(NUMSIZE*5)
+
+#define y1a sp, #(NUMSIZE*6)
+
+// NUMSIZE*7 is not 16-aligned so we round it up
+
+#define NSPACE (NUMSIZE*7+8)
+
+// Corresponds exactly to bignum_mul_p521_alt
+
+#define mul_p521(P0,P1,P2) \
+        ldp x3, x4, [P1]; \
+        ldp x5, x6, [P2]; \
+        mul x15, x3, x5; \
+        umulh x16, x3, x5; \
+        mul x14, x3, x6; \
+        umulh x17, x3, x6; \
+        adds x16, x16, x14; \
+        ldp x7, x8, [P2+16]; \
+        mul x14, x3, x7; \
+        umulh x19, x3, x7; \
+        adcs x17, x17, x14; \
+        mul x14, x3, x8; \
+        umulh x20, x3, x8; \
+        adcs x19, x19, x14; \
+        ldp x9, x10, [P2+32]; \
+        mul x14, x3, x9; \
+        umulh x21, x3, x9; \
+        adcs x20, x20, x14; \
+        mul x14, x3, x10; \
+        umulh x22, x3, x10; \
+        adcs x21, x21, x14; \
+        ldp x11, x12, [P2+48]; \
+        mul x14, x3, x11; \
+        umulh x23, x3, x11; \
+        adcs x22, x22, x14; \
+        ldr x13, [P2+64]; \
+        mul x14, x3, x12; \
+        umulh x24, x3, x12; \
+        adcs x23, x23, x14; \
+        mul x14, x3, x13; \
+        umulh x1, x3, x13; \
+        adcs x24, x24, x14; \
+        adc x1, x1, xzr; \
+        mul x14, x4, x5; \
+        adds x16, x16, x14; \
+        mul x14, x4, x6; \
+        adcs x17, x17, x14; \
+        mul x14, x4, x7; \
+        adcs x19, x19, x14; \
+        mul x14, x4, x8; \
+        adcs x20, x20, x14; \
+        mul x14, x4, x9; \
+        adcs x21, x21, x14; \
+        mul x14, x4, x10; \
+        adcs x22, x22, x14; \
+        mul x14, x4, x11; \
+        adcs x23, x23, x14; \
+        mul x14, x4, x12; \
+        adcs x24, x24, x14; \
+        mul x14, x4, x13; \
+        adcs x1, x1, x14; \
+        cset x0, hs; \
+        umulh x14, x4, x5; \
+        adds x17, x17, x14; \
+        umulh x14, x4, x6; \
+        adcs x19, x19, x14; \
+        umulh x14, x4, x7; \
+        adcs x20, x20, x14; \
+        umulh x14, x4, x8; \
+        adcs x21, x21, x14; \
+        umulh x14,
x4, x9; \ + adcs x22, x22, x14; \ + umulh x14, x4, x10; \ + adcs x23, x23, x14; \ + umulh x14, x4, x11; \ + adcs x24, x24, x14; \ + umulh x14, x4, x12; \ + adcs x1, x1, x14; \ + umulh x14, x4, x13; \ + adc x0, x0, x14; \ + stp x15, x16, [P0]; \ + ldp x3, x4, [P1+16]; \ + mul x14, x3, x5; \ + adds x17, x17, x14; \ + mul x14, x3, x6; \ + adcs x19, x19, x14; \ + mul x14, x3, x7; \ + adcs x20, x20, x14; \ + mul x14, x3, x8; \ + adcs x21, x21, x14; \ + mul x14, x3, x9; \ + adcs x22, x22, x14; \ + mul x14, x3, x10; \ + adcs x23, x23, x14; \ + mul x14, x3, x11; \ + adcs x24, x24, x14; \ + mul x14, x3, x12; \ + adcs x1, x1, x14; \ + mul x14, x3, x13; \ + adcs x0, x0, x14; \ + cset x15, hs; \ + umulh x14, x3, x5; \ + adds x19, x19, x14; \ + umulh x14, x3, x6; \ + adcs x20, x20, x14; \ + umulh x14, x3, x7; \ + adcs x21, x21, x14; \ + umulh x14, x3, x8; \ + adcs x22, x22, x14; \ + umulh x14, x3, x9; \ + adcs x23, x23, x14; \ + umulh x14, x3, x10; \ + adcs x24, x24, x14; \ + umulh x14, x3, x11; \ + adcs x1, x1, x14; \ + umulh x14, x3, x12; \ + adcs x0, x0, x14; \ + umulh x14, x3, x13; \ + adc x15, x15, x14; \ + mul x14, x4, x5; \ + adds x19, x19, x14; \ + mul x14, x4, x6; \ + adcs x20, x20, x14; \ + mul x14, x4, x7; \ + adcs x21, x21, x14; \ + mul x14, x4, x8; \ + adcs x22, x22, x14; \ + mul x14, x4, x9; \ + adcs x23, x23, x14; \ + mul x14, x4, x10; \ + adcs x24, x24, x14; \ + mul x14, x4, x11; \ + adcs x1, x1, x14; \ + mul x14, x4, x12; \ + adcs x0, x0, x14; \ + mul x14, x4, x13; \ + adcs x15, x15, x14; \ + cset x16, hs; \ + umulh x14, x4, x5; \ + adds x20, x20, x14; \ + umulh x14, x4, x6; \ + adcs x21, x21, x14; \ + umulh x14, x4, x7; \ + adcs x22, x22, x14; \ + umulh x14, x4, x8; \ + adcs x23, x23, x14; \ + umulh x14, x4, x9; \ + adcs x24, x24, x14; \ + umulh x14, x4, x10; \ + adcs x1, x1, x14; \ + umulh x14, x4, x11; \ + adcs x0, x0, x14; \ + umulh x14, x4, x12; \ + adcs x15, x15, x14; \ + umulh x14, x4, x13; \ + adc x16, x16, x14; \ + stp x17, x19, [P0+16]; \ + ldp x3, x4, [P1+32]; \ + mul x14, x3, x5; \ + adds x20, x20, x14; \ + mul x14, x3, x6; \ + adcs x21, x21, x14; \ + mul x14, x3, x7; \ + adcs x22, x22, x14; \ + mul x14, x3, x8; \ + adcs x23, x23, x14; \ + mul x14, x3, x9; \ + adcs x24, x24, x14; \ + mul x14, x3, x10; \ + adcs x1, x1, x14; \ + mul x14, x3, x11; \ + adcs x0, x0, x14; \ + mul x14, x3, x12; \ + adcs x15, x15, x14; \ + mul x14, x3, x13; \ + adcs x16, x16, x14; \ + cset x17, hs; \ + umulh x14, x3, x5; \ + adds x21, x21, x14; \ + umulh x14, x3, x6; \ + adcs x22, x22, x14; \ + umulh x14, x3, x7; \ + adcs x23, x23, x14; \ + umulh x14, x3, x8; \ + adcs x24, x24, x14; \ + umulh x14, x3, x9; \ + adcs x1, x1, x14; \ + umulh x14, x3, x10; \ + adcs x0, x0, x14; \ + umulh x14, x3, x11; \ + adcs x15, x15, x14; \ + umulh x14, x3, x12; \ + adcs x16, x16, x14; \ + umulh x14, x3, x13; \ + adc x17, x17, x14; \ + mul x14, x4, x5; \ + adds x21, x21, x14; \ + mul x14, x4, x6; \ + adcs x22, x22, x14; \ + mul x14, x4, x7; \ + adcs x23, x23, x14; \ + mul x14, x4, x8; \ + adcs x24, x24, x14; \ + mul x14, x4, x9; \ + adcs x1, x1, x14; \ + mul x14, x4, x10; \ + adcs x0, x0, x14; \ + mul x14, x4, x11; \ + adcs x15, x15, x14; \ + mul x14, x4, x12; \ + adcs x16, x16, x14; \ + mul x14, x4, x13; \ + adcs x17, x17, x14; \ + cset x19, hs; \ + umulh x14, x4, x5; \ + adds x22, x22, x14; \ + umulh x14, x4, x6; \ + adcs x23, x23, x14; \ + umulh x14, x4, x7; \ + adcs x24, x24, x14; \ + umulh x14, x4, x8; \ + adcs x1, x1, x14; \ + umulh x14, x4, x9; \ + adcs x0, x0, x14; \ + umulh x14, x4, x10; \ + adcs x15, x15, 
x14; \ + umulh x14, x4, x11; \ + adcs x16, x16, x14; \ + umulh x14, x4, x12; \ + adcs x17, x17, x14; \ + umulh x14, x4, x13; \ + adc x19, x19, x14; \ + stp x20, x21, [P0+32]; \ + ldp x3, x4, [P1+48]; \ + mul x14, x3, x5; \ + adds x22, x22, x14; \ + mul x14, x3, x6; \ + adcs x23, x23, x14; \ + mul x14, x3, x7; \ + adcs x24, x24, x14; \ + mul x14, x3, x8; \ + adcs x1, x1, x14; \ + mul x14, x3, x9; \ + adcs x0, x0, x14; \ + mul x14, x3, x10; \ + adcs x15, x15, x14; \ + mul x14, x3, x11; \ + adcs x16, x16, x14; \ + mul x14, x3, x12; \ + adcs x17, x17, x14; \ + mul x14, x3, x13; \ + adcs x19, x19, x14; \ + cset x20, hs; \ + umulh x14, x3, x5; \ + adds x23, x23, x14; \ + umulh x14, x3, x6; \ + adcs x24, x24, x14; \ + umulh x14, x3, x7; \ + adcs x1, x1, x14; \ + umulh x14, x3, x8; \ + adcs x0, x0, x14; \ + umulh x14, x3, x9; \ + adcs x15, x15, x14; \ + umulh x14, x3, x10; \ + adcs x16, x16, x14; \ + umulh x14, x3, x11; \ + adcs x17, x17, x14; \ + umulh x14, x3, x12; \ + adcs x19, x19, x14; \ + umulh x14, x3, x13; \ + adc x20, x20, x14; \ + mul x14, x4, x5; \ + adds x23, x23, x14; \ + mul x14, x4, x6; \ + adcs x24, x24, x14; \ + mul x14, x4, x7; \ + adcs x1, x1, x14; \ + mul x14, x4, x8; \ + adcs x0, x0, x14; \ + mul x14, x4, x9; \ + adcs x15, x15, x14; \ + mul x14, x4, x10; \ + adcs x16, x16, x14; \ + mul x14, x4, x11; \ + adcs x17, x17, x14; \ + mul x14, x4, x12; \ + adcs x19, x19, x14; \ + mul x14, x4, x13; \ + adcs x20, x20, x14; \ + cset x21, hs; \ + umulh x14, x4, x5; \ + adds x24, x24, x14; \ + umulh x14, x4, x6; \ + adcs x1, x1, x14; \ + umulh x14, x4, x7; \ + adcs x0, x0, x14; \ + umulh x14, x4, x8; \ + adcs x15, x15, x14; \ + umulh x14, x4, x9; \ + adcs x16, x16, x14; \ + umulh x14, x4, x10; \ + adcs x17, x17, x14; \ + umulh x14, x4, x11; \ + adcs x19, x19, x14; \ + umulh x14, x4, x12; \ + adcs x20, x20, x14; \ + umulh x14, x4, x13; \ + adc x21, x21, x14; \ + stp x22, x23, [P0+48]; \ + ldr x3, [P1+64]; \ + mul x14, x3, x5; \ + adds x24, x24, x14; \ + mul x14, x3, x6; \ + adcs x1, x1, x14; \ + mul x14, x3, x7; \ + adcs x0, x0, x14; \ + mul x14, x3, x8; \ + adcs x15, x15, x14; \ + mul x14, x3, x9; \ + adcs x16, x16, x14; \ + mul x14, x3, x10; \ + adcs x17, x17, x14; \ + mul x14, x3, x11; \ + adcs x19, x19, x14; \ + mul x14, x3, x12; \ + adcs x20, x20, x14; \ + mul x14, x3, x13; \ + adc x21, x21, x14; \ + umulh x14, x3, x5; \ + adds x1, x1, x14; \ + umulh x14, x3, x6; \ + adcs x0, x0, x14; \ + umulh x14, x3, x7; \ + adcs x15, x15, x14; \ + umulh x14, x3, x8; \ + adcs x16, x16, x14; \ + umulh x14, x3, x9; \ + adcs x17, x17, x14; \ + umulh x14, x3, x10; \ + adcs x19, x19, x14; \ + umulh x14, x3, x11; \ + adcs x20, x20, x14; \ + umulh x14, x3, x12; \ + adc x21, x21, x14; \ + cmp xzr, xzr; \ + ldp x5, x6, [P0]; \ + extr x14, x1, x24, #9; \ + adcs x5, x5, x14; \ + extr x14, x0, x1, #9; \ + adcs x6, x6, x14; \ + ldp x7, x8, [P0+16]; \ + extr x14, x15, x0, #9; \ + adcs x7, x7, x14; \ + extr x14, x16, x15, #9; \ + adcs x8, x8, x14; \ + ldp x9, x10, [P0+32]; \ + extr x14, x17, x16, #9; \ + adcs x9, x9, x14; \ + extr x14, x19, x17, #9; \ + adcs x10, x10, x14; \ + ldp x11, x12, [P0+48]; \ + extr x14, x20, x19, #9; \ + adcs x11, x11, x14; \ + extr x14, x21, x20, #9; \ + adcs x12, x12, x14; \ + orr x13, x24, #0xfffffffffffffe00; \ + lsr x14, x21, #9; \ + adcs x13, x13, x14; \ + sbcs x5, x5, xzr; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, xzr; \ + and x13, x13, #0x1ff; \ 
+ stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32]; \ + stp x11, x12, [P0+48]; \ + str x13, [P0+64] + +// Corresponds exactly to bignum_sqr_p521_alt + +#define sqr_p521(P0,P1) \ + ldp x2, x3, [P1]; \ + mul x11, x2, x3; \ + umulh x12, x2, x3; \ + ldp x4, x5, [P1+16]; \ + mul x10, x2, x4; \ + umulh x13, x2, x4; \ + adds x12, x12, x10; \ + ldp x6, x7, [P1+32]; \ + mul x10, x2, x5; \ + umulh x14, x2, x5; \ + adcs x13, x13, x10; \ + ldp x8, x9, [P1+48]; \ + mul x10, x2, x6; \ + umulh x15, x2, x6; \ + adcs x14, x14, x10; \ + mul x10, x2, x7; \ + umulh x16, x2, x7; \ + adcs x15, x15, x10; \ + mul x10, x2, x8; \ + umulh x17, x2, x8; \ + adcs x16, x16, x10; \ + mul x10, x2, x9; \ + umulh x19, x2, x9; \ + adcs x17, x17, x10; \ + adc x19, x19, xzr; \ + mul x10, x3, x4; \ + adds x13, x13, x10; \ + mul x10, x3, x5; \ + adcs x14, x14, x10; \ + mul x10, x3, x6; \ + adcs x15, x15, x10; \ + mul x10, x3, x7; \ + adcs x16, x16, x10; \ + mul x10, x3, x8; \ + adcs x17, x17, x10; \ + mul x10, x3, x9; \ + adcs x19, x19, x10; \ + cset x20, hs; \ + umulh x10, x3, x4; \ + adds x14, x14, x10; \ + umulh x10, x3, x5; \ + adcs x15, x15, x10; \ + umulh x10, x3, x6; \ + adcs x16, x16, x10; \ + umulh x10, x3, x7; \ + adcs x17, x17, x10; \ + umulh x10, x3, x8; \ + adcs x19, x19, x10; \ + umulh x10, x3, x9; \ + adc x20, x20, x10; \ + mul x10, x6, x7; \ + umulh x21, x6, x7; \ + adds x20, x20, x10; \ + adc x21, x21, xzr; \ + mul x10, x4, x5; \ + adds x15, x15, x10; \ + mul x10, x4, x6; \ + adcs x16, x16, x10; \ + mul x10, x4, x7; \ + adcs x17, x17, x10; \ + mul x10, x4, x8; \ + adcs x19, x19, x10; \ + mul x10, x4, x9; \ + adcs x20, x20, x10; \ + mul x10, x6, x8; \ + adcs x21, x21, x10; \ + cset x22, hs; \ + umulh x10, x4, x5; \ + adds x16, x16, x10; \ + umulh x10, x4, x6; \ + adcs x17, x17, x10; \ + umulh x10, x4, x7; \ + adcs x19, x19, x10; \ + umulh x10, x4, x8; \ + adcs x20, x20, x10; \ + umulh x10, x4, x9; \ + adcs x21, x21, x10; \ + umulh x10, x6, x8; \ + adc x22, x22, x10; \ + mul x10, x7, x8; \ + umulh x23, x7, x8; \ + adds x22, x22, x10; \ + adc x23, x23, xzr; \ + mul x10, x5, x6; \ + adds x17, x17, x10; \ + mul x10, x5, x7; \ + adcs x19, x19, x10; \ + mul x10, x5, x8; \ + adcs x20, x20, x10; \ + mul x10, x5, x9; \ + adcs x21, x21, x10; \ + mul x10, x6, x9; \ + adcs x22, x22, x10; \ + mul x10, x7, x9; \ + adcs x23, x23, x10; \ + cset x24, hs; \ + umulh x10, x5, x6; \ + adds x19, x19, x10; \ + umulh x10, x5, x7; \ + adcs x20, x20, x10; \ + umulh x10, x5, x8; \ + adcs x21, x21, x10; \ + umulh x10, x5, x9; \ + adcs x22, x22, x10; \ + umulh x10, x6, x9; \ + adcs x23, x23, x10; \ + umulh x10, x7, x9; \ + adc x24, x24, x10; \ + mul x10, x8, x9; \ + umulh x25, x8, x9; \ + adds x24, x24, x10; \ + adc x25, x25, xzr; \ + adds x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + adcs x15, x15, x15; \ + adcs x16, x16, x16; \ + adcs x17, x17, x17; \ + adcs x19, x19, x19; \ + adcs x20, x20, x20; \ + adcs x21, x21, x21; \ + adcs x22, x22, x22; \ + adcs x23, x23, x23; \ + adcs x24, x24, x24; \ + adcs x25, x25, x25; \ + cset x0, hs; \ + umulh x10, x2, x2; \ + adds x11, x11, x10; \ + mul x10, x3, x3; \ + adcs x12, x12, x10; \ + umulh x10, x3, x3; \ + adcs x13, x13, x10; \ + mul x10, x4, x4; \ + adcs x14, x14, x10; \ + umulh x10, x4, x4; \ + adcs x15, x15, x10; \ + mul x10, x5, x5; \ + adcs x16, x16, x10; \ + umulh x10, x5, x5; \ + adcs x17, x17, x10; \ + mul x10, x6, x6; \ + adcs x19, x19, x10; \ + umulh x10, x6, x6; \ + adcs x20, x20, x10; \ + mul x10, x7, x7; \ + adcs x21, x21, 
x10; \ + umulh x10, x7, x7; \ + adcs x22, x22, x10; \ + mul x10, x8, x8; \ + adcs x23, x23, x10; \ + umulh x10, x8, x8; \ + adcs x24, x24, x10; \ + mul x10, x9, x9; \ + adcs x25, x25, x10; \ + umulh x10, x9, x9; \ + adc x0, x0, x10; \ + ldr x1, [P1+64]; \ + add x1, x1, x1; \ + mul x10, x1, x2; \ + adds x19, x19, x10; \ + umulh x10, x1, x2; \ + adcs x20, x20, x10; \ + mul x10, x1, x4; \ + adcs x21, x21, x10; \ + umulh x10, x1, x4; \ + adcs x22, x22, x10; \ + mul x10, x1, x6; \ + adcs x23, x23, x10; \ + umulh x10, x1, x6; \ + adcs x24, x24, x10; \ + mul x10, x1, x8; \ + adcs x25, x25, x10; \ + umulh x10, x1, x8; \ + adcs x0, x0, x10; \ + lsr x4, x1, #1; \ + mul x4, x4, x4; \ + adc x4, x4, xzr; \ + mul x10, x1, x3; \ + adds x20, x20, x10; \ + umulh x10, x1, x3; \ + adcs x21, x21, x10; \ + mul x10, x1, x5; \ + adcs x22, x22, x10; \ + umulh x10, x1, x5; \ + adcs x23, x23, x10; \ + mul x10, x1, x7; \ + adcs x24, x24, x10; \ + umulh x10, x1, x7; \ + adcs x25, x25, x10; \ + mul x10, x1, x9; \ + adcs x0, x0, x10; \ + umulh x10, x1, x9; \ + adc x4, x4, x10; \ + mul x2, x2, x2; \ + cmp xzr, xzr; \ + extr x10, x20, x19, #9; \ + adcs x2, x2, x10; \ + extr x10, x21, x20, #9; \ + adcs x11, x11, x10; \ + extr x10, x22, x21, #9; \ + adcs x12, x12, x10; \ + extr x10, x23, x22, #9; \ + adcs x13, x13, x10; \ + extr x10, x24, x23, #9; \ + adcs x14, x14, x10; \ + extr x10, x25, x24, #9; \ + adcs x15, x15, x10; \ + extr x10, x0, x25, #9; \ + adcs x16, x16, x10; \ + extr x10, x4, x0, #9; \ + adcs x17, x17, x10; \ + orr x19, x19, #0xfffffffffffffe00; \ + lsr x10, x4, #9; \ + adcs x19, x19, x10; \ + sbcs x2, x2, xzr; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbcs x15, x15, xzr; \ + sbcs x16, x16, xzr; \ + sbcs x17, x17, xzr; \ + sbc x19, x19, xzr; \ + and x19, x19, #0x1ff; \ + stp x2, x11, [P0]; \ + stp x12, x13, [P0+16]; \ + stp x14, x15, [P0+32]; \ + stp x16, x17, [P0+48]; \ + str x19, [P0+64] + +// Corresponds exactly to bignum_sub_p521 + +#define sub_p521(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + ldp x9, x10, [P1+32]; \ + ldp x4, x3, [P2+32]; \ + sbcs x9, x9, x4; \ + sbcs x10, x10, x3; \ + ldp x11, x12, [P1+48]; \ + ldp x4, x3, [P2+48]; \ + sbcs x11, x11, x4; \ + sbcs x12, x12, x3; \ + ldr x13, [P1+64]; \ + ldr x4, [P2+64]; \ + sbcs x13, x13, x4; \ + sbcs x5, x5, xzr; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + and x13, x13, #0x1ff; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32]; \ + stp x11, x12, [P0+48]; \ + str x13, [P0+64] + +S2N_BN_SYMBOL(p521_jadd): + +// Save regs and make room on stack for temporary variables + + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x27, x28, [sp, #-16]! 
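+// (x19-x28 are callee-saved in the standard ARM ABI, and x26-x28 are
+// never written by the field operation macros above, so the input
+// pointers parked in them just below survive the whole main sequence.)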
+ sub sp, sp, NSPACE + +// Move the input arguments to stable places + + mov input_z, x0 + mov input_x, x1 + mov input_y, x2 + +// Main code, just a sequence of basic field operations + + sqr_p521(z1sq,z_1) + sqr_p521(z2sq,z_2) + + mul_p521(y1a,z_2,y_1) + mul_p521(y2a,z_1,y_2) + + mul_p521(x2a,z1sq,x_2) + mul_p521(x1a,z2sq,x_1) + mul_p521(y2a,z1sq,y2a) + mul_p521(y1a,z2sq,y1a) + + sub_p521(xd,x2a,x1a) + sub_p521(yd,y2a,y1a) + + sqr_p521(zz,xd) + sqr_p521(ww,yd) + + mul_p521(zzx1,zz,x1a) + mul_p521(zzx2,zz,x2a) + + sub_p521(x_3,ww,zzx1) + sub_p521(t1,zzx2,zzx1) + + mul_p521(xd,xd,z_1) + + sub_p521(x_3,x_3,zzx2) + + sub_p521(t2,zzx1,x_3) + + mul_p521(t1,t1,y1a) + mul_p521(z_3,xd,z_2) + mul_p521(t2,yd,t2) + + sub_p521(y_3,t2,t1) + +// Restore stack and registers + + add sp, sp, NSPACE + + ldp x27, x28, [sp], 16 + ldp x25, x26, [sp], 16 + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/p521/p521_jdouble.S b/arm/p521/p521_jdouble.S new file mode 100644 index 0000000000..242b492a81 --- /dev/null +++ b/arm/p521/p521_jdouble.S @@ -0,0 +1,1470 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Point doubling on NIST curve P-521 in Jacobian coordinates +// +// extern void p521_jdouble +// (uint64_t p3[static 27],uint64_t p1[static 27]); +// +// Does p3 := 2 * p1 where all points are regarded as Jacobian triples. +// A Jacobian triple (x,y,z) represents affine point (x/z^2,y/z^3). +// It is assumed that all coordinates of the input point are fully +// reduced mod p_521 and that the z coordinate is not zero. 
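+// (A zero z coordinate would represent the point at infinity, which
+// this routine does not handle.)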
+// +// Standard ARM ABI: X0 = p3, X1 = p1 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p521_jdouble) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p521_jdouble) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 72 + +// Stable homes for input arguments during main code sequence + +#define input_z x26 +#define input_x x27 + +// Pointer-offset pairs for inputs and outputs + +#define x_1 input_x, #0 +#define y_1 input_x, #NUMSIZE +#define z_1 input_x, #(2*NUMSIZE) + +#define x_3 input_z, #0 +#define y_3 input_z, #NUMSIZE +#define z_3 input_z, #(2*NUMSIZE) + +// Pointer-offset pairs for temporaries + +#define z2 sp, #(NUMSIZE*0) +#define y2 sp, #(NUMSIZE*1) +#define x2p sp, #(NUMSIZE*2) +#define xy2 sp, #(NUMSIZE*3) + +#define y4 sp, #(NUMSIZE*4) +#define t2 sp, #(NUMSIZE*4) + +#define dx2 sp, #(NUMSIZE*5) +#define t1 sp, #(NUMSIZE*5) + +#define d sp, #(NUMSIZE*6) +#define x4p sp, #(NUMSIZE*6) + +// NUMSIZE*7 is not 16-aligned so we round it up + +#define NSPACE (NUMSIZE*7+8) + +// Corresponds exactly to bignum_mul_p521_alt + +#define mul_p521(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + mul x15, x3, x5; \ + umulh x16, x3, x5; \ + mul x14, x3, x6; \ + umulh x17, x3, x6; \ + adds x16, x16, x14; \ + ldp x7, x8, [P2+16]; \ + mul x14, x3, x7; \ + umulh x19, x3, x7; \ + adcs x17, x17, x14; \ + mul x14, x3, x8; \ + umulh x20, x3, x8; \ + adcs x19, x19, x14; \ + ldp x9, x10, [P2+32]; \ + mul x14, x3, x9; \ + umulh x21, x3, x9; \ + adcs x20, x20, x14; \ + mul x14, x3, x10; \ + umulh x22, x3, x10; \ + adcs x21, x21, x14; \ + ldp x11, x12, [P2+48]; \ + mul x14, x3, x11; \ + umulh x23, x3, x11; \ + adcs x22, x22, x14; \ + ldr x13, [P2+64]; \ + mul x14, x3, x12; \ + umulh x24, x3, x12; \ + adcs x23, x23, x14; \ + mul x14, x3, x13; \ + umulh x1, x3, x13; \ + adcs x24, x24, x14; \ + adc x1, x1, xzr; \ + mul x14, x4, x5; \ + adds x16, x16, x14; \ + mul x14, x4, x6; \ + adcs x17, x17, x14; \ + mul x14, x4, x7; \ + adcs x19, x19, x14; \ + mul x14, x4, x8; \ + adcs x20, x20, x14; \ + mul x14, x4, x9; \ + adcs x21, x21, x14; \ + mul x14, x4, x10; \ + adcs x22, x22, x14; \ + mul x14, x4, x11; \ + adcs x23, x23, x14; \ + mul x14, x4, x12; \ + adcs x24, x24, x14; \ + mul x14, x4, x13; \ + adcs x1, x1, x14; \ + cset x0, hs; \ + umulh x14, x4, x5; \ + adds x17, x17, x14; \ + umulh x14, x4, x6; \ + adcs x19, x19, x14; \ + umulh x14, x4, x7; \ + adcs x20, x20, x14; \ + umulh x14, x4, x8; \ + adcs x21, x21, x14; \ + umulh x14, x4, x9; \ + adcs x22, x22, x14; \ + umulh x14, x4, x10; \ + adcs x23, x23, x14; \ + umulh x14, x4, x11; \ + adcs x24, x24, x14; \ + umulh x14, x4, x12; \ + adcs x1, x1, x14; \ + umulh x14, x4, x13; \ + adc x0, x0, x14; \ + stp x15, x16, [P0]; \ + ldp x3, x4, [P1+16]; \ + mul x14, x3, x5; \ + adds x17, x17, x14; \ + mul x14, x3, x6; \ + adcs x19, x19, x14; \ + mul x14, x3, x7; \ + adcs x20, x20, x14; \ + mul x14, x3, x8; \ + adcs x21, x21, x14; \ + mul x14, x3, x9; \ + adcs x22, x22, x14; \ + mul x14, x3, x10; \ + adcs x23, x23, x14; \ + mul x14, x3, x11; \ + adcs x24, x24, x14; \ + mul x14, x3, x12; \ + adcs x1, x1, x14; \ + mul x14, x3, x13; \ + adcs x0, x0, x14; \ + cset x15, hs; \ + umulh x14, x3, x5; \ + adds x19, x19, x14; \ + umulh x14, x3, x6; \ + adcs x20, x20, x14; \ + umulh x14, x3, x7; \ + adcs x21, x21, x14; \ + umulh x14, x3, x8; \ + adcs x22, x22, x14; \ + umulh x14, x3, x9; \ + adcs x23, x23, x14; \ + umulh x14, x3, x10; \ + adcs x24, x24, x14; \ + 
umulh x14, x3, x11; \ + adcs x1, x1, x14; \ + umulh x14, x3, x12; \ + adcs x0, x0, x14; \ + umulh x14, x3, x13; \ + adc x15, x15, x14; \ + mul x14, x4, x5; \ + adds x19, x19, x14; \ + mul x14, x4, x6; \ + adcs x20, x20, x14; \ + mul x14, x4, x7; \ + adcs x21, x21, x14; \ + mul x14, x4, x8; \ + adcs x22, x22, x14; \ + mul x14, x4, x9; \ + adcs x23, x23, x14; \ + mul x14, x4, x10; \ + adcs x24, x24, x14; \ + mul x14, x4, x11; \ + adcs x1, x1, x14; \ + mul x14, x4, x12; \ + adcs x0, x0, x14; \ + mul x14, x4, x13; \ + adcs x15, x15, x14; \ + cset x16, hs; \ + umulh x14, x4, x5; \ + adds x20, x20, x14; \ + umulh x14, x4, x6; \ + adcs x21, x21, x14; \ + umulh x14, x4, x7; \ + adcs x22, x22, x14; \ + umulh x14, x4, x8; \ + adcs x23, x23, x14; \ + umulh x14, x4, x9; \ + adcs x24, x24, x14; \ + umulh x14, x4, x10; \ + adcs x1, x1, x14; \ + umulh x14, x4, x11; \ + adcs x0, x0, x14; \ + umulh x14, x4, x12; \ + adcs x15, x15, x14; \ + umulh x14, x4, x13; \ + adc x16, x16, x14; \ + stp x17, x19, [P0+16]; \ + ldp x3, x4, [P1+32]; \ + mul x14, x3, x5; \ + adds x20, x20, x14; \ + mul x14, x3, x6; \ + adcs x21, x21, x14; \ + mul x14, x3, x7; \ + adcs x22, x22, x14; \ + mul x14, x3, x8; \ + adcs x23, x23, x14; \ + mul x14, x3, x9; \ + adcs x24, x24, x14; \ + mul x14, x3, x10; \ + adcs x1, x1, x14; \ + mul x14, x3, x11; \ + adcs x0, x0, x14; \ + mul x14, x3, x12; \ + adcs x15, x15, x14; \ + mul x14, x3, x13; \ + adcs x16, x16, x14; \ + cset x17, hs; \ + umulh x14, x3, x5; \ + adds x21, x21, x14; \ + umulh x14, x3, x6; \ + adcs x22, x22, x14; \ + umulh x14, x3, x7; \ + adcs x23, x23, x14; \ + umulh x14, x3, x8; \ + adcs x24, x24, x14; \ + umulh x14, x3, x9; \ + adcs x1, x1, x14; \ + umulh x14, x3, x10; \ + adcs x0, x0, x14; \ + umulh x14, x3, x11; \ + adcs x15, x15, x14; \ + umulh x14, x3, x12; \ + adcs x16, x16, x14; \ + umulh x14, x3, x13; \ + adc x17, x17, x14; \ + mul x14, x4, x5; \ + adds x21, x21, x14; \ + mul x14, x4, x6; \ + adcs x22, x22, x14; \ + mul x14, x4, x7; \ + adcs x23, x23, x14; \ + mul x14, x4, x8; \ + adcs x24, x24, x14; \ + mul x14, x4, x9; \ + adcs x1, x1, x14; \ + mul x14, x4, x10; \ + adcs x0, x0, x14; \ + mul x14, x4, x11; \ + adcs x15, x15, x14; \ + mul x14, x4, x12; \ + adcs x16, x16, x14; \ + mul x14, x4, x13; \ + adcs x17, x17, x14; \ + cset x19, hs; \ + umulh x14, x4, x5; \ + adds x22, x22, x14; \ + umulh x14, x4, x6; \ + adcs x23, x23, x14; \ + umulh x14, x4, x7; \ + adcs x24, x24, x14; \ + umulh x14, x4, x8; \ + adcs x1, x1, x14; \ + umulh x14, x4, x9; \ + adcs x0, x0, x14; \ + umulh x14, x4, x10; \ + adcs x15, x15, x14; \ + umulh x14, x4, x11; \ + adcs x16, x16, x14; \ + umulh x14, x4, x12; \ + adcs x17, x17, x14; \ + umulh x14, x4, x13; \ + adc x19, x19, x14; \ + stp x20, x21, [P0+32]; \ + ldp x3, x4, [P1+48]; \ + mul x14, x3, x5; \ + adds x22, x22, x14; \ + mul x14, x3, x6; \ + adcs x23, x23, x14; \ + mul x14, x3, x7; \ + adcs x24, x24, x14; \ + mul x14, x3, x8; \ + adcs x1, x1, x14; \ + mul x14, x3, x9; \ + adcs x0, x0, x14; \ + mul x14, x3, x10; \ + adcs x15, x15, x14; \ + mul x14, x3, x11; \ + adcs x16, x16, x14; \ + mul x14, x3, x12; \ + adcs x17, x17, x14; \ + mul x14, x3, x13; \ + adcs x19, x19, x14; \ + cset x20, hs; \ + umulh x14, x3, x5; \ + adds x23, x23, x14; \ + umulh x14, x3, x6; \ + adcs x24, x24, x14; \ + umulh x14, x3, x7; \ + adcs x1, x1, x14; \ + umulh x14, x3, x8; \ + adcs x0, x0, x14; \ + umulh x14, x3, x9; \ + adcs x15, x15, x14; \ + umulh x14, x3, x10; \ + adcs x16, x16, x14; \ + umulh x14, x3, x11; \ + adcs x17, x17, x14; \ + umulh x14, x3, x12; \ + 
adcs x19, x19, x14; \ + umulh x14, x3, x13; \ + adc x20, x20, x14; \ + mul x14, x4, x5; \ + adds x23, x23, x14; \ + mul x14, x4, x6; \ + adcs x24, x24, x14; \ + mul x14, x4, x7; \ + adcs x1, x1, x14; \ + mul x14, x4, x8; \ + adcs x0, x0, x14; \ + mul x14, x4, x9; \ + adcs x15, x15, x14; \ + mul x14, x4, x10; \ + adcs x16, x16, x14; \ + mul x14, x4, x11; \ + adcs x17, x17, x14; \ + mul x14, x4, x12; \ + adcs x19, x19, x14; \ + mul x14, x4, x13; \ + adcs x20, x20, x14; \ + cset x21, hs; \ + umulh x14, x4, x5; \ + adds x24, x24, x14; \ + umulh x14, x4, x6; \ + adcs x1, x1, x14; \ + umulh x14, x4, x7; \ + adcs x0, x0, x14; \ + umulh x14, x4, x8; \ + adcs x15, x15, x14; \ + umulh x14, x4, x9; \ + adcs x16, x16, x14; \ + umulh x14, x4, x10; \ + adcs x17, x17, x14; \ + umulh x14, x4, x11; \ + adcs x19, x19, x14; \ + umulh x14, x4, x12; \ + adcs x20, x20, x14; \ + umulh x14, x4, x13; \ + adc x21, x21, x14; \ + stp x22, x23, [P0+48]; \ + ldr x3, [P1+64]; \ + mul x14, x3, x5; \ + adds x24, x24, x14; \ + mul x14, x3, x6; \ + adcs x1, x1, x14; \ + mul x14, x3, x7; \ + adcs x0, x0, x14; \ + mul x14, x3, x8; \ + adcs x15, x15, x14; \ + mul x14, x3, x9; \ + adcs x16, x16, x14; \ + mul x14, x3, x10; \ + adcs x17, x17, x14; \ + mul x14, x3, x11; \ + adcs x19, x19, x14; \ + mul x14, x3, x12; \ + adcs x20, x20, x14; \ + mul x14, x3, x13; \ + adc x21, x21, x14; \ + umulh x14, x3, x5; \ + adds x1, x1, x14; \ + umulh x14, x3, x6; \ + adcs x0, x0, x14; \ + umulh x14, x3, x7; \ + adcs x15, x15, x14; \ + umulh x14, x3, x8; \ + adcs x16, x16, x14; \ + umulh x14, x3, x9; \ + adcs x17, x17, x14; \ + umulh x14, x3, x10; \ + adcs x19, x19, x14; \ + umulh x14, x3, x11; \ + adcs x20, x20, x14; \ + umulh x14, x3, x12; \ + adc x21, x21, x14; \ + cmp xzr, xzr; \ + ldp x5, x6, [P0]; \ + extr x14, x1, x24, #9; \ + adcs x5, x5, x14; \ + extr x14, x0, x1, #9; \ + adcs x6, x6, x14; \ + ldp x7, x8, [P0+16]; \ + extr x14, x15, x0, #9; \ + adcs x7, x7, x14; \ + extr x14, x16, x15, #9; \ + adcs x8, x8, x14; \ + ldp x9, x10, [P0+32]; \ + extr x14, x17, x16, #9; \ + adcs x9, x9, x14; \ + extr x14, x19, x17, #9; \ + adcs x10, x10, x14; \ + ldp x11, x12, [P0+48]; \ + extr x14, x20, x19, #9; \ + adcs x11, x11, x14; \ + extr x14, x21, x20, #9; \ + adcs x12, x12, x14; \ + orr x13, x24, #0xfffffffffffffe00; \ + lsr x14, x21, #9; \ + adcs x13, x13, x14; \ + sbcs x5, x5, xzr; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, xzr; \ + and x13, x13, #0x1ff; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32]; \ + stp x11, x12, [P0+48]; \ + str x13, [P0+64] + +// Corresponds exactly to bignum_sqr_p521_alt + +#define sqr_p521(P0,P1) \ + ldp x2, x3, [P1]; \ + mul x11, x2, x3; \ + umulh x12, x2, x3; \ + ldp x4, x5, [P1+16]; \ + mul x10, x2, x4; \ + umulh x13, x2, x4; \ + adds x12, x12, x10; \ + ldp x6, x7, [P1+32]; \ + mul x10, x2, x5; \ + umulh x14, x2, x5; \ + adcs x13, x13, x10; \ + ldp x8, x9, [P1+48]; \ + mul x10, x2, x6; \ + umulh x15, x2, x6; \ + adcs x14, x14, x10; \ + mul x10, x2, x7; \ + umulh x16, x2, x7; \ + adcs x15, x15, x10; \ + mul x10, x2, x8; \ + umulh x17, x2, x8; \ + adcs x16, x16, x10; \ + mul x10, x2, x9; \ + umulh x19, x2, x9; \ + adcs x17, x17, x10; \ + adc x19, x19, xzr; \ + mul x10, x3, x4; \ + adds x13, x13, x10; \ + mul x10, x3, x5; \ + adcs x14, x14, x10; \ + mul x10, x3, x6; \ + adcs x15, x15, x10; \ + mul x10, x3, x7; \ + adcs x16, x16, x10; \ + mul x10, x3, x8; \ + adcs 
x17, x17, x10; \ + mul x10, x3, x9; \ + adcs x19, x19, x10; \ + cset x20, hs; \ + umulh x10, x3, x4; \ + adds x14, x14, x10; \ + umulh x10, x3, x5; \ + adcs x15, x15, x10; \ + umulh x10, x3, x6; \ + adcs x16, x16, x10; \ + umulh x10, x3, x7; \ + adcs x17, x17, x10; \ + umulh x10, x3, x8; \ + adcs x19, x19, x10; \ + umulh x10, x3, x9; \ + adc x20, x20, x10; \ + mul x10, x6, x7; \ + umulh x21, x6, x7; \ + adds x20, x20, x10; \ + adc x21, x21, xzr; \ + mul x10, x4, x5; \ + adds x15, x15, x10; \ + mul x10, x4, x6; \ + adcs x16, x16, x10; \ + mul x10, x4, x7; \ + adcs x17, x17, x10; \ + mul x10, x4, x8; \ + adcs x19, x19, x10; \ + mul x10, x4, x9; \ + adcs x20, x20, x10; \ + mul x10, x6, x8; \ + adcs x21, x21, x10; \ + cset x22, hs; \ + umulh x10, x4, x5; \ + adds x16, x16, x10; \ + umulh x10, x4, x6; \ + adcs x17, x17, x10; \ + umulh x10, x4, x7; \ + adcs x19, x19, x10; \ + umulh x10, x4, x8; \ + adcs x20, x20, x10; \ + umulh x10, x4, x9; \ + adcs x21, x21, x10; \ + umulh x10, x6, x8; \ + adc x22, x22, x10; \ + mul x10, x7, x8; \ + umulh x23, x7, x8; \ + adds x22, x22, x10; \ + adc x23, x23, xzr; \ + mul x10, x5, x6; \ + adds x17, x17, x10; \ + mul x10, x5, x7; \ + adcs x19, x19, x10; \ + mul x10, x5, x8; \ + adcs x20, x20, x10; \ + mul x10, x5, x9; \ + adcs x21, x21, x10; \ + mul x10, x6, x9; \ + adcs x22, x22, x10; \ + mul x10, x7, x9; \ + adcs x23, x23, x10; \ + cset x24, hs; \ + umulh x10, x5, x6; \ + adds x19, x19, x10; \ + umulh x10, x5, x7; \ + adcs x20, x20, x10; \ + umulh x10, x5, x8; \ + adcs x21, x21, x10; \ + umulh x10, x5, x9; \ + adcs x22, x22, x10; \ + umulh x10, x6, x9; \ + adcs x23, x23, x10; \ + umulh x10, x7, x9; \ + adc x24, x24, x10; \ + mul x10, x8, x9; \ + umulh x25, x8, x9; \ + adds x24, x24, x10; \ + adc x25, x25, xzr; \ + adds x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + adcs x15, x15, x15; \ + adcs x16, x16, x16; \ + adcs x17, x17, x17; \ + adcs x19, x19, x19; \ + adcs x20, x20, x20; \ + adcs x21, x21, x21; \ + adcs x22, x22, x22; \ + adcs x23, x23, x23; \ + adcs x24, x24, x24; \ + adcs x25, x25, x25; \ + cset x0, hs; \ + umulh x10, x2, x2; \ + adds x11, x11, x10; \ + mul x10, x3, x3; \ + adcs x12, x12, x10; \ + umulh x10, x3, x3; \ + adcs x13, x13, x10; \ + mul x10, x4, x4; \ + adcs x14, x14, x10; \ + umulh x10, x4, x4; \ + adcs x15, x15, x10; \ + mul x10, x5, x5; \ + adcs x16, x16, x10; \ + umulh x10, x5, x5; \ + adcs x17, x17, x10; \ + mul x10, x6, x6; \ + adcs x19, x19, x10; \ + umulh x10, x6, x6; \ + adcs x20, x20, x10; \ + mul x10, x7, x7; \ + adcs x21, x21, x10; \ + umulh x10, x7, x7; \ + adcs x22, x22, x10; \ + mul x10, x8, x8; \ + adcs x23, x23, x10; \ + umulh x10, x8, x8; \ + adcs x24, x24, x10; \ + mul x10, x9, x9; \ + adcs x25, x25, x10; \ + umulh x10, x9, x9; \ + adc x0, x0, x10; \ + ldr x1, [P1+64]; \ + add x1, x1, x1; \ + mul x10, x1, x2; \ + adds x19, x19, x10; \ + umulh x10, x1, x2; \ + adcs x20, x20, x10; \ + mul x10, x1, x4; \ + adcs x21, x21, x10; \ + umulh x10, x1, x4; \ + adcs x22, x22, x10; \ + mul x10, x1, x6; \ + adcs x23, x23, x10; \ + umulh x10, x1, x6; \ + adcs x24, x24, x10; \ + mul x10, x1, x8; \ + adcs x25, x25, x10; \ + umulh x10, x1, x8; \ + adcs x0, x0, x10; \ + lsr x4, x1, #1; \ + mul x4, x4, x4; \ + adc x4, x4, xzr; \ + mul x10, x1, x3; \ + adds x20, x20, x10; \ + umulh x10, x1, x3; \ + adcs x21, x21, x10; \ + mul x10, x1, x5; \ + adcs x22, x22, x10; \ + umulh x10, x1, x5; \ + adcs x23, x23, x10; \ + mul x10, x1, x7; \ + adcs x24, x24, x10; \ + umulh x10, x1, x7; \ + adcs x25, x25, x10; 
\ + mul x10, x1, x9; \ + adcs x0, x0, x10; \ + umulh x10, x1, x9; \ + adc x4, x4, x10; \ + mul x2, x2, x2; \ + cmp xzr, xzr; \ + extr x10, x20, x19, #9; \ + adcs x2, x2, x10; \ + extr x10, x21, x20, #9; \ + adcs x11, x11, x10; \ + extr x10, x22, x21, #9; \ + adcs x12, x12, x10; \ + extr x10, x23, x22, #9; \ + adcs x13, x13, x10; \ + extr x10, x24, x23, #9; \ + adcs x14, x14, x10; \ + extr x10, x25, x24, #9; \ + adcs x15, x15, x10; \ + extr x10, x0, x25, #9; \ + adcs x16, x16, x10; \ + extr x10, x4, x0, #9; \ + adcs x17, x17, x10; \ + orr x19, x19, #0xfffffffffffffe00; \ + lsr x10, x4, #9; \ + adcs x19, x19, x10; \ + sbcs x2, x2, xzr; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbcs x15, x15, xzr; \ + sbcs x16, x16, xzr; \ + sbcs x17, x17, xzr; \ + sbc x19, x19, xzr; \ + and x19, x19, #0x1ff; \ + stp x2, x11, [P0]; \ + stp x12, x13, [P0+16]; \ + stp x14, x15, [P0+32]; \ + stp x16, x17, [P0+48]; \ + str x19, [P0+64] + +// Corresponds exactly to bignum_add_p521 + +#define add_p521(P0,P1,P2) \ + cmp xzr, xzr; \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + adcs x5, x5, x4; \ + adcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + adcs x7, x7, x4; \ + adcs x8, x8, x3; \ + ldp x9, x10, [P1+32]; \ + ldp x4, x3, [P2+32]; \ + adcs x9, x9, x4; \ + adcs x10, x10, x3; \ + ldp x11, x12, [P1+48]; \ + ldp x4, x3, [P2+48]; \ + adcs x11, x11, x4; \ + adcs x12, x12, x3; \ + ldr x13, [P1+64]; \ + ldr x4, [P2+64]; \ + adc x13, x13, x4; \ + subs x4, x13, #512; \ + csetm x4, hs; \ + sbcs x5, x5, xzr; \ + and x4, x4, #0x200; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, x4; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32]; \ + stp x11, x12, [P0+48]; \ + str x13, [P0+64] + +// Corresponds exactly to bignum_sub_p521 + +#define sub_p521(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + ldp x9, x10, [P1+32]; \ + ldp x4, x3, [P2+32]; \ + sbcs x9, x9, x4; \ + sbcs x10, x10, x3; \ + ldp x11, x12, [P1+48]; \ + ldp x4, x3, [P2+48]; \ + sbcs x11, x11, x4; \ + sbcs x12, x12, x3; \ + ldr x13, [P1+64]; \ + ldr x4, [P2+64]; \ + sbcs x13, x13, x4; \ + sbcs x5, x5, xzr; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + and x13, x13, #0x1ff; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32]; \ + stp x11, x12, [P0+48]; \ + str x13, [P0+64] + +// Weak multiplication not fully reducing + +#define weakmul_p521(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + mul x15, x3, x5; \ + umulh x16, x3, x5; \ + mul x14, x3, x6; \ + umulh x17, x3, x6; \ + adds x16, x16, x14; \ + ldp x7, x8, [P2+16]; \ + mul x14, x3, x7; \ + umulh x19, x3, x7; \ + adcs x17, x17, x14; \ + mul x14, x3, x8; \ + umulh x20, x3, x8; \ + adcs x19, x19, x14; \ + ldp x9, x10, [P2+32]; \ + mul x14, x3, x9; \ + umulh x21, x3, x9; \ + adcs x20, x20, x14; \ + mul x14, x3, x10; \ + umulh x22, x3, x10; \ + adcs x21, x21, x14; \ + ldp x11, x12, [P2+48]; \ + mul x14, x3, x11; \ + umulh x23, x3, x11; \ + adcs x22, x22, x14; \ + ldr x13, [P2+64]; \ + mul x14, x3, x12; \ + umulh x24, x3, x12; \ + adcs x23, x23, x14; \ + mul x14, x3, x13; \ + umulh x1, 
x3, x13; \ + adcs x24, x24, x14; \ + adc x1, x1, xzr; \ + mul x14, x4, x5; \ + adds x16, x16, x14; \ + mul x14, x4, x6; \ + adcs x17, x17, x14; \ + mul x14, x4, x7; \ + adcs x19, x19, x14; \ + mul x14, x4, x8; \ + adcs x20, x20, x14; \ + mul x14, x4, x9; \ + adcs x21, x21, x14; \ + mul x14, x4, x10; \ + adcs x22, x22, x14; \ + mul x14, x4, x11; \ + adcs x23, x23, x14; \ + mul x14, x4, x12; \ + adcs x24, x24, x14; \ + mul x14, x4, x13; \ + adcs x1, x1, x14; \ + cset x0, hs; \ + umulh x14, x4, x5; \ + adds x17, x17, x14; \ + umulh x14, x4, x6; \ + adcs x19, x19, x14; \ + umulh x14, x4, x7; \ + adcs x20, x20, x14; \ + umulh x14, x4, x8; \ + adcs x21, x21, x14; \ + umulh x14, x4, x9; \ + adcs x22, x22, x14; \ + umulh x14, x4, x10; \ + adcs x23, x23, x14; \ + umulh x14, x4, x11; \ + adcs x24, x24, x14; \ + umulh x14, x4, x12; \ + adcs x1, x1, x14; \ + umulh x14, x4, x13; \ + adc x0, x0, x14; \ + stp x15, x16, [P0]; \ + ldp x3, x4, [P1+16]; \ + mul x14, x3, x5; \ + adds x17, x17, x14; \ + mul x14, x3, x6; \ + adcs x19, x19, x14; \ + mul x14, x3, x7; \ + adcs x20, x20, x14; \ + mul x14, x3, x8; \ + adcs x21, x21, x14; \ + mul x14, x3, x9; \ + adcs x22, x22, x14; \ + mul x14, x3, x10; \ + adcs x23, x23, x14; \ + mul x14, x3, x11; \ + adcs x24, x24, x14; \ + mul x14, x3, x12; \ + adcs x1, x1, x14; \ + mul x14, x3, x13; \ + adcs x0, x0, x14; \ + cset x15, hs; \ + umulh x14, x3, x5; \ + adds x19, x19, x14; \ + umulh x14, x3, x6; \ + adcs x20, x20, x14; \ + umulh x14, x3, x7; \ + adcs x21, x21, x14; \ + umulh x14, x3, x8; \ + adcs x22, x22, x14; \ + umulh x14, x3, x9; \ + adcs x23, x23, x14; \ + umulh x14, x3, x10; \ + adcs x24, x24, x14; \ + umulh x14, x3, x11; \ + adcs x1, x1, x14; \ + umulh x14, x3, x12; \ + adcs x0, x0, x14; \ + umulh x14, x3, x13; \ + adc x15, x15, x14; \ + mul x14, x4, x5; \ + adds x19, x19, x14; \ + mul x14, x4, x6; \ + adcs x20, x20, x14; \ + mul x14, x4, x7; \ + adcs x21, x21, x14; \ + mul x14, x4, x8; \ + adcs x22, x22, x14; \ + mul x14, x4, x9; \ + adcs x23, x23, x14; \ + mul x14, x4, x10; \ + adcs x24, x24, x14; \ + mul x14, x4, x11; \ + adcs x1, x1, x14; \ + mul x14, x4, x12; \ + adcs x0, x0, x14; \ + mul x14, x4, x13; \ + adcs x15, x15, x14; \ + cset x16, hs; \ + umulh x14, x4, x5; \ + adds x20, x20, x14; \ + umulh x14, x4, x6; \ + adcs x21, x21, x14; \ + umulh x14, x4, x7; \ + adcs x22, x22, x14; \ + umulh x14, x4, x8; \ + adcs x23, x23, x14; \ + umulh x14, x4, x9; \ + adcs x24, x24, x14; \ + umulh x14, x4, x10; \ + adcs x1, x1, x14; \ + umulh x14, x4, x11; \ + adcs x0, x0, x14; \ + umulh x14, x4, x12; \ + adcs x15, x15, x14; \ + umulh x14, x4, x13; \ + adc x16, x16, x14; \ + stp x17, x19, [P0+16]; \ + ldp x3, x4, [P1+32]; \ + mul x14, x3, x5; \ + adds x20, x20, x14; \ + mul x14, x3, x6; \ + adcs x21, x21, x14; \ + mul x14, x3, x7; \ + adcs x22, x22, x14; \ + mul x14, x3, x8; \ + adcs x23, x23, x14; \ + mul x14, x3, x9; \ + adcs x24, x24, x14; \ + mul x14, x3, x10; \ + adcs x1, x1, x14; \ + mul x14, x3, x11; \ + adcs x0, x0, x14; \ + mul x14, x3, x12; \ + adcs x15, x15, x14; \ + mul x14, x3, x13; \ + adcs x16, x16, x14; \ + cset x17, hs; \ + umulh x14, x3, x5; \ + adds x21, x21, x14; \ + umulh x14, x3, x6; \ + adcs x22, x22, x14; \ + umulh x14, x3, x7; \ + adcs x23, x23, x14; \ + umulh x14, x3, x8; \ + adcs x24, x24, x14; \ + umulh x14, x3, x9; \ + adcs x1, x1, x14; \ + umulh x14, x3, x10; \ + adcs x0, x0, x14; \ + umulh x14, x3, x11; \ + adcs x15, x15, x14; \ + umulh x14, x3, x12; \ + adcs x16, x16, x14; \ + umulh x14, x3, x13; \ + adc x17, x17, x14; \ + mul x14, x4, 
x5; \ + adds x21, x21, x14; \ + mul x14, x4, x6; \ + adcs x22, x22, x14; \ + mul x14, x4, x7; \ + adcs x23, x23, x14; \ + mul x14, x4, x8; \ + adcs x24, x24, x14; \ + mul x14, x4, x9; \ + adcs x1, x1, x14; \ + mul x14, x4, x10; \ + adcs x0, x0, x14; \ + mul x14, x4, x11; \ + adcs x15, x15, x14; \ + mul x14, x4, x12; \ + adcs x16, x16, x14; \ + mul x14, x4, x13; \ + adcs x17, x17, x14; \ + cset x19, hs; \ + umulh x14, x4, x5; \ + adds x22, x22, x14; \ + umulh x14, x4, x6; \ + adcs x23, x23, x14; \ + umulh x14, x4, x7; \ + adcs x24, x24, x14; \ + umulh x14, x4, x8; \ + adcs x1, x1, x14; \ + umulh x14, x4, x9; \ + adcs x0, x0, x14; \ + umulh x14, x4, x10; \ + adcs x15, x15, x14; \ + umulh x14, x4, x11; \ + adcs x16, x16, x14; \ + umulh x14, x4, x12; \ + adcs x17, x17, x14; \ + umulh x14, x4, x13; \ + adc x19, x19, x14; \ + stp x20, x21, [P0+32]; \ + ldp x3, x4, [P1+48]; \ + mul x14, x3, x5; \ + adds x22, x22, x14; \ + mul x14, x3, x6; \ + adcs x23, x23, x14; \ + mul x14, x3, x7; \ + adcs x24, x24, x14; \ + mul x14, x3, x8; \ + adcs x1, x1, x14; \ + mul x14, x3, x9; \ + adcs x0, x0, x14; \ + mul x14, x3, x10; \ + adcs x15, x15, x14; \ + mul x14, x3, x11; \ + adcs x16, x16, x14; \ + mul x14, x3, x12; \ + adcs x17, x17, x14; \ + mul x14, x3, x13; \ + adcs x19, x19, x14; \ + cset x20, hs; \ + umulh x14, x3, x5; \ + adds x23, x23, x14; \ + umulh x14, x3, x6; \ + adcs x24, x24, x14; \ + umulh x14, x3, x7; \ + adcs x1, x1, x14; \ + umulh x14, x3, x8; \ + adcs x0, x0, x14; \ + umulh x14, x3, x9; \ + adcs x15, x15, x14; \ + umulh x14, x3, x10; \ + adcs x16, x16, x14; \ + umulh x14, x3, x11; \ + adcs x17, x17, x14; \ + umulh x14, x3, x12; \ + adcs x19, x19, x14; \ + umulh x14, x3, x13; \ + adc x20, x20, x14; \ + mul x14, x4, x5; \ + adds x23, x23, x14; \ + mul x14, x4, x6; \ + adcs x24, x24, x14; \ + mul x14, x4, x7; \ + adcs x1, x1, x14; \ + mul x14, x4, x8; \ + adcs x0, x0, x14; \ + mul x14, x4, x9; \ + adcs x15, x15, x14; \ + mul x14, x4, x10; \ + adcs x16, x16, x14; \ + mul x14, x4, x11; \ + adcs x17, x17, x14; \ + mul x14, x4, x12; \ + adcs x19, x19, x14; \ + mul x14, x4, x13; \ + adcs x20, x20, x14; \ + cset x21, hs; \ + umulh x14, x4, x5; \ + adds x24, x24, x14; \ + umulh x14, x4, x6; \ + adcs x1, x1, x14; \ + umulh x14, x4, x7; \ + adcs x0, x0, x14; \ + umulh x14, x4, x8; \ + adcs x15, x15, x14; \ + umulh x14, x4, x9; \ + adcs x16, x16, x14; \ + umulh x14, x4, x10; \ + adcs x17, x17, x14; \ + umulh x14, x4, x11; \ + adcs x19, x19, x14; \ + umulh x14, x4, x12; \ + adcs x20, x20, x14; \ + umulh x14, x4, x13; \ + adc x21, x21, x14; \ + stp x22, x23, [P0+48]; \ + ldr x3, [P1+64]; \ + mul x14, x3, x5; \ + adds x24, x24, x14; \ + mul x14, x3, x6; \ + adcs x1, x1, x14; \ + mul x14, x3, x7; \ + adcs x0, x0, x14; \ + mul x14, x3, x8; \ + adcs x15, x15, x14; \ + mul x14, x3, x9; \ + adcs x16, x16, x14; \ + mul x14, x3, x10; \ + adcs x17, x17, x14; \ + mul x14, x3, x11; \ + adcs x19, x19, x14; \ + mul x14, x3, x12; \ + adcs x20, x20, x14; \ + mul x14, x3, x13; \ + adc x21, x21, x14; \ + umulh x14, x3, x5; \ + adds x1, x1, x14; \ + umulh x14, x3, x6; \ + adcs x0, x0, x14; \ + umulh x14, x3, x7; \ + adcs x15, x15, x14; \ + umulh x14, x3, x8; \ + adcs x16, x16, x14; \ + umulh x14, x3, x9; \ + adcs x17, x17, x14; \ + umulh x14, x3, x10; \ + adcs x19, x19, x14; \ + umulh x14, x3, x11; \ + adcs x20, x20, x14; \ + umulh x14, x3, x12; \ + adc x21, x21, x14; \ + ldp x5, x6, [P0]; \ + extr x14, x1, x24, #9; \ + adds x5, x5, x14; \ + extr x14, x0, x1, #9; \ + adcs x6, x6, x14; \ + ldp x7, x8, [P0+16]; \ + extr x14, 
x15, x0, #9; \ + adcs x7, x7, x14; \ + extr x14, x16, x15, #9; \ + adcs x8, x8, x14; \ + ldp x9, x10, [P0+32]; \ + extr x14, x17, x16, #9; \ + adcs x9, x9, x14; \ + extr x14, x19, x17, #9; \ + adcs x10, x10, x14; \ + ldp x11, x12, [P0+48]; \ + extr x14, x20, x19, #9; \ + adcs x11, x11, x14; \ + extr x14, x21, x20, #9; \ + adcs x12, x12, x14; \ + and x13, x24, #0x1ff; \ + lsr x14, x21, #9; \ + adc x13, x13, x14; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32]; \ + stp x11, x12, [P0+48]; \ + str x13, [P0+64] + +// P0 = C * P1 - D * P2 == C * P1 + D * (p_521 - P2) + +#define cmsub_p521(P0,C,P1,D,P2) \ + ldp x6, x7, [P1]; \ + mov x1, #(C); \ + mul x3, x1, x6; \ + mul x4, x1, x7; \ + umulh x6, x1, x6; \ + adds x4, x4, x6; \ + umulh x7, x1, x7; \ + ldp x8, x9, [P1+16]; \ + mul x5, x1, x8; \ + mul x6, x1, x9; \ + umulh x8, x1, x8; \ + adcs x5, x5, x7; \ + umulh x9, x1, x9; \ + adcs x6, x6, x8; \ + ldp x10, x11, [P1+32]; \ + mul x7, x1, x10; \ + mul x8, x1, x11; \ + umulh x10, x1, x10; \ + adcs x7, x7, x9; \ + umulh x11, x1, x11; \ + adcs x8, x8, x10; \ + ldp x12, x13, [P1+48]; \ + mul x9, x1, x12; \ + mul x10, x1, x13; \ + umulh x12, x1, x12; \ + adcs x9, x9, x11; \ + umulh x13, x1, x13; \ + adcs x10, x10, x12; \ + ldr x14, [P1+64]; \ + mul x11, x1, x14; \ + adc x11, x11, x13; \ + mov x1, #(D); \ + ldp x20, x21, [P2]; \ + mvn x20, x20; \ + mul x0, x1, x20; \ + umulh x20, x1, x20; \ + adds x3, x3, x0; \ + mvn x21, x21; \ + mul x0, x1, x21; \ + umulh x21, x1, x21; \ + adcs x4, x4, x0; \ + ldp x22, x23, [P2+16]; \ + mvn x22, x22; \ + mul x0, x1, x22; \ + umulh x22, x1, x22; \ + adcs x5, x5, x0; \ + mvn x23, x23; \ + mul x0, x1, x23; \ + umulh x23, x1, x23; \ + adcs x6, x6, x0; \ + ldp x17, x19, [P2+32]; \ + mvn x17, x17; \ + mul x0, x1, x17; \ + umulh x17, x1, x17; \ + adcs x7, x7, x0; \ + mvn x19, x19; \ + mul x0, x1, x19; \ + umulh x19, x1, x19; \ + adcs x8, x8, x0; \ + ldp x2, x16, [P2+48]; \ + mvn x2, x2; \ + mul x0, x1, x2; \ + umulh x2, x1, x2; \ + adcs x9, x9, x0; \ + mvn x16, x16; \ + mul x0, x1, x16; \ + umulh x16, x1, x16; \ + adcs x10, x10, x0; \ + ldr x0, [P2+64]; \ + eor x0, x0, #0x1ff; \ + mul x0, x1, x0; \ + adc x11, x11, x0; \ + adds x4, x4, x20; \ + adcs x5, x5, x21; \ + and x15, x4, x5; \ + adcs x6, x6, x22; \ + and x15, x15, x6; \ + adcs x7, x7, x23; \ + and x15, x15, x7; \ + adcs x8, x8, x17; \ + and x15, x15, x8; \ + adcs x9, x9, x19; \ + and x15, x15, x9; \ + adcs x10, x10, x2; \ + and x15, x15, x10; \ + adc x11, x11, x16; \ + lsr x12, x11, #9; \ + orr x11, x11, #0xfffffffffffffe00; \ + cmp xzr, xzr; \ + adcs xzr, x3, x12; \ + adcs xzr, x15, xzr; \ + adcs xzr, x11, xzr; \ + adcs x3, x3, x12; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adcs x6, x6, xzr; \ + adcs x7, x7, xzr; \ + adcs x8, x8, xzr; \ + adcs x9, x9, xzr; \ + adcs x10, x10, xzr; \ + adc x11, x11, xzr; \ + and x11, x11, #0x1ff; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16]; \ + stp x7, x8, [P0+32]; \ + stp x9, x10, [P0+48]; \ + str x11, [P0+64] + +// P0 = 3 * P1 - 8 * P2 == 3 * P1 + 8 * (p_521 - P2) + +#define cmsub38_p521(P0,P1,P2) \ + ldp x6, x7, [P1]; \ + lsl x3, x6, #1; \ + adds x3, x3, x6; \ + extr x4, x7, x6, #63; \ + adcs x4, x4, x7; \ + ldp x8, x9, [P1+16]; \ + extr x5, x8, x7, #63; \ + adcs x5, x5, x8; \ + extr x6, x9, x8, #63; \ + adcs x6, x6, x9; \ + ldp x10, x11, [P1+32]; \ + extr x7, x10, x9, #63; \ + adcs x7, x7, x10; \ + extr x8, x11, x10, #63; \ + adcs x8, x8, x11; \ + ldp x12, x13, [P1+48]; \ + extr x9, x12, x11, #63; \ + adcs x9, x9, x12; \ + extr x10, x13, x12, #63; \ + 
adcs x10, x10, x13; \ + ldr x14, [P1+64]; \ + extr x11, x14, x13, #63; \ + adc x11, x11, x14; \ + ldp x20, x21, [P2]; \ + mvn x20, x20; \ + lsl x0, x20, #3; \ + adds x3, x3, x0; \ + mvn x21, x21; \ + extr x0, x21, x20, #61; \ + adcs x4, x4, x0; \ + ldp x22, x23, [P2+16]; \ + mvn x22, x22; \ + extr x0, x22, x21, #61; \ + adcs x5, x5, x0; \ + and x15, x4, x5; \ + mvn x23, x23; \ + extr x0, x23, x22, #61; \ + adcs x6, x6, x0; \ + and x15, x15, x6; \ + ldp x20, x21, [P2+32]; \ + mvn x20, x20; \ + extr x0, x20, x23, #61; \ + adcs x7, x7, x0; \ + and x15, x15, x7; \ + mvn x21, x21; \ + extr x0, x21, x20, #61; \ + adcs x8, x8, x0; \ + and x15, x15, x8; \ + ldp x22, x23, [P2+48]; \ + mvn x22, x22; \ + extr x0, x22, x21, #61; \ + adcs x9, x9, x0; \ + and x15, x15, x9; \ + mvn x23, x23; \ + extr x0, x23, x22, #61; \ + adcs x10, x10, x0; \ + and x15, x15, x10; \ + ldr x0, [P2+64]; \ + eor x0, x0, #0x1ff; \ + extr x0, x0, x23, #61; \ + adc x11, x11, x0; \ + lsr x12, x11, #9; \ + orr x11, x11, #0xfffffffffffffe00; \ + cmp xzr, xzr; \ + adcs xzr, x3, x12; \ + adcs xzr, x15, xzr; \ + adcs xzr, x11, xzr; \ + adcs x3, x3, x12; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adcs x6, x6, xzr; \ + adcs x7, x7, xzr; \ + adcs x8, x8, xzr; \ + adcs x9, x9, xzr; \ + adcs x10, x10, xzr; \ + adc x11, x11, xzr; \ + and x11, x11, #0x1ff; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16]; \ + stp x7, x8, [P0+32]; \ + stp x9, x10, [P0+48]; \ + str x11, [P0+64] + +// P0 = 4 * P1 - P2 = 4 * P1 + (p_521 - P2) + +#define cmsub41_p521(P0,P1,P2) \ + ldp x6, x7, [P1]; \ + lsl x3, x6, #2; \ + extr x4, x7, x6, #62; \ + ldp x8, x9, [P1+16]; \ + extr x5, x8, x7, #62; \ + extr x6, x9, x8, #62; \ + ldp x10, x11, [P1+32]; \ + extr x7, x10, x9, #62; \ + extr x8, x11, x10, #62; \ + ldp x12, x13, [P1+48]; \ + extr x9, x12, x11, #62; \ + extr x10, x13, x12, #62; \ + ldr x14, [P1+64]; \ + extr x11, x14, x13, #62; \ + ldp x0, x1, [P2]; \ + mvn x0, x0; \ + adds x3, x3, x0; \ + sbcs x4, x4, x1; \ + ldp x0, x1, [P2+16]; \ + sbcs x5, x5, x0; \ + and x15, x4, x5; \ + sbcs x6, x6, x1; \ + and x15, x15, x6; \ + ldp x0, x1, [P2+32]; \ + sbcs x7, x7, x0; \ + and x15, x15, x7; \ + sbcs x8, x8, x1; \ + and x15, x15, x8; \ + ldp x0, x1, [P2+48]; \ + sbcs x9, x9, x0; \ + and x15, x15, x9; \ + sbcs x10, x10, x1; \ + and x15, x15, x10; \ + ldr x0, [P2+64]; \ + eor x0, x0, #0x1ff; \ + adc x11, x11, x0; \ + lsr x12, x11, #9; \ + orr x11, x11, #0xfffffffffffffe00; \ + cmp xzr, xzr; \ + adcs xzr, x3, x12; \ + adcs xzr, x15, xzr; \ + adcs xzr, x11, xzr; \ + adcs x3, x3, x12; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adcs x6, x6, xzr; \ + adcs x7, x7, xzr; \ + adcs x8, x8, xzr; \ + adcs x9, x9, xzr; \ + adcs x10, x10, xzr; \ + adc x11, x11, xzr; \ + and x11, x11, #0x1ff; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16]; \ + stp x7, x8, [P0+32]; \ + stp x9, x10, [P0+48]; \ + str x11, [P0+64] + +S2N_BN_SYMBOL(p521_jdouble): + +// Save regs and make room on stack for temporary variables + + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x27, x28, [sp, #-16]! 
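+
+// (x19-x28 are callee-saved under the AAPCS64 calling convention, so the
+// pairs pushed above are restored in reverse order in the epilogue below.)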
+ sub sp, sp, NSPACE + +// Move the input arguments to stable places + + mov input_z, x0 + mov input_x, x1 + +// Main code, just a sequence of basic field operations + +// z2 = z^2 +// y2 = y^2 + + sqr_p521(z2,z_1) + sqr_p521(y2,y_1) + +// x2p = x^2 - z^4 = (x + z^2) * (x - z^2) + + add_p521(t1,x_1,z2) + sub_p521(t2,x_1,z2) + mul_p521(x2p,t1,t2) + +// t1 = y + z +// x4p = x2p^2 +// xy2 = x * y^2 + + add_p521(t1,y_1,z_1) + sqr_p521(x4p,x2p) + weakmul_p521(xy2,x_1,y2) + +// t2 = (y + z)^2 + + sqr_p521(t2,t1) + +// d = 12 * xy2 - 9 * x4p +// t1 = y^2 + 2 * y * z + + cmsub_p521(d,12,xy2,9,x4p) + sub_p521(t1,t2,z2) + +// y4 = y^4 + + sqr_p521(y4,y2) + +// z_3' = 2 * y * z +// dx2 = d * x2p + + sub_p521(z_3,t1,y2) + weakmul_p521(dx2,d,x2p) + +// x' = 4 * xy2 - d + + cmsub41_p521(x_3,xy2,d) + +// y' = 3 * dx2 - 8 * y4 + + cmsub38_p521(y_3,dx2,y4) + +// Restore stack and registers + + add sp, sp, NSPACE + + ldp x27, x28, [sp], 16 + ldp x25, x26, [sp], 16 + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/p521/p521_jmixadd.S b/arm/p521/p521_jmixadd.S new file mode 100644 index 0000000000..6e8b46b00c --- /dev/null +++ b/arm/p521/p521_jmixadd.S @@ -0,0 +1,797 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Point mixed addition on NIST curve P-521 in Jacobian coordinates +// +// extern void p521_jmixadd +// (uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 18]); +// +// Does p3 := p1 + p2 where all points are regarded as Jacobian triples. +// A Jacobian triple (x,y,z) represents affine point (x/z^2,y/z^3). +// The "mixed" part means that p2 only has x and y coordinates, with the +// implicit z coordinate assumed to be the identity. It is assumed that +// all the coordinates of the input points p1 and p2 are fully reduced +// mod p_521, that the z coordinate of p1 is nonzero and that neither +// p1 =~= p2 nor p1 =~= -p2, where "=~=" means "represents the same affine +// point as".
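+//
+// For illustration only, a minimal C-level calling sketch (the array
+// contents here are placeholders standing in for the preconditions above):
+//
+//     uint64_t p3[27], p1[27], p2[18];
+//     /* fill p1 with a reduced Jacobian triple with nonzero z,
+//        and p2 with a reduced affine pair (x,y) */
+//     p521_jmixadd(p3, p1, p2);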
+// +// Standard ARM ABI: X0 = p3, X1 = p1, X2 = p2 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p521_jmixadd) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p521_jmixadd) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 72 + +// Stable homes for input arguments during main code sequence + +#define input_z x26 +#define input_x x27 +#define input_y x28 + +// Pointer-offset pairs for inputs and outputs + +#define x_1 input_x, #0 +#define y_1 input_x, #NUMSIZE +#define z_1 input_x, #(2*NUMSIZE) + +#define x_2 input_y, #0 +#define y_2 input_y, #NUMSIZE + +#define x_3 input_z, #0 +#define y_3 input_z, #NUMSIZE +#define z_3 input_z, #(2*NUMSIZE) + +// Pointer-offset pairs for temporaries, with some aliasing +// NSPACE is the total stack needed for these temporaries + +#define zp2 sp, #(NUMSIZE*0) +#define ww sp, #(NUMSIZE*0) + +#define yd sp, #(NUMSIZE*1) +#define y2a sp, #(NUMSIZE*1) + +#define x2a sp, #(NUMSIZE*2) +#define zzx2 sp, #(NUMSIZE*2) + +#define zz sp, #(NUMSIZE*3) +#define t1 sp, #(NUMSIZE*3) + +#define t2 sp, #(NUMSIZE*4) +#define zzx1 sp, #(NUMSIZE*4) + +#define xd sp, #(NUMSIZE*5) + +#define NSPACE (NUMSIZE*6) + +// Corresponds exactly to bignum_mul_p521_alt + +#define mul_p521(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + mul x15, x3, x5; \ + umulh x16, x3, x5; \ + mul x14, x3, x6; \ + umulh x17, x3, x6; \ + adds x16, x16, x14; \ + ldp x7, x8, [P2+16]; \ + mul x14, x3, x7; \ + umulh x19, x3, x7; \ + adcs x17, x17, x14; \ + mul x14, x3, x8; \ + umulh x20, x3, x8; \ + adcs x19, x19, x14; \ + ldp x9, x10, [P2+32]; \ + mul x14, x3, x9; \ + umulh x21, x3, x9; \ + adcs x20, x20, x14; \ + mul x14, x3, x10; \ + umulh x22, x3, x10; \ + adcs x21, x21, x14; \ + ldp x11, x12, [P2+48]; \ + mul x14, x3, x11; \ + umulh x23, x3, x11; \ + adcs x22, x22, x14; \ + ldr x13, [P2+64]; \ + mul x14, x3, x12; \ + umulh x24, x3, x12; \ + adcs x23, x23, x14; \ + mul x14, x3, x13; \ + umulh x1, x3, x13; \ + adcs x24, x24, x14; \ + adc x1, x1, xzr; \ + mul x14, x4, x5; \ + adds x16, x16, x14; \ + mul x14, x4, x6; \ + adcs x17, x17, x14; \ + mul x14, x4, x7; \ + adcs x19, x19, x14; \ + mul x14, x4, x8; \ + adcs x20, x20, x14; \ + mul x14, x4, x9; \ + adcs x21, x21, x14; \ + mul x14, x4, x10; \ + adcs x22, x22, x14; \ + mul x14, x4, x11; \ + adcs x23, x23, x14; \ + mul x14, x4, x12; \ + adcs x24, x24, x14; \ + mul x14, x4, x13; \ + adcs x1, x1, x14; \ + cset x0, hs; \ + umulh x14, x4, x5; \ + adds x17, x17, x14; \ + umulh x14, x4, x6; \ + adcs x19, x19, x14; \ + umulh x14, x4, x7; \ + adcs x20, x20, x14; \ + umulh x14, x4, x8; \ + adcs x21, x21, x14; \ + umulh x14, x4, x9; \ + adcs x22, x22, x14; \ + umulh x14, x4, x10; \ + adcs x23, x23, x14; \ + umulh x14, x4, x11; \ + adcs x24, x24, x14; \ + umulh x14, x4, x12; \ + adcs x1, x1, x14; \ + umulh x14, x4, x13; \ + adc x0, x0, x14; \ + stp x15, x16, [P0]; \ + ldp x3, x4, [P1+16]; \ + mul x14, x3, x5; \ + adds x17, x17, x14; \ + mul x14, x3, x6; \ + adcs x19, x19, x14; \ + mul x14, x3, x7; \ + adcs x20, x20, x14; \ + mul x14, x3, x8; \ + adcs x21, x21, x14; \ + mul x14, x3, x9; \ + adcs x22, x22, x14; \ + mul x14, x3, x10; \ + adcs x23, x23, x14; \ + mul x14, x3, x11; \ + adcs x24, x24, x14; \ + mul x14, x3, x12; \ + adcs x1, x1, x14; \ + mul x14, x3, x13; \ + adcs x0, x0, x14; \ + cset x15, hs; \ + umulh x14, x3, x5; \ + adds x19, x19, x14; \ + umulh x14, x3, x6; \ + adcs x20, x20, x14; \ + umulh x14, x3, x7; \ + adcs x21, x21, 
x14; \ + umulh x14, x3, x8; \ + adcs x22, x22, x14; \ + umulh x14, x3, x9; \ + adcs x23, x23, x14; \ + umulh x14, x3, x10; \ + adcs x24, x24, x14; \ + umulh x14, x3, x11; \ + adcs x1, x1, x14; \ + umulh x14, x3, x12; \ + adcs x0, x0, x14; \ + umulh x14, x3, x13; \ + adc x15, x15, x14; \ + mul x14, x4, x5; \ + adds x19, x19, x14; \ + mul x14, x4, x6; \ + adcs x20, x20, x14; \ + mul x14, x4, x7; \ + adcs x21, x21, x14; \ + mul x14, x4, x8; \ + adcs x22, x22, x14; \ + mul x14, x4, x9; \ + adcs x23, x23, x14; \ + mul x14, x4, x10; \ + adcs x24, x24, x14; \ + mul x14, x4, x11; \ + adcs x1, x1, x14; \ + mul x14, x4, x12; \ + adcs x0, x0, x14; \ + mul x14, x4, x13; \ + adcs x15, x15, x14; \ + cset x16, hs; \ + umulh x14, x4, x5; \ + adds x20, x20, x14; \ + umulh x14, x4, x6; \ + adcs x21, x21, x14; \ + umulh x14, x4, x7; \ + adcs x22, x22, x14; \ + umulh x14, x4, x8; \ + adcs x23, x23, x14; \ + umulh x14, x4, x9; \ + adcs x24, x24, x14; \ + umulh x14, x4, x10; \ + adcs x1, x1, x14; \ + umulh x14, x4, x11; \ + adcs x0, x0, x14; \ + umulh x14, x4, x12; \ + adcs x15, x15, x14; \ + umulh x14, x4, x13; \ + adc x16, x16, x14; \ + stp x17, x19, [P0+16]; \ + ldp x3, x4, [P1+32]; \ + mul x14, x3, x5; \ + adds x20, x20, x14; \ + mul x14, x3, x6; \ + adcs x21, x21, x14; \ + mul x14, x3, x7; \ + adcs x22, x22, x14; \ + mul x14, x3, x8; \ + adcs x23, x23, x14; \ + mul x14, x3, x9; \ + adcs x24, x24, x14; \ + mul x14, x3, x10; \ + adcs x1, x1, x14; \ + mul x14, x3, x11; \ + adcs x0, x0, x14; \ + mul x14, x3, x12; \ + adcs x15, x15, x14; \ + mul x14, x3, x13; \ + adcs x16, x16, x14; \ + cset x17, hs; \ + umulh x14, x3, x5; \ + adds x21, x21, x14; \ + umulh x14, x3, x6; \ + adcs x22, x22, x14; \ + umulh x14, x3, x7; \ + adcs x23, x23, x14; \ + umulh x14, x3, x8; \ + adcs x24, x24, x14; \ + umulh x14, x3, x9; \ + adcs x1, x1, x14; \ + umulh x14, x3, x10; \ + adcs x0, x0, x14; \ + umulh x14, x3, x11; \ + adcs x15, x15, x14; \ + umulh x14, x3, x12; \ + adcs x16, x16, x14; \ + umulh x14, x3, x13; \ + adc x17, x17, x14; \ + mul x14, x4, x5; \ + adds x21, x21, x14; \ + mul x14, x4, x6; \ + adcs x22, x22, x14; \ + mul x14, x4, x7; \ + adcs x23, x23, x14; \ + mul x14, x4, x8; \ + adcs x24, x24, x14; \ + mul x14, x4, x9; \ + adcs x1, x1, x14; \ + mul x14, x4, x10; \ + adcs x0, x0, x14; \ + mul x14, x4, x11; \ + adcs x15, x15, x14; \ + mul x14, x4, x12; \ + adcs x16, x16, x14; \ + mul x14, x4, x13; \ + adcs x17, x17, x14; \ + cset x19, hs; \ + umulh x14, x4, x5; \ + adds x22, x22, x14; \ + umulh x14, x4, x6; \ + adcs x23, x23, x14; \ + umulh x14, x4, x7; \ + adcs x24, x24, x14; \ + umulh x14, x4, x8; \ + adcs x1, x1, x14; \ + umulh x14, x4, x9; \ + adcs x0, x0, x14; \ + umulh x14, x4, x10; \ + adcs x15, x15, x14; \ + umulh x14, x4, x11; \ + adcs x16, x16, x14; \ + umulh x14, x4, x12; \ + adcs x17, x17, x14; \ + umulh x14, x4, x13; \ + adc x19, x19, x14; \ + stp x20, x21, [P0+32]; \ + ldp x3, x4, [P1+48]; \ + mul x14, x3, x5; \ + adds x22, x22, x14; \ + mul x14, x3, x6; \ + adcs x23, x23, x14; \ + mul x14, x3, x7; \ + adcs x24, x24, x14; \ + mul x14, x3, x8; \ + adcs x1, x1, x14; \ + mul x14, x3, x9; \ + adcs x0, x0, x14; \ + mul x14, x3, x10; \ + adcs x15, x15, x14; \ + mul x14, x3, x11; \ + adcs x16, x16, x14; \ + mul x14, x3, x12; \ + adcs x17, x17, x14; \ + mul x14, x3, x13; \ + adcs x19, x19, x14; \ + cset x20, hs; \ + umulh x14, x3, x5; \ + adds x23, x23, x14; \ + umulh x14, x3, x6; \ + adcs x24, x24, x14; \ + umulh x14, x3, x7; \ + adcs x1, x1, x14; \ + umulh x14, x3, x8; \ + adcs x0, x0, x14; \ + umulh x14, x3, x9; 
\ + adcs x15, x15, x14; \ + umulh x14, x3, x10; \ + adcs x16, x16, x14; \ + umulh x14, x3, x11; \ + adcs x17, x17, x14; \ + umulh x14, x3, x12; \ + adcs x19, x19, x14; \ + umulh x14, x3, x13; \ + adc x20, x20, x14; \ + mul x14, x4, x5; \ + adds x23, x23, x14; \ + mul x14, x4, x6; \ + adcs x24, x24, x14; \ + mul x14, x4, x7; \ + adcs x1, x1, x14; \ + mul x14, x4, x8; \ + adcs x0, x0, x14; \ + mul x14, x4, x9; \ + adcs x15, x15, x14; \ + mul x14, x4, x10; \ + adcs x16, x16, x14; \ + mul x14, x4, x11; \ + adcs x17, x17, x14; \ + mul x14, x4, x12; \ + adcs x19, x19, x14; \ + mul x14, x4, x13; \ + adcs x20, x20, x14; \ + cset x21, hs; \ + umulh x14, x4, x5; \ + adds x24, x24, x14; \ + umulh x14, x4, x6; \ + adcs x1, x1, x14; \ + umulh x14, x4, x7; \ + adcs x0, x0, x14; \ + umulh x14, x4, x8; \ + adcs x15, x15, x14; \ + umulh x14, x4, x9; \ + adcs x16, x16, x14; \ + umulh x14, x4, x10; \ + adcs x17, x17, x14; \ + umulh x14, x4, x11; \ + adcs x19, x19, x14; \ + umulh x14, x4, x12; \ + adcs x20, x20, x14; \ + umulh x14, x4, x13; \ + adc x21, x21, x14; \ + stp x22, x23, [P0+48]; \ + ldr x3, [P1+64]; \ + mul x14, x3, x5; \ + adds x24, x24, x14; \ + mul x14, x3, x6; \ + adcs x1, x1, x14; \ + mul x14, x3, x7; \ + adcs x0, x0, x14; \ + mul x14, x3, x8; \ + adcs x15, x15, x14; \ + mul x14, x3, x9; \ + adcs x16, x16, x14; \ + mul x14, x3, x10; \ + adcs x17, x17, x14; \ + mul x14, x3, x11; \ + adcs x19, x19, x14; \ + mul x14, x3, x12; \ + adcs x20, x20, x14; \ + mul x14, x3, x13; \ + adc x21, x21, x14; \ + umulh x14, x3, x5; \ + adds x1, x1, x14; \ + umulh x14, x3, x6; \ + adcs x0, x0, x14; \ + umulh x14, x3, x7; \ + adcs x15, x15, x14; \ + umulh x14, x3, x8; \ + adcs x16, x16, x14; \ + umulh x14, x3, x9; \ + adcs x17, x17, x14; \ + umulh x14, x3, x10; \ + adcs x19, x19, x14; \ + umulh x14, x3, x11; \ + adcs x20, x20, x14; \ + umulh x14, x3, x12; \ + adc x21, x21, x14; \ + cmp xzr, xzr; \ + ldp x5, x6, [P0]; \ + extr x14, x1, x24, #9; \ + adcs x5, x5, x14; \ + extr x14, x0, x1, #9; \ + adcs x6, x6, x14; \ + ldp x7, x8, [P0+16]; \ + extr x14, x15, x0, #9; \ + adcs x7, x7, x14; \ + extr x14, x16, x15, #9; \ + adcs x8, x8, x14; \ + ldp x9, x10, [P0+32]; \ + extr x14, x17, x16, #9; \ + adcs x9, x9, x14; \ + extr x14, x19, x17, #9; \ + adcs x10, x10, x14; \ + ldp x11, x12, [P0+48]; \ + extr x14, x20, x19, #9; \ + adcs x11, x11, x14; \ + extr x14, x21, x20, #9; \ + adcs x12, x12, x14; \ + orr x13, x24, #0xfffffffffffffe00; \ + lsr x14, x21, #9; \ + adcs x13, x13, x14; \ + sbcs x5, x5, xzr; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbc x13, x13, xzr; \ + and x13, x13, #0x1ff; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32]; \ + stp x11, x12, [P0+48]; \ + str x13, [P0+64] + +// Corresponds exactly to bignum_sqr_p521_alt + +#define sqr_p521(P0,P1) \ + ldp x2, x3, [P1]; \ + mul x11, x2, x3; \ + umulh x12, x2, x3; \ + ldp x4, x5, [P1+16]; \ + mul x10, x2, x4; \ + umulh x13, x2, x4; \ + adds x12, x12, x10; \ + ldp x6, x7, [P1+32]; \ + mul x10, x2, x5; \ + umulh x14, x2, x5; \ + adcs x13, x13, x10; \ + ldp x8, x9, [P1+48]; \ + mul x10, x2, x6; \ + umulh x15, x2, x6; \ + adcs x14, x14, x10; \ + mul x10, x2, x7; \ + umulh x16, x2, x7; \ + adcs x15, x15, x10; \ + mul x10, x2, x8; \ + umulh x17, x2, x8; \ + adcs x16, x16, x10; \ + mul x10, x2, x9; \ + umulh x19, x2, x9; \ + adcs x17, x17, x10; \ + adc x19, x19, xzr; \ + mul x10, x3, x4; \ + adds x13, x13, x10; \ + mul x10, x3, 
x5; \ + adcs x14, x14, x10; \ + mul x10, x3, x6; \ + adcs x15, x15, x10; \ + mul x10, x3, x7; \ + adcs x16, x16, x10; \ + mul x10, x3, x8; \ + adcs x17, x17, x10; \ + mul x10, x3, x9; \ + adcs x19, x19, x10; \ + cset x20, hs; \ + umulh x10, x3, x4; \ + adds x14, x14, x10; \ + umulh x10, x3, x5; \ + adcs x15, x15, x10; \ + umulh x10, x3, x6; \ + adcs x16, x16, x10; \ + umulh x10, x3, x7; \ + adcs x17, x17, x10; \ + umulh x10, x3, x8; \ + adcs x19, x19, x10; \ + umulh x10, x3, x9; \ + adc x20, x20, x10; \ + mul x10, x6, x7; \ + umulh x21, x6, x7; \ + adds x20, x20, x10; \ + adc x21, x21, xzr; \ + mul x10, x4, x5; \ + adds x15, x15, x10; \ + mul x10, x4, x6; \ + adcs x16, x16, x10; \ + mul x10, x4, x7; \ + adcs x17, x17, x10; \ + mul x10, x4, x8; \ + adcs x19, x19, x10; \ + mul x10, x4, x9; \ + adcs x20, x20, x10; \ + mul x10, x6, x8; \ + adcs x21, x21, x10; \ + cset x22, hs; \ + umulh x10, x4, x5; \ + adds x16, x16, x10; \ + umulh x10, x4, x6; \ + adcs x17, x17, x10; \ + umulh x10, x4, x7; \ + adcs x19, x19, x10; \ + umulh x10, x4, x8; \ + adcs x20, x20, x10; \ + umulh x10, x4, x9; \ + adcs x21, x21, x10; \ + umulh x10, x6, x8; \ + adc x22, x22, x10; \ + mul x10, x7, x8; \ + umulh x23, x7, x8; \ + adds x22, x22, x10; \ + adc x23, x23, xzr; \ + mul x10, x5, x6; \ + adds x17, x17, x10; \ + mul x10, x5, x7; \ + adcs x19, x19, x10; \ + mul x10, x5, x8; \ + adcs x20, x20, x10; \ + mul x10, x5, x9; \ + adcs x21, x21, x10; \ + mul x10, x6, x9; \ + adcs x22, x22, x10; \ + mul x10, x7, x9; \ + adcs x23, x23, x10; \ + cset x24, hs; \ + umulh x10, x5, x6; \ + adds x19, x19, x10; \ + umulh x10, x5, x7; \ + adcs x20, x20, x10; \ + umulh x10, x5, x8; \ + adcs x21, x21, x10; \ + umulh x10, x5, x9; \ + adcs x22, x22, x10; \ + umulh x10, x6, x9; \ + adcs x23, x23, x10; \ + umulh x10, x7, x9; \ + adc x24, x24, x10; \ + mul x10, x8, x9; \ + umulh x25, x8, x9; \ + adds x24, x24, x10; \ + adc x25, x25, xzr; \ + adds x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + adcs x15, x15, x15; \ + adcs x16, x16, x16; \ + adcs x17, x17, x17; \ + adcs x19, x19, x19; \ + adcs x20, x20, x20; \ + adcs x21, x21, x21; \ + adcs x22, x22, x22; \ + adcs x23, x23, x23; \ + adcs x24, x24, x24; \ + adcs x25, x25, x25; \ + cset x0, hs; \ + umulh x10, x2, x2; \ + adds x11, x11, x10; \ + mul x10, x3, x3; \ + adcs x12, x12, x10; \ + umulh x10, x3, x3; \ + adcs x13, x13, x10; \ + mul x10, x4, x4; \ + adcs x14, x14, x10; \ + umulh x10, x4, x4; \ + adcs x15, x15, x10; \ + mul x10, x5, x5; \ + adcs x16, x16, x10; \ + umulh x10, x5, x5; \ + adcs x17, x17, x10; \ + mul x10, x6, x6; \ + adcs x19, x19, x10; \ + umulh x10, x6, x6; \ + adcs x20, x20, x10; \ + mul x10, x7, x7; \ + adcs x21, x21, x10; \ + umulh x10, x7, x7; \ + adcs x22, x22, x10; \ + mul x10, x8, x8; \ + adcs x23, x23, x10; \ + umulh x10, x8, x8; \ + adcs x24, x24, x10; \ + mul x10, x9, x9; \ + adcs x25, x25, x10; \ + umulh x10, x9, x9; \ + adc x0, x0, x10; \ + ldr x1, [P1+64]; \ + add x1, x1, x1; \ + mul x10, x1, x2; \ + adds x19, x19, x10; \ + umulh x10, x1, x2; \ + adcs x20, x20, x10; \ + mul x10, x1, x4; \ + adcs x21, x21, x10; \ + umulh x10, x1, x4; \ + adcs x22, x22, x10; \ + mul x10, x1, x6; \ + adcs x23, x23, x10; \ + umulh x10, x1, x6; \ + adcs x24, x24, x10; \ + mul x10, x1, x8; \ + adcs x25, x25, x10; \ + umulh x10, x1, x8; \ + adcs x0, x0, x10; \ + lsr x4, x1, #1; \ + mul x4, x4, x4; \ + adc x4, x4, xzr; \ + mul x10, x1, x3; \ + adds x20, x20, x10; \ + umulh x10, x1, x3; \ + adcs x21, x21, x10; \ + mul x10, x1, x5; \ + adcs x22, 
x22, x10; \ + umulh x10, x1, x5; \ + adcs x23, x23, x10; \ + mul x10, x1, x7; \ + adcs x24, x24, x10; \ + umulh x10, x1, x7; \ + adcs x25, x25, x10; \ + mul x10, x1, x9; \ + adcs x0, x0, x10; \ + umulh x10, x1, x9; \ + adc x4, x4, x10; \ + mul x2, x2, x2; \ + cmp xzr, xzr; \ + extr x10, x20, x19, #9; \ + adcs x2, x2, x10; \ + extr x10, x21, x20, #9; \ + adcs x11, x11, x10; \ + extr x10, x22, x21, #9; \ + adcs x12, x12, x10; \ + extr x10, x23, x22, #9; \ + adcs x13, x13, x10; \ + extr x10, x24, x23, #9; \ + adcs x14, x14, x10; \ + extr x10, x25, x24, #9; \ + adcs x15, x15, x10; \ + extr x10, x0, x25, #9; \ + adcs x16, x16, x10; \ + extr x10, x4, x0, #9; \ + adcs x17, x17, x10; \ + orr x19, x19, #0xfffffffffffffe00; \ + lsr x10, x4, #9; \ + adcs x19, x19, x10; \ + sbcs x2, x2, xzr; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbcs x15, x15, xzr; \ + sbcs x16, x16, xzr; \ + sbcs x17, x17, xzr; \ + sbc x19, x19, xzr; \ + and x19, x19, #0x1ff; \ + stp x2, x11, [P0]; \ + stp x12, x13, [P0+16]; \ + stp x14, x15, [P0+32]; \ + stp x16, x17, [P0+48]; \ + str x19, [P0+64] + +// Corresponds exactly to bignum_sub_p521 + +#define sub_p521(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + ldp x9, x10, [P1+32]; \ + ldp x4, x3, [P2+32]; \ + sbcs x9, x9, x4; \ + sbcs x10, x10, x3; \ + ldp x11, x12, [P1+48]; \ + ldp x4, x3, [P2+48]; \ + sbcs x11, x11, x4; \ + sbcs x12, x12, x3; \ + ldr x13, [P1+64]; \ + ldr x4, [P2+64]; \ + sbcs x13, x13, x4; \ + sbcs x5, x5, xzr; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbcs x11, x11, xzr; \ + sbcs x12, x12, xzr; \ + sbcs x13, x13, xzr; \ + and x13, x13, #0x1ff; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32]; \ + stp x11, x12, [P0+48]; \ + str x13, [P0+64] + +S2N_BN_SYMBOL(p521_jmixadd): + +// Save regs and make room on stack for temporary variables + + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x27, x28, [sp, #-16]! + sub sp, sp, NSPACE + +// Move the input arguments to stable places + + mov input_z, x0 + mov input_x, x1 + mov input_y, x2 + +// Main code, just a sequence of basic field operations + + sqr_p521(zp2,z_1) + mul_p521(y2a,z_1,y_2) + + mul_p521(x2a,zp2,x_2) + mul_p521(y2a,zp2,y2a) + + sub_p521(xd,x2a,x_1) + sub_p521(yd,y2a,y_1) + + sqr_p521(zz,xd) + sqr_p521(ww,yd) + + mul_p521(zzx1,zz,x_1) + mul_p521(zzx2,zz,x2a) + + sub_p521(x_3,ww,zzx1) + sub_p521(t1,zzx2,zzx1) + + mul_p521(z_3,xd,z_1) + + sub_p521(x_3,x_3,zzx2) + + sub_p521(t2,zzx1,x_3) + + mul_p521(t1,t1,y_1) + mul_p521(t2,yd,t2) + + sub_p521(y_3,t2,t1) + +// Restore stack and registers + + add sp, sp, NSPACE + + ldp x27, x28, [sp], 16 + ldp x25, x26, [sp], 16 + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/p521/p521_jadd.S b/x86_att/p521/p521_jadd.S new file mode 100644 index 0000000000..c1ac9a235f --- /dev/null +++ b/x86_att/p521/p521_jadd.S @@ -0,0 +1,765 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Point addition on NIST curve P-521 in Jacobian coordinates +// +// extern void p521_jadd +// (uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 27]); +// +// Does p3 := p1 + p2 where all points are regarded as Jacobian triples. +// A Jacobian triple (x,y,z) represents affine point (x/z^2,y/z^3). +// It is assumed that all coordinates of the input points p1 and p2 are +// fully reduced mod p_521, that both z coordinates are nonzero and +// that neither p1 =~= p2 nor p1 =~= -p2, where "=~=" means "represents +// the same affine point as". +// +// Standard x86-64 ABI: RDI = p3, RSI = p1, RDX = p2 +// Microsoft x64 ABI: RCX = p3, RDX = p1, R8 = p2 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p521_jadd) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p521_jadd) + .text + +// Size of individual field elements + +#define NUMSIZE 72 + +// Stable homes for input arguments during main code sequence +// These are where they arrive except for input_y, initially in %rdx + +#define input_z %rdi +#define input_x %rsi +#define input_y %rcx + +// Pointer-offset pairs for inputs and outputs + +#define x_1 0(input_x) +#define y_1 NUMSIZE(input_x) +#define z_1 (2*NUMSIZE)(input_x) + +#define x_2 0(input_y) +#define y_2 NUMSIZE(input_y) +#define z_2 (2*NUMSIZE)(input_y) + +#define x_3 0(input_z) +#define y_3 NUMSIZE(input_z) +#define z_3 (2*NUMSIZE)(input_z) + +// Pointer-offset pairs for temporaries, with some aliasing
+// The tmp field is internal storage for field mul and sqr.
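+// Temporaries that are never live at the same time share a stack slot
+// below (e.g. z1sq with ww, and yd with y2a), which keeps the frame small.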
+// NSPACE is the total stack needed for these temporaries + +#define z1sq (NUMSIZE*0)(%rsp) +#define ww (NUMSIZE*0)(%rsp) + +#define yd (NUMSIZE*1)(%rsp) +#define y2a (NUMSIZE*1)(%rsp) + +#define x2a (NUMSIZE*2)(%rsp) +#define zzx2 (NUMSIZE*2)(%rsp) + +#define zz (NUMSIZE*3)(%rsp) +#define t1 (NUMSIZE*3)(%rsp) + +#define t2 (NUMSIZE*4)(%rsp) +#define x1a (NUMSIZE*4)(%rsp) +#define zzx1 (NUMSIZE*4)(%rsp) + +#define xd (NUMSIZE*5)(%rsp) +#define z2sq (NUMSIZE*5)(%rsp) + +#define y1a (NUMSIZE*6)(%rsp) + +#define tmp (NUMSIZE*7)(%rsp) + +#define NSPACE (NUMSIZE*7+64) + +// Corresponds exactly to bignum_mul_p521 + +#define mul_p521(P0,P1,P2) \ + xorl %ebp, %ebp ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + movq %r8, 504(%rsp) ; \ + mulxq 0x8+P1, %rbx, %r10 ; \ + adcq %rbx, %r9 ; \ + mulxq 0x10+P1, %rbx, %r11 ; \ + adcq %rbx, %r10 ; \ + mulxq 0x18+P1, %rbx, %r12 ; \ + adcq %rbx, %r11 ; \ + mulxq 0x20+P1, %rbx, %r13 ; \ + adcq %rbx, %r12 ; \ + mulxq 0x28+P1, %rbx, %r14 ; \ + adcq %rbx, %r13 ; \ + mulxq 0x30+P1, %rbx, %r15 ; \ + adcq %rbx, %r14 ; \ + mulxq 0x38+P1, %rbx, %r8 ; \ + adcq %rbx, %r15 ; \ + adcq %rbp, %r8 ; \ + movq 0x8+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + movq %r9, 512(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x38+P1, %rax, %r9 ; \ + adcxq %rax, %r8 ; \ + adoxq %rbp, %r9 ; \ + adcq %rbp, %r9 ; \ + movq 0x10+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + movq %r10, 520(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x38+P1, %rax, %r10 ; \ + adcxq %rax, %r9 ; \ + adoxq %rbp, %r10 ; \ + adcq %rbp, %r10 ; \ + movq 0x18+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq %r11, 528(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x38+P1, %rax, %r11 ; \ + adcxq %rax, %r10 ; \ + adoxq %rbp, %r11 ; \ + adcq %rbp, %r11 ; \ + movq 0x20+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq %r12, 536(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq 
%rbx, %r15 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x38+P1, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rbp, %r12 ; \ + adcq %rbp, %r12 ; \ + movq 0x28+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + movq %r13, 544(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x38+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rbp, %r13 ; \ + adcq %rbp, %r13 ; \ + movq 0x30+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + movq %r14, 552(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x38+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rbp, %r14 ; \ + adcq %rbp, %r14 ; \ + movq 0x38+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + movq %r15, 560(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x38+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rbp, %r15 ; \ + adcq %rbp, %r15 ; \ + movq 0x40+P1, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P2, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x8+P2, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x10+P2, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x18+P2, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x20+P2, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x28+P2, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x30+P2, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x38+P2, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbp, %rbx ; \ + adcq %rbx, %rbp ; \ + movq 0x40+P2, %rdx ; \ + xorl %eax, %eax ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, 
%r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %rbp ; \ + mulxq 0x40+P1, %rax, %rbx ; \ + adcq %rax, %rbp ; \ + movq %r8, %rax ; \ + andq $0x1ff, %rax ; \ + shrdq $0x9, %r9, %r8 ; \ + shrdq $0x9, %r10, %r9 ; \ + shrdq $0x9, %r11, %r10 ; \ + shrdq $0x9, %r12, %r11 ; \ + shrdq $0x9, %r13, %r12 ; \ + shrdq $0x9, %r14, %r13 ; \ + shrdq $0x9, %r15, %r14 ; \ + shrdq $0x9, %rbp, %r15 ; \ + shrq $0x9, %rbp ; \ + addq %rax, %rbp ; \ + stc; \ + adcq 504(%rsp), %r8 ; \ + adcq 512(%rsp), %r9 ; \ + adcq 520(%rsp), %r10 ; \ + adcq 528(%rsp), %r11 ; \ + adcq 536(%rsp), %r12 ; \ + adcq 544(%rsp), %r13 ; \ + adcq 552(%rsp), %r14 ; \ + adcq 560(%rsp), %r15 ; \ + adcq $0xfffffffffffffe00, %rbp ; \ + cmc; \ + sbbq $0x0, %r8 ; \ + movq %r8, P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x8+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x10+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x18+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x20+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x28+P0 ; \ + sbbq $0x0, %r14 ; \ + movq %r14, 0x30+P0 ; \ + sbbq $0x0, %r15 ; \ + movq %r15, 0x38+P0 ; \ + sbbq $0x0, %rbp ; \ + andq $0x1ff, %rbp ; \ + movq %rbp, 0x40+P0 + +// Corresponds exactly to bignum_sqr_p521 + +#define sqr_p521(P0,P1) \ + xorl %ebp, %ebp ; \ + movq P1, %rdx ; \ + mulxq 0x8+P1, %r9, %rax ; \ + movq %r9, 512(%rsp) ; \ + mulxq 0x10+P1, %r10, %rbx ; \ + adcxq %rax, %r10 ; \ + movq %r10, 520(%rsp) ; \ + mulxq 0x18+P1, %r11, %rax ; \ + adcxq %rbx, %r11 ; \ + mulxq 0x20+P1, %r12, %rbx ; \ + adcxq %rax, %r12 ; \ + mulxq 0x28+P1, %r13, %rax ; \ + adcxq %rbx, %r13 ; \ + mulxq 0x30+P1, %r14, %rbx ; \ + adcxq %rax, %r14 ; \ + mulxq 0x38+P1, %r15, %r8 ; \ + adcxq %rbx, %r15 ; \ + adcxq %rbp, %r8 ; \ + xorl %ebp, %ebp ; \ + movq 0x8+P1, %rdx ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq %r11, 528(%rsp) ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq %r12, 536(%rsp) ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x38+P1, %rax, %r9 ; \ + adcxq %rax, %r8 ; \ + adoxq %rbp, %r9 ; \ + movq 0x20+P1, %rdx ; \ + mulxq 0x28+P1, %rax, %r10 ; \ + adcxq %rax, %r9 ; \ + adoxq %rbp, %r10 ; \ + adcxq %rbp, %r10 ; \ + xorl %ebp, %ebp ; \ + movq 0x10+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + movq %r13, 544(%rsp) ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + movq %r14, 552(%rsp) ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + movq 0x30+P1, %rdx ; \ + mulxq 0x20+P1, %rax, %r11 ; \ + adcxq %rax, %r10 ; \ + adoxq %rbp, %r11 ; \ + mulxq 0x28+P1, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rbp, %r12 ; \ + adcxq %rbp, %r12 ; \ + xorl %ebp, %ebp ; \ + movq 0x18+P1, %rdx ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + movq %r15, 560(%rsp) ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + 
adoxq %rbx, %r9 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + movq 0x38+P1, %rdx ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x28+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rbp, %r13 ; \ + mulxq 0x30+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rbp, %r14 ; \ + adcxq %rbp, %r14 ; \ + xorl %ebp, %ebp ; \ + movq P1, %rdx ; \ + mulxq %rdx, %rax, %rbx ; \ + movq %rax, 504(%rsp) ; \ + movq 512(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 512(%rsp) ; \ + movq 520(%rsp), %rax ; \ + movq 0x8+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %rax, %rax ; \ + adoxq %rdx, %rax ; \ + movq %rax, 520(%rsp) ; \ + movq 528(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 528(%rsp) ; \ + movq 536(%rsp), %rax ; \ + movq 0x10+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %rax, %rax ; \ + adoxq %rdx, %rax ; \ + movq %rax, 536(%rsp) ; \ + movq 544(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 544(%rsp) ; \ + movq 552(%rsp), %rax ; \ + movq 0x18+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %rax, %rax ; \ + adoxq %rdx, %rax ; \ + movq %rax, 552(%rsp) ; \ + movq 560(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 560(%rsp) ; \ + movq 0x20+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %r8, %r8 ; \ + adoxq %rdx, %r8 ; \ + adcxq %r9, %r9 ; \ + adoxq %rbx, %r9 ; \ + movq 0x28+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %r10, %r10 ; \ + adoxq %rdx, %r10 ; \ + adcxq %r11, %r11 ; \ + adoxq %rbx, %r11 ; \ + movq 0x30+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %r12, %r12 ; \ + adoxq %rdx, %r12 ; \ + adcxq %r13, %r13 ; \ + adoxq %rbx, %r13 ; \ + movq 0x38+P1, %rdx ; \ + mulxq %rdx, %rdx, %r15 ; \ + adcxq %r14, %r14 ; \ + adoxq %rdx, %r14 ; \ + adcxq %rbp, %r15 ; \ + adoxq %rbp, %r15 ; \ + movq 0x40+P1, %rdx ; \ + movq %rdx, %rbp ; \ + imulq %rbp, %rbp ; \ + addq %rdx, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %rbp ; \ + adcq $0x0, %rbp ; \ + movq %r8, %rax ; \ + andq $0x1ff, %rax ; \ + shrdq $0x9, %r9, %r8 ; \ + shrdq $0x9, %r10, %r9 ; \ + shrdq $0x9, %r11, %r10 ; \ + shrdq $0x9, %r12, %r11 ; \ + shrdq $0x9, %r13, %r12 ; \ + shrdq $0x9, %r14, %r13 ; \ + shrdq $0x9, %r15, %r14 ; \ + shrdq $0x9, %rbp, %r15 ; \ + shrq $0x9, %rbp ; \ + addq %rax, %rbp ; \ + stc; \ + adcq 504(%rsp), %r8 ; \ + adcq 512(%rsp), %r9 ; \ + adcq 520(%rsp), %r10 ; \ + adcq 528(%rsp), %r11 ; \ + adcq 536(%rsp), %r12 ; \ + adcq 544(%rsp), %r13 ; \ + adcq 552(%rsp), %r14 ; \ + adcq 560(%rsp), %r15 ; \ + adcq $0xfffffffffffffe00, %rbp ; \ + cmc; \ + sbbq $0x0, %r8 ; \ + movq %r8, P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x8+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x10+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x18+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x20+P0 ; \ + sbbq $0x0, %r13 ; 
\ + movq %r13, 0x28+P0 ; \ + sbbq $0x0, %r14 ; \ + movq %r14, 0x30+P0 ; \ + sbbq $0x0, %r15 ; \ + movq %r15, 0x38+P0 ; \ + sbbq $0x0, %rbp ; \ + andq $0x1ff, %rbp ; \ + movq %rbp, 0x40+P0 + +// Corresponds exactly to bignum_sub_p521 + +#define sub_p521(P0,P1,P2) \ + movq P1, %rax ; \ + subq P2, %rax ; \ + movq 0x8+P1, %rdx ; \ + sbbq 0x8+P2, %rdx ; \ + movq 0x10+P1, %r8 ; \ + sbbq 0x10+P2, %r8 ; \ + movq 0x18+P1, %r9 ; \ + sbbq 0x18+P2, %r9 ; \ + movq 0x20+P1, %r10 ; \ + sbbq 0x20+P2, %r10 ; \ + movq 0x28+P1, %r11 ; \ + sbbq 0x28+P2, %r11 ; \ + movq 0x30+P1, %r12 ; \ + sbbq 0x30+P2, %r12 ; \ + movq 0x38+P1, %r13 ; \ + sbbq 0x38+P2, %r13 ; \ + movq 0x40+P1, %r14 ; \ + sbbq 0x40+P2, %r14 ; \ + sbbq $0x0, %rax ; \ + movq %rax, P0 ; \ + sbbq $0x0, %rdx ; \ + movq %rdx, 0x8+P0 ; \ + sbbq $0x0, %r8 ; \ + movq %r8, 0x10+P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x18+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x20+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x28+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x30+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x38+P0 ; \ + sbbq $0x0, %r14 ; \ + andq $0x1ff, %r14 ; \ + movq %r14, 0x40+P0 + +S2N_BN_SYMBOL(p521_jadd): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + +// Save registers and make room on stack for temporary variables + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $NSPACE, %rsp + +// Move the input arguments to stable places (two are already there) + + movq %rdx, input_y + +// Main code, just a sequence of basic field operations + + sqr_p521(z1sq,z_1) + sqr_p521(z2sq,z_2) + + mul_p521(y1a,z_2,y_1) + mul_p521(y2a,z_1,y_2) + + mul_p521(x2a,z1sq,x_2) + mul_p521(x1a,z2sq,x_1) + mul_p521(y2a,z1sq,y2a) + mul_p521(y1a,z2sq,y1a) + + sub_p521(xd,x2a,x1a) + sub_p521(yd,y2a,y1a) + + sqr_p521(zz,xd) + sqr_p521(ww,yd) + + mul_p521(zzx1,zz,x1a) + mul_p521(zzx2,zz,x2a) + + sub_p521(x_3,ww,zzx1) + sub_p521(t1,zzx2,zzx1) + + mul_p521(xd,xd,z_1) + + sub_p521(x_3,x_3,zzx2) + + sub_p521(t2,zzx1,x_3) + + mul_p521(t1,t1,y1a) + mul_p521(z_3,xd,z_2) + mul_p521(t2,yd,t2) + + sub_p521(y_3,t2,t1) + +// Restore stack and registers + + addq $NSPACE, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/p521/p521_jdouble.S b/x86_att/p521/p521_jdouble.S new file mode 100644 index 0000000000..16a5deeb04 --- /dev/null +++ b/x86_att/p521/p521_jdouble.S @@ -0,0 +1,1386 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Point doubling on NIST curve P-521 in Jacobian coordinates +// +// extern void p521_jdouble +// (uint64_t p3[static 27],uint64_t p1[static 27]); +// +// Does p3 := 2 * p1 where all points are regarded as Jacobian triples. 
+// A Jacobian triple (x,y,z) represents affine point (x/z^2,y/z^3). +// It is assumed that all coordinates of the input point are fully +// reduced mod p_521 and that the z coordinate is not zero. +// +// Standard x86-64 ABI: RDI = p3, RSI = p1 +// Microsoft x64 ABI: RCX = p3, RDX = p1 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p521_jdouble) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p521_jdouble) + .text + +// Size of individual field elements + +#define NUMSIZE 72 + +// Stable homes for input arguments during main code sequence +// This is actually where they come in anyway and they stay there. + +#define input_z %rdi +#define input_x %rsi + +// Pointer-offset pairs for inputs and outputs + +#define x_1 0(input_x) +#define y_1 NUMSIZE(input_x) +#define z_1 (2*NUMSIZE)(input_x) + +#define x_3 0(input_z) +#define y_3 NUMSIZE(input_z) +#define z_3 (2*NUMSIZE)(input_z) + +// Pointer-offset pairs for temporaries, with some aliasing +// The tmp field is internal storage for field mul and sqr. +// NSPACE is the total stack needed for these temporaries + +#define z2 (NUMSIZE*0)(%rsp) +#define y2 (NUMSIZE*1)(%rsp) +#define x2p (NUMSIZE*2)(%rsp) +#define xy2 (NUMSIZE*3)(%rsp) + +#define y4 (NUMSIZE*4)(%rsp) +#define t2 (NUMSIZE*4)(%rsp) + +#define dx2 (NUMSIZE*5)(%rsp) +#define t1 (NUMSIZE*5)(%rsp) + +#define d (NUMSIZE*6)(%rsp) +#define x4p (NUMSIZE*6)(%rsp) + +#define tmp (NUMSIZE*7)(%rsp) + +#define NSPACE (NUMSIZE*7+64) + +// Corresponds exactly to bignum_mul_p521 + +#define mul_p521(P0,P1,P2) \ + xorl %ecx, %ecx ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + movq %r8, 504(%rsp) ; \ + mulxq 0x8+P1, %rbx, %r10 ; \ + adcq %rbx, %r9 ; \ + mulxq 0x10+P1, %rbx, %r11 ; \ + adcq %rbx, %r10 ; \ + mulxq 0x18+P1, %rbx, %r12 ; \ + adcq %rbx, %r11 ; \ + mulxq 0x20+P1, %rbx, %r13 ; \ + adcq %rbx, %r12 ; \ + mulxq 0x28+P1, %rbx, %r14 ; \ + adcq %rbx, %r13 ; \ + mulxq 0x30+P1, %rbx, %r15 ; \ + adcq %rbx, %r14 ; \ + mulxq 0x38+P1, %rbx, %r8 ; \ + adcq %rbx, %r15 ; \ + adcq %rcx, %r8 ; \ + movq 0x8+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + movq %r9, 512(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x38+P1, %rax, %r9 ; \ + adcxq %rax, %r8 ; \ + adoxq %rcx, %r9 ; \ + adcq %rcx, %r9 ; \ + movq 0x10+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + movq %r10, 520(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x38+P1, %rax, %r10 ; \ + adcxq %rax, %r9 ; \ + adoxq %rcx, %r10 ; \ + adcq %rcx, %r10 ; \ + movq 0x18+P2, %rdx ; \ + xorl 
%ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq %r11, 528(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x38+P1, %rax, %r11 ; \ + adcxq %rax, %r10 ; \ + adoxq %rcx, %r11 ; \ + adcq %rcx, %r11 ; \ + movq 0x20+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq %r12, 536(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x38+P1, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + adcq %rcx, %r12 ; \ + movq 0x28+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + movq %r13, 544(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x38+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rcx, %r13 ; \ + adcq %rcx, %r13 ; \ + movq 0x30+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + movq %r14, 552(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x38+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rcx, %r14 ; \ + adcq %rcx, %r14 ; \ + movq 0x38+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + movq %r15, 560(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x38+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rcx, %r15 ; \ + adcq %rcx, %r15 ; \ + movq 0x40+P1, %rdx ; \ + xorl 
%ecx, %ecx ; \ + mulxq P2, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x8+P2, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x10+P2, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x18+P2, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x20+P2, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x28+P2, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x30+P2, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x38+P2, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rcx, %rbx ; \ + adcq %rbx, %rcx ; \ + movq 0x40+P2, %rdx ; \ + xorl %eax, %eax ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %rcx ; \ + mulxq 0x40+P1, %rax, %rbx ; \ + adcq %rax, %rcx ; \ + movq %r8, %rax ; \ + andq $0x1ff, %rax ; \ + shrdq $0x9, %r9, %r8 ; \ + shrdq $0x9, %r10, %r9 ; \ + shrdq $0x9, %r11, %r10 ; \ + shrdq $0x9, %r12, %r11 ; \ + shrdq $0x9, %r13, %r12 ; \ + shrdq $0x9, %r14, %r13 ; \ + shrdq $0x9, %r15, %r14 ; \ + shrdq $0x9, %rcx, %r15 ; \ + shrq $0x9, %rcx ; \ + addq %rax, %rcx ; \ + stc; \ + adcq 504(%rsp), %r8 ; \ + adcq 512(%rsp), %r9 ; \ + adcq 520(%rsp), %r10 ; \ + adcq 528(%rsp), %r11 ; \ + adcq 536(%rsp), %r12 ; \ + adcq 544(%rsp), %r13 ; \ + adcq 552(%rsp), %r14 ; \ + adcq 560(%rsp), %r15 ; \ + adcq $0xfffffffffffffe00, %rcx ; \ + cmc; \ + sbbq $0x0, %r8 ; \ + movq %r8, P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x8+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x10+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x18+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x20+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x28+P0 ; \ + sbbq $0x0, %r14 ; \ + movq %r14, 0x30+P0 ; \ + sbbq $0x0, %r15 ; \ + movq %r15, 0x38+P0 ; \ + sbbq $0x0, %rcx ; \ + andq $0x1ff, %rcx ; \ + movq %rcx, 0x40+P0 + +// Corresponds exactly to bignum_sqr_p521 + +#define sqr_p521(P0,P1) \ + xorl %ecx, %ecx ; \ + movq P1, %rdx ; \ + mulxq 0x8+P1, %r9, %rax ; \ + movq %r9, 512(%rsp) ; \ + mulxq 0x10+P1, %r10, %rbx ; \ + adcxq %rax, %r10 ; \ + movq %r10, 520(%rsp) ; \ + mulxq 0x18+P1, %r11, %rax ; \ + adcxq %rbx, %r11 ; \ + mulxq 0x20+P1, %r12, %rbx ; \ + adcxq %rax, %r12 ; \ + mulxq 0x28+P1, %r13, %rax ; \ + adcxq %rbx, %r13 ; \ + mulxq 0x30+P1, %r14, %rbx ; \ + adcxq %rax, %r14 ; \ + mulxq 0x38+P1, %r15, %r8 ; \ + adcxq %rbx, %r15 ; \ + adcxq %rcx, %r8 ; \ + xorl %ecx, %ecx ; \ + movq 0x8+P1, %rdx ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq %r11, 528(%rsp) ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq %r12, 536(%rsp) ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x38+P1, %rax, %r9 ; \ + adcxq %rax, %r8 ; \ + adoxq %rcx, %r9 ; \ + movq 0x20+P1, %rdx ; \ + mulxq 0x28+P1, %rax, %r10 
; \ + adcxq %rax, %r9 ; \ + adoxq %rcx, %r10 ; \ + adcxq %rcx, %r10 ; \ + xorl %ecx, %ecx ; \ + movq 0x10+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + movq %r13, 544(%rsp) ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + movq %r14, 552(%rsp) ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + movq 0x30+P1, %rdx ; \ + mulxq 0x20+P1, %rax, %r11 ; \ + adcxq %rax, %r10 ; \ + adoxq %rcx, %r11 ; \ + mulxq 0x28+P1, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + adcxq %rcx, %r12 ; \ + xorl %ecx, %ecx ; \ + movq 0x18+P1, %rdx ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + movq %r15, 560(%rsp) ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + movq 0x38+P1, %rdx ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x28+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rcx, %r13 ; \ + mulxq 0x30+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rcx, %r14 ; \ + adcxq %rcx, %r14 ; \ + xorl %ecx, %ecx ; \ + movq P1, %rdx ; \ + mulxq %rdx, %rax, %rbx ; \ + movq %rax, 504(%rsp) ; \ + movq 512(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 512(%rsp) ; \ + movq 520(%rsp), %rax ; \ + movq 0x8+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %rax, %rax ; \ + adoxq %rdx, %rax ; \ + movq %rax, 520(%rsp) ; \ + movq 528(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 528(%rsp) ; \ + movq 536(%rsp), %rax ; \ + movq 0x10+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %rax, %rax ; \ + adoxq %rdx, %rax ; \ + movq %rax, 536(%rsp) ; \ + movq 544(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 544(%rsp) ; \ + movq 552(%rsp), %rax ; \ + movq 0x18+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %rax, %rax ; \ + adoxq %rdx, %rax ; \ + movq %rax, 552(%rsp) ; \ + movq 560(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 560(%rsp) ; \ + movq 0x20+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %r8, %r8 ; \ + adoxq %rdx, %r8 ; \ + adcxq %r9, %r9 ; \ + adoxq %rbx, %r9 ; \ + movq 0x28+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %r10, %r10 ; \ + adoxq %rdx, %r10 ; \ + adcxq %r11, %r11 ; \ + adoxq %rbx, %r11 ; \ + movq 0x30+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %r12, %r12 ; \ + adoxq %rdx, %r12 ; \ + adcxq %r13, %r13 ; \ + adoxq %rbx, %r13 ; \ + movq 0x38+P1, %rdx ; \ + mulxq %rdx, %rdx, %r15 ; \ + adcxq %r14, %r14 ; \ + adoxq %rdx, %r14 ; \ + adcxq %rcx, %r15 ; \ + adoxq %rcx, %r15 ; \ + movq 0x40+P1, %rdx ; \ + movq %rdx, %rcx ; \ + imulq %rcx, %rcx ; \ + addq %rdx, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq 
%rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %rcx ; \ + adcq $0x0, %rcx ; \ + movq %r8, %rax ; \ + andq $0x1ff, %rax ; \ + shrdq $0x9, %r9, %r8 ; \ + shrdq $0x9, %r10, %r9 ; \ + shrdq $0x9, %r11, %r10 ; \ + shrdq $0x9, %r12, %r11 ; \ + shrdq $0x9, %r13, %r12 ; \ + shrdq $0x9, %r14, %r13 ; \ + shrdq $0x9, %r15, %r14 ; \ + shrdq $0x9, %rcx, %r15 ; \ + shrq $0x9, %rcx ; \ + addq %rax, %rcx ; \ + stc; \ + adcq 504(%rsp), %r8 ; \ + adcq 512(%rsp), %r9 ; \ + adcq 520(%rsp), %r10 ; \ + adcq 528(%rsp), %r11 ; \ + adcq 536(%rsp), %r12 ; \ + adcq 544(%rsp), %r13 ; \ + adcq 552(%rsp), %r14 ; \ + adcq 560(%rsp), %r15 ; \ + adcq $0xfffffffffffffe00, %rcx ; \ + cmc; \ + sbbq $0x0, %r8 ; \ + movq %r8, P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x8+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x10+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x18+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x20+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x28+P0 ; \ + sbbq $0x0, %r14 ; \ + movq %r14, 0x30+P0 ; \ + sbbq $0x0, %r15 ; \ + movq %r15, 0x38+P0 ; \ + sbbq $0x0, %rcx ; \ + andq $0x1ff, %rcx ; \ + movq %rcx, 0x40+P0 + +// Corresponds exactly to bignum_add_p521 + +#define add_p521(P0,P1,P2) \ + stc; \ + movq P1, %rax ; \ + adcq P2, %rax ; \ + movq 0x8+P1, %rbx ; \ + adcq 0x8+P2, %rbx ; \ + movq 0x10+P1, %r8 ; \ + adcq 0x10+P2, %r8 ; \ + movq 0x18+P1, %r9 ; \ + adcq 0x18+P2, %r9 ; \ + movq 0x20+P1, %r10 ; \ + adcq 0x20+P2, %r10 ; \ + movq 0x28+P1, %r11 ; \ + adcq 0x28+P2, %r11 ; \ + movq 0x30+P1, %r12 ; \ + adcq 0x30+P2, %r12 ; \ + movq 0x38+P1, %r13 ; \ + adcq 0x38+P2, %r13 ; \ + movq 0x40+P1, %r14 ; \ + adcq 0x40+P2, %r14 ; \ + movq $0x200, %rdx ; \ + andq %r14, %rdx ; \ + cmpq $0x200, %rdx ; \ + sbbq $0x0, %rax ; \ + movq %rax, P0 ; \ + sbbq $0x0, %rbx ; \ + movq %rbx, 0x8+P0 ; \ + sbbq $0x0, %r8 ; \ + movq %r8, 0x10+P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x18+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x20+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x28+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x30+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x38+P0 ; \ + sbbq %rdx, %r14 ; \ + movq %r14, 0x40+P0 + +// Corresponds exactly to bignum_sub_p521 + +#define sub_p521(P0,P1,P2) \ + movq P1, %rax ; \ + subq P2, %rax ; \ + movq 0x8+P1, %rdx ; \ + sbbq 0x8+P2, %rdx ; \ + movq 0x10+P1, %r8 ; \ + sbbq 0x10+P2, %r8 ; \ + movq 0x18+P1, %r9 ; \ + sbbq 0x18+P2, %r9 ; \ + movq 0x20+P1, %r10 ; \ + sbbq 0x20+P2, %r10 ; \ + movq 0x28+P1, %r11 ; \ + sbbq 0x28+P2, %r11 ; \ + movq 0x30+P1, %r12 ; \ + sbbq 0x30+P2, %r12 ; \ + movq 0x38+P1, %r13 ; \ + sbbq 0x38+P2, %r13 ; \ + movq 0x40+P1, %r14 ; \ + sbbq 0x40+P2, %r14 ; \ + sbbq $0x0, %rax ; \ + movq %rax, P0 ; \ + sbbq $0x0, %rdx ; \ + movq %rdx, 0x8+P0 ; \ + sbbq $0x0, %r8 ; \ + movq %r8, 0x10+P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x18+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x20+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x28+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x30+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x38+P0 ; \ + sbbq $0x0, %r14 ; \ + andq $0x1ff, %r14 ; \ + movq %r14, 0x40+P0 + +// Weak multiplication not fully reducing + +#define weakmul_p521(P0,P1,P2) \ + xorl %ecx, %ecx ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + movq %r8, 504(%rsp) ; \ + mulxq 0x8+P1, %rbx, %r10 ; \ + adcq %rbx, %r9 ; \ + mulxq 0x10+P1, %rbx, %r11 ; \ + adcq %rbx, %r10 ; \ + mulxq 0x18+P1, %rbx, %r12 ; \ + adcq %rbx, %r11 ; \ + mulxq 0x20+P1, %rbx, %r13 ; \ + adcq %rbx, %r12 ; \ + mulxq 0x28+P1, %rbx, %r14 ; \ + adcq %rbx, %r13 ; \ + mulxq 
0x30+P1, %rbx, %r15 ; \ + adcq %rbx, %r14 ; \ + mulxq 0x38+P1, %rbx, %r8 ; \ + adcq %rbx, %r15 ; \ + adcq %rcx, %r8 ; \ + movq 0x8+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + movq %r9, 512(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x38+P1, %rax, %r9 ; \ + adcxq %rax, %r8 ; \ + adoxq %rcx, %r9 ; \ + adcq %rcx, %r9 ; \ + movq 0x10+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + movq %r10, 520(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x38+P1, %rax, %r10 ; \ + adcxq %rax, %r9 ; \ + adoxq %rcx, %r10 ; \ + adcq %rcx, %r10 ; \ + movq 0x18+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq %r11, 528(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x38+P1, %rax, %r11 ; \ + adcxq %rax, %r10 ; \ + adoxq %rcx, %r11 ; \ + adcq %rcx, %r11 ; \ + movq 0x20+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq %r12, 536(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x38+P1, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + adcq %rcx, %r12 ; \ + movq 0x28+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + movq %r13, 544(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq 
%rbx, %r12 ; \ + mulxq 0x38+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rcx, %r13 ; \ + adcq %rcx, %r13 ; \ + movq 0x30+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + movq %r14, 552(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x38+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rcx, %r14 ; \ + adcq %rcx, %r14 ; \ + movq 0x38+P2, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + movq %r15, 560(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x38+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rcx, %r15 ; \ + adcq %rcx, %r15 ; \ + movq 0x40+P1, %rdx ; \ + xorl %ecx, %ecx ; \ + mulxq P2, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x8+P2, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x10+P2, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x18+P2, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x20+P2, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x28+P2, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x30+P2, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x38+P2, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rcx, %rbx ; \ + adcq %rbx, %rcx ; \ + movq 0x40+P2, %rdx ; \ + xorl %eax, %eax ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %rcx ; \ + mulxq 0x40+P1, %rax, %rbx ; \ + adcq %rax, %rcx ; \ + movq %r8, %rax ; \ + andq $0x1ff, %rax ; \ + shrdq $0x9, %r9, %r8 ; \ + shrdq $0x9, %r10, %r9 ; \ + shrdq $0x9, %r11, %r10 ; \ + shrdq $0x9, %r12, %r11 ; \ + shrdq $0x9, %r13, %r12 ; \ + shrdq $0x9, %r14, %r13 ; \ + shrdq $0x9, %r15, %r14 ; \ + shrdq $0x9, %rcx, %r15 ; \ + shrq $0x9, %rcx ; \ + addq %rax, %rcx ; \ + addq 504(%rsp), %r8 ; \ + movq %r8, P0 ; \ + adcq 512(%rsp), %r9 ; \ + movq %r9, 0x8+P0 ; \ + adcq 520(%rsp), %r10 ; \ + movq %r10, 0x10+P0 ; \ + adcq 528(%rsp), %r11 ; \ + movq %r11, 0x18+P0 ; \ + adcq 536(%rsp), %r12 ; \ + movq %r12, 0x20+P0 ; \ + adcq 544(%rsp), %r13 ; \ + movq %r13, 0x28+P0 ; \ 
+ adcq 552(%rsp), %r14 ; \ + movq %r14, 0x30+P0 ; \ + adcq 560(%rsp), %r15 ; \ + movq %r15, 0x38+P0 ; \ + adcq $0, %rcx ; \ + movq %rcx, 0x40+P0 + +// P0 = C * P1 - D * P2 == C * P1 + D * (p_521 - P2) + +#define cmsub_p521(P0,C,P1,D,P2) \ + movq $D, %rdx ; \ + movq 64+P2, %rbx ; \ + xorq $0x1FF, %rbx ; \ + movq P2, %rax ; \ + notq %rax; \ + mulxq %rax, %r8, %r9 ; \ + movq 8+P2, %rax ; \ + notq %rax; \ + mulxq %rax, %rax, %r10 ; \ + addq %rax, %r9 ; \ + movq 16+P2, %rax ; \ + notq %rax; \ + mulxq %rax, %rax, %r11 ; \ + adcq %rax, %r10 ; \ + movq 24+P2, %rax ; \ + notq %rax; \ + mulxq %rax, %rax, %r12 ; \ + adcq %rax, %r11 ; \ + movq 32+P2, %rax ; \ + notq %rax; \ + mulxq %rax, %rax, %r13 ; \ + adcq %rax, %r12 ; \ + movq 40+P2, %rax ; \ + notq %rax; \ + mulxq %rax, %rax, %r14 ; \ + adcq %rax, %r13 ; \ + movq 48+P2, %rax ; \ + notq %rax; \ + mulxq %rax, %rax, %r15 ; \ + adcq %rax, %r14 ; \ + movq 56+P2, %rax ; \ + notq %rax; \ + mulxq %rax, %rax, %rcx ; \ + adcq %rax, %r15 ; \ + mulxq %rbx, %rbx, %rax ; \ + adcq %rcx, %rbx ; \ + xorl %eax, %eax ; \ + movq $C, %rdx ; \ + mulxq P1, %rax, %rcx ; \ + adcxq %rax, %r8 ; \ + adoxq %rcx, %r9 ; \ + mulxq 8+P1, %rax, %rcx ; \ + adcxq %rax, %r9 ; \ + adoxq %rcx, %r10 ; \ + mulxq 16+P1, %rax, %rcx ; \ + adcxq %rax, %r10 ; \ + adoxq %rcx, %r11 ; \ + mulxq 24+P1, %rax, %rcx ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + mulxq 32+P1, %rax, %rcx ; \ + adcxq %rax, %r12 ; \ + adoxq %rcx, %r13 ; \ + mulxq 40+P1, %rax, %rcx ; \ + adcxq %rax, %r13 ; \ + adoxq %rcx, %r14 ; \ + mulxq 48+P1, %rax, %rcx ; \ + adcxq %rax, %r14 ; \ + adoxq %rcx, %r15 ; \ + mulxq 56+P1, %rax, %rcx ; \ + adcxq %rax, %r15 ; \ + adoxq %rcx, %rbx ; \ + mulxq 64+P1, %rax, %rcx ; \ + adcxq %rax, %rbx ; \ + movq %r9, %rax ; \ + andq %r10, %rax ; \ + andq %r11, %rax ; \ + andq %r12, %rax ; \ + andq %r13, %rax ; \ + andq %r14, %rax ; \ + andq %r15, %rax ; \ + movq %rbx, %rdx ; \ + shrq $9, %rdx ; \ + orq $~0x1FF, %rbx ; \ + leaq 1(%rdx), %rcx ; \ + addq %r8, %rcx ; \ + movl $0, %ecx ; \ + adcq %rcx, %rax ; \ + movq %rbx, %rax ; \ + adcq %rcx, %rax ; \ + adcq %rdx, %r8 ; \ + movq %r8, P0 ; \ + adcq %rcx, %r9 ; \ + movq %r9, 8+P0 ; \ + adcq %rcx, %r10 ; \ + movq %r10, 16+P0 ; \ + adcq %rcx, %r11 ; \ + movq %r11, 24+P0 ; \ + adcq %rcx, %r12 ; \ + movq %r12, 32+P0 ; \ + adcq %rcx, %r13 ; \ + movq %r13, 40+P0 ; \ + adcq %rcx, %r14 ; \ + movq %r14, 48+P0 ; \ + adcq %rcx, %r15 ; \ + movq %r15, 56+P0 ; \ + adcq %rcx, %rbx ; \ + andq $0x1FF, %rbx ; \ + movq %rbx, 64+P0 + +// P0 = 3 * P1 - 8 * P2 == 3 * P1 + 8 * (p_521 - P2) + +#define cmsub38_p521(P0,P1,P2) \ + movq 64+P2, %rbx ; \ + xorq $0x1FF, %rbx ; \ + movq 56+P2, %r15 ; \ + notq %r15; \ + shldq $3, %r15, %rbx ; \ + movq 48+P2, %r14 ; \ + notq %r14; \ + shldq $3, %r14, %r15 ; \ + movq 40+P2, %r13 ; \ + notq %r13; \ + shldq $3, %r13, %r14 ; \ + movq 32+P2, %r12 ; \ + notq %r12; \ + shldq $3, %r12, %r13 ; \ + movq 24+P2, %r11 ; \ + notq %r11; \ + shldq $3, %r11, %r12 ; \ + movq 16+P2, %r10 ; \ + notq %r10; \ + shldq $3, %r10, %r11 ; \ + movq 8+P2, %r9 ; \ + notq %r9; \ + shldq $3, %r9, %r10 ; \ + movq P2, %r8 ; \ + notq %r8; \ + shldq $3, %r8, %r9 ; \ + shlq $3, %r8 ; \ + movq $3, %rdx ; \ + xorl %eax, %eax ; \ + mulxq P1, %rax, %rcx ; \ + adcxq %rax, %r8 ; \ + adoxq %rcx, %r9 ; \ + mulxq 8+P1, %rax, %rcx ; \ + adcxq %rax, %r9 ; \ + adoxq %rcx, %r10 ; \ + mulxq 16+P1, %rax, %rcx ; \ + adcxq %rax, %r10 ; \ + adoxq %rcx, %r11 ; \ + mulxq 24+P1, %rax, %rcx ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + mulxq 32+P1, %rax, %rcx ; \ + adcxq %rax, 
%r12 ; \ + adoxq %rcx, %r13 ; \ + mulxq 40+P1, %rax, %rcx ; \ + adcxq %rax, %r13 ; \ + adoxq %rcx, %r14 ; \ + mulxq 48+P1, %rax, %rcx ; \ + adcxq %rax, %r14 ; \ + adoxq %rcx, %r15 ; \ + mulxq 56+P1, %rax, %rcx ; \ + adcxq %rax, %r15 ; \ + adoxq %rcx, %rbx ; \ + mulxq 64+P1, %rax, %rcx ; \ + adcxq %rax, %rbx ; \ + movq %r9, %rax ; \ + andq %r10, %rax ; \ + andq %r11, %rax ; \ + andq %r12, %rax ; \ + andq %r13, %rax ; \ + andq %r14, %rax ; \ + andq %r15, %rax ; \ + movq %rbx, %rdx ; \ + shrq $9, %rdx ; \ + orq $~0x1FF, %rbx ; \ + leaq 1(%rdx), %rcx ; \ + addq %r8, %rcx ; \ + movl $0, %ecx ; \ + adcq %rcx, %rax ; \ + movq %rbx, %rax ; \ + adcq %rcx, %rax ; \ + adcq %rdx, %r8 ; \ + movq %r8, P0 ; \ + adcq %rcx, %r9 ; \ + movq %r9, 8+P0 ; \ + adcq %rcx, %r10 ; \ + movq %r10, 16+P0 ; \ + adcq %rcx, %r11 ; \ + movq %r11, 24+P0 ; \ + adcq %rcx, %r12 ; \ + movq %r12, 32+P0 ; \ + adcq %rcx, %r13 ; \ + movq %r13, 40+P0 ; \ + adcq %rcx, %r14 ; \ + movq %r14, 48+P0 ; \ + adcq %rcx, %r15 ; \ + movq %r15, 56+P0 ; \ + adcq %rcx, %rbx ; \ + andq $0x1FF, %rbx ; \ + movq %rbx, 64+P0 + +// P0 = 4 * P1 - P2 = 4 * P1 + (p_521 - P2) + +#define cmsub41_p521(P0,P1,P2) \ + movq 64+P1, %rbx ; \ + movq 56+P1, %r15 ; \ + shldq $2, %r15, %rbx ; \ + movq 48+P1, %r14 ; \ + shldq $2, %r14, %r15 ; \ + movq 40+P1, %r13 ; \ + shldq $2, %r13, %r14 ; \ + movq 32+P1, %r12 ; \ + shldq $2, %r12, %r13 ; \ + movq 24+P1, %r11 ; \ + shldq $2, %r11, %r12 ; \ + movq 16+P1, %r10 ; \ + shldq $2, %r10, %r11 ; \ + movq 8+P1, %r9 ; \ + shldq $2, %r9, %r10 ; \ + movq P1, %r8 ; \ + shldq $2, %r8, %r9 ; \ + shlq $2, %r8 ; \ + movq 64+P2, %rcx ; \ + xorq $0x1FF, %rcx ; \ + movq P2, %rax ; \ + notq %rax; \ + addq %rax, %r8 ; \ + movq 8+P2, %rax ; \ + notq %rax; \ + adcq %rax, %r9 ; \ + movq 16+P2, %rax ; \ + notq %rax; \ + adcq %rax, %r10 ; \ + movq 24+P2, %rax ; \ + notq %rax; \ + adcq %rax, %r11 ; \ + movq 32+P2, %rax ; \ + notq %rax; \ + adcq %rax, %r12 ; \ + movq 40+P2, %rax ; \ + notq %rax; \ + adcq %rax, %r13 ; \ + movq 48+P2, %rax ; \ + notq %rax; \ + adcq %rax, %r14 ; \ + movq 56+P2, %rax ; \ + notq %rax; \ + adcq %rax, %r15 ; \ + adcq %rcx, %rbx ; \ + movq %r9, %rax ; \ + andq %r10, %rax ; \ + andq %r11, %rax ; \ + andq %r12, %rax ; \ + andq %r13, %rax ; \ + andq %r14, %rax ; \ + andq %r15, %rax ; \ + movq %rbx, %rdx ; \ + shrq $9, %rdx ; \ + orq $~0x1FF, %rbx ; \ + leaq 1(%rdx), %rcx ; \ + addq %r8, %rcx ; \ + movl $0, %ecx ; \ + adcq %rcx, %rax ; \ + movq %rbx, %rax ; \ + adcq %rcx, %rax ; \ + adcq %rdx, %r8 ; \ + movq %r8, P0 ; \ + adcq %rcx, %r9 ; \ + movq %r9, 8+P0 ; \ + adcq %rcx, %r10 ; \ + movq %r10, 16+P0 ; \ + adcq %rcx, %r11 ; \ + movq %r11, 24+P0 ; \ + adcq %rcx, %r12 ; \ + movq %r12, 32+P0 ; \ + adcq %rcx, %r13 ; \ + movq %r13, 40+P0 ; \ + adcq %rcx, %r14 ; \ + movq %r14, 48+P0 ; \ + adcq %rcx, %r15 ; \ + movq %r15, 56+P0 ; \ + adcq %rcx, %rbx ; \ + andq $0x1FF, %rbx ; \ + movq %rbx, 64+P0 + +S2N_BN_SYMBOL(p521_jdouble): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + +// Save registers and make room on stack for temporary variables + + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $NSPACE, %rsp + +// Main code, just a sequence of basic field operations + +// z2 = z^2 +// y2 = y^2 + + sqr_p521(z2,z_1) + sqr_p521(y2,y_1) + +// x2p = x^2 - z^4 = (x + z^2) * (x - z^2) + + add_p521(t1,x_1,z2) + sub_p521(t2,x_1,z2) + mul_p521(x2p,t1,t2) + +// t1 = y + z +// x4p = x2p^2 +// xy2 = x * y^2 + + add_p521(t1,y_1,z_1) + sqr_p521(x4p,x2p) + weakmul_p521(xy2,x_1,y2) + 
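+// (For orientation: P-521 has curve coefficient a = -3, so writing
+// m = 3 * x2p = 3 * x^2 + a * z^4, the steps below compute
+// d = 4 * x * y^2 - x', giving x' = m^2 - 8 * x * y^2, then
+// y' = m * d - 8 * y^4 and z' = (y + z)^2 - y^2 - z^2 = 2 * y * z.)
+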
+// t2 = (y + z)^2 + + sqr_p521(t2,t1) + +// d = 12 * xy2 - 9 * x4p +// t1 = y^2 + 2 * y * z + + cmsub_p521(d,12,xy2,9,x4p) + sub_p521(t1,t2,z2) + +// y4 = y^4 + + sqr_p521(y4,y2) + +// z_3' = 2 * y * z +// dx2 = d * x2p + + sub_p521(z_3,t1,y2) + weakmul_p521(dx2,d,x2p) + +// x' = 4 * xy2 - d + + cmsub41_p521(x_3,xy2,d) + +// y' = 3 * dx2 - 8 * y4 + + cmsub38_p521(y_3,dx2,y4) + +// Restore stack and registers + + addq $NSPACE, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/p521/p521_jmixadd.S b/x86_att/p521/p521_jmixadd.S new file mode 100644 index 0000000000..52e1568b56 --- /dev/null +++ b/x86_att/p521/p521_jmixadd.S @@ -0,0 +1,756 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// Point mixed addition on NIST curve P-521 in Jacobian coordinates +// +// extern void p521_jmixadd +// (uint64_t p3[static 27],uint64_t p1[static 27],uint64_t p2[static 18]); +// +// Does p3 := p1 + p2 where all points are regarded as Jacobian triples. +// A Jacobian triple (x,y,z) represents affine point (x/z^2,y/z^3). +// The "mixed" part means that p2 only has x and y coordinates, with the +// implicit z coordinate assumed to be the identity. It is assumed that +// all the coordinates of the input points p1 and p2 are fully reduced +// mod p_521, that the z coordinate of p1 is nonzero and that neither +// p1 =~= p2 or p1 =~= -p2, where "=~=" means "represents the same affine +// point as". +// +// Standard x86-64 ABI: RDI = p3, RSI = p1, RDX = p2 +// Microsoft x64 ABI: RCX = p3, RDX = p1, R8 = p2 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p521_jmixadd) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p521_jmixadd) + .text + +// Size of individual field elements + +#define NUMSIZE 72 + +// Stable homes for input arguments during main code sequence +// These are where they arrive except for input_y, initially in %rdx + +#define input_z %rdi +#define input_x %rsi +#define input_y %rcx + +// Pointer-offset pairs for inputs and outputs + +#define x_1 0(input_x) +#define y_1 NUMSIZE(input_x) +#define z_1 (2*NUMSIZE)(input_x) + +#define x_2 0(input_y) +#define y_2 NUMSIZE(input_y) + +#define x_3 0(input_z) +#define y_3 NUMSIZE(input_z) +#define z_3 (2*NUMSIZE)(input_z) + +// Pointer-offset pairs for temporaries, with some aliasing +// The tmp field is internal storage for field mul and sqr. 
+// NSPACE is the total stack needed for these temporaries + +#define zp2 (NUMSIZE*0)(%rsp) +#define ww (NUMSIZE*0)(%rsp) + +#define yd (NUMSIZE*1)(%rsp) +#define y2a (NUMSIZE*1)(%rsp) + +#define x2a (NUMSIZE*2)(%rsp) +#define zzx2 (NUMSIZE*2)(%rsp) + +#define zz (NUMSIZE*3)(%rsp) +#define t1 (NUMSIZE*3)(%rsp) + +#define t2 (NUMSIZE*4)(%rsp) +#define zzx1 (NUMSIZE*4)(%rsp) + +#define xd (NUMSIZE*5)(%rsp) + +#define tmp (NUMSIZE*6)(%rsp) + +#define NSPACE (NUMSIZE*6+64) + +// Corresponds exactly to bignum_mul_p521 + +#define mul_p521(P0,P1,P2) \ + xorl %ebp, %ebp ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + movq %r8, 432(%rsp) ; \ + mulxq 0x8+P1, %rbx, %r10 ; \ + adcq %rbx, %r9 ; \ + mulxq 0x10+P1, %rbx, %r11 ; \ + adcq %rbx, %r10 ; \ + mulxq 0x18+P1, %rbx, %r12 ; \ + adcq %rbx, %r11 ; \ + mulxq 0x20+P1, %rbx, %r13 ; \ + adcq %rbx, %r12 ; \ + mulxq 0x28+P1, %rbx, %r14 ; \ + adcq %rbx, %r13 ; \ + mulxq 0x30+P1, %rbx, %r15 ; \ + adcq %rbx, %r14 ; \ + mulxq 0x38+P1, %rbx, %r8 ; \ + adcq %rbx, %r15 ; \ + adcq %rbp, %r8 ; \ + movq 0x8+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + movq %r9, 440(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x38+P1, %rax, %r9 ; \ + adcxq %rax, %r8 ; \ + adoxq %rbp, %r9 ; \ + adcq %rbp, %r9 ; \ + movq 0x10+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + movq %r10, 448(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x38+P1, %rax, %r10 ; \ + adcxq %rax, %r9 ; \ + adoxq %rbp, %r10 ; \ + adcq %rbp, %r10 ; \ + movq 0x18+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq %r11, 456(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x38+P1, %rax, %r11 ; \ + adcxq %rax, %r10 ; \ + adoxq %rbp, %r11 ; \ + adcq %rbp, %r11 ; \ + movq 0x20+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq %r12, 464(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 
0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x38+P1, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rbp, %r12 ; \ + adcq %rbp, %r12 ; \ + movq 0x28+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + movq %r13, 472(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x38+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rbp, %r13 ; \ + adcq %rbp, %r13 ; \ + movq 0x30+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + movq %r14, 480(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x38+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rbp, %r14 ; \ + adcq %rbp, %r14 ; \ + movq 0x38+P2, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + movq %r15, 488(%rsp) ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x38+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rbp, %r15 ; \ + adcq %rbp, %r15 ; \ + movq 0x40+P1, %rdx ; \ + xorl %ebp, %ebp ; \ + mulxq P2, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x8+P2, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x10+P2, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x18+P2, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x20+P2, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x28+P2, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x30+P2, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x38+P2, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbp, %rbx ; \ + adcq %rbx, %rbp ; \ + movq 0x40+P2, %rdx ; \ + xorl %eax, %eax ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq 
%rbx, %r13 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %rbp ; \ + mulxq 0x40+P1, %rax, %rbx ; \ + adcq %rax, %rbp ; \ + movq %r8, %rax ; \ + andq $0x1ff, %rax ; \ + shrdq $0x9, %r9, %r8 ; \ + shrdq $0x9, %r10, %r9 ; \ + shrdq $0x9, %r11, %r10 ; \ + shrdq $0x9, %r12, %r11 ; \ + shrdq $0x9, %r13, %r12 ; \ + shrdq $0x9, %r14, %r13 ; \ + shrdq $0x9, %r15, %r14 ; \ + shrdq $0x9, %rbp, %r15 ; \ + shrq $0x9, %rbp ; \ + addq %rax, %rbp ; \ + stc; \ + adcq 432(%rsp), %r8 ; \ + adcq 440(%rsp), %r9 ; \ + adcq 448(%rsp), %r10 ; \ + adcq 456(%rsp), %r11 ; \ + adcq 464(%rsp), %r12 ; \ + adcq 472(%rsp), %r13 ; \ + adcq 480(%rsp), %r14 ; \ + adcq 488(%rsp), %r15 ; \ + adcq $0xfffffffffffffe00, %rbp ; \ + cmc; \ + sbbq $0x0, %r8 ; \ + movq %r8, P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x8+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x10+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x18+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x20+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x28+P0 ; \ + sbbq $0x0, %r14 ; \ + movq %r14, 0x30+P0 ; \ + sbbq $0x0, %r15 ; \ + movq %r15, 0x38+P0 ; \ + sbbq $0x0, %rbp ; \ + andq $0x1ff, %rbp ; \ + movq %rbp, 0x40+P0 + +// Corresponds exactly to bignum_sqr_p521 + +#define sqr_p521(P0,P1) \ + xorl %ebp, %ebp ; \ + movq P1, %rdx ; \ + mulxq 0x8+P1, %r9, %rax ; \ + movq %r9, 440(%rsp) ; \ + mulxq 0x10+P1, %r10, %rbx ; \ + adcxq %rax, %r10 ; \ + movq %r10, 448(%rsp) ; \ + mulxq 0x18+P1, %r11, %rax ; \ + adcxq %rbx, %r11 ; \ + mulxq 0x20+P1, %r12, %rbx ; \ + adcxq %rax, %r12 ; \ + mulxq 0x28+P1, %r13, %rax ; \ + adcxq %rbx, %r13 ; \ + mulxq 0x30+P1, %r14, %rbx ; \ + adcxq %rax, %r14 ; \ + mulxq 0x38+P1, %r15, %r8 ; \ + adcxq %rbx, %r15 ; \ + adcxq %rbp, %r8 ; \ + xorl %ebp, %ebp ; \ + movq 0x8+P1, %rdx ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + movq %r11, 456(%rsp) ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + movq %r12, 464(%rsp) ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x38+P1, %rax, %r9 ; \ + adcxq %rax, %r8 ; \ + adoxq %rbp, %r9 ; \ + movq 0x20+P1, %rdx ; \ + mulxq 0x28+P1, %rax, %r10 ; \ + adcxq %rax, %r9 ; \ + adoxq %rbp, %r10 ; \ + adcxq %rbp, %r10 ; \ + xorl %ebp, %ebp ; \ + movq 0x10+P1, %rdx ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + movq %r13, 472(%rsp) ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + movq %r14, 480(%rsp) ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + movq 0x30+P1, %rdx ; \ + mulxq 0x20+P1, %rax, %r11 ; \ + adcxq %rax, %r10 ; \ + adoxq %rbp, %r11 ; \ + mulxq 0x28+P1, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rbp, %r12 ; \ + adcxq %rbp, %r12 ; \ + xorl %ebp, %ebp ; \ + movq 0x18+P1, %rdx ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %r8 ; \ + movq %r15, 488(%rsp) ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; 
\ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + movq 0x38+P1, %rdx ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x28+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rbp, %r13 ; \ + mulxq 0x30+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rbp, %r14 ; \ + adcxq %rbp, %r14 ; \ + xorl %ebp, %ebp ; \ + movq P1, %rdx ; \ + mulxq %rdx, %rax, %rbx ; \ + movq %rax, 432(%rsp) ; \ + movq 440(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 440(%rsp) ; \ + movq 448(%rsp), %rax ; \ + movq 0x8+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %rax, %rax ; \ + adoxq %rdx, %rax ; \ + movq %rax, 448(%rsp) ; \ + movq 456(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 456(%rsp) ; \ + movq 464(%rsp), %rax ; \ + movq 0x10+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %rax, %rax ; \ + adoxq %rdx, %rax ; \ + movq %rax, 464(%rsp) ; \ + movq 472(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 472(%rsp) ; \ + movq 480(%rsp), %rax ; \ + movq 0x18+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %rax, %rax ; \ + adoxq %rdx, %rax ; \ + movq %rax, 480(%rsp) ; \ + movq 488(%rsp), %rax ; \ + adcxq %rax, %rax ; \ + adoxq %rbx, %rax ; \ + movq %rax, 488(%rsp) ; \ + movq 0x20+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %r8, %r8 ; \ + adoxq %rdx, %r8 ; \ + adcxq %r9, %r9 ; \ + adoxq %rbx, %r9 ; \ + movq 0x28+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %r10, %r10 ; \ + adoxq %rdx, %r10 ; \ + adcxq %r11, %r11 ; \ + adoxq %rbx, %r11 ; \ + movq 0x30+P1, %rdx ; \ + mulxq %rdx, %rdx, %rbx ; \ + adcxq %r12, %r12 ; \ + adoxq %rdx, %r12 ; \ + adcxq %r13, %r13 ; \ + adoxq %rbx, %r13 ; \ + movq 0x38+P1, %rdx ; \ + mulxq %rdx, %rdx, %r15 ; \ + adcxq %r14, %r14 ; \ + adoxq %rdx, %r14 ; \ + adcxq %rbp, %r15 ; \ + adoxq %rbp, %r15 ; \ + movq 0x40+P1, %rdx ; \ + movq %rdx, %rbp ; \ + imulq %rbp, %rbp ; \ + addq %rdx, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x18+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x20+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x28+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x30+P1, %rax, %rbx ; \ + adcxq %rax, %r14 ; \ + adoxq %rbx, %r15 ; \ + mulxq 0x38+P1, %rax, %rbx ; \ + adcxq %rax, %r15 ; \ + adoxq %rbx, %rbp ; \ + adcq $0x0, %rbp ; \ + movq %r8, %rax ; \ + andq $0x1ff, %rax ; \ + shrdq $0x9, %r9, %r8 ; \ + shrdq $0x9, %r10, %r9 ; \ + shrdq $0x9, %r11, %r10 ; \ + shrdq $0x9, %r12, %r11 ; \ + shrdq $0x9, %r13, %r12 ; \ + shrdq $0x9, %r14, %r13 ; \ + shrdq $0x9, %r15, %r14 ; \ + shrdq $0x9, %rbp, %r15 ; \ + shrq $0x9, %rbp ; \ + addq %rax, %rbp ; \ + stc; \ + adcq 432(%rsp), %r8 ; \ + adcq 440(%rsp), %r9 ; \ + adcq 448(%rsp), %r10 ; \ + adcq 456(%rsp), %r11 ; \ + adcq 464(%rsp), %r12 ; \ + adcq 472(%rsp), %r13 ; \ + adcq 480(%rsp), %r14 ; \ + adcq 488(%rsp), %r15 ; \ + adcq $0xfffffffffffffe00, %rbp ; \ + cmc; \ + sbbq $0x0, %r8 ; \ + movq %r8, P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x8+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x10+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x18+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x20+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x28+P0 ; \ + sbbq $0x0, %r14 ; \ + movq %r14, 0x30+P0 ; \ + sbbq $0x0, %r15 ; 
\ + movq %r15, 0x38+P0 ; \ + sbbq $0x0, %rbp ; \ + andq $0x1ff, %rbp ; \ + movq %rbp, 0x40+P0 + +// Corresponds exactly to bignum_sub_p521 + +#define sub_p521(P0,P1,P2) \ + movq P1, %rax ; \ + subq P2, %rax ; \ + movq 0x8+P1, %rdx ; \ + sbbq 0x8+P2, %rdx ; \ + movq 0x10+P1, %r8 ; \ + sbbq 0x10+P2, %r8 ; \ + movq 0x18+P1, %r9 ; \ + sbbq 0x18+P2, %r9 ; \ + movq 0x20+P1, %r10 ; \ + sbbq 0x20+P2, %r10 ; \ + movq 0x28+P1, %r11 ; \ + sbbq 0x28+P2, %r11 ; \ + movq 0x30+P1, %r12 ; \ + sbbq 0x30+P2, %r12 ; \ + movq 0x38+P1, %r13 ; \ + sbbq 0x38+P2, %r13 ; \ + movq 0x40+P1, %r14 ; \ + sbbq 0x40+P2, %r14 ; \ + sbbq $0x0, %rax ; \ + movq %rax, P0 ; \ + sbbq $0x0, %rdx ; \ + movq %rdx, 0x8+P0 ; \ + sbbq $0x0, %r8 ; \ + movq %r8, 0x10+P0 ; \ + sbbq $0x0, %r9 ; \ + movq %r9, 0x18+P0 ; \ + sbbq $0x0, %r10 ; \ + movq %r10, 0x20+P0 ; \ + sbbq $0x0, %r11 ; \ + movq %r11, 0x28+P0 ; \ + sbbq $0x0, %r12 ; \ + movq %r12, 0x30+P0 ; \ + sbbq $0x0, %r13 ; \ + movq %r13, 0x38+P0 ; \ + sbbq $0x0, %r14 ; \ + andq $0x1ff, %r14 ; \ + movq %r14, 0x40+P0 + +S2N_BN_SYMBOL(p521_jmixadd): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + +// Save registers and make room on stack for temporary variables + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $NSPACE, %rsp + +// Move the input arguments to stable places (two are already there) + + movq %rdx, input_y + +// Main code, just a sequence of basic field operations + + sqr_p521(zp2,z_1) + mul_p521(y2a,z_1,y_2) + + mul_p521(x2a,zp2,x_2) + mul_p521(y2a,zp2,y2a) + + sub_p521(xd,x2a,x_1) + sub_p521(yd,y2a,y_1) + + sqr_p521(zz,xd) + sqr_p521(ww,yd) + + mul_p521(zzx1,zz,x_1) + mul_p521(zzx2,zz,x2a) + + sub_p521(x_3,ww,zzx1) + sub_p521(t1,zzx2,zzx1) + + mul_p521(z_3,xd,z_1) + + sub_p521(x_3,x_3,zzx2) + + sub_p521(t2,zzx1,x_3) + + mul_p521(t1,t1,y_1) + mul_p521(t2,yd,t2) + + sub_p521(y_3,t2,t1) + +// Restore stack and registers + + addq $NSPACE, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif From 15e269c7d5aa494802a95536ec94d3c53471493e Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 30 Sep 2022 14:01:39 -0700 Subject: [PATCH 15/42] Add X25519 function This provides the standard curve25519 "X25519" function including the specific input mangling from "https://www.rfc-editor.org/rfc/rfc7748" and proved correct against a spec based on Bernstein's original description. The inputs and outputs are the usual s2n-bignum numbers with four 64-bit word digits each. (That is, not arrays of bytes, though on a little-endian machine with no special alignment restrictions the difference amounts just to a pointer typecast.) 
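
For illustration only (this wrapper is hypothetical, not part of the
patch), a little-endian caller holding RFC 7748 style byte strings
could bridge to the limb-based interface with plain copies:

    #include <stdint.h>
    #include <string.h>

    extern void curve25519_x25519
      (uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]);

    /* On a little-endian machine the 4x64-bit limb layout coincides
       with the 32-byte little-endian strings of RFC 7748, so the
       conversion is just a copy (or a pointer typecast when the
       alignment is known to be suitable). */
    static void x25519_bytes(uint8_t out[32], const uint8_t k[32],
                             const uint8_t u[32])
    {
        uint64_t res[4], scalar[4], point[4];
        memcpy(scalar, k, 32);
        memcpy(point, u, 32);
        curve25519_x25519(res, scalar, point);
        memcpy(out, res, 32);
    }
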
s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/ca2f87e37b8ede100961d1dadd440f9fad0981e1 --- arm/curve25519/curve25519_x25519.S | 1337 ++++++++++++++++++ arm/curve25519/curve25519_x25519_alt.S | 1151 ++++++++++++++++ x86_att/curve25519/curve25519_x25519.S | 1245 +++++++++++++++++ x86_att/curve25519/curve25519_x25519_alt.S | 1413 ++++++++++++++++++++ 4 files changed, 5146 insertions(+) create mode 100644 arm/curve25519/curve25519_x25519.S create mode 100644 arm/curve25519/curve25519_x25519_alt.S create mode 100644 x86_att/curve25519/curve25519_x25519.S create mode 100644 x86_att/curve25519/curve25519_x25519_alt.S diff --git a/arm/curve25519/curve25519_x25519.S b/arm/curve25519/curve25519_x25519.S new file mode 100644 index 0000000000..bea6d50ce4 --- /dev/null +++ b/arm/curve25519/curve25519_x25519.S @@ -0,0 +1,1337 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// The x25519 function for curve25519 +// Inputs scalar[4], point[4]; output res[4] +// +// extern void curve25519_x25519 +// (uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]) +// +// Given a scalar n and the X coordinate of an input point P = (X,Y) on +// curve25519 (Y can live in any extension field of characteristic 2^255-19), +// this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the +// point at infinity. Both n and X inputs are first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748); +// in particular the lower three bits of n are set to zero. +// +// Standard ARM ABI: X0 = res, X1 = scalar, X2 = point +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable homes for the input result argument during the whole body +// and other variables that are only needed prior to the modular inverse. + +#define res x23 +#define i x20 +#define swap x21 + +// Pointers to result x coord to be written + +#define resx res, #0 + +// Pointer-offset pairs for temporaries on stack with some aliasing. 
+// Both dmsn and dnsm need space for >= 5 digits, and we allocate 8 + +#define scalar sp, #(0*NUMSIZE) + +#define pointx sp, #(1*NUMSIZE) + +#define zm sp, #(2*NUMSIZE) +#define sm sp, #(2*NUMSIZE) +#define dpro sp, #(2*NUMSIZE) + +#define sn sp, #(3*NUMSIZE) + +#define dm sp, #(4*NUMSIZE) + +#define zn sp, #(5*NUMSIZE) +#define dn sp, #(5*NUMSIZE) +#define e sp, #(5*NUMSIZE) + +#define dmsn sp, #(6*NUMSIZE) +#define p sp, #(6*NUMSIZE) + +#define xm sp, #(8*NUMSIZE) +#define dnsm sp, #(8*NUMSIZE) +#define spro sp, #(8*NUMSIZE) + +#define xn sp, #(10*NUMSIZE) +#define s sp, #(10*NUMSIZE) + +#define d sp, #(11*NUMSIZE) + +// Total size to reserve on the stack + +#define NSPACE (12*NUMSIZE) + +// Macros wrapping up the basic field operation calls +// bignum_mul_p25519 and bignum_sqr_p25519. +// These two are only trivially different from pure +// function calls to those subroutines. + +#define mul_p25519(p0,p1,p2) \ + ldp x3, x4, [p1]; \ + ldp x5, x6, [p2]; \ + mul x7, x3, x5; \ + umulh x8, x3, x5; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [p1+16]; \ + ldp x5, x6, [p2+16]; \ + mul x11, x3, x5; \ + umulh x12, x3, x5; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [p1+16]; \ + ldp x15, x16, [p1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [p2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + and x5, x11, #0xffffffff; \ + lsr x4, x11, #32; \ + mul x5, x3, x5; \ + mul x4, x3, x4; \ + adds x7, x7, x5; \ + and x5, x12, #0xffffffff; \ + lsr x12, x12, #32; \ + 
mul x5, x3, x5; \ + mul x12, x3, x12; \ + adcs x8, x8, x5; \ + and x5, x13, #0xffffffff; \ + lsr x13, x13, #32; \ + mul x5, x3, x5; \ + mul x13, x3, x13; \ + adcs x9, x9, x5; \ + and x5, x14, #0xffffffff; \ + lsr x14, x14, #32; \ + mul x5, x3, x5; \ + mul x14, x3, x14; \ + adcs x10, x10, x5; \ + cset x11, cs; \ + lsl x5, x4, #32; \ + adds x7, x7, x5; \ + extr x5, x12, x4, #32; \ + adcs x8, x8, x5; \ + extr x5, x13, x12, #32; \ + adcs x9, x9, x5; \ + extr x5, x14, x13, #32; \ + adcs x10, x10, x5; \ + lsr x5, x14, #32; \ + adc x11, x11, x5; \ + cmn x10, x10; \ + orr x10, x10, #0x8000000000000000; \ + adc x0, x11, x11; \ + mov x3, #0x13; \ + madd x5, x3, x0, x3; \ + adds x7, x7, x5; \ + adcs x8, x8, xzr; \ + adcs x9, x9, xzr; \ + adcs x10, x10, xzr; \ + csel x3, x3, xzr, cc; \ + subs x7, x7, x3; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbc x10, x10, xzr; \ + and x10, x10, #0x7fffffffffffffff; \ + stp x7, x8, [p0]; \ + stp x9, x10, [p0+16] + +#define sqr_p25519(p0,p1) \ + ldp x6, x7, [p1]; \ + ldp x10, x11, [p1+16]; \ + mul x4, x6, x10; \ + mul x9, x7, x11; \ + umulh x12, x6, x10; \ + subs x13, x6, x7; \ + cneg x13, x13, cc; \ + csetm x3, cc; \ + subs x2, x11, x10; \ + cneg x2, x2, cc; \ + mul x8, x13, x2; \ + umulh x2, x13, x2; \ + cinv x3, x3, cc; \ + eor x8, x8, x3; \ + eor x2, x2, x3; \ + adds x5, x4, x12; \ + adc x12, x12, xzr; \ + umulh x13, x7, x11; \ + adds x5, x5, x9; \ + adcs x12, x12, x13; \ + adc x13, x13, xzr; \ + adds x12, x12, x9; \ + adc x13, x13, xzr; \ + cmn x3, #0x1; \ + adcs x5, x5, x8; \ + adcs x12, x12, x2; \ + adc x13, x13, x3; \ + adds x4, x4, x4; \ + adcs x5, x5, x5; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adc x14, xzr, xzr; \ + mul x2, x6, x6; \ + mul x8, x7, x7; \ + mul x15, x6, x7; \ + umulh x3, x6, x6; \ + umulh x9, x7, x7; \ + umulh x16, x6, x7; \ + adds x3, x3, x15; \ + adcs x8, x8, x16; \ + adc x9, x9, xzr; \ + adds x3, x3, x15; \ + adcs x8, x8, x16; \ + adc x9, x9, xzr; \ + adds x4, x4, x8; \ + adcs x5, x5, x9; \ + adcs x12, x12, xzr; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x6, x10, x10; \ + mul x8, x11, x11; \ + mul x15, x10, x11; \ + umulh x7, x10, x10; \ + umulh x9, x11, x11; \ + umulh x16, x10, x11; \ + adds x7, x7, x15; \ + adcs x8, x8, x16; \ + adc x9, x9, xzr; \ + adds x7, x7, x15; \ + adcs x8, x8, x16; \ + adc x9, x9, xzr; \ + adds x6, x6, x12; \ + adcs x7, x7, x13; \ + adcs x8, x8, x14; \ + adc x9, x9, xzr; \ + mov x10, #0x26; \ + and x11, x6, #0xffffffff; \ + lsr x12, x6, #32; \ + mul x11, x10, x11; \ + mul x12, x10, x12; \ + adds x2, x2, x11; \ + and x11, x7, #0xffffffff; \ + lsr x7, x7, #32; \ + mul x11, x10, x11; \ + mul x7, x10, x7; \ + adcs x3, x3, x11; \ + and x11, x8, #0xffffffff; \ + lsr x8, x8, #32; \ + mul x11, x10, x11; \ + mul x8, x10, x8; \ + adcs x4, x4, x11; \ + and x11, x9, #0xffffffff; \ + lsr x9, x9, #32; \ + mul x11, x10, x11; \ + mul x9, x10, x9; \ + adcs x5, x5, x11; \ + cset x6, cs; \ + lsl x11, x12, #32; \ + adds x2, x2, x11; \ + extr x11, x7, x12, #32; \ + adcs x3, x3, x11; \ + extr x11, x8, x7, #32; \ + adcs x4, x4, x11; \ + extr x11, x9, x8, #32; \ + adcs x5, x5, x11; \ + lsr x11, x9, #32; \ + adc x6, x6, x11; \ + cmn x5, x5; \ + orr x5, x5, #0x8000000000000000; \ + adc x13, x6, x6; \ + mov x10, #0x13; \ + madd x11, x10, x13, x10; \ + adds x2, x2, x11; \ + adcs x3, x3, xzr; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + csel x10, x10, xzr, cc; \ + subs x2, x2, x10; \ + sbcs x3, x3, xzr; \ + sbcs x4, x4, xzr; \ + sbc x5, x5, xzr; \ + and x5, x5, #0x7fffffffffffffff; \ + stp x2, x3, [p0]; \ + 
stp x4, x5, [p0+16] + +// Multiplication just giving a 5-digit result (actually < 39 * 2^256) +// by not doing anything beyond the first stage of reduction + +#define mul_5(p0,p1,p2) \ + ldp x3, x4, [p1]; \ + ldp x5, x6, [p2]; \ + mul x7, x3, x5; \ + umulh x8, x3, x5; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [p1+16]; \ + ldp x5, x6, [p2+16]; \ + mul x11, x3, x5; \ + umulh x12, x3, x5; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [p1+16]; \ + ldp x15, x16, [p1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [p2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + and x5, x11, #0xffffffff; \ + lsr x4, x11, #32; \ + mul x5, x3, x5; \ + mul x4, x3, x4; \ + adds x7, x7, x5; \ + and x5, x12, #0xffffffff; \ + lsr x12, x12, #32; \ + mul x5, x3, x5; \ + mul x12, x3, x12; \ + adcs x8, x8, x5; \ + and x5, x13, #0xffffffff; \ + lsr x13, x13, #32; \ + mul x5, x3, x5; \ + mul x13, x3, x13; \ + adcs x9, x9, x5; \ + and x5, x14, #0xffffffff; \ + lsr x14, x14, #32; \ + mul x5, x3, x5; \ + mul x14, x3, x14; \ + adcs x10, x10, x5; \ + cset x11, cs; \ + lsl x5, x4, #32; \ + adds x7, x7, x5; \ + extr x5, x12, x4, #32; \ + adcs x8, x8, x5; \ + extr x5, x13, x12, #32; \ + adcs x9, x9, x5; \ + extr x5, x14, x13, #32; \ + adcs x10, x10, x5; \ + lsr x5, x14, #32; \ + adc x11, x11, x5; \ + stp x7, x8, [p0]; \ + stp x9, x10, [p0+16]; \ + str x11, [p0+32] + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the 
final +// optional correction. + +#define sqr_4(p0,p1) \ + ldp x6, x7, [p1]; \ + ldp x10, x11, [p1+16]; \ + mul x4, x6, x10; \ + mul x9, x7, x11; \ + umulh x12, x6, x10; \ + subs x13, x6, x7; \ + cneg x13, x13, cc; \ + csetm x3, cc; \ + subs x2, x11, x10; \ + cneg x2, x2, cc; \ + mul x8, x13, x2; \ + umulh x2, x13, x2; \ + cinv x3, x3, cc; \ + eor x8, x8, x3; \ + eor x2, x2, x3; \ + adds x5, x4, x12; \ + adc x12, x12, xzr; \ + umulh x13, x7, x11; \ + adds x5, x5, x9; \ + adcs x12, x12, x13; \ + adc x13, x13, xzr; \ + adds x12, x12, x9; \ + adc x13, x13, xzr; \ + cmn x3, #0x1; \ + adcs x5, x5, x8; \ + adcs x12, x12, x2; \ + adc x13, x13, x3; \ + adds x4, x4, x4; \ + adcs x5, x5, x5; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adc x14, xzr, xzr; \ + mul x2, x6, x6; \ + mul x8, x7, x7; \ + mul x15, x6, x7; \ + umulh x3, x6, x6; \ + umulh x9, x7, x7; \ + umulh x16, x6, x7; \ + adds x3, x3, x15; \ + adcs x8, x8, x16; \ + adc x9, x9, xzr; \ + adds x3, x3, x15; \ + adcs x8, x8, x16; \ + adc x9, x9, xzr; \ + adds x4, x4, x8; \ + adcs x5, x5, x9; \ + adcs x12, x12, xzr; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x6, x10, x10; \ + mul x8, x11, x11; \ + mul x15, x10, x11; \ + umulh x7, x10, x10; \ + umulh x9, x11, x11; \ + umulh x16, x10, x11; \ + adds x7, x7, x15; \ + adcs x8, x8, x16; \ + adc x9, x9, xzr; \ + adds x7, x7, x15; \ + adcs x8, x8, x16; \ + adc x9, x9, xzr; \ + adds x6, x6, x12; \ + adcs x7, x7, x13; \ + adcs x8, x8, x14; \ + adc x9, x9, xzr; \ + mov x10, #0x26; \ + and x11, x6, #0xffffffff; \ + lsr x12, x6, #32; \ + mul x11, x10, x11; \ + mul x12, x10, x12; \ + adds x2, x2, x11; \ + and x11, x7, #0xffffffff; \ + lsr x7, x7, #32; \ + mul x11, x10, x11; \ + mul x7, x10, x7; \ + adcs x3, x3, x11; \ + and x11, x8, #0xffffffff; \ + lsr x8, x8, #32; \ + mul x11, x10, x11; \ + mul x8, x10, x8; \ + adcs x4, x4, x11; \ + and x11, x9, #0xffffffff; \ + lsr x9, x9, #32; \ + mul x11, x10, x11; \ + mul x9, x10, x9; \ + adcs x5, x5, x11; \ + cset x6, cs; \ + lsl x11, x12, #32; \ + adds x2, x2, x11; \ + extr x11, x7, x12, #32; \ + adcs x3, x3, x11; \ + extr x11, x8, x7, #32; \ + adcs x4, x4, x11; \ + extr x11, x9, x8, #32; \ + adcs x5, x5, x11; \ + lsr x11, x9, #32; \ + adc x6, x6, x11; \ + cmn x5, x5; \ + bic x5, x5, #0x8000000000000000; \ + adc x13, x6, x6; \ + mov x10, #0x13; \ + mul x11, x13, x10; \ + adds x2, x2, x11; \ + adcs x3, x3, xzr; \ + adcs x4, x4, xzr; \ + adc x5, x5, xzr; \ + stp x2, x3, [p0]; \ + stp x4, x5, [p0+16] + +// Plain 4-digit add without any normalization +// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result + +#define add_4(p0,p1,p2) \ + ldp x0, x1, [p1]; \ + ldp x4, x5, [p2]; \ + adds x0, x0, x4; \ + adcs x1, x1, x5; \ + ldp x2, x3, [p1+16]; \ + ldp x6, x7, [p2+16]; \ + adcs x2, x2, x6; \ + adcs x3, x3, x7; \ + stp x0, x1, [p0]; \ + stp x2, x3, [p0+16] + +// Add 5-digit inputs and normalize to 4 digits + +#define add5_4(p0,p1,p2) \ + ldp x0, x1, [p1]; \ + ldp x4, x5, [p2]; \ + adds x0, x0, x4; \ + adcs x1, x1, x5; \ + ldp x2, x3, [p1+16]; \ + ldp x6, x7, [p2+16]; \ + adcs x2, x2, x6; \ + adcs x3, x3, x7; \ + ldr x4, [p1+32]; \ + ldr x5, [p2+32]; \ + adc x4, x4, x5; \ + cmn x3, x3; \ + bic x3, x3, #0x8000000000000000; \ + adc x8, x4, x4; \ + mov x7, #19; \ + mul x11, x7, x8; \ + adds x0, x0, x11; \ + adcs x1, x1, xzr; \ + adcs x2, x2, xzr; \ + adc x3, x3, xzr; \ + stp x0, x1, [p0]; \ + stp x2, x3, [p0+16] + +// Subtraction of a pair of numbers < p_25519 just sufficient +// to give a 4-digit result. 
It actually always does (x - z) + (2^255-19) +// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 +// implicitly + +#define sub_4(p0,p1,p2) \ + ldp x5, x6, [p1]; \ + ldp x4, x3, [p2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [p1+16]; \ + ldp x4, x3, [p2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x3, #19; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + mov x4, #0x8000000000000000; \ + sbc x8, x8, x4; \ + stp x5, x6, [p0]; \ + stp x7, x8, [p0+16] + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(p0,p1,p2) \ + ldp x5, x6, [p1]; \ + ldp x4, x3, [p2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [p1+16]; \ + ldp x4, x3, [p2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x4, #38; \ + csel x3, x4, xzr, lo; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbc x8, x8, xzr; \ + stp x5, x6, [p0]; \ + stp x7, x8, [p0+16] + +// 5-digit subtraction with upward bias to make it positive, adding +// 1000 * (2^255 - 19) = 2^256 * 500 - 19000, then normalizing to 4 digits + +#define sub5_4(p0,p1,p2) \ + ldp x0, x1, [p1]; \ + ldp x4, x5, [p2]; \ + subs x0, x0, x4; \ + sbcs x1, x1, x5; \ + ldp x2, x3, [p1+16]; \ + ldp x6, x7, [p2+16]; \ + sbcs x2, x2, x6; \ + sbcs x3, x3, x7; \ + ldr x4, [p1+32]; \ + ldr x5, [p2+32]; \ + sbc x4, x4, x5; \ + mov x7, -19000; \ + adds x0, x0, x7; \ + sbcs x1, x1, xzr; \ + sbcs x2, x2, xzr; \ + sbcs x3, x3, xzr; \ + mov x7, 499; \ + adc x4, x4, x7; \ + cmn x3, x3; \ + bic x3, x3, #0x8000000000000000; \ + adc x8, x4, x4; \ + mov x7, #19; \ + mul x11, x7, x8; \ + adds x0, x0, x11; \ + adcs x1, x1, xzr; \ + adcs x2, x2, xzr; \ + adc x3, x3, xzr; \ + stp x0, x1, [p0]; \ + stp x2, x3, [p0+16] + +// Combined z = c * x + y with reduction only < 2 * p_25519 +// where c is initially in the X1 register. It is assumed +// that 19 * (c * x + y) < 2^60 * 2^256 so we don't need a +// high mul in the final part. + +#define cmadd_4(p0,p2,p3) \ + ldp x7, x8, [p2]; \ + ldp x9, x10, [p2+16]; \ + mul x3, x1, x7; \ + mul x4, x1, x8; \ + mul x5, x1, x9; \ + mul x6, x1, x10; \ + umulh x7, x1, x7; \ + umulh x8, x1, x8; \ + umulh x9, x1, x9; \ + umulh x10, x1, x10; \ + adds x4, x4, x7; \ + adcs x5, x5, x8; \ + adcs x6, x6, x9; \ + adc x10, x10, xzr; \ + ldp x7, x8, [p3]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x7, x8, [p3+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + adc x10, x10, xzr; \ + cmn x6, x6; \ + bic x6, x6, #0x8000000000000000; \ + adc x8, x10, x10; \ + mov x9, #19; \ + mul x7, x8, x9; \ + adds x3, x3, x7; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [p0]; \ + stp x5, x6, [p0+16] + +// Multiplex: z := if NZ then x else y + +#define mux_4(p0,p1,p2) \ + ldp x0, x1, [p1]; \ + ldp x2, x3, [p2]; \ + csel x0, x0, x2, ne; \ + csel x1, x1, x3, ne; \ + stp x0, x1, [p0]; \ + ldp x0, x1, [p1+16]; \ + ldp x2, x3, [p2+16]; \ + csel x0, x0, x2, ne; \ + csel x1, x1, x3, ne; \ + stp x0, x1, [p0+16] + +S2N_BN_SYMBOL(curve25519_x25519): + +// Save regs and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + sub sp, sp, #NSPACE + +// Move the output pointer to a stable place + + mov res, x0 + +// Copy the inputs to the local variables while mangling them: +// +// - The scalar gets turned into 01xxx...xxx000 by tweaking bits. +// Actually the top zero doesn't matter since the loop below +// never looks at it, so we don't literally modify that. 
+// +// - The point x coord is reduced mod 2^255 *then* mod 2^255-19 + + ldp x10, x11, [x1] + bic x10, x10, #7 + stp x10, x11, [scalar] + ldp x12, x13, [x1, #16] + orr x13, x13, #0x4000000000000000 + stp x12, x13, [scalar+16] + + ldp x10, x11, [x2] + subs x6, x10, #-19 + adcs x7, x11, xzr + ldp x12, x13, [x2, #16] + and x13, x13, #0x7fffffffffffffff + adcs x8, x12, xzr + mov x9, #0x7fffffffffffffff + sbcs x9, x13, x9 + + csel x10, x6, x10, cs + csel x11, x7, x11, cs + csel x12, x8, x12, cs + csel x13, x9, x13, cs + + stp x10, x11, [pointx] + stp x12, x13, [pointx+16] + +// Initialize (xn,zn) = (1,0) and (xm,zm) = (x,1) with swap = 0 +// We use the fact that the point x coordinate is still in registers + + mov x2, #1 + stp x2, xzr, [xn] + stp xzr, xzr, [xn+16] + stp xzr, xzr, [zn] + stp xzr, xzr, [zn+16] + stp x10, x11, [xm] + stp x12, x13, [xm+16] + stp x2, xzr, [zm] + stp xzr, xzr, [zm+16] + mov swap, xzr + +// The outer loop over scalar bits from i = 254, ..., i = 0 (inclusive) +// This starts at 254, and so implicitly masks bit 255 of the scalar. + + mov i, #254 + +scalarloop: + +// sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn +// The adds don't need any normalization as they're fed to muls +// Just make sure the subs fit in 4 digits + + sub_4(dm, xm, zm) + add_4(sn, xn, zn) + sub_4(dn, xn, zn) + add_4(sm, xm, zm) + +// ADDING: dmsn = dm * sn; dnsm = sm * dn +// DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) + + mul_5(dmsn,sn,dm) + + lsr x0, i, #6 + ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly + lsr x2, x2, i + and x2, x2, #1 + + cmp swap, x2 + mov swap, x2 + + mux_4(d,dm,dn) + mux_4(s,sm,sn) + + mul_5(dnsm,sm,dn) + +// DOUBLING: d = (xt - zt)^2 normalized only to 4 digits + + sqr_4(d,d) + +// ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 +// DOUBLING: s = (xt + zt)^2, normalized only to 4 digits + + sub5_4(dpro,dmsn,dnsm) + sqr_4(s,s) + add5_4(spro,dmsn,dnsm) + sqr_4(dpro,dpro) + +// DOUBLING: p = 4 * xt * zt = s - d + + sub_twice4(p,s,d) + +// ADDING: xm' = (dmsn + dnsm)^2 + + sqr_p25519(xm,spro) + +// DOUBLING: e = 121666 * p + d + + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + +// DOUBLING: xn' = (xt + zt)^2 * (xt - zt)^2 = s * d + + mul_p25519(xn,s,d) + +// ADDING: zm' = x * (dmsn - dnsm)^2 + + mul_p25519(zm,dpro,pointx) + +// DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) +// = p * (d + 121666 * p) + + mul_p25519(zn,p,e) + +// Loop down as far as 0 (inclusive) + + subs i, i, #1 + bcs scalarloop + +// Since the scalar was forced to be a multiple of 8, we know it's even. +// Hence there is no need to multiplex: the projective answer is (xn,zn) +// and we can ignore (xm,zm); indeed we could have avoided the last three +// differential additions and just done the doublings. +// First set up the constant sn = 2^255 - 19 for the modular inverse. + + mov x0, #-19 + mov x1, #-1 + mov x2, #0x7fffffffffffffff + stp x0, x1, [sn] + stp x1, x2, [sn+16] + +// Prepare to call the modular inverse function to get zm = 1/zn + + mov x0, #4 + add x1, zm + add x2, zn + add x3, sn + add x4, p + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "arm/generic/bignum_modinv.S". 
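+//
+// In C terms the net effect of the inlined code below is roughly
+//
+//      bignum_modinv(4, zm, zn, sn, p);   // zm := zn^-1 mod 2^255 - 19
+//
+// (a sketch in terms of the standalone subroutine's signature, where
+// the final argument is temporary storage; the code here is an inline
+// copy rather than an actual call).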
+ + lsl x10, x0, #3 + add x21, x4, x10 + add x22, x21, x10 + mov x10, xzr +copyloop: + ldr x11, [x2, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + str x11, [x21, x10, lsl #3] + str x12, [x22, x10, lsl #3] + str x12, [x4, x10, lsl #3] + str xzr, [x1, x10, lsl #3] + add x10, x10, #0x1 + cmp x10, x0 + b.cc copyloop + ldr x11, [x4] + sub x12, x11, #0x1 + str x12, [x4] + lsl x20, x11, #2 + sub x20, x11, x20 + eor x20, x20, #0x2 + mov x12, #0x1 + madd x12, x11, x20, x12 + mul x11, x12, x12 + madd x20, x12, x20, x20 + mul x12, x11, x11 + madd x20, x11, x20, x20 + mul x11, x12, x12 + madd x20, x12, x20, x20 + madd x20, x11, x20, x20 + lsl x2, x0, #7 +outerloop: + add x10, x2, #0x3f + lsr x5, x10, #6 + cmp x5, x0 + csel x5, x0, x5, cs + mov x13, xzr + mov x15, xzr + mov x14, xzr + mov x16, xzr + mov x19, xzr + mov x10, xzr +toploop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + orr x17, x11, x12 + cmp x17, xzr + and x17, x19, x13 + csel x15, x17, x15, ne + and x17, x19, x14 + csel x16, x17, x16, ne + csel x13, x11, x13, ne + csel x14, x12, x14, ne + csetm x19, ne + add x10, x10, #0x1 + cmp x10, x5 + b.cc toploop + orr x11, x13, x14 + clz x12, x11 + negs x17, x12 + lsl x13, x13, x12 + csel x15, x15, xzr, ne + lsl x14, x14, x12 + csel x16, x16, xzr, ne + lsr x15, x15, x17 + lsr x16, x16, x17 + orr x13, x13, x15 + orr x14, x14, x16 + ldr x15, [x21] + ldr x16, [x22] + mov x6, #0x1 + mov x7, xzr + mov x8, xzr + mov x9, #0x1 + mov x10, #0x3a + tst x15, #0x1 +innerloop: + csel x11, x14, xzr, ne + csel x12, x16, xzr, ne + csel x17, x8, xzr, ne + csel x19, x9, xzr, ne + ccmp x13, x14, #0x2, ne + sub x11, x13, x11 + sub x12, x15, x12 + csel x14, x14, x13, cs + cneg x11, x11, cc + csel x16, x16, x15, cs + cneg x15, x12, cc + csel x8, x8, x6, cs + csel x9, x9, x7, cs + tst x12, #0x2 + add x6, x6, x17 + add x7, x7, x19 + lsr x13, x11, #1 + lsr x15, x15, #1 + add x8, x8, x8 + add x9, x9, x9 + sub x10, x10, #0x1 + cbnz x10, innerloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +congloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + adds x15, x15, x16 + extr x17, x15, x17, #58 + str x17, [x4, x10, lsl #3] + mov x17, x15 + umulh x15, x7, x12 + adc x13, x13, x15 + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + adds x15, x15, x16 + extr x19, x15, x19, #58 + str x19, [x1, x10, lsl #3] + mov x19, x15 + umulh x15, x9, x12 + adc x14, x14, x15 + add x10, x10, #0x1 + cmp x10, x0 + b.cc congloop + extr x13, x13, x17, #58 + extr x14, x14, x19, #58 + ldr x11, [x4] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, wmontend +wmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x4, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wmontloop +wmontend: + adcs x16, x16, x13 + adc x13, xzr, xzr + sub x15, x10, #0x1 + str x16, [x4, x15, lsl #3] + negs x10, xzr +wcmploop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcmploop + sbcs xzr, x13, xzr + csetm x13, cs + negs x10, xzr +wcorrloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x13 + sbcs x11, x11, 
x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcorrloop + ldr x11, [x1] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, zmontend +zmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x1, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zmontloop +zmontend: + adcs x16, x16, x14 + adc x14, xzr, xzr + sub x15, x10, #0x1 + str x16, [x1, x15, lsl #3] + negs x10, xzr +zcmploop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcmploop + sbcs xzr, x14, xzr + csetm x14, cs + negs x10, xzr +zcorrloop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x14 + sbcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcorrloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +crossloop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + subs x15, x15, x16 + str x15, [x21, x10, lsl #3] + umulh x15, x7, x12 + sub x17, x15, x17 + sbcs x13, x13, x17 + csetm x17, cc + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + subs x15, x15, x16 + str x15, [x22, x10, lsl #3] + umulh x15, x9, x12 + sub x19, x15, x19 + sbcs x14, x14, x19 + csetm x19, cc + add x10, x10, #0x1 + cmp x10, x5 + b.cc crossloop + cmn x17, x17 + ldr x15, [x21] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip1 +negloop1: + add x11, x10, #0x8 + ldr x12, [x21, x11] + extr x15, x12, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop1 +negskip1: + extr x15, x13, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + cmn x19, x19 + ldr x15, [x22] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip2 +negloop2: + add x11, x10, #0x8 + ldr x12, [x22, x11] + extr x15, x12, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop2 +negskip2: + extr x15, x14, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x10, xzr + cmn x17, x17 +wfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + and x11, x11, x17 + eor x12, x12, x17 + adcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wfliploop + mvn x19, x19 + mov x10, xzr + cmn x19, x19 +zfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + and x11, x11, x19 + eor x12, x12, x19 + adcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zfliploop + subs x2, x2, #0x3a + b.hi outerloop + +// Since we eventually want to return 0 when the result is the point at +// infinity, we force xn = 0 whenever zn = 0. This avoids building in a +// dependency on the behavior of modular inverse in out-of-scope cases. 
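+//
+// In C terms the effect of the code below is roughly (sketch only):
+//
+//      if ((zn[0] | zn[1] | zn[2] | zn[3]) == 0)
+//          xn[0] = xn[1] = xn[2] = xn[3] = 0;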
+ + ldp x0, x1, [zn] + ldp x2, x3, [zn+16] + orr x0, x0, x1 + orr x2, x2, x3 + orr x4, x0, x2 + cmp x4, xzr + ldp x0, x1, [xn] + csel x0, x0, xzr, ne + csel x1, x1, xzr, ne + ldp x2, x3, [xn+16] + stp x0, x1, [xn] + csel x2, x2, xzr, ne + csel x3, x3, xzr, ne + stp x2, x3, [xn+16] + +// Now the result is xn * (1/zn). + + mul_p25519(resx,xn,zm) + +// Restore stack and registers + + add sp, sp, #NSPACE + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/curve25519/curve25519_x25519_alt.S b/arm/curve25519/curve25519_x25519_alt.S new file mode 100644 index 0000000000..8a0495f97c --- /dev/null +++ b/arm/curve25519/curve25519_x25519_alt.S @@ -0,0 +1,1151 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// The x25519 function for curve25519 +// Inputs scalar[4], point[4]; output res[4] +// +// extern void curve25519_x25519_alt +// (uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]) +// +// Given a scalar n and the X coordinate of an input point P = (X,Y) on +// curve25519 (Y can live in any extension field of characteristic 2^255-19), +// this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the +// point at infinity. Both n and X inputs are first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748); +// in particular the lower three bits of n are set to zero. +// +// Standard ARM ABI: X0 = res, X1 = scalar, X2 = point +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519_alt) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable homes for the input result argument during the whole body +// and other variables that are only needed prior to the modular inverse. + +#define res x23 +#define i x20 +#define swap x21 + +// Pointers to result x coord to be written + +#define resx res, #0 + +// Pointer-offset pairs for temporaries on stack with some aliasing. 
+// Both dmsn and dnsm need space for >= 5 digits, and we allocate 8 + +#define scalar sp, #(0*NUMSIZE) + +#define pointx sp, #(1*NUMSIZE) + +#define zm sp, #(2*NUMSIZE) +#define sm sp, #(2*NUMSIZE) +#define dpro sp, #(2*NUMSIZE) + +#define sn sp, #(3*NUMSIZE) + +#define dm sp, #(4*NUMSIZE) + +#define zn sp, #(5*NUMSIZE) +#define dn sp, #(5*NUMSIZE) +#define e sp, #(5*NUMSIZE) + +#define dmsn sp, #(6*NUMSIZE) +#define p sp, #(6*NUMSIZE) + +#define xm sp, #(8*NUMSIZE) +#define dnsm sp, #(8*NUMSIZE) +#define spro sp, #(8*NUMSIZE) + +#define xn sp, #(10*NUMSIZE) +#define s sp, #(10*NUMSIZE) + +#define d sp, #(11*NUMSIZE) + +// Total size to reserve on the stack + +#define NSPACE (12*NUMSIZE) + +// Macros wrapping up the basic field operation calls +// bignum_mul_p25519_alt and bignum_sqr_p25519_alt. +// These two are only trivially different from pure +// function calls to those subroutines. + +#define mul_p25519(p0,p1,p2) \ + ldp x3, x4, [p1]; \ + ldp x7, x8, [p2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [p2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [p1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #38; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, hs; \ + adds x13, x13, x9; \ + adcs x14, x14, x3; \ + adcs x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + orr x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #19; \ + madd x11, x7, x8, x7; \ + adds x12, x12, x11; \ + adcs x13, x13, xzr; \ + adcs x14, x14, xzr; \ + adcs x15, x15, xzr; \ + csel x7, x7, xzr, lo; \ + subs x12, x12, x7; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbc x15, x15, xzr; \ + and x15, x15, #0x7fffffffffffffff; \ + stp x12, x13, [p0]; \ + stp x14, x15, [p0+16] + +#define sqr_p25519(p0,p1) \ + ldp x2, x3, [p1]; \ + mul x9, x2, x3; \ + umulh x10, x2, x3; \ + ldp x4, x5, [p1+16]; \ + mul x11, x2, x5; \ + umulh x12, x2, x5; \ + mul x7, x2, x4; \ + umulh x6, x2, x4; \ + adds x10, x10, x7; \ + adcs x11, x11, x6; \ + mul x7, x3, x4; \ + umulh 
x6, x3, x4; \ + adc x6, x6, xzr; \ + adds x11, x11, x7; \ + mul x13, x4, x5; \ + umulh x14, x4, x5; \ + adcs x12, x12, x6; \ + mul x7, x3, x5; \ + umulh x6, x3, x5; \ + adc x6, x6, xzr; \ + adds x12, x12, x7; \ + adcs x13, x13, x6; \ + adc x14, x14, xzr; \ + adds x9, x9, x9; \ + adcs x10, x10, x10; \ + adcs x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + cset x6, hs; \ + umulh x7, x2, x2; \ + mul x8, x2, x2; \ + adds x9, x9, x7; \ + mul x7, x3, x3; \ + adcs x10, x10, x7; \ + umulh x7, x3, x3; \ + adcs x11, x11, x7; \ + mul x7, x4, x4; \ + adcs x12, x12, x7; \ + umulh x7, x4, x4; \ + adcs x13, x13, x7; \ + mul x7, x5, x5; \ + adcs x14, x14, x7; \ + umulh x7, x5, x5; \ + adc x6, x6, x7; \ + mov x3, #38; \ + mul x7, x3, x12; \ + umulh x4, x3, x12; \ + adds x8, x8, x7; \ + mul x7, x3, x13; \ + umulh x13, x3, x13; \ + adcs x9, x9, x7; \ + mul x7, x3, x14; \ + umulh x14, x3, x14; \ + adcs x10, x10, x7; \ + mul x7, x3, x6; \ + umulh x6, x3, x6; \ + adcs x11, x11, x7; \ + cset x12, hs; \ + adds x9, x9, x4; \ + adcs x10, x10, x13; \ + adcs x11, x11, x14; \ + adc x12, x12, x6; \ + cmn x11, x11; \ + orr x11, x11, #0x8000000000000000; \ + adc x2, x12, x12; \ + mov x3, #19; \ + madd x7, x3, x2, x3; \ + adds x8, x8, x7; \ + adcs x9, x9, xzr; \ + adcs x10, x10, xzr; \ + adcs x11, x11, xzr; \ + csel x3, x3, xzr, lo; \ + subs x8, x8, x3; \ + sbcs x9, x9, xzr; \ + sbcs x10, x10, xzr; \ + sbc x11, x11, xzr; \ + and x11, x11, #0x7fffffffffffffff; \ + stp x8, x9, [p0]; \ + stp x10, x11, [p0+16] + +// Multiplication just giving a 5-digit result (actually < 39 * 2^256) +// by not doing anything beyond the first stage of reduction + +#define mul_5(p0,p1,p2) \ + ldp x3, x4, [p1]; \ + ldp x7, x8, [p2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [p2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [p1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #38; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, hs; \ + adds x13, x13, x9; \ + 
adcs x14, x14, x3; \ + adcs x15, x15, x4; \ + adc x16, x16, x5; \ + stp x12, x13, [p0]; \ + stp x14, x15, [p0+16]; \ + str x16, [p0+32] + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the final +// optional correction. + +#define sqr_4(p0,p1) \ + ldp x2, x3, [p1]; \ + mul x9, x2, x3; \ + umulh x10, x2, x3; \ + ldp x4, x5, [p1+16]; \ + mul x11, x2, x5; \ + umulh x12, x2, x5; \ + mul x7, x2, x4; \ + umulh x6, x2, x4; \ + adds x10, x10, x7; \ + adcs x11, x11, x6; \ + mul x7, x3, x4; \ + umulh x6, x3, x4; \ + adc x6, x6, xzr; \ + adds x11, x11, x7; \ + mul x13, x4, x5; \ + umulh x14, x4, x5; \ + adcs x12, x12, x6; \ + mul x7, x3, x5; \ + umulh x6, x3, x5; \ + adc x6, x6, xzr; \ + adds x12, x12, x7; \ + adcs x13, x13, x6; \ + adc x14, x14, xzr; \ + adds x9, x9, x9; \ + adcs x10, x10, x10; \ + adcs x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + cset x6, hs; \ + umulh x7, x2, x2; \ + mul x8, x2, x2; \ + adds x9, x9, x7; \ + mul x7, x3, x3; \ + adcs x10, x10, x7; \ + umulh x7, x3, x3; \ + adcs x11, x11, x7; \ + mul x7, x4, x4; \ + adcs x12, x12, x7; \ + umulh x7, x4, x4; \ + adcs x13, x13, x7; \ + mul x7, x5, x5; \ + adcs x14, x14, x7; \ + umulh x7, x5, x5; \ + adc x6, x6, x7; \ + mov x3, #38; \ + mul x7, x3, x12; \ + umulh x4, x3, x12; \ + adds x8, x8, x7; \ + mul x7, x3, x13; \ + umulh x13, x3, x13; \ + adcs x9, x9, x7; \ + mul x7, x3, x14; \ + umulh x14, x3, x14; \ + adcs x10, x10, x7; \ + mul x7, x3, x6; \ + umulh x6, x3, x6; \ + adcs x11, x11, x7; \ + cset x12, hs; \ + adds x9, x9, x4; \ + adcs x10, x10, x13; \ + adcs x11, x11, x14; \ + adc x12, x12, x6; \ + cmn x11, x11; \ + bic x11, x11, #0x8000000000000000; \ + adc x2, x12, x12; \ + mov x3, #19; \ + mul x7, x3, x2; \ + adds x8, x8, x7; \ + adcs x9, x9, xzr; \ + adcs x10, x10, xzr; \ + adc x11, x11, xzr; \ + stp x8, x9, [p0]; \ + stp x10, x11, [p0+16] + +// Plain 4-digit add without any normalization +// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result + +#define add_4(p0,p1,p2) \ + ldp x0, x1, [p1]; \ + ldp x4, x5, [p2]; \ + adds x0, x0, x4; \ + adcs x1, x1, x5; \ + ldp x2, x3, [p1+16]; \ + ldp x6, x7, [p2+16]; \ + adcs x2, x2, x6; \ + adcs x3, x3, x7; \ + stp x0, x1, [p0]; \ + stp x2, x3, [p0+16] + +// Add 5-digit inputs and normalize to 4 digits + +#define add5_4(p0,p1,p2) \ + ldp x0, x1, [p1]; \ + ldp x4, x5, [p2]; \ + adds x0, x0, x4; \ + adcs x1, x1, x5; \ + ldp x2, x3, [p1+16]; \ + ldp x6, x7, [p2+16]; \ + adcs x2, x2, x6; \ + adcs x3, x3, x7; \ + ldr x4, [p1+32]; \ + ldr x5, [p2+32]; \ + adc x4, x4, x5; \ + cmn x3, x3; \ + bic x3, x3, #0x8000000000000000; \ + adc x8, x4, x4; \ + mov x7, #19; \ + mul x11, x7, x8; \ + adds x0, x0, x11; \ + adcs x1, x1, xzr; \ + adcs x2, x2, xzr; \ + adc x3, x3, xzr; \ + stp x0, x1, [p0]; \ + stp x2, x3, [p0+16] + +// Subtraction of a pair of numbers < p_25519 just sufficient +// to give a 4-digit result. 
It actually always does (x - z) + (2^255-19) +// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 +// implicitly + +#define sub_4(p0,p1,p2) \ + ldp x5, x6, [p1]; \ + ldp x4, x3, [p2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [p1+16]; \ + ldp x4, x3, [p2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x3, #19; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + mov x4, #0x8000000000000000; \ + sbc x8, x8, x4; \ + stp x5, x6, [p0]; \ + stp x7, x8, [p0+16] + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(p0,p1,p2) \ + ldp x5, x6, [p1]; \ + ldp x4, x3, [p2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [p1+16]; \ + ldp x4, x3, [p2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x4, #38; \ + csel x3, x4, xzr, lo; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbc x8, x8, xzr; \ + stp x5, x6, [p0]; \ + stp x7, x8, [p0+16] + +// 5-digit subtraction with upward bias to make it positive, adding +// 1000 * (2^255 - 19) = 2^256 * 500 - 19000, then normalizing to 4 digits + +#define sub5_4(p0,p1,p2) \ + ldp x0, x1, [p1]; \ + ldp x4, x5, [p2]; \ + subs x0, x0, x4; \ + sbcs x1, x1, x5; \ + ldp x2, x3, [p1+16]; \ + ldp x6, x7, [p2+16]; \ + sbcs x2, x2, x6; \ + sbcs x3, x3, x7; \ + ldr x4, [p1+32]; \ + ldr x5, [p2+32]; \ + sbc x4, x4, x5; \ + mov x7, -19000; \ + adds x0, x0, x7; \ + sbcs x1, x1, xzr; \ + sbcs x2, x2, xzr; \ + sbcs x3, x3, xzr; \ + mov x7, 499; \ + adc x4, x4, x7; \ + cmn x3, x3; \ + bic x3, x3, #0x8000000000000000; \ + adc x8, x4, x4; \ + mov x7, #19; \ + mul x11, x7, x8; \ + adds x0, x0, x11; \ + adcs x1, x1, xzr; \ + adcs x2, x2, xzr; \ + adc x3, x3, xzr; \ + stp x0, x1, [p0]; \ + stp x2, x3, [p0+16] + +// Combined z = c * x + y with reduction only < 2 * p_25519 +// where c is initially in the X1 register. It is assumed +// that 19 * (c * x + y) < 2^60 * 2^256 so we don't need a +// high mul in the final part. + +#define cmadd_4(p0,p2,p3) \ + ldp x7, x8, [p2]; \ + ldp x9, x10, [p2+16]; \ + mul x3, x1, x7; \ + mul x4, x1, x8; \ + mul x5, x1, x9; \ + mul x6, x1, x10; \ + umulh x7, x1, x7; \ + umulh x8, x1, x8; \ + umulh x9, x1, x9; \ + umulh x10, x1, x10; \ + adds x4, x4, x7; \ + adcs x5, x5, x8; \ + adcs x6, x6, x9; \ + adc x10, x10, xzr; \ + ldp x7, x8, [p3]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x7, x8, [p3+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + adc x10, x10, xzr; \ + cmn x6, x6; \ + bic x6, x6, #0x8000000000000000; \ + adc x8, x10, x10; \ + mov x9, #19; \ + mul x7, x8, x9; \ + adds x3, x3, x7; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [p0]; \ + stp x5, x6, [p0+16] + +// Multiplex: z := if NZ then x else y + +#define mux_4(p0,p1,p2) \ + ldp x0, x1, [p1]; \ + ldp x2, x3, [p2]; \ + csel x0, x0, x2, ne; \ + csel x1, x1, x3, ne; \ + stp x0, x1, [p0]; \ + ldp x0, x1, [p1+16]; \ + ldp x2, x3, [p2+16]; \ + csel x0, x0, x2, ne; \ + csel x1, x1, x3, ne; \ + stp x0, x1, [p0+16] + +S2N_BN_SYMBOL(curve25519_x25519_alt): + +// Save regs and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + sub sp, sp, #NSPACE + +// Move the output pointer to a stable place + + mov res, x0 + +// Copy the inputs to the local variables while mangling them: +// +// - The scalar gets turned into 01xxx...xxx000 by tweaking bits. +// Actually the top zero doesn't matter since the loop below +// never looks at it, so we don't literally modify that. 
+// +// - The point x coord is reduced mod 2^255 *then* mod 2^255-19 + + ldp x10, x11, [x1] + bic x10, x10, #7 + stp x10, x11, [scalar] + ldp x12, x13, [x1, #16] + orr x13, x13, #0x4000000000000000 + stp x12, x13, [scalar+16] + + ldp x10, x11, [x2] + subs x6, x10, #-19 + adcs x7, x11, xzr + ldp x12, x13, [x2, #16] + and x13, x13, #0x7fffffffffffffff + adcs x8, x12, xzr + mov x9, #0x7fffffffffffffff + sbcs x9, x13, x9 + + csel x10, x6, x10, cs + csel x11, x7, x11, cs + csel x12, x8, x12, cs + csel x13, x9, x13, cs + + stp x10, x11, [pointx] + stp x12, x13, [pointx+16] + +// Initialize (xn,zn) = (1,0) and (xm,zm) = (x,1) with swap = 0 +// We use the fact that the point x coordinate is still in registers + + mov x2, #1 + stp x2, xzr, [xn] + stp xzr, xzr, [xn+16] + stp xzr, xzr, [zn] + stp xzr, xzr, [zn+16] + stp x10, x11, [xm] + stp x12, x13, [xm+16] + stp x2, xzr, [zm] + stp xzr, xzr, [zm+16] + mov swap, xzr + +// The outer loop over scalar bits from i = 254, ..., i = 0 (inclusive) +// This starts at 254, and so implicitly masks bit 255 of the scalar. + + mov i, #254 + +scalarloop: + +// sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn +// The adds don't need any normalization as they're fed to muls +// Just make sure the subs fit in 4 digits + + sub_4(dm, xm, zm) + add_4(sn, xn, zn) + sub_4(dn, xn, zn) + add_4(sm, xm, zm) + +// ADDING: dmsn = dm * sn; dnsm = sm * dn +// DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) + + mul_5(dmsn,sn,dm) + + lsr x0, i, #6 + ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly + lsr x2, x2, i + and x2, x2, #1 + + cmp swap, x2 + mov swap, x2 + + mux_4(d,dm,dn) + mux_4(s,sm,sn) + + mul_5(dnsm,sm,dn) + +// DOUBLING: d = (xt - zt)^2 normalized only to 4 digits + + sqr_4(d,d) + +// ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 +// DOUBLING: s = (xt + zt)^2, normalized only to 4 digits + + sub5_4(dpro,dmsn,dnsm) + sqr_4(s,s) + add5_4(spro,dmsn,dnsm) + sqr_4(dpro,dpro) + +// DOUBLING: p = 4 * xt * zt = s - d + + sub_twice4(p,s,d) + +// ADDING: xm' = (dmsn + dnsm)^2 + + sqr_p25519(xm,spro) + +// DOUBLING: e = 121666 * p + d + + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + +// DOUBLING: xn' = (xt + zt)^2 * (xt - zt)^2 = s * d + + mul_p25519(xn,s,d) + +// ADDING: zm' = x * (dmsn - dnsm)^2 + + mul_p25519(zm,dpro,pointx) + +// DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) +// = p * (d + 121666 * p) + + mul_p25519(zn,p,e) + +// Loop down as far as 0 (inclusive) + + subs i, i, #1 + bcs scalarloop + +// Since the scalar was forced to be a multiple of 8, we know it's even. +// Hence there is no need to multiplex: the projective answer is (xn,zn) +// and we can ignore (xm,zm); indeed we could have avoided the last three +// differential additions and just done the doublings. +// First set up the constant sn = 2^255 - 19 for the modular inverse. + + mov x0, #-19 + mov x1, #-1 + mov x2, #0x7fffffffffffffff + stp x0, x1, [sn] + stp x1, x2, [sn+16] + +// Prepare to call the modular inverse function to get zm = 1/zn + + mov x0, #4 + add x1, zm + add x2, zn + add x3, sn + add x4, p + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "arm/generic/bignum_modinv.S". 
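+//
+// In C terms the net effect of the inlined code below is roughly
+//
+//      bignum_modinv(4, zm, zn, sn, p);   // zm := zn^-1 mod 2^255 - 19
+//
+// (a sketch in terms of the standalone subroutine's signature, where
+// the final argument is temporary storage; the code here is an inline
+// copy rather than an actual call).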
+ + lsl x10, x0, #3 + add x21, x4, x10 + add x22, x21, x10 + mov x10, xzr +copyloop: + ldr x11, [x2, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + str x11, [x21, x10, lsl #3] + str x12, [x22, x10, lsl #3] + str x12, [x4, x10, lsl #3] + str xzr, [x1, x10, lsl #3] + add x10, x10, #0x1 + cmp x10, x0 + b.cc copyloop + ldr x11, [x4] + sub x12, x11, #0x1 + str x12, [x4] + lsl x20, x11, #2 + sub x20, x11, x20 + eor x20, x20, #0x2 + mov x12, #0x1 + madd x12, x11, x20, x12 + mul x11, x12, x12 + madd x20, x12, x20, x20 + mul x12, x11, x11 + madd x20, x11, x20, x20 + mul x11, x12, x12 + madd x20, x12, x20, x20 + madd x20, x11, x20, x20 + lsl x2, x0, #7 +outerloop: + add x10, x2, #0x3f + lsr x5, x10, #6 + cmp x5, x0 + csel x5, x0, x5, cs + mov x13, xzr + mov x15, xzr + mov x14, xzr + mov x16, xzr + mov x19, xzr + mov x10, xzr +toploop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + orr x17, x11, x12 + cmp x17, xzr + and x17, x19, x13 + csel x15, x17, x15, ne + and x17, x19, x14 + csel x16, x17, x16, ne + csel x13, x11, x13, ne + csel x14, x12, x14, ne + csetm x19, ne + add x10, x10, #0x1 + cmp x10, x5 + b.cc toploop + orr x11, x13, x14 + clz x12, x11 + negs x17, x12 + lsl x13, x13, x12 + csel x15, x15, xzr, ne + lsl x14, x14, x12 + csel x16, x16, xzr, ne + lsr x15, x15, x17 + lsr x16, x16, x17 + orr x13, x13, x15 + orr x14, x14, x16 + ldr x15, [x21] + ldr x16, [x22] + mov x6, #0x1 + mov x7, xzr + mov x8, xzr + mov x9, #0x1 + mov x10, #0x3a + tst x15, #0x1 +innerloop: + csel x11, x14, xzr, ne + csel x12, x16, xzr, ne + csel x17, x8, xzr, ne + csel x19, x9, xzr, ne + ccmp x13, x14, #0x2, ne + sub x11, x13, x11 + sub x12, x15, x12 + csel x14, x14, x13, cs + cneg x11, x11, cc + csel x16, x16, x15, cs + cneg x15, x12, cc + csel x8, x8, x6, cs + csel x9, x9, x7, cs + tst x12, #0x2 + add x6, x6, x17 + add x7, x7, x19 + lsr x13, x11, #1 + lsr x15, x15, #1 + add x8, x8, x8 + add x9, x9, x9 + sub x10, x10, #0x1 + cbnz x10, innerloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +congloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + adds x15, x15, x16 + extr x17, x15, x17, #58 + str x17, [x4, x10, lsl #3] + mov x17, x15 + umulh x15, x7, x12 + adc x13, x13, x15 + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + adds x15, x15, x16 + extr x19, x15, x19, #58 + str x19, [x1, x10, lsl #3] + mov x19, x15 + umulh x15, x9, x12 + adc x14, x14, x15 + add x10, x10, #0x1 + cmp x10, x0 + b.cc congloop + extr x13, x13, x17, #58 + extr x14, x14, x19, #58 + ldr x11, [x4] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, wmontend +wmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x4, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wmontloop +wmontend: + adcs x16, x16, x13 + adc x13, xzr, xzr + sub x15, x10, #0x1 + str x16, [x4, x15, lsl #3] + negs x10, xzr +wcmploop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcmploop + sbcs xzr, x13, xzr + csetm x13, cs + negs x10, xzr +wcorrloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x13 + sbcs x11, x11, 
x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcorrloop + ldr x11, [x1] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, zmontend +zmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x1, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zmontloop +zmontend: + adcs x16, x16, x14 + adc x14, xzr, xzr + sub x15, x10, #0x1 + str x16, [x1, x15, lsl #3] + negs x10, xzr +zcmploop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcmploop + sbcs xzr, x14, xzr + csetm x14, cs + negs x10, xzr +zcorrloop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x14 + sbcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcorrloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +crossloop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + subs x15, x15, x16 + str x15, [x21, x10, lsl #3] + umulh x15, x7, x12 + sub x17, x15, x17 + sbcs x13, x13, x17 + csetm x17, cc + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + subs x15, x15, x16 + str x15, [x22, x10, lsl #3] + umulh x15, x9, x12 + sub x19, x15, x19 + sbcs x14, x14, x19 + csetm x19, cc + add x10, x10, #0x1 + cmp x10, x5 + b.cc crossloop + cmn x17, x17 + ldr x15, [x21] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip1 +negloop1: + add x11, x10, #0x8 + ldr x12, [x21, x11] + extr x15, x12, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop1 +negskip1: + extr x15, x13, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + cmn x19, x19 + ldr x15, [x22] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip2 +negloop2: + add x11, x10, #0x8 + ldr x12, [x22, x11] + extr x15, x12, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop2 +negskip2: + extr x15, x14, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x10, xzr + cmn x17, x17 +wfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + and x11, x11, x17 + eor x12, x12, x17 + adcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wfliploop + mvn x19, x19 + mov x10, xzr + cmn x19, x19 +zfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + and x11, x11, x19 + eor x12, x12, x19 + adcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zfliploop + subs x2, x2, #0x3a + b.hi outerloop + +// Since we eventually want to return 0 when the result is the point at +// infinity, we force xn = 0 whenever zn = 0. This avoids building in a +// dependency on the behavior of modular inverse in out-of-scope cases. 
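+//
+// In C terms the effect of the code below is roughly (sketch only):
+//
+//      if ((zn[0] | zn[1] | zn[2] | zn[3]) == 0)
+//          xn[0] = xn[1] = xn[2] = xn[3] = 0;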
+ + ldp x0, x1, [zn] + ldp x2, x3, [zn+16] + orr x0, x0, x1 + orr x2, x2, x3 + orr x4, x0, x2 + cmp x4, xzr + ldp x0, x1, [xn] + csel x0, x0, xzr, ne + csel x1, x1, xzr, ne + ldp x2, x3, [xn+16] + stp x0, x1, [xn] + csel x2, x2, xzr, ne + csel x3, x3, xzr, ne + stp x2, x3, [xn+16] + +// Now the result is xn * (1/zn). + + mul_p25519(resx,xn,zm) + +// Restore stack and registers + + add sp, sp, #NSPACE + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/curve25519/curve25519_x25519.S b/x86_att/curve25519/curve25519_x25519.S new file mode 100644 index 0000000000..09d62751b8 --- /dev/null +++ b/x86_att/curve25519/curve25519_x25519.S @@ -0,0 +1,1245 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +// ---------------------------------------------------------------------------- +// The x25519 function for curve25519 +// Inputs scalar[4], point[4]; output res[4] +// +// extern void curve25519_x25519 +// (uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]) +// +// Given a scalar n and the X coordinate of an input point P = (X,Y) on +// curve25519 (Y can live in any extension field of characteristic 2^255-19), +// this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the +// point at infinity. Both n and X inputs are first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748); +// in particular the lower three bits of n are set to zero. +// +// Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point +// Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519) + .text + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable homes for the input result argument during the whole body +// and other variables that are only needed prior to the modular inverse. + +#define res 12*NUMSIZE(%rsp) +#define i 12*NUMSIZE+8(%rsp) +#define swap 12*NUMSIZE+16(%rsp) + +// Pointers to result x coord to be written, assuming the base "res" +// has been loaded into %rbp + +#define resx 0(%rbp) + +// Pointer-offset pairs for temporaries on stack with some aliasing. 
+// Both dmsn and dnsm need space for >= 5 digits, and we allocate 8 + +#define scalar (0*NUMSIZE)(%rsp) + +#define pointx (1*NUMSIZE)(%rsp) + +#define dm (2*NUMSIZE)(%rsp) + +#define zm (3*NUMSIZE)(%rsp) +#define sm (3*NUMSIZE)(%rsp) +#define dpro (3*NUMSIZE)(%rsp) + +#define sn (4*NUMSIZE)(%rsp) + +#define zn (5*NUMSIZE)(%rsp) +#define dn (5*NUMSIZE)(%rsp) +#define e (5*NUMSIZE)(%rsp) + +#define dmsn (6*NUMSIZE)(%rsp) +#define p (6*NUMSIZE)(%rsp) + +#define xm (8*NUMSIZE)(%rsp) +#define dnsm (8*NUMSIZE)(%rsp) +#define spro (8*NUMSIZE)(%rsp) + +#define xn (10*NUMSIZE)(%rsp) +#define s (10*NUMSIZE)(%rsp) + +#define d (11*NUMSIZE)(%rsp) + +// Total size to reserve on the stack +// This includes space for the 3 other variables above +// and rounds up to a multiple of 32 + +#define NSPACE (13*NUMSIZE) + +// Macros wrapping up the basic field operation calls +// bignum_mul_p25519 and bignum_sqr_p25519. +// These two are only trivially different from pure +// function calls to those subroutines. + +#define mul_p25519(P0,P1,P2) \ + xorl %edi, %edi ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rax, %r10 ; \ + addq %rax, %r9 ; \ + mulxq 0x10+P1, %rax, %r11 ; \ + adcq %rax, %r10 ; \ + mulxq 0x18+P1, %rax, %r12 ; \ + adcq %rax, %r11 ; \ + adcq %rdi, %r12 ; \ + xorl %edi, %edi ; \ + movq 0x8+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rdi, %r13 ; \ + adcxq %rdi, %r13 ; \ + xorl %edi, %edi ; \ + movq 0x10+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rdi, %r14 ; \ + adcxq %rdi, %r14 ; \ + xorl %edi, %edi ; \ + movq 0x18+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rdi, %r15 ; \ + adcxq %rdi, %r15 ; \ + movl $0x26, %edx ; \ + xorl %edi, %edi ; \ + mulxq %r12, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq %r13, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq %r14, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq %r15, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rdi, %r12 ; \ + adcxq %rdi, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + movabs $0x8000000000000000, %rcx ; \ + movl $0x13, %edx ; \ + incq %r12; \ + orq %rcx, %r11 ; \ + mulxq %r12, %rax, %rbx ; \ + addq %rax, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %rdi, %r10 ; \ + adcq %rdi, %r11 ; \ + sbbq %rax, %rax ; \ + notq %rax; \ + andq %rdx, %rax ; \ + subq %rax, %r8 ; \ + sbbq %rdi, %r9 ; \ + sbbq %rdi, %r10 ; \ + sbbq %rdi, %r11 ; \ + notq %rcx; \ + andq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +#define sqr_p25519(P0,P1) \ + movq P1, %rdx ; \ + mulxq %rdx, %r8, %r15 ; \ + mulxq 0x8+P1, %r9, %r10 ; \ + mulxq 0x18+P1, %r11, %r12 ; \ + movq 0x10+P1, %rdx ; \ + mulxq 0x18+P1, %r13, %r14 ; \ + xorl %ebx, %ebx ; \ + mulxq P1, %rax, %rcx ; \ + adcxq %rax, 
%r10 ; \ + adoxq %rcx, %r11 ; \ + mulxq 0x8+P1, %rax, %rcx ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + movq 0x18+P1, %rdx ; \ + mulxq 0x8+P1, %rax, %rcx ; \ + adcxq %rax, %r12 ; \ + adoxq %rcx, %r13 ; \ + adcxq %rbx, %r13 ; \ + adoxq %rbx, %r14 ; \ + adcq %rbx, %r14 ; \ + xorl %ebx, %ebx ; \ + adcxq %r9, %r9 ; \ + adoxq %r15, %r9 ; \ + movq 0x8+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r10, %r10 ; \ + adoxq %rax, %r10 ; \ + adcxq %r11, %r11 ; \ + adoxq %rdx, %r11 ; \ + movq 0x10+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r12, %r12 ; \ + adoxq %rax, %r12 ; \ + adcxq %r13, %r13 ; \ + adoxq %rdx, %r13 ; \ + movq 0x18+P1, %rdx ; \ + mulxq %rdx, %rax, %r15 ; \ + adcxq %r14, %r14 ; \ + adoxq %rax, %r14 ; \ + adcxq %rbx, %r15 ; \ + adoxq %rbx, %r15 ; \ + movl $0x26, %edx ; \ + xorl %ebx, %ebx ; \ + mulxq %r12, %rax, %rcx ; \ + adcxq %rax, %r8 ; \ + adoxq %rcx, %r9 ; \ + mulxq %r13, %rax, %rcx ; \ + adcxq %rax, %r9 ; \ + adoxq %rcx, %r10 ; \ + mulxq %r14, %rax, %rcx ; \ + adcxq %rax, %r10 ; \ + adoxq %rcx, %r11 ; \ + mulxq %r15, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + adcxq %rbx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + movl $0x13, %edx ; \ + leaq 0x1(%r12), %rax ; \ + bts $0x3f, %r11 ; \ + imulq %rdx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %rbx, %r10 ; \ + adcq %rbx, %r11 ; \ + cmovbq %rbx, %rdx ; \ + subq %rdx, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbx, %r11 ; \ + btr $0x3f, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Multiplication just giving a 5-digit result (actually < 39 * p_25519) +// by not doing anything beyond the first stage of reduction + +#define mul_5(P0,P1,P2) \ + xorl %edi, %edi ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rax, %r10 ; \ + addq %rax, %r9 ; \ + mulxq 0x10+P1, %rax, %r11 ; \ + adcq %rax, %r10 ; \ + mulxq 0x18+P1, %rax, %r12 ; \ + adcq %rax, %r11 ; \ + adcq %rdi, %r12 ; \ + xorl %edi, %edi ; \ + movq 0x8+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rdi, %r13 ; \ + adcxq %rdi, %r13 ; \ + xorl %edi, %edi ; \ + movq 0x10+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rdi, %r14 ; \ + adcxq %rdi, %r14 ; \ + xorl %edi, %edi ; \ + movq 0x18+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rdi, %r15 ; \ + adcxq %rdi, %r15 ; \ + movl $0x26, %edx ; \ + xorl %edi, %edi ; \ + mulxq %r12, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq %r13, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq %r14, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq %r15, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rdi, %r12 ; \ + adcxq %rdi, %r12 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + 
movq %r11, 0x18+P0 ; \ + movq %r12, 0x20+P0 + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the final +// optional correction. + +#define sqr_4(P0,P1) \ + movq P1, %rdx ; \ + mulxq %rdx, %r8, %r15 ; \ + mulxq 0x8+P1, %r9, %r10 ; \ + mulxq 0x18+P1, %r11, %r12 ; \ + movq 0x10+P1, %rdx ; \ + mulxq 0x18+P1, %r13, %r14 ; \ + xorl %ebx, %ebx ; \ + mulxq P1, %rax, %rcx ; \ + adcxq %rax, %r10 ; \ + adoxq %rcx, %r11 ; \ + mulxq 0x8+P1, %rax, %rcx ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + movq 0x18+P1, %rdx ; \ + mulxq 0x8+P1, %rax, %rcx ; \ + adcxq %rax, %r12 ; \ + adoxq %rcx, %r13 ; \ + adcxq %rbx, %r13 ; \ + adoxq %rbx, %r14 ; \ + adcq %rbx, %r14 ; \ + xorl %ebx, %ebx ; \ + adcxq %r9, %r9 ; \ + adoxq %r15, %r9 ; \ + movq 0x8+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r10, %r10 ; \ + adoxq %rax, %r10 ; \ + adcxq %r11, %r11 ; \ + adoxq %rdx, %r11 ; \ + movq 0x10+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r12, %r12 ; \ + adoxq %rax, %r12 ; \ + adcxq %r13, %r13 ; \ + adoxq %rdx, %r13 ; \ + movq 0x18+P1, %rdx ; \ + mulxq %rdx, %rax, %r15 ; \ + adcxq %r14, %r14 ; \ + adoxq %rax, %r14 ; \ + adcxq %rbx, %r15 ; \ + adoxq %rbx, %r15 ; \ + movl $0x26, %edx ; \ + xorl %ebx, %ebx ; \ + mulxq %r12, %rax, %rcx ; \ + adcxq %rax, %r8 ; \ + adoxq %rcx, %r9 ; \ + mulxq %r13, %rax, %rcx ; \ + adcxq %rax, %r9 ; \ + adoxq %rcx, %r10 ; \ + mulxq %r14, %rax, %rcx ; \ + adcxq %rax, %r10 ; \ + adoxq %rcx, %r11 ; \ + mulxq %r15, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + adcxq %rbx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %rbx, %r10 ; \ + adcq %rbx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Plain 4-digit add without any normalization +// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result + +#define add_4(P0,P1,P2) \ + movq P1, %rax ; \ + addq P2, %rax ; \ + movq %rax, P0 ; \ + movq 8+P1, %rax ; \ + adcq 8+P2, %rax ; \ + movq %rax, 8+P0 ; \ + movq 16+P1, %rax ; \ + adcq 16+P2, %rax ; \ + movq %rax, 16+P0 ; \ + movq 24+P1, %rax ; \ + adcq 24+P2, %rax ; \ + movq %rax, 24+P0 + +// Add 5-digit inputs and normalize to 4 digits + +#define add5_4(P0,P1,P2) \ + movq P1, %r8 ; \ + addq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + adcq 8+P2, %r9 ; \ + movq 16+P1, %r10 ; \ + adcq 16+P2, %r10 ; \ + movq 24+P1, %r11 ; \ + adcq 24+P2, %r11 ; \ + movq 32+P1, %r12 ; \ + adcq 32+P2, %r12 ; \ + xorl %ebx, %ebx ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %rbx, %r10 ; \ + adcq %rbx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Subtraction of a pair of numbers < p_25519 just sufficient +// to give a 4-digit result. 
It actually always does (x - z) + (2^255-19) +// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 +// implicitly + +#define sub_4(P0,P1,P2) \ + movq P1, %r8 ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %rax ; \ + sbbq 24+P2, %rax ; \ + subq $19, %r8 ; \ + movq %r8, P0 ; \ + sbbq $0, %r9 ; \ + movq %r9, 8+P0 ; \ + sbbq $0, %r10 ; \ + movq %r10, 16+P0 ; \ + sbbq $0, %rax ; \ + btc $63, %rax ; \ + movq %rax, 24+P0 + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + movq P1, %r8 ; \ + xorl %ebx, %ebx ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movl $38, %ecx ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %rax ; \ + sbbq 24+P2, %rax ; \ + cmovncq %rbx, %rcx ; \ + subq %rcx, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbx, %rax ; \ + movq %r8, P0 ; \ + movq %r9, 8+P0 ; \ + movq %r10, 16+P0 ; \ + movq %rax, 24+P0 + +// 5-digit subtraction with upward bias to make it positive, adding +// 1000 * (2^255 - 19) = 2^256 * 500 - 19000, then normalizing to 4 digits + +#define sub5_4(P0,P1,P2) \ + movq P1, %r8 ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %r11 ; \ + sbbq 24+P2, %r11 ; \ + movq 32+P1, %r12 ; \ + sbbq 32+P2, %r12 ; \ + xorl %ebx, %ebx ; \ + subq $19000, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbx, %r11 ; \ + sbbq %rbx, %r12 ; \ + addq $500, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %rbx, %r10 ; \ + adcq %rbx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Combined z = c * x + y with reduction only < 2 * p_25519 +// It is assumed that 19 * (c * x + y) < 2^60 * 2^256 so we +// don't need a high mul in the final part. + +#define cmadd_4(P0,C1,P2,P3) \ + movq P3, %r8 ; \ + movq 8+P3, %r9 ; \ + movq 16+P3, %r10 ; \ + movq 24+P3, %r11 ; \ + xorl %edi, %edi ; \ + movq $C1, %rdx ; \ + mulxq P2, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq 8+P2, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 16+P2, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 24+P2, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rdi, %rbx ; \ + adcxq %rdi, %rbx ; \ + shldq $0x1, %r11, %rbx ; \ + btr $63, %r11 ; \ + movl $0x13, %edx ; \ + imulq %rdx, %rbx ; \ + addq %rbx, %r8 ; \ + adcq %rdi, %r9 ; \ + adcq %rdi, %r10 ; \ + adcq %rdi, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Multiplex: z := if NZ then x else y + +#define mux_4(P0,P1,P2) \ + movq P1, %rax ; \ + movq P2, %rcx ; \ + cmovzq %rcx, %rax ; \ + movq %rax, P0 ; \ + movq 8+P1, %rax ; \ + movq 8+P2, %rcx ; \ + cmovzq %rcx, %rax ; \ + movq %rax, 8+P0 ; \ + movq 16+P1, %rax ; \ + movq 16+P2, %rcx ; \ + cmovzq %rcx, %rax ; \ + movq %rax, 16+P0 ; \ + movq 24+P1, %rax ; \ + movq 24+P2, %rcx ; \ + cmovzq %rcx, %rax ; \ + movq %rax, 24+P0 + +S2N_BN_SYMBOL(curve25519_x25519): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + +// Save registers, make room for temps, preserve input arguments. 
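+// (Prologue sketch: six callee-saved GPRs are pushed and NSPACE =
+// 13*32 = 416 bytes of locals reserved; the epilogue at the end of
+// the function undoes exactly this in reverse order.)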
+ + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $NSPACE, %rsp + +// Move the output pointer to a stable place + + movq %rdi, res + +// Copy the inputs to the local variables while mangling them: +// +// - The scalar gets turned into 01xxx...xxx000 by tweaking bits. +// Actually the top zero doesn't matter since the loop below +// never looks at it, so we don't literally modify that. +// +// - The point x coord is reduced mod 2^255 *then* mod 2^255-19 + + movq (%rsi), %rax + andq $~7, %rax + movq %rax, (%rsp) + movq 8(%rsi), %rax + movq %rax, 8(%rsp) + movq 16(%rsi), %rax + movq %rax, 16(%rsp) + movq 24(%rsi), %rax + bts $62, %rax + movq %rax, 24(%rsp) + + movq (%rdx), %r8 + movq 8(%rdx), %r9 + movq 16(%rdx), %r10 + movq 24(%rdx), %r11 + btr $63, %r11 + movq $19, %r12 + xorq %r13, %r13 + xorq %r14, %r14 + xorq %r15, %r15 + addq %r8, %r12 + adcq %r9, %r13 + adcq %r10, %r14 + adcq %r11, %r15 + btr $63, %r15 // x >= 2^255 - 19 <=> x + 19 >= 2^255 + cmovcq %r12, %r8 + movq %r8, 32(%rsp) + cmovcq %r13, %r9 + movq %r9, 40(%rsp) + cmovcq %r14, %r10 + movq %r10, 48(%rsp) + cmovcq %r15, %r11 + movq %r11, 56(%rsp) + +// Initialize (xn,zn) = (1,0) and (xm,zm) = (x,1) with swap = 0 +// We use the fact that the point x coordinate is still in registers + + movq $1, %rax + movq %rax, 320(%rsp) + movq %rax, 96(%rsp) + xorl %eax, %eax + movq %rax, swap + movq %rax, 160(%rsp) + movq %rax, 328(%rsp) + movq %rax, 104(%rsp) + movq %rax, 168(%rsp) + movq %rax, 336(%rsp) + movq %rax, 112(%rsp) + movq %rax, 176(%rsp) + movq %rax, 344(%rsp) + movq %rax, 120(%rsp) + movq %rax, 184(%rsp) + movq 32(%rsp), %rax + movq %r8, 256(%rsp) + movq %r9, 264(%rsp) + movq %r10, 272(%rsp) + movq %r11, 280(%rsp) + +// The outer loop over scalar bits from i = 254, ..., i = 0 (inclusive) +// This starts at 254, and so implicitly masks bit 255 of the scalar. + + movl $254, %eax + movq %rax, i + +scalarloop: + +// sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn +// The adds don't need any normalization as they're fed to muls +// Just make sure the subs fit in 4 digits. + + sub_4(dm,xm,zm) + add_4(sn,xn,zn) + sub_4(dn,xn,zn) + add_4(sm,xm,zm) + +// ADDING: dmsn = dm * sn; dnsm = sm * dn +// DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) + + mul_5(dmsn,sn,dm) + + movq i, %rdx + movq %rdx, %rcx + shrq $6, %rdx + movq (%rsp,%rdx,8), %rdx + shrq %cl, %rdx + andq $1, %rdx + cmpq swap, %rdx + movq %rdx, swap + + mux_4(d,dm,dn) + mux_4(s,sm,sn) + + mul_5(dnsm,sm,dn) + +// DOUBLING: d = (xt - zt)^2 normalized only to 4 digits + + sqr_4(d,d) + +// ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 +// DOUBLING: s = (xt + zt)^2, normalized only to 4 digits + + sub5_4(dpro,dmsn,dnsm) + sqr_4(s,s) + add5_4(spro,dmsn,dnsm) + sqr_4(dpro,dpro) + +// DOUBLING: p = 4 * xt * zt = s - d + + sub_twice4(p,s,d) + +// ADDING: xm' = (dmsn + dnsm)^2 + + sqr_p25519(xm,spro) + +// DOUBLING: e = 121666 * p + d + + cmadd_4(e,0x1db42,p,d) + +// DOUBLING: xn' = (xt + zt)^2 * (xt - zt)^2 = s * d + + mul_p25519(xn,s,d) + +// ADDING: zm' = x * (dmsn - dnsm)^2 + + mul_p25519(zm,dpro,pointx) + +// DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) +// = p * (d + 121666 * p) + + mul_p25519(zn,p,e) + +// Loop down as far as 0 (inclusive) + + movq i, %rax + subq $1, %rax + movq %rax, i + jnc scalarloop + +// Since the scalar was forced to be a multiple of 8, we know it's even. 
+// Hence there is no need to multiplex: the projective answer is (xn,zn) +// and we can ignore (xm,zm); indeed we could have avoided the last three +// differential additions and just done the doublings. +// First set up the constant sn = 2^255 - 19 for the modular inverse. + + movq $-19, %rax + movq $-1, %rcx + movq $0x7fffffffffffffff, %rdx + movq %rax, 128(%rsp) + movq %rcx, 136(%rsp) + movq %rcx, 144(%rsp) + movq %rdx, 152(%rsp) + +// Prepare to call the modular inverse function to get zm = 1/zn + + movq $4, %rdi + leaq 96(%rsp), %rsi + leaq 160(%rsp), %rdx + leaq 128(%rsp), %rcx + leaq 192(%rsp), %r8 + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "x86/generic/bignum_modinv.S". Note +// that the stack it uses for its own temporaries is 80 bytes so it +// only overwrites pointx, scalar and dm, which are no longer needed. + + movq %rsi, 0x40(%rsp) + movq %r8, 0x38(%rsp) + movq %rcx, 0x48(%rsp) + leaq (%r8,%rdi,8), %r10 + movq %r10, 0x30(%rsp) + leaq (%r10,%rdi,8), %r15 + xorq %r11, %r11 + xorq %r9, %r9 +copyloop: + movq (%rdx,%r9,8), %rax + movq (%rcx,%r9,8), %rbx + movq %rax, (%r10,%r9,8) + movq %rbx, (%r15,%r9,8) + movq %rbx, (%r8,%r9,8) + movq %r11, (%rsi,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb copyloop + movq (%r8), %rax + movq %rax, %rbx + decq %rbx + movq %rbx, (%r8) + movq %rax, %rbp + movq %rax, %r12 + shlq $0x2, %rbp + subq %rbp, %r12 + xorq $0x2, %r12 + movq %r12, %rbp + imulq %rax, %rbp + movl $0x2, %eax + addq %rbp, %rax + addq $0x1, %rbp + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + movq %r12, 0x28(%rsp) + movq %rdi, %rax + shlq $0x7, %rax + movq %rax, 0x20(%rsp) +outerloop: + movq 0x20(%rsp), %r13 + addq $0x3f, %r13 + shrq $0x6, %r13 + cmpq %rdi, %r13 + cmovaeq %rdi, %r13 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi + xorq %r11, %r11 + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 +toploop: + movq (%r8,%r9,8), %rbx + movq (%r15,%r9,8), %rcx + movq %r11, %r10 + andq %r12, %r10 + andq %rbp, %r11 + movq %rbx, %rax + orq %rcx, %rax + negq %rax + cmovbq %r10, %r14 + cmovbq %r11, %rsi + cmovbq %rbx, %r12 + cmovbq %rcx, %rbp + sbbq %r11, %r11 + incq %r9 + cmpq %r13, %r9 + jb toploop + movq %r12, %rax + orq %rbp, %rax + bsrq %rax, %rcx + xorq $0x3f, %rcx + shldq %cl, %r14, %r12 + shldq %cl, %rsi, %rbp + movq (%r8), %rax + movq %rax, %r14 + movq (%r15), %rax + movq %rax, %rsi + movl $0x1, %r10d + movl $0x0, %r11d + movl $0x0, %ecx + movl $0x1, %edx + movl $0x3a, %r9d + movq %rdi, 0x8(%rsp) + movq %r13, 0x10(%rsp) + movq %r8, (%rsp) + movq %r15, 0x18(%rsp) +innerloop: + movq %rbp, %rax + movq %rsi, %rdi + movq %rcx, %r13 + movq %rdx, %r15 + movq $0x1, %rbx + negq %rdi + andq %r14, %rbx + cmoveq %rbx, %rax + cmoveq %rbx, %rdi + cmoveq %rbx, %r13 + cmoveq %rbx, %r15 + movq %r12, %rbx + addq %r14, %rdi + movq %rdi, %r8 + negq %rdi + subq %rax, %rbx + cmovbq %r12, %rbp + cmovbq %r14, %rsi + cmovbq %r10, %rcx + cmovbq %r11, %rdx + cmovaeq %r8, %rdi + movq %rbx, %r12 + notq %rbx + incq %rbx + cmovbq %rbx, %r12 + movq %rdi, %r14 + addq %r13, %r10 + addq %r15, %r11 + shrq $1, %r12 + shrq $1, %r14 + leaq (%rcx,%rcx), %rcx + leaq (%rdx,%rdx), %rdx + 
decq %r9 + jne innerloop + movq 0x8(%rsp), %rdi + movq 0x10(%rsp), %r13 + movq (%rsp), %r8 + movq 0x18(%rsp), %r15 + movq %r10, (%rsp) + movq %r11, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rdx, 0x18(%rsp) + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + xorq %r14, %r14 + xorq %rsi, %rsi + xorq %r10, %r10 + xorq %r11, %r11 + xorq %r9, %r9 +congloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r12 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %rbp + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq %rdx, %r12 + shrdq $0x3a, %r14, %r10 + movq %r10, (%r8,%r9,8) + movq %r14, %r10 + movq %r12, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3a, %rsi, %r11 + movq %r11, (%r15,%r9,8) + movq %rsi, %r11 + movq %rbp, %rsi + incq %r9 + cmpq %rdi, %r9 + jb congloop + shldq $0x6, %r10, %r14 + shldq $0x6, %r11, %rsi + movq 0x48(%rsp), %r15 + movq (%r8), %rbx + movq 0x28(%rsp), %r12 + imulq %rbx, %r12 + movq (%r15), %rax + mulq %r12 + addq %rbx, %rax + movq %rdx, %r10 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je wmontend +wmontloop: + adcq (%r8,%r9,8), %r10 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %r12 + subq %rbx, %rdx + addq %r10, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r10 + incq %r9 + decq %rcx + jne wmontloop +wmontend: + adcq %r14, %r10 + movq %r10, -0x8(%r8,%rdi,8) + sbbq %r10, %r10 + negq %r10 + movq %rdi, %rcx + xorq %r9, %r9 +wcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne wcmploop + sbbq $0x0, %r10 + sbbq %r10, %r10 + notq %r10 + xorq %rcx, %rcx + xorq %r9, %r9 +wcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r10, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb wcorrloop + movq 0x40(%rsp), %r8 + movq (%r8), %rbx + movq 0x28(%rsp), %rbp + imulq %rbx, %rbp + movq (%r15), %rax + mulq %rbp + addq %rbx, %rax + movq %rdx, %r11 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je zmontend +zmontloop: + adcq (%r8,%r9,8), %r11 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %rbp + subq %rbx, %rdx + addq %r11, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r11 + incq %r9 + decq %rcx + jne zmontloop +zmontend: + adcq %rsi, %r11 + movq %r11, -0x8(%r8,%rdi,8) + sbbq %r11, %r11 + negq %r11 + movq %rdi, %rcx + xorq %r9, %r9 +zcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne zcmploop + sbbq $0x0, %r11 + sbbq %r11, %r11 + notq %r11 + xorq %rcx, %rcx + xorq %r9, %r9 +zcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r11, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb zcorrloop + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi +crossloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %r11 + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + subq %r12, %rdx + subq %rax, %r14 + sbbq %rdx, %r10 + sbbq %r12, %r12 + movq %r14, (%r8,%r9,8) + movq %r10, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + subq %rbp, %rdx + subq %rax, %rsi + sbbq %rdx, %r11 + sbbq %rbp, %rbp + movq %rsi, (%r15,%r9,8) + movq %r11, %rsi + incq %r9 + cmpq 
%r13, %r9 + jb crossloop + xorq %r9, %r9 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r12, %r14 + xorq %rbp, %rsi +optnegloop: + movq (%r8,%r9,8), %rax + xorq %r12, %rax + negq %r10 + adcq $0x0, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rax + xorq %rbp, %rax + negq %r11 + adcq $0x0, %rax + sbbq %r11, %r11 + movq %rax, (%r15,%r9,8) + incq %r9 + cmpq %r13, %r9 + jb optnegloop + subq %r10, %r14 + subq %r11, %rsi + movq %r13, %r9 +shiftloop: + movq -0x8(%r8,%r9,8), %rax + movq %rax, %r10 + shrdq $0x3a, %r14, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %r10, %r14 + movq -0x8(%r15,%r9,8), %rax + movq %rax, %r11 + shrdq $0x3a, %rsi, %rax + movq %rax, -0x8(%r15,%r9,8) + movq %r11, %rsi + decq %r9 + jne shiftloop + notq %rbp + movq 0x48(%rsp), %rcx + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r9, %r9 +fliploop: + movq %rbp, %rdx + movq (%rcx,%r9,8), %rax + andq %rax, %rdx + andq %r12, %rax + movq (%r8,%r9,8), %rbx + xorq %r12, %rbx + negq %r10 + adcq %rbx, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rbx + xorq %rbp, %rbx + negq %r11 + adcq %rbx, %rdx + sbbq %r11, %r11 + movq %rdx, (%r15,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb fliploop + subq $0x3a, 0x20(%rsp) + ja outerloop + +// Since we eventually want to return 0 when the result is the point at +// infinity, we force xn = 0 whenever zn = 0. This avoids building in a +// dependency on the behavior of modular inverse in out-of-scope cases. + + movq 160(%rsp), %rax + orq 168(%rsp), %rax + orq 176(%rsp), %rax + orq 184(%rsp), %rax + movq 320(%rsp), %rcx + cmovzq %rax, %rcx + movq %rcx, 320(%rsp) + movq 328(%rsp), %rcx + cmovzq %rax, %rcx + movq %rcx, 328(%rsp) + movq 336(%rsp), %rcx + cmovzq %rax, %rcx + movq %rcx, 336(%rsp) + movq 344(%rsp), %rcx + cmovzq %rax, %rcx + movq %rcx, 344(%rsp) + +// Now the result is xn * (1/zn). + + movq res, %rbp + mul_p25519(resx,xn,zm) + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/curve25519/curve25519_x25519_alt.S b/x86_att/curve25519/curve25519_x25519_alt.S new file mode 100644 index 0000000000..52600bedeb --- /dev/null +++ b/x86_att/curve25519/curve25519_x25519_alt.S @@ -0,0 +1,1413 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "LICENSE" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +// ---------------------------------------------------------------------------- +// The x25519 function for curve25519 +// Inputs scalar[4], point[4]; output res[4] +// +// extern void curve25519_x25519_alt +// (uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]) +// +// Given a scalar n and the X coordinate of an input point P = (X,Y) on +// curve25519 (Y can live in any extension field of characteristic 2^255-19), +// this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the +// point at infinity. Both n and X inputs are first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748); +// in particular the lower three bits of n are set to zero. +// +// Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point +// Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519_alt) + .text + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable homes for the input result argument during the whole body +// and other variables that are only needed prior to the modular inverse. + +#define res 12*NUMSIZE(%rsp) +#define i 12*NUMSIZE+8(%rsp) +#define swap 12*NUMSIZE+16(%rsp) + +// Pointers to result x coord to be written, assuming the base "res" +// has been loaded into %rbp + +#define resx 0(%rbp) + +// Pointer-offset pairs for temporaries on stack with some aliasing. +// Both dmsn and dnsm need space for >= 5 digits, and we allocate 8 + +#define scalar (0*NUMSIZE)(%rsp) + +#define pointx (1*NUMSIZE)(%rsp) + +#define dm (2*NUMSIZE)(%rsp) + +#define zm (3*NUMSIZE)(%rsp) +#define sm (3*NUMSIZE)(%rsp) +#define dpro (3*NUMSIZE)(%rsp) + +#define sn (4*NUMSIZE)(%rsp) + +#define zn (5*NUMSIZE)(%rsp) +#define dn (5*NUMSIZE)(%rsp) +#define e (5*NUMSIZE)(%rsp) + +#define dmsn (6*NUMSIZE)(%rsp) +#define p (6*NUMSIZE)(%rsp) + +#define xm (8*NUMSIZE)(%rsp) +#define dnsm (8*NUMSIZE)(%rsp) +#define spro (8*NUMSIZE)(%rsp) + +#define xn (10*NUMSIZE)(%rsp) +#define s (10*NUMSIZE)(%rsp) + +#define d (11*NUMSIZE)(%rsp) + +// Total size to reserve on the stack +// This includes space for the 3 other variables above +// and rounds up to a multiple of 32 + +#define NSPACE (13*NUMSIZE) + +// Macros wrapping up the basic field operation calls +// bignum_mul_p25519_alt and bignum_sqr_p25519_alt. +// These two are only trivially different from pure +// function calls to those subroutines. 
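+//
+// As a C-level sketch (illustrative, not the literal code), with z, x, y
+// being uint64_t[4] little-endian digit arrays and p = 2^255 - 19:
+//
+//   mul_p25519(z,x,y)  computes  z = (x * y) mod p
+//   sqr_p25519(z,x)    computes  z = (x * x) mod p
+//
+// via an 8-digit schoolbook product, a first fold of the top four digits
+// using 2^256 == 38 (mod p), then a quotient estimate with an optional
+// correction to land strictly below p.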
+ +#define mul_p25519(P0,P1,P2) \ + movq P1, %rax ; \ + mulq P2; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + xorq %r10, %r10 ; \ + xorq %r11, %r11 ; \ + movq P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + movq 0x8+P1, %rax ; \ + mulq P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + xorq %r12, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq %r12, %r12 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + movq 0x10+P1, %rax ; \ + mulq P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorq %r13, %r13 ; \ + movq P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq %r13, %r13 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x18+P1, %rax ; \ + mulq P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + xorq %r14, %r14 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq %r14, %r14 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + xorq %r15, %r15 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq %r15, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r14 ; \ + adcq %rdx, %r15 ; \ + movl $0x26, %esi ; \ + movq %r12, %rax ; \ + mulq %rsi; \ + addq %rax, %r8 ; \ + adcq %rdx, %r9 ; \ + sbbq %rcx, %rcx ; \ + movq %r13, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + sbbq %rcx, %rcx ; \ + movq %r14, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + sbbq %rcx, %rcx ; \ + movq %r15, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + addq %rax, %r11 ; \ + movq %rdx, %r12 ; \ + adcq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + leaq 0x1(%r12), %rax ; \ + movl $0x13, %esi ; \ + movabsq $0x8000000000000000, %r12 ; \ + orq %r12, %r11 ; \ + imulq %rsi, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + sbbq %rax, %rax ; \ + notq %rax; \ + andq %rsi, %rax ; \ + subq %rax, %r8 ; \ + sbbq %rcx, %r9 ; \ + sbbq %rcx, %r10 ; \ + sbbq %rcx, %r11 ; \ + notq %r12; \ + andq %r12, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +#define sqr_p25519(P0,P1) \ + movq P1, %rax ; \ + mulq %rax; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + xorq %r10, %r10 ; \ + xorq %r11, %r11 ; \ + movq P1, %rax ; \ + mulq 0x8+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r11 ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + xorq %r12, %r12 ; \ + movq 0x8+P1, %rax ; \ + mulq %rax; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r12 ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorq %r13, %r13 ; \ + movq P1, %rax ; \ + mulq 0x18+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + 
adcq $0x0, %r13 ; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x10+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r13 ; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + xorq %r14, %r14 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x18+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r14 ; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x10+P1, %rax ; \ + mulq %rax; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + xorq %r15, %r15 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x18+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r15 ; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq %rax; \ + addq %rax, %r14 ; \ + adcq %rdx, %r15 ; \ + movl $0x26, %esi ; \ + movq %r12, %rax ; \ + mulq %rsi; \ + addq %rax, %r8 ; \ + adcq %rdx, %r9 ; \ + sbbq %rcx, %rcx ; \ + movq %r13, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + sbbq %rcx, %rcx ; \ + movq %r14, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + sbbq %rcx, %rcx ; \ + movq %r15, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + addq %rax, %r11 ; \ + movq %rdx, %r12 ; \ + adcq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + leaq 0x1(%r12), %rax ; \ + movl $0x13, %esi ; \ + movabsq $0x8000000000000000, %r12 ; \ + orq %r12, %r11 ; \ + imulq %rsi, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + sbbq %rax, %rax ; \ + notq %rax; \ + andq %rsi, %rax ; \ + subq %rax, %r8 ; \ + sbbq %rcx, %r9 ; \ + sbbq %rcx, %r10 ; \ + sbbq %rcx, %r11 ; \ + notq %r12; \ + andq %r12, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Multiplication just giving a 5-digit result (actually < 39 * p_25519) +// by not doing anything beyond the first stage of reduction + +#define mul_5(P0,P1,P2) \ + movq P1, %rax ; \ + mulq P2; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + xorq %r10, %r10 ; \ + xorq %r11, %r11 ; \ + movq P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + movq 0x8+P1, %rax ; \ + mulq P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + xorq %r12, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq %r12, %r12 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + movq 0x10+P1, %rax ; \ + mulq P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorq %r13, %r13 ; \ + movq P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq %r13, %r13 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x18+P1, %rax ; \ + mulq P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + xorq %r14, %r14 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq %r14, %r14 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + xorq %r15, %r15 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x18+P2; \ + addq 
%rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq %r15, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r14 ; \ + adcq %rdx, %r15 ; \ + movl $0x26, %esi ; \ + movq %r12, %rax ; \ + mulq %rsi; \ + addq %rax, %r8 ; \ + adcq %rdx, %r9 ; \ + sbbq %rcx, %rcx ; \ + movq %r13, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + sbbq %rcx, %rcx ; \ + movq %r14, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + sbbq %rcx, %rcx ; \ + movq %r15, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + addq %rax, %r11 ; \ + movq %rdx, %r12 ; \ + adcq %rcx, %r12 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 ; \ + movq %r12, 0x20+P0 + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the final +// optional correction. + +#define sqr_4(P0,P1) \ + movq P1, %rax ; \ + mulq %rax; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + xorq %r10, %r10 ; \ + xorq %r11, %r11 ; \ + movq P1, %rax ; \ + mulq 0x8+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r11 ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + xorq %r12, %r12 ; \ + movq 0x8+P1, %rax ; \ + mulq %rax; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r12 ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorq %r13, %r13 ; \ + movq P1, %rax ; \ + mulq 0x18+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r13 ; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x10+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r13 ; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + xorq %r14, %r14 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x18+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r14 ; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x10+P1, %rax ; \ + mulq %rax; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + xorq %r15, %r15 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x18+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r15 ; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq %rax; \ + addq %rax, %r14 ; \ + adcq %rdx, %r15 ; \ + movl $0x26, %esi ; \ + movq %r12, %rax ; \ + mulq %rsi; \ + addq %rax, %r8 ; \ + adcq %rdx, %r9 ; \ + sbbq %rcx, %rcx ; \ + movq %r13, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + sbbq %rcx, %rcx ; \ + movq %r14, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + sbbq %rcx, %rcx ; \ + movq %r15, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + addq %rax, %r11 ; \ + movq %rdx, %r12 ; \ + adcq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Plain 4-digit add without any normalization +// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result + +#define add_4(P0,P1,P2) 
\ + movq P1, %rax ; \ + addq P2, %rax ; \ + movq %rax, P0 ; \ + movq 8+P1, %rax ; \ + adcq 8+P2, %rax ; \ + movq %rax, 8+P0 ; \ + movq 16+P1, %rax ; \ + adcq 16+P2, %rax ; \ + movq %rax, 16+P0 ; \ + movq 24+P1, %rax ; \ + adcq 24+P2, %rax ; \ + movq %rax, 24+P0 + +// Add 5-digit inputs and normalize to 4 digits + +#define add5_4(P0,P1,P2) \ + movq P1, %r8 ; \ + addq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + adcq 8+P2, %r9 ; \ + movq 16+P1, %r10 ; \ + adcq 16+P2, %r10 ; \ + movq 24+P1, %r11 ; \ + adcq 24+P2, %r11 ; \ + movq 32+P1, %r12 ; \ + adcq 32+P2, %r12 ; \ + xorl %ebx, %ebx ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %rbx, %r10 ; \ + adcq %rbx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Subtraction of a pair of numbers < p_25519 just sufficient +// to give a 4-digit result. It actually always does (x - z) + (2^255-19) +// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 +// implicitly + +#define sub_4(P0,P1,P2) \ + movq P1, %r8 ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %rax ; \ + sbbq 24+P2, %rax ; \ + subq $19, %r8 ; \ + movq %r8, P0 ; \ + sbbq $0, %r9 ; \ + movq %r9, 8+P0 ; \ + sbbq $0, %r10 ; \ + movq %r10, 16+P0 ; \ + sbbq $0, %rax ; \ + btc $63, %rax ; \ + movq %rax, 24+P0 + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + movq P1, %r8 ; \ + xorl %ebx, %ebx ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movl $38, %ecx ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %rax ; \ + sbbq 24+P2, %rax ; \ + cmovncq %rbx, %rcx ; \ + subq %rcx, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbx, %rax ; \ + movq %r8, P0 ; \ + movq %r9, 8+P0 ; \ + movq %r10, 16+P0 ; \ + movq %rax, 24+P0 + +// 5-digit subtraction with upward bias to make it positive, adding +// 1000 * (2^255 - 19) = 2^256 * 500 - 19000, then normalizing to 4 digits + +#define sub5_4(P0,P1,P2) \ + movq P1, %r8 ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %r11 ; \ + sbbq 24+P2, %r11 ; \ + movq 32+P1, %r12 ; \ + sbbq 32+P2, %r12 ; \ + xorl %ebx, %ebx ; \ + subq $19000, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbx, %r11 ; \ + sbbq %rbx, %r12 ; \ + addq $500, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %rbx, %r10 ; \ + adcq %rbx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Combined z = c * x + y with reduction only < 2 * p_25519 +// It is assumed that 19 * (c * x + y) < 2^60 * 2^256 so we +// don't need a high mul in the final part. 
+ +#define cmadd_4(P0,C1,P2,P3) \ + movq $C1, %rsi ; \ + movq P2, %rax ; \ + mulq %rsi; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + movq 0x8+P2, %rax ; \ + xorq %r10, %r10 ; \ + mulq %rsi; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + movq 0x10+P2, %rax ; \ + mulq %rsi; \ + addq %rax, %r10 ; \ + adcq $0x0, %rdx ; \ + movq 0x18+P2, %rax ; \ + movq %rdx, %r11 ; \ + mulq %rsi; \ + xorl %esi, %esi ; \ + addq %rax, %r11 ; \ + adcq %rsi, %rdx ; \ + addq P3, %r8 ; \ + adcq 0x8+P3, %r9 ; \ + adcq 0x10+P3, %r10 ; \ + adcq 0x18+P3, %r11 ; \ + adcq %rsi, %rdx ; \ + shldq $0x1, %r11, %rdx ; \ + btr $63, %r11 ; \ + movl $0x13, %ebx ; \ + imulq %rbx, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rsi, %r9 ; \ + adcq %rsi, %r10 ; \ + adcq %rsi, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Multiplex: z := if NZ then x else y + +#define mux_4(P0,P1,P2) \ + movq P1, %rax ; \ + movq P2, %rcx ; \ + cmovzq %rcx, %rax ; \ + movq %rax, P0 ; \ + movq 8+P1, %rax ; \ + movq 8+P2, %rcx ; \ + cmovzq %rcx, %rax ; \ + movq %rax, 8+P0 ; \ + movq 16+P1, %rax ; \ + movq 16+P2, %rcx ; \ + cmovzq %rcx, %rax ; \ + movq %rax, 16+P0 ; \ + movq 24+P1, %rax ; \ + movq 24+P2, %rcx ; \ + cmovzq %rcx, %rax ; \ + movq %rax, 24+P0 + +S2N_BN_SYMBOL(curve25519_x25519_alt): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + +// Save registers, make room for temps, preserve input arguments. + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $NSPACE, %rsp + +// Move the output pointer to a stable place + + movq %rdi, res + +// Copy the inputs to the local variables while mangling them: +// +// - The scalar gets turned into 01xxx...xxx000 by tweaking bits. +// Actually the top zero doesn't matter since the loop below +// never looks at it, so we don't literally modify that. +// +// - The point x coord is reduced mod 2^255 *then* mod 2^255-19 + + movq (%rsi), %rax + andq $~7, %rax + movq %rax, (%rsp) + movq 8(%rsi), %rax + movq %rax, 8(%rsp) + movq 16(%rsi), %rax + movq %rax, 16(%rsp) + movq 24(%rsi), %rax + bts $62, %rax + movq %rax, 24(%rsp) + + movq (%rdx), %r8 + movq 8(%rdx), %r9 + movq 16(%rdx), %r10 + movq 24(%rdx), %r11 + btr $63, %r11 + movq $19, %r12 + xorq %r13, %r13 + xorq %r14, %r14 + xorq %r15, %r15 + addq %r8, %r12 + adcq %r9, %r13 + adcq %r10, %r14 + adcq %r11, %r15 + btr $63, %r15 // x >= 2^255 - 19 <=> x + 19 >= 2^255 + cmovcq %r12, %r8 + movq %r8, 32(%rsp) + cmovcq %r13, %r9 + movq %r9, 40(%rsp) + cmovcq %r14, %r10 + movq %r10, 48(%rsp) + cmovcq %r15, %r11 + movq %r11, 56(%rsp) + +// Initialize (xn,zn) = (1,0) and (xm,zm) = (x,1) with swap = 0 +// We use the fact that the point x coordinate is still in registers + + movq $1, %rax + movq %rax, 320(%rsp) + movq %rax, 96(%rsp) + xorl %eax, %eax + movq %rax, swap + movq %rax, 160(%rsp) + movq %rax, 328(%rsp) + movq %rax, 104(%rsp) + movq %rax, 168(%rsp) + movq %rax, 336(%rsp) + movq %rax, 112(%rsp) + movq %rax, 176(%rsp) + movq %rax, 344(%rsp) + movq %rax, 120(%rsp) + movq %rax, 184(%rsp) + movq 32(%rsp), %rax + movq %r8, 256(%rsp) + movq %r9, 264(%rsp) + movq %r10, 272(%rsp) + movq %r11, 280(%rsp) + +// The outer loop over scalar bits from i = 254, ..., i = 0 (inclusive) +// This starts at 254, and so implicitly masks bit 255 of the scalar. 
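+//
+// Pseudocode sketch of the bit selection in each iteration below
+// (illustrative only; the real code is branch-free via cmovz):
+//
+//   b = (scalar >> i) & 1;                   // current scalar bit
+//   (d,s) = (b == swap) ? (dn,sn) : (dm,sm); // mux_4 picks the pair
+//   swap = b;                                // remember for next round
+//
+// so the point actually doubled each round tracks the ladder's
+// conditional swap without ever branching on secret data.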
+ + movl $254, %eax + movq %rax, i + +scalarloop: + +// sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn +// The adds don't need any normalization as they're fed to muls +// Just make sure the subs fit in 4 digits. + + sub_4(dm,xm,zm) + add_4(sn,xn,zn) + sub_4(dn,xn,zn) + add_4(sm,xm,zm) + +// ADDING: dmsn = dm * sn; dnsm = sm * dn +// DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) + + mul_5(dmsn,sn,dm) + + movq i, %rdx + movq %rdx, %rcx + shrq $6, %rdx + movq (%rsp,%rdx,8), %rdx + shrq %cl, %rdx + andq $1, %rdx + cmpq swap, %rdx + movq %rdx, swap + + mux_4(d,dm,dn) + mux_4(s,sm,sn) + + mul_5(dnsm,sm,dn) + +// DOUBLING: d = (xt - zt)^2 normalized only to 4 digits + + sqr_4(d,d) + +// ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 +// DOUBLING: s = (xt + zt)^2, normalized only to 4 digits + + sub5_4(dpro,dmsn,dnsm) + sqr_4(s,s) + add5_4(spro,dmsn,dnsm) + sqr_4(dpro,dpro) + +// DOUBLING: p = 4 * xt * zt = s - d + + sub_twice4(p,s,d) + +// ADDING: xm' = (dmsn + dnsm)^2 + + sqr_p25519(xm,spro) + +// DOUBLING: e = 121666 * p + d + + cmadd_4(e,0x1db42,p,d) + +// DOUBLING: xn' = (xt + zt)^2 * (xt - zt)^2 = s * d + + mul_p25519(xn,s,d) + +// ADDING: zm' = x * (dmsn - dnsm)^2 + + mul_p25519(zm,dpro,pointx) + +// DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) +// = p * (d + 121666 * p) + + mul_p25519(zn,p,e) + +// Loop down as far as 0 (inclusive) + + movq i, %rax + subq $1, %rax + movq %rax, i + jnc scalarloop + +// Since the scalar was forced to be a multiple of 8, we know it's even. +// Hence there is no need to multiplex: the projective answer is (xn,zn) +// and we can ignore (xm,zm); indeed we could have avoided the last three +// differential additions and just done the doublings. +// First set up the constant sn = 2^255 - 19 for the modular inverse. + + movq $-19, %rax + movq $-1, %rcx + movq $0x7fffffffffffffff, %rdx + movq %rax, 128(%rsp) + movq %rcx, 136(%rsp) + movq %rcx, 144(%rsp) + movq %rdx, 152(%rsp) + +// Prepare to call the modular inverse function to get zm = 1/zn + + movq $4, %rdi + leaq 96(%rsp), %rsi + leaq 160(%rsp), %rdx + leaq 128(%rsp), %rcx + leaq 192(%rsp), %r8 + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "x86/generic/bignum_modinv.S". Note +// that the stack it uses for its own temporaries is 80 bytes so it +// only overwrites pointx, scalar and dm, which are no longer needed. 
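+//
+// Call shape being inlined, as a sketch (argument registers follow the
+// same ABI order set up just above): bignum_modinv(k=4, z=zm, a=zn,
+// b=sn, t) with sn preloaded with p_25519 and t = 192(%rsp), i.e. in
+// effect zm := zn^-1 (mod 2^255 - 19).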
+ + movq %rsi, 0x40(%rsp) + movq %r8, 0x38(%rsp) + movq %rcx, 0x48(%rsp) + leaq (%r8,%rdi,8), %r10 + movq %r10, 0x30(%rsp) + leaq (%r10,%rdi,8), %r15 + xorq %r11, %r11 + xorq %r9, %r9 +copyloop: + movq (%rdx,%r9,8), %rax + movq (%rcx,%r9,8), %rbx + movq %rax, (%r10,%r9,8) + movq %rbx, (%r15,%r9,8) + movq %rbx, (%r8,%r9,8) + movq %r11, (%rsi,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb copyloop + movq (%r8), %rax + movq %rax, %rbx + decq %rbx + movq %rbx, (%r8) + movq %rax, %rbp + movq %rax, %r12 + shlq $0x2, %rbp + subq %rbp, %r12 + xorq $0x2, %r12 + movq %r12, %rbp + imulq %rax, %rbp + movl $0x2, %eax + addq %rbp, %rax + addq $0x1, %rbp + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + movq %r12, 0x28(%rsp) + movq %rdi, %rax + shlq $0x7, %rax + movq %rax, 0x20(%rsp) +outerloop: + movq 0x20(%rsp), %r13 + addq $0x3f, %r13 + shrq $0x6, %r13 + cmpq %rdi, %r13 + cmovaeq %rdi, %r13 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi + xorq %r11, %r11 + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 +toploop: + movq (%r8,%r9,8), %rbx + movq (%r15,%r9,8), %rcx + movq %r11, %r10 + andq %r12, %r10 + andq %rbp, %r11 + movq %rbx, %rax + orq %rcx, %rax + negq %rax + cmovbq %r10, %r14 + cmovbq %r11, %rsi + cmovbq %rbx, %r12 + cmovbq %rcx, %rbp + sbbq %r11, %r11 + incq %r9 + cmpq %r13, %r9 + jb toploop + movq %r12, %rax + orq %rbp, %rax + bsrq %rax, %rcx + xorq $0x3f, %rcx + shldq %cl, %r14, %r12 + shldq %cl, %rsi, %rbp + movq (%r8), %rax + movq %rax, %r14 + movq (%r15), %rax + movq %rax, %rsi + movl $0x1, %r10d + movl $0x0, %r11d + movl $0x0, %ecx + movl $0x1, %edx + movl $0x3a, %r9d + movq %rdi, 0x8(%rsp) + movq %r13, 0x10(%rsp) + movq %r8, (%rsp) + movq %r15, 0x18(%rsp) +innerloop: + movq %rbp, %rax + movq %rsi, %rdi + movq %rcx, %r13 + movq %rdx, %r15 + movq $0x1, %rbx + negq %rdi + andq %r14, %rbx + cmoveq %rbx, %rax + cmoveq %rbx, %rdi + cmoveq %rbx, %r13 + cmoveq %rbx, %r15 + movq %r12, %rbx + addq %r14, %rdi + movq %rdi, %r8 + negq %rdi + subq %rax, %rbx + cmovbq %r12, %rbp + cmovbq %r14, %rsi + cmovbq %r10, %rcx + cmovbq %r11, %rdx + cmovaeq %r8, %rdi + movq %rbx, %r12 + notq %rbx + incq %rbx + cmovbq %rbx, %r12 + movq %rdi, %r14 + addq %r13, %r10 + addq %r15, %r11 + shrq $1, %r12 + shrq $1, %r14 + leaq (%rcx,%rcx), %rcx + leaq (%rdx,%rdx), %rdx + decq %r9 + jne innerloop + movq 0x8(%rsp), %rdi + movq 0x10(%rsp), %r13 + movq (%rsp), %r8 + movq 0x18(%rsp), %r15 + movq %r10, (%rsp) + movq %r11, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rdx, 0x18(%rsp) + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + xorq %r14, %r14 + xorq %rsi, %rsi + xorq %r10, %r10 + xorq %r11, %r11 + xorq %r9, %r9 +congloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r12 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %rbp + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq %rdx, %r12 + shrdq $0x3a, %r14, %r10 + movq %r10, (%r8,%r9,8) + movq %r14, %r10 + movq %r12, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3a, %rsi, %r11 + movq %r11, (%r15,%r9,8) + movq %rsi, %r11 + movq %rbp, %rsi + incq %r9 + cmpq %rdi, %r9 + jb congloop + shldq $0x6, %r10, %r14 + shldq $0x6, %r11, %rsi + movq 0x48(%rsp), %r15 + movq (%r8), %rbx + movq 
0x28(%rsp), %r12 + imulq %rbx, %r12 + movq (%r15), %rax + mulq %r12 + addq %rbx, %rax + movq %rdx, %r10 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je wmontend +wmontloop: + adcq (%r8,%r9,8), %r10 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %r12 + subq %rbx, %rdx + addq %r10, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r10 + incq %r9 + decq %rcx + jne wmontloop +wmontend: + adcq %r14, %r10 + movq %r10, -0x8(%r8,%rdi,8) + sbbq %r10, %r10 + negq %r10 + movq %rdi, %rcx + xorq %r9, %r9 +wcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne wcmploop + sbbq $0x0, %r10 + sbbq %r10, %r10 + notq %r10 + xorq %rcx, %rcx + xorq %r9, %r9 +wcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r10, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb wcorrloop + movq 0x40(%rsp), %r8 + movq (%r8), %rbx + movq 0x28(%rsp), %rbp + imulq %rbx, %rbp + movq (%r15), %rax + mulq %rbp + addq %rbx, %rax + movq %rdx, %r11 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je zmontend +zmontloop: + adcq (%r8,%r9,8), %r11 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %rbp + subq %rbx, %rdx + addq %r11, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r11 + incq %r9 + decq %rcx + jne zmontloop +zmontend: + adcq %rsi, %r11 + movq %r11, -0x8(%r8,%rdi,8) + sbbq %r11, %r11 + negq %r11 + movq %rdi, %rcx + xorq %r9, %r9 +zcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne zcmploop + sbbq $0x0, %r11 + sbbq %r11, %r11 + notq %r11 + xorq %rcx, %rcx + xorq %r9, %r9 +zcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r11, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb zcorrloop + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi +crossloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %r11 + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + subq %r12, %rdx + subq %rax, %r14 + sbbq %rdx, %r10 + sbbq %r12, %r12 + movq %r14, (%r8,%r9,8) + movq %r10, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + subq %rbp, %rdx + subq %rax, %rsi + sbbq %rdx, %r11 + sbbq %rbp, %rbp + movq %rsi, (%r15,%r9,8) + movq %r11, %rsi + incq %r9 + cmpq %r13, %r9 + jb crossloop + xorq %r9, %r9 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r12, %r14 + xorq %rbp, %rsi +optnegloop: + movq (%r8,%r9,8), %rax + xorq %r12, %rax + negq %r10 + adcq $0x0, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rax + xorq %rbp, %rax + negq %r11 + adcq $0x0, %rax + sbbq %r11, %r11 + movq %rax, (%r15,%r9,8) + incq %r9 + cmpq %r13, %r9 + jb optnegloop + subq %r10, %r14 + subq %r11, %rsi + movq %r13, %r9 +shiftloop: + movq -0x8(%r8,%r9,8), %rax + movq %rax, %r10 + shrdq $0x3a, %r14, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %r10, %r14 + movq -0x8(%r15,%r9,8), %rax + movq %rax, %r11 + shrdq $0x3a, %rsi, %rax + movq %rax, -0x8(%r15,%r9,8) + movq %r11, %rsi + decq %r9 + jne shiftloop + notq %rbp + movq 0x48(%rsp), %rcx + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r9, %r9 +fliploop: + movq %rbp, %rdx + movq (%rcx,%r9,8), %rax + andq %rax, %rdx + andq %r12, %rax + movq (%r8,%r9,8), %rbx + xorq %r12, %rbx + negq %r10 + adcq %rbx, %rax + sbbq 
%r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rbx + xorq %rbp, %rbx + negq %r11 + adcq %rbx, %rdx + sbbq %r11, %r11 + movq %rdx, (%r15,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb fliploop + subq $0x3a, 0x20(%rsp) + ja outerloop + +// Since we eventually want to return 0 when the result is the point at +// infinity, we force xn = 0 whenever zn = 0. This avoids building in a +// dependency on the behavior of modular inverse in out-of-scope cases. + + movq 160(%rsp), %rax + orq 168(%rsp), %rax + orq 176(%rsp), %rax + orq 184(%rsp), %rax + movq 320(%rsp), %rcx + cmovzq %rax, %rcx + movq %rcx, 320(%rsp) + movq 328(%rsp), %rcx + cmovzq %rax, %rcx + movq %rcx, 328(%rsp) + movq 336(%rsp), %rcx + cmovzq %rax, %rcx + movq %rcx, 336(%rsp) + movq 344(%rsp), %rcx + cmovzq %rax, %rcx + movq %rcx, 344(%rsp) + +// Now the result is xn * (1/zn). + + movq res, %rbp + mul_p25519(resx,xn,zm) + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif From 6774fef748b3cb505938dd26215078053e5dad64 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 11 Oct 2022 16:44:05 -0700 Subject: [PATCH 16/42] Add curve25519 field doubling A trivial simplification of bignum_add_p25519. Also tweaked a few ARM functions where "adcs" could be "adc" because the carry is not needed. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/07994facb87fa5d9e5d996d7a8f07d8189a36c6b --- arm/curve25519/curve25519_x25519.S | 2 +- arm/curve25519/curve25519_x25519_alt.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arm/curve25519/curve25519_x25519.S b/arm/curve25519/curve25519_x25519.S index bea6d50ce4..fcd05cb5eb 100644 --- a/arm/curve25519/curve25519_x25519.S +++ b/arm/curve25519/curve25519_x25519.S @@ -648,7 +648,7 @@ ldp x2, x3, [p1+16]; \ ldp x6, x7, [p2+16]; \ adcs x2, x2, x6; \ - adcs x3, x3, x7; \ + adc x3, x3, x7; \ stp x0, x1, [p0]; \ stp x2, x3, [p0+16] diff --git a/arm/curve25519/curve25519_x25519_alt.S b/arm/curve25519/curve25519_x25519_alt.S index 8a0495f97c..662446a901 100644 --- a/arm/curve25519/curve25519_x25519_alt.S +++ b/arm/curve25519/curve25519_x25519_alt.S @@ -462,7 +462,7 @@ ldp x2, x3, [p1+16]; \ ldp x6, x7, [p2+16]; \ adcs x2, x2, x6; \ - adcs x3, x3, x7; \ + adc x3, x3, x7; \ stp x0, x1, [p0]; \ stp x2, x3, [p0+16] From 00257077bc646cd2f393bf21dd2f32b2229cf6eb Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 26 Oct 2022 14:09:24 -0700 Subject: [PATCH 17/42] Update forgotten AT&T syntax forms in line with previous changes s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/b96beae0d76006634a53e4d126a6bb04924f5310 --- x86_att/curve25519/curve25519_x25519.S | 6 ++---- x86_att/curve25519/curve25519_x25519_alt.S | 12 ++++-------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/x86_att/curve25519/curve25519_x25519.S b/x86_att/curve25519/curve25519_x25519.S index 09d62751b8..e7fee5e71f 100644 --- a/x86_att/curve25519/curve25519_x25519.S +++ b/x86_att/curve25519/curve25519_x25519.S @@ -167,10 +167,9 @@ adoxq %rdi, %r12 ; \ adcxq %rdi, %r12 ; \ shldq $0x1, %r11, %r12 ; \ - movabs $0x8000000000000000, %rcx ; \ movl $0x13, %edx ; \ incq %r12; \ - orq %rcx, %r11 ; \ + bts $63, %r11 ; \ mulxq %r12, %rax, %rbx ; \ addq %rax, %r8 ; \ adcq %rbx, %r9 ; \ @@ -183,8 +182,7 @@ sbbq %rdi, %r9 ; \ sbbq %rdi, %r10 ; \ 
sbbq %rdi, %r11 ; \ - notq %rcx; \ - andq %rcx, %r11 ; \ + btr $63, %r11 ; \ movq %r8, P0 ; \ movq %r9, 0x8+P0 ; \ movq %r10, 0x10+P0 ; \ diff --git a/x86_att/curve25519/curve25519_x25519_alt.S b/x86_att/curve25519/curve25519_x25519_alt.S index 52600bedeb..d3700e4b1f 100644 --- a/x86_att/curve25519/curve25519_x25519_alt.S +++ b/x86_att/curve25519/curve25519_x25519_alt.S @@ -207,8 +207,7 @@ shldq $0x1, %r11, %r12 ; \ leaq 0x1(%r12), %rax ; \ movl $0x13, %esi ; \ - movabsq $0x8000000000000000, %r12 ; \ - orq %r12, %r11 ; \ + bts $63, %r11 ; \ imulq %rsi, %rax ; \ addq %rax, %r8 ; \ adcq %rcx, %r9 ; \ @@ -221,8 +220,7 @@ sbbq %rcx, %r9 ; \ sbbq %rcx, %r10 ; \ sbbq %rcx, %r11 ; \ - notq %r12; \ - andq %r12, %r11 ; \ + btr $63, %r11 ; \ movq %r8, P0 ; \ movq %r9, 0x8+P0 ; \ movq %r10, 0x10+P0 ; \ @@ -329,8 +327,7 @@ shldq $0x1, %r11, %r12 ; \ leaq 0x1(%r12), %rax ; \ movl $0x13, %esi ; \ - movabsq $0x8000000000000000, %r12 ; \ - orq %r12, %r11 ; \ + bts $63, %r11 ; \ imulq %rsi, %rax ; \ addq %rax, %r8 ; \ adcq %rcx, %r9 ; \ @@ -343,8 +340,7 @@ sbbq %rcx, %r9 ; \ sbbq %rcx, %r10 ; \ sbbq %rcx, %r11 ; \ - notq %r12; \ - andq %r12, %r11 ; \ + btr $63, %r11 ; \ movq %r8, P0 ; \ movq %r9, 0x8+P0 ; \ movq %r10, 0x10+P0 ; \ From a49bf784263f3fde68ab9f4c673975a1296d1481 Mon Sep 17 00:00:00 2001 From: sachiang Date: Fri, 28 Oct 2022 14:41:18 -0700 Subject: [PATCH 18/42] update license headers of .c and .s files s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/f6670af4f5dc4df40d334a79f39a4cbf727b8510 --- arm/curve25519/bignum_neg_p25519.S | 16 ++-------------- arm/curve25519/curve25519_x25519.S | 16 ++-------------- arm/curve25519/curve25519_x25519_alt.S | 16 ++-------------- arm/fastmul/bignum_emontredc_8n.S | 16 ++-------------- arm/fastmul/bignum_kmul_16_32.S | 16 ++-------------- arm/fastmul/bignum_kmul_32_64.S | 16 ++-------------- arm/fastmul/bignum_ksqr_16_32.S | 16 ++-------------- arm/fastmul/bignum_ksqr_32_64.S | 16 ++-------------- arm/generic/bignum_ge.S | 16 ++-------------- arm/generic/bignum_mul.S | 16 ++-------------- arm/generic/bignum_optsub.S | 16 ++-------------- arm/generic/bignum_sqr.S | 16 ++-------------- arm/p384/bignum_add_p384.S | 16 ++-------------- arm/p384/bignum_bigendian_6.S | 16 ++-------------- arm/p384/bignum_cmul_p384.S | 16 ++-------------- arm/p384/bignum_deamont_p384.S | 16 ++-------------- arm/p384/bignum_demont_p384.S | 16 ++-------------- arm/p384/bignum_double_p384.S | 16 ++-------------- arm/p384/bignum_half_p384.S | 16 ++-------------- arm/p384/bignum_littleendian_6.S | 16 ++-------------- arm/p384/bignum_mod_n384.S | 16 ++-------------- arm/p384/bignum_mod_n384_6.S | 16 ++-------------- arm/p384/bignum_mod_p384.S | 16 ++-------------- arm/p384/bignum_mod_p384_6.S | 16 ++-------------- arm/p384/bignum_montmul_p384.S | 16 ++-------------- arm/p384/bignum_montmul_p384_alt.S | 16 ++-------------- arm/p384/bignum_montsqr_p384.S | 16 ++-------------- arm/p384/bignum_montsqr_p384_alt.S | 16 ++-------------- arm/p384/bignum_mux_6.S | 16 ++-------------- arm/p384/bignum_neg_p384.S | 16 ++-------------- arm/p384/bignum_nonzero_6.S | 16 ++-------------- arm/p384/bignum_optneg_p384.S | 16 ++-------------- arm/p384/bignum_sub_p384.S | 16 ++-------------- arm/p384/bignum_tomont_p384.S | 16 ++-------------- arm/p384/bignum_triple_p384.S | 16 ++-------------- arm/p384/p384_montjadd.S | 16 ++-------------- arm/p384/p384_montjdouble.S | 16 ++-------------- arm/p384/p384_montjmixadd.S | 16 ++-------------- arm/p521/bignum_add_p521.S | 16 
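
In C terms, the two bit-test forms this patch substitutes act only on bit 63 of the top limb (set_top_bit/clear_top_bit are illustrative names, not functions in the codebase):

#include <stdint.h>

/* bts $63, %r11 -- replaces the movabs $0x8000000000000000 + orq pair  */
static inline uint64_t set_top_bit(uint64_t x)
{
    return x | (UINT64_C(1) << 63);
}

/* btr $63, %r11 -- replaces the notq + andq pair on the same mask      */
static inline uint64_t clear_top_bit(uint64_t x)
{
    return x & ~(UINT64_C(1) << 63);
}

Besides saving an instruction at each site, this drops the movabsq constant load entirely, so no register has to be dedicated to holding the 2^63 mask across the reduction sequence.
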
++-------------- arm/p521/bignum_cmul_p521.S | 16 ++-------------- arm/p521/bignum_deamont_p521.S | 16 ++-------------- arm/p521/bignum_demont_p521.S | 16 ++-------------- arm/p521/bignum_double_p521.S | 16 ++-------------- arm/p521/bignum_fromlebytes_p521.S | 16 ++-------------- arm/p521/bignum_half_p521.S | 16 ++-------------- arm/p521/bignum_mod_n521_9.S | 16 ++-------------- arm/p521/bignum_mod_p521_9.S | 16 ++-------------- arm/p521/bignum_montmul_p521.S | 16 ++-------------- arm/p521/bignum_montmul_p521_alt.S | 16 ++-------------- arm/p521/bignum_montsqr_p521.S | 16 ++-------------- arm/p521/bignum_montsqr_p521_alt.S | 16 ++-------------- arm/p521/bignum_mul_p521.S | 16 ++-------------- arm/p521/bignum_mul_p521_alt.S | 16 ++-------------- arm/p521/bignum_neg_p521.S | 16 ++-------------- arm/p521/bignum_optneg_p521.S | 16 ++-------------- arm/p521/bignum_sqr_p521.S | 16 ++-------------- arm/p521/bignum_sqr_p521_alt.S | 16 ++-------------- arm/p521/bignum_sub_p521.S | 16 ++-------------- arm/p521/bignum_tolebytes_p521.S | 16 ++-------------- arm/p521/bignum_tomont_p521.S | 16 ++-------------- arm/p521/bignum_triple_p521.S | 16 ++-------------- arm/p521/p521_jadd.S | 16 ++-------------- arm/p521/p521_jdouble.S | 16 ++-------------- arm/p521/p521_jmixadd.S | 16 ++-------------- x86_att/curve25519/bignum_neg_p25519.S | 16 ++-------------- x86_att/curve25519/curve25519_x25519.S | 16 ++-------------- x86_att/curve25519/curve25519_x25519_alt.S | 16 ++-------------- x86_att/p384/bignum_add_p384.S | 16 ++-------------- x86_att/p384/bignum_bigendian_6.S | 16 ++-------------- x86_att/p384/bignum_cmul_p384.S | 16 ++-------------- x86_att/p384/bignum_cmul_p384_alt.S | 16 ++-------------- x86_att/p384/bignum_deamont_p384.S | 16 ++-------------- x86_att/p384/bignum_deamont_p384_alt.S | 16 ++-------------- x86_att/p384/bignum_demont_p384.S | 16 ++-------------- x86_att/p384/bignum_demont_p384_alt.S | 16 ++-------------- x86_att/p384/bignum_double_p384.S | 16 ++-------------- x86_att/p384/bignum_half_p384.S | 16 ++-------------- x86_att/p384/bignum_littleendian_6.S | 16 ++-------------- x86_att/p384/bignum_mod_n384.S | 16 ++-------------- x86_att/p384/bignum_mod_n384_6.S | 16 ++-------------- x86_att/p384/bignum_mod_n384_alt.S | 16 ++-------------- x86_att/p384/bignum_mod_p384.S | 16 ++-------------- x86_att/p384/bignum_mod_p384_6.S | 16 ++-------------- x86_att/p384/bignum_mod_p384_alt.S | 16 ++-------------- x86_att/p384/bignum_montmul_p384.S | 16 ++-------------- x86_att/p384/bignum_montmul_p384_alt.S | 16 ++-------------- x86_att/p384/bignum_montsqr_p384.S | 16 ++-------------- x86_att/p384/bignum_montsqr_p384_alt.S | 16 ++-------------- x86_att/p384/bignum_mux_6.S | 16 ++-------------- x86_att/p384/bignum_neg_p384.S | 16 ++-------------- x86_att/p384/bignum_nonzero_6.S | 16 ++-------------- x86_att/p384/bignum_optneg_p384.S | 16 ++-------------- x86_att/p384/bignum_sub_p384.S | 16 ++-------------- x86_att/p384/bignum_tomont_p384.S | 16 ++-------------- x86_att/p384/bignum_tomont_p384_alt.S | 16 ++-------------- x86_att/p384/bignum_triple_p384.S | 16 ++-------------- x86_att/p384/bignum_triple_p384_alt.S | 16 ++-------------- x86_att/p384/p384_montjadd.S | 16 ++-------------- x86_att/p384/p384_montjdouble.S | 16 ++-------------- x86_att/p384/p384_montjmixadd.S | 16 ++-------------- x86_att/p521/bignum_add_p521.S | 16 ++-------------- x86_att/p521/bignum_cmul_p521.S | 16 ++-------------- x86_att/p521/bignum_cmul_p521_alt.S | 16 ++-------------- x86_att/p521/bignum_deamont_p521.S | 16 
++-------------- x86_att/p521/bignum_demont_p521.S | 16 ++-------------- x86_att/p521/bignum_double_p521.S | 16 ++-------------- x86_att/p521/bignum_fromlebytes_p521.S | 16 ++-------------- x86_att/p521/bignum_half_p521.S | 16 ++-------------- x86_att/p521/bignum_mod_n521_9.S | 16 ++-------------- x86_att/p521/bignum_mod_n521_9_alt.S | 16 ++-------------- x86_att/p521/bignum_mod_p521_9.S | 16 ++-------------- x86_att/p521/bignum_montmul_p521.S | 16 ++-------------- x86_att/p521/bignum_montmul_p521_alt.S | 16 ++-------------- x86_att/p521/bignum_montsqr_p521.S | 16 ++-------------- x86_att/p521/bignum_montsqr_p521_alt.S | 16 ++-------------- x86_att/p521/bignum_mul_p521.S | 16 ++-------------- x86_att/p521/bignum_mul_p521_alt.S | 16 ++-------------- x86_att/p521/bignum_neg_p521.S | 16 ++-------------- x86_att/p521/bignum_optneg_p521.S | 16 ++-------------- x86_att/p521/bignum_sqr_p521.S | 16 ++-------------- x86_att/p521/bignum_sqr_p521_alt.S | 16 ++-------------- x86_att/p521/bignum_sub_p521.S | 16 ++-------------- x86_att/p521/bignum_tolebytes_p521.S | 16 ++-------------- x86_att/p521/bignum_tomont_p521.S | 16 ++-------------- x86_att/p521/bignum_triple_p521.S | 16 ++-------------- x86_att/p521/bignum_triple_p521_alt.S | 16 ++-------------- x86_att/p521/p521_jadd.S | 16 ++-------------- x86_att/p521/p521_jdouble.S | 16 ++-------------- x86_att/p521/p521_jmixadd.S | 16 ++-------------- 129 files changed, 258 insertions(+), 1806 deletions(-) diff --git a/arm/curve25519/bignum_neg_p25519.S b/arm/curve25519/bignum_neg_p25519.S index 3d333ae42a..8466df43c1 100644 --- a/arm/curve25519/bignum_neg_p25519.S +++ b/arm/curve25519/bignum_neg_p25519.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced diff --git a/arm/curve25519/curve25519_x25519.S b/arm/curve25519/curve25519_x25519.S index bea6d50ce4..f9a14a928b 100644 --- a/arm/curve25519/curve25519_x25519.S +++ b/arm/curve25519/curve25519_x25519.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // The x25519 function for curve25519 diff --git a/arm/curve25519/curve25519_x25519_alt.S b/arm/curve25519/curve25519_x25519_alt.S index 8a0495f97c..be1ce42332 100644 --- a/arm/curve25519/curve25519_x25519_alt.S +++ b/arm/curve25519/curve25519_x25519_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // The x25519 function for curve25519 diff --git a/arm/fastmul/bignum_emontredc_8n.S b/arm/fastmul/bignum_emontredc_8n.S index 02fcff72a5..f66d536048 100644 --- a/arm/fastmul/bignum_emontredc_8n.S +++ b/arm/fastmul/bignum_emontredc_8n.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Extended Montgomery reduce in 8-digit blocks, results in input-output buffer diff --git a/arm/fastmul/bignum_kmul_16_32.S b/arm/fastmul/bignum_kmul_16_32.S index 3234fa904d..11dc1a7231 100644 --- a/arm/fastmul/bignum_kmul_16_32.S +++ b/arm/fastmul/bignum_kmul_16_32.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply z := x * y diff --git a/arm/fastmul/bignum_kmul_32_64.S b/arm/fastmul/bignum_kmul_32_64.S index 781b27bafe..42286ff36c 100644 --- a/arm/fastmul/bignum_kmul_32_64.S +++ b/arm/fastmul/bignum_kmul_32_64.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply z := x * y diff --git a/arm/fastmul/bignum_ksqr_16_32.S b/arm/fastmul/bignum_ksqr_16_32.S index 36c39c5630..9d7c9e7af0 100644 --- a/arm/fastmul/bignum_ksqr_16_32.S +++ b/arm/fastmul/bignum_ksqr_16_32.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Square, z := x^2 diff --git a/arm/fastmul/bignum_ksqr_32_64.S b/arm/fastmul/bignum_ksqr_32_64.S index 7d42af67a3..0405db1af5 100644 --- a/arm/fastmul/bignum_ksqr_32_64.S +++ b/arm/fastmul/bignum_ksqr_32_64.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Square, z := x^2 diff --git a/arm/generic/bignum_ge.S b/arm/generic/bignum_ge.S index a43105b9f8..6ba9202f00 100644 --- a/arm/generic/bignum_ge.S +++ b/arm/generic/bignum_ge.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). 
- * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Compare bignums, x >= y diff --git a/arm/generic/bignum_mul.S b/arm/generic/bignum_mul.S index 1563e408d7..85bfad9d32 100644 --- a/arm/generic/bignum_mul.S +++ b/arm/generic/bignum_mul.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply z := x * y diff --git a/arm/generic/bignum_optsub.S b/arm/generic/bignum_optsub.S index 9f0a5f494c..5d82a407d4 100644 --- a/arm/generic/bignum_optsub.S +++ b/arm/generic/bignum_optsub.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero) diff --git a/arm/generic/bignum_sqr.S b/arm/generic/bignum_sqr.S index ec2f80b266..39683061fb 100644 --- a/arm/generic/bignum_sqr.S +++ b/arm/generic/bignum_sqr.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Square z := x^2 diff --git a/arm/p384/bignum_add_p384.S b/arm/p384/bignum_add_p384.S index dd7fc36a38..00c8e81d31 100644 --- a/arm/p384/bignum_add_p384.S +++ b/arm/p384/bignum_add_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced diff --git a/arm/p384/bignum_bigendian_6.S b/arm/p384/bignum_bigendian_6.S index 7d5145453e..664ae845dd 100644 --- a/arm/p384/bignum_bigendian_6.S +++ b/arm/p384/bignum_bigendian_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert 6-digit (384-bit) bignum to/from big-endian form diff --git a/arm/p384/bignum_cmul_p384.S b/arm/p384/bignum_cmul_p384.S index 3ac32252f1..b9570c7998 100644 --- a/arm/p384/bignum_cmul_p384.S +++ b/arm/p384/bignum_cmul_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming diff --git a/arm/p384/bignum_deamont_p384.S b/arm/p384/bignum_deamont_p384.S index 932dfcc6e8..91ea265a97 100644 --- a/arm/p384/bignum_deamont_p384.S +++ b/arm/p384/bignum_deamont_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert from almost-Montgomery form, z := (x / 2^384) mod p_384 diff --git a/arm/p384/bignum_demont_p384.S b/arm/p384/bignum_demont_p384.S index 4b4b7e94a8..c0dd331d64 100644 --- a/arm/p384/bignum_demont_p384.S +++ b/arm/p384/bignum_demont_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced diff --git a/arm/p384/bignum_double_p384.S b/arm/p384/bignum_double_p384.S index cdcbc6aa67..fce40a0ff1 100644 --- a/arm/p384/bignum_double_p384.S +++ b/arm/p384/bignum_double_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Double modulo p_384, z := (2 * x) mod p_384, assuming x reduced diff --git a/arm/p384/bignum_half_p384.S b/arm/p384/bignum_half_p384.S index c9b3c87665..e242829c6b 100644 --- a/arm/p384/bignum_half_p384.S +++ b/arm/p384/bignum_half_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. 
This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Halve modulo p_384, z := (x / 2) mod p_384, assuming x reduced diff --git a/arm/p384/bignum_littleendian_6.S b/arm/p384/bignum_littleendian_6.S index 3144fc18d4..66b0424a51 100644 --- a/arm/p384/bignum_littleendian_6.S +++ b/arm/p384/bignum_littleendian_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert 6-digit (384-bit) bignum to/from little-endian form diff --git a/arm/p384/bignum_mod_n384.S b/arm/p384/bignum_mod_n384.S index a02771cbfe..e8de84d4cb 100644 --- a/arm/p384/bignum_mod_n384.S +++ b/arm/p384/bignum_mod_n384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo group order, z := x mod n_384 diff --git a/arm/p384/bignum_mod_n384_6.S b/arm/p384/bignum_mod_n384_6.S index 4ff06ce728..c382e642ca 100644 --- a/arm/p384/bignum_mod_n384_6.S +++ b/arm/p384/bignum_mod_n384_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo group order, z := x mod n_384 diff --git a/arm/p384/bignum_mod_p384.S b/arm/p384/bignum_mod_p384.S index 35771e7d8f..c2ab35526f 100644 --- a/arm/p384/bignum_mod_p384.S +++ b/arm/p384/bignum_mod_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo field characteristic, z := x mod p_384 diff --git a/arm/p384/bignum_mod_p384_6.S b/arm/p384/bignum_mod_p384_6.S index b198b5c603..a1ac615b1a 100644 --- a/arm/p384/bignum_mod_p384_6.S +++ b/arm/p384/bignum_mod_p384_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo field characteristic, z := x mod p_384 diff --git a/arm/p384/bignum_montmul_p384.S b/arm/p384/bignum_montmul_p384.S index 48ca4c3218..554081f39e 100644 --- a/arm/p384/bignum_montmul_p384.S +++ b/arm/p384/bignum_montmul_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery multiply, z := (x * y / 2^384) mod p_384 diff --git a/arm/p384/bignum_montmul_p384_alt.S b/arm/p384/bignum_montmul_p384_alt.S index 72883df225..2bd28cfffa 100644 --- a/arm/p384/bignum_montmul_p384_alt.S +++ b/arm/p384/bignum_montmul_p384_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery multiply, z := (x * y / 2^384) mod p_384 diff --git a/arm/p384/bignum_montsqr_p384.S b/arm/p384/bignum_montsqr_p384.S index 0446ad74cd..1067bf1a78 100644 --- a/arm/p384/bignum_montsqr_p384.S +++ b/arm/p384/bignum_montsqr_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery square, z := (x^2 / 2^384) mod p_384 diff --git a/arm/p384/bignum_montsqr_p384_alt.S b/arm/p384/bignum_montsqr_p384_alt.S index 35dba1ca50..e4fe2f7f5b 100644 --- a/arm/p384/bignum_montsqr_p384_alt.S +++ b/arm/p384/bignum_montsqr_p384_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery square, z := (x^2 / 2^384) mod p_384 diff --git a/arm/p384/bignum_mux_6.S b/arm/p384/bignum_mux_6.S index 30dab886ae..b4c966609f 100644 --- a/arm/p384/bignum_mux_6.S +++ b/arm/p384/bignum_mux_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. 
This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // 384-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) diff --git a/arm/p384/bignum_neg_p384.S b/arm/p384/bignum_neg_p384.S index 6d661a716d..48efcb51bd 100644 --- a/arm/p384/bignum_neg_p384.S +++ b/arm/p384/bignum_neg_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Negate modulo p_384, z := (-x) mod p_384, assuming x reduced diff --git a/arm/p384/bignum_nonzero_6.S b/arm/p384/bignum_nonzero_6.S index bdc6232eee..ae003186b8 100644 --- a/arm/p384/bignum_nonzero_6.S +++ b/arm/p384/bignum_nonzero_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // 384-bit nonzeroness test, returning 1 if x is nonzero, 0 if x is zero diff --git a/arm/p384/bignum_optneg_p384.S b/arm/p384/bignum_optneg_p384.S index eec01642d1..7b5e704348 100644 --- a/arm/p384/bignum_optneg_p384.S +++ b/arm/p384/bignum_optneg_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Optionally negate modulo p_384, z := (-x) mod p_384 (if p nonzero) or diff --git a/arm/p384/bignum_sub_p384.S b/arm/p384/bignum_sub_p384.S index 0e8af143f3..bd7a9deeff 100644 --- a/arm/p384/bignum_sub_p384.S +++ b/arm/p384/bignum_sub_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Subtract modulo p_384, z := (x - y) mod p_384 diff --git a/arm/p384/bignum_tomont_p384.S b/arm/p384/bignum_tomont_p384.S index bc240cffdd..c5bea5e0d5 100644 --- a/arm/p384/bignum_tomont_p384.S +++ b/arm/p384/bignum_tomont_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert to Montgomery form z := (2^384 * x) mod p_384 diff --git a/arm/p384/bignum_triple_p384.S b/arm/p384/bignum_triple_p384.S index 0786f21fab..cc641a2eeb 100644 --- a/arm/p384/bignum_triple_p384.S +++ b/arm/p384/bignum_triple_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Triple modulo p_384, z := (3 * x) mod p_384 diff --git a/arm/p384/p384_montjadd.S b/arm/p384/p384_montjadd.S index 138afa9dc3..98f40b0a80 100644 --- a/arm/p384/p384_montjadd.S +++ b/arm/p384/p384_montjadd.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates diff --git a/arm/p384/p384_montjdouble.S b/arm/p384/p384_montjdouble.S index 8fa2ad3234..5b4a609b59 100644 --- a/arm/p384/p384_montjdouble.S +++ b/arm/p384/p384_montjdouble.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates diff --git a/arm/p384/p384_montjmixadd.S b/arm/p384/p384_montjmixadd.S index f7467be289..0f5c24203f 100644 --- a/arm/p384/p384_montjmixadd.S +++ b/arm/p384/p384_montjmixadd.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point mixed addition on NIST curve P-384 in Montgomery-Jacobian coordinates diff --git a/arm/p521/bignum_add_p521.S b/arm/p521/bignum_add_p521.S index 7576cbc741..d9d59bbd48 100644 --- a/arm/p521/bignum_add_p521.S +++ b/arm/p521/bignum_add_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. 
This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Add modulo p_521, z := (x + y) mod p_521, assuming x and y reduced diff --git a/arm/p521/bignum_cmul_p521.S b/arm/p521/bignum_cmul_p521.S index f6b4face41..0b657b8b73 100644 --- a/arm/p521/bignum_cmul_p521.S +++ b/arm/p521/bignum_cmul_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming diff --git a/arm/p521/bignum_deamont_p521.S b/arm/p521/bignum_deamont_p521.S index c3eab0a481..442e5d4048 100644 --- a/arm/p521/bignum_deamont_p521.S +++ b/arm/p521/bignum_deamont_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert from Montgomery form z := (x / 2^576) mod p_521 diff --git a/arm/p521/bignum_demont_p521.S b/arm/p521/bignum_demont_p521.S index 1756f86e10..d3004ec580 100644 --- a/arm/p521/bignum_demont_p521.S +++ b/arm/p521/bignum_demont_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert from Montgomery form z := (x / 2^576) mod p_521, assuming x reduced diff --git a/arm/p521/bignum_double_p521.S b/arm/p521/bignum_double_p521.S index df3cfee500..8d0e291120 100644 --- a/arm/p521/bignum_double_p521.S +++ b/arm/p521/bignum_double_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Double modulo p_521, z := (2 * x) mod p_521, assuming x reduced diff --git a/arm/p521/bignum_fromlebytes_p521.S b/arm/p521/bignum_fromlebytes_p521.S index c348d5faa7..7a87ed3338 100644 --- a/arm/p521/bignum_fromlebytes_p521.S +++ b/arm/p521/bignum_fromlebytes_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert little-endian bytes to 9-digit 528-bit bignum diff --git a/arm/p521/bignum_half_p521.S b/arm/p521/bignum_half_p521.S index bfc489e3bc..1f8da155ba 100644 --- a/arm/p521/bignum_half_p521.S +++ b/arm/p521/bignum_half_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Halve modulo p_521, z := (x / 2) mod p_521, assuming x reduced diff --git a/arm/p521/bignum_mod_n521_9.S b/arm/p521/bignum_mod_n521_9.S index cfae5787a2..65bc4f08bb 100644 --- a/arm/p521/bignum_mod_n521_9.S +++ b/arm/p521/bignum_mod_n521_9.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo group order, z := x mod n_521 diff --git a/arm/p521/bignum_mod_p521_9.S b/arm/p521/bignum_mod_p521_9.S index 666caca4b5..874e9df091 100644 --- a/arm/p521/bignum_mod_p521_9.S +++ b/arm/p521/bignum_mod_p521_9.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo field characteristic, z := x mod p_521 diff --git a/arm/p521/bignum_montmul_p521.S b/arm/p521/bignum_montmul_p521.S index 5d13ca650a..c0ac8cf926 100644 --- a/arm/p521/bignum_montmul_p521.S +++ b/arm/p521/bignum_montmul_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery multiply, z := (x * y / 2^576) mod p_521 diff --git a/arm/p521/bignum_montmul_p521_alt.S b/arm/p521/bignum_montmul_p521_alt.S index 4a26ae6ed2..6b0afeac1d 100644 --- a/arm/p521/bignum_montmul_p521_alt.S +++ b/arm/p521/bignum_montmul_p521_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery multiply, z := (x * y / 2^576) mod p_521 diff --git a/arm/p521/bignum_montsqr_p521.S b/arm/p521/bignum_montsqr_p521.S index 9569487a38..45e57a666e 100644 --- a/arm/p521/bignum_montsqr_p521.S +++ b/arm/p521/bignum_montsqr_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery square, z := (x^2 / 2^576) mod p_521 diff --git a/arm/p521/bignum_montsqr_p521_alt.S b/arm/p521/bignum_montsqr_p521_alt.S index f15b9fc7d1..1ae774f0d3 100644 --- a/arm/p521/bignum_montsqr_p521_alt.S +++ b/arm/p521/bignum_montsqr_p521_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery square, z := (x^2 / 2^576) mod p_521 diff --git a/arm/p521/bignum_mul_p521.S b/arm/p521/bignum_mul_p521.S index 26824c472c..12594faf9a 100644 --- a/arm/p521/bignum_mul_p521.S +++ b/arm/p521/bignum_mul_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced diff --git a/arm/p521/bignum_mul_p521_alt.S b/arm/p521/bignum_mul_p521_alt.S index 089da6d70f..d0c2cdb0e6 100644 --- a/arm/p521/bignum_mul_p521_alt.S +++ b/arm/p521/bignum_mul_p521_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced diff --git a/arm/p521/bignum_neg_p521.S b/arm/p521/bignum_neg_p521.S index 8be4de4bc0..cdf7a9641c 100644 --- a/arm/p521/bignum_neg_p521.S +++ b/arm/p521/bignum_neg_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Negate modulo p_521, z := (-x) mod p_521, assuming x reduced diff --git a/arm/p521/bignum_optneg_p521.S b/arm/p521/bignum_optneg_p521.S index 03dbfc666c..74fac18e5a 100644 --- a/arm/p521/bignum_optneg_p521.S +++ b/arm/p521/bignum_optneg_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Optionally negate modulo p_521, z := (-x) mod p_521 (if p nonzero) or diff --git a/arm/p521/bignum_sqr_p521.S b/arm/p521/bignum_sqr_p521.S index d8fb453e36..23f8a3b9b2 100644 --- a/arm/p521/bignum_sqr_p521.S +++ b/arm/p521/bignum_sqr_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Square modulo p_521, z := (x^2) mod p_521, assuming x reduced diff --git a/arm/p521/bignum_sqr_p521_alt.S b/arm/p521/bignum_sqr_p521_alt.S index fe2201c00e..7837b23a3d 100644 --- a/arm/p521/bignum_sqr_p521_alt.S +++ b/arm/p521/bignum_sqr_p521_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Square modulo p_521, z := (x^2) mod p_521, assuming x reduced diff --git a/arm/p521/bignum_sub_p521.S b/arm/p521/bignum_sub_p521.S index 759e158039..4cc4e830b5 100644 --- a/arm/p521/bignum_sub_p521.S +++ b/arm/p521/bignum_sub_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Subtract modulo p_521, z := (x - y) mod p_521 diff --git a/arm/p521/bignum_tolebytes_p521.S b/arm/p521/bignum_tolebytes_p521.S index e798a86e8b..403f8fbd64 100644 --- a/arm/p521/bignum_tolebytes_p521.S +++ b/arm/p521/bignum_tolebytes_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert 9-digit 528-bit bignum to little-endian bytes diff --git a/arm/p521/bignum_tomont_p521.S b/arm/p521/bignum_tomont_p521.S index f3b114b893..833c07b847 100644 --- a/arm/p521/bignum_tomont_p521.S +++ b/arm/p521/bignum_tomont_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert to Montgomery form z := (2^576 * x) mod p_521 diff --git a/arm/p521/bignum_triple_p521.S b/arm/p521/bignum_triple_p521.S index b28ccd419e..7ce5d00915 100644 --- a/arm/p521/bignum_triple_p521.S +++ b/arm/p521/bignum_triple_p521.S @@ -1,17 +1,5 @@
-/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced diff --git a/arm/p521/p521_jadd.S b/arm/p521/p521_jadd.S index 85e62e01e7..928d7ea6cc 100644 --- a/arm/p521/p521_jadd.S +++ b/arm/p521/p521_jadd.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point addition on NIST curve P-521 in Jacobian coordinates diff --git a/arm/p521/p521_jdouble.S b/arm/p521/p521_jdouble.S index 242b492a81..6794e4cd92 100644 --- a/arm/p521/p521_jdouble.S +++ b/arm/p521/p521_jdouble.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point doubling on NIST curve P-521 in Jacobian coordinates diff --git a/arm/p521/p521_jmixadd.S b/arm/p521/p521_jmixadd.S index 6e8b46b00c..cd27d24eb8 100644 --- a/arm/p521/p521_jmixadd.S +++ b/arm/p521/p521_jmixadd.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point mixed addition on NIST curve P-521 in Jacobian coordinates diff --git a/x86_att/curve25519/bignum_neg_p25519.S b/x86_att/curve25519/bignum_neg_p25519.S index f0b7392f1f..02d01b1241 100644 --- a/x86_att/curve25519/bignum_neg_p25519.S +++ b/x86_att/curve25519/bignum_neg_p25519.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Negate modulo p_25519, z := (-x) mod p_25519, assuming x reduced diff --git a/x86_att/curve25519/curve25519_x25519.S b/x86_att/curve25519/curve25519_x25519.S index 09d62751b8..f806f7c29c 100644 --- a/x86_att/curve25519/curve25519_x25519.S +++ b/x86_att/curve25519/curve25519_x25519.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // The x25519 function for curve25519 diff --git a/x86_att/curve25519/curve25519_x25519_alt.S b/x86_att/curve25519/curve25519_x25519_alt.S index 52600bedeb..7c1f089afd 100644 --- a/x86_att/curve25519/curve25519_x25519_alt.S +++ b/x86_att/curve25519/curve25519_x25519_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // The x25519 function for curve25519 diff --git a/x86_att/p384/bignum_add_p384.S b/x86_att/p384/bignum_add_p384.S index e527de6fba..b0a3c9c517 100644 --- a/x86_att/p384/bignum_add_p384.S +++ b/x86_att/p384/bignum_add_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced diff --git a/x86_att/p384/bignum_bigendian_6.S b/x86_att/p384/bignum_bigendian_6.S index 287e1bd392..7fa59c536e 100644 --- a/x86_att/p384/bignum_bigendian_6.S +++ b/x86_att/p384/bignum_bigendian_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert 6-digit (384-bit) bignum to/from big-endian form diff --git a/x86_att/p384/bignum_cmul_p384.S b/x86_att/p384/bignum_cmul_p384.S index 4f03a1bd18..6632a9ae7e 100644 --- a/x86_att/p384/bignum_cmul_p384.S +++ b/x86_att/p384/bignum_cmul_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming diff --git a/x86_att/p384/bignum_cmul_p384_alt.S b/x86_att/p384/bignum_cmul_p384_alt.S index ff4b38658a..c91629cd30 100644 --- a/x86_att/p384/bignum_cmul_p384_alt.S +++ b/x86_att/p384/bignum_cmul_p384_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming diff --git a/x86_att/p384/bignum_deamont_p384.S b/x86_att/p384/bignum_deamont_p384.S index f029a18f4b..6b7daea25e 100644 --- a/x86_att/p384/bignum_deamont_p384.S +++ b/x86_att/p384/bignum_deamont_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert from almost-Montgomery form, z := (x / 2^384) mod p_384 diff --git a/x86_att/p384/bignum_deamont_p384_alt.S b/x86_att/p384/bignum_deamont_p384_alt.S index 63b660f707..918a104f63 100644 --- a/x86_att/p384/bignum_deamont_p384_alt.S +++ b/x86_att/p384/bignum_deamont_p384_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert from almost-Montgomery form, z := (x / 2^384) mod p_384 diff --git a/x86_att/p384/bignum_demont_p384.S b/x86_att/p384/bignum_demont_p384.S index 75124a9e29..3dc1d734c4 100644 --- a/x86_att/p384/bignum_demont_p384.S +++ b/x86_att/p384/bignum_demont_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced diff --git a/x86_att/p384/bignum_demont_p384_alt.S b/x86_att/p384/bignum_demont_p384_alt.S index 1b4832bf03..d2dca9c4f2 100644 --- a/x86_att/p384/bignum_demont_p384_alt.S +++ b/x86_att/p384/bignum_demont_p384_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced diff --git a/x86_att/p384/bignum_double_p384.S b/x86_att/p384/bignum_double_p384.S index b7b2e5795d..c06b218889 100644 --- a/x86_att/p384/bignum_double_p384.S +++ b/x86_att/p384/bignum_double_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Double modulo p_384, z := (2 * x) mod p_384, assuming x reduced diff --git a/x86_att/p384/bignum_half_p384.S b/x86_att/p384/bignum_half_p384.S index eba7322510..51afea03bb 100644 --- a/x86_att/p384/bignum_half_p384.S +++ b/x86_att/p384/bignum_half_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Halve modulo p_384, z := (x / 2) mod p_384, assuming x reduced diff --git a/x86_att/p384/bignum_littleendian_6.S b/x86_att/p384/bignum_littleendian_6.S index 670bca9136..a0eef1f00f 100644 --- a/x86_att/p384/bignum_littleendian_6.S +++ b/x86_att/p384/bignum_littleendian_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert 6-digit (384-bit) bignum to/from little-endian form diff --git a/x86_att/p384/bignum_mod_n384.S b/x86_att/p384/bignum_mod_n384.S index a03c07c76b..963873f72e 100644 --- a/x86_att/p384/bignum_mod_n384.S +++ b/x86_att/p384/bignum_mod_n384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo group order, z := x mod n_384 diff --git a/x86_att/p384/bignum_mod_n384_6.S b/x86_att/p384/bignum_mod_n384_6.S index 8a0d199b2a..273bce8b33 100644 --- a/x86_att/p384/bignum_mod_n384_6.S +++ b/x86_att/p384/bignum_mod_n384_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo group order, z := x mod n_384 diff --git a/x86_att/p384/bignum_mod_n384_alt.S b/x86_att/p384/bignum_mod_n384_alt.S index 0bb52c3983..ffd9c9d1b9 100644 --- a/x86_att/p384/bignum_mod_n384_alt.S +++ b/x86_att/p384/bignum_mod_n384_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo group order, z := x mod n_384 diff --git a/x86_att/p384/bignum_mod_p384.S b/x86_att/p384/bignum_mod_p384.S index 934275f270..10414fea41 100644 --- a/x86_att/p384/bignum_mod_p384.S +++ b/x86_att/p384/bignum_mod_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo field characteristic, z := x mod p_384 diff --git a/x86_att/p384/bignum_mod_p384_6.S b/x86_att/p384/bignum_mod_p384_6.S index f898e0b00b..08381a6c1e 100644 --- a/x86_att/p384/bignum_mod_p384_6.S +++ b/x86_att/p384/bignum_mod_p384_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo field characteristic, z := x mod p_384 diff --git a/x86_att/p384/bignum_mod_p384_alt.S b/x86_att/p384/bignum_mod_p384_alt.S index e3ec6346b4..689f1d340c 100644 --- a/x86_att/p384/bignum_mod_p384_alt.S +++ b/x86_att/p384/bignum_mod_p384_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo field characteristic, z := x mod p_384 diff --git a/x86_att/p384/bignum_montmul_p384.S b/x86_att/p384/bignum_montmul_p384.S index 3ea3100dd9..718991aac1 100644 --- a/x86_att/p384/bignum_montmul_p384.S +++ b/x86_att/p384/bignum_montmul_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery multiply, z := (x * y / 2^384) mod p_384 diff --git a/x86_att/p384/bignum_montmul_p384_alt.S b/x86_att/p384/bignum_montmul_p384_alt.S index 3efcaced8c..3da172840e 100644 --- a/x86_att/p384/bignum_montmul_p384_alt.S +++ b/x86_att/p384/bignum_montmul_p384_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery multiply, z := (x * y / 2^384) mod p_384 diff --git a/x86_att/p384/bignum_montsqr_p384.S b/x86_att/p384/bignum_montsqr_p384.S index 71901599aa..f8b4230b7e 100644 --- a/x86_att/p384/bignum_montsqr_p384.S +++ b/x86_att/p384/bignum_montsqr_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery square, z := (x^2 / 2^384) mod p_384 diff --git a/x86_att/p384/bignum_montsqr_p384_alt.S b/x86_att/p384/bignum_montsqr_p384_alt.S index 652184c691..e04807766c 100644 --- a/x86_att/p384/bignum_montsqr_p384_alt.S +++ b/x86_att/p384/bignum_montsqr_p384_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery square, z := (x^2 / 2^384) mod p_384 diff --git a/x86_att/p384/bignum_mux_6.S b/x86_att/p384/bignum_mux_6.S index 0bf97ff129..5277428379 100644 --- a/x86_att/p384/bignum_mux_6.S +++ b/x86_att/p384/bignum_mux_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // 384-bit multiplex/select z := x (if p nonzero) or z := y (if p zero) diff --git a/x86_att/p384/bignum_neg_p384.S b/x86_att/p384/bignum_neg_p384.S index 8be796ba4e..51b0f41bb1 100644 --- a/x86_att/p384/bignum_neg_p384.S +++ b/x86_att/p384/bignum_neg_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Negate modulo p_384, z := (-x) mod p_384, assuming x reduced diff --git a/x86_att/p384/bignum_nonzero_6.S b/x86_att/p384/bignum_nonzero_6.S index d03e82bbae..8e17207d4a 100644 --- a/x86_att/p384/bignum_nonzero_6.S +++ b/x86_att/p384/bignum_nonzero_6.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // 384-bit nonzeroness test, returning 1 if x is nonzero, 0 if x is zero diff --git a/x86_att/p384/bignum_optneg_p384.S b/x86_att/p384/bignum_optneg_p384.S index 77cf1f6499..cee7be2f3c 100644 --- a/x86_att/p384/bignum_optneg_p384.S +++ b/x86_att/p384/bignum_optneg_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Optionally negate modulo p_384, z := (-x) mod p_384 (if p nonzero) or diff --git a/x86_att/p384/bignum_sub_p384.S b/x86_att/p384/bignum_sub_p384.S index bfd4fa2b62..8d4ae986a2 100644 --- a/x86_att/p384/bignum_sub_p384.S +++ b/x86_att/p384/bignum_sub_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Subtract modulo p_384, z := (x - y) mod p_384 diff --git a/x86_att/p384/bignum_tomont_p384.S b/x86_att/p384/bignum_tomont_p384.S index e17ef46dcf..70463c73a6 100644 --- a/x86_att/p384/bignum_tomont_p384.S +++ b/x86_att/p384/bignum_tomont_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert to Montgomery form z := (2^384 * x) mod p_384 diff --git a/x86_att/p384/bignum_tomont_p384_alt.S b/x86_att/p384/bignum_tomont_p384_alt.S index b81d583508..75ba90d7f7 100644 --- a/x86_att/p384/bignum_tomont_p384_alt.S +++ b/x86_att/p384/bignum_tomont_p384_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert to Montgomery form z := (2^384 * x) mod p_384 diff --git a/x86_att/p384/bignum_triple_p384.S b/x86_att/p384/bignum_triple_p384.S index 356e20b316..2d3ae66bf7 100644 --- a/x86_att/p384/bignum_triple_p384.S +++ b/x86_att/p384/bignum_triple_p384.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Triple modulo p_384, z := (3 * x) mod p_384 diff --git a/x86_att/p384/bignum_triple_p384_alt.S b/x86_att/p384/bignum_triple_p384_alt.S index d8e0506a60..91efffbe1e 100644 --- a/x86_att/p384/bignum_triple_p384_alt.S +++ b/x86_att/p384/bignum_triple_p384_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Triple modulo p_384, z := (3 * x) mod p_384 diff --git a/x86_att/p384/p384_montjadd.S b/x86_att/p384/p384_montjadd.S index e550f38609..52b86b2063 100644 --- a/x86_att/p384/p384_montjadd.S +++ b/x86_att/p384/p384_montjadd.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates diff --git a/x86_att/p384/p384_montjdouble.S b/x86_att/p384/p384_montjdouble.S index d7de785797..d06d22bd8f 100644 --- a/x86_att/p384/p384_montjdouble.S +++ b/x86_att/p384/p384_montjdouble.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates diff --git a/x86_att/p384/p384_montjmixadd.S b/x86_att/p384/p384_montjmixadd.S index 6749209eb4..8a8c17c1a0 100644 --- a/x86_att/p384/p384_montjmixadd.S +++ b/x86_att/p384/p384_montjmixadd.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point mixed addition on NIST curve P-384 in Montgomery-Jacobian coordinates diff --git a/x86_att/p521/bignum_add_p521.S b/x86_att/p521/bignum_add_p521.S index 080e98807d..849a740971 100644 --- a/x86_att/p521/bignum_add_p521.S +++ b/x86_att/p521/bignum_add_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Add modulo p_521, z := (x + y) mod p_521, assuming x and y reduced diff --git a/x86_att/p521/bignum_cmul_p521.S b/x86_att/p521/bignum_cmul_p521.S index ce8dded8dd..7898293c6a 100644 --- a/x86_att/p521/bignum_cmul_p521.S +++ b/x86_att/p521/bignum_cmul_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming diff --git a/x86_att/p521/bignum_cmul_p521_alt.S b/x86_att/p521/bignum_cmul_p521_alt.S index 335d8e5cf7..c5f79a8189 100644 --- a/x86_att/p521/bignum_cmul_p521_alt.S +++ b/x86_att/p521/bignum_cmul_p521_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming diff --git a/x86_att/p521/bignum_deamont_p521.S b/x86_att/p521/bignum_deamont_p521.S index de4f767fb1..d916da1f95 100644 --- a/x86_att/p521/bignum_deamont_p521.S +++ b/x86_att/p521/bignum_deamont_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert from Montgomery form z := (x / 2^576) mod p_521 diff --git a/x86_att/p521/bignum_demont_p521.S b/x86_att/p521/bignum_demont_p521.S index 5e8c2c7406..182360406a 100644 --- a/x86_att/p521/bignum_demont_p521.S +++ b/x86_att/p521/bignum_demont_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert from Montgomery form z := (x / 2^576) mod p_521, assuming x reduced diff --git a/x86_att/p521/bignum_double_p521.S b/x86_att/p521/bignum_double_p521.S index ea904c8bc4..f3923d82ce 100644 --- a/x86_att/p521/bignum_double_p521.S +++ b/x86_att/p521/bignum_double_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Double modulo p_521, z := (2 * x) mod p_521, assuming x reduced diff --git a/x86_att/p521/bignum_fromlebytes_p521.S b/x86_att/p521/bignum_fromlebytes_p521.S index 1c28af57a2..a5c9f491d9 100644 --- a/x86_att/p521/bignum_fromlebytes_p521.S +++ b/x86_att/p521/bignum_fromlebytes_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert little-endian bytes to 9-digit 528-bit bignum diff --git a/x86_att/p521/bignum_half_p521.S b/x86_att/p521/bignum_half_p521.S index 47fa3e6f49..9023beb032 100644 --- a/x86_att/p521/bignum_half_p521.S +++ b/x86_att/p521/bignum_half_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Halve modulo p_521, z := (x / 2) mod p_521, assuming x reduced diff --git a/x86_att/p521/bignum_mod_n521_9.S b/x86_att/p521/bignum_mod_n521_9.S index d17c196747..9dcc73d15f 100644 --- a/x86_att/p521/bignum_mod_n521_9.S +++ b/x86_att/p521/bignum_mod_n521_9.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo group order, z := x mod n_521 diff --git a/x86_att/p521/bignum_mod_n521_9_alt.S b/x86_att/p521/bignum_mod_n521_9_alt.S index 3da8d997be..026a97e451 100644 --- a/x86_att/p521/bignum_mod_n521_9_alt.S +++ b/x86_att/p521/bignum_mod_n521_9_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo group order, z := x mod n_521 diff --git a/x86_att/p521/bignum_mod_p521_9.S b/x86_att/p521/bignum_mod_p521_9.S index 96d218065e..0f2e4267f4 100644 --- a/x86_att/p521/bignum_mod_p521_9.S +++ b/x86_att/p521/bignum_mod_p521_9.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Reduce modulo field characteristic, z := x mod p_521 diff --git a/x86_att/p521/bignum_montmul_p521.S b/x86_att/p521/bignum_montmul_p521.S index be232ea29c..3ee202d458 100644 --- a/x86_att/p521/bignum_montmul_p521.S +++ b/x86_att/p521/bignum_montmul_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery multiply, z := (x * y / 2^576) mod p_521 diff --git a/x86_att/p521/bignum_montmul_p521_alt.S b/x86_att/p521/bignum_montmul_p521_alt.S index 7631458755..dcef877ffd 100644 --- a/x86_att/p521/bignum_montmul_p521_alt.S +++ b/x86_att/p521/bignum_montmul_p521_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery multiply, z := (x * y / 2^576) mod p_521 diff --git a/x86_att/p521/bignum_montsqr_p521.S b/x86_att/p521/bignum_montsqr_p521.S index 2d9acc7529..91cb9c318d 100644 --- a/x86_att/p521/bignum_montsqr_p521.S +++ b/x86_att/p521/bignum_montsqr_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery square, z := (x^2 / 2^576) mod p_521 diff --git a/x86_att/p521/bignum_montsqr_p521_alt.S b/x86_att/p521/bignum_montsqr_p521_alt.S index 84f1af05b6..ad071a453b 100644 --- a/x86_att/p521/bignum_montsqr_p521_alt.S +++ b/x86_att/p521/bignum_montsqr_p521_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Montgomery square, z := (x^2 / 2^576) mod p_521 diff --git a/x86_att/p521/bignum_mul_p521.S b/x86_att/p521/bignum_mul_p521.S index e25129c059..25073f9daf 100644 --- a/x86_att/p521/bignum_mul_p521.S +++ b/x86_att/p521/bignum_mul_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced diff --git a/x86_att/p521/bignum_mul_p521_alt.S b/x86_att/p521/bignum_mul_p521_alt.S index d05a7cb0b0..3224a86634 100644 --- a/x86_att/p521/bignum_mul_p521_alt.S +++ b/x86_att/p521/bignum_mul_p521_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced diff --git a/x86_att/p521/bignum_neg_p521.S b/x86_att/p521/bignum_neg_p521.S index d6eb825754..484c1fca56 100644 --- a/x86_att/p521/bignum_neg_p521.S +++ b/x86_att/p521/bignum_neg_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Negate modulo p_521, z := (-x) mod p_521, assuming x reduced diff --git a/x86_att/p521/bignum_optneg_p521.S b/x86_att/p521/bignum_optneg_p521.S index e910140ad8..d2434adb4c 100644 --- a/x86_att/p521/bignum_optneg_p521.S +++ b/x86_att/p521/bignum_optneg_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Optionally negate modulo p_521, z := (-x) mod p_521 (if p nonzero) or diff --git a/x86_att/p521/bignum_sqr_p521.S b/x86_att/p521/bignum_sqr_p521.S index 6e2d9b5562..b9a718cf9b 100644 --- a/x86_att/p521/bignum_sqr_p521.S +++ b/x86_att/p521/bignum_sqr_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Square modulo p_521, z := (x^2) mod p_521, assuming x reduced diff --git a/x86_att/p521/bignum_sqr_p521_alt.S b/x86_att/p521/bignum_sqr_p521_alt.S index c2fae3c25f..58f496e3f0 100644 --- a/x86_att/p521/bignum_sqr_p521_alt.S +++ b/x86_att/p521/bignum_sqr_p521_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Square modulo p_521, z := (x^2) mod p_521, assuming x reduced diff --git a/x86_att/p521/bignum_sub_p521.S b/x86_att/p521/bignum_sub_p521.S index bcd9ed4a4b..99e0d96cd1 100644 --- a/x86_att/p521/bignum_sub_p521.S +++ b/x86_att/p521/bignum_sub_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Subtract modulo p_521, z := (x - y) mod p_521 diff --git a/x86_att/p521/bignum_tolebytes_p521.S b/x86_att/p521/bignum_tolebytes_p521.S index 7fb403239e..c5ea2ed539 100644 --- a/x86_att/p521/bignum_tolebytes_p521.S +++ b/x86_att/p521/bignum_tolebytes_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert 9-digit 528-bit bignum to little-endian bytes diff --git a/x86_att/p521/bignum_tomont_p521.S b/x86_att/p521/bignum_tomont_p521.S index df8f277345..a97beaccb1 100644 --- a/x86_att/p521/bignum_tomont_p521.S +++ b/x86_att/p521/bignum_tomont_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Convert to Montgomery form z := (2^576 * x) mod p_521 diff --git a/x86_att/p521/bignum_triple_p521.S b/x86_att/p521/bignum_triple_p521.S index 484dab04c8..6703a9cb22 100644 --- a/x86_att/p521/bignum_triple_p521.S +++ b/x86_att/p521/bignum_triple_p521.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced diff --git a/x86_att/p521/bignum_triple_p521_alt.S b/x86_att/p521/bignum_triple_p521_alt.S index 9e25a2c853..4598d9db87 100644 --- a/x86_att/p521/bignum_triple_p521_alt.S +++ b/x86_att/p521/bignum_triple_p521_alt.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced diff --git a/x86_att/p521/p521_jadd.S b/x86_att/p521/p521_jadd.S index c1ac9a235f..256ba845c4 100644 --- a/x86_att/p521/p521_jadd.S +++ b/x86_att/p521/p521_jadd.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point addition on NIST curve P-521 in Jacobian coordinates diff --git a/x86_att/p521/p521_jdouble.S b/x86_att/p521/p521_jdouble.S index 16a5deeb04..fd2a57bbc9 100644 --- a/x86_att/p521/p521_jdouble.S +++ b/x86_att/p521/p521_jdouble.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point doubling on NIST curve P-521 in Jacobian coordinates diff --git a/x86_att/p521/p521_jmixadd.S b/x86_att/p521/p521_jmixadd.S index 52e1568b56..7054905371 100644 --- a/x86_att/p521/p521_jmixadd.S +++ b/x86_att/p521/p521_jmixadd.S @@ -1,17 +1,5 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "LICENSE" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC // ---------------------------------------------------------------------------- // Point mixed addition on NIST curve P-521 in Jacobian coordinates From 9c62e931a5be1d5e4497882f76557eb3e39d0157 Mon Sep 17 00:00:00 2001 From: sachiang Date: Fri, 28 Oct 2022 14:45:01 -0700 Subject: [PATCH 19/42] update license headers of Makefiles and sed files s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/5746a969ad2cff94b6172151375fc4319375879f --- arm/p384/Makefile | 12 +----------- arm/p521/Makefile | 12 +----------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/arm/p384/Makefile b/arm/p384/Makefile index 11a5605504..d3feb070c7 100644 --- a/arm/p384/Makefile +++ b/arm/p384/Makefile @@ -1,16 +1,6 @@ ############################################################################# # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://aws.amazon.com/apache2.0 -# -# or in the "LICENSE" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
+# SPDX-License-Identifier: Apache-2.0 OR ISC
 #############################################################################

 # If actually on an ARM8 machine, just use the GNU assembler (as). Otherwise
diff --git a/arm/p521/Makefile b/arm/p521/Makefile
index 9ea36beb96..b8ad763c35 100644
--- a/arm/p521/Makefile
+++ b/arm/p521/Makefile
@@ -1,16 +1,6 @@
 #############################################################################
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License").
-# You may not use this file except in compliance with the License.
-# A copy of the License is located at
-#
-# http://aws.amazon.com/apache2.0
-#
-# or in the "LICENSE" file accompanying this file. This file is distributed
-# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
-# express or implied. See the License for the specific language governing
-# permissions and limitations under the License.
+# SPDX-License-Identifier: Apache-2.0 OR ISC
 #############################################################################

 # If actually on an ARM8 machine, just use the GNU assembler (as). Otherwise

From 31076a78d92e828abfc1b7361cfe8edb062e9321 Mon Sep 17 00:00:00 2001
From: John Harrison
Date: Tue, 8 Nov 2022 08:46:00 -0800
Subject: [PATCH 20/42] Add X25519 basepoint function

The function "curve25519_x25519base" is the special case of
"curve25519_x25519" where the point X coordinate argument is the
standard basepoint 9. While faster than the general binary function,
it is functionally equivalent, i.e.

  curve25519_x25519base(res,n) =~= curve25519_x25519(res,n,9)

This update also adds support in the formal model for addressing
relative to the program counter (the ADR instruction for ARM, and
RIP-relative addresses for x86), which is used to index into the
tables in the code. In the current version of the code the precomputed
data tables are in the text segment directly after the program (not in
a separate .rodata segment), to ensure that the code is
position-independent and that the proof covers the actual code without
making assumptions about linker modifications.

s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/1be9ed860f4b6262f252186cc97265d59dffac8e
---
 arm/curve25519/curve25519_x25519base.S     | 8880 ++++++++++++++++
 arm/curve25519/curve25519_x25519base_alt.S | 8762 ++++++++++++++++
 x86_att/curve25519/curve25519_x25519base.S | 8946 ++++++++++++++++
 .../curve25519/curve25519_x25519base_alt.S | 9022 +++++++++++++++++
 4 files changed, 35610 insertions(+)
 create mode 100644 arm/curve25519/curve25519_x25519base.S
 create mode 100644 arm/curve25519/curve25519_x25519base_alt.S
 create mode 100644 x86_att/curve25519/curve25519_x25519base.S
 create mode 100644 x86_att/curve25519/curve25519_x25519base_alt.S

diff --git a/arm/curve25519/curve25519_x25519base.S b/arm/curve25519/curve25519_x25519base.S
new file mode 100644
index 0000000000..314169cf3f
--- /dev/null
+++ b/arm/curve25519/curve25519_x25519base.S
@@ -0,0 +1,8880 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR ISC
+
+// ----------------------------------------------------------------------------
+// The x25519 function for curve25519 on base element 9
+// Input scalar[4]; output res[4]
+//
+// extern void curve25519_x25519base
+//     (uint64_t res[static 4],uint64_t scalar[static 4]);
+//
+// Given a scalar n, returns the X coordinate of n * G where G = (9,...)
is +// the standard generator. The scalar is first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748). +// +// Standard ARM ABI: X0 = res, X1 = scalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519base) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519base) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable home for the input result argument during the whole body + +#define res x23 + +// Other variables that are only needed prior to the modular inverse. + +#define tab x19 + +#define i x20 + +#define bias x21 + +#define bf x22 +#define ix x22 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. + +#define resx res, #(0*NUMSIZE) + +#define scalar sp, #(0*NUMSIZE) + +#define tabent sp, #(1*NUMSIZE) +#define ymx_2 sp, #(1*NUMSIZE) +#define xpy_2 sp, #(2*NUMSIZE) +#define kxy_2 sp, #(3*NUMSIZE) + +#define acc sp, #(4*NUMSIZE) +#define x_1 sp, #(4*NUMSIZE) +#define y_1 sp, #(5*NUMSIZE) +#define z_1 sp, #(6*NUMSIZE) +#define w_1 sp, #(7*NUMSIZE) +#define x_3 sp, #(4*NUMSIZE) +#define y_3 sp, #(5*NUMSIZE) +#define z_3 sp, #(6*NUMSIZE) +#define w_3 sp, #(7*NUMSIZE) + +#define tmpspace sp, #(8*NUMSIZE) +#define t0 sp, #(8*NUMSIZE) +#define t1 sp, #(9*NUMSIZE) +#define t2 sp, #(10*NUMSIZE) +#define t3 sp, #(11*NUMSIZE) +#define t4 sp, #(12*NUMSIZE) +#define t5 sp, #(13*NUMSIZE) + +// Total size to reserve on the stack + +#define NSPACE (14*NUMSIZE) + +// Macro wrapping up the basic field multiplication, only trivially +// different from a pure function call to bignum_mul_p25519. + +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + mul x7, x3, x5; \ + umulh x8, x3, x5; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + mul x11, x3, x5; \ + umulh x12, x3, x5; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [P2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, 
x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + and x5, x11, #0xffffffff; \ + lsr x4, x11, #32; \ + mul x5, x3, x5; \ + mul x4, x3, x4; \ + adds x7, x7, x5; \ + and x5, x12, #0xffffffff; \ + lsr x12, x12, #32; \ + mul x5, x3, x5; \ + mul x12, x3, x12; \ + adcs x8, x8, x5; \ + and x5, x13, #0xffffffff; \ + lsr x13, x13, #32; \ + mul x5, x3, x5; \ + mul x13, x3, x13; \ + adcs x9, x9, x5; \ + and x5, x14, #0xffffffff; \ + lsr x14, x14, #32; \ + mul x5, x3, x5; \ + mul x14, x3, x14; \ + adcs x10, x10, x5; \ + cset x11, cs; \ + lsl x5, x4, #32; \ + adds x7, x7, x5; \ + extr x5, x12, x4, #32; \ + adcs x8, x8, x5; \ + extr x5, x13, x12, #32; \ + adcs x9, x9, x5; \ + extr x5, x14, x13, #32; \ + adcs x10, x10, x5; \ + lsr x5, x14, #32; \ + adc x11, x11, x5; \ + cmn x10, x10; \ + orr x10, x10, #0x8000000000000000; \ + adc x0, x11, x11; \ + mov x3, #0x13; \ + madd x5, x3, x0, x3; \ + adds x7, x7, x5; \ + adcs x8, x8, xzr; \ + adcs x9, x9, xzr; \ + adcs x10, x10, xzr; \ + csel x3, x3, xzr, cc; \ + subs x7, x7, x3; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbc x10, x10, xzr; \ + and x10, x10, #0x7fffffffffffffff; \ + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. 
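+// (Both macros rely on the congruence 2^256 == 38 (mod p_25519), which
+// follows from 2^255 == 19: the top 256 bits h of the 512-bit product
+// are folded back into the low half as 38 * h before the final reduction.)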
+ +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + mul x7, x3, x5; \ + umulh x8, x3, x5; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + mul x11, x3, x5; \ + umulh x12, x3, x5; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [P2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + and x5, x11, #0xffffffff; \ + lsr x4, x11, #32; \ + mul x5, x3, x5; \ + mul x4, x3, x4; \ + adds x7, x7, x5; \ + and x5, x12, #0xffffffff; \ + lsr x12, x12, #32; \ + mul x5, x3, x5; \ + mul x12, x3, x12; \ + adcs x8, x8, x5; \ + and x5, x13, #0xffffffff; \ + lsr x13, x13, #32; \ + mul x5, x3, x5; \ + mul x13, x3, x13; \ + adcs x9, x9, x5; \ + and x5, x14, #0xffffffff; \ + lsr x14, x14, #32; \ + mul x5, x3, x5; \ + mul x14, x3, x14; \ + adcs x10, x10, x5; \ + cset x11, cs; \ + lsl x5, x4, #32; \ + adds x7, x7, x5; \ + extr x5, x12, x4, #32; \ + adcs x8, x8, x5; \ + extr x5, x13, x12, #32; \ + adcs x9, x9, x5; \ + extr x5, x14, x13, #32; \ + adcs x10, x10, x5; \ + lsr x5, x14, #32; \ + adc x11, x11, x5; \ + cmn x10, x10; \ + bic x10, x10, #0x8000000000000000; \ + adc x0, x11, x11; \ + mov x3, #19; \ + mul x5, x3, x0; \ + adds x7, x7, x5; \ + adcs x8, x8, xzr; \ + adcs x9, x9, xzr; \ + adc x10, x10, xzr; \ + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] + +// Plain 4-digit add and doubling without any normalization +// With inputs < p_25519 (indeed < 
2^255) it still gives a 4-digit result,
+// indeed one < 2 * p_25519 for normalized inputs.
+
+#define add_4(P0,P1,P2) \
+        ldp x0, x1, [P1]; \
+        ldp x4, x5, [P2]; \
+        adds x0, x0, x4; \
+        adcs x1, x1, x5; \
+        ldp x2, x3, [P1+16]; \
+        ldp x6, x7, [P2+16]; \
+        adcs x2, x2, x6; \
+        adc x3, x3, x7; \
+        stp x0, x1, [P0]; \
+        stp x2, x3, [P0+16]
+
+#define double_4(P0,P1) \
+        ldp x0, x1, [P1]; \
+        adds x0, x0, x0; \
+        adcs x1, x1, x1; \
+        ldp x2, x3, [P1+16]; \
+        adcs x2, x2, x2; \
+        adc x3, x3, x3; \
+        stp x0, x1, [P0]; \
+        stp x2, x3, [P0+16]
+
+// Subtraction of a pair of numbers < p_25519 just sufficient
+// to give a 4-digit result. It actually always does (x - z) + (2^255-19),
+// which in turn is done by (x - z) - (2^255+19), discarding the 2^256
+// implicitly
+
+#define sub_4(P0,P1,P2) \
+        ldp x5, x6, [P1]; \
+        ldp x4, x3, [P2]; \
+        subs x5, x5, x4; \
+        sbcs x6, x6, x3; \
+        ldp x7, x8, [P1+16]; \
+        ldp x4, x3, [P2+16]; \
+        sbcs x7, x7, x4; \
+        sbcs x8, x8, x3; \
+        mov x3, #19; \
+        subs x5, x5, x3; \
+        sbcs x6, x6, xzr; \
+        sbcs x7, x7, xzr; \
+        mov x4, #0x8000000000000000; \
+        sbc x8, x8, x4; \
+        stp x5, x6, [P0]; \
+        stp x7, x8, [P0+16]
+
+// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38
+
+#define sub_twice4(P0,P1,P2) \
+        ldp x5, x6, [P1]; \
+        ldp x4, x3, [P2]; \
+        subs x5, x5, x4; \
+        sbcs x6, x6, x3; \
+        ldp x7, x8, [P1+16]; \
+        ldp x4, x3, [P2+16]; \
+        sbcs x7, x7, x4; \
+        sbcs x8, x8, x3; \
+        mov x4, #38; \
+        csel x3, x4, xzr, lo; \
+        subs x5, x5, x3; \
+        sbcs x6, x6, xzr; \
+        sbcs x7, x7, xzr; \
+        sbc x8, x8, xzr; \
+        stp x5, x6, [P0]; \
+        stp x7, x8, [P0+16]
+
+// Modular addition with inputs < double modulus 2 * p_25519 = 2^256 - 38
+// and in general only guaranteeing a 4-digit result, not even < 2 * p_25519.
+
+#define add_twice4(P0,P1,P2) \
+        ldp x3, x4, [P1]; \
+        ldp x7, x8, [P2]; \
+        adds x3, x3, x7; \
+        adcs x4, x4, x8; \
+        ldp x5, x6, [P1+16]; \
+        ldp x7, x8, [P2+16]; \
+        adcs x5, x5, x7; \
+        adcs x6, x6, x8; \
+        mov x9, #38; \
+        csel x9, x9, xzr, cs; \
+        adds x3, x3, x9; \
+        adcs x4, x4, xzr; \
+        adcs x5, x5, xzr; \
+        adc x6, x6, xzr; \
+        stp x3, x4, [P0]; \
+        stp x5, x6, [P0+16]
+
+S2N_BN_SYMBOL(curve25519_x25519base):
+
+// Save regs and make room for temporaries
+
+        stp x19, x20, [sp, -16]!
+        stp x21, x22, [sp, -16]!
+        stp x23, x24, [sp, -16]!
+        sub sp, sp, #NSPACE
+
+// Move the output pointer to a stable place
+
+        mov res, x0
+
+// Copy the input scalar to its local variable while mangling it.
+// In principle the mangling is into 01xxx...xxx000, but actually
+// we only clear the top two bits, so 00xxx...xxxxxx. The additional
+// 2^254 * G is taken care of by the starting value for the addition
+// chain below, while we never look at the three low bits at all.
+
+        ldp x10, x11, [x1]
+        stp x10, x11, [scalar]
+        ldp x12, x13, [x1, #16]
+        bic x13, x13, #0xc000000000000000
+        stp x12, x13, [scalar+16]
+
+// The main part of the computation is on the edwards25519 curve in
+// extended-projective coordinates (X,Y,Z,T), representing a point
+// (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z).
+// Only at the very end do we translate back to curve25519. So G
+// below means the generator within edwards25519 corresponding to
+// (9,...) for curve25519, via the standard isomorphism.
+//
+// Initialize accumulator "acc" to either (2^254 + 8) * G or just 2^254 * G
+// depending on bit 3 of the scalar, the only nonzero bit of the bottom 4.
+// Thus, we have effectively dealt with bits 0, 1, 2, 3, 254 and 255.
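+//
+// Concretely, the clamped scalar is n = 2^254 + sum_{i=3}^{253} b_i * 2^i,
+// so the selection below starts acc at (2^254 + 8 * b_3) * G, leaving only
+// bits 4..253 for the loop that follows.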
+
+        ldr x0, [scalar]
+        ands xzr, x0, #8
+
+        adr x10, edwards25519_0g
+        adr x11, edwards25519_8g
+        ldp x0, x1, [x10]
+        ldp x2, x3, [x11]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc]
+
+        ldp x0, x1, [x10, 1*16]
+        ldp x2, x3, [x11, 1*16]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc+1*16]
+
+        ldp x0, x1, [x10, 2*16]
+        ldp x2, x3, [x11, 2*16]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc+2*16]
+
+        ldp x0, x1, [x10, 3*16]
+        ldp x2, x3, [x11, 3*16]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc+3*16]
+
+        mov x0, #1
+        stp x0, xzr, [acc+4*16]
+        stp xzr, xzr, [acc+5*16]
+
+        ldp x0, x1, [x10, 4*16]
+        ldp x2, x3, [x11, 4*16]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc+6*16]
+
+        ldp x0, x1, [x10, 5*16]
+        ldp x2, x3, [x11, 5*16]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc+7*16]
+
+// The counter "i" tracks the bit position for which the scalar has
+// already been absorbed, starting at 4 and going up in chunks of 4.
+//
+// The pointer "tab" points at the current block of the table for
+// multiples (2^i * j) * G at the current bit position i; 1 <= j <= 8.
+//
+// The bias is always either 0 or 1 and needs to be added to the
+// partially processed scalar implicitly. This is used to absorb 4 bits
+// of scalar per iteration from 3-bit table indexing by exploiting
+// negation: (16 * h + l) * G = (16 * (h + 1) - (16 - l)) * G is used
+// when l >= 9. Note that we can't have any bias left over at the
+// end because of the clearing of bit 255 of the scalar, meaning the
+// l >= 9 case cannot arise on the last iteration.
+
+        mov i, 4
+        adr tab, edwards25519_gtable
+        mov bias, xzr
+
+// Start of the main loop, repeated 63 times for i = 4, 8, ..., 252
+
+scalarloop:
+
+// Look at the next 4-bit field "bf", adding the previous bias as well.
+// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9,
+// setting the bias to 1 for the next iteration in the latter case.
+
+        lsr x0, i, #6
+        ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly
+        lsr x2, x2, i
+        and x2, x2, #15
+        add bf, x2, bias
+
+        cmp bf, 9
+        cset bias, cs
+
+        mov x0, 16
+        sub x0, x0, bf
+        cmp bias, xzr
+        csel ix, x0, bf, ne
+
+// Perform constant-time lookup in the table to get element number "ix".
+// The table entry for the affine point (x,y) is actually a triple
+// (y - x,x + y,2 * d * x * y) to precompute parts of the addition.
+// Note that "ix" can be 0, so we set up the appropriate identity first.
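+//
+// In this encoding the identity (x,y) = (0,1) is the triple (1,1,0),
+// since y - x = 1, x + y = 1 and 2 * d * x * y = 0.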
+ + mov x0, #1 + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + + cmp ix, #1 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #2 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #3 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #4 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #5 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #6 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #7 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #8 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, 
ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + +// We now have the triple from the table in registers as follows +// +// [x3;x2;x1;x0] = y - x +// [x7;x6;x5;x4] = x + y +// [x11;x10;x9;x8] = 2 * d * x * y +// +// In case bias = 1 we need to negate this. For Edwards curves +// -(x,y) = (-x,y), i.e. we need to negate the x coordinate. +// In this processed encoding, that amounts to swapping the +// first two fields and negating the third. +// +// The optional negation here also pretends bias = 0 whenever +// ix = 0 so that it doesn't need to handle the case of zero +// inputs, since no non-trivial table entries are zero. Note +// that in the zero case the whole negation is trivial, and +// so indeed is the swapping. + + cmp bias, #0 + + csel x12, x0, x4, eq + csel x13, x1, x5, eq + csel x14, x2, x6, eq + csel x15, x3, x7, eq + stp x12, x13, [tabent] + stp x14, x15, [tabent+16] + + csel x12, x0, x4, ne + csel x13, x1, x5, ne + csel x14, x2, x6, ne + csel x15, x3, x7, ne + stp x12, x13, [tabent+32] + stp x14, x15, [tabent+48] + + mov x0, #-19 + subs x0, x0, x8 + mov x2, #-1 + sbcs x1, x2, x9 + sbcs x2, x2, x10 + mov x3, #0x7FFFFFFFFFFFFFFF + sbc x3, x3, x11 + + cmp ix, xzr + ccmp bias, xzr, #4, ne + + csel x0, x0, x8, ne + csel x1, x1, x9, ne + stp x0, x1, [tabent+64] + csel x2, x2, x10, ne + csel x3, x3, x11, ne + stp x2, x3, [tabent+80] + +// Extended-projective and precomputed mixed addition. +// This is effectively the same as calling the standalone +// function edwards25519_pepadd(acc,acc,tabent) + + double_4(t0,z_1) + sub_4(t1,y_1,x_1) + add_4(t2,y_1,x_1) + mul_4(t3,w_1,kxy_2) + mul_4(t1,t1,ymx_2) + mul_4(t2,t2,xpy_2) + sub_twice4(t4,t0,t3) + add_twice4(t0,t0,t3) + sub_twice4(t5,t2,t1) + add_twice4(t1,t2,t1) + mul_p25519(z_3,t4,t0) + mul_p25519(x_3,t5,t4) + mul_p25519(y_3,t0,t1) + mul_p25519(w_3,t5,t1) + +// End of the main loop; move on by 4 bits. + + add i, i, 4 + cmp i, 256 + bcc scalarloop + +// Now we need to translate from Edwards curve edwards25519 back +// to the Montgomery form curve25519. The mapping in the affine +// representations is +// +// (x,y) |-> ((1 + y) / (1 - y), c * (1 + y) / ((1 - y) * x)) +// +// For x25519, we only need the x coordinate, and we compute this as +// +// (1 + y) / (1 - y) = (x + x * y) / (x - x * y) +// = (X/Z + T/Z) / (X/Z - T/Z) +// = (X + T) / (X - T) +// = (X + T) * inverse(X - T) +// +// We could equally well use (Z + Y) / (Z - Y), but the above has the +// same cost, and it more explicitly forces zero output whenever X = 0, +// regardless of how the modular inverse behaves on zero inputs. In +// the present setting (base point 9, mangled scalar) that doesn't +// really matter anyway since X = 0 never arises, but it seems a +// little bit tidier. Note that both Edwards point (0,1) which maps to +// the Montgomery point at infinity, and Edwards (0,-1) which maps to +// Montgomery (0,0) [this is the 2-torsion point] are both by definition +// mapped to 0 by the X coordinate mapping used to define curve25519. 
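+// Concretely, both of those points have x = 0 and hence X = T = 0, so the
+// formula above yields (X + T) * inverse(X - T) = 0 * inverse(0) = 0
+// whatever value inverse(0) takes.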
+// +// First the addition and subtraction: + + add_4(y_3,x_3,w_3) + sub_4(z_3,x_3,w_3) + +// Prepare to call the modular inverse function to get x_3 = 1/z_3 + + mov x0, 4 + add x1, x_3 + add x2, z_3 + adr x3, p_25519 + add x4, tmpspace + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "arm/generic/bignum_modinv.S". + + lsl x10, x0, #3 + add x21, x4, x10 + add x22, x21, x10 + mov x10, xzr +copyloop: + ldr x11, [x2, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + str x11, [x21, x10, lsl #3] + str x12, [x22, x10, lsl #3] + str x12, [x4, x10, lsl #3] + str xzr, [x1, x10, lsl #3] + add x10, x10, #0x1 + cmp x10, x0 + b.cc copyloop + ldr x11, [x4] + sub x12, x11, #0x1 + str x12, [x4] + lsl x20, x11, #2 + sub x20, x11, x20 + eor x20, x20, #0x2 + mov x12, #0x1 + madd x12, x11, x20, x12 + mul x11, x12, x12 + madd x20, x12, x20, x20 + mul x12, x11, x11 + madd x20, x11, x20, x20 + mul x11, x12, x12 + madd x20, x12, x20, x20 + madd x20, x11, x20, x20 + lsl x2, x0, #7 +outerloop: + add x10, x2, #0x3f + lsr x5, x10, #6 + cmp x5, x0 + csel x5, x0, x5, cs + mov x13, xzr + mov x15, xzr + mov x14, xzr + mov x16, xzr + mov x19, xzr + mov x10, xzr +toploop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + orr x17, x11, x12 + cmp x17, xzr + and x17, x19, x13 + csel x15, x17, x15, ne + and x17, x19, x14 + csel x16, x17, x16, ne + csel x13, x11, x13, ne + csel x14, x12, x14, ne + csetm x19, ne + add x10, x10, #0x1 + cmp x10, x5 + b.cc toploop + orr x11, x13, x14 + clz x12, x11 + negs x17, x12 + lsl x13, x13, x12 + csel x15, x15, xzr, ne + lsl x14, x14, x12 + csel x16, x16, xzr, ne + lsr x15, x15, x17 + lsr x16, x16, x17 + orr x13, x13, x15 + orr x14, x14, x16 + ldr x15, [x21] + ldr x16, [x22] + mov x6, #0x1 + mov x7, xzr + mov x8, xzr + mov x9, #0x1 + mov x10, #0x3a + tst x15, #0x1 +innerloop: + csel x11, x14, xzr, ne + csel x12, x16, xzr, ne + csel x17, x8, xzr, ne + csel x19, x9, xzr, ne + ccmp x13, x14, #0x2, ne + sub x11, x13, x11 + sub x12, x15, x12 + csel x14, x14, x13, cs + cneg x11, x11, cc + csel x16, x16, x15, cs + cneg x15, x12, cc + csel x8, x8, x6, cs + csel x9, x9, x7, cs + tst x12, #0x2 + add x6, x6, x17 + add x7, x7, x19 + lsr x13, x11, #1 + lsr x15, x15, #1 + add x8, x8, x8 + add x9, x9, x9 + sub x10, x10, #0x1 + cbnz x10, innerloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +congloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + adds x15, x15, x16 + extr x17, x15, x17, #58 + str x17, [x4, x10, lsl #3] + mov x17, x15 + umulh x15, x7, x12 + adc x13, x13, x15 + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + adds x15, x15, x16 + extr x19, x15, x19, #58 + str x19, [x1, x10, lsl #3] + mov x19, x15 + umulh x15, x9, x12 + adc x14, x14, x15 + add x10, x10, #0x1 + cmp x10, x0 + b.cc congloop + extr x13, x13, x17, #58 + extr x14, x14, x19, #58 + ldr x11, [x4] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, wmontend +wmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, 
+
+        lsl x10, x0, #3
+        add x21, x4, x10
+        add x22, x21, x10
+        mov x10, xzr
+copyloop:
+        ldr x11, [x2, x10, lsl #3]
+        ldr x12, [x3, x10, lsl #3]
+        str x11, [x21, x10, lsl #3]
+        str x12, [x22, x10, lsl #3]
+        str x12, [x4, x10, lsl #3]
+        str xzr, [x1, x10, lsl #3]
+        add x10, x10, #0x1
+        cmp x10, x0
+        b.cc copyloop
+        ldr x11, [x4]
+        sub x12, x11, #0x1
+        str x12, [x4]
+        lsl x20, x11, #2
+        sub x20, x11, x20
+        eor x20, x20, #0x2
+        mov x12, #0x1
+        madd x12, x11, x20, x12
+        mul x11, x12, x12
+        madd x20, x12, x20, x20
+        mul x12, x11, x11
+        madd x20, x11, x20, x20
+        mul x11, x12, x12
+        madd x20, x12, x20, x20
+        madd x20, x11, x20, x20
+        lsl x2, x0, #7
+outerloop:
+        add x10, x2, #0x3f
+        lsr x5, x10, #6
+        cmp x5, x0
+        csel x5, x0, x5, cs
+        mov x13, xzr
+        mov x15, xzr
+        mov x14, xzr
+        mov x16, xzr
+        mov x19, xzr
+        mov x10, xzr
+toploop:
+        ldr x11, [x21, x10, lsl #3]
+        ldr x12, [x22, x10, lsl #3]
+        orr x17, x11, x12
+        cmp x17, xzr
+        and x17, x19, x13
+        csel x15, x17, x15, ne
+        and x17, x19, x14
+        csel x16, x17, x16, ne
+        csel x13, x11, x13, ne
+        csel x14, x12, x14, ne
+        csetm x19, ne
+        add x10, x10, #0x1
+        cmp x10, x5
+        b.cc toploop
+        orr x11, x13, x14
+        clz x12, x11
+        negs x17, x12
+        lsl x13, x13, x12
+        csel x15, x15, xzr, ne
+        lsl x14, x14, x12
+        csel x16, x16, xzr, ne
+        lsr x15, x15, x17
+        lsr x16, x16, x17
+        orr x13, x13, x15
+        orr x14, x14, x16
+        ldr x15, [x21]
+        ldr x16, [x22]
+        mov x6, #0x1
+        mov x7, xzr
+        mov x8, xzr
+        mov x9, #0x1
+        mov x10, #0x3a
+        tst x15, #0x1
+innerloop:
+        csel x11, x14, xzr, ne
+        csel x12, x16, xzr, ne
+        csel x17, x8, xzr, ne
+        csel x19, x9, xzr, ne
+        ccmp x13, x14, #0x2, ne
+        sub x11, x13, x11
+        sub x12, x15, x12
+        csel x14, x14, x13, cs
+        cneg x11, x11, cc
+        csel x16, x16, x15, cs
+        cneg x15, x12, cc
+        csel x8, x8, x6, cs
+        csel x9, x9, x7, cs
+        tst x12, #0x2
+        add x6, x6, x17
+        add x7, x7, x19
+        lsr x13, x11, #1
+        lsr x15, x15, #1
+        add x8, x8, x8
+        add x9, x9, x9
+        sub x10, x10, #0x1
+        cbnz x10, innerloop
+        mov x13, xzr
+        mov x14, xzr
+        mov x17, xzr
+        mov x19, xzr
+        mov x10, xzr
+congloop:
+        ldr x11, [x4, x10, lsl #3]
+        ldr x12, [x1, x10, lsl #3]
+        mul x15, x6, x11
+        mul x16, x7, x12
+        adds x15, x15, x13
+        umulh x13, x6, x11
+        adc x13, x13, xzr
+        adds x15, x15, x16
+        extr x17, x15, x17, #58
+        str x17, [x4, x10, lsl #3]
+        mov x17, x15
+        umulh x15, x7, x12
+        adc x13, x13, x15
+        mul x15, x8, x11
+        mul x16, x9, x12
+        adds x15, x15, x14
+        umulh x14, x8, x11
+        adc x14, x14, xzr
+        adds x15, x15, x16
+        extr x19, x15, x19, #58
+        str x19, [x1, x10, lsl #3]
+        mov x19, x15
+        umulh x15, x9, x12
+        adc x14, x14, x15
+        add x10, x10, #0x1
+        cmp x10, x0
+        b.cc congloop
+        extr x13, x13, x17, #58
+        extr x14, x14, x19, #58
+        ldr x11, [x4]
+        mul x17, x11, x20
+        ldr x12, [x3]
+        mul x15, x17, x12
+        umulh x16, x17, x12
+        adds x11, x11, x15
+        mov x10, #0x1
+        sub x11, x0, #0x1
+        cbz x11, wmontend
+wmontloop:
+        ldr x11, [x3, x10, lsl #3]
+        ldr x12, [x4, x10, lsl #3]
+        mul x15, x17, x11
+        adcs x12, x12, x16
+        umulh x16, x17, x11
+        adc x16, x16, xzr
+        adds x12, x12, x15
+        sub x15, x10, #0x1
+        str x12, [x4, x15, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, wmontloop
+wmontend:
+        adcs x16, x16, x13
+        adc x13, xzr, xzr
+        sub x15, x10, #0x1
+        str x16, [x4, x15, lsl #3]
+        negs x10, xzr
+wcmploop:
+        ldr x11, [x4, x10, lsl #3]
+        ldr x12, [x3, x10, lsl #3]
+        sbcs xzr, x11, x12
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, wcmploop
+        sbcs xzr, x13, xzr
+        csetm x13, cs
+        negs x10, xzr
+wcorrloop:
+        ldr x11, [x4, x10, lsl #3]
+        ldr x12, [x3, x10, lsl #3]
+        and x12, x12, x13
+        sbcs x11, x11, x12
+        str x11, [x4, x10, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, wcorrloop
+        ldr x11, [x1]
+        mul x17, x11, x20
+        ldr x12, [x3]
+        mul x15, x17, x12
+        umulh x16, x17, x12
+        adds x11, x11, x15
+        mov x10, #0x1
+        sub x11, x0, #0x1
+        cbz x11, zmontend
+zmontloop:
+        ldr x11, [x3, x10, lsl #3]
+        ldr x12, [x1, x10, lsl #3]
+        mul x15, x17, x11
+        adcs x12, x12, x16
+        umulh x16, x17, x11
+        adc x16, x16, xzr
+        adds x12, x12, x15
+        sub x15, x10, #0x1
+        str x12, [x1, x15, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, zmontloop
+zmontend:
+        adcs x16, x16, x14
+        adc x14, xzr, xzr
+        sub x15, x10, #0x1
+        str x16, [x1, x15, lsl #3]
+        negs x10, xzr
+zcmploop:
+        ldr x11, [x1, x10, lsl #3]
+        ldr x12, [x3, x10, lsl #3]
+        sbcs xzr, x11, x12
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, zcmploop
+        sbcs xzr, x14, xzr
+        csetm x14, cs
+        negs x10, xzr
+zcorrloop:
+        ldr x11, [x1, x10, lsl #3]
+        ldr x12, [x3, x10, lsl #3]
+        and x12, x12, x14
+        sbcs x11, x11, x12
+        str x11, [x1, x10, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, zcorrloop
+        mov x13, xzr
+        mov x14, xzr
+        mov x17, xzr
+        mov x19, xzr
+        mov x10, xzr
+crossloop:
+        ldr x11, [x21, x10, lsl #3]
+        ldr x12, [x22, x10, lsl #3]
+        mul x15, x6, x11
+        mul x16, x7, x12
+        adds x15, x15, x13
+        umulh x13, x6, x11
+        adc x13, x13, xzr
+        subs x15, x15, x16
+        str x15, [x21, x10, lsl #3]
+        umulh x15, x7, x12
+        sub x17, x15, x17
+        sbcs x13, x13, x17
+        csetm x17, cc
+        mul x15, x8, x11
+        mul x16, x9, x12
+        adds x15, x15, x14
+        umulh x14, x8, x11
+        adc x14, x14, xzr
+        subs x15, x15, x16
+        str x15, [x22, x10, lsl #3]
+        umulh x15, x9, x12
+        sub x19, x15, x19
+        sbcs x14, x14, x19
+        csetm x19, cc
+        add x10, x10, #0x1
+        cmp x10, x5
+        b.cc crossloop
+        cmn x17, x17
+        ldr x15, [x21]
+        mov x10, xzr
+        sub x6, x5, #0x1
+        cbz x6, negskip1
+negloop1:
+        add x11, x10, #0x8
+        ldr x12, [x21, x11]
+        extr x15, x12, x15, #58
+        eor x15, x15, x17
+        adcs x15, x15, xzr
+        str x15, [x21, x10]
+        mov x15, x12
+        add x10, x10, #0x8
+        sub x6, x6, #0x1
+        cbnz x6, negloop1
+negskip1:
+        extr x15, x13, x15, #58
+        eor x15, x15, x17
+        adcs x15, x15, xzr
+        str x15, [x21, x10]
+        cmn x19, x19
+        ldr x15, [x22]
+        mov x10, xzr
+        sub x6, x5, #0x1
+        cbz x6, negskip2
+negloop2:
+        add x11, x10, #0x8
+        ldr x12, [x22, x11]
+        extr x15, x12, x15, #58
+        eor x15, x15, x19
+        adcs x15, x15, xzr
+        str x15, [x22, x10]
+        mov x15, x12
+        add x10, x10, #0x8
+        sub x6, x6, #0x1
+        cbnz x6, negloop2
+negskip2:
+        extr x15, x14, x15, #58
+        eor x15, x15, x19
+        adcs x15, x15, xzr
+        str x15, [x22, x10]
+        mov x10, xzr
+        cmn x17, x17
+wfliploop:
+        ldr x11, [x3, x10, lsl #3]
+        ldr x12, [x4, x10, lsl #3]
+        and x11, x11, x17
+        eor x12, x12, x17
+        adcs x11, x11, x12
+        str x11, [x4, x10, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, wfliploop
+        mvn x19, x19
+        mov x10, xzr
+        cmn x19, x19
+zfliploop:
+        ldr x11, [x3, x10, lsl #3]
+        ldr x12, [x1, x10, lsl #3]
+        and x11, x11, x19
+        eor x12, x12, x19
+        adcs x11, x11, x12
+        str x11, [x1, x10, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, zfliploop
+        subs x2, x2, #0x3a
+        b.hi outerloop
+
+// The final result is (X + T) / (X - T)
+
+        mul_p25519(resx,y_3,x_3)
+
+// Restore stack and registers
+
+        add sp, sp, #NSPACE
+        ldp x23, x24, [sp], 16
+        ldp x21, x22, [sp], 16
+        ldp x19, x20, [sp], 16
+
+        ret
+
+// ****************************************************************************
+// The precomputed data (all read-only). This is currently part of the same
+// text section, which gives position-independent code with simple PC-relative
+// addressing. However it could be put in a separate section via something like
+//
+// .section .rodata
+// ****************************************************************************
+
+// The modulus p_25519 = 2^255 - 19, for the modular inverse
+
+p_25519:
+        .quad 0xffffffffffffffed
+        .quad 0xffffffffffffffff
+        .quad 0xffffffffffffffff
+        .quad 0x7fffffffffffffff
+
+// 2^254 * G and (2^254 + 8) * G in extended-projective coordinates
+// but with Z = 1 assumed and hence left out, so they are (X,Y,T) only.
+
+edwards25519_0g:
+
+        .quad 0x251037f7cf4e861d
+        .quad 0x10ede0fb19fb128f
+        .quad 0x96c033b175f5e2c8
+        .quad 0x055f070d6c15fb0d
+
+        .quad 0x7c52af2c97473e69
+        .quad 0x022f82391bad8378
+        .quad 0x9991e1b02adb476f
+        .quad 0x511144a03a99b855
+
+        .quad 0x5fafc3b88ff2e4ae
+        .quad 0x855e4ff0de1230ff
+        .quad 0x72e302a348492870
+        .quad 0x1253c19e53dbe1bc
+
+edwards25519_8g:
+
+        .quad 0x331d086e0d9abcaa
+        .quad 0x1e23c96d311a10c9
+        .quad 0x96d0f95e58c13478
+        .quad 0x2f72f7384fcfcc59
+
+        .quad 0x39a6cd1cfd7d87c9
+        .quad 0x9867a0abd8ae153a
+        .quad 0xa49d2a5f35986745
+        .quad 0x57012940cdfe82e1
+
+        .quad 0x5046a6532ec5544a
+        .quad 0x6d674004739ff6c9
+        .quad 0x9bbaa44b234a70e3
+        .quad 0x5e6d8901138cf386
+
+// Precomputed table of multiples of generator for edwards25519
+// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples.
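
Each table entry that follows packs one affine point (x, y) as the three field
elements (y - x, x + y, 2*d*x*y), which are exactly the operands the mixed
addition above consumes (ymx_2, xpy_2, kxy_2), so the per-addition subtraction,
addition, and multiplication by d are paid once at table-generation time. A
small Python sketch of the encoding and its inverse (d is edwards25519's
-121665/121666; the sample coordinates are arbitrary stand-ins, not an actual
table entry):

    p = 2**255 - 19
    d = -121665 * pow(121666, p - 2, p) % p    # edwards25519 constant d

    def encode(x, y):
        # The (y - x, x + y, 2*d*x*y) triple stored for each table point
        return ((y - x) % p, (x + y) % p, 2 * d * x * y % p)

    def decode(ymx, xpy, kxy):
        inv2 = pow(2, p - 2, p)
        x = (xpy - ymx) * inv2 % p
        y = (xpy + ymx) * inv2 % p
        assert kxy == 2 * d * x * y % p        # consistency of the third field
        return x, y

    x, y = 0xabcdef, 0x123456                  # arbitrary stand-in coordinates
    assert decode(*encode(x, y)) == (x, y)
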
+ +edwards25519_gtable: + + // 2^4 * 1 * G + + .quad 0x7ec851ca553e2df3 + .quad 0xa71284cba64878b3 + .quad 0xe6b5e4193288d1e7 + .quad 0x4cf210ec5a9a8883 + .quad 0x322d04a52d9021f6 + .quad 0xb9c19f3375c6bf9c + .quad 0x587a3a4342d20b09 + .quad 0x143b1cf8aa64fe61 + .quad 0x9f867c7d968acaab + .quad 0x5f54258e27092729 + .quad 0xd0a7d34bea180975 + .quad 0x21b546a3374126e1 + + // 2^4 * 2 * G + + .quad 0xa94ff858a2888343 + .quad 0xce0ed4565313ed3c + .quad 0xf55c3dcfb5bf34fa + .quad 0x0a653ca5c9eab371 + .quad 0x490a7a45d185218f + .quad 0x9a15377846049335 + .quad 0x0060ea09cc31e1f6 + .quad 0x7e041577f86ee965 + .quad 0x66b2a496ce5b67f3 + .quad 0xff5492d8bd569796 + .quad 0x503cec294a592cd0 + .quad 0x566943650813acb2 + + // 2^4 * 3 * G + + .quad 0xb818db0c26620798 + .quad 0x5d5c31d9606e354a + .quad 0x0982fa4f00a8cdc7 + .quad 0x17e12bcd4653e2d4 + .quad 0x5672f9eb1dabb69d + .quad 0xba70b535afe853fc + .quad 0x47ac0f752796d66d + .quad 0x32a5351794117275 + .quad 0xd3a644a6df648437 + .quad 0x703b6559880fbfdd + .quad 0xcb852540ad3a1aa5 + .quad 0x0900b3f78e4c6468 + + // 2^4 * 4 * G + + .quad 0x0a851b9f679d651b + .quad 0xe108cb61033342f2 + .quad 0xd601f57fe88b30a3 + .quad 0x371f3acaed2dd714 + .quad 0xed280fbec816ad31 + .quad 0x52d9595bd8e6efe3 + .quad 0x0fe71772f6c623f5 + .quad 0x4314030b051e293c + .quad 0xd560005efbf0bcad + .quad 0x8eb70f2ed1870c5e + .quad 0x201f9033d084e6a0 + .quad 0x4c3a5ae1ce7b6670 + + // 2^4 * 5 * G + + .quad 0x4138a434dcb8fa95 + .quad 0x870cf67d6c96840b + .quad 0xde388574297be82c + .quad 0x7c814db27262a55a + .quad 0xbaf875e4c93da0dd + .quad 0xb93282a771b9294d + .quad 0x80d63fb7f4c6c460 + .quad 0x6de9c73dea66c181 + .quad 0x478904d5a04df8f2 + .quad 0xfafbae4ab10142d3 + .quad 0xf6c8ac63555d0998 + .quad 0x5aac4a412f90b104 + + // 2^4 * 6 * G + + .quad 0xc64f326b3ac92908 + .quad 0x5551b282e663e1e0 + .quad 0x476b35f54a1a4b83 + .quad 0x1b9da3fe189f68c2 + .quad 0x603a0d0abd7f5134 + .quad 0x8089c932e1d3ae46 + .quad 0xdf2591398798bd63 + .quad 0x1c145cd274ba0235 + .quad 0x32e8386475f3d743 + .quad 0x365b8baf6ae5d9ef + .quad 0x825238b6385b681e + .quad 0x234929c1167d65e1 + + // 2^4 * 7 * G + + .quad 0x984decaba077ade8 + .quad 0x383f77ad19eb389d + .quad 0xc7ec6b7e2954d794 + .quad 0x59c77b3aeb7c3a7a + .quad 0x48145cc21d099fcf + .quad 0x4535c192cc28d7e5 + .quad 0x80e7c1e548247e01 + .quad 0x4a5f28743b2973ee + .quad 0xd3add725225ccf62 + .quad 0x911a3381b2152c5d + .quad 0xd8b39fad5b08f87d + .quad 0x6f05606b4799fe3b + + // 2^4 * 8 * G + + .quad 0x9ffe9e92177ba962 + .quad 0x98aee71d0de5cae1 + .quad 0x3ff4ae942d831044 + .quad 0x714de12e58533ac8 + .quad 0x5b433149f91b6483 + .quad 0xadb5dc655a2cbf62 + .quad 0x87fa8412632827b3 + .quad 0x60895e91ab49f8d8 + .quad 0xe9ecf2ed0cf86c18 + .quad 0xb46d06120735dfd4 + .quad 0xbc9da09804b96be7 + .quad 0x73e2e62fd96dc26b + + // 2^8 * 1 * G + + .quad 0xed5b635449aa515e + .quad 0xa865c49f0bc6823a + .quad 0x850c1fe95b42d1c4 + .quad 0x30d76d6f03d315b9 + .quad 0x2eccdd0e632f9c1d + .quad 0x51d0b69676893115 + .quad 0x52dfb76ba8637a58 + .quad 0x6dd37d49a00eef39 + .quad 0x6c4444172106e4c7 + .quad 0xfb53d680928d7f69 + .quad 0xb4739ea4694d3f26 + .quad 0x10c697112e864bb0 + + // 2^8 * 2 * G + + .quad 0x6493c4277dbe5fde + .quad 0x265d4fad19ad7ea2 + .quad 0x0e00dfc846304590 + .quad 0x25e61cabed66fe09 + .quad 0x0ca62aa08358c805 + .quad 0x6a3d4ae37a204247 + .quad 0x7464d3a63b11eddc + .quad 0x03bf9baf550806ef + .quad 0x3f13e128cc586604 + .quad 0x6f5873ecb459747e + .quad 0xa0b63dedcc1268f5 + .quad 0x566d78634586e22c + + // 2^8 * 3 * G + + .quad 0x1637a49f9cc10834 + .quad 0xbc8e56d5a89bc451 + 
.quad 0x1cb5ec0f7f7fd2db + .quad 0x33975bca5ecc35d9 + .quad 0xa1054285c65a2fd0 + .quad 0x6c64112af31667c3 + .quad 0x680ae240731aee58 + .quad 0x14fba5f34793b22a + .quad 0x3cd746166985f7d4 + .quad 0x593e5e84c9c80057 + .quad 0x2fc3f2b67b61131e + .quad 0x14829cea83fc526c + + // 2^8 * 4 * G + + .quad 0xff437b8497dd95c2 + .quad 0x6c744e30aa4eb5a7 + .quad 0x9e0c5d613c85e88b + .quad 0x2fd9c71e5f758173 + .quad 0x21e70b2f4e71ecb8 + .quad 0xe656ddb940a477e3 + .quad 0xbf6556cece1d4f80 + .quad 0x05fc3bc4535d7b7e + .quad 0x24b8b3ae52afdedd + .quad 0x3495638ced3b30cf + .quad 0x33a4bc83a9be8195 + .quad 0x373767475c651f04 + + // 2^8 * 5 * G + + .quad 0x2fba99fd40d1add9 + .quad 0xb307166f96f4d027 + .quad 0x4363f05215f03bae + .quad 0x1fbea56c3b18f999 + .quad 0x634095cb14246590 + .quad 0xef12144016c15535 + .quad 0x9e38140c8910bc60 + .quad 0x6bf5905730907c8c + .quad 0x0fa778f1e1415b8a + .quad 0x06409ff7bac3a77e + .quad 0x6f52d7b89aa29a50 + .quad 0x02521cf67a635a56 + + // 2^8 * 6 * G + + .quad 0x513fee0b0a9d5294 + .quad 0x8f98e75c0fdf5a66 + .quad 0xd4618688bfe107ce + .quad 0x3fa00a7e71382ced + .quad 0xb1146720772f5ee4 + .quad 0xe8f894b196079ace + .quad 0x4af8224d00ac824a + .quad 0x001753d9f7cd6cc4 + .quad 0x3c69232d963ddb34 + .quad 0x1dde87dab4973858 + .quad 0xaad7d1f9a091f285 + .quad 0x12b5fe2fa048edb6 + + // 2^8 * 7 * G + + .quad 0x71f0fbc496fce34d + .quad 0x73b9826badf35bed + .quad 0xd2047261ff28c561 + .quad 0x749b76f96fb1206f + .quad 0xdf2b7c26ad6f1e92 + .quad 0x4b66d323504b8913 + .quad 0x8c409dc0751c8bc3 + .quad 0x6f7e93c20796c7b8 + .quad 0x1f5af604aea6ae05 + .quad 0xc12351f1bee49c99 + .quad 0x61a808b5eeff6b66 + .quad 0x0fcec10f01e02151 + + // 2^8 * 8 * G + + .quad 0x644d58a649fe1e44 + .quad 0x21fcaea231ad777e + .quad 0x02441c5a887fd0d2 + .quad 0x4901aa7183c511f3 + .quad 0x3df2d29dc4244e45 + .quad 0x2b020e7493d8de0a + .quad 0x6cc8067e820c214d + .quad 0x413779166feab90a + .quad 0x08b1b7548c1af8f0 + .quad 0xce0f7a7c246299b4 + .quad 0xf760b0f91e06d939 + .quad 0x41bb887b726d1213 + + // 2^12 * 1 * G + + .quad 0x9267806c567c49d8 + .quad 0x066d04ccca791e6a + .quad 0xa69f5645e3cc394b + .quad 0x5c95b686a0788cd2 + .quad 0x97d980e0aa39f7d2 + .quad 0x35d0384252c6b51c + .quad 0x7d43f49307cd55aa + .quad 0x56bd36cfb78ac362 + .quad 0x2ac519c10d14a954 + .quad 0xeaf474b494b5fa90 + .quad 0xe6af8382a9f87a5a + .quad 0x0dea6db1879be094 + + // 2^12 * 2 * G + + .quad 0xaa66bf547344e5ab + .quad 0xda1258888f1b4309 + .quad 0x5e87d2b3fd564b2f + .quad 0x5b2c78885483b1dd + .quad 0x15baeb74d6a8797a + .quad 0x7ef55cf1fac41732 + .quad 0x29001f5a3c8b05c5 + .quad 0x0ad7cc8752eaccfb + .quad 0x52151362793408cf + .quad 0xeb0f170319963d94 + .quad 0xa833b2fa883d9466 + .quad 0x093a7fa775003c78 + + // 2^12 * 3 * G + + .quad 0xe5107de63a16d7be + .quad 0xa377ffdc9af332cf + .quad 0x70d5bf18440b677f + .quad 0x6a252b19a4a31403 + .quad 0xb8e9604460a91286 + .quad 0x7f3fd8047778d3de + .quad 0x67d01e31bf8a5e2d + .quad 0x7b038a06c27b653e + .quad 0x9ed919d5d36990f3 + .quad 0x5213aebbdb4eb9f2 + .quad 0xc708ea054cb99135 + .quad 0x58ded57f72260e56 + + // 2^12 * 4 * G + + .quad 0x78e79dade9413d77 + .quad 0xf257f9d59729e67d + .quad 0x59db910ee37aa7e6 + .quad 0x6aa11b5bbb9e039c + .quad 0xda6d53265b0fd48b + .quad 0x8960823193bfa988 + .quad 0xd78ac93261d57e28 + .quad 0x79f2942d3a5c8143 + .quad 0x97da2f25b6c88de9 + .quad 0x251ba7eaacf20169 + .quad 0x09b44f87ef4eb4e4 + .quad 0x7d90ab1bbc6a7da5 + + // 2^12 * 5 * G + + .quad 0x9acca683a7016bfe + .quad 0x90505f4df2c50b6d + .quad 0x6b610d5fcce435aa + .quad 0x19a10d446198ff96 + .quad 0x1a07a3f496b3c397 + .quad 
0x11ceaa188f4e2532 + .quad 0x7d9498d5a7751bf0 + .quad 0x19ed161f508dd8a0 + .quad 0x560a2cd687dce6ca + .quad 0x7f3568c48664cf4d + .quad 0x8741e95222803a38 + .quad 0x483bdab1595653fc + + // 2^12 * 6 * G + + .quad 0xfa780f148734fa49 + .quad 0x106f0b70360534e0 + .quad 0x2210776fe3e307bd + .quad 0x3286c109dde6a0fe + .quad 0xd6cf4d0ab4da80f6 + .quad 0x82483e45f8307fe0 + .quad 0x05005269ae6f9da4 + .quad 0x1c7052909cf7877a + .quad 0x32ee7de2874e98d4 + .quad 0x14c362e9b97e0c60 + .quad 0x5781dcde6a60a38a + .quad 0x217dd5eaaa7aa840 + + // 2^12 * 7 * G + + .quad 0x9db7c4d0248e1eb0 + .quad 0xe07697e14d74bf52 + .quad 0x1e6a9b173c562354 + .quad 0x7fa7c21f795a4965 + .quad 0x8bdf1fb9be8c0ec8 + .quad 0x00bae7f8e30a0282 + .quad 0x4963991dad6c4f6c + .quad 0x07058a6e5df6f60a + .quad 0xe9eb02c4db31f67f + .quad 0xed25fd8910bcfb2b + .quad 0x46c8131f5c5cddb4 + .quad 0x33b21c13a0cb9bce + + // 2^12 * 8 * G + + .quad 0x360692f8087d8e31 + .quad 0xf4dcc637d27163f7 + .quad 0x25a4e62065ea5963 + .quad 0x659bf72e5ac160d9 + .quad 0x9aafb9b05ee38c5b + .quad 0xbf9d2d4e071a13c7 + .quad 0x8eee6e6de933290a + .quad 0x1c3bab17ae109717 + .quad 0x1c9ab216c7cab7b0 + .quad 0x7d65d37407bbc3cc + .quad 0x52744750504a58d5 + .quad 0x09f2606b131a2990 + + // 2^16 * 1 * G + + .quad 0x40e87d44744346be + .quad 0x1d48dad415b52b25 + .quad 0x7c3a8a18a13b603e + .quad 0x4eb728c12fcdbdf7 + .quad 0x7e234c597c6691ae + .quad 0x64889d3d0a85b4c8 + .quad 0xdae2c90c354afae7 + .quad 0x0a871e070c6a9e1d + .quad 0x3301b5994bbc8989 + .quad 0x736bae3a5bdd4260 + .quad 0x0d61ade219d59e3c + .quad 0x3ee7300f2685d464 + + // 2^16 * 2 * G + + .quad 0xf5d255e49e7dd6b7 + .quad 0x8016115c610b1eac + .quad 0x3c99975d92e187ca + .quad 0x13815762979125c2 + .quad 0x43fa7947841e7518 + .quad 0xe5c6fa59639c46d7 + .quad 0xa1065e1de3052b74 + .quad 0x7d47c6a2cfb89030 + .quad 0x3fdad0148ef0d6e0 + .quad 0x9d3e749a91546f3c + .quad 0x71ec621026bb8157 + .quad 0x148cf58d34c9ec80 + + // 2^16 * 3 * G + + .quad 0x46a492f67934f027 + .quad 0x469984bef6840aa9 + .quad 0x5ca1bc2a89611854 + .quad 0x3ff2fa1ebd5dbbd4 + .quad 0xe2572f7d9ae4756d + .quad 0x56c345bb88f3487f + .quad 0x9fd10b6d6960a88d + .quad 0x278febad4eaea1b9 + .quad 0xb1aa681f8c933966 + .quad 0x8c21949c20290c98 + .quad 0x39115291219d3c52 + .quad 0x4104dd02fe9c677b + + // 2^16 * 4 * G + + .quad 0x72b2bf5e1124422a + .quad 0xa1fa0c3398a33ab5 + .quad 0x94cb6101fa52b666 + .quad 0x2c863b00afaf53d5 + .quad 0x81214e06db096ab8 + .quad 0x21a8b6c90ce44f35 + .quad 0x6524c12a409e2af5 + .quad 0x0165b5a48efca481 + .quad 0xf190a474a0846a76 + .quad 0x12eff984cd2f7cc0 + .quad 0x695e290658aa2b8f + .quad 0x591b67d9bffec8b8 + + // 2^16 * 5 * G + + .quad 0x312f0d1c80b49bfa + .quad 0x5979515eabf3ec8a + .quad 0x727033c09ef01c88 + .quad 0x3de02ec7ca8f7bcb + .quad 0x99b9b3719f18b55d + .quad 0xe465e5faa18c641e + .quad 0x61081136c29f05ed + .quad 0x489b4f867030128b + .quad 0xd232102d3aeb92ef + .quad 0xe16253b46116a861 + .quad 0x3d7eabe7190baa24 + .quad 0x49f5fbba496cbebf + + // 2^16 * 6 * G + + .quad 0x30949a108a5bcfd4 + .quad 0xdc40dd70bc6473eb + .quad 0x92c294c1307c0d1c + .quad 0x5604a86dcbfa6e74 + .quad 0x155d628c1e9c572e + .quad 0x8a4d86acc5884741 + .quad 0x91a352f6515763eb + .quad 0x06a1a6c28867515b + .quad 0x7288d1d47c1764b6 + .quad 0x72541140e0418b51 + .quad 0x9f031a6018acf6d1 + .quad 0x20989e89fe2742c6 + + // 2^16 * 7 * G + + .quad 0x499777fd3a2dcc7f + .quad 0x32857c2ca54fd892 + .quad 0xa279d864d207e3a0 + .quad 0x0403ed1d0ca67e29 + .quad 0x1674278b85eaec2e + .quad 0x5621dc077acb2bdf + .quad 0x640a4c1661cbf45a + .quad 0x730b9950f70595d3 + .quad 
0xc94b2d35874ec552 + .quad 0xc5e6c8cf98246f8d + .quad 0xf7cb46fa16c035ce + .quad 0x5bd7454308303dcc + + // 2^16 * 8 * G + + .quad 0x7f9ad19528b24cc2 + .quad 0x7f6b54656335c181 + .quad 0x66b8b66e4fc07236 + .quad 0x133a78007380ad83 + .quad 0x85c4932115e7792a + .quad 0xc64c89a2bdcdddc9 + .quad 0x9d1e3da8ada3d762 + .quad 0x5bb7db123067f82c + .quad 0x0961f467c6ca62be + .quad 0x04ec21d6211952ee + .quad 0x182360779bd54770 + .quad 0x740dca6d58f0e0d2 + + // 2^20 * 1 * G + + .quad 0x50b70bf5d3f0af0b + .quad 0x4feaf48ae32e71f7 + .quad 0x60e84ed3a55bbd34 + .quad 0x00ed489b3f50d1ed + .quad 0x3906c72aed261ae5 + .quad 0x9ab68fd988e100f7 + .quad 0xf5e9059af3360197 + .quad 0x0e53dc78bf2b6d47 + .quad 0xb90829bf7971877a + .quad 0x5e4444636d17e631 + .quad 0x4d05c52e18276893 + .quad 0x27632d9a5a4a4af5 + + // 2^20 * 2 * G + + .quad 0xd11ff05154b260ce + .quad 0xd86dc38e72f95270 + .quad 0x601fcd0d267cc138 + .quad 0x2b67916429e90ccd + .quad 0xa98285d187eaffdb + .quad 0xa5b4fbbbd8d0a864 + .quad 0xb658f27f022663f7 + .quad 0x3bbc2b22d99ce282 + .quad 0xb917c952583c0a58 + .quad 0x653ff9b80fe4c6f3 + .quad 0x9b0da7d7bcdf3c0c + .quad 0x43a0eeb6ab54d60e + + // 2^20 * 3 * G + + .quad 0x396966a46d4a5487 + .quad 0xf811a18aac2bb3ba + .quad 0x66e4685b5628b26b + .quad 0x70a477029d929b92 + .quad 0x3ac6322357875fe8 + .quad 0xd9d4f4ecf5fbcb8f + .quad 0x8dee8493382bb620 + .quad 0x50c5eaa14c799fdc + .quad 0xdd0edc8bd6f2fb3c + .quad 0x54c63aa79cc7b7a0 + .quad 0xae0b032b2c8d9f1a + .quad 0x6f9ce107602967fb + + // 2^20 * 4 * G + + .quad 0xad1054b1cde1c22a + .quad 0xc4a8e90248eb32df + .quad 0x5f3e7b33accdc0ea + .quad 0x72364713fc79963e + .quad 0x139693063520e0b5 + .quad 0x437fcf7c88ea03fe + .quad 0xf7d4c40bd3c959bc + .quad 0x699154d1f893ded9 + .quad 0x315d5c75b4b27526 + .quad 0xcccb842d0236daa5 + .quad 0x22f0c8a3345fee8e + .quad 0x73975a617d39dbed + + // 2^20 * 5 * G + + .quad 0xe4024df96375da10 + .quad 0x78d3251a1830c870 + .quad 0x902b1948658cd91c + .quad 0x7e18b10b29b7438a + .quad 0x6f37f392f4433e46 + .quad 0x0e19b9a11f566b18 + .quad 0x220fb78a1fd1d662 + .quad 0x362a4258a381c94d + .quad 0x9071d9132b6beb2f + .quad 0x0f26e9ad28418247 + .quad 0xeab91ec9bdec925d + .quad 0x4be65bc8f48af2de + + // 2^20 * 6 * G + + .quad 0x78487feba36e7028 + .quad 0x5f3f13001dd8ce34 + .quad 0x934fb12d4b30c489 + .quad 0x056c244d397f0a2b + .quad 0x1d50fba257c26234 + .quad 0x7bd4823adeb0678b + .quad 0xc2b0dc6ea6538af5 + .quad 0x5665eec6351da73e + .quad 0xdb3ee00943bfb210 + .quad 0x4972018720800ac2 + .quad 0x26ab5d6173bd8667 + .quad 0x20b209c2ab204938 + + // 2^20 * 7 * G + + .quad 0x549e342ac07fb34b + .quad 0x02d8220821373d93 + .quad 0xbc262d70acd1f567 + .quad 0x7a92c9fdfbcac784 + .quad 0x1fcca94516bd3289 + .quad 0x448d65aa41420428 + .quad 0x59c3b7b216a55d62 + .quad 0x49992cc64e612cd8 + .quad 0x65bd1bea70f801de + .quad 0x1befb7c0fe49e28a + .quad 0xa86306cdb1b2ae4a + .quad 0x3b7ac0cd265c2a09 + + // 2^20 * 8 * G + + .quad 0x822bee438c01bcec + .quad 0x530cb525c0fbc73b + .quad 0x48519034c1953fe9 + .quad 0x265cc261e09a0f5b + .quad 0xf0d54e4f22ed39a7 + .quad 0xa2aae91e5608150a + .quad 0xf421b2e9eddae875 + .quad 0x31bc531d6b7de992 + .quad 0xdf3d134da980f971 + .quad 0x7a4fb8d1221a22a7 + .quad 0x3df7d42035aad6d8 + .quad 0x2a14edcc6a1a125e + + // 2^24 * 1 * G + + .quad 0xdf48ee0752cfce4e + .quad 0xc3fffaf306ec08b7 + .quad 0x05710b2ab95459c4 + .quad 0x161d25fa963ea38d + .quad 0x231a8c570478433c + .quad 0xb7b5270ec281439d + .quad 0xdbaa99eae3d9079f + .quad 0x2c03f5256c2b03d9 + .quad 0x790f18757b53a47d + .quad 0x307b0130cf0c5879 + .quad 0x31903d77257ef7f9 + .quad 
0x699468bdbd96bbaf + + // 2^24 * 2 * G + + .quad 0xbd1f2f46f4dafecf + .quad 0x7cef0114a47fd6f7 + .quad 0xd31ffdda4a47b37f + .quad 0x525219a473905785 + .quad 0xd8dd3de66aa91948 + .quad 0x485064c22fc0d2cc + .quad 0x9b48246634fdea2f + .quad 0x293e1c4e6c4a2e3a + .quad 0x376e134b925112e1 + .quad 0x703778b5dca15da0 + .quad 0xb04589af461c3111 + .quad 0x5b605c447f032823 + + // 2^24 * 3 * G + + .quad 0xb965805920c47c89 + .quad 0xe7f0100c923b8fcc + .quad 0x0001256502e2ef77 + .quad 0x24a76dcea8aeb3ee + .quad 0x3be9fec6f0e7f04c + .quad 0x866a579e75e34962 + .quad 0x5542ef161e1de61a + .quad 0x2f12fef4cc5abdd5 + .quad 0x0a4522b2dfc0c740 + .quad 0x10d06e7f40c9a407 + .quad 0xc6cf144178cff668 + .quad 0x5e607b2518a43790 + + // 2^24 * 4 * G + + .quad 0x58b31d8f6cdf1818 + .quad 0x35cfa74fc36258a2 + .quad 0xe1b3ff4f66e61d6e + .quad 0x5067acab6ccdd5f7 + .quad 0xa02c431ca596cf14 + .quad 0xe3c42d40aed3e400 + .quad 0xd24526802e0f26db + .quad 0x201f33139e457068 + .quad 0xfd527f6b08039d51 + .quad 0x18b14964017c0006 + .quad 0xd5220eb02e25a4a8 + .quad 0x397cba8862460375 + + // 2^24 * 5 * G + + .quad 0x30c13093f05959b2 + .quad 0xe23aa18de9a97976 + .quad 0x222fd491721d5e26 + .quad 0x2339d320766e6c3a + .quad 0x7815c3fbc81379e7 + .quad 0xa6619420dde12af1 + .quad 0xffa9c0f885a8fdd5 + .quad 0x771b4022c1e1c252 + .quad 0xd87dd986513a2fa7 + .quad 0xf5ac9b71f9d4cf08 + .quad 0xd06bc31b1ea283b3 + .quad 0x331a189219971a76 + + // 2^24 * 6 * G + + .quad 0xf5166f45fb4f80c6 + .quad 0x9c36c7de61c775cf + .quad 0xe3d4e81b9041d91c + .quad 0x31167c6b83bdfe21 + .quad 0x26512f3a9d7572af + .quad 0x5bcbe28868074a9e + .quad 0x84edc1c11180f7c4 + .quad 0x1ac9619ff649a67b + .quad 0xf22b3842524b1068 + .quad 0x5068343bee9ce987 + .quad 0xfc9d71844a6250c8 + .quad 0x612436341f08b111 + + // 2^24 * 7 * G + + .quad 0xd99d41db874e898d + .quad 0x09fea5f16c07dc20 + .quad 0x793d2c67d00f9bbc + .quad 0x46ebe2309e5eff40 + .quad 0x8b6349e31a2d2638 + .quad 0x9ddfb7009bd3fd35 + .quad 0x7f8bf1b8a3a06ba4 + .quad 0x1522aa3178d90445 + .quad 0x2c382f5369614938 + .quad 0xdafe409ab72d6d10 + .quad 0xe8c83391b646f227 + .quad 0x45fe70f50524306c + + // 2^24 * 8 * G + + .quad 0xda4875a6960c0b8c + .quad 0x5b68d076ef0e2f20 + .quad 0x07fb51cf3d0b8fd4 + .quad 0x428d1623a0e392d4 + .quad 0x62f24920c8951491 + .quad 0x05f007c83f630ca2 + .quad 0x6fbb45d2f5c9d4b8 + .quad 0x16619f6db57a2245 + .quad 0x084f4a4401a308fd + .quad 0xa82219c376a5caac + .quad 0xdeb8de4643d1bc7d + .quad 0x1d81592d60bd38c6 + + // 2^28 * 1 * G + + .quad 0xd833d7beec2a4c38 + .quad 0x2c9162830acc20ed + .quad 0xe93a47aa92df7581 + .quad 0x702d67a3333c4a81 + .quad 0x3a4a369a2f89c8a1 + .quad 0x63137a1d7c8de80d + .quad 0xbcac008a78eda015 + .quad 0x2cb8b3a5b483b03f + .quad 0x36e417cbcb1b90a1 + .quad 0x33b3ddaa7f11794e + .quad 0x3f510808885bc607 + .quad 0x24141dc0e6a8020d + + // 2^28 * 2 * G + + .quad 0x59f73c773fefee9d + .quad 0xb3f1ef89c1cf989d + .quad 0xe35dfb42e02e545f + .quad 0x5766120b47a1b47c + .quad 0x91925dccbd83157d + .quad 0x3ca1205322cc8094 + .quad 0x28e57f183f90d6e4 + .quad 0x1a4714cede2e767b + .quad 0xdb20ba0fb8b6b7ff + .quad 0xb732c3b677511fa1 + .quad 0xa92b51c099f02d89 + .quad 0x4f3875ad489ca5f1 + + // 2^28 * 3 * G + + .quad 0xc7fc762f4932ab22 + .quad 0x7ac0edf72f4c3c1b + .quad 0x5f6b55aa9aa895e8 + .quad 0x3680274dad0a0081 + .quad 0x79ed13f6ee73eec0 + .quad 0xa5c6526d69110bb1 + .quad 0xe48928c38603860c + .quad 0x722a1446fd7059f5 + .quad 0xd0959fe9a8cf8819 + .quad 0xd0a995508475a99c + .quad 0x6eac173320b09cc5 + .quad 0x628ecf04331b1095 + + // 2^28 * 4 * G + + .quad 0x98bcb118a9d0ddbc + .quad 0xee449e3408b4802b 
+ .quad 0x87089226b8a6b104 + .quad 0x685f349a45c7915d + .quad 0x9b41acf85c74ccf1 + .quad 0xb673318108265251 + .quad 0x99c92aed11adb147 + .quad 0x7a47d70d34ecb40f + .quad 0x60a0c4cbcc43a4f5 + .quad 0x775c66ca3677bea9 + .quad 0xa17aa1752ff8f5ed + .quad 0x11ded9020e01fdc0 + + // 2^28 * 5 * G + + .quad 0x890e7809caefe704 + .quad 0x8728296de30e8c6c + .quad 0x4c5cd2a392aeb1c9 + .quad 0x194263d15771531f + .quad 0x471f95b03bea93b7 + .quad 0x0552d7d43313abd3 + .quad 0xbd9370e2e17e3f7b + .quad 0x7b120f1db20e5bec + .quad 0x17d2fb3d86502d7a + .quad 0xb564d84450a69352 + .quad 0x7da962c8a60ed75d + .quad 0x00d0f85b318736aa + + // 2^28 * 6 * G + + .quad 0x978b142e777c84fd + .quad 0xf402644705a8c062 + .quad 0xa67ad51be7e612c7 + .quad 0x2f7b459698dd6a33 + .quad 0xa6753c1efd7621c1 + .quad 0x69c0b4a7445671f5 + .quad 0x971f527405b23c11 + .quad 0x387bc74851a8c7cd + .quad 0x81894b4d4a52a9a8 + .quad 0xadd93e12f6b8832f + .quad 0x184d8548b61bd638 + .quad 0x3f1c62dbd6c9f6cd + + // 2^28 * 7 * G + + .quad 0x2e8f1f0091910c1f + .quad 0xa4df4fe0bff2e12c + .quad 0x60c6560aee927438 + .quad 0x6338283facefc8fa + .quad 0x3fad3e40148f693d + .quad 0x052656e194eb9a72 + .quad 0x2f4dcbfd184f4e2f + .quad 0x406f8db1c482e18b + .quad 0x9e630d2c7f191ee4 + .quad 0x4fbf8301bc3ff670 + .quad 0x787d8e4e7afb73c4 + .quad 0x50d83d5be8f58fa5 + + // 2^28 * 8 * G + + .quad 0x85683916c11a1897 + .quad 0x2d69a4efe506d008 + .quad 0x39af1378f664bd01 + .quad 0x65942131361517c6 + .quad 0xc0accf90b4d3b66d + .quad 0xa7059de561732e60 + .quad 0x033d1f7870c6b0ba + .quad 0x584161cd26d946e4 + .quad 0xbbf2b1a072d27ca2 + .quad 0xbf393c59fbdec704 + .quad 0xe98dbbcee262b81e + .quad 0x02eebd0b3029b589 + + // 2^32 * 1 * G + + .quad 0x61368756a60dac5f + .quad 0x17e02f6aebabdc57 + .quad 0x7f193f2d4cce0f7d + .quad 0x20234a7789ecdcf0 + .quad 0x8765b69f7b85c5e8 + .quad 0x6ff0678bd168bab2 + .quad 0x3a70e77c1d330f9b + .quad 0x3a5f6d51b0af8e7c + .quad 0x76d20db67178b252 + .quad 0x071c34f9d51ed160 + .quad 0xf62a4a20b3e41170 + .quad 0x7cd682353cffe366 + + // 2^32 * 2 * G + + .quad 0x0be1a45bd887fab6 + .quad 0x2a846a32ba403b6e + .quad 0xd9921012e96e6000 + .quad 0x2838c8863bdc0943 + .quad 0xa665cd6068acf4f3 + .quad 0x42d92d183cd7e3d3 + .quad 0x5759389d336025d9 + .quad 0x3ef0253b2b2cd8ff + .quad 0xd16bb0cf4a465030 + .quad 0xfa496b4115c577ab + .quad 0x82cfae8af4ab419d + .quad 0x21dcb8a606a82812 + + // 2^32 * 3 * G + + .quad 0x5c6004468c9d9fc8 + .quad 0x2540096ed42aa3cb + .quad 0x125b4d4c12ee2f9c + .quad 0x0bc3d08194a31dab + .quad 0x9a8d00fabe7731ba + .quad 0x8203607e629e1889 + .quad 0xb2cc023743f3d97f + .quad 0x5d840dbf6c6f678b + .quad 0x706e380d309fe18b + .quad 0x6eb02da6b9e165c7 + .quad 0x57bbba997dae20ab + .quad 0x3a4276232ac196dd + + // 2^32 * 4 * G + + .quad 0x4b42432c8a7084fa + .quad 0x898a19e3dfb9e545 + .quad 0xbe9f00219c58e45d + .quad 0x1ff177cea16debd1 + .quad 0x3bf8c172db447ecb + .quad 0x5fcfc41fc6282dbd + .quad 0x80acffc075aa15fe + .quad 0x0770c9e824e1a9f9 + .quad 0xcf61d99a45b5b5fd + .quad 0x860984e91b3a7924 + .quad 0xe7300919303e3e89 + .quad 0x39f264fd41500b1e + + // 2^32 * 5 * G + + .quad 0xa7ad3417dbe7e29c + .quad 0xbd94376a2b9c139c + .quad 0xa0e91b8e93597ba9 + .quad 0x1712d73468889840 + .quad 0xd19b4aabfe097be1 + .quad 0xa46dfce1dfe01929 + .quad 0xc3c908942ca6f1ff + .quad 0x65c621272c35f14e + .quad 0xe72b89f8ce3193dd + .quad 0x4d103356a125c0bb + .quad 0x0419a93d2e1cfe83 + .quad 0x22f9800ab19ce272 + + // 2^32 * 6 * G + + .quad 0x605a368a3e9ef8cb + .quad 0xe3e9c022a5504715 + .quad 0x553d48b05f24248f + .quad 0x13f416cd647626e5 + .quad 0x42029fdd9a6efdac + .quad 
0xb912cebe34a54941 + .quad 0x640f64b987bdf37b + .quad 0x4171a4d38598cab4 + .quad 0xfa2758aa99c94c8c + .quad 0x23006f6fb000b807 + .quad 0xfbd291ddadda5392 + .quad 0x508214fa574bd1ab + + // 2^32 * 7 * G + + .quad 0xc20269153ed6fe4b + .quad 0xa65a6739511d77c4 + .quad 0xcbde26462c14af94 + .quad 0x22f960ec6faba74b + .quad 0x461a15bb53d003d6 + .quad 0xb2102888bcf3c965 + .quad 0x27c576756c683a5a + .quad 0x3a7758a4c86cb447 + .quad 0x548111f693ae5076 + .quad 0x1dae21df1dfd54a6 + .quad 0x12248c90f3115e65 + .quad 0x5d9fd15f8de7f494 + + // 2^32 * 8 * G + + .quad 0x031408d36d63727f + .quad 0x6a379aefd7c7b533 + .quad 0xa9e18fc5ccaee24b + .quad 0x332f35914f8fbed3 + .quad 0x3f244d2aeed7521e + .quad 0x8e3a9028432e9615 + .quad 0xe164ba772e9c16d4 + .quad 0x3bc187fa47eb98d8 + .quad 0x6d470115ea86c20c + .quad 0x998ab7cb6c46d125 + .quad 0xd77832b53a660188 + .quad 0x450d81ce906fba03 + + // 2^36 * 1 * G + + .quad 0xf8ae4d2ad8453902 + .quad 0x7018058ee8db2d1d + .quad 0xaab3995fc7d2c11e + .quad 0x53b16d2324ccca79 + .quad 0x23264d66b2cae0b5 + .quad 0x7dbaed33ebca6576 + .quad 0x030ebed6f0d24ac8 + .quad 0x2a887f78f7635510 + .quad 0x2a23b9e75c012d4f + .quad 0x0c974651cae1f2ea + .quad 0x2fb63273675d70ca + .quad 0x0ba7250b864403f5 + + // 2^36 * 2 * G + + .quad 0xbb0d18fd029c6421 + .quad 0xbc2d142189298f02 + .quad 0x8347f8e68b250e96 + .quad 0x7b9f2fe8032d71c9 + .quad 0xdd63589386f86d9c + .quad 0x61699176e13a85a4 + .quad 0x2e5111954eaa7d57 + .quad 0x32c21b57fb60bdfb + .quad 0xd87823cd319e0780 + .quad 0xefc4cfc1897775c5 + .quad 0x4854fb129a0ab3f7 + .quad 0x12c49d417238c371 + + // 2^36 * 3 * G + + .quad 0x0950b533ffe83769 + .quad 0x21861c1d8e1d6bd1 + .quad 0xf022d8381302e510 + .quad 0x2509200c6391cab4 + .quad 0x09b3a01783799542 + .quad 0x626dd08faad5ee3f + .quad 0xba00bceeeb70149f + .quad 0x1421b246a0a444c9 + .quad 0x4aa43a8e8c24a7c7 + .quad 0x04c1f540d8f05ef5 + .quad 0xadba5e0c0b3eb9dc + .quad 0x2ab5504448a49ce3 + + // 2^36 * 4 * G + + .quad 0x2ed227266f0f5dec + .quad 0x9824ee415ed50824 + .quad 0x807bec7c9468d415 + .quad 0x7093bae1b521e23f + .quad 0xdc07ac631c5d3afa + .quad 0x58615171f9df8c6c + .quad 0x72a079d89d73e2b0 + .quad 0x7301f4ceb4eae15d + .quad 0x6409e759d6722c41 + .quad 0xa674e1cf72bf729b + .quad 0xbc0a24eb3c21e569 + .quad 0x390167d24ebacb23 + + // 2^36 * 5 * G + + .quad 0x27f58e3bba353f1c + .quad 0x4c47764dbf6a4361 + .quad 0xafbbc4e56e562650 + .quad 0x07db2ee6aae1a45d + .quad 0xd7bb054ba2f2120b + .quad 0xe2b9ceaeb10589b7 + .quad 0x3fe8bac8f3c0edbe + .quad 0x4cbd40767112cb69 + .quad 0x0b603cc029c58176 + .quad 0x5988e3825cb15d61 + .quad 0x2bb61413dcf0ad8d + .quad 0x7b8eec6c74183287 + + // 2^36 * 6 * G + + .quad 0xe4ca40782cd27cb0 + .quad 0xdaf9c323fbe967bd + .quad 0xb29bd34a8ad41e9e + .quad 0x72810497626ede4d + .quad 0x32fee570fc386b73 + .quad 0xda8b0141da3a8cc7 + .quad 0x975ffd0ac8968359 + .quad 0x6ee809a1b132a855 + .quad 0x9444bb31fcfd863a + .quad 0x2fe3690a3e4e48c5 + .quad 0xdc29c867d088fa25 + .quad 0x13bd1e38d173292e + + // 2^36 * 7 * G + + .quad 0xd32b4cd8696149b5 + .quad 0xe55937d781d8aab7 + .quad 0x0bcb2127ae122b94 + .quad 0x41e86fcfb14099b0 + .quad 0x223fb5cf1dfac521 + .quad 0x325c25316f554450 + .quad 0x030b98d7659177ac + .quad 0x1ed018b64f88a4bd + .quad 0x3630dfa1b802a6b0 + .quad 0x880f874742ad3bd5 + .quad 0x0af90d6ceec5a4d4 + .quad 0x746a247a37cdc5d9 + + // 2^36 * 8 * G + + .quad 0xd531b8bd2b7b9af6 + .quad 0x5005093537fc5b51 + .quad 0x232fcf25c593546d + .quad 0x20a365142bb40f49 + .quad 0x6eccd85278d941ed + .quad 0x2254ae83d22f7843 + .quad 0xc522d02e7bbfcdb7 + .quad 0x681e3351bff0e4e2 + .quad 
0x8b64b59d83034f45 + .quad 0x2f8b71f21fa20efb + .quad 0x69249495ba6550e4 + .quad 0x539ef98e45d5472b + + // 2^40 * 1 * G + + .quad 0x6e7bb6a1a6205275 + .quad 0xaa4f21d7413c8e83 + .quad 0x6f56d155e88f5cb2 + .quad 0x2de25d4ba6345be1 + .quad 0xd074d8961cae743f + .quad 0xf86d18f5ee1c63ed + .quad 0x97bdc55be7f4ed29 + .quad 0x4cbad279663ab108 + .quad 0x80d19024a0d71fcd + .quad 0xc525c20afb288af8 + .quad 0xb1a3974b5f3a6419 + .quad 0x7d7fbcefe2007233 + + // 2^40 * 2 * G + + .quad 0xfaef1e6a266b2801 + .quad 0x866c68c4d5739f16 + .quad 0xf68a2fbc1b03762c + .quad 0x5975435e87b75a8d + .quad 0xcd7c5dc5f3c29094 + .quad 0xc781a29a2a9105ab + .quad 0x80c61d36421c3058 + .quad 0x4f9cd196dcd8d4d7 + .quad 0x199297d86a7b3768 + .quad 0xd0d058241ad17a63 + .quad 0xba029cad5c1c0c17 + .quad 0x7ccdd084387a0307 + + // 2^40 * 3 * G + + .quad 0xdca6422c6d260417 + .quad 0xae153d50948240bd + .quad 0xa9c0c1b4fb68c677 + .quad 0x428bd0ed61d0cf53 + .quad 0x9b0c84186760cc93 + .quad 0xcdae007a1ab32a99 + .quad 0xa88dec86620bda18 + .quad 0x3593ca848190ca44 + .quad 0x9213189a5e849aa7 + .quad 0xd4d8c33565d8facd + .quad 0x8c52545b53fdbbd1 + .quad 0x27398308da2d63e6 + + // 2^40 * 4 * G + + .quad 0x42c38d28435ed413 + .quad 0xbd50f3603278ccc9 + .quad 0xbb07ab1a79da03ef + .quad 0x269597aebe8c3355 + .quad 0xb9a10e4c0a702453 + .quad 0x0fa25866d57d1bde + .quad 0xffb9d9b5cd27daf7 + .quad 0x572c2945492c33fd + .quad 0xc77fc745d6cd30be + .quad 0xe4dfe8d3e3baaefb + .quad 0xa22c8830aa5dda0c + .quad 0x7f985498c05bca80 + + // 2^40 * 5 * G + + .quad 0x3849ce889f0be117 + .quad 0x8005ad1b7b54a288 + .quad 0x3da3c39f23fc921c + .quad 0x76c2ec470a31f304 + .quad 0xd35615520fbf6363 + .quad 0x08045a45cf4dfba6 + .quad 0xeec24fbc873fa0c2 + .quad 0x30f2653cd69b12e7 + .quad 0x8a08c938aac10c85 + .quad 0x46179b60db276bcb + .quad 0xa920c01e0e6fac70 + .quad 0x2f1273f1596473da + + // 2^40 * 6 * G + + .quad 0x4739fc7c8ae01e11 + .quad 0xfd5274904a6aab9f + .quad 0x41d98a8287728f2e + .quad 0x5d9e572ad85b69f2 + .quad 0x30488bd755a70bc0 + .quad 0x06d6b5a4f1d442e7 + .quad 0xead1a69ebc596162 + .quad 0x38ac1997edc5f784 + .quad 0x0666b517a751b13b + .quad 0x747d06867e9b858c + .quad 0xacacc011454dde49 + .quad 0x22dfcd9cbfe9e69c + + // 2^40 * 7 * G + + .quad 0x8ddbd2e0c30d0cd9 + .quad 0xad8e665facbb4333 + .quad 0x8f6b258c322a961f + .quad 0x6b2916c05448c1c7 + .quad 0x56ec59b4103be0a1 + .quad 0x2ee3baecd259f969 + .quad 0x797cb29413f5cd32 + .quad 0x0fe9877824cde472 + .quad 0x7edb34d10aba913b + .quad 0x4ea3cd822e6dac0e + .quad 0x66083dff6578f815 + .quad 0x4c303f307ff00a17 + + // 2^40 * 8 * G + + .quad 0xd30a3bd617b28c85 + .quad 0xc5d377b739773bea + .quad 0xc6c6e78c1e6a5cbf + .quad 0x0d61b8f78b2ab7c4 + .quad 0x29fc03580dd94500 + .quad 0xecd27aa46fbbec93 + .quad 0x130a155fc2e2a7f8 + .quad 0x416b151ab706a1d5 + .quad 0x56a8d7efe9c136b0 + .quad 0xbd07e5cd58e44b20 + .quad 0xafe62fda1b57e0ab + .quad 0x191a2af74277e8d2 + + // 2^44 * 1 * G + + .quad 0xd550095bab6f4985 + .quad 0x04f4cd5b4fbfaf1a + .quad 0x9d8e2ed12a0c7540 + .quad 0x2bc24e04b2212286 + .quad 0x09d4b60b2fe09a14 + .quad 0xc384f0afdbb1747e + .quad 0x58e2ea8978b5fd6e + .quad 0x519ef577b5e09b0a + .quad 0x1863d7d91124cca9 + .quad 0x7ac08145b88a708e + .quad 0x2bcd7309857031f5 + .quad 0x62337a6e8ab8fae5 + + // 2^44 * 2 * G + + .quad 0x4bcef17f06ffca16 + .quad 0xde06e1db692ae16a + .quad 0x0753702d614f42b0 + .quad 0x5f6041b45b9212d0 + .quad 0xd1ab324e1b3a1273 + .quad 0x18947cf181055340 + .quad 0x3b5d9567a98c196e + .quad 0x7fa00425802e1e68 + .quad 0x7d531574028c2705 + .quad 0x80317d69db0d75fe + .quad 0x30fface8ef8c8ddd + .quad 
0x7e9de97bb6c3e998 + + // 2^44 * 3 * G + + .quad 0x1558967b9e6585a3 + .quad 0x97c99ce098e98b92 + .quad 0x10af149b6eb3adad + .quad 0x42181fe8f4d38cfa + .quad 0xf004be62a24d40dd + .quad 0xba0659910452d41f + .quad 0x81c45ee162a44234 + .quad 0x4cb829d8a22266ef + .quad 0x1dbcaa8407b86681 + .quad 0x081f001e8b26753b + .quad 0x3cd7ce6a84048e81 + .quad 0x78af11633f25f22c + + // 2^44 * 4 * G + + .quad 0x8416ebd40b50babc + .quad 0x1508722628208bee + .quad 0xa3148fafb9c1c36d + .quad 0x0d07daacd32d7d5d + .quad 0x3241c00e7d65318c + .quad 0xe6bee5dcd0e86de7 + .quad 0x118b2dc2fbc08c26 + .quad 0x680d04a7fc603dc3 + .quad 0xf9c2414a695aa3eb + .quad 0xdaa42c4c05a68f21 + .quad 0x7c6c23987f93963e + .quad 0x210e8cd30c3954e3 + + // 2^44 * 5 * G + + .quad 0xac4201f210a71c06 + .quad 0x6a65e0aef3bfb021 + .quad 0xbc42c35c393632f7 + .quad 0x56ea8db1865f0742 + .quad 0x2b50f16137fe6c26 + .quad 0xe102bcd856e404d8 + .quad 0x12b0f1414c561f6b + .quad 0x51b17bc8d028ec91 + .quad 0xfff5fb4bcf535119 + .quad 0xf4989d79df1108a0 + .quad 0xbdfcea659a3ba325 + .quad 0x18a11f1174d1a6f2 + + // 2^44 * 6 * G + + .quad 0x407375ab3f6bba29 + .quad 0x9ec3b6d8991e482e + .quad 0x99c80e82e55f92e9 + .quad 0x307c13b6fb0c0ae1 + .quad 0xfbd63cdad27a5f2c + .quad 0xf00fc4bc8aa106d7 + .quad 0x53fb5c1a8e64a430 + .quad 0x04eaabe50c1a2e85 + .quad 0x24751021cb8ab5e7 + .quad 0xfc2344495c5010eb + .quad 0x5f1e717b4e5610a1 + .quad 0x44da5f18c2710cd5 + + // 2^44 * 7 * G + + .quad 0x033cc55ff1b82eb5 + .quad 0xb15ae36d411cae52 + .quad 0xba40b6198ffbacd3 + .quad 0x768edce1532e861f + .quad 0x9156fe6b89d8eacc + .quad 0xe6b79451e23126a1 + .quad 0xbd7463d93944eb4e + .quad 0x726373f6767203ae + .quad 0xe305ca72eb7ef68a + .quad 0x662cf31f70eadb23 + .quad 0x18f026fdb4c45b68 + .quad 0x513b5384b5d2ecbd + + // 2^44 * 8 * G + + .quad 0x46d46280c729989e + .quad 0x4b93fbd05368a5dd + .quad 0x63df3f81d1765a89 + .quad 0x34cebd64b9a0a223 + .quad 0x5e2702878af34ceb + .quad 0x900b0409b946d6ae + .quad 0x6512ebf7dabd8512 + .quad 0x61d9b76988258f81 + .quad 0xa6c5a71349b7d94b + .quad 0xa3f3d15823eb9446 + .quad 0x0416fbd277484834 + .quad 0x69d45e6f2c70812f + + // 2^48 * 1 * G + + .quad 0xce16f74bc53c1431 + .quad 0x2b9725ce2072edde + .quad 0xb8b9c36fb5b23ee7 + .quad 0x7e2e0e450b5cc908 + .quad 0x9fe62b434f460efb + .quad 0xded303d4a63607d6 + .quad 0xf052210eb7a0da24 + .quad 0x237e7dbe00545b93 + .quad 0x013575ed6701b430 + .quad 0x231094e69f0bfd10 + .quad 0x75320f1583e47f22 + .quad 0x71afa699b11155e3 + + // 2^48 * 2 * G + + .quad 0x65ce6f9b3953b61d + .quad 0xc65839eaafa141e6 + .quad 0x0f435ffda9f759fe + .quad 0x021142e9c2b1c28e + .quad 0xea423c1c473b50d6 + .quad 0x51e87a1f3b38ef10 + .quad 0x9b84bf5fb2c9be95 + .quad 0x00731fbc78f89a1c + .quad 0xe430c71848f81880 + .quad 0xbf960c225ecec119 + .quad 0xb6dae0836bba15e3 + .quad 0x4c4d6f3347e15808 + + // 2^48 * 3 * G + + .quad 0x18f7eccfc17d1fc9 + .quad 0x6c75f5a651403c14 + .quad 0xdbde712bf7ee0cdf + .quad 0x193fddaaa7e47a22 + .quad 0x2f0cddfc988f1970 + .quad 0x6b916227b0b9f51b + .quad 0x6ec7b6c4779176be + .quad 0x38bf9500a88f9fa8 + .quad 0x1fd2c93c37e8876f + .quad 0xa2f61e5a18d1462c + .quad 0x5080f58239241276 + .quad 0x6a6fb99ebf0d4969 + + // 2^48 * 4 * G + + .quad 0x6a46c1bb560855eb + .quad 0x2416bb38f893f09d + .quad 0xd71d11378f71acc1 + .quad 0x75f76914a31896ea + .quad 0xeeb122b5b6e423c6 + .quad 0x939d7010f286ff8e + .quad 0x90a92a831dcf5d8c + .quad 0x136fda9f42c5eb10 + .quad 0xf94cdfb1a305bdd1 + .quad 0x0f364b9d9ff82c08 + .quad 0x2a87d8a5c3bb588a + .quad 0x022183510be8dcba + + // 2^48 * 5 * G + + .quad 0x4af766385ead2d14 + .quad 0xa08ed880ca7c5830 
+ .quad 0x0d13a6e610211e3d + .quad 0x6a071ce17b806c03 + .quad 0x9d5a710143307a7f + .quad 0xb063de9ec47da45f + .quad 0x22bbfe52be927ad3 + .quad 0x1387c441fd40426c + .quad 0xb5d3c3d187978af8 + .quad 0x722b5a3d7f0e4413 + .quad 0x0d7b4848bb477ca0 + .quad 0x3171b26aaf1edc92 + + // 2^48 * 6 * G + + .quad 0xa92f319097564ca8 + .quad 0xff7bb84c2275e119 + .quad 0x4f55fe37a4875150 + .quad 0x221fd4873cf0835a + .quad 0xa60db7d8b28a47d1 + .quad 0xa6bf14d61770a4f1 + .quad 0xd4a1f89353ddbd58 + .quad 0x6c514a63344243e9 + .quad 0x2322204f3a156341 + .quad 0xfb73e0e9ba0a032d + .quad 0xfce0dd4c410f030e + .quad 0x48daa596fb924aaa + + // 2^48 * 7 * G + + .quad 0x6eca8e665ca59cc7 + .quad 0xa847254b2e38aca0 + .quad 0x31afc708d21e17ce + .quad 0x676dd6fccad84af7 + .quad 0x14f61d5dc84c9793 + .quad 0x9941f9e3ef418206 + .quad 0xcdf5b88f346277ac + .quad 0x58c837fa0e8a79a9 + .quad 0x0cf9688596fc9058 + .quad 0x1ddcbbf37b56a01b + .quad 0xdcc2e77d4935d66a + .quad 0x1c4f73f2c6a57f0a + + // 2^48 * 8 * G + + .quad 0x0e7a4fbd305fa0bb + .quad 0x829d4ce054c663ad + .quad 0xf421c3832fe33848 + .quad 0x795ac80d1bf64c42 + .quad 0xb36e706efc7c3484 + .quad 0x73dfc9b4c3c1cf61 + .quad 0xeb1d79c9781cc7e5 + .quad 0x70459adb7daf675c + .quad 0x1b91db4991b42bb3 + .quad 0x572696234b02dcca + .quad 0x9fdf9ee51f8c78dc + .quad 0x5fe162848ce21fd3 + + // 2^52 * 1 * G + + .quad 0xe2790aae4d077c41 + .quad 0x8b938270db7469a3 + .quad 0x6eb632dc8abd16a2 + .quad 0x720814ecaa064b72 + .quad 0x315c29c795115389 + .quad 0xd7e0e507862f74ce + .quad 0x0c4a762185927432 + .quad 0x72de6c984a25a1e4 + .quad 0xae9ab553bf6aa310 + .quad 0x050a50a9806d6e1b + .quad 0x92bb7403adff5139 + .quad 0x0394d27645be618b + + // 2^52 * 2 * G + + .quad 0x4d572251857eedf4 + .quad 0xe3724edde19e93c5 + .quad 0x8a71420e0b797035 + .quad 0x3b3c833687abe743 + .quad 0xf5396425b23545a4 + .quad 0x15a7a27e98fbb296 + .quad 0xab6c52bc636fdd86 + .quad 0x79d995a8419334ee + .quad 0xcd8a8ea61195dd75 + .quad 0xa504d8a81dd9a82f + .quad 0x540dca81a35879b6 + .quad 0x60dd16a379c86a8a + + // 2^52 * 3 * G + + .quad 0x35a2c8487381e559 + .quad 0x596ffea6d78082cb + .quad 0xcb9771ebdba7b653 + .quad 0x5a08b5019b4da685 + .quad 0x3501d6f8153e47b8 + .quad 0xb7a9675414a2f60c + .quad 0x112ee8b6455d9523 + .quad 0x4e62a3c18112ea8a + .quad 0xc8d4ac04516ab786 + .quad 0x595af3215295b23d + .quad 0xd6edd234db0230c1 + .quad 0x0929efe8825b41cc + + // 2^52 * 4 * G + + .quad 0x5f0601d1cbd0f2d3 + .quad 0x736e412f6132bb7f + .quad 0x83604432238dde87 + .quad 0x1e3a5272f5c0753c + .quad 0x8b3172b7ad56651d + .quad 0x01581b7a3fabd717 + .quad 0x2dc94df6424df6e4 + .quad 0x30376e5d2c29284f + .quad 0xd2918da78159a59c + .quad 0x6bdc1cd93f0713f3 + .quad 0x565f7a934acd6590 + .quad 0x53daacec4cb4c128 + + // 2^52 * 5 * G + + .quad 0x4ca73bd79cc8a7d6 + .quad 0x4d4a738f47e9a9b2 + .quad 0xf4cbf12942f5fe00 + .quad 0x01a13ff9bdbf0752 + .quad 0x99852bc3852cfdb0 + .quad 0x2cc12e9559d6ed0b + .quad 0x70f9e2bf9b5ac27b + .quad 0x4f3b8c117959ae99 + .quad 0x55b6c9c82ff26412 + .quad 0x1ac4a8c91fb667a8 + .quad 0xd527bfcfeb778bf2 + .quad 0x303337da7012a3be + + // 2^52 * 6 * G + + .quad 0x955422228c1c9d7c + .quad 0x01fac1371a9b340f + .quad 0x7e8d9177925b48d7 + .quad 0x53f8ad5661b3e31b + .quad 0x976d3ccbfad2fdd1 + .quad 0xcb88839737a640a8 + .quad 0x2ff00c1d6734cb25 + .quad 0x269ff4dc789c2d2b + .quad 0x0c003fbdc08d678d + .quad 0x4d982fa37ead2b17 + .quad 0xc07e6bcdb2e582f1 + .quad 0x296c7291df412a44 + + // 2^52 * 7 * G + + .quad 0x7903de2b33daf397 + .quad 0xd0ff0619c9a624b3 + .quad 0x8a1d252b555b3e18 + .quad 0x2b6d581c52e0b7c0 + .quad 0xdfb23205dab8b59e + .quad 
0x465aeaa0c8092250 + .quad 0xd133c1189a725d18 + .quad 0x2327370261f117d1 + .quad 0x3d0543d3623e7986 + .quad 0x679414c2c278a354 + .quad 0xae43f0cc726196f6 + .quad 0x7836c41f8245eaba + + // 2^52 * 8 * G + + .quad 0xe7a254db49e95a81 + .quad 0x5192d5d008b0ad73 + .quad 0x4d20e5b1d00afc07 + .quad 0x5d55f8012cf25f38 + .quad 0xca651e848011937c + .quad 0xc6b0c46e6ef41a28 + .quad 0xb7021ba75f3f8d52 + .quad 0x119dff99ead7b9fd + .quad 0x43eadfcbf4b31d4d + .quad 0xc6503f7411148892 + .quad 0xfeee68c5060d3b17 + .quad 0x329293b3dd4a0ac8 + + // 2^56 * 1 * G + + .quad 0x4e59214fe194961a + .quad 0x49be7dc70d71cd4f + .quad 0x9300cfd23b50f22d + .quad 0x4789d446fc917232 + .quad 0x2879852d5d7cb208 + .quad 0xb8dedd70687df2e7 + .quad 0xdc0bffab21687891 + .quad 0x2b44c043677daa35 + .quad 0x1a1c87ab074eb78e + .quad 0xfac6d18e99daf467 + .quad 0x3eacbbcd484f9067 + .quad 0x60c52eef2bb9a4e4 + + // 2^56 * 2 * G + + .quad 0x0b5d89bc3bfd8bf1 + .quad 0xb06b9237c9f3551a + .quad 0x0e4c16b0d53028f5 + .quad 0x10bc9c312ccfcaab + .quad 0x702bc5c27cae6d11 + .quad 0x44c7699b54a48cab + .quad 0xefbc4056ba492eb2 + .quad 0x70d77248d9b6676d + .quad 0xaa8ae84b3ec2a05b + .quad 0x98699ef4ed1781e0 + .quad 0x794513e4708e85d1 + .quad 0x63755bd3a976f413 + + // 2^56 * 3 * G + + .quad 0xb55fa03e2ad10853 + .quad 0x356f75909ee63569 + .quad 0x9ff9f1fdbe69b890 + .quad 0x0d8cc1c48bc16f84 + .quad 0x3dc7101897f1acb7 + .quad 0x5dda7d5ec165bbd8 + .quad 0x508e5b9c0fa1020f + .quad 0x2763751737c52a56 + .quad 0x029402d36eb419a9 + .quad 0xf0b44e7e77b460a5 + .quad 0xcfa86230d43c4956 + .quad 0x70c2dd8a7ad166e7 + + // 2^56 * 4 * G + + .quad 0x656194509f6fec0e + .quad 0xee2e7ea946c6518d + .quad 0x9733c1f367e09b5c + .quad 0x2e0fac6363948495 + .quad 0x91d4967db8ed7e13 + .quad 0x74252f0ad776817a + .quad 0xe40982e00d852564 + .quad 0x32b8613816a53ce5 + .quad 0x79e7f7bee448cd64 + .quad 0x6ac83a67087886d0 + .quad 0xf89fd4d9a0e4db2e + .quad 0x4179215c735a4f41 + + // 2^56 * 5 * G + + .quad 0x8c7094e7d7dced2a + .quad 0x97fb8ac347d39c70 + .quad 0xe13be033a906d902 + .quad 0x700344a30cd99d76 + .quad 0xe4ae33b9286bcd34 + .quad 0xb7ef7eb6559dd6dc + .quad 0x278b141fb3d38e1f + .quad 0x31fa85662241c286 + .quad 0xaf826c422e3622f4 + .quad 0xc12029879833502d + .quad 0x9bc1b7e12b389123 + .quad 0x24bb2312a9952489 + + // 2^56 * 6 * G + + .quad 0xb1a8ed1732de67c3 + .quad 0x3cb49418461b4948 + .quad 0x8ebd434376cfbcd2 + .quad 0x0fee3e871e188008 + .quad 0x41f80c2af5f85c6b + .quad 0x687284c304fa6794 + .quad 0x8945df99a3ba1bad + .quad 0x0d1d2af9ffeb5d16 + .quad 0xa9da8aa132621edf + .quad 0x30b822a159226579 + .quad 0x4004197ba79ac193 + .quad 0x16acd79718531d76 + + // 2^56 * 7 * G + + .quad 0x72df72af2d9b1d3d + .quad 0x63462a36a432245a + .quad 0x3ecea07916b39637 + .quad 0x123e0ef6b9302309 + .quad 0xc959c6c57887b6ad + .quad 0x94e19ead5f90feba + .quad 0x16e24e62a342f504 + .quad 0x164ed34b18161700 + .quad 0x487ed94c192fe69a + .quad 0x61ae2cea3a911513 + .quad 0x877bf6d3b9a4de27 + .quad 0x78da0fc61073f3eb + + // 2^56 * 8 * G + + .quad 0x5bf15d28e52bc66a + .quad 0x2c47e31870f01a8e + .quad 0x2419afbc06c28bdd + .quad 0x2d25deeb256b173a + .quad 0xa29f80f1680c3a94 + .quad 0x71f77e151ae9e7e6 + .quad 0x1100f15848017973 + .quad 0x054aa4b316b38ddd + .quad 0xdfc8468d19267cb8 + .quad 0x0b28789c66e54daf + .quad 0x2aeb1d2a666eec17 + .quad 0x134610a6ab7da760 + + // 2^60 * 1 * G + + .quad 0xcaf55ec27c59b23f + .quad 0x99aeed3e154d04f2 + .quad 0x68441d72e14141f4 + .quad 0x140345133932a0a2 + .quad 0xd91430e0dc028c3c + .quad 0x0eb955a85217c771 + .quad 0x4b09e1ed2c99a1fa + .quad 0x42881af2bd6a743c + .quad 
0x7bfec69aab5cad3d + .quad 0xc23e8cd34cb2cfad + .quad 0x685dd14bfb37d6a2 + .quad 0x0ad6d64415677a18 + + // 2^60 * 2 * G + + .quad 0x781a439e417becb5 + .quad 0x4ac5938cd10e0266 + .quad 0x5da385110692ac24 + .quad 0x11b065a2ade31233 + .quad 0x7914892847927e9f + .quad 0x33dad6ef370aa877 + .quad 0x1f8f24fa11122703 + .quad 0x5265ac2f2adf9592 + .quad 0x405fdd309afcb346 + .quad 0xd9723d4428e63f54 + .quad 0x94c01df05f65aaae + .quad 0x43e4dc3ae14c0809 + + // 2^60 * 3 * G + + .quad 0xbc12c7f1a938a517 + .quad 0x473028ab3180b2e1 + .quad 0x3f78571efbcd254a + .quad 0x74e534426ff6f90f + .quad 0xea6f7ac3adc2c6a3 + .quad 0xd0e928f6e9717c94 + .quad 0xe2d379ead645eaf5 + .quad 0x46dd8785c51ffbbe + .quad 0x709801be375c8898 + .quad 0x4b06dab5e3fd8348 + .quad 0x75880ced27230714 + .quad 0x2b09468fdd2f4c42 + + // 2^60 * 4 * G + + .quad 0x97c749eeb701cb96 + .quad 0x83f438d4b6a369c3 + .quad 0x62962b8b9a402cd9 + .quad 0x6976c7509888df7b + .quad 0x5b97946582ffa02a + .quad 0xda096a51fea8f549 + .quad 0xa06351375f77af9b + .quad 0x1bcfde61201d1e76 + .quad 0x4a4a5490246a59a2 + .quad 0xd63ebddee87fdd90 + .quad 0xd9437c670d2371fa + .quad 0x69e87308d30f8ed6 + + // 2^60 * 5 * G + + .quad 0x435a8bb15656beb0 + .quad 0xf8fac9ba4f4d5bca + .quad 0xb9b278c41548c075 + .quad 0x3eb0ef76e892b622 + .quad 0x0f80bf028bc80303 + .quad 0x6aae16b37a18cefb + .quad 0xdd47ea47d72cd6a3 + .quad 0x61943588f4ed39aa + .quad 0xd26e5c3e91039f85 + .quad 0xc0e9e77df6f33aa9 + .quad 0xe8968c5570066a93 + .quad 0x3c34d1881faaaddd + + // 2^60 * 6 * G + + .quad 0x3f9d2b5ea09f9ec0 + .quad 0x1dab3b6fb623a890 + .quad 0xa09ba3ea72d926c4 + .quad 0x374193513fd8b36d + .quad 0xbd5b0b8f2fffe0d9 + .quad 0x6aa254103ed24fb9 + .quad 0x2ac7d7bcb26821c4 + .quad 0x605b394b60dca36a + .quad 0xb4e856e45a9d1ed2 + .quad 0xefe848766c97a9a2 + .quad 0xb104cf641e5eee7d + .quad 0x2f50b81c88a71c8f + + // 2^60 * 7 * G + + .quad 0x31723c61fc6811bb + .quad 0x9cb450486211800f + .quad 0x768933d347995753 + .quad 0x3491a53502752fcd + .quad 0x2b552ca0a7da522a + .quad 0x3230b336449b0250 + .quad 0xf2c4c5bca4b99fb9 + .quad 0x7b2c674958074a22 + .quad 0xd55165883ed28cdf + .quad 0x12d84fd2d362de39 + .quad 0x0a874ad3e3378e4f + .quad 0x000d2b1f7c763e74 + + // 2^60 * 8 * G + + .quad 0x3d420811d06d4a67 + .quad 0xbefc048590e0ffe3 + .quad 0xf870c6b7bd487bde + .quad 0x6e2a7316319afa28 + .quad 0x9624778c3e94a8ab + .quad 0x0ad6f3cee9a78bec + .quad 0x948ac7810d743c4f + .quad 0x76627935aaecfccc + .quad 0x56a8ac24d6d59a9f + .quad 0xc8db753e3096f006 + .quad 0x477f41e68f4c5299 + .quad 0x588d851cf6c86114 + + // 2^64 * 1 * G + + .quad 0x51138ec78df6b0fe + .quad 0x5397da89e575f51b + .quad 0x09207a1d717af1b9 + .quad 0x2102fdba2b20d650 + .quad 0xcd2a65e777d1f515 + .quad 0x548991878faa60f1 + .quad 0xb1b73bbcdabc06e5 + .quad 0x654878cba97cc9fb + .quad 0x969ee405055ce6a1 + .quad 0x36bca7681251ad29 + .quad 0x3a1af517aa7da415 + .quad 0x0ad725db29ecb2ba + + // 2^64 * 2 * G + + .quad 0xdc4267b1834e2457 + .quad 0xb67544b570ce1bc5 + .quad 0x1af07a0bf7d15ed7 + .quad 0x4aefcffb71a03650 + .quad 0xfec7bc0c9b056f85 + .quad 0x537d5268e7f5ffd7 + .quad 0x77afc6624312aefa + .quad 0x4f675f5302399fd9 + .quad 0xc32d36360415171e + .quad 0xcd2bef118998483b + .quad 0x870a6eadd0945110 + .quad 0x0bccbb72a2a86561 + + // 2^64 * 3 * G + + .quad 0x185e962feab1a9c8 + .quad 0x86e7e63565147dcd + .quad 0xb092e031bb5b6df2 + .quad 0x4024f0ab59d6b73e + .quad 0x186d5e4c50fe1296 + .quad 0xe0397b82fee89f7e + .quad 0x3bc7f6c5507031b0 + .quad 0x6678fd69108f37c2 + .quad 0x1586fa31636863c2 + .quad 0x07f68c48572d33f2 + .quad 0x4f73cc9f789eaefc + .quad 
0x2d42e2108ead4701 + + // 2^64 * 4 * G + + .quad 0x97f5131594dfd29b + .quad 0x6155985d313f4c6a + .quad 0xeba13f0708455010 + .quad 0x676b2608b8d2d322 + .quad 0x21717b0d0f537593 + .quad 0x914e690b131e064c + .quad 0x1bb687ae752ae09f + .quad 0x420bf3a79b423c6e + .quad 0x8138ba651c5b2b47 + .quad 0x8671b6ec311b1b80 + .quad 0x7bff0cb1bc3135b0 + .quad 0x745d2ffa9c0cf1e0 + + // 2^64 * 5 * G + + .quad 0xbf525a1e2bc9c8bd + .quad 0xea5b260826479d81 + .quad 0xd511c70edf0155db + .quad 0x1ae23ceb960cf5d0 + .quad 0x6036df5721d34e6a + .quad 0xb1db8827997bb3d0 + .quad 0xd3c209c3c8756afa + .quad 0x06e15be54c1dc839 + .quad 0x5b725d871932994a + .quad 0x32351cb5ceb1dab0 + .quad 0x7dc41549dab7ca05 + .quad 0x58ded861278ec1f7 + + // 2^64 * 6 * G + + .quad 0xd8173793f266c55c + .quad 0xc8c976c5cc454e49 + .quad 0x5ce382f8bc26c3a8 + .quad 0x2ff39de85485f6f9 + .quad 0x2dfb5ba8b6c2c9a8 + .quad 0x48eeef8ef52c598c + .quad 0x33809107f12d1573 + .quad 0x08ba696b531d5bd8 + .quad 0x77ed3eeec3efc57a + .quad 0x04e05517d4ff4811 + .quad 0xea3d7a3ff1a671cb + .quad 0x120633b4947cfe54 + + // 2^64 * 7 * G + + .quad 0x0b94987891610042 + .quad 0x4ee7b13cecebfae8 + .quad 0x70be739594f0a4c0 + .quad 0x35d30a99b4d59185 + .quad 0x82bd31474912100a + .quad 0xde237b6d7e6fbe06 + .quad 0xe11e761911ea79c6 + .quad 0x07433be3cb393bde + .quad 0xff7944c05ce997f4 + .quad 0x575d3de4b05c51a3 + .quad 0x583381fd5a76847c + .quad 0x2d873ede7af6da9f + + // 2^64 * 8 * G + + .quad 0x157a316443373409 + .quad 0xfab8b7eef4aa81d9 + .quad 0xb093fee6f5a64806 + .quad 0x2e773654707fa7b6 + .quad 0xaa6202e14e5df981 + .quad 0xa20d59175015e1f5 + .quad 0x18a275d3bae21d6c + .quad 0x0543618a01600253 + .quad 0x0deabdf4974c23c1 + .quad 0xaa6f0a259dce4693 + .quad 0x04202cb8a29aba2c + .quad 0x4b1443362d07960d + + // 2^68 * 1 * G + + .quad 0x47b837f753242cec + .quad 0x256dc48cc04212f2 + .quad 0xe222fbfbe1d928c5 + .quad 0x48ea295bad8a2c07 + .quad 0x299b1c3f57c5715e + .quad 0x96cb929e6b686d90 + .quad 0x3004806447235ab3 + .quad 0x2c435c24a44d9fe1 + .quad 0x0607c97c80f8833f + .quad 0x0e851578ca25ec5b + .quad 0x54f7450b161ebb6f + .quad 0x7bcb4792a0def80e + + // 2^68 * 2 * G + + .quad 0x8487e3d02bc73659 + .quad 0x4baf8445059979df + .quad 0xd17c975adcad6fbf + .quad 0x57369f0bdefc96b6 + .quad 0x1cecd0a0045224c2 + .quad 0x757f1b1b69e53952 + .quad 0x775b7a925289f681 + .quad 0x1b6cc62016736148 + .quad 0xf1a9990175638698 + .quad 0x353dd1beeeaa60d3 + .quad 0x849471334c9ba488 + .quad 0x63fa6e6843ade311 + + // 2^68 * 3 * G + + .quad 0xd15c20536597c168 + .quad 0x9f73740098d28789 + .quad 0x18aee7f13257ba1f + .quad 0x3418bfda07346f14 + .quad 0x2195becdd24b5eb7 + .quad 0x5e41f18cc0cd44f9 + .quad 0xdf28074441ca9ede + .quad 0x07073b98f35b7d67 + .quad 0xd03c676c4ce530d4 + .quad 0x0b64c0473b5df9f4 + .quad 0x065cef8b19b3a31e + .quad 0x3084d661533102c9 + + // 2^68 * 4 * G + + .quad 0xe1f6b79ebf8469ad + .quad 0x15801004e2663135 + .quad 0x9a498330af74181b + .quad 0x3ba2504f049b673c + .quad 0x9a6ce876760321fd + .quad 0x7fe2b5109eb63ad8 + .quad 0x00e7d4ae8ac80592 + .quad 0x73d86b7abb6f723a + .quad 0x0b52b5606dba5ab6 + .quad 0xa9134f0fbbb1edab + .quad 0x30a9520d9b04a635 + .quad 0x6813b8f37973e5db + + // 2^68 * 5 * G + + .quad 0x9854b054334127c1 + .quad 0x105d047882fbff25 + .quad 0xdb49f7f944186f4f + .quad 0x1768e838bed0b900 + .quad 0xf194ca56f3157e29 + .quad 0x136d35705ef528a5 + .quad 0xdd4cef778b0599bc + .quad 0x7d5472af24f833ed + .quad 0xd0ef874daf33da47 + .quad 0x00d3be5db6e339f9 + .quad 0x3f2a8a2f9c9ceece + .quad 0x5d1aeb792352435a + + // 2^68 * 6 * G + + .quad 0xf59e6bb319cd63ca + .quad 0x670c159221d06839 
+ .quad 0xb06d565b2150cab6 + .quad 0x20fb199d104f12a3 + .quad 0x12c7bfaeb61ba775 + .quad 0xb84e621fe263bffd + .quad 0x0b47a5c35c840dcf + .quad 0x7e83be0bccaf8634 + .quad 0x61943dee6d99c120 + .quad 0x86101f2e460b9fe0 + .quad 0x6bb2f1518ee8598d + .quad 0x76b76289fcc475cc + + // 2^68 * 7 * G + + .quad 0x791b4cc1756286fa + .quad 0xdbced317d74a157c + .quad 0x7e732421ea72bde6 + .quad 0x01fe18491131c8e9 + .quad 0x4245f1a1522ec0b3 + .quad 0x558785b22a75656d + .quad 0x1d485a2548a1b3c0 + .quad 0x60959eccd58fe09f + .quad 0x3ebfeb7ba8ed7a09 + .quad 0x49fdc2bbe502789c + .quad 0x44ebce5d3c119428 + .quad 0x35e1eb55be947f4a + + // 2^68 * 8 * G + + .quad 0xdbdae701c5738dd3 + .quad 0xf9c6f635b26f1bee + .quad 0x61e96a8042f15ef4 + .quad 0x3aa1d11faf60a4d8 + .quad 0x14fd6dfa726ccc74 + .quad 0x3b084cfe2f53b965 + .quad 0xf33ae4f552a2c8b4 + .quad 0x59aab07a0d40166a + .quad 0x77bcec4c925eac25 + .quad 0x1848718460137738 + .quad 0x5b374337fea9f451 + .quad 0x1865e78ec8e6aa46 + + // 2^72 * 1 * G + + .quad 0xccc4b7c7b66e1f7a + .quad 0x44157e25f50c2f7e + .quad 0x3ef06dfc713eaf1c + .quad 0x582f446752da63f7 + .quad 0x967c54e91c529ccb + .quad 0x30f6269264c635fb + .quad 0x2747aff478121965 + .quad 0x17038418eaf66f5c + .quad 0xc6317bd320324ce4 + .quad 0xa81042e8a4488bc4 + .quad 0xb21ef18b4e5a1364 + .quad 0x0c2a1c4bcda28dc9 + + // 2^72 * 2 * G + + .quad 0xd24dc7d06f1f0447 + .quad 0xb2269e3edb87c059 + .quad 0xd15b0272fbb2d28f + .quad 0x7c558bd1c6f64877 + .quad 0xedc4814869bd6945 + .quad 0x0d6d907dbe1c8d22 + .quad 0xc63bd212d55cc5ab + .quad 0x5a6a9b30a314dc83 + .quad 0xd0ec1524d396463d + .quad 0x12bb628ac35a24f0 + .quad 0xa50c3a791cbc5fa4 + .quad 0x0404a5ca0afbafc3 + + // 2^72 * 3 * G + + .quad 0x8c1f40070aa743d6 + .quad 0xccbad0cb5b265ee8 + .quad 0x574b046b668fd2de + .quad 0x46395bfdcadd9633 + .quad 0x62bc9e1b2a416fd1 + .quad 0xb5c6f728e350598b + .quad 0x04343fd83d5d6967 + .quad 0x39527516e7f8ee98 + .quad 0x117fdb2d1a5d9a9c + .quad 0x9c7745bcd1005c2a + .quad 0xefd4bef154d56fea + .quad 0x76579a29e822d016 + + // 2^72 * 4 * G + + .quad 0x45b68e7e49c02a17 + .quad 0x23cd51a2bca9a37f + .quad 0x3ed65f11ec224c1b + .quad 0x43a384dc9e05bdb1 + .quad 0x333cb51352b434f2 + .quad 0xd832284993de80e1 + .quad 0xb5512887750d35ce + .quad 0x02c514bb2a2777c1 + .quad 0x684bd5da8bf1b645 + .quad 0xfb8bd37ef6b54b53 + .quad 0x313916d7a9b0d253 + .quad 0x1160920961548059 + + // 2^72 * 5 * G + + .quad 0xb44d166929dacfaa + .quad 0xda529f4c8413598f + .quad 0xe9ef63ca453d5559 + .quad 0x351e125bc5698e0b + .quad 0x7a385616369b4dcd + .quad 0x75c02ca7655c3563 + .quad 0x7dc21bf9d4f18021 + .quad 0x2f637d7491e6e042 + .quad 0xd4b49b461af67bbe + .quad 0xd603037ac8ab8961 + .quad 0x71dee19ff9a699fb + .quad 0x7f182d06e7ce2a9a + + // 2^72 * 6 * G + + .quad 0x7a7c8e64ab0168ec + .quad 0xcb5a4a5515edc543 + .quad 0x095519d347cd0eda + .quad 0x67d4ac8c343e93b0 + .quad 0x09454b728e217522 + .quad 0xaa58e8f4d484b8d8 + .quad 0xd358254d7f46903c + .quad 0x44acc043241c5217 + .quad 0x1c7d6bbb4f7a5777 + .quad 0x8b35fed4918313e1 + .quad 0x4adca1c6c96b4684 + .quad 0x556d1c8312ad71bd + + // 2^72 * 7 * G + + .quad 0x17ef40e30c8d3982 + .quad 0x31f7073e15a3fa34 + .quad 0x4f21f3cb0773646e + .quad 0x746c6c6d1d824eff + .quad 0x81f06756b11be821 + .quad 0x0faff82310a3f3dd + .quad 0xf8b2d0556a99465d + .quad 0x097abe38cc8c7f05 + .quad 0x0c49c9877ea52da4 + .quad 0x4c4369559bdc1d43 + .quad 0x022c3809f7ccebd2 + .quad 0x577e14a34bee84bd + + // 2^72 * 8 * G + + .quad 0xf0e268ac61a73b0a + .quad 0xf2fafa103791a5f5 + .quad 0xc1e13e826b6d00e9 + .quad 0x60fa7ee96fd78f42 + .quad 0x94fecebebd4dd72b + .quad 
0xf46a4fda060f2211 + .quad 0x124a5977c0c8d1ff + .quad 0x705304b8fb009295 + .quad 0xb63d1d354d296ec6 + .quad 0xf3c3053e5fad31d8 + .quad 0x670b958cb4bd42ec + .quad 0x21398e0ca16353fd + + // 2^76 * 1 * G + + .quad 0x216ab2ca8da7d2ef + .quad 0x366ad9dd99f42827 + .quad 0xae64b9004fdd3c75 + .quad 0x403a395b53909e62 + .quad 0x86c5fc16861b7e9a + .quad 0xf6a330476a27c451 + .quad 0x01667267a1e93597 + .quad 0x05ffb9cd6082dfeb + .quad 0xa617fa9ff53f6139 + .quad 0x60f2b5e513e66cb6 + .quad 0xd7a8beefb3448aa4 + .quad 0x7a2932856f5ea192 + + // 2^76 * 2 * G + + .quad 0x0b39d761b02de888 + .quad 0x5f550e7ed2414e1f + .quad 0xa6bfa45822e1a940 + .quad 0x050a2f7dfd447b99 + .quad 0xb89c444879639302 + .quad 0x4ae4f19350c67f2c + .quad 0xf0b35da8c81af9c6 + .quad 0x39d0003546871017 + .quad 0x437c3b33a650db77 + .quad 0x6bafe81dbac52bb2 + .quad 0xfe99402d2db7d318 + .quad 0x2b5b7eec372ba6ce + + // 2^76 * 3 * G + + .quad 0xb3bc4bbd83f50eef + .quad 0x508f0c998c927866 + .quad 0x43e76587c8b7e66e + .quad 0x0f7655a3a47f98d9 + .quad 0xa694404d613ac8f4 + .quad 0x500c3c2bfa97e72c + .quad 0x874104d21fcec210 + .quad 0x1b205fb38604a8ee + .quad 0x55ecad37d24b133c + .quad 0x441e147d6038c90b + .quad 0x656683a1d62c6fee + .quad 0x0157d5dc87e0ecae + + // 2^76 * 4 * G + + .quad 0xf2a7af510354c13d + .quad 0xd7a0b145aa372b60 + .quad 0x2869b96a05a3d470 + .quad 0x6528e42d82460173 + .quad 0x95265514d71eb524 + .quad 0xe603d8815df14593 + .quad 0x147cdf410d4de6b7 + .quad 0x5293b1730437c850 + .quad 0x23d0e0814bccf226 + .quad 0x92c745cd8196fb93 + .quad 0x8b61796c59541e5b + .quad 0x40a44df0c021f978 + + // 2^76 * 5 * G + + .quad 0xdaa869894f20ea6a + .quad 0xea14a3d14c620618 + .quad 0x6001fccb090bf8be + .quad 0x35f4e822947e9cf0 + .quad 0x86c96e514bc5d095 + .quad 0xf20d4098fca6804a + .quad 0x27363d89c826ea5d + .quad 0x39ca36565719cacf + .quad 0x97506f2f6f87b75c + .quad 0xc624aea0034ae070 + .quad 0x1ec856e3aad34dd6 + .quad 0x055b0be0e440e58f + + // 2^76 * 6 * G + + .quad 0x6469a17d89735d12 + .quad 0xdb6f27d5e662b9f1 + .quad 0x9fcba3286a395681 + .quad 0x363b8004d269af25 + .quad 0x4d12a04b6ea33da2 + .quad 0x57cf4c15e36126dd + .quad 0x90ec9675ee44d967 + .quad 0x64ca348d2a985aac + .quad 0x99588e19e4c4912d + .quad 0xefcc3b4e1ca5ce6b + .quad 0x4522ea60fa5b98d5 + .quad 0x7064bbab1de4a819 + + // 2^76 * 7 * G + + .quad 0xb919e1515a770641 + .quad 0xa9a2e2c74e7f8039 + .quad 0x7527250b3df23109 + .quad 0x756a7330ac27b78b + .quad 0xa290c06142542129 + .quad 0xf2e2c2aebe8d5b90 + .quad 0xcf2458db76abfe1b + .quad 0x02157ade83d626bf + .quad 0x3e46972a1b9a038b + .quad 0x2e4ee66a7ee03fb4 + .quad 0x81a248776edbb4ca + .quad 0x1a944ee88ecd0563 + + // 2^76 * 8 * G + + .quad 0xd5a91d1151039372 + .quad 0x2ed377b799ca26de + .quad 0xa17202acfd366b6b + .quad 0x0730291bd6901995 + .quad 0xbb40a859182362d6 + .quad 0xb99f55778a4d1abb + .quad 0x8d18b427758559f6 + .quad 0x26c20fe74d26235a + .quad 0x648d1d9fe9cc22f5 + .quad 0x66bc561928dd577c + .quad 0x47d3ed21652439d1 + .quad 0x49d271acedaf8b49 + + // 2^80 * 1 * G + + .quad 0x89f5058a382b33f3 + .quad 0x5ae2ba0bad48c0b4 + .quad 0x8f93b503a53db36e + .quad 0x5aa3ed9d95a232e6 + .quad 0x2798aaf9b4b75601 + .quad 0x5eac72135c8dad72 + .quad 0xd2ceaa6161b7a023 + .quad 0x1bbfb284e98f7d4e + .quad 0x656777e9c7d96561 + .quad 0xcb2b125472c78036 + .quad 0x65053299d9506eee + .quad 0x4a07e14e5e8957cc + + // 2^80 * 2 * G + + .quad 0x4ee412cb980df999 + .quad 0xa315d76f3c6ec771 + .quad 0xbba5edde925c77fd + .quad 0x3f0bac391d313402 + .quad 0x240b58cdc477a49b + .quad 0xfd38dade6447f017 + .quad 0x19928d32a7c86aad + .quad 0x50af7aed84afa081 + .quad 
0x6e4fde0115f65be5 + .quad 0x29982621216109b2 + .quad 0x780205810badd6d9 + .quad 0x1921a316baebd006 + + // 2^80 * 3 * G + + .quad 0x89422f7edfb870fc + .quad 0x2c296beb4f76b3bd + .quad 0x0738f1d436c24df7 + .quad 0x6458df41e273aeb0 + .quad 0xd75aad9ad9f3c18b + .quad 0x566a0eef60b1c19c + .quad 0x3e9a0bac255c0ed9 + .quad 0x7b049deca062c7f5 + .quad 0xdccbe37a35444483 + .quad 0x758879330fedbe93 + .quad 0x786004c312c5dd87 + .quad 0x6093dccbc2950e64 + + // 2^80 * 4 * G + + .quad 0x1ff39a8585e0706d + .quad 0x36d0a5d8b3e73933 + .quad 0x43b9f2e1718f453b + .quad 0x57d1ea084827a97c + .quad 0x6bdeeebe6084034b + .quad 0x3199c2b6780fb854 + .quad 0x973376abb62d0695 + .quad 0x6e3180c98b647d90 + .quad 0xee7ab6e7a128b071 + .quad 0xa4c1596d93a88baa + .quad 0xf7b4de82b2216130 + .quad 0x363e999ddd97bd18 + + // 2^80 * 5 * G + + .quad 0x96a843c135ee1fc4 + .quad 0x976eb35508e4c8cf + .quad 0xb42f6801b58cd330 + .quad 0x48ee9b78693a052b + .quad 0x2f1848dce24baec6 + .quad 0x769b7255babcaf60 + .quad 0x90cb3c6e3cefe931 + .quad 0x231f979bc6f9b355 + .quad 0x5c31de4bcc2af3c6 + .quad 0xb04bb030fe208d1f + .quad 0xb78d7009c14fb466 + .quad 0x079bfa9b08792413 + + // 2^80 * 6 * G + + .quad 0xe3903a51da300df4 + .quad 0x843964233da95ab0 + .quad 0xed3cf12d0b356480 + .quad 0x038c77f684817194 + .quad 0xf3c9ed80a2d54245 + .quad 0x0aa08b7877f63952 + .quad 0xd76dac63d1085475 + .quad 0x1ef4fb159470636b + .quad 0x854e5ee65b167bec + .quad 0x59590a4296d0cdc2 + .quad 0x72b2df3498102199 + .quad 0x575ee92a4a0bff56 + + // 2^80 * 7 * G + + .quad 0xd4c080908a182fcf + .quad 0x30e170c299489dbd + .quad 0x05babd5752f733de + .quad 0x43d4e7112cd3fd00 + .quad 0x5d46bc450aa4d801 + .quad 0xc3af1227a533b9d8 + .quad 0x389e3b262b8906c2 + .quad 0x200a1e7e382f581b + .quad 0x518db967eaf93ac5 + .quad 0x71bc989b056652c0 + .quad 0xfe2b85d9567197f5 + .quad 0x050eca52651e4e38 + + // 2^80 * 8 * G + + .quad 0xc3431ade453f0c9c + .quad 0xe9f5045eff703b9b + .quad 0xfcd97ac9ed847b3d + .quad 0x4b0ee6c21c58f4c6 + .quad 0x97ac397660e668ea + .quad 0x9b19bbfe153ab497 + .quad 0x4cb179b534eca79f + .quad 0x6151c09fa131ae57 + .quad 0x3af55c0dfdf05d96 + .quad 0xdd262ee02ab4ee7a + .quad 0x11b2bb8712171709 + .quad 0x1fef24fa800f030b + + // 2^84 * 1 * G + + .quad 0xb496123a6b6c6609 + .quad 0xa750fe8580ab5938 + .quad 0xf471bf39b7c27a5f + .quad 0x507903ce77ac193c + .quad 0xff91a66a90166220 + .quad 0xf22552ae5bf1e009 + .quad 0x7dff85d87f90df7c + .quad 0x4f620ffe0c736fb9 + .quad 0x62f90d65dfde3e34 + .quad 0xcf28c592b9fa5fad + .quad 0x99c86ef9c6164510 + .quad 0x25d448044a256c84 + + // 2^84 * 2 * G + + .quad 0xbd68230ec7e9b16f + .quad 0x0eb1b9c1c1c5795d + .quad 0x7943c8c495b6b1ff + .quad 0x2f9faf620bbacf5e + .quad 0x2c7c4415c9022b55 + .quad 0x56a0d241812eb1fe + .quad 0xf02ea1c9d7b65e0d + .quad 0x4180512fd5323b26 + .quad 0xa4ff3e698a48a5db + .quad 0xba6a3806bd95403b + .quad 0x9f7ce1af47d5b65d + .quad 0x15e087e55939d2fb + + // 2^84 * 3 * G + + .quad 0x12207543745c1496 + .quad 0xdaff3cfdda38610c + .quad 0xe4e797272c71c34f + .quad 0x39c07b1934bdede9 + .quad 0x8894186efb963f38 + .quad 0x48a00e80dc639bd5 + .quad 0xa4e8092be96c1c99 + .quad 0x5a097d54ca573661 + .quad 0x2d45892b17c9e755 + .quad 0xd033fd7289308df8 + .quad 0x6c2fe9d9525b8bd9 + .quad 0x2edbecf1c11cc079 + + // 2^84 * 4 * G + + .quad 0x1616a4e3c715a0d2 + .quad 0x53623cb0f8341d4d + .quad 0x96ef5329c7e899cb + .quad 0x3d4e8dbba668baa6 + .quad 0xee0f0fddd087a25f + .quad 0x9c7531555c3e34ee + .quad 0x660c572e8fab3ab5 + .quad 0x0854fc44544cd3b2 + .quad 0x61eba0c555edad19 + .quad 0x24b533fef0a83de6 + .quad 0x3b77042883baa5f8 + .quad 
0x678f82b898a47e8d + + // 2^84 * 5 * G + + .quad 0xb1491d0bd6900c54 + .quad 0x3539722c9d132636 + .quad 0x4db928920b362bc9 + .quad 0x4d7cd1fea68b69df + .quad 0x1e09d94057775696 + .quad 0xeed1265c3cd951db + .quad 0xfa9dac2b20bce16f + .quad 0x0f7f76e0e8d089f4 + .quad 0x36d9ebc5d485b00c + .quad 0xa2596492e4adb365 + .quad 0xc1659480c2119ccd + .quad 0x45306349186e0d5f + + // 2^84 * 6 * G + + .quad 0x94ddd0c1a6cdff1d + .quad 0x55f6f115e84213ae + .quad 0x6c935f85992fcf6a + .quad 0x067ee0f54a37f16f + .quad 0x96a414ec2b072491 + .quad 0x1bb2218127a7b65b + .quad 0x6d2849596e8a4af0 + .quad 0x65f3b08ccd27765f + .quad 0xecb29fff199801f7 + .quad 0x9d361d1fa2a0f72f + .quad 0x25f11d2375fd2f49 + .quad 0x124cefe80fe10fe2 + + // 2^84 * 7 * G + + .quad 0x4c126cf9d18df255 + .quad 0xc1d471e9147a63b6 + .quad 0x2c6d3c73f3c93b5f + .quad 0x6be3a6a2e3ff86a2 + .quad 0x1518e85b31b16489 + .quad 0x8faadcb7db710bfb + .quad 0x39b0bdf4a14ae239 + .quad 0x05f4cbea503d20c1 + .quad 0xce040e9ec04145bc + .quad 0xc71ff4e208f6834c + .quad 0xbd546e8dab8847a3 + .quad 0x64666aa0a4d2aba5 + + // 2^84 * 8 * G + + .quad 0x6841435a7c06d912 + .quad 0xca123c21bb3f830b + .quad 0xd4b37b27b1cbe278 + .quad 0x1d753b84c76f5046 + .quad 0xb0c53bf73337e94c + .quad 0x7cb5697e11e14f15 + .quad 0x4b84abac1930c750 + .quad 0x28dd4abfe0640468 + .quad 0x7dc0b64c44cb9f44 + .quad 0x18a3e1ace3925dbf + .quad 0x7a3034862d0457c4 + .quad 0x4c498bf78a0c892e + + // 2^88 * 1 * G + + .quad 0x37d653fb1aa73196 + .quad 0x0f9495303fd76418 + .quad 0xad200b09fb3a17b2 + .quad 0x544d49292fc8613e + .quad 0x22d2aff530976b86 + .quad 0x8d90b806c2d24604 + .quad 0xdca1896c4de5bae5 + .quad 0x28005fe6c8340c17 + .quad 0x6aefba9f34528688 + .quad 0x5c1bff9425107da1 + .quad 0xf75bbbcd66d94b36 + .quad 0x72e472930f316dfa + + // 2^88 * 2 * G + + .quad 0x2695208c9781084f + .quad 0xb1502a0b23450ee1 + .quad 0xfd9daea603efde02 + .quad 0x5a9d2e8c2733a34c + .quad 0x07f3f635d32a7627 + .quad 0x7aaa4d865f6566f0 + .quad 0x3c85e79728d04450 + .quad 0x1fee7f000fe06438 + .quad 0x765305da03dbf7e5 + .quad 0xa4daf2491434cdbd + .quad 0x7b4ad5cdd24a88ec + .quad 0x00f94051ee040543 + + // 2^88 * 3 * G + + .quad 0x8d356b23c3d330b2 + .quad 0xf21c8b9bb0471b06 + .quad 0xb36c316c6e42b83c + .quad 0x07d79c7e8beab10d + .quad 0xd7ef93bb07af9753 + .quad 0x583ed0cf3db766a7 + .quad 0xce6998bf6e0b1ec5 + .quad 0x47b7ffd25dd40452 + .quad 0x87fbfb9cbc08dd12 + .quad 0x8a066b3ae1eec29b + .quad 0x0d57242bdb1fc1bf + .quad 0x1c3520a35ea64bb6 + + // 2^88 * 4 * G + + .quad 0x80d253a6bccba34a + .quad 0x3e61c3a13838219b + .quad 0x90c3b6019882e396 + .quad 0x1c3d05775d0ee66f + .quad 0xcda86f40216bc059 + .quad 0x1fbb231d12bcd87e + .quad 0xb4956a9e17c70990 + .quad 0x38750c3b66d12e55 + .quad 0x692ef1409422e51a + .quad 0xcbc0c73c2b5df671 + .quad 0x21014fe7744ce029 + .quad 0x0621e2c7d330487c + + // 2^88 * 5 * G + + .quad 0xaf9860cc8259838d + .quad 0x90ea48c1c69f9adc + .quad 0x6526483765581e30 + .quad 0x0007d6097bd3a5bc + .quad 0xb7ae1796b0dbf0f3 + .quad 0x54dfafb9e17ce196 + .quad 0x25923071e9aaa3b4 + .quad 0x5d8e589ca1002e9d + .quad 0xc0bf1d950842a94b + .quad 0xb2d3c363588f2e3e + .quad 0x0a961438bb51e2ef + .quad 0x1583d7783c1cbf86 + + // 2^88 * 6 * G + + .quad 0xeceea2ef5da27ae1 + .quad 0x597c3a1455670174 + .quad 0xc9a62a126609167a + .quad 0x252a5f2e81ed8f70 + .quad 0x90034704cc9d28c7 + .quad 0x1d1b679ef72cc58f + .quad 0x16e12b5fbe5b8726 + .quad 0x4958064e83c5580a + .quad 0x0d2894265066e80d + .quad 0xfcc3f785307c8c6b + .quad 0x1b53da780c1112fd + .quad 0x079c170bd843b388 + + // 2^88 * 7 * G + + .quad 0x0506ece464fa6fff + .quad 0xbee3431e6205e523 
+ .quad 0x3579422451b8ea42 + .quad 0x6dec05e34ac9fb00 + .quad 0xcdd6cd50c0d5d056 + .quad 0x9af7686dbb03573b + .quad 0x3ca6723ff3c3ef48 + .quad 0x6768c0d7317b8acc + .quad 0x94b625e5f155c1b3 + .quad 0x417bf3a7997b7b91 + .quad 0xc22cbddc6d6b2600 + .quad 0x51445e14ddcd52f4 + + // 2^88 * 8 * G + + .quad 0x57502b4b3b144951 + .quad 0x8e67ff6b444bbcb3 + .quad 0xb8bd6927166385db + .quad 0x13186f31e39295c8 + .quad 0x893147ab2bbea455 + .quad 0x8c53a24f92079129 + .quad 0x4b49f948be30f7a7 + .quad 0x12e990086e4fd43d + .quad 0xf10c96b37fdfbb2e + .quad 0x9f9a935e121ceaf9 + .quad 0xdf1136c43a5b983f + .quad 0x77b2e3f05d3e99af + + // 2^92 * 1 * G + + .quad 0xfd0d75879cf12657 + .quad 0xe82fef94e53a0e29 + .quad 0xcc34a7f05bbb4be7 + .quad 0x0b251172a50c38a2 + .quad 0x9532f48fcc5cd29b + .quad 0x2ba851bea3ce3671 + .quad 0x32dacaa051122941 + .quad 0x478d99d9350004f2 + .quad 0x1d5ad94890bb02c0 + .quad 0x50e208b10ec25115 + .quad 0xa26a22894ef21702 + .quad 0x4dc923343b524805 + + // 2^92 * 2 * G + + .quad 0xe3828c400f8086b6 + .quad 0x3f77e6f7979f0dc8 + .quad 0x7ef6de304df42cb4 + .quad 0x5265797cb6abd784 + .quad 0x3ad3e3ebf36c4975 + .quad 0xd75d25a537862125 + .quad 0xe873943da025a516 + .quad 0x6bbc7cb4c411c847 + .quad 0x3c6f9cd1d4a50d56 + .quad 0xb6244077c6feab7e + .quad 0x6ff9bf483580972e + .quad 0x00375883b332acfb + + // 2^92 * 3 * G + + .quad 0x0001b2cd28cb0940 + .quad 0x63fb51a06f1c24c9 + .quad 0xb5ad8691dcd5ca31 + .quad 0x67238dbd8c450660 + .quad 0xc98bec856c75c99c + .quad 0xe44184c000e33cf4 + .quad 0x0a676b9bba907634 + .quad 0x669e2cb571f379d7 + .quad 0xcb116b73a49bd308 + .quad 0x025aad6b2392729e + .quad 0xb4793efa3f55d9b1 + .quad 0x72a1056140678bb9 + + // 2^92 * 4 * G + + .quad 0xa2b6812b1cc9249d + .quad 0x62866eee21211f58 + .quad 0x2cb5c5b85df10ece + .quad 0x03a6b259e263ae00 + .quad 0x0d8d2909e2e505b6 + .quad 0x98ca78abc0291230 + .quad 0x77ef5569a9b12327 + .quad 0x7c77897b81439b47 + .quad 0xf1c1b5e2de331cb5 + .quad 0x5a9f5d8e15fca420 + .quad 0x9fa438f17bd932b1 + .quad 0x2a381bf01c6146e7 + + // 2^92 * 5 * G + + .quad 0xac9b9879cfc811c1 + .quad 0x8b7d29813756e567 + .quad 0x50da4e607c70edfc + .quad 0x5dbca62f884400b6 + .quad 0xf7c0be32b534166f + .quad 0x27e6ca6419cf70d4 + .quad 0x934df7d7a957a759 + .quad 0x5701461dabdec2aa + .quad 0x2c6747402c915c25 + .quad 0x1bdcd1a80b0d340a + .quad 0x5e5601bd07b43f5f + .quad 0x2555b4e05539a242 + + // 2^92 * 6 * G + + .quad 0x6fc09f5266ddd216 + .quad 0xdce560a7c8e37048 + .quad 0xec65939da2df62fd + .quad 0x7a869ae7e52ed192 + .quad 0x78409b1d87e463d4 + .quad 0xad4da95acdfb639d + .quad 0xec28773755259b9c + .quad 0x69c806e9c31230ab + .quad 0x7b48f57414bb3f22 + .quad 0x68c7cee4aedccc88 + .quad 0xed2f936179ed80be + .quad 0x25d70b885f77bc4b + + // 2^92 * 7 * G + + .quad 0x4151c3d9762bf4de + .quad 0x083f435f2745d82b + .quad 0x29775a2e0d23ddd5 + .quad 0x138e3a6269a5db24 + .quad 0x98459d29bb1ae4d4 + .quad 0x56b9c4c739f954ec + .quad 0x832743f6c29b4b3e + .quad 0x21ea8e2798b6878a + .quad 0x87bef4b46a5a7b9c + .quad 0xd2299d1b5fc1d062 + .quad 0x82409818dd321648 + .quad 0x5c5abeb1e5a2e03d + + // 2^92 * 8 * G + + .quad 0x14722af4b73c2ddb + .quad 0xbc470c5f5a05060d + .quad 0x00943eac2581b02e + .quad 0x0e434b3b1f499c8f + .quad 0x02cde6de1306a233 + .quad 0x7b5a52a2116f8ec7 + .quad 0xe1c681f4c1163b5b + .quad 0x241d350660d32643 + .quad 0x6be4404d0ebc52c7 + .quad 0xae46233bb1a791f5 + .quad 0x2aec170ed25db42b + .quad 0x1d8dfd966645d694 + + // 2^96 * 1 * G + + .quad 0x296fa9c59c2ec4de + .quad 0xbc8b61bf4f84f3cb + .quad 0x1c7706d917a8f908 + .quad 0x63b795fc7ad3255d + .quad 0xd598639c12ddb0a4 + .quad 
0xa5d19f30c024866b + .quad 0xd17c2f0358fce460 + .quad 0x07a195152e095e8a + .quad 0xa8368f02389e5fc8 + .quad 0x90433b02cf8de43b + .quad 0xafa1fd5dc5412643 + .quad 0x3e8fe83d032f0137 + + // 2^96 * 2 * G + + .quad 0x2f8b15b90570a294 + .quad 0x94f2427067084549 + .quad 0xde1c5ae161bbfd84 + .quad 0x75ba3b797fac4007 + .quad 0x08704c8de8efd13c + .quad 0xdfc51a8e33e03731 + .quad 0xa59d5da51260cde3 + .quad 0x22d60899a6258c86 + .quad 0x6239dbc070cdd196 + .quad 0x60fe8a8b6c7d8a9a + .quad 0xb38847bceb401260 + .quad 0x0904d07b87779e5e + + // 2^96 * 3 * G + + .quad 0xb4ce1fd4ddba919c + .quad 0xcf31db3ec74c8daa + .quad 0x2c63cc63ad86cc51 + .quad 0x43e2143fbc1dde07 + .quad 0xf4322d6648f940b9 + .quad 0x06952f0cbd2d0c39 + .quad 0x167697ada081f931 + .quad 0x6240aacebaf72a6c + .quad 0xf834749c5ba295a0 + .quad 0xd6947c5bca37d25a + .quad 0x66f13ba7e7c9316a + .quad 0x56bdaf238db40cac + + // 2^96 * 4 * G + + .quad 0x362ab9e3f53533eb + .quad 0x338568d56eb93d40 + .quad 0x9e0e14521d5a5572 + .quad 0x1d24a86d83741318 + .quad 0x1310d36cc19d3bb2 + .quad 0x062a6bb7622386b9 + .quad 0x7c9b8591d7a14f5c + .quad 0x03aa31507e1e5754 + .quad 0xf4ec7648ffd4ce1f + .quad 0xe045eaf054ac8c1c + .quad 0x88d225821d09357c + .quad 0x43b261dc9aeb4859 + + // 2^96 * 5 * G + + .quad 0xe55b1e1988bb79bb + .quad 0xa09ed07dc17a359d + .quad 0xb02c2ee2603dea33 + .quad 0x326055cf5b276bc2 + .quad 0x19513d8b6c951364 + .quad 0x94fe7126000bf47b + .quad 0x028d10ddd54f9567 + .quad 0x02b4d5e242940964 + .quad 0xb4a155cb28d18df2 + .quad 0xeacc4646186ce508 + .quad 0xc49cf4936c824389 + .quad 0x27a6c809ae5d3410 + + // 2^96 * 6 * G + + .quad 0x8ba6ebcd1f0db188 + .quad 0x37d3d73a675a5be8 + .quad 0xf22edfa315f5585a + .quad 0x2cb67174ff60a17e + .quad 0xcd2c270ac43d6954 + .quad 0xdd4a3e576a66cab2 + .quad 0x79fa592469d7036c + .quad 0x221503603d8c2599 + .quad 0x59eecdf9390be1d0 + .quad 0xa9422044728ce3f1 + .quad 0x82891c667a94f0f4 + .quad 0x7b1df4b73890f436 + + // 2^96 * 7 * G + + .quad 0xe492f2e0b3b2a224 + .quad 0x7c6c9e062b551160 + .quad 0x15eb8fe20d7f7b0e + .quad 0x61fcef2658fc5992 + .quad 0x5f2e221807f8f58c + .quad 0xe3555c9fd49409d4 + .quad 0xb2aaa88d1fb6a630 + .quad 0x68698245d352e03d + .quad 0xdbb15d852a18187a + .quad 0xf3e4aad386ddacd7 + .quad 0x44bae2810ff6c482 + .quad 0x46cf4c473daf01cf + + // 2^96 * 8 * G + + .quad 0x426525ed9ec4e5f9 + .quad 0x0e5eda0116903303 + .quad 0x72b1a7f2cbe5cadc + .quad 0x29387bcd14eb5f40 + .quad 0x213c6ea7f1498140 + .quad 0x7c1e7ef8392b4854 + .quad 0x2488c38c5629ceba + .quad 0x1065aae50d8cc5bb + .quad 0x1c2c4525df200d57 + .quad 0x5c3b2dd6bfca674a + .quad 0x0a07e7b1e1834030 + .quad 0x69a198e64f1ce716 + + // 2^100 * 1 * G + + .quad 0x7afcd613efa9d697 + .quad 0x0cc45aa41c067959 + .quad 0xa56fe104c1fada96 + .quad 0x3a73b70472e40365 + .quad 0x7b26e56b9e2d4734 + .quad 0xc4c7132b81c61675 + .quad 0xef5c9525ec9cde7f + .quad 0x39c80b16e71743ad + .quad 0x0f196e0d1b826c68 + .quad 0xf71ff0e24960e3db + .quad 0x6113167023b7436c + .quad 0x0cf0ea5877da7282 + + // 2^100 * 2 * G + + .quad 0x196c80a4ddd4ccbd + .quad 0x22e6f55d95f2dd9d + .quad 0xc75e33c740d6c71b + .quad 0x7bb51279cb3c042f + .quad 0xe332ced43ba6945a + .quad 0xde0b1361e881c05d + .quad 0x1ad40f095e67ed3b + .quad 0x5da8acdab8c63d5d + .quad 0xc4b6664a3a70159f + .quad 0x76194f0f0a904e14 + .quad 0xa5614c39a4096c13 + .quad 0x6cd0ff50979feced + + // 2^100 * 3 * G + + .quad 0xc0e067e78f4428ac + .quad 0x14835ab0a61135e3 + .quad 0xf21d14f338062935 + .quad 0x6390a4c8df04849c + .quad 0x7fecfabdb04ba18e + .quad 0xd0fc7bfc3bddbcf7 + .quad 0xa41d486e057a131c + .quad 0x641a4391f2223a61 + .quad 
0xc5c6b95aa606a8db + .quad 0x914b7f9eb06825f1 + .quad 0x2a731f6b44fc9eff + .quad 0x30ddf38562705cfc + + // 2^100 * 4 * G + + .quad 0x4e3dcbdad1bff7f9 + .quad 0xc9118e8220645717 + .quad 0xbacccebc0f189d56 + .quad 0x1b4822e9d4467668 + .quad 0x33bef2bd68bcd52c + .quad 0xc649dbb069482ef2 + .quad 0xb5b6ee0c41cb1aee + .quad 0x5c294d270212a7e5 + .quad 0xab360a7f25563781 + .quad 0x2512228a480f7958 + .quad 0xc75d05276114b4e3 + .quad 0x222d9625d976fe2a + + // 2^100 * 5 * G + + .quad 0x1c717f85b372ace1 + .quad 0x81930e694638bf18 + .quad 0x239cad056bc08b58 + .quad 0x0b34271c87f8fff4 + .quad 0x0f94be7e0a344f85 + .quad 0xeb2faa8c87f22c38 + .quad 0x9ce1e75e4ee16f0f + .quad 0x43e64e5418a08dea + .quad 0x8155e2521a35ce63 + .quad 0xbe100d4df912028e + .quad 0xbff80bf8a57ddcec + .quad 0x57342dc96d6bc6e4 + + // 2^100 * 6 * G + + .quad 0xefeef065c8ce5998 + .quad 0xbf029510b5cbeaa2 + .quad 0x8c64a10620b7c458 + .quad 0x35134fb231c24855 + .quad 0xf3c3bcb71e707bf6 + .quad 0x351d9b8c7291a762 + .quad 0x00502e6edad69a33 + .quad 0x522f521f1ec8807f + .quad 0x272c1f46f9a3902b + .quad 0xc91ba3b799657bcc + .quad 0xae614b304f8a1c0e + .quad 0x7afcaad70b99017b + + // 2^100 * 7 * G + + .quad 0xc25ded54a4b8be41 + .quad 0x902d13e11bb0e2dd + .quad 0x41f43233cde82ab2 + .quad 0x1085faa5c3aae7cb + .quad 0xa88141ecef842b6b + .quad 0x55e7b14797abe6c5 + .quad 0x8c748f9703784ffe + .quad 0x5b50a1f7afcd00b7 + .quad 0x9b840f66f1361315 + .quad 0x18462242701003e9 + .quad 0x65ed45fae4a25080 + .quad 0x0a2862393fda7320 + + // 2^100 * 8 * G + + .quad 0x46ab13c8347cbc9d + .quad 0x3849e8d499c12383 + .quad 0x4cea314087d64ac9 + .quad 0x1f354134b1a29ee7 + .quad 0x960e737b6ecb9d17 + .quad 0xfaf24948d67ceae1 + .quad 0x37e7a9b4d55e1b89 + .quad 0x5cb7173cb46c59eb + .quad 0x4a89e68b82b7abf0 + .quad 0xf41cd9279ba6b7b9 + .quad 0x16e6c210e18d876f + .quad 0x7cacdb0f7f1b09c6 + + // 2^104 * 1 * G + + .quad 0x9062b2e0d91a78bc + .quad 0x47c9889cc8509667 + .quad 0x9df54a66405070b8 + .quad 0x7369e6a92493a1bf + .quad 0xe1014434dcc5caed + .quad 0x47ed5d963c84fb33 + .quad 0x70019576ed86a0e7 + .quad 0x25b2697bd267f9e4 + .quad 0x9d673ffb13986864 + .quad 0x3ca5fbd9415dc7b8 + .quad 0xe04ecc3bdf273b5e + .quad 0x1420683db54e4cd2 + + // 2^104 * 2 * G + + .quad 0xb478bd1e249dd197 + .quad 0x620c35005e58c102 + .quad 0xfb02d32fccbaac5c + .quad 0x60b63bebf508a72d + .quad 0x34eebb6fc1cc5ad0 + .quad 0x6a1b0ce99646ac8b + .quad 0xd3b0da49a66bde53 + .quad 0x31e83b4161d081c1 + .quad 0x97e8c7129e062b4f + .quad 0x49e48f4f29320ad8 + .quad 0x5bece14b6f18683f + .quad 0x55cf1eb62d550317 + + // 2^104 * 3 * G + + .quad 0x5879101065c23d58 + .quad 0x8b9d086d5094819c + .quad 0xe2402fa912c55fa7 + .quad 0x669a6564570891d4 + .quad 0x3076b5e37df58c52 + .quad 0xd73ab9dde799cc36 + .quad 0xbd831ce34913ee20 + .quad 0x1a56fbaa62ba0133 + .quad 0x943e6b505c9dc9ec + .quad 0x302557bba77c371a + .quad 0x9873ae5641347651 + .quad 0x13c4836799c58a5c + + // 2^104 * 4 * G + + .quad 0x423a5d465ab3e1b9 + .quad 0xfc13c187c7f13f61 + .quad 0x19f83664ecb5b9b6 + .quad 0x66f80c93a637b607 + .quad 0xc4dcfb6a5d8bd080 + .quad 0xdeebc4ec571a4842 + .quad 0xd4b2e883b8e55365 + .quad 0x50bdc87dc8e5b827 + .quad 0x606d37836edfe111 + .quad 0x32353e15f011abd9 + .quad 0x64b03ac325b73b96 + .quad 0x1dd56444725fd5ae + + // 2^104 * 5 * G + + .quad 0x8fa47ff83362127d + .quad 0xbc9f6ac471cd7c15 + .quad 0x6e71454349220c8b + .quad 0x0e645912219f732e + .quad 0xc297e60008bac89a + .quad 0x7d4cea11eae1c3e0 + .quad 0xf3e38be19fe7977c + .quad 0x3a3a450f63a305cd + .quad 0x078f2f31d8394627 + .quad 0x389d3183de94a510 + .quad 0xd1e36c6d17996f80 + .quad 
0x318c8d9393a9a87b + + // 2^104 * 6 * G + + .quad 0xf2745d032afffe19 + .quad 0x0c9f3c497f24db66 + .quad 0xbc98d3e3ba8598ef + .quad 0x224c7c679a1d5314 + .quad 0x5d669e29ab1dd398 + .quad 0xfc921658342d9e3b + .quad 0x55851dfdf35973cd + .quad 0x509a41c325950af6 + .quad 0xbdc06edca6f925e9 + .quad 0x793ef3f4641b1f33 + .quad 0x82ec12809d833e89 + .quad 0x05bff02328a11389 + + // 2^104 * 7 * G + + .quad 0x3632137023cae00b + .quad 0x544acf0ad1accf59 + .quad 0x96741049d21a1c88 + .quad 0x780b8cc3fa2a44a7 + .quad 0x6881a0dd0dc512e4 + .quad 0x4fe70dc844a5fafe + .quad 0x1f748e6b8f4a5240 + .quad 0x576277cdee01a3ea + .quad 0x1ef38abc234f305f + .quad 0x9a577fbd1405de08 + .quad 0x5e82a51434e62a0d + .quad 0x5ff418726271b7a1 + + // 2^104 * 8 * G + + .quad 0x398e080c1789db9d + .quad 0xa7602025f3e778f5 + .quad 0xfa98894c06bd035d + .quad 0x106a03dc25a966be + .quad 0xe5db47e813b69540 + .quad 0xf35d2a3b432610e1 + .quad 0xac1f26e938781276 + .quad 0x29d4db8ca0a0cb69 + .quad 0xd9ad0aaf333353d0 + .quad 0x38669da5acd309e5 + .quad 0x3c57658ac888f7f0 + .quad 0x4ab38a51052cbefa + + // 2^108 * 1 * G + + .quad 0xdfdacbee4324c0e9 + .quad 0x054442883f955bb7 + .quad 0xdef7aaa8ea31609f + .quad 0x68aee70642287cff + .quad 0xf68fe2e8809de054 + .quad 0xe3bc096a9c82bad1 + .quad 0x076353d40aadbf45 + .quad 0x7b9b1fb5dea1959e + .quad 0xf01cc8f17471cc0c + .quad 0x95242e37579082bb + .quad 0x27776093d3e46b5f + .quad 0x2d13d55a28bd85fb + + // 2^108 * 2 * G + + .quad 0xfac5d2065b35b8da + .quad 0xa8da8a9a85624bb7 + .quad 0xccd2ca913d21cd0f + .quad 0x6b8341ee8bf90d58 + .quad 0xbf019cce7aee7a52 + .quad 0xa8ded2b6e454ead3 + .quad 0x3c619f0b87a8bb19 + .quad 0x3619b5d7560916d8 + .quad 0x3579f26b0282c4b2 + .quad 0x64d592f24fafefae + .quad 0xb7cded7b28c8c7c0 + .quad 0x6a927b6b7173a8d7 + + // 2^108 * 3 * G + + .quad 0x1f6db24f986e4656 + .quad 0x1021c02ed1e9105b + .quad 0xf8ff3fff2cc0a375 + .quad 0x1d2a6bf8c6c82592 + .quad 0x8d7040863ece88eb + .quad 0xf0e307a980eec08c + .quad 0xac2250610d788fda + .quad 0x056d92a43a0d478d + .quad 0x1b05a196fc3da5a1 + .quad 0x77d7a8c243b59ed0 + .quad 0x06da3d6297d17918 + .quad 0x66fbb494f12353f7 + + // 2^108 * 4 * G + + .quad 0x751a50b9d85c0fb8 + .quad 0xd1afdc258bcf097b + .quad 0x2f16a6a38309a969 + .quad 0x14ddff9ee5b00659 + .quad 0xd6d70996f12309d6 + .quad 0xdbfb2385e9c3d539 + .quad 0x46d602b0f7552411 + .quad 0x270a0b0557843e0c + .quad 0x61ff0640a7862bcc + .quad 0x81cac09a5f11abfe + .quad 0x9047830455d12abb + .quad 0x19a4bde1945ae873 + + // 2^108 * 5 * G + + .quad 0x9b9f26f520a6200a + .quad 0x64804443cf13eaf8 + .quad 0x8a63673f8631edd3 + .quad 0x72bbbce11ed39dc1 + .quad 0x40c709dec076c49f + .quad 0x657bfaf27f3e53f6 + .quad 0x40662331eca042c4 + .quad 0x14b375487eb4df04 + .quad 0xae853c94ab66dc47 + .quad 0xeb62343edf762d6e + .quad 0xf08e0e186fb2f7d1 + .quad 0x4f0b1c02700ab37a + + // 2^108 * 6 * G + + .quad 0xe1706787d81951fa + .quad 0xa10a2c8eb290c77b + .quad 0xe7382fa03ed66773 + .quad 0x0a4d84710bcc4b54 + .quad 0x79fd21ccc1b2e23f + .quad 0x4ae7c281453df52a + .quad 0xc8172ec9d151486b + .quad 0x68abe9443e0a7534 + .quad 0xda12c6c407831dcb + .quad 0x0da230d74d5c510d + .quad 0x4ab1531e6bd404e1 + .quad 0x4106b166bcf440ef + + // 2^108 * 7 * G + + .quad 0x02e57a421cd23668 + .quad 0x4ad9fb5d0eaef6fd + .quad 0x954e6727b1244480 + .quad 0x7f792f9d2699f331 + .quad 0xa485ccd539e4ecf2 + .quad 0x5aa3f3ad0555bab5 + .quad 0x145e3439937df82d + .quad 0x1238b51e1214283f + .quad 0x0b886b925fd4d924 + .quad 0x60906f7a3626a80d + .quad 0xecd367b4b98abd12 + .quad 0x2876beb1def344cf + + // 2^108 * 8 * G + + .quad 0xdc84e93563144691 + .quad 
0x632fe8a0d61f23f4 + .quad 0x4caa800612a9a8d5 + .quad 0x48f9dbfa0e9918d3 + .quad 0xd594b3333a8a85f8 + .quad 0x4ea37689e78d7d58 + .quad 0x73bf9f455e8e351f + .quad 0x5507d7d2bc41ebb4 + .quad 0x1ceb2903299572fc + .quad 0x7c8ccaa29502d0ee + .quad 0x91bfa43411cce67b + .quad 0x5784481964a831e7 + + // 2^112 * 1 * G + + .quad 0xda7c2b256768d593 + .quad 0x98c1c0574422ca13 + .quad 0xf1a80bd5ca0ace1d + .quad 0x29cdd1adc088a690 + .quad 0xd6cfd1ef5fddc09c + .quad 0xe82b3efdf7575dce + .quad 0x25d56b5d201634c2 + .quad 0x3041c6bb04ed2b9b + .quad 0x0ff2f2f9d956e148 + .quad 0xade797759f356b2e + .quad 0x1a4698bb5f6c025c + .quad 0x104bbd6814049a7b + + // 2^112 * 2 * G + + .quad 0x51f0fd3168f1ed67 + .quad 0x2c811dcdd86f3bc2 + .quad 0x44dc5c4304d2f2de + .quad 0x5be8cc57092a7149 + .quad 0xa95d9a5fd67ff163 + .quad 0xe92be69d4cc75681 + .quad 0xb7f8024cde20f257 + .quad 0x204f2a20fb072df5 + .quad 0xc8143b3d30ebb079 + .quad 0x7589155abd652e30 + .quad 0x653c3c318f6d5c31 + .quad 0x2570fb17c279161f + + // 2^112 * 3 * G + + .quad 0x3efa367f2cb61575 + .quad 0xf5f96f761cd6026c + .quad 0xe8c7142a65b52562 + .quad 0x3dcb65ea53030acd + .quad 0x192ea9550bb8245a + .quad 0xc8e6fba88f9050d1 + .quad 0x7986ea2d88a4c935 + .quad 0x241c5f91de018668 + .quad 0x28d8172940de6caa + .quad 0x8fbf2cf022d9733a + .quad 0x16d7fcdd235b01d1 + .quad 0x08420edd5fcdf0e5 + + // 2^112 * 4 * G + + .quad 0xcdff20ab8362fa4a + .quad 0x57e118d4e21a3e6e + .quad 0xe3179617fc39e62b + .quad 0x0d9a53efbc1769fd + .quad 0x0358c34e04f410ce + .quad 0xb6135b5a276e0685 + .quad 0x5d9670c7ebb91521 + .quad 0x04d654f321db889c + .quad 0x5e7dc116ddbdb5d5 + .quad 0x2954deb68da5dd2d + .quad 0x1cb608173334a292 + .quad 0x4a7a4f2618991ad7 + + // 2^112 * 5 * G + + .quad 0xf4a718025fb15f95 + .quad 0x3df65f346b5c1b8f + .quad 0xcdfcf08500e01112 + .quad 0x11b50c4cddd31848 + .quad 0x24c3b291af372a4b + .quad 0x93da8270718147f2 + .quad 0xdd84856486899ef2 + .quad 0x4a96314223e0ee33 + .quad 0xa6e8274408a4ffd6 + .quad 0x738e177e9c1576d9 + .quad 0x773348b63d02b3f2 + .quad 0x4f4bce4dce6bcc51 + + // 2^112 * 6 * G + + .quad 0xa71fce5ae2242584 + .quad 0x26ea725692f58a9e + .quad 0xd21a09d71cea3cf4 + .quad 0x73fcdd14b71c01e6 + .quad 0x30e2616ec49d0b6f + .quad 0xe456718fcaec2317 + .quad 0x48eb409bf26b4fa6 + .quad 0x3042cee561595f37 + .quad 0x427e7079449bac41 + .quad 0x855ae36dbce2310a + .quad 0x4cae76215f841a7c + .quad 0x389e740c9a9ce1d6 + + // 2^112 * 7 * G + + .quad 0x64fcb3ae34dcb9ce + .quad 0x97500323e348d0ad + .quad 0x45b3f07d62c6381b + .quad 0x61545379465a6788 + .quad 0xc9bd78f6570eac28 + .quad 0xe55b0b3227919ce1 + .quad 0x65fc3eaba19b91ed + .quad 0x25c425e5d6263690 + .quad 0x3f3e06a6f1d7de6e + .quad 0x3ef976278e062308 + .quad 0x8c14f6264e8a6c77 + .quad 0x6539a08915484759 + + // 2^112 * 8 * G + + .quad 0xe9d21f74c3d2f773 + .quad 0xc150544125c46845 + .quad 0x624e5ce8f9b99e33 + .quad 0x11c5e4aac5cd186c + .quad 0xddc4dbd414bb4a19 + .quad 0x19b2bc3c98424f8e + .quad 0x48a89fd736ca7169 + .quad 0x0f65320ef019bd90 + .quad 0xd486d1b1cafde0c6 + .quad 0x4f3fe6e3163b5181 + .quad 0x59a8af0dfaf2939a + .quad 0x4cabc7bdec33072a + + // 2^116 * 1 * G + + .quad 0x16faa8fb532f7428 + .quad 0xdbd42ea046a4e272 + .quad 0x5337653b8b9ea480 + .quad 0x4065947223973f03 + .quad 0xf7c0a19c1a54a044 + .quad 0x4a1c5e2477bd9fbb + .quad 0xa6e3ca115af22972 + .quad 0x1819bb953f2e9e0d + .quad 0x498fbb795e042e84 + .quad 0x7d0dd89a7698b714 + .quad 0x8bfb0ba427fe6295 + .quad 0x36ba82e721200524 + + // 2^116 * 2 * G + + .quad 0xd60ecbb74245ec41 + .quad 0xfd9be89e34348716 + .quad 0xc9240afee42284de + .quad 0x4472f648d0531db4 + .quad 
0xc8d69d0a57274ed5 + .quad 0x45ba803260804b17 + .quad 0xdf3cda102255dfac + .quad 0x77d221232709b339 + .quad 0x498a6d7064ad94d8 + .quad 0xa5b5c8fd9af62263 + .quad 0x8ca8ed0545c141f4 + .quad 0x2c63bec3662d358c + + // 2^116 * 3 * G + + .quad 0x7fe60d8bea787955 + .quad 0xb9dc117eb5f401b7 + .quad 0x91c7c09a19355cce + .quad 0x22692ef59442bedf + .quad 0x9a518b3a8586f8bf + .quad 0x9ee71af6cbb196f0 + .quad 0xaa0625e6a2385cf2 + .quad 0x1deb2176ddd7c8d1 + .quad 0x8563d19a2066cf6c + .quad 0x401bfd8c4dcc7cd7 + .quad 0xd976a6becd0d8f62 + .quad 0x67cfd773a278b05e + + // 2^116 * 4 * G + + .quad 0x8dec31faef3ee475 + .quad 0x99dbff8a9e22fd92 + .quad 0x512d11594e26cab1 + .quad 0x0cde561eec4310b9 + .quad 0x2d5fa9855a4e586a + .quad 0x65f8f7a449beab7e + .quad 0xaa074dddf21d33d3 + .quad 0x185cba721bcb9dee + .quad 0x93869da3f4e3cb41 + .quad 0xbf0392f540f7977e + .quad 0x026204fcd0463b83 + .quad 0x3ec91a769eec6eed + + // 2^116 * 5 * G + + .quad 0x1e9df75bf78166ad + .quad 0x4dfda838eb0cd7af + .quad 0xba002ed8c1eaf988 + .quad 0x13fedb3e11f33cfc + .quad 0x0fad2fb7b0a3402f + .quad 0x46615ecbfb69f4a8 + .quad 0xf745bcc8c5f8eaa6 + .quad 0x7a5fa8794a94e896 + .quad 0x52958faa13cd67a1 + .quad 0x965ee0818bdbb517 + .quad 0x16e58daa2e8845b3 + .quad 0x357d397d5499da8f + + // 2^116 * 6 * G + + .quad 0x1ebfa05fb0bace6c + .quad 0xc934620c1caf9a1e + .quad 0xcc771cc41d82b61a + .quad 0x2d94a16aa5f74fec + .quad 0x481dacb4194bfbf8 + .quad 0x4d77e3f1bae58299 + .quad 0x1ef4612e7d1372a0 + .quad 0x3a8d867e70ff69e1 + .quad 0x6f58cd5d55aff958 + .quad 0xba3eaa5c75567721 + .quad 0x75c123999165227d + .quad 0x69be1343c2f2b35e + + // 2^116 * 7 * G + + .quad 0x0e091d5ee197c92a + .quad 0x4f51019f2945119f + .quad 0x143679b9f034e99c + .quad 0x7d88112e4d24c696 + .quad 0x82bbbdac684b8de3 + .quad 0xa2f4c7d03fca0718 + .quad 0x337f92fbe096aaa8 + .quad 0x200d4d8c63587376 + .quad 0x208aed4b4893b32b + .quad 0x3efbf23ebe59b964 + .quad 0xd762deb0dba5e507 + .quad 0x69607bd681bd9d94 + + // 2^116 * 8 * G + + .quad 0xf6be021068de1ce1 + .quad 0xe8d518e70edcbc1f + .quad 0xe3effdd01b5505a5 + .quad 0x35f63353d3ec3fd0 + .quad 0x3b7f3bd49323a902 + .quad 0x7c21b5566b2c6e53 + .quad 0xe5ba8ff53a7852a7 + .quad 0x28bc77a5838ece00 + .quad 0x63ba78a8e25d8036 + .quad 0x63651e0094333490 + .quad 0x48d82f20288ce532 + .quad 0x3a31abfa36b57524 + + // 2^120 * 1 * G + + .quad 0x239e9624089c0a2e + .quad 0xc748c4c03afe4738 + .quad 0x17dbed2a764fa12a + .quad 0x639b93f0321c8582 + .quad 0xc08f788f3f78d289 + .quad 0xfe30a72ca1404d9f + .quad 0xf2778bfccf65cc9d + .quad 0x7ee498165acb2021 + .quad 0x7bd508e39111a1c3 + .quad 0x2b2b90d480907489 + .quad 0xe7d2aec2ae72fd19 + .quad 0x0edf493c85b602a6 + + // 2^120 * 2 * G + + .quad 0xaecc8158599b5a68 + .quad 0xea574f0febade20e + .quad 0x4fe41d7422b67f07 + .quad 0x403b92e3019d4fb4 + .quad 0x6767c4d284764113 + .quad 0xa090403ff7f5f835 + .quad 0x1c8fcffacae6bede + .quad 0x04c00c54d1dfa369 + .quad 0x4dc22f818b465cf8 + .quad 0x71a0f35a1480eff8 + .quad 0xaee8bfad04c7d657 + .quad 0x355bb12ab26176f4 + + // 2^120 * 3 * G + + .quad 0xa71e64cc7493bbf4 + .quad 0xe5bd84d9eca3b0c3 + .quad 0x0a6bc50cfa05e785 + .quad 0x0f9b8132182ec312 + .quad 0xa301dac75a8c7318 + .quad 0xed90039db3ceaa11 + .quad 0x6f077cbf3bae3f2d + .quad 0x7518eaf8e052ad8e + .quad 0xa48859c41b7f6c32 + .quad 0x0f2d60bcf4383298 + .quad 0x1815a929c9b1d1d9 + .quad 0x47c3871bbb1755c4 + + // 2^120 * 4 * G + + .quad 0x5144539771ec4f48 + .quad 0xf805b17dc98c5d6e + .quad 0xf762c11a47c3c66b + .quad 0x00b89b85764699dc + .quad 0xfbe65d50c85066b0 + .quad 0x62ecc4b0b3a299b0 + .quad 0xe53754ea441ae8e0 + .quad 
0x08fea02ce8d48d5f + .quad 0x824ddd7668deead0 + .quad 0xc86445204b685d23 + .quad 0xb514cfcd5d89d665 + .quad 0x473829a74f75d537 + + // 2^120 * 5 * G + + .quad 0x82d2da754679c418 + .quad 0xe63bd7d8b2618df0 + .quad 0x355eef24ac47eb0a + .quad 0x2078684c4833c6b4 + .quad 0x23d9533aad3902c9 + .quad 0x64c2ddceef03588f + .quad 0x15257390cfe12fb4 + .quad 0x6c668b4d44e4d390 + .quad 0x3b48cf217a78820c + .quad 0xf76a0ab281273e97 + .quad 0xa96c65a78c8eed7b + .quad 0x7411a6054f8a433f + + // 2^120 * 6 * G + + .quad 0x4d659d32b99dc86d + .quad 0x044cdc75603af115 + .quad 0xb34c712cdcc2e488 + .quad 0x7c136574fb8134ff + .quad 0x579ae53d18b175b4 + .quad 0x68713159f392a102 + .quad 0x8455ecba1eef35f5 + .quad 0x1ec9a872458c398f + .quad 0xb8e6a4d400a2509b + .quad 0x9b81d7020bc882b4 + .quad 0x57e7cc9bf1957561 + .quad 0x3add88a5c7cd6460 + + // 2^120 * 7 * G + + .quad 0xab895770b635dcf2 + .quad 0x02dfef6cf66c1fbc + .quad 0x85530268beb6d187 + .quad 0x249929fccc879e74 + .quad 0x85c298d459393046 + .quad 0x8f7e35985ff659ec + .quad 0x1d2ca22af2f66e3a + .quad 0x61ba1131a406a720 + .quad 0xa3d0a0f116959029 + .quad 0x023b6b6cba7ebd89 + .quad 0x7bf15a3e26783307 + .quad 0x5620310cbbd8ece7 + + // 2^120 * 8 * G + + .quad 0x528993434934d643 + .quad 0xb9dbf806a51222f5 + .quad 0x8f6d878fc3f41c22 + .quad 0x37676a2a4d9d9730 + .quad 0x6646b5f477e285d6 + .quad 0x40e8ff676c8f6193 + .quad 0xa6ec7311abb594dd + .quad 0x7ec846f3658cec4d + .quad 0x9b5e8f3f1da22ec7 + .quad 0x130f1d776c01cd13 + .quad 0x214c8fcfa2989fb8 + .quad 0x6daaf723399b9dd5 + + // 2^124 * 1 * G + + .quad 0x591e4a5610628564 + .quad 0x2a4bb87ca8b4df34 + .quad 0xde2a2572e7a38e43 + .quad 0x3cbdabd9fee5046e + .quad 0x81aebbdd2cd13070 + .quad 0x962e4325f85a0e9e + .quad 0xde9391aacadffecb + .quad 0x53177fda52c230e6 + .quad 0xa7bc970650b9de79 + .quad 0x3d12a7fbc301b59b + .quad 0x02652e68d36ae38c + .quad 0x79d739835a6199dc + + // 2^124 * 2 * G + + .quad 0xd9354df64131c1bd + .quad 0x758094a186ec5822 + .quad 0x4464ee12e459f3c2 + .quad 0x6c11fce4cb133282 + .quad 0x21c9d9920d591737 + .quad 0x9bea41d2e9b46cd6 + .quad 0xe20e84200d89bfca + .quad 0x79d99f946eae5ff8 + .quad 0xf17b483568673205 + .quad 0x387deae83caad96c + .quad 0x61b471fd56ffe386 + .quad 0x31741195b745a599 + + // 2^124 * 3 * G + + .quad 0xe8d10190b77a360b + .quad 0x99b983209995e702 + .quad 0xbd4fdff8fa0247aa + .quad 0x2772e344e0d36a87 + .quad 0x17f8ba683b02a047 + .quad 0x50212096feefb6c8 + .quad 0x70139be21556cbe2 + .quad 0x203e44a11d98915b + .quad 0xd6863eba37b9e39f + .quad 0x105bc169723b5a23 + .quad 0x104f6459a65c0762 + .quad 0x567951295b4d38d4 + + // 2^124 * 4 * G + + .quad 0x535fd60613037524 + .quad 0xe210adf6b0fbc26a + .quad 0xac8d0a9b23e990ae + .quad 0x47204d08d72fdbf9 + .quad 0x07242eb30d4b497f + .quad 0x1ef96306b9bccc87 + .quad 0x37950934d8116f45 + .quad 0x05468d6201405b04 + .quad 0x00f565a9f93267de + .quad 0xcecfd78dc0d58e8a + .quad 0xa215e2dcf318e28e + .quad 0x4599ee919b633352 + + // 2^124 * 5 * G + + .quad 0xd3c220ca70e0e76b + .quad 0xb12bea58ea9f3094 + .quad 0x294ddec8c3271282 + .quad 0x0c3539e1a1d1d028 + .quad 0xac746d6b861ae579 + .quad 0x31ab0650f6aea9dc + .quad 0x241d661140256d4c + .quad 0x2f485e853d21a5de + .quad 0x329744839c0833f3 + .quad 0x6fe6257fd2abc484 + .quad 0x5327d1814b358817 + .quad 0x65712585893fe9bc + + // 2^124 * 6 * G + + .quad 0x9c102fb732a61161 + .quad 0xe48e10dd34d520a8 + .quad 0x365c63546f9a9176 + .quad 0x32f6fe4c046f6006 + .quad 0x81c29f1bd708ee3f + .quad 0xddcb5a05ae6407d0 + .quad 0x97aec1d7d2a3eba7 + .quad 0x1590521a91d50831 + .quad 0x40a3a11ec7910acc + .quad 0x9013dff8f16d27ae + .quad 
0x1a9720d8abb195d4 + .quad 0x1bb9fe452ea98463 + + // 2^124 * 7 * G + + .quad 0xe9d1d950b3d54f9e + .quad 0x2d5f9cbee00d33c1 + .quad 0x51c2c656a04fc6ac + .quad 0x65c091ee3c1cbcc9 + .quad 0xcf5e6c95cc36747c + .quad 0x294201536b0bc30d + .quad 0x453ac67cee797af0 + .quad 0x5eae6ab32a8bb3c9 + .quad 0x7083661114f118ea + .quad 0x2b37b87b94349cad + .quad 0x7273f51cb4e99f40 + .quad 0x78a2a95823d75698 + + // 2^124 * 8 * G + + .quad 0xa2b072e95c8c2ace + .quad 0x69cffc96651e9c4b + .quad 0x44328ef842e7b42b + .quad 0x5dd996c122aadeb3 + .quad 0xb4f23c425ef83207 + .quad 0xabf894d3c9a934b5 + .quad 0xd0708c1339fd87f7 + .quad 0x1876789117166130 + .quad 0x925b5ef0670c507c + .quad 0x819bc842b93c33bf + .quad 0x10792e9a70dd003f + .quad 0x59ad4b7a6e28dc74 + + // 2^128 * 1 * G + + .quad 0x5f3a7562eb3dbe47 + .quad 0xf7ea38548ebda0b8 + .quad 0x00c3e53145747299 + .quad 0x1304e9e71627d551 + .quad 0x583b04bfacad8ea2 + .quad 0x29b743e8148be884 + .quad 0x2b1e583b0810c5db + .quad 0x2b5449e58eb3bbaa + .quad 0x789814d26adc9cfe + .quad 0x3c1bab3f8b48dd0b + .quad 0xda0fe1fff979c60a + .quad 0x4468de2d7c2dd693 + + // 2^128 * 2 * G + + .quad 0x51bb355e9419469e + .quad 0x33e6dc4c23ddc754 + .quad 0x93a5b6d6447f9962 + .quad 0x6cce7c6ffb44bd63 + .quad 0x4b9ad8c6f86307ce + .quad 0x21113531435d0c28 + .quad 0xd4a866c5657a772c + .quad 0x5da6427e63247352 + .quad 0x1a94c688deac22ca + .quad 0xb9066ef7bbae1ff8 + .quad 0x88ad8c388d59580f + .quad 0x58f29abfe79f2ca8 + + // 2^128 * 3 * G + + .quad 0xe90ecfab8de73e68 + .quad 0x54036f9f377e76a5 + .quad 0xf0495b0bbe015982 + .quad 0x577629c4a7f41e36 + .quad 0x4b5a64bf710ecdf6 + .quad 0xb14ce538462c293c + .quad 0x3643d056d50b3ab9 + .quad 0x6af93724185b4870 + .quad 0x3220024509c6a888 + .quad 0xd2e036134b558973 + .quad 0x83e236233c33289f + .quad 0x701f25bb0caec18f + + // 2^128 * 4 * G + + .quad 0xc3a8b0f8e4616ced + .quad 0xf700660e9e25a87d + .quad 0x61e3061ff4bca59c + .quad 0x2e0c92bfbdc40be9 + .quad 0x9d18f6d97cbec113 + .quad 0x844a06e674bfdbe4 + .quad 0x20f5b522ac4e60d6 + .quad 0x720a5bc050955e51 + .quad 0x0c3f09439b805a35 + .quad 0xe84e8b376242abfc + .quad 0x691417f35c229346 + .quad 0x0e9b9cbb144ef0ec + + // 2^128 * 5 * G + + .quad 0xfbbad48ffb5720ad + .quad 0xee81916bdbf90d0e + .quad 0xd4813152635543bf + .quad 0x221104eb3f337bd8 + .quad 0x8dee9bd55db1beee + .quad 0xc9c3ab370a723fb9 + .quad 0x44a8f1bf1c68d791 + .quad 0x366d44191cfd3cde + .quad 0x9e3c1743f2bc8c14 + .quad 0x2eda26fcb5856c3b + .quad 0xccb82f0e68a7fb97 + .quad 0x4167a4e6bc593244 + + // 2^128 * 6 * G + + .quad 0x643b9d2876f62700 + .quad 0x5d1d9d400e7668eb + .quad 0x1b4b430321fc0684 + .quad 0x7938bb7e2255246a + .quad 0xc2be2665f8ce8fee + .quad 0xe967ff14e880d62c + .quad 0xf12e6e7e2f364eee + .quad 0x34b33370cb7ed2f6 + .quad 0xcdc591ee8681d6cc + .quad 0xce02109ced85a753 + .quad 0xed7485c158808883 + .quad 0x1176fc6e2dfe65e4 + + // 2^128 * 7 * G + + .quad 0xb4af6cd05b9c619b + .quad 0x2ddfc9f4b2a58480 + .quad 0x3d4fa502ebe94dc4 + .quad 0x08fc3a4c677d5f34 + .quad 0xdb90e28949770eb8 + .quad 0x98fbcc2aacf440a3 + .quad 0x21354ffeded7879b + .quad 0x1f6a3e54f26906b6 + .quad 0x60a4c199d30734ea + .quad 0x40c085b631165cd6 + .quad 0xe2333e23f7598295 + .quad 0x4f2fad0116b900d1 + + // 2^128 * 8 * G + + .quad 0x44beb24194ae4e54 + .quad 0x5f541c511857ef6c + .quad 0xa61e6b2d368d0498 + .quad 0x445484a4972ef7ab + .quad 0x962cd91db73bb638 + .quad 0xe60577aafc129c08 + .quad 0x6f619b39f3b61689 + .quad 0x3451995f2944ee81 + .quad 0x9152fcd09fea7d7c + .quad 0x4a816c94b0935cf6 + .quad 0x258e9aaa47285c40 + .quad 0x10b89ca6042893b7 + + // 2^132 * 1 * G + + .quad 
0x9b2a426e3b646025 + .quad 0x32127190385ce4cf + .quad 0xa25cffc2dd6dea45 + .quad 0x06409010bea8de75 + .quad 0xd67cded679d34aa0 + .quad 0xcc0b9ec0cc4db39f + .quad 0xa535a456e35d190f + .quad 0x2e05d9eaf61f6fef + .quad 0xc447901ad61beb59 + .quad 0x661f19bce5dc880a + .quad 0x24685482b7ca6827 + .quad 0x293c778cefe07f26 + + // 2^132 * 2 * G + + .quad 0x86809e7007069096 + .quad 0xaad75b15e4e50189 + .quad 0x07f35715a21a0147 + .quad 0x0487f3f112815d5e + .quad 0x16c795d6a11ff200 + .quad 0xcb70d0e2b15815c9 + .quad 0x89f293209b5395b5 + .quad 0x50b8c2d031e47b4f + .quad 0x48350c08068a4962 + .quad 0x6ffdd05351092c9a + .quad 0x17af4f4aaf6fc8dd + .quad 0x4b0553b53cdba58b + + // 2^132 * 3 * G + + .quad 0x9c65fcbe1b32ff79 + .quad 0xeb75ea9f03b50f9b + .quad 0xfced2a6c6c07e606 + .quad 0x35106cd551717908 + .quad 0xbf05211b27c152d4 + .quad 0x5ec26849bd1af639 + .quad 0x5e0b2caa8e6fab98 + .quad 0x054c8bdd50bd0840 + .quad 0x38a0b12f1dcf073d + .quad 0x4b60a8a3b7f6a276 + .quad 0xfed5ac25d3404f9a + .quad 0x72e82d5e5505c229 + + // 2^132 * 4 * G + + .quad 0x6b0b697ff0d844c8 + .quad 0xbb12f85cd979cb49 + .quad 0xd2a541c6c1da0f1f + .quad 0x7b7c242958ce7211 + .quad 0x00d9cdfd69771d02 + .quad 0x410276cd6cfbf17e + .quad 0x4c45306c1cb12ec7 + .quad 0x2857bf1627500861 + .quad 0x9f21903f0101689e + .quad 0xd779dfd3bf861005 + .quad 0xa122ee5f3deb0f1b + .quad 0x510df84b485a00d4 + + // 2^132 * 5 * G + + .quad 0xa54133bb9277a1fa + .quad 0x74ec3b6263991237 + .quad 0x1a3c54dc35d2f15a + .quad 0x2d347144e482ba3a + .quad 0x24b3c887c70ac15e + .quad 0xb0f3a557fb81b732 + .quad 0x9b2cde2fe578cc1b + .quad 0x4cf7ed0703b54f8e + .quad 0x6bd47c6598fbee0f + .quad 0x9e4733e2ab55be2d + .quad 0x1093f624127610c5 + .quad 0x4e05e26ad0a1eaa4 + + // 2^132 * 6 * G + + .quad 0xda9b6b624b531f20 + .quad 0x429a760e77509abb + .quad 0xdbe9f522e823cb80 + .quad 0x618f1856880c8f82 + .quad 0x1833c773e18fe6c0 + .quad 0xe3c4711ad3c87265 + .quad 0x3bfd3c4f0116b283 + .quad 0x1955875eb4cd4db8 + .quad 0x6da6de8f0e399799 + .quad 0x7ad61aa440fda178 + .quad 0xb32cd8105e3563dd + .quad 0x15f6beae2ae340ae + + // 2^132 * 7 * G + + .quad 0x862bcb0c31ec3a62 + .quad 0x810e2b451138f3c2 + .quad 0x788ec4b839dac2a4 + .quad 0x28f76867ae2a9281 + .quad 0xba9a0f7b9245e215 + .quad 0xf368612dd98c0dbb + .quad 0x2e84e4cbf220b020 + .quad 0x6ba92fe962d90eda + .quad 0x3e4df9655884e2aa + .quad 0xbd62fbdbdbd465a5 + .quad 0xd7596caa0de9e524 + .quad 0x6e8042ccb2b1b3d7 + + // 2^132 * 8 * G + + .quad 0xf10d3c29ce28ca6e + .quad 0xbad34540fcb6093d + .quad 0xe7426ed7a2ea2d3f + .quad 0x08af9d4e4ff298b9 + .quad 0x1530653616521f7e + .quad 0x660d06b896203dba + .quad 0x2d3989bc545f0879 + .quad 0x4b5303af78ebd7b0 + .quad 0x72f8a6c3bebcbde8 + .quad 0x4f0fca4adc3a8e89 + .quad 0x6fa9d4e8c7bfdf7a + .quad 0x0dcf2d679b624eb7 + + // 2^136 * 1 * G + + .quad 0x3d5947499718289c + .quad 0x12ebf8c524533f26 + .quad 0x0262bfcb14c3ef15 + .quad 0x20b878d577b7518e + .quad 0x753941be5a45f06e + .quad 0xd07caeed6d9c5f65 + .quad 0x11776b9c72ff51b6 + .quad 0x17d2d1d9ef0d4da9 + .quad 0x27f2af18073f3e6a + .quad 0xfd3fe519d7521069 + .quad 0x22e3b72c3ca60022 + .quad 0x72214f63cc65c6a7 + + // 2^136 * 2 * G + + .quad 0xb4e37f405307a693 + .quad 0xaba714d72f336795 + .quad 0xd6fbd0a773761099 + .quad 0x5fdf48c58171cbc9 + .quad 0x1d9db7b9f43b29c9 + .quad 0xd605824a4f518f75 + .quad 0xf2c072bd312f9dc4 + .quad 0x1f24ac855a1545b0 + .quad 0x24d608328e9505aa + .quad 0x4748c1d10c1420ee + .quad 0xc7ffe45c06fb25a2 + .quad 0x00ba739e2ae395e6 + + // 2^136 * 3 * G + + .quad 0x592e98de5c8790d6 + .quad 0xe5bfb7d345c2a2df + .quad 0x115a3b60f9b49922 + .quad 
0x03283a3e67ad78f3 + .quad 0xae4426f5ea88bb26 + .quad 0x360679d984973bfb + .quad 0x5c9f030c26694e50 + .quad 0x72297de7d518d226 + .quad 0x48241dc7be0cb939 + .quad 0x32f19b4d8b633080 + .quad 0xd3dfc90d02289308 + .quad 0x05e1296846271945 + + // 2^136 * 4 * G + + .quad 0xba82eeb32d9c495a + .quad 0xceefc8fcf12bb97c + .quad 0xb02dabae93b5d1e0 + .quad 0x39c00c9c13698d9b + .quad 0xadbfbbc8242c4550 + .quad 0xbcc80cecd03081d9 + .quad 0x843566a6f5c8df92 + .quad 0x78cf25d38258ce4c + .quad 0x15ae6b8e31489d68 + .quad 0xaa851cab9c2bf087 + .quad 0xc9a75a97f04efa05 + .quad 0x006b52076b3ff832 + + // 2^136 * 5 * G + + .quad 0x29e0cfe19d95781c + .quad 0xb681df18966310e2 + .quad 0x57df39d370516b39 + .quad 0x4d57e3443bc76122 + .quad 0xf5cb7e16b9ce082d + .quad 0x3407f14c417abc29 + .quad 0xd4b36bce2bf4a7ab + .quad 0x7de2e9561a9f75ce + .quad 0xde70d4f4b6a55ecb + .quad 0x4801527f5d85db99 + .quad 0xdbc9c440d3ee9a81 + .quad 0x6b2a90af1a6029ed + + // 2^136 * 6 * G + + .quad 0x6923f4fc9ae61e97 + .quad 0x5735281de03f5fd1 + .quad 0xa764ae43e6edd12d + .quad 0x5fd8f4e9d12d3e4a + .quad 0x77ebf3245bb2d80a + .quad 0xd8301b472fb9079b + .quad 0xc647e6f24cee7333 + .quad 0x465812c8276c2109 + .quad 0x4d43beb22a1062d9 + .quad 0x7065fb753831dc16 + .quad 0x180d4a7bde2968d7 + .quad 0x05b32c2b1cb16790 + + // 2^136 * 7 * G + + .quad 0xc8c05eccd24da8fd + .quad 0xa1cf1aac05dfef83 + .quad 0xdbbeeff27df9cd61 + .quad 0x3b5556a37b471e99 + .quad 0xf7fca42c7ad58195 + .quad 0x3214286e4333f3cc + .quad 0xb6c29d0d340b979d + .quad 0x31771a48567307e1 + .quad 0x32b0c524e14dd482 + .quad 0xedb351541a2ba4b6 + .quad 0xa3d16048282b5af3 + .quad 0x4fc079d27a7336eb + + // 2^136 * 8 * G + + .quad 0x51c938b089bf2f7f + .quad 0x2497bd6502dfe9a7 + .quad 0xffffc09c7880e453 + .quad 0x124567cecaf98e92 + .quad 0xdc348b440c86c50d + .quad 0x1337cbc9cc94e651 + .quad 0x6422f74d643e3cb9 + .quad 0x241170c2bae3cd08 + .quad 0x3ff9ab860ac473b4 + .quad 0xf0911dee0113e435 + .quad 0x4ae75060ebc6c4af + .quad 0x3f8612966c87000d + + // 2^140 * 1 * G + + .quad 0x0c9c5303f7957be4 + .quad 0xa3c31a20e085c145 + .quad 0xb0721d71d0850050 + .quad 0x0aba390eab0bf2da + .quad 0x529fdffe638c7bf3 + .quad 0xdf2b9e60388b4995 + .quad 0xe027b34f1bad0249 + .quad 0x7bc92fc9b9fa74ed + .quad 0x9f97ef2e801ad9f9 + .quad 0x83697d5479afda3a + .quad 0xe906b3ffbd596b50 + .quad 0x02672b37dd3fb8e0 + + // 2^140 * 2 * G + + .quad 0x48b2ca8b260885e4 + .quad 0xa4286bec82b34c1c + .quad 0x937e1a2617f58f74 + .quad 0x741d1fcbab2ca2a5 + .quad 0xee9ba729398ca7f5 + .quad 0xeb9ca6257a4849db + .quad 0x29eb29ce7ec544e1 + .quad 0x232ca21ef736e2c8 + .quad 0xbf61423d253fcb17 + .quad 0x08803ceafa39eb14 + .quad 0xf18602df9851c7af + .quad 0x0400f3a049e3414b + + // 2^140 * 3 * G + + .quad 0xabce0476ba61c55b + .quad 0x36a3d6d7c4d39716 + .quad 0x6eb259d5e8d82d09 + .quad 0x0c9176e984d756fb + .quad 0x2efba412a06e7b06 + .quad 0x146785452c8d2560 + .quad 0xdf9713ebd67a91c7 + .quad 0x32830ac7157eadf3 + .quad 0x0e782a7ab73769e8 + .quad 0x04a05d7875b18e2c + .quad 0x29525226ebcceae1 + .quad 0x0d794f8383eba820 + + // 2^140 * 4 * G + + .quad 0xff35f5cb9e1516f4 + .quad 0xee805bcf648aae45 + .quad 0xf0d73c2bb93a9ef3 + .quad 0x097b0bf22092a6c2 + .quad 0x7be44ce7a7a2e1ac + .quad 0x411fd93efad1b8b7 + .quad 0x1734a1d70d5f7c9b + .quad 0x0d6592233127db16 + .quad 0xc48bab1521a9d733 + .quad 0xa6c2eaead61abb25 + .quad 0x625c6c1cc6cb4305 + .quad 0x7fc90fea93eb3a67 + + // 2^140 * 5 * G + + .quad 0x0408f1fe1f5c5926 + .quad 0x1a8f2f5e3b258bf4 + .quad 0x40a951a2fdc71669 + .quad 0x6598ee93c98b577e + .quad 0xc527deb59c7cb23d + .quad 0x955391695328404e + .quad 
0xd64392817ccf2c7a + .quad 0x6ce97dabf7d8fa11 + .quad 0x25b5a8e50ef7c48f + .quad 0xeb6034116f2ce532 + .quad 0xc5e75173e53de537 + .quad 0x73119fa08c12bb03 + + // 2^140 * 6 * G + + .quad 0xed30129453f1a4cb + .quad 0xbce621c9c8f53787 + .quad 0xfacb2b1338bee7b9 + .quad 0x3025798a9ea8428c + .quad 0x7845b94d21f4774d + .quad 0xbf62f16c7897b727 + .quad 0x671857c03c56522b + .quad 0x3cd6a85295621212 + .quad 0x3fecde923aeca999 + .quad 0xbdaa5b0062e8c12f + .quad 0x67b99dfc96988ade + .quad 0x3f52c02852661036 + + // 2^140 * 7 * G + + .quad 0xffeaa48e2a1351c6 + .quad 0x28624754fa7f53d7 + .quad 0x0b5ba9e57582ddf1 + .quad 0x60c0104ba696ac59 + .quad 0x9258bf99eec416c6 + .quad 0xac8a5017a9d2f671 + .quad 0x629549ab16dea4ab + .quad 0x05d0e85c99091569 + .quad 0x051de020de9cbe97 + .quad 0xfa07fc56b50bcf74 + .quad 0x378cec9f0f11df65 + .quad 0x36853c69ab96de4d + + // 2^140 * 8 * G + + .quad 0x36d9b8de78f39b2d + .quad 0x7f42ed71a847b9ec + .quad 0x241cd1d679bd3fde + .quad 0x6a704fec92fbce6b + .quad 0x4433c0b0fac5e7be + .quad 0x724bae854c08dcbe + .quad 0xf1f24cc446978f9b + .quad 0x4a0aff6d62825fc8 + .quad 0xe917fb9e61095301 + .quad 0xc102df9402a092f8 + .quad 0xbf09e2f5fa66190b + .quad 0x681109bee0dcfe37 + + // 2^144 * 1 * G + + .quad 0x559a0cc9782a0dde + .quad 0x551dcdb2ea718385 + .quad 0x7f62865b31ef238c + .quad 0x504aa7767973613d + .quad 0x9c18fcfa36048d13 + .quad 0x29159db373899ddd + .quad 0xdc9f350b9f92d0aa + .quad 0x26f57eee878a19d4 + .quad 0x0cab2cd55687efb1 + .quad 0x5180d162247af17b + .quad 0x85c15a344f5a2467 + .quad 0x4041943d9dba3069 + + // 2^144 * 2 * G + + .quad 0xc3c0eeba43ebcc96 + .quad 0x8d749c9c26ea9caf + .quad 0xd9fa95ee1c77ccc6 + .quad 0x1420a1d97684340f + .quad 0x4b217743a26caadd + .quad 0x47a6b424648ab7ce + .quad 0xcb1d4f7a03fbc9e3 + .quad 0x12d931429800d019 + .quad 0x00c67799d337594f + .quad 0x5e3c5140b23aa47b + .quad 0x44182854e35ff395 + .quad 0x1b4f92314359a012 + + // 2^144 * 3 * G + + .quad 0x3e5c109d89150951 + .quad 0x39cefa912de9696a + .quad 0x20eae43f975f3020 + .quad 0x239b572a7f132dae + .quad 0x33cf3030a49866b1 + .quad 0x251f73d2215f4859 + .quad 0xab82aa4051def4f6 + .quad 0x5ff191d56f9a23f6 + .quad 0x819ed433ac2d9068 + .quad 0x2883ab795fc98523 + .quad 0xef4572805593eb3d + .quad 0x020c526a758f36cb + + // 2^144 * 4 * G + + .quad 0x779834f89ed8dbbc + .quad 0xc8f2aaf9dc7ca46c + .quad 0xa9524cdca3e1b074 + .quad 0x02aacc4615313877 + .quad 0xe931ef59f042cc89 + .quad 0x2c589c9d8e124bb6 + .quad 0xadc8e18aaec75997 + .quad 0x452cfe0a5602c50c + .quad 0x86a0f7a0647877df + .quad 0xbbc464270e607c9f + .quad 0xab17ea25f1fb11c9 + .quad 0x4cfb7d7b304b877b + + // 2^144 * 5 * G + + .quad 0x72b43d6cb89b75fe + .quad 0x54c694d99c6adc80 + .quad 0xb8c3aa373ee34c9f + .quad 0x14b4622b39075364 + .quad 0xe28699c29789ef12 + .quad 0x2b6ecd71df57190d + .quad 0xc343c857ecc970d0 + .quad 0x5b1d4cbc434d3ac5 + .quad 0xb6fb2615cc0a9f26 + .quad 0x3a4f0e2bb88dcce5 + .quad 0x1301498b3369a705 + .quad 0x2f98f71258592dd1 + + // 2^144 * 6 * G + + .quad 0x0c94a74cb50f9e56 + .quad 0x5b1ff4a98e8e1320 + .quad 0x9a2acc2182300f67 + .quad 0x3a6ae249d806aaf9 + .quad 0x2e12ae444f54a701 + .quad 0xfcfe3ef0a9cbd7de + .quad 0xcebf890d75835de0 + .quad 0x1d8062e9e7614554 + .quad 0x657ada85a9907c5a + .quad 0x1a0ea8b591b90f62 + .quad 0x8d0e1dfbdf34b4e9 + .quad 0x298b8ce8aef25ff3 + + // 2^144 * 7 * G + + .quad 0x2a927953eff70cb2 + .quad 0x4b89c92a79157076 + .quad 0x9418457a30a7cf6a + .quad 0x34b8a8404d5ce485 + .quad 0x837a72ea0a2165de + .quad 0x3fab07b40bcf79f6 + .quad 0x521636c77738ae70 + .quad 0x6ba6271803a7d7dc + .quad 0xc26eecb583693335 + .quad 
0xd5a813df63b5fefd + .quad 0xa293aa9aa4b22573 + .quad 0x71d62bdd465e1c6a + + // 2^144 * 8 * G + + .quad 0x6533cc28d378df80 + .quad 0xf6db43790a0fa4b4 + .quad 0xe3645ff9f701da5a + .quad 0x74d5f317f3172ba4 + .quad 0xcd2db5dab1f75ef5 + .quad 0xd77f95cf16b065f5 + .quad 0x14571fea3f49f085 + .quad 0x1c333621262b2b3d + .quad 0xa86fe55467d9ca81 + .quad 0x398b7c752b298c37 + .quad 0xda6d0892e3ac623b + .quad 0x4aebcc4547e9d98c + + // 2^148 * 1 * G + + .quad 0x53175a7205d21a77 + .quad 0xb0c04422d3b934d4 + .quad 0xadd9f24bdd5deadc + .quad 0x074f46e69f10ff8c + .quad 0x0de9b204a059a445 + .quad 0xe15cb4aa4b17ad0f + .quad 0xe1bbec521f79c557 + .quad 0x2633f1b9d071081b + .quad 0xc1fb4177018b9910 + .quad 0xa6ea20dc6c0fe140 + .quad 0xd661f3e74354c6ff + .quad 0x5ecb72e6f1a3407a + + // 2^148 * 2 * G + + .quad 0xa515a31b2259fb4e + .quad 0x0960f3972bcac52f + .quad 0xedb52fec8d3454cb + .quad 0x382e2720c476c019 + .quad 0xfeeae106e8e86997 + .quad 0x9863337f98d09383 + .quad 0x9470480eaa06ebef + .quad 0x038b6898d4c5c2d0 + .quad 0xf391c51d8ace50a6 + .quad 0x3142d0b9ae2d2948 + .quad 0xdb4d5a1a7f24ca80 + .quad 0x21aeba8b59250ea8 + + // 2^148 * 3 * G + + .quad 0x24f13b34cf405530 + .quad 0x3c44ea4a43088af7 + .quad 0x5dd5c5170006a482 + .quad 0x118eb8f8890b086d + .quad 0x53853600f0087f23 + .quad 0x4c461879da7d5784 + .quad 0x6af303deb41f6860 + .quad 0x0a3c16c5c27c18ed + .quad 0x17e49c17cc947f3d + .quad 0xccc6eda6aac1d27b + .quad 0xdf6092ceb0f08e56 + .quad 0x4909b3e22c67c36b + + // 2^148 * 4 * G + + .quad 0x9c9c85ea63fe2e89 + .quad 0xbe1baf910e9412ec + .quad 0x8f7baa8a86fbfe7b + .quad 0x0fb17f9fef968b6c + .quad 0x59a16676706ff64e + .quad 0x10b953dd0d86a53d + .quad 0x5848e1e6ce5c0b96 + .quad 0x2d8b78e712780c68 + .quad 0x79d5c62eafc3902b + .quad 0x773a215289e80728 + .quad 0xc38ae640e10120b9 + .quad 0x09ae23717b2b1a6d + + // 2^148 * 5 * G + + .quad 0xbb6a192a4e4d083c + .quad 0x34ace0630029e192 + .quad 0x98245a59aafabaeb + .quad 0x6d9c8a9ada97faac + .quad 0x10ab8fa1ad32b1d0 + .quad 0xe9aced1be2778b24 + .quad 0xa8856bc0373de90f + .quad 0x66f35ddddda53996 + .quad 0xd27d9afb24997323 + .quad 0x1bb7e07ef6f01d2e + .quad 0x2ba7472df52ecc7f + .quad 0x03019b4f646f9dc8 + + // 2^148 * 6 * G + + .quad 0x04a186b5565345cd + .quad 0xeee76610bcc4116a + .quad 0x689c73b478fb2a45 + .quad 0x387dcbff65697512 + .quad 0xaf09b214e6b3dc6b + .quad 0x3f7573b5ad7d2f65 + .quad 0xd019d988100a23b0 + .quad 0x392b63a58b5c35f7 + .quad 0x4093addc9c07c205 + .quad 0xc565be15f532c37e + .quad 0x63dbecfd1583402a + .quad 0x61722b4aef2e032e + + // 2^148 * 7 * G + + .quad 0x0012aafeecbd47af + .quad 0x55a266fb1cd46309 + .quad 0xf203eb680967c72c + .quad 0x39633944ca3c1429 + .quad 0xd6b07a5581cb0e3c + .quad 0x290ff006d9444969 + .quad 0x08680b6a16dcda1f + .quad 0x5568d2b75a06de59 + .quad 0x8d0cb88c1b37cfe1 + .quad 0x05b6a5a3053818f3 + .quad 0xf2e9bc04b787d959 + .quad 0x6beba1249add7f64 + + // 2^148 * 8 * G + + .quad 0x1d06005ca5b1b143 + .quad 0x6d4c6bb87fd1cda2 + .quad 0x6ef5967653fcffe7 + .quad 0x097c29e8c1ce1ea5 + .quad 0x5c3cecb943f5a53b + .quad 0x9cc9a61d06c08df2 + .quad 0xcfba639a85895447 + .quad 0x5a845ae80df09fd5 + .quad 0x4ce97dbe5deb94ca + .quad 0x38d0a4388c709c48 + .quad 0xc43eced4a169d097 + .quad 0x0a1249fff7e587c3 + + // 2^152 * 1 * G + + .quad 0x12f0071b276d01c9 + .quad 0xe7b8bac586c48c70 + .quad 0x5308129b71d6fba9 + .quad 0x5d88fbf95a3db792 + .quad 0x0b408d9e7354b610 + .quad 0x806b32535ba85b6e + .quad 0xdbe63a034a58a207 + .quad 0x173bd9ddc9a1df2c + .quad 0x2b500f1efe5872df + .quad 0x58d6582ed43918c1 + .quad 0xe6ed278ec9673ae0 + .quad 0x06e1cd13b19ea319 + + // 
2^152 * 2 * G + + .quad 0x40d0ad516f166f23 + .quad 0x118e32931fab6abe + .quad 0x3fe35e14a04d088e + .quad 0x3080603526e16266 + .quad 0x472baf629e5b0353 + .quad 0x3baa0b90278d0447 + .quad 0x0c785f469643bf27 + .quad 0x7f3a6a1a8d837b13 + .quad 0xf7e644395d3d800b + .quad 0x95a8d555c901edf6 + .quad 0x68cd7830592c6339 + .quad 0x30d0fded2e51307e + + // 2^152 * 3 * G + + .quad 0xe0594d1af21233b3 + .quad 0x1bdbe78ef0cc4d9c + .quad 0x6965187f8f499a77 + .quad 0x0a9214202c099868 + .quad 0x9cb4971e68b84750 + .quad 0xa09572296664bbcf + .quad 0x5c8de72672fa412b + .quad 0x4615084351c589d9 + .quad 0xbc9019c0aeb9a02e + .quad 0x55c7110d16034cae + .quad 0x0e6df501659932ec + .quad 0x3bca0d2895ca5dfe + + // 2^152 * 4 * G + + .quad 0x40f031bc3c5d62a4 + .quad 0x19fc8b3ecff07a60 + .quad 0x98183da2130fb545 + .quad 0x5631deddae8f13cd + .quad 0x9c688eb69ecc01bf + .quad 0xf0bc83ada644896f + .quad 0xca2d955f5f7a9fe2 + .quad 0x4ea8b4038df28241 + .quad 0x2aed460af1cad202 + .quad 0x46305305a48cee83 + .quad 0x9121774549f11a5f + .quad 0x24ce0930542ca463 + + // 2^152 * 5 * G + + .quad 0x1fe890f5fd06c106 + .quad 0xb5c468355d8810f2 + .quad 0x827808fe6e8caf3e + .quad 0x41d4e3c28a06d74b + .quad 0x3fcfa155fdf30b85 + .quad 0xd2f7168e36372ea4 + .quad 0xb2e064de6492f844 + .quad 0x549928a7324f4280 + .quad 0xf26e32a763ee1a2e + .quad 0xae91e4b7d25ffdea + .quad 0xbc3bd33bd17f4d69 + .quad 0x491b66dec0dcff6a + + // 2^152 * 6 * G + + .quad 0x98f5b13dc7ea32a7 + .quad 0xe3d5f8cc7e16db98 + .quad 0xac0abf52cbf8d947 + .quad 0x08f338d0c85ee4ac + .quad 0x75f04a8ed0da64a1 + .quad 0xed222caf67e2284b + .quad 0x8234a3791f7b7ba4 + .quad 0x4cf6b8b0b7018b67 + .quad 0xc383a821991a73bd + .quad 0xab27bc01df320c7a + .quad 0xc13d331b84777063 + .quad 0x530d4a82eb078a99 + + // 2^152 * 7 * G + + .quad 0x004c3630e1f94825 + .quad 0x7e2d78268cab535a + .quad 0xc7482323cc84ff8b + .quad 0x65ea753f101770b9 + .quad 0x6d6973456c9abf9e + .quad 0x257fb2fc4900a880 + .quad 0x2bacf412c8cfb850 + .quad 0x0db3e7e00cbfbd5b + .quad 0x3d66fc3ee2096363 + .quad 0x81d62c7f61b5cb6b + .quad 0x0fbe044213443b1a + .quad 0x02a4ec1921e1a1db + + // 2^152 * 8 * G + + .quad 0x5ce6259a3b24b8a2 + .quad 0xb8577acc45afa0b8 + .quad 0xcccbe6e88ba07037 + .quad 0x3d143c51127809bf + .quad 0xf5c86162f1cf795f + .quad 0x118c861926ee57f2 + .quad 0x172124851c063578 + .quad 0x36d12b5dec067fcf + .quad 0x126d279179154557 + .quad 0xd5e48f5cfc783a0a + .quad 0x36bdb6e8df179bac + .quad 0x2ef517885ba82859 + + // 2^156 * 1 * G + + .quad 0x88bd438cd11e0d4a + .quad 0x30cb610d43ccf308 + .quad 0xe09a0e3791937bcc + .quad 0x4559135b25b1720c + .quad 0x1ea436837c6da1e9 + .quad 0xf9c189af1fb9bdbe + .quad 0x303001fcce5dd155 + .quad 0x28a7c99ebc57be52 + .quad 0xb8fd9399e8d19e9d + .quad 0x908191cb962423ff + .quad 0xb2b948d747c742a3 + .quad 0x37f33226d7fb44c4 + + // 2^156 * 2 * G + + .quad 0x0dae8767b55f6e08 + .quad 0x4a43b3b35b203a02 + .quad 0xe3725a6e80af8c79 + .quad 0x0f7a7fd1705fa7a3 + .quad 0x33912553c821b11d + .quad 0x66ed42c241e301df + .quad 0x066fcc11104222fd + .quad 0x307a3b41c192168f + .quad 0x8eeb5d076eb55ce0 + .quad 0x2fc536bfaa0d925a + .quad 0xbe81830fdcb6c6e8 + .quad 0x556c7045827baf52 + + // 2^156 * 3 * G + + .quad 0x8e2b517302e9d8b7 + .quad 0xe3e52269248714e8 + .quad 0xbd4fbd774ca960b5 + .quad 0x6f4b4199c5ecada9 + .quad 0xb94b90022bf44406 + .quad 0xabd4237eff90b534 + .quad 0x7600a960faf86d3a + .quad 0x2f45abdac2322ee3 + .quad 0x61af4912c8ef8a6a + .quad 0xe58fa4fe43fb6e5e + .quad 0xb5afcc5d6fd427cf + .quad 0x6a5393281e1e11eb + + // 2^156 * 4 * G + + .quad 0xf3da5139a5d1ee89 + .quad 0x8145457cff936988 + .quad 
0x3f622fed00e188c4 + .quad 0x0f513815db8b5a3d + .quad 0x0fff04fe149443cf + .quad 0x53cac6d9865cddd7 + .quad 0x31385b03531ed1b7 + .quad 0x5846a27cacd1039d + .quad 0x4ff5cdac1eb08717 + .quad 0x67e8b29590f2e9bc + .quad 0x44093b5e237afa99 + .quad 0x0d414bed8708b8b2 + + // 2^156 * 5 * G + + .quad 0xcfb68265fd0e75f6 + .quad 0xe45b3e28bb90e707 + .quad 0x7242a8de9ff92c7a + .quad 0x685b3201933202dd + .quad 0x81886a92294ac9e8 + .quad 0x23162b45d55547be + .quad 0x94cfbc4403715983 + .quad 0x50eb8fdb134bc401 + .quad 0xc0b73ec6d6b330cd + .quad 0x84e44807132faff1 + .quad 0x732b7352c4a5dee1 + .quad 0x5d7c7cf1aa7cd2d2 + + // 2^156 * 6 * G + + .quad 0xaf3b46bf7a4aafa2 + .quad 0xb78705ec4d40d411 + .quad 0x114f0c6aca7c15e3 + .quad 0x3f364faaa9489d4d + .quad 0x33d1013e9b73a562 + .quad 0x925cef5748ec26e1 + .quad 0xa7fce614dd468058 + .quad 0x78b0fad41e9aa438 + .quad 0xbf56a431ed05b488 + .quad 0xa533e66c9c495c7e + .quad 0xe8652baf87f3651a + .quad 0x0241800059d66c33 + + // 2^156 * 7 * G + + .quad 0xceb077fea37a5be4 + .quad 0xdb642f02e5a5eeb7 + .quad 0xc2e6d0c5471270b8 + .quad 0x4771b65538e4529c + .quad 0x28350c7dcf38ea01 + .quad 0x7c6cdbc0b2917ab6 + .quad 0xace7cfbe857082f7 + .quad 0x4d2845aba2d9a1e0 + .quad 0xbb537fe0447070de + .quad 0xcba744436dd557df + .quad 0xd3b5a3473600dbcb + .quad 0x4aeabbe6f9ffd7f8 + + // 2^156 * 8 * G + + .quad 0x4630119e40d8f78c + .quad 0xa01a9bc53c710e11 + .quad 0x486d2b258910dd79 + .quad 0x1e6c47b3db0324e5 + .quad 0x6a2134bcc4a9c8f2 + .quad 0xfbf8fd1c8ace2e37 + .quad 0x000ae3049911a0ba + .quad 0x046e3a616bc89b9e + .quad 0x14e65442f03906be + .quad 0x4a019d54e362be2a + .quad 0x68ccdfec8dc230c7 + .quad 0x7cfb7e3faf6b861c + + // 2^160 * 1 * G + + .quad 0x4637974e8c58aedc + .quad 0xb9ef22fbabf041a4 + .quad 0xe185d956e980718a + .quad 0x2f1b78fab143a8a6 + .quad 0x96eebffb305b2f51 + .quad 0xd3f938ad889596b8 + .quad 0xf0f52dc746d5dd25 + .quad 0x57968290bb3a0095 + .quad 0xf71ab8430a20e101 + .quad 0xf393658d24f0ec47 + .quad 0xcf7509a86ee2eed1 + .quad 0x7dc43e35dc2aa3e1 + + // 2^160 * 2 * G + + .quad 0x85966665887dd9c3 + .quad 0xc90f9b314bb05355 + .quad 0xc6e08df8ef2079b1 + .quad 0x7ef72016758cc12f + .quad 0x5a782a5c273e9718 + .quad 0x3576c6995e4efd94 + .quad 0x0f2ed8051f237d3e + .quad 0x044fb81d82d50a99 + .quad 0xc1df18c5a907e3d9 + .quad 0x57b3371dce4c6359 + .quad 0xca704534b201bb49 + .quad 0x7f79823f9c30dd2e + + // 2^160 * 3 * G + + .quad 0x8334d239a3b513e8 + .quad 0xc13670d4b91fa8d8 + .quad 0x12b54136f590bd33 + .quad 0x0a4e0373d784d9b4 + .quad 0x6a9c1ff068f587ba + .quad 0x0827894e0050c8de + .quad 0x3cbf99557ded5be7 + .quad 0x64a9b0431c06d6f0 + .quad 0x2eb3d6a15b7d2919 + .quad 0xb0b4f6a0d53a8235 + .quad 0x7156ce4389a45d47 + .quad 0x071a7d0ace18346c + + // 2^160 * 4 * G + + .quad 0xd3072daac887ba0b + .quad 0x01262905bfa562ee + .quad 0xcf543002c0ef768b + .quad 0x2c3bcc7146ea7e9c + .quad 0xcc0c355220e14431 + .quad 0x0d65950709b15141 + .quad 0x9af5621b209d5f36 + .quad 0x7c69bcf7617755d3 + .quad 0x07f0d7eb04e8295f + .quad 0x10db18252f50f37d + .quad 0xe951a9a3171798d7 + .quad 0x6f5a9a7322aca51d + + // 2^160 * 5 * G + + .quad 0x8ba1000c2f41c6c5 + .quad 0xc49f79c10cfefb9b + .quad 0x4efa47703cc51c9f + .quad 0x494e21a2e147afca + .quad 0xe729d4eba3d944be + .quad 0x8d9e09408078af9e + .quad 0x4525567a47869c03 + .quad 0x02ab9680ee8d3b24 + .quad 0xefa48a85dde50d9a + .quad 0x219a224e0fb9a249 + .quad 0xfa091f1dd91ef6d9 + .quad 0x6b5d76cbea46bb34 + + // 2^160 * 6 * G + + .quad 0x8857556cec0cd994 + .quad 0x6472dc6f5cd01dba + .quad 0xaf0169148f42b477 + .quad 0x0ae333f685277354 + .quad 0xe0f941171e782522 + .quad 
0xf1e6ae74036936d3 + .quad 0x408b3ea2d0fcc746 + .quad 0x16fb869c03dd313e + .quad 0x288e199733b60962 + .quad 0x24fc72b4d8abe133 + .quad 0x4811f7ed0991d03e + .quad 0x3f81e38b8f70d075 + + // 2^160 * 7 * G + + .quad 0x7f910fcc7ed9affe + .quad 0x545cb8a12465874b + .quad 0xa8397ed24b0c4704 + .quad 0x50510fc104f50993 + .quad 0x0adb7f355f17c824 + .quad 0x74b923c3d74299a4 + .quad 0xd57c3e8bcbf8eaf7 + .quad 0x0ad3e2d34cdedc3d + .quad 0x6f0c0fc5336e249d + .quad 0x745ede19c331cfd9 + .quad 0xf2d6fd0009eefe1c + .quad 0x127c158bf0fa1ebe + + // 2^160 * 8 * G + + .quad 0xf6197c422e9879a2 + .quad 0xa44addd452ca3647 + .quad 0x9b413fc14b4eaccb + .quad 0x354ef87d07ef4f68 + .quad 0xdea28fc4ae51b974 + .quad 0x1d9973d3744dfe96 + .quad 0x6240680b873848a8 + .quad 0x4ed82479d167df95 + .quad 0xfee3b52260c5d975 + .quad 0x50352efceb41b0b8 + .quad 0x8808ac30a9f6653c + .quad 0x302d92d20539236d + + // 2^164 * 1 * G + + .quad 0x4c59023fcb3efb7c + .quad 0x6c2fcb99c63c2a94 + .quad 0xba4190e2c3c7e084 + .quad 0x0e545daea51874d9 + .quad 0x957b8b8b0df53c30 + .quad 0x2a1c770a8e60f098 + .quad 0xbbc7a670345796de + .quad 0x22a48f9a90c99bc9 + .quad 0x6b7dc0dc8d3fac58 + .quad 0x5497cd6ce6e42bfd + .quad 0x542f7d1bf400d305 + .quad 0x4159f47f048d9136 + + // 2^164 * 2 * G + + .quad 0x20ad660839e31e32 + .quad 0xf81e1bd58405be50 + .quad 0xf8064056f4dabc69 + .quad 0x14d23dd4ce71b975 + .quad 0x748515a8bbd24839 + .quad 0x77128347afb02b55 + .quad 0x50ba2ac649a2a17f + .quad 0x060525513ad730f1 + .quad 0xf2398e098aa27f82 + .quad 0x6d7982bb89a1b024 + .quad 0xfa694084214dd24c + .quad 0x71ab966fa32301c3 + + // 2^164 * 3 * G + + .quad 0x2dcbd8e34ded02fc + .quad 0x1151f3ec596f22aa + .quad 0xbca255434e0328da + .quad 0x35768fbe92411b22 + .quad 0xb1088a0702809955 + .quad 0x43b273ea0b43c391 + .quad 0xca9b67aefe0686ed + .quad 0x605eecbf8335f4ed + .quad 0x83200a656c340431 + .quad 0x9fcd71678ee59c2f + .quad 0x75d4613f71300f8a + .quad 0x7a912faf60f542f9 + + // 2^164 * 4 * G + + .quad 0xb204585e5edc1a43 + .quad 0x9f0e16ee5897c73c + .quad 0x5b82c0ae4e70483c + .quad 0x624a170e2bddf9be + .quad 0x253f4f8dfa2d5597 + .quad 0x25e49c405477130c + .quad 0x00c052e5996b1102 + .quad 0x33cb966e33bb6c4a + .quad 0x597028047f116909 + .quad 0x828ac41c1e564467 + .quad 0x70417dbde6217387 + .quad 0x721627aefbac4384 + + // 2^164 * 5 * G + + .quad 0x97d03bc38736add5 + .quad 0x2f1422afc532b130 + .quad 0x3aa68a057101bbc4 + .quad 0x4c946cf7e74f9fa7 + .quad 0xfd3097bc410b2f22 + .quad 0xf1a05da7b5cfa844 + .quad 0x61289a1def57ca74 + .quad 0x245ea199bb821902 + .quad 0xaedca66978d477f8 + .quad 0x1898ba3c29117fe1 + .quad 0xcf73f983720cbd58 + .quad 0x67da12e6b8b56351 + + // 2^164 * 6 * G + + .quad 0x7067e187b4bd6e07 + .quad 0x6e8f0203c7d1fe74 + .quad 0x93c6aa2f38c85a30 + .quad 0x76297d1f3d75a78a + .quad 0x2b7ef3d38ec8308c + .quad 0x828fd7ec71eb94ab + .quad 0x807c3b36c5062abd + .quad 0x0cb64cb831a94141 + .quad 0x3030fc33534c6378 + .quad 0xb9635c5ce541e861 + .quad 0x15d9a9bed9b2c728 + .quad 0x49233ea3f3775dcb + + // 2^164 * 7 * G + + .quad 0x629398fa8dbffc3a + .quad 0xe12fe52dd54db455 + .quad 0xf3be11dfdaf25295 + .quad 0x628b140dce5e7b51 + .quad 0x7b3985fe1c9f249b + .quad 0x4fd6b2d5a1233293 + .quad 0xceb345941adf4d62 + .quad 0x6987ff6f542de50c + .quad 0x47e241428f83753c + .quad 0x6317bebc866af997 + .quad 0xdabb5b433d1a9829 + .quad 0x074d8d245287fb2d + + // 2^164 * 8 * G + + .quad 0x8337d9cd440bfc31 + .quad 0x729d2ca1af318fd7 + .quad 0xa040a4a4772c2070 + .quad 0x46002ef03a7349be + .quad 0x481875c6c0e31488 + .quad 0x219429b2e22034b4 + .quad 0x7223c98a31283b65 + .quad 0x3420d60b342277f9 + .quad 
0xfaa23adeaffe65f7 + .quad 0x78261ed45be0764c + .quad 0x441c0a1e2f164403 + .quad 0x5aea8e567a87d395 + + // 2^168 * 1 * G + + .quad 0x7813c1a2bca4283d + .quad 0xed62f091a1863dd9 + .quad 0xaec7bcb8c268fa86 + .quad 0x10e5d3b76f1cae4c + .quad 0x2dbc6fb6e4e0f177 + .quad 0x04e1bf29a4bd6a93 + .quad 0x5e1966d4787af6e8 + .quad 0x0edc5f5eb426d060 + .quad 0x5453bfd653da8e67 + .quad 0xe9dc1eec24a9f641 + .quad 0xbf87263b03578a23 + .quad 0x45b46c51361cba72 + + // 2^168 * 2 * G + + .quad 0xa9402abf314f7fa1 + .quad 0xe257f1dc8e8cf450 + .quad 0x1dbbd54b23a8be84 + .quad 0x2177bfa36dcb713b + .quad 0xce9d4ddd8a7fe3e4 + .quad 0xab13645676620e30 + .quad 0x4b594f7bb30e9958 + .quad 0x5c1c0aef321229df + .quad 0x37081bbcfa79db8f + .quad 0x6048811ec25f59b3 + .quad 0x087a76659c832487 + .quad 0x4ae619387d8ab5bb + + // 2^168 * 3 * G + + .quad 0x8ddbf6aa5344a32e + .quad 0x7d88eab4b41b4078 + .quad 0x5eb0eb974a130d60 + .quad 0x1a00d91b17bf3e03 + .quad 0x61117e44985bfb83 + .quad 0xfce0462a71963136 + .quad 0x83ac3448d425904b + .quad 0x75685abe5ba43d64 + .quad 0x6e960933eb61f2b2 + .quad 0x543d0fa8c9ff4952 + .quad 0xdf7275107af66569 + .quad 0x135529b623b0e6aa + + // 2^168 * 4 * G + + .quad 0x18f0dbd7add1d518 + .quad 0x979f7888cfc11f11 + .quad 0x8732e1f07114759b + .quad 0x79b5b81a65ca3a01 + .quad 0xf5c716bce22e83fe + .quad 0xb42beb19e80985c1 + .quad 0xec9da63714254aae + .quad 0x5972ea051590a613 + .quad 0x0fd4ac20dc8f7811 + .quad 0x9a9ad294ac4d4fa8 + .quad 0xc01b2d64b3360434 + .quad 0x4f7e9c95905f3bdb + + // 2^168 * 5 * G + + .quad 0x62674bbc5781302e + .quad 0xd8520f3989addc0f + .quad 0x8c2999ae53fbd9c6 + .quad 0x31993ad92e638e4c + .quad 0x71c8443d355299fe + .quad 0x8bcd3b1cdbebead7 + .quad 0x8092499ef1a49466 + .quad 0x1942eec4a144adc8 + .quad 0x7dac5319ae234992 + .quad 0x2c1b3d910cea3e92 + .quad 0x553ce494253c1122 + .quad 0x2a0a65314ef9ca75 + + // 2^168 * 6 * G + + .quad 0x2db7937ff7f927c2 + .quad 0xdb741f0617d0a635 + .quad 0x5982f3a21155af76 + .quad 0x4cf6e218647c2ded + .quad 0xcf361acd3c1c793a + .quad 0x2f9ebcac5a35bc3b + .quad 0x60e860e9a8cda6ab + .quad 0x055dc39b6dea1a13 + .quad 0xb119227cc28d5bb6 + .quad 0x07e24ebc774dffab + .quad 0xa83c78cee4a32c89 + .quad 0x121a307710aa24b6 + + // 2^168 * 7 * G + + .quad 0xe4db5d5e9f034a97 + .quad 0xe153fc093034bc2d + .quad 0x460546919551d3b1 + .quad 0x333fc76c7a40e52d + .quad 0xd659713ec77483c9 + .quad 0x88bfe077b82b96af + .quad 0x289e28231097bcd3 + .quad 0x527bb94a6ced3a9b + .quad 0x563d992a995b482e + .quad 0x3405d07c6e383801 + .quad 0x485035de2f64d8e5 + .quad 0x6b89069b20a7a9f7 + + // 2^168 * 8 * G + + .quad 0x812aa0416270220d + .quad 0x995a89faf9245b4e + .quad 0xffadc4ce5072ef05 + .quad 0x23bc2103aa73eb73 + .quad 0x4082fa8cb5c7db77 + .quad 0x068686f8c734c155 + .quad 0x29e6c8d9f6e7a57e + .quad 0x0473d308a7639bcf + .quad 0xcaee792603589e05 + .quad 0x2b4b421246dcc492 + .quad 0x02a1ef74e601a94f + .quad 0x102f73bfde04341a + + // 2^172 * 1 * G + + .quad 0xb5a2d50c7ec20d3e + .quad 0xc64bdd6ea0c97263 + .quad 0x56e89052c1ff734d + .quad 0x4929c6f72b2ffaba + .quad 0x358ecba293a36247 + .quad 0xaf8f9862b268fd65 + .quad 0x412f7e9968a01c89 + .quad 0x5786f312cd754524 + .quad 0x337788ffca14032c + .quad 0xf3921028447f1ee3 + .quad 0x8b14071f231bccad + .quad 0x4c817b4bf2344783 + + // 2^172 * 2 * G + + .quad 0x0ff853852871b96e + .quad 0xe13e9fab60c3f1bb + .quad 0xeefd595325344402 + .quad 0x0a37c37075b7744b + .quad 0x413ba057a40b4484 + .quad 0xba4c2e1a4f5f6a43 + .quad 0x614ba0a5aee1d61c + .quad 0x78a1531a8b05dc53 + .quad 0x6cbdf1703ad0562b + .quad 0x8ecf4830c92521a3 + .quad 0xdaebd303fd8424e7 + .quad 
0x72ad82a42e5ec56f + + // 2^172 * 3 * G + + .quad 0x3f9e8e35bafb65f6 + .quad 0x39d69ec8f27293a1 + .quad 0x6cb8cd958cf6a3d0 + .quad 0x1734778173adae6d + .quad 0xc368939167024bc3 + .quad 0x8e69d16d49502fda + .quad 0xfcf2ec3ce45f4b29 + .quad 0x065f669ea3b4cbc4 + .quad 0x8a00aec75532db4d + .quad 0xb869a4e443e31bb1 + .quad 0x4a0f8552d3a7f515 + .quad 0x19adeb7c303d7c08 + + // 2^172 * 4 * G + + .quad 0xc720cb6153ead9a3 + .quad 0x55b2c97f512b636e + .quad 0xb1e35b5fd40290b1 + .quad 0x2fd9ccf13b530ee2 + .quad 0x9d05ba7d43c31794 + .quad 0x2470c8ff93322526 + .quad 0x8323dec816197438 + .quad 0x2852709881569b53 + .quad 0x07bd475b47f796b8 + .quad 0xd2c7b013542c8f54 + .quad 0x2dbd23f43b24f87e + .quad 0x6551afd77b0901d6 + + // 2^172 * 5 * G + + .quad 0x4546baaf54aac27f + .quad 0xf6f66fecb2a45a28 + .quad 0x582d1b5b562bcfe8 + .quad 0x44b123f3920f785f + .quad 0x68a24ce3a1d5c9ac + .quad 0xbb77a33d10ff6461 + .quad 0x0f86ce4425d3166e + .quad 0x56507c0950b9623b + .quad 0x1206f0b7d1713e63 + .quad 0x353fe3d915bafc74 + .quad 0x194ceb970ad9d94d + .quad 0x62fadd7cf9d03ad3 + + // 2^172 * 6 * G + + .quad 0xc6b5967b5598a074 + .quad 0x5efe91ce8e493e25 + .quad 0xd4b72c4549280888 + .quad 0x20ef1149a26740c2 + .quad 0x3cd7bc61e7ce4594 + .quad 0xcd6b35a9b7dd267e + .quad 0xa080abc84366ef27 + .quad 0x6ec7c46f59c79711 + .quad 0x2f07ad636f09a8a2 + .quad 0x8697e6ce24205e7d + .quad 0xc0aefc05ee35a139 + .quad 0x15e80958b5f9d897 + + // 2^172 * 7 * G + + .quad 0x25a5ef7d0c3e235b + .quad 0x6c39c17fbe134ee7 + .quad 0xc774e1342dc5c327 + .quad 0x021354b892021f39 + .quad 0x4dd1ed355bb061c4 + .quad 0x42dc0cef941c0700 + .quad 0x61305dc1fd86340e + .quad 0x56b2cc930e55a443 + .quad 0x1df79da6a6bfc5a2 + .quad 0x02f3a2749fde4369 + .quad 0xb323d9f2cda390a7 + .quad 0x7be0847b8774d363 + + // 2^172 * 8 * G + + .quad 0x8c99cc5a8b3f55c3 + .quad 0x0611d7253fded2a0 + .quad 0xed2995ff36b70a36 + .quad 0x1f699a54d78a2619 + .quad 0x1466f5af5307fa11 + .quad 0x817fcc7ded6c0af2 + .quad 0x0a6de44ec3a4a3fb + .quad 0x74071475bc927d0b + .quad 0xe77292f373e7ea8a + .quad 0x296537d2cb045a31 + .quad 0x1bd0653ed3274fde + .quad 0x2f9a2c4476bd2966 + + // 2^176 * 1 * G + + .quad 0xeb18b9ab7f5745c6 + .quad 0x023a8aee5787c690 + .quad 0xb72712da2df7afa9 + .quad 0x36597d25ea5c013d + .quad 0xa2b4dae0b5511c9a + .quad 0x7ac860292bffff06 + .quad 0x981f375df5504234 + .quad 0x3f6bd725da4ea12d + .quad 0x734d8d7b106058ac + .quad 0xd940579e6fc6905f + .quad 0x6466f8f99202932d + .quad 0x7b7ecc19da60d6d0 + + // 2^176 * 2 * G + + .quad 0x78c2373c695c690d + .quad 0xdd252e660642906e + .quad 0x951d44444ae12bd2 + .quad 0x4235ad7601743956 + .quad 0x6dae4a51a77cfa9b + .quad 0x82263654e7a38650 + .quad 0x09bbffcd8f2d82db + .quad 0x03bedc661bf5caba + .quad 0x6258cb0d078975f5 + .quad 0x492942549189f298 + .quad 0xa0cab423e2e36ee4 + .quad 0x0e7ce2b0cdf066a1 + + // 2^176 * 3 * G + + .quad 0xc494643ac48c85a3 + .quad 0xfd361df43c6139ad + .quad 0x09db17dd3ae94d48 + .quad 0x666e0a5d8fb4674a + .quad 0xfea6fedfd94b70f9 + .quad 0xf130c051c1fcba2d + .quad 0x4882d47e7f2fab89 + .quad 0x615256138aeceeb5 + .quad 0x2abbf64e4870cb0d + .quad 0xcd65bcf0aa458b6b + .quad 0x9abe4eba75e8985d + .quad 0x7f0bc810d514dee4 + + // 2^176 * 4 * G + + .quad 0xb9006ba426f4136f + .quad 0x8d67369e57e03035 + .quad 0xcbc8dfd94f463c28 + .quad 0x0d1f8dbcf8eedbf5 + .quad 0x83ac9dad737213a0 + .quad 0x9ff6f8ba2ef72e98 + .quad 0x311e2edd43ec6957 + .quad 0x1d3a907ddec5ab75 + .quad 0xba1693313ed081dc + .quad 0x29329fad851b3480 + .quad 0x0128013c030321cb + .quad 0x00011b44a31bfde3 + + // 2^176 * 5 * G + + .quad 0x3fdfa06c3fc66c0c + .quad 
0x5d40e38e4dd60dd2 + .quad 0x7ae38b38268e4d71 + .quad 0x3ac48d916e8357e1 + .quad 0x16561f696a0aa75c + .quad 0xc1bf725c5852bd6a + .quad 0x11a8dd7f9a7966ad + .quad 0x63d988a2d2851026 + .quad 0x00120753afbd232e + .quad 0xe92bceb8fdd8f683 + .quad 0xf81669b384e72b91 + .quad 0x33fad52b2368a066 + + // 2^176 * 6 * G + + .quad 0x540649c6c5e41e16 + .quad 0x0af86430333f7735 + .quad 0xb2acfcd2f305e746 + .quad 0x16c0f429a256dca7 + .quad 0x8d2cc8d0c422cfe8 + .quad 0x072b4f7b05a13acb + .quad 0xa3feb6e6ecf6a56f + .quad 0x3cc355ccb90a71e2 + .quad 0xe9b69443903e9131 + .quad 0xb8a494cb7a5637ce + .quad 0xc87cd1a4baba9244 + .quad 0x631eaf426bae7568 + + // 2^176 * 7 * G + + .quad 0xb3e90410da66fe9f + .quad 0x85dd4b526c16e5a6 + .quad 0xbc3d97611ef9bf83 + .quad 0x5599648b1ea919b5 + .quad 0x47d975b9a3700de8 + .quad 0x7280c5fbe2f80552 + .quad 0x53658f2732e45de1 + .quad 0x431f2c7f665f80b5 + .quad 0xd6026344858f7b19 + .quad 0x14ab352fa1ea514a + .quad 0x8900441a2090a9d7 + .quad 0x7b04715f91253b26 + + // 2^176 * 8 * G + + .quad 0x83edbd28acf6ae43 + .quad 0x86357c8b7d5c7ab4 + .quad 0xc0404769b7eb2c44 + .quad 0x59b37bf5c2f6583f + .quad 0xb376c280c4e6bac6 + .quad 0x970ed3dd6d1d9b0b + .quad 0xb09a9558450bf944 + .quad 0x48d0acfa57cde223 + .quad 0xb60f26e47dabe671 + .quad 0xf1d1a197622f3a37 + .quad 0x4208ce7ee9960394 + .quad 0x16234191336d3bdb + + // 2^180 * 1 * G + + .quad 0xf19aeac733a63aef + .quad 0x2c7fba5d4442454e + .quad 0x5da87aa04795e441 + .quad 0x413051e1a4e0b0f5 + .quad 0x852dd1fd3d578bbe + .quad 0x2b65ce72c3286108 + .quad 0x658c07f4eace2273 + .quad 0x0933f804ec38ab40 + .quad 0xa7ab69798d496476 + .quad 0x8121aadefcb5abc8 + .quad 0xa5dc12ef7b539472 + .quad 0x07fd47065e45351a + + // 2^180 * 2 * G + + .quad 0xc8583c3d258d2bcd + .quad 0x17029a4daf60b73f + .quad 0xfa0fc9d6416a3781 + .quad 0x1c1e5fba38b3fb23 + .quad 0x304211559ae8e7c3 + .quad 0xf281b229944882a5 + .quad 0x8a13ac2e378250e4 + .quad 0x014afa0954ba48f4 + .quad 0xcb3197001bb3666c + .quad 0x330060524bffecb9 + .quad 0x293711991a88233c + .quad 0x291884363d4ed364 + + // 2^180 * 3 * G + + .quad 0x033c6805dc4babfa + .quad 0x2c15bf5e5596ecc1 + .quad 0x1bc70624b59b1d3b + .quad 0x3ede9850a19f0ec5 + .quad 0xfb9d37c3bc1ab6eb + .quad 0x02be14534d57a240 + .quad 0xf4d73415f8a5e1f6 + .quad 0x5964f4300ccc8188 + .quad 0xe44a23152d096800 + .quad 0x5c08c55970866996 + .quad 0xdf2db60a46affb6e + .quad 0x579155c1f856fd89 + + // 2^180 * 4 * G + + .quad 0x96324edd12e0c9ef + .quad 0x468b878df2420297 + .quad 0x199a3776a4f573be + .quad 0x1e7fbcf18e91e92a + .quad 0xb5f16b630817e7a6 + .quad 0x808c69233c351026 + .quad 0x324a983b54cef201 + .quad 0x53c092084a485345 + .quad 0xd2d41481f1cbafbf + .quad 0x231d2db6716174e5 + .quad 0x0b7d7656e2a55c98 + .quad 0x3e955cd82aa495f6 + + // 2^180 * 5 * G + + .quad 0xe48f535e3ed15433 + .quad 0xd075692a0d7270a3 + .quad 0x40fbd21daade6387 + .quad 0x14264887cf4495f5 + .quad 0xab39f3ef61bb3a3f + .quad 0x8eb400652eb9193e + .quad 0xb5de6ecc38c11f74 + .quad 0x654d7e9626f3c49f + .quad 0xe564cfdd5c7d2ceb + .quad 0x82eeafded737ccb9 + .quad 0x6107db62d1f9b0ab + .quad 0x0b6baac3b4358dbb + + // 2^180 * 6 * G + + .quad 0x7ae62bcb8622fe98 + .quad 0x47762256ceb891af + .quad 0x1a5a92bcf2e406b4 + .quad 0x7d29401784e41501 + .quad 0x204abad63700a93b + .quad 0xbe0023d3da779373 + .quad 0xd85f0346633ab709 + .quad 0x00496dc490820412 + .quad 0x1c74b88dc27e6360 + .quad 0x074854268d14850c + .quad 0xa145fb7b3e0dcb30 + .quad 0x10843f1b43803b23 + + // 2^180 * 7 * G + + .quad 0xc5f90455376276dd + .quad 0xce59158dd7645cd9 + .quad 0x92f65d511d366b39 + .quad 0x11574b6e526996c4 + .quad 
0xd56f672de324689b + .quad 0xd1da8aedb394a981 + .quad 0xdd7b58fe9168cfed + .quad 0x7ce246cd4d56c1e8 + .quad 0xb8f4308e7f80be53 + .quad 0x5f3cb8cb34a9d397 + .quad 0x18a961bd33cc2b2c + .quad 0x710045fb3a9af671 + + // 2^180 * 8 * G + + .quad 0x73f93d36101b95eb + .quad 0xfaef33794f6f4486 + .quad 0x5651735f8f15e562 + .quad 0x7fa3f19058b40da1 + .quad 0xa03fc862059d699e + .quad 0x2370cfa19a619e69 + .quad 0xc4fe3b122f823deb + .quad 0x1d1b056fa7f0844e + .quad 0x1bc64631e56bf61f + .quad 0xd379ab106e5382a3 + .quad 0x4d58c57e0540168d + .quad 0x566256628442d8e4 + + // 2^184 * 1 * G + + .quad 0xb9e499def6267ff6 + .quad 0x7772ca7b742c0843 + .quad 0x23a0153fe9a4f2b1 + .quad 0x2cdfdfecd5d05006 + .quad 0xdd499cd61ff38640 + .quad 0x29cd9bc3063625a0 + .quad 0x51e2d8023dd73dc3 + .quad 0x4a25707a203b9231 + .quad 0x2ab7668a53f6ed6a + .quad 0x304242581dd170a1 + .quad 0x4000144c3ae20161 + .quad 0x5721896d248e49fc + + // 2^184 * 2 * G + + .quad 0x0b6e5517fd181bae + .quad 0x9022629f2bb963b4 + .quad 0x5509bce932064625 + .quad 0x578edd74f63c13da + .quad 0x285d5091a1d0da4e + .quad 0x4baa6fa7b5fe3e08 + .quad 0x63e5177ce19393b3 + .quad 0x03c935afc4b030fd + .quad 0x997276c6492b0c3d + .quad 0x47ccc2c4dfe205fc + .quad 0xdcd29b84dd623a3c + .quad 0x3ec2ab590288c7a2 + + // 2^184 * 3 * G + + .quad 0xa1a0d27be4d87bb9 + .quad 0xa98b4deb61391aed + .quad 0x99a0ddd073cb9b83 + .quad 0x2dd5c25a200fcace + .quad 0xa7213a09ae32d1cb + .quad 0x0f2b87df40f5c2d5 + .quad 0x0baea4c6e81eab29 + .quad 0x0e1bf66c6adbac5e + .quad 0xe2abd5e9792c887e + .quad 0x1a020018cb926d5d + .quad 0xbfba69cdbaae5f1e + .quad 0x730548b35ae88f5f + + // 2^184 * 4 * G + + .quad 0xc43551a3cba8b8ee + .quad 0x65a26f1db2115f16 + .quad 0x760f4f52ab8c3850 + .quad 0x3043443b411db8ca + .quad 0x805b094ba1d6e334 + .quad 0xbf3ef17709353f19 + .quad 0x423f06cb0622702b + .quad 0x585a2277d87845dd + .quad 0xa18a5f8233d48962 + .quad 0x6698c4b5ec78257f + .quad 0xa78e6fa5373e41ff + .quad 0x7656278950ef981f + + // 2^184 * 5 * G + + .quad 0x38c3cf59d51fc8c0 + .quad 0x9bedd2fd0506b6f2 + .quad 0x26bf109fab570e8f + .quad 0x3f4160a8c1b846a6 + .quad 0xe17073a3ea86cf9d + .quad 0x3a8cfbb707155fdc + .quad 0x4853e7fc31838a8e + .quad 0x28bbf484b613f616 + .quad 0xf2612f5c6f136c7c + .quad 0xafead107f6dd11be + .quad 0x527e9ad213de6f33 + .quad 0x1e79cb358188f75d + + // 2^184 * 6 * G + + .quad 0x013436c3eef7e3f1 + .quad 0x828b6a7ffe9e10f8 + .quad 0x7ff908e5bcf9defc + .quad 0x65d7951b3a3b3831 + .quad 0x77e953d8f5e08181 + .quad 0x84a50c44299dded9 + .quad 0xdc6c2d0c864525e5 + .quad 0x478ab52d39d1f2f4 + .quad 0x66a6a4d39252d159 + .quad 0xe5dde1bc871ac807 + .quad 0xb82c6b40a6c1c96f + .quad 0x16d87a411a212214 + + // 2^184 * 7 * G + + .quad 0xb3bd7e5a42066215 + .quad 0x879be3cd0c5a24c1 + .quad 0x57c05db1d6f994b7 + .quad 0x28f87c8165f38ca6 + .quad 0xfba4d5e2d54e0583 + .quad 0xe21fafd72ebd99fa + .quad 0x497ac2736ee9778f + .quad 0x1f990b577a5a6dde + .quad 0xa3344ead1be8f7d6 + .quad 0x7d1e50ebacea798f + .quad 0x77c6569e520de052 + .quad 0x45882fe1534d6d3e + + // 2^184 * 8 * G + + .quad 0x6669345d757983d6 + .quad 0x62b6ed1117aa11a6 + .quad 0x7ddd1857985e128f + .quad 0x688fe5b8f626f6dd + .quad 0xd8ac9929943c6fe4 + .quad 0xb5f9f161a38392a2 + .quad 0x2699db13bec89af3 + .quad 0x7dcf843ce405f074 + .quad 0x6c90d6484a4732c0 + .quad 0xd52143fdca563299 + .quad 0xb3be28c3915dc6e1 + .quad 0x6739687e7327191b + + // 2^188 * 1 * G + + .quad 0x9f65c5ea200814cf + .quad 0x840536e169a31740 + .quad 0x8b0ed13925c8b4ad + .quad 0x0080dbafe936361d + .quad 0x8ce5aad0c9cb971f + .quad 0x1156aaa99fd54a29 + .quad 0x41f7247015af9b78 + .quad 
0x1fe8cca8420f49aa + .quad 0x72a1848f3c0cc82a + .quad 0x38c560c2877c9e54 + .quad 0x5004e228ce554140 + .quad 0x042418a103429d71 + + // 2^188 * 2 * G + + .quad 0x899dea51abf3ff5f + .quad 0x9b93a8672fc2d8ba + .quad 0x2c38cb97be6ebd5c + .quad 0x114d578497263b5d + .quad 0x58e84c6f20816247 + .quad 0x8db2b2b6e36fd793 + .quad 0x977182561d484d85 + .quad 0x0822024f8632abd7 + .quad 0xb301bb7c6b1beca3 + .quad 0x55393f6dc6eb1375 + .quad 0x910d281097b6e4eb + .quad 0x1ad4548d9d479ea3 + + // 2^188 * 3 * G + + .quad 0xcd5a7da0389a48fd + .quad 0xb38fa4aa9a78371e + .quad 0xc6d9761b2cdb8e6c + .quad 0x35cf51dbc97e1443 + .quad 0xa06fe66d0fe9fed3 + .quad 0xa8733a401c587909 + .quad 0x30d14d800df98953 + .quad 0x41ce5876c7b30258 + .quad 0x59ac3bc5d670c022 + .quad 0xeae67c109b119406 + .quad 0x9798bdf0b3782fda + .quad 0x651e3201fd074092 + + // 2^188 * 4 * G + + .quad 0xd63d8483ef30c5cf + .quad 0x4cd4b4962361cc0c + .quad 0xee90e500a48426ac + .quad 0x0af51d7d18c14eeb + .quad 0xa57ba4a01efcae9e + .quad 0x769f4beedc308a94 + .quad 0xd1f10eeb3603cb2e + .quad 0x4099ce5e7e441278 + .quad 0x1ac98e4f8a5121e9 + .quad 0x7dae9544dbfa2fe0 + .quad 0x8320aa0dd6430df9 + .quad 0x667282652c4a2fb5 + + // 2^188 * 5 * G + + .quad 0x874621f4d86bc9ab + .quad 0xb54c7bbe56fe6fea + .quad 0x077a24257fadc22c + .quad 0x1ab53be419b90d39 + .quad 0xada8b6e02946db23 + .quad 0x1c0ce51a7b253ab7 + .quad 0x8448c85a66dd485b + .quad 0x7f1fc025d0675adf + .quad 0xd8ee1b18319ea6aa + .quad 0x004d88083a21f0da + .quad 0x3bd6aa1d883a4f4b + .quad 0x4db9a3a6dfd9fd14 + + // 2^188 * 6 * G + + .quad 0x8ce7b23bb99c0755 + .quad 0x35c5d6edc4f50f7a + .quad 0x7e1e2ed2ed9b50c3 + .quad 0x36305f16e8934da1 + .quad 0xd95b00bbcbb77c68 + .quad 0xddbc846a91f17849 + .quad 0x7cf700aebe28d9b3 + .quad 0x5ce1285c85d31f3e + .quad 0x31b6972d98b0bde8 + .quad 0x7d920706aca6de5b + .quad 0xe67310f8908a659f + .quad 0x50fac2a6efdf0235 + + // 2^188 * 7 * G + + .quad 0xf3d3a9f35b880f5a + .quad 0xedec050cdb03e7c2 + .quad 0xa896981ff9f0b1a2 + .quad 0x49a4ae2bac5e34a4 + .quad 0x295b1c86f6f449bc + .quad 0x51b2e84a1f0ab4dd + .quad 0xc001cb30aa8e551d + .quad 0x6a28d35944f43662 + .quad 0x28bb12ee04a740e0 + .quad 0x14313bbd9bce8174 + .quad 0x72f5b5e4e8c10c40 + .quad 0x7cbfb19936adcd5b + + // 2^188 * 8 * G + + .quad 0xa311ddc26b89792d + .quad 0x1b30b4c6da512664 + .quad 0x0ca77b4ccf150859 + .quad 0x1de443df1b009408 + .quad 0x8e793a7acc36e6e0 + .quad 0xf9fab7a37d586eed + .quad 0x3a4f9692bae1f4e4 + .quad 0x1c14b03eff5f447e + .quad 0x19647bd114a85291 + .quad 0x57b76cb21034d3af + .quad 0x6329db440f9d6dfa + .quad 0x5ef43e586a571493 + + // 2^192 * 1 * G + + .quad 0xef782014385675a6 + .quad 0xa2649f30aafda9e8 + .quad 0x4cd1eb505cdfa8cb + .quad 0x46115aba1d4dc0b3 + .quad 0xa66dcc9dc80c1ac0 + .quad 0x97a05cf41b38a436 + .quad 0xa7ebf3be95dbd7c6 + .quad 0x7da0b8f68d7e7dab + .quad 0xd40f1953c3b5da76 + .quad 0x1dac6f7321119e9b + .quad 0x03cc6021feb25960 + .quad 0x5a5f887e83674b4b + + // 2^192 * 2 * G + + .quad 0x8f6301cf70a13d11 + .quad 0xcfceb815350dd0c4 + .quad 0xf70297d4a4bca47e + .quad 0x3669b656e44d1434 + .quad 0x9e9628d3a0a643b9 + .quad 0xb5c3cb00e6c32064 + .quad 0x9b5302897c2dec32 + .quad 0x43e37ae2d5d1c70c + .quad 0x387e3f06eda6e133 + .quad 0x67301d5199a13ac0 + .quad 0xbd5ad8f836263811 + .quad 0x6a21e6cd4fd5e9be + + // 2^192 * 3 * G + + .quad 0xf1c6170a3046e65f + .quad 0x58712a2a00d23524 + .quad 0x69dbbd3c8c82b755 + .quad 0x586bf9f1a195ff57 + .quad 0xef4129126699b2e3 + .quad 0x71d30847708d1301 + .quad 0x325432d01182b0bd + .quad 0x45371b07001e8b36 + .quad 0xa6db088d5ef8790b + .quad 0x5278f0dc610937e5 + .quad 
0xac0349d261a16eb8 + .quad 0x0eafb03790e52179 + + // 2^192 * 4 * G + + .quad 0x960555c13748042f + .quad 0x219a41e6820baa11 + .quad 0x1c81f73873486d0c + .quad 0x309acc675a02c661 + .quad 0x5140805e0f75ae1d + .quad 0xec02fbe32662cc30 + .quad 0x2cebdf1eea92396d + .quad 0x44ae3344c5435bb3 + .quad 0x9cf289b9bba543ee + .quad 0xf3760e9d5ac97142 + .quad 0x1d82e5c64f9360aa + .quad 0x62d5221b7f94678f + + // 2^192 * 5 * G + + .quad 0x524c299c18d0936d + .quad 0xc86bb56c8a0c1a0c + .quad 0xa375052edb4a8631 + .quad 0x5c0efde4bc754562 + .quad 0x7585d4263af77a3c + .quad 0xdfae7b11fee9144d + .quad 0xa506708059f7193d + .quad 0x14f29a5383922037 + .quad 0xdf717edc25b2d7f5 + .quad 0x21f970db99b53040 + .quad 0xda9234b7c3ed4c62 + .quad 0x5e72365c7bee093e + + // 2^192 * 6 * G + + .quad 0x575bfc074571217f + .quad 0x3779675d0694d95b + .quad 0x9a0a37bbf4191e33 + .quad 0x77f1104c47b4eabc + .quad 0x7d9339062f08b33e + .quad 0x5b9659e5df9f32be + .quad 0xacff3dad1f9ebdfd + .quad 0x70b20555cb7349b7 + .quad 0xbe5113c555112c4c + .quad 0x6688423a9a881fcd + .quad 0x446677855e503b47 + .quad 0x0e34398f4a06404a + + // 2^192 * 7 * G + + .quad 0xb67d22d93ecebde8 + .quad 0x09b3e84127822f07 + .quad 0x743fa61fb05b6d8d + .quad 0x5e5405368a362372 + .quad 0x18930b093e4b1928 + .quad 0x7de3e10e73f3f640 + .quad 0xf43217da73395d6f + .quad 0x6f8aded6ca379c3e + .quad 0xe340123dfdb7b29a + .quad 0x487b97e1a21ab291 + .quad 0xf9967d02fde6949e + .quad 0x780de72ec8d3de97 + + // 2^192 * 8 * G + + .quad 0x0ae28545089ae7bc + .quad 0x388ddecf1c7f4d06 + .quad 0x38ac15510a4811b8 + .quad 0x0eb28bf671928ce4 + .quad 0x671feaf300f42772 + .quad 0x8f72eb2a2a8c41aa + .quad 0x29a17fd797373292 + .quad 0x1defc6ad32b587a6 + .quad 0xaf5bbe1aef5195a7 + .quad 0x148c1277917b15ed + .quad 0x2991f7fb7ae5da2e + .quad 0x467d201bf8dd2867 + + // 2^196 * 1 * G + + .quad 0x7906ee72f7bd2e6b + .quad 0x05d270d6109abf4e + .quad 0x8d5cfe45b941a8a4 + .quad 0x44c218671c974287 + .quad 0x745f9d56296bc318 + .quad 0x993580d4d8152e65 + .quad 0xb0e5b13f5839e9ce + .quad 0x51fc2b28d43921c0 + .quad 0x1b8fd11795e2a98c + .quad 0x1c4e5ee12b6b6291 + .quad 0x5b30e7107424b572 + .quad 0x6e6b9de84c4f4ac6 + + // 2^196 * 2 * G + + .quad 0xdff25fce4b1de151 + .quad 0xd841c0c7e11c4025 + .quad 0x2554b3c854749c87 + .quad 0x2d292459908e0df9 + .quad 0x6b7c5f10f80cb088 + .quad 0x736b54dc56e42151 + .quad 0xc2b620a5c6ef99c4 + .quad 0x5f4c802cc3a06f42 + .quad 0x9b65c8f17d0752da + .quad 0x881ce338c77ee800 + .quad 0xc3b514f05b62f9e3 + .quad 0x66ed5dd5bec10d48 + + // 2^196 * 3 * G + + .quad 0x7d38a1c20bb2089d + .quad 0x808334e196ccd412 + .quad 0xc4a70b8c6c97d313 + .quad 0x2eacf8bc03007f20 + .quad 0xf0adf3c9cbca047d + .quad 0x81c3b2cbf4552f6b + .quad 0xcfda112d44735f93 + .quad 0x1f23a0c77e20048c + .quad 0xf235467be5bc1570 + .quad 0x03d2d9020dbab38c + .quad 0x27529aa2fcf9e09e + .quad 0x0840bef29d34bc50 + + // 2^196 * 4 * G + + .quad 0x796dfb35dc10b287 + .quad 0x27176bcd5c7ff29d + .quad 0x7f3d43e8c7b24905 + .quad 0x0304f5a191c54276 + .quad 0xcd54e06b7f37e4eb + .quad 0x8cc15f87f5e96cca + .quad 0xb8248bb0d3597dce + .quad 0x246affa06074400c + .quad 0x37d88e68fbe45321 + .quad 0x86097548c0d75032 + .quad 0x4e9b13ef894a0d35 + .quad 0x25a83cac5753d325 + + // 2^196 * 5 * G + + .quad 0x10222f48eed8165e + .quad 0x623fc1234b8bcf3a + .quad 0x1e145c09c221e8f0 + .quad 0x7ccfa59fca782630 + .quad 0x9f0f66293952b6e2 + .quad 0x33db5e0e0934267b + .quad 0xff45252bd609fedc + .quad 0x06be10f5c506e0c9 + .quad 0x1a9615a9b62a345f + .quad 0x22050c564a52fecc + .quad 0xa7a2788528bc0dfe + .quad 0x5e82770a1a1ee71d + + // 2^196 * 6 * G + + .quad 
0x35425183ad896a5c + .quad 0xe8673afbe78d52f6 + .quad 0x2c66f25f92a35f64 + .quad 0x09d04f3b3b86b102 + .quad 0xe802e80a42339c74 + .quad 0x34175166a7fffae5 + .quad 0x34865d1f1c408cae + .quad 0x2cca982c605bc5ee + .quad 0xfd2d5d35197dbe6e + .quad 0x207c2eea8be4ffa3 + .quad 0x2613d8db325ae918 + .quad 0x7a325d1727741d3e + + // 2^196 * 7 * G + + .quad 0xd036b9bbd16dfde2 + .quad 0xa2055757c497a829 + .quad 0x8e6cc966a7f12667 + .quad 0x4d3b1a791239c180 + .quad 0xecd27d017e2a076a + .quad 0xd788689f1636495e + .quad 0x52a61af0919233e5 + .quad 0x2a479df17bb1ae64 + .quad 0x9e5eee8e33db2710 + .quad 0x189854ded6c43ca5 + .quad 0xa41c22c592718138 + .quad 0x27ad5538a43a5e9b + + // 2^196 * 8 * G + + .quad 0x2746dd4b15350d61 + .quad 0xd03fcbc8ee9521b7 + .quad 0xe86e365a138672ca + .quad 0x510e987f7e7d89e2 + .quad 0xcb5a7d638e47077c + .quad 0x8db7536120a1c059 + .quad 0x549e1e4d8bedfdcc + .quad 0x080153b7503b179d + .quad 0xdda69d930a3ed3e3 + .quad 0x3d386ef1cd60a722 + .quad 0xc817ad58bdaa4ee6 + .quad 0x23be8d554fe7372a + + // 2^200 * 1 * G + + .quad 0x95fe919a74ef4fad + .quad 0x3a827becf6a308a2 + .quad 0x964e01d309a47b01 + .quad 0x71c43c4f5ba3c797 + .quad 0xbc1ef4bd567ae7a9 + .quad 0x3f624cb2d64498bd + .quad 0xe41064d22c1f4ec8 + .quad 0x2ef9c5a5ba384001 + .quad 0xb6fd6df6fa9e74cd + .quad 0xf18278bce4af267a + .quad 0x8255b3d0f1ef990e + .quad 0x5a758ca390c5f293 + + // 2^200 * 2 * G + + .quad 0xa2b72710d9462495 + .quad 0x3aa8c6d2d57d5003 + .quad 0xe3d400bfa0b487ca + .quad 0x2dbae244b3eb72ec + .quad 0x8ce0918b1d61dc94 + .quad 0x8ded36469a813066 + .quad 0xd4e6a829afe8aad3 + .quad 0x0a738027f639d43f + .quad 0x980f4a2f57ffe1cc + .quad 0x00670d0de1839843 + .quad 0x105c3f4a49fb15fd + .quad 0x2698ca635126a69c + + // 2^200 * 3 * G + + .quad 0xe765318832b0ba78 + .quad 0x381831f7925cff8b + .quad 0x08a81b91a0291fcc + .quad 0x1fb43dcc49caeb07 + .quad 0x2e3d702f5e3dd90e + .quad 0x9e3f0918e4d25386 + .quad 0x5e773ef6024da96a + .quad 0x3c004b0c4afa3332 + .quad 0x9aa946ac06f4b82b + .quad 0x1ca284a5a806c4f3 + .quad 0x3ed3265fc6cd4787 + .quad 0x6b43fd01cd1fd217 + + // 2^200 * 4 * G + + .quad 0xc7a75d4b4697c544 + .quad 0x15fdf848df0fffbf + .quad 0x2868b9ebaa46785a + .quad 0x5a68d7105b52f714 + .quad 0xb5c742583e760ef3 + .quad 0x75dc52b9ee0ab990 + .quad 0xbf1427c2072b923f + .quad 0x73420b2d6ff0d9f0 + .quad 0xaf2cf6cb9e851e06 + .quad 0x8f593913c62238c4 + .quad 0xda8ab89699fbf373 + .quad 0x3db5632fea34bc9e + + // 2^200 * 5 * G + + .quad 0xf46eee2bf75dd9d8 + .quad 0x0d17b1f6396759a5 + .quad 0x1bf2d131499e7273 + .quad 0x04321adf49d75f13 + .quad 0x2e4990b1829825d5 + .quad 0xedeaeb873e9a8991 + .quad 0xeef03d394c704af8 + .quad 0x59197ea495df2b0e + .quad 0x04e16019e4e55aae + .quad 0xe77b437a7e2f92e9 + .quad 0xc7ce2dc16f159aa4 + .quad 0x45eafdc1f4d70cc0 + + // 2^200 * 6 * G + + .quad 0x698401858045d72b + .quad 0x4c22faa2cf2f0651 + .quad 0x941a36656b222dc6 + .quad 0x5a5eebc80362dade + .quad 0xb60e4624cfccb1ed + .quad 0x59dbc292bd5c0395 + .quad 0x31a09d1ddc0481c9 + .quad 0x3f73ceea5d56d940 + .quad 0xb7a7bfd10a4e8dc6 + .quad 0xbe57007e44c9b339 + .quad 0x60c1207f1557aefa + .quad 0x26058891266218db + + // 2^200 * 7 * G + + .quad 0x59f704a68360ff04 + .quad 0xc3d93fde7661e6f4 + .quad 0x831b2a7312873551 + .quad 0x54ad0c2e4e615d57 + .quad 0x4c818e3cc676e542 + .quad 0x5e422c9303ceccad + .quad 0xec07cccab4129f08 + .quad 0x0dedfa10b24443b8 + .quad 0xee3b67d5b82b522a + .quad 0x36f163469fa5c1eb + .quad 0xa5b4d2f26ec19fd3 + .quad 0x62ecb2baa77a9408 + + // 2^200 * 8 * G + + .quad 0xe5ed795261152b3d + .quad 0x4962357d0eddd7d1 + .quad 0x7482c8d0b96b4c71 + .quad 
0x2e59f919a966d8be + .quad 0x92072836afb62874 + .quad 0x5fcd5e8579e104a5 + .quad 0x5aad01adc630a14a + .quad 0x61913d5075663f98 + .quad 0x0dc62d361a3231da + .quad 0xfa47583294200270 + .quad 0x02d801513f9594ce + .quad 0x3ddbc2a131c05d5c + + // 2^204 * 1 * G + + .quad 0x3f50a50a4ffb81ef + .quad 0xb1e035093bf420bf + .quad 0x9baa8e1cc6aa2cd0 + .quad 0x32239861fa237a40 + .quad 0xfb735ac2004a35d1 + .quad 0x31de0f433a6607c3 + .quad 0x7b8591bfc528d599 + .quad 0x55be9a25f5bb050c + .quad 0x0d005acd33db3dbf + .quad 0x0111b37c80ac35e2 + .quad 0x4892d66c6f88ebeb + .quad 0x770eadb16508fbcd + + // 2^204 * 2 * G + + .quad 0x8451f9e05e4e89dd + .quad 0xc06302ffbc793937 + .quad 0x5d22749556a6495c + .quad 0x09a6755ca05603fb + .quad 0xf1d3b681a05071b9 + .quad 0x2207659a3592ff3a + .quad 0x5f0169297881e40e + .quad 0x16bedd0e86ba374e + .quad 0x5ecccc4f2c2737b5 + .quad 0x43b79e0c2dccb703 + .quad 0x33e008bc4ec43df3 + .quad 0x06c1b840f07566c0 + + // 2^204 * 3 * G + + .quad 0x7688a5c6a388f877 + .quad 0x02a96c14deb2b6ac + .quad 0x64c9f3431b8c2af8 + .quad 0x3628435554a1eed6 + .quad 0x69ee9e7f9b02805c + .quad 0xcbff828a547d1640 + .quad 0x3d93a869b2430968 + .quad 0x46b7b8cd3fe26972 + .quad 0xe9812086fe7eebe0 + .quad 0x4cba6be72f515437 + .quad 0x1d04168b516efae9 + .quad 0x5ea1391043982cb9 + + // 2^204 * 4 * G + + .quad 0x49125c9cf4702ee1 + .quad 0x4520b71f8b25b32d + .quad 0x33193026501fef7e + .quad 0x656d8997c8d2eb2b + .quad 0x6f2b3be4d5d3b002 + .quad 0xafec33d96a09c880 + .quad 0x035f73a4a8bcc4cc + .quad 0x22c5b9284662198b + .quad 0xcb58c8fe433d8939 + .quad 0x89a0cb2e6a8d7e50 + .quad 0x79ca955309fbbe5a + .quad 0x0c626616cd7fc106 + + // 2^204 * 5 * G + + .quad 0x1ffeb80a4879b61f + .quad 0x6396726e4ada21ed + .quad 0x33c7b093368025ba + .quad 0x471aa0c6f3c31788 + .quad 0x8fdfc379fbf454b1 + .quad 0x45a5a970f1a4b771 + .quad 0xac921ef7bad35915 + .quad 0x42d088dca81c2192 + .quad 0x8fda0f37a0165199 + .quad 0x0adadb77c8a0e343 + .quad 0x20fbfdfcc875e820 + .quad 0x1cf2bea80c2206e7 + + // 2^204 * 6 * G + + .quad 0xc2ddf1deb36202ac + .quad 0x92a5fe09d2e27aa5 + .quad 0x7d1648f6fc09f1d3 + .quad 0x74c2cc0513bc4959 + .quad 0x982d6e1a02c0412f + .quad 0x90fa4c83db58e8fe + .quad 0x01c2f5bcdcb18bc0 + .quad 0x686e0c90216abc66 + .quad 0x1fadbadba54395a7 + .quad 0xb41a02a0ae0da66a + .quad 0xbf19f598bba37c07 + .quad 0x6a12b8acde48430d + + // 2^204 * 7 * G + + .quad 0xf8daea1f39d495d9 + .quad 0x592c190e525f1dfc + .quad 0xdb8cbd04c9991d1b + .quad 0x11f7fda3d88f0cb7 + .quad 0x793bdd801aaeeb5f + .quad 0x00a2a0aac1518871 + .quad 0xe8a373a31f2136b4 + .quad 0x48aab888fc91ef19 + .quad 0x041f7e925830f40e + .quad 0x002d6ca979661c06 + .quad 0x86dc9ff92b046a2e + .quad 0x760360928b0493d1 + + // 2^204 * 8 * G + + .quad 0x21bb41c6120cf9c6 + .quad 0xeab2aa12decda59b + .quad 0xc1a72d020aa48b34 + .quad 0x215d4d27e87d3b68 + .quad 0xb43108e5695a0b05 + .quad 0x6cb00ee8ad37a38b + .quad 0x5edad6eea3537381 + .quad 0x3f2602d4b6dc3224 + .quad 0xc8b247b65bcaf19c + .quad 0x49779dc3b1b2c652 + .quad 0x89a180bbd5ece2e2 + .quad 0x13f098a3cec8e039 + + // 2^208 * 1 * G + + .quad 0x9adc0ff9ce5ec54b + .quad 0x039c2a6b8c2f130d + .quad 0x028007c7f0f89515 + .quad 0x78968314ac04b36b + .quad 0xf3aa57a22796bb14 + .quad 0x883abab79b07da21 + .quad 0xe54be21831a0391c + .quad 0x5ee7fb38d83205f9 + .quad 0x538dfdcb41446a8e + .quad 0xa5acfda9434937f9 + .quad 0x46af908d263c8c78 + .quad 0x61d0633c9bca0d09 + + // 2^208 * 2 * G + + .quad 0x63744935ffdb2566 + .quad 0xc5bd6b89780b68bb + .quad 0x6f1b3280553eec03 + .quad 0x6e965fd847aed7f5 + .quad 0xada328bcf8fc73df + .quad 0xee84695da6f037fc + .quad 
0x637fb4db38c2a909 + .quad 0x5b23ac2df8067bdc + .quad 0x9ad2b953ee80527b + .quad 0xe88f19aafade6d8d + .quad 0x0e711704150e82cf + .quad 0x79b9bbb9dd95dedc + + // 2^208 * 3 * G + + .quad 0xebb355406a3126c2 + .quad 0xd26383a868c8c393 + .quad 0x6c0c6429e5b97a82 + .quad 0x5065f158c9fd2147 + .quad 0xd1997dae8e9f7374 + .quad 0xa032a2f8cfbb0816 + .quad 0xcd6cba126d445f0a + .quad 0x1ba811460accb834 + .quad 0x708169fb0c429954 + .quad 0xe14600acd76ecf67 + .quad 0x2eaab98a70e645ba + .quad 0x3981f39e58a4faf2 + + // 2^208 * 4 * G + + .quad 0x18fb8a7559230a93 + .quad 0x1d168f6960e6f45d + .quad 0x3a85a94514a93cb5 + .quad 0x38dc083705acd0fd + .quad 0xc845dfa56de66fde + .quad 0xe152a5002c40483a + .quad 0xe9d2e163c7b4f632 + .quad 0x30f4452edcbc1b65 + .quad 0x856d2782c5759740 + .quad 0xfa134569f99cbecc + .quad 0x8844fc73c0ea4e71 + .quad 0x632d9a1a593f2469 + + // 2^208 * 5 * G + + .quad 0xf6bb6b15b807cba6 + .quad 0x1823c7dfbc54f0d7 + .quad 0xbb1d97036e29670b + .quad 0x0b24f48847ed4a57 + .quad 0xbf09fd11ed0c84a7 + .quad 0x63f071810d9f693a + .quad 0x21908c2d57cf8779 + .quad 0x3a5a7df28af64ba2 + .quad 0xdcdad4be511beac7 + .quad 0xa4538075ed26ccf2 + .quad 0xe19cff9f005f9a65 + .quad 0x34fcf74475481f63 + + // 2^208 * 6 * G + + .quad 0xc197e04c789767ca + .quad 0xb8714dcb38d9467d + .quad 0x55de888283f95fa8 + .quad 0x3d3bdc164dfa63f7 + .quad 0xa5bb1dab78cfaa98 + .quad 0x5ceda267190b72f2 + .quad 0x9309c9110a92608e + .quad 0x0119a3042fb374b0 + .quad 0x67a2d89ce8c2177d + .quad 0x669da5f66895d0c1 + .quad 0xf56598e5b282a2b0 + .quad 0x56c088f1ede20a73 + + // 2^208 * 7 * G + + .quad 0x336d3d1110a86e17 + .quad 0xd7f388320b75b2fa + .quad 0xf915337625072988 + .quad 0x09674c6b99108b87 + .quad 0x581b5fac24f38f02 + .quad 0xa90be9febae30cbd + .quad 0x9a2169028acf92f0 + .quad 0x038b7ea48359038f + .quad 0x9f4ef82199316ff8 + .quad 0x2f49d282eaa78d4f + .quad 0x0971a5ab5aef3174 + .quad 0x6e5e31025969eb65 + + // 2^208 * 8 * G + + .quad 0xb16c62f587e593fb + .quad 0x4999eddeca5d3e71 + .quad 0xb491c1e014cc3e6d + .quad 0x08f5114789a8dba8 + .quad 0x3304fb0e63066222 + .quad 0xfb35068987acba3f + .quad 0xbd1924778c1061a3 + .quad 0x3058ad43d1838620 + .quad 0x323c0ffde57663d0 + .quad 0x05c3df38a22ea610 + .quad 0xbdc78abdac994f9a + .quad 0x26549fa4efe3dc99 + + // 2^212 * 1 * G + + .quad 0x738b38d787ce8f89 + .quad 0xb62658e24179a88d + .quad 0x30738c9cf151316d + .quad 0x49128c7f727275c9 + .quad 0x04dbbc17f75396b9 + .quad 0x69e6a2d7d2f86746 + .quad 0xc6409d99f53eabc6 + .quad 0x606175f6332e25d2 + .quad 0x4021370ef540e7dd + .quad 0x0910d6f5a1f1d0a5 + .quad 0x4634aacd5b06b807 + .quad 0x6a39e6356944f235 + + // 2^212 * 2 * G + + .quad 0x96cd5640df90f3e7 + .quad 0x6c3a760edbfa25ea + .quad 0x24f3ef0959e33cc4 + .quad 0x42889e7e530d2e58 + .quad 0x1da1965774049e9d + .quad 0xfbcd6ea198fe352b + .quad 0xb1cbcd50cc5236a6 + .quad 0x1f5ec83d3f9846e2 + .quad 0x8efb23c3328ccb75 + .quad 0xaf42a207dd876ee9 + .quad 0x20fbdadc5dfae796 + .quad 0x241e246b06bf9f51 + + // 2^212 * 3 * G + + .quad 0x29e68e57ad6e98f6 + .quad 0x4c9260c80b462065 + .quad 0x3f00862ea51ebb4b + .quad 0x5bc2c77fb38d9097 + .quad 0x7eaafc9a6280bbb8 + .quad 0x22a70f12f403d809 + .quad 0x31ce40bb1bfc8d20 + .quad 0x2bc65635e8bd53ee + .quad 0xe8d5dc9fa96bad93 + .quad 0xe58fb17dde1947dc + .quad 0x681532ea65185fa3 + .quad 0x1fdd6c3b034a7830 + + // 2^212 * 4 * G + + .quad 0x0a64e28c55dc18fe + .quad 0xe3df9e993399ebdd + .quad 0x79ac432370e2e652 + .quad 0x35ff7fc33ae4cc0e + .quad 0x9c13a6a52dd8f7a9 + .quad 0x2dbb1f8c3efdcabf + .quad 0x961e32405e08f7b5 + .quad 0x48c8a121bbe6c9e5 + .quad 0xfc415a7c59646445 + .quad 
0xd224b2d7c128b615 + .quad 0x6035c9c905fbb912 + .quad 0x42d7a91274429fab + + // 2^212 * 5 * G + + .quad 0x4e6213e3eaf72ed3 + .quad 0x6794981a43acd4e7 + .quad 0xff547cde6eb508cb + .quad 0x6fed19dd10fcb532 + .quad 0xa9a48947933da5bc + .quad 0x4a58920ec2e979ec + .quad 0x96d8800013e5ac4c + .quad 0x453692d74b48b147 + .quad 0xdd775d99a8559c6f + .quad 0xf42a2140df003e24 + .quad 0x5223e229da928a66 + .quad 0x063f46ba6d38f22c + + // 2^212 * 6 * G + + .quad 0xd2d242895f536694 + .quad 0xca33a2c542939b2c + .quad 0x986fada6c7ddb95c + .quad 0x5a152c042f712d5d + .quad 0x39843cb737346921 + .quad 0xa747fb0738c89447 + .quad 0xcb8d8031a245307e + .quad 0x67810f8e6d82f068 + .quad 0x3eeb8fbcd2287db4 + .quad 0x72c7d3a301a03e93 + .quad 0x5473e88cbd98265a + .quad 0x7324aa515921b403 + + // 2^212 * 7 * G + + .quad 0x857942f46c3cbe8e + .quad 0xa1d364b14730c046 + .quad 0x1c8ed914d23c41bf + .quad 0x0838e161eef6d5d2 + .quad 0xad23f6dae82354cb + .quad 0x6962502ab6571a6d + .quad 0x9b651636e38e37d1 + .quad 0x5cac5005d1a3312f + .quad 0x8cc154cce9e39904 + .quad 0x5b3a040b84de6846 + .quad 0xc4d8a61cb1be5d6e + .quad 0x40fb897bd8861f02 + + // 2^212 * 8 * G + + .quad 0x84c5aa9062de37a1 + .quad 0x421da5000d1d96e1 + .quad 0x788286306a9242d9 + .quad 0x3c5e464a690d10da + .quad 0xe57ed8475ab10761 + .quad 0x71435e206fd13746 + .quad 0x342f824ecd025632 + .quad 0x4b16281ea8791e7b + .quad 0xd1c101d50b813381 + .quad 0xdee60f1176ee6828 + .quad 0x0cb68893383f6409 + .quad 0x6183c565f6ff484a + + // 2^216 * 1 * G + + .quad 0x741d5a461e6bf9d6 + .quad 0x2305b3fc7777a581 + .quad 0xd45574a26474d3d9 + .quad 0x1926e1dc6401e0ff + .quad 0xdb468549af3f666e + .quad 0xd77fcf04f14a0ea5 + .quad 0x3df23ff7a4ba0c47 + .quad 0x3a10dfe132ce3c85 + .quad 0xe07f4e8aea17cea0 + .quad 0x2fd515463a1fc1fd + .quad 0x175322fd31f2c0f1 + .quad 0x1fa1d01d861e5d15 + + // 2^216 * 2 * G + + .quad 0xcc8055947d599832 + .quad 0x1e4656da37f15520 + .quad 0x99f6f7744e059320 + .quad 0x773563bc6a75cf33 + .quad 0x38dcac00d1df94ab + .quad 0x2e712bddd1080de9 + .quad 0x7f13e93efdd5e262 + .quad 0x73fced18ee9a01e5 + .quad 0x06b1e90863139cb3 + .quad 0xa493da67c5a03ecd + .quad 0x8d77cec8ad638932 + .quad 0x1f426b701b864f44 + + // 2^216 * 3 * G + + .quad 0xefc9264c41911c01 + .quad 0xf1a3b7b817a22c25 + .quad 0x5875da6bf30f1447 + .quad 0x4e1af5271d31b090 + .quad 0xf17e35c891a12552 + .quad 0xb76b8153575e9c76 + .quad 0xfa83406f0d9b723e + .quad 0x0b76bb1b3fa7e438 + .quad 0x08b8c1f97f92939b + .quad 0xbe6771cbd444ab6e + .quad 0x22e5646399bb8017 + .quad 0x7b6dd61eb772a955 + + // 2^216 * 4 * G + + .quad 0xb7adc1e850f33d92 + .quad 0x7998fa4f608cd5cf + .quad 0xad962dbd8dfc5bdb + .quad 0x703e9bceaf1d2f4f + .quad 0x5730abf9ab01d2c7 + .quad 0x16fb76dc40143b18 + .quad 0x866cbe65a0cbb281 + .quad 0x53fa9b659bff6afe + .quad 0x6c14c8e994885455 + .quad 0x843a5d6665aed4e5 + .quad 0x181bb73ebcd65af1 + .quad 0x398d93e5c4c61f50 + + // 2^216 * 5 * G + + .quad 0x1c4bd16733e248f3 + .quad 0xbd9e128715bf0a5f + .quad 0xd43f8cf0a10b0376 + .quad 0x53b09b5ddf191b13 + .quad 0xc3877c60d2e7e3f2 + .quad 0x3b34aaa030828bb1 + .quad 0x283e26e7739ef138 + .quad 0x699c9c9002c30577 + .quad 0xf306a7235946f1cc + .quad 0x921718b5cce5d97d + .quad 0x28cdd24781b4e975 + .quad 0x51caf30c6fcdd907 + + // 2^216 * 6 * G + + .quad 0xa60ba7427674e00a + .quad 0x630e8570a17a7bf3 + .quad 0x3758563dcf3324cc + .quad 0x5504aa292383fdaa + .quad 0x737af99a18ac54c7 + .quad 0x903378dcc51cb30f + .quad 0x2b89bc334ce10cc7 + .quad 0x12ae29c189f8e99a + .quad 0xa99ec0cb1f0d01cf + .quad 0x0dd1efcc3a34f7ae + .quad 0x55ca7521d09c4e22 + .quad 0x5fd14fe958eba5ea + + // 
2^216 * 7 * G + + .quad 0xb5dc2ddf2845ab2c + .quad 0x069491b10a7fe993 + .quad 0x4daaf3d64002e346 + .quad 0x093ff26e586474d1 + .quad 0x3c42fe5ebf93cb8e + .quad 0xbedfa85136d4565f + .quad 0xe0f0859e884220e8 + .quad 0x7dd73f960725d128 + .quad 0xb10d24fe68059829 + .quad 0x75730672dbaf23e5 + .quad 0x1367253ab457ac29 + .quad 0x2f59bcbc86b470a4 + + // 2^216 * 8 * G + + .quad 0x83847d429917135f + .quad 0xad1b911f567d03d7 + .quad 0x7e7748d9be77aad1 + .quad 0x5458b42e2e51af4a + .quad 0x7041d560b691c301 + .quad 0x85201b3fadd7e71e + .quad 0x16c2e16311335585 + .quad 0x2aa55e3d010828b1 + .quad 0xed5192e60c07444f + .quad 0x42c54e2d74421d10 + .quad 0x352b4c82fdb5c864 + .quad 0x13e9004a8a768664 + + // 2^220 * 1 * G + + .quad 0xcbb5b5556c032bff + .quad 0xdf7191b729297a3a + .quad 0xc1ff7326aded81bb + .quad 0x71ade8bb68be03f5 + .quad 0x1e6284c5806b467c + .quad 0xc5f6997be75d607b + .quad 0x8b67d958b378d262 + .quad 0x3d88d66a81cd8b70 + .quad 0x8b767a93204ed789 + .quad 0x762fcacb9fa0ae2a + .quad 0x771febcc6dce4887 + .quad 0x343062158ff05fb3 + + // 2^220 * 2 * G + + .quad 0xe05da1a7e1f5bf49 + .quad 0x26457d6dd4736092 + .quad 0x77dcb07773cc32f6 + .quad 0x0a5d94969cdd5fcd + .quad 0xfce219072a7b31b4 + .quad 0x4d7adc75aa578016 + .quad 0x0ec276a687479324 + .quad 0x6d6d9d5d1fda4beb + .quad 0x22b1a58ae9b08183 + .quad 0xfd95d071c15c388b + .quad 0xa9812376850a0517 + .quad 0x33384cbabb7f335e + + // 2^220 * 3 * G + + .quad 0x3c6fa2680ca2c7b5 + .quad 0x1b5082046fb64fda + .quad 0xeb53349c5431d6de + .quad 0x5278b38f6b879c89 + .quad 0x33bc627a26218b8d + .quad 0xea80b21fc7a80c61 + .quad 0x9458b12b173e9ee6 + .quad 0x076247be0e2f3059 + .quad 0x52e105f61416375a + .quad 0xec97af3685abeba4 + .quad 0x26e6b50623a67c36 + .quad 0x5cf0e856f3d4fb01 + + // 2^220 * 4 * G + + .quad 0xf6c968731ae8cab4 + .quad 0x5e20741ecb4f92c5 + .quad 0x2da53be58ccdbc3e + .quad 0x2dddfea269970df7 + .quad 0xbeaece313db342a8 + .quad 0xcba3635b842db7ee + .quad 0xe88c6620817f13ef + .quad 0x1b9438aa4e76d5c6 + .quad 0x8a50777e166f031a + .quad 0x067b39f10fb7a328 + .quad 0x1925c9a6010fbd76 + .quad 0x6df9b575cc740905 + + // 2^220 * 5 * G + + .quad 0x42c1192927f6bdcf + .quad 0x8f91917a403d61ca + .quad 0xdc1c5a668b9e1f61 + .quad 0x1596047804ec0f8d + .quad 0xecdfc35b48cade41 + .quad 0x6a88471fb2328270 + .quad 0x740a4a2440a01b6a + .quad 0x471e5796003b5f29 + .quad 0xda96bbb3aced37ac + .quad 0x7a2423b5e9208cea + .quad 0x24cc5c3038aebae2 + .quad 0x50c356afdc5dae2f + + // 2^220 * 6 * G + + .quad 0x09dcbf4341c30318 + .quad 0xeeba061183181dce + .quad 0xc179c0cedc1e29a1 + .quad 0x1dbf7b89073f35b0 + .quad 0xcfed9cdf1b31b964 + .quad 0xf486a9858ca51af3 + .quad 0x14897265ea8c1f84 + .quad 0x784a53dd932acc00 + .quad 0x2d99f9df14fc4920 + .quad 0x76ccb60cc4499fe5 + .quad 0xa4132cbbe5cf0003 + .quad 0x3f93d82354f000ea + + // 2^220 * 7 * G + + .quad 0x8183e7689e04ce85 + .quad 0x678fb71e04465341 + .quad 0xad92058f6688edac + .quad 0x5da350d3532b099a + .quad 0xeaac12d179e14978 + .quad 0xff923ff3bbebff5e + .quad 0x4af663e40663ce27 + .quad 0x0fd381a811a5f5ff + .quad 0xf256aceca436df54 + .quad 0x108b6168ae69d6e8 + .quad 0x20d986cb6b5d036c + .quad 0x655957b9fee2af50 + + // 2^220 * 8 * G + + .quad 0xaea8b07fa902030f + .quad 0xf88c766af463d143 + .quad 0x15b083663c787a60 + .quad 0x08eab1148267a4a8 + .quad 0xbdc1409bd002d0ac + .quad 0x66660245b5ccd9a6 + .quad 0x82317dc4fade85ec + .quad 0x02fe934b6ad7df0d + .quad 0xef5cf100cfb7ea74 + .quad 0x22897633a1cb42ac + .quad 0xd4ce0c54cef285e2 + .quad 0x30408c048a146a55 + + // 2^224 * 1 * G + + .quad 0x739d8845832fcedb + .quad 0xfa38d6c9ae6bf863 + .quad 
0x32bc0dcab74ffef7 + .quad 0x73937e8814bce45e + .quad 0xbb2e00c9193b877f + .quad 0xece3a890e0dc506b + .quad 0xecf3b7c036de649f + .quad 0x5f46040898de9e1a + .quad 0xb9037116297bf48d + .quad 0xa9d13b22d4f06834 + .quad 0xe19715574696bdc6 + .quad 0x2cf8a4e891d5e835 + + // 2^224 * 2 * G + + .quad 0x6d93fd8707110f67 + .quad 0xdd4c09d37c38b549 + .quad 0x7cb16a4cc2736a86 + .quad 0x2049bd6e58252a09 + .quad 0x2cb5487e17d06ba2 + .quad 0x24d2381c3950196b + .quad 0xd7659c8185978a30 + .quad 0x7a6f7f2891d6a4f6 + .quad 0x7d09fd8d6a9aef49 + .quad 0xf0ee60be5b3db90b + .quad 0x4c21b52c519ebfd4 + .quad 0x6011aadfc545941d + + // 2^224 * 3 * G + + .quad 0x5f67926dcf95f83c + .quad 0x7c7e856171289071 + .quad 0xd6a1e7f3998f7a5b + .quad 0x6fc5cc1b0b62f9e0 + .quad 0x63ded0c802cbf890 + .quad 0xfbd098ca0dff6aaa + .quad 0x624d0afdb9b6ed99 + .quad 0x69ce18b779340b1e + .quad 0xd1ef5528b29879cb + .quad 0xdd1aae3cd47e9092 + .quad 0x127e0442189f2352 + .quad 0x15596b3ae57101f1 + + // 2^224 * 4 * G + + .quad 0x462739d23f9179a2 + .quad 0xff83123197d6ddcf + .quad 0x1307deb553f2148a + .quad 0x0d2237687b5f4dda + .quad 0x09ff31167e5124ca + .quad 0x0be4158bd9c745df + .quad 0x292b7d227ef556e5 + .quad 0x3aa4e241afb6d138 + .quad 0x2cc138bf2a3305f5 + .quad 0x48583f8fa2e926c3 + .quad 0x083ab1a25549d2eb + .quad 0x32fcaa6e4687a36c + + // 2^224 * 5 * G + + .quad 0x7bc56e8dc57d9af5 + .quad 0x3e0bd2ed9df0bdf2 + .quad 0xaac014de22efe4a3 + .quad 0x4627e9cefebd6a5c + .quad 0x3207a4732787ccdf + .quad 0x17e31908f213e3f8 + .quad 0xd5b2ecd7f60d964e + .quad 0x746f6336c2600be9 + .quad 0x3f4af345ab6c971c + .quad 0xe288eb729943731f + .quad 0x33596a8a0344186d + .quad 0x7b4917007ed66293 + + // 2^224 * 6 * G + + .quad 0x2d85fb5cab84b064 + .quad 0x497810d289f3bc14 + .quad 0x476adc447b15ce0c + .quad 0x122ba376f844fd7b + .quad 0x54341b28dd53a2dd + .quad 0xaa17905bdf42fc3f + .quad 0x0ff592d94dd2f8f4 + .quad 0x1d03620fe08cd37d + .quad 0xc20232cda2b4e554 + .quad 0x9ed0fd42115d187f + .quad 0x2eabb4be7dd479d9 + .quad 0x02c70bf52b68ec4c + + // 2^224 * 7 * G + + .quad 0xa287ec4b5d0b2fbb + .quad 0x415c5790074882ca + .quad 0xe044a61ec1d0815c + .quad 0x26334f0a409ef5e0 + .quad 0xace532bf458d72e1 + .quad 0x5be768e07cb73cb5 + .quad 0x56cf7d94ee8bbde7 + .quad 0x6b0697e3feb43a03 + .quad 0xb6c8f04adf62a3c0 + .quad 0x3ef000ef076da45d + .quad 0x9c9cb95849f0d2a9 + .quad 0x1cc37f43441b2fae + + // 2^224 * 8 * G + + .quad 0x508f565a5cc7324f + .quad 0xd061c4c0e506a922 + .quad 0xfb18abdb5c45ac19 + .quad 0x6c6809c10380314a + .quad 0xd76656f1c9ceaeb9 + .quad 0x1c5b15f818e5656a + .quad 0x26e72832844c2334 + .quad 0x3a346f772f196838 + .quad 0xd2d55112e2da6ac8 + .quad 0xe9bd0331b1e851ed + .quad 0x960746dd8ec67262 + .quad 0x05911b9f6ef7c5d0 + + // 2^228 * 1 * G + + .quad 0xe9dcd756b637ff2d + .quad 0xec4c348fc987f0c4 + .quad 0xced59285f3fbc7b7 + .quad 0x3305354793e1ea87 + .quad 0x01c18980c5fe9f94 + .quad 0xcd656769716fd5c8 + .quad 0x816045c3d195a086 + .quad 0x6e2b7f3266cc7982 + .quad 0xcc802468f7c3568f + .quad 0x9de9ba8219974cb3 + .quad 0xabb7229cb5b81360 + .quad 0x44e2017a6fbeba62 + + // 2^228 * 2 * G + + .quad 0xc4c2a74354dab774 + .quad 0x8e5d4c3c4eaf031a + .quad 0xb76c23d242838f17 + .quad 0x749a098f68dce4ea + .quad 0x87f82cf3b6ca6ecd + .quad 0x580f893e18f4a0c2 + .quad 0x058930072604e557 + .quad 0x6cab6ac256d19c1d + .quad 0xdcdfe0a02cc1de60 + .quad 0x032665ff51c5575b + .quad 0x2c0c32f1073abeeb + .quad 0x6a882014cd7b8606 + + // 2^228 * 3 * G + + .quad 0xa52a92fea4747fb5 + .quad 0xdc12a4491fa5ab89 + .quad 0xd82da94bb847a4ce + .quad 0x4d77edce9512cc4e + .quad 0xd111d17caf4feb6e + .quad 
0x050bba42b33aa4a3 + .quad 0x17514c3ceeb46c30 + .quad 0x54bedb8b1bc27d75 + .quad 0x77c8e14577e2189c + .quad 0xa3e46f6aff99c445 + .quad 0x3144dfc86d335343 + .quad 0x3a96559e7c4216a9 + + // 2^228 * 4 * G + + .quad 0x12550d37f42ad2ee + .quad 0x8b78e00498a1fbf5 + .quad 0x5d53078233894cb2 + .quad 0x02c84e4e3e498d0c + .quad 0x4493896880baaa52 + .quad 0x4c98afc4f285940e + .quad 0xef4aa79ba45448b6 + .quad 0x5278c510a57aae7f + .quad 0xa54dd074294c0b94 + .quad 0xf55d46b8df18ffb6 + .quad 0xf06fecc58dae8366 + .quad 0x588657668190d165 + + // 2^228 * 5 * G + + .quad 0xd47712311aef7117 + .quad 0x50343101229e92c7 + .quad 0x7a95e1849d159b97 + .quad 0x2449959b8b5d29c9 + .quad 0xbf5834f03de25cc3 + .quad 0xb887c8aed6815496 + .quad 0x5105221a9481e892 + .quad 0x6760ed19f7723f93 + .quad 0x669ba3b7ac35e160 + .quad 0x2eccf73fba842056 + .quad 0x1aec1f17c0804f07 + .quad 0x0d96bc031856f4e7 + + // 2^228 * 6 * G + + .quad 0x3318be7775c52d82 + .quad 0x4cb764b554d0aab9 + .quad 0xabcf3d27cc773d91 + .quad 0x3bf4d1848123288a + .quad 0xb1d534b0cc7505e1 + .quad 0x32cd003416c35288 + .quad 0xcb36a5800762c29d + .quad 0x5bfe69b9237a0bf8 + .quad 0x183eab7e78a151ab + .quad 0xbbe990c999093763 + .quad 0xff717d6e4ac7e335 + .quad 0x4c5cddb325f39f88 + + // 2^228 * 7 * G + + .quad 0xc0f6b74d6190a6eb + .quad 0x20ea81a42db8f4e4 + .quad 0xa8bd6f7d97315760 + .quad 0x33b1d60262ac7c21 + .quad 0x57750967e7a9f902 + .quad 0x2c37fdfc4f5b467e + .quad 0xb261663a3177ba46 + .quad 0x3a375e78dc2d532b + .quad 0x8141e72f2d4dddea + .quad 0xe6eafe9862c607c8 + .quad 0x23c28458573cafd0 + .quad 0x46b9476f4ff97346 + + // 2^228 * 8 * G + + .quad 0x0c1ffea44f901e5c + .quad 0x2b0b6fb72184b782 + .quad 0xe587ff910114db88 + .quad 0x37130f364785a142 + .quad 0x1215505c0d58359f + .quad 0x2a2013c7fc28c46b + .quad 0x24a0a1af89ea664e + .quad 0x4400b638a1130e1f + .quad 0x3a01b76496ed19c3 + .quad 0x31e00ab0ed327230 + .quad 0x520a885783ca15b1 + .quad 0x06aab9875accbec7 + + // 2^232 * 1 * G + + .quad 0xc1339983f5df0ebb + .quad 0xc0f3758f512c4cac + .quad 0x2cf1130a0bb398e1 + .quad 0x6b3cecf9aa270c62 + .quad 0x5349acf3512eeaef + .quad 0x20c141d31cc1cb49 + .quad 0x24180c07a99a688d + .quad 0x555ef9d1c64b2d17 + .quad 0x36a770ba3b73bd08 + .quad 0x624aef08a3afbf0c + .quad 0x5737ff98b40946f2 + .quad 0x675f4de13381749d + + // 2^232 * 2 * G + + .quad 0x0e2c52036b1782fc + .quad 0x64816c816cad83b4 + .quad 0xd0dcbdd96964073e + .quad 0x13d99df70164c520 + .quad 0xa12ff6d93bdab31d + .quad 0x0725d80f9d652dfe + .quad 0x019c4ff39abe9487 + .quad 0x60f450b882cd3c43 + .quad 0x014b5ec321e5c0ca + .quad 0x4fcb69c9d719bfa2 + .quad 0x4e5f1c18750023a0 + .quad 0x1c06de9e55edac80 + + // 2^232 * 3 * G + + .quad 0x990f7ad6a33ec4e2 + .quad 0x6608f938be2ee08e + .quad 0x9ca143c563284515 + .quad 0x4cf38a1fec2db60d + .quad 0xffd52b40ff6d69aa + .quad 0x34530b18dc4049bb + .quad 0x5e4a5c2fa34d9897 + .quad 0x78096f8e7d32ba2d + .quad 0xa0aaaa650dfa5ce7 + .quad 0xf9c49e2a48b5478c + .quad 0x4f09cc7d7003725b + .quad 0x373cad3a26091abe + + // 2^232 * 4 * G + + .quad 0xb294634d82c9f57c + .quad 0x1fcbfde124934536 + .quad 0x9e9c4db3418cdb5a + .quad 0x0040f3d9454419fc + .quad 0xf1bea8fb89ddbbad + .quad 0x3bcb2cbc61aeaecb + .quad 0x8f58a7bb1f9b8d9d + .quad 0x21547eda5112a686 + .quad 0xdefde939fd5986d3 + .quad 0xf4272c89510a380c + .quad 0xb72ba407bb3119b9 + .quad 0x63550a334a254df4 + + // 2^232 * 5 * G + + .quad 0x6507d6edb569cf37 + .quad 0x178429b00ca52ee1 + .quad 0xea7c0090eb6bd65d + .quad 0x3eea62c7daf78f51 + .quad 0x9bba584572547b49 + .quad 0xf305c6fae2c408e0 + .quad 0x60e8fa69c734f18d + .quad 0x39a92bafaa7d767a + .quad 
0x9d24c713e693274e + .quad 0x5f63857768dbd375 + .quad 0x70525560eb8ab39a + .quad 0x68436a0665c9c4cd + + // 2^232 * 6 * G + + .quad 0xbc0235e8202f3f27 + .quad 0xc75c00e264f975b0 + .quad 0x91a4e9d5a38c2416 + .quad 0x17b6e7f68ab789f9 + .quad 0x1e56d317e820107c + .quad 0xc5266844840ae965 + .quad 0xc1e0a1c6320ffc7a + .quad 0x5373669c91611472 + .quad 0x5d2814ab9a0e5257 + .quad 0x908f2084c9cab3fc + .quad 0xafcaf5885b2d1eca + .quad 0x1cb4b5a678f87d11 + + // 2^232 * 7 * G + + .quad 0xb664c06b394afc6c + .quad 0x0c88de2498da5fb1 + .quad 0x4f8d03164bcad834 + .quad 0x330bca78de7434a2 + .quad 0x6b74aa62a2a007e7 + .quad 0xf311e0b0f071c7b1 + .quad 0x5707e438000be223 + .quad 0x2dc0fd2d82ef6eac + .quad 0x982eff841119744e + .quad 0xf9695e962b074724 + .quad 0xc58ac14fbfc953fb + .quad 0x3c31be1b369f1cf5 + + // 2^232 * 8 * G + + .quad 0xb0f4864d08948aee + .quad 0x07dc19ee91ba1c6f + .quad 0x7975cdaea6aca158 + .quad 0x330b61134262d4bb + .quad 0xc168bc93f9cb4272 + .quad 0xaeb8711fc7cedb98 + .quad 0x7f0e52aa34ac8d7a + .quad 0x41cec1097e7d55bb + .quad 0xf79619d7a26d808a + .quad 0xbb1fd49e1d9e156d + .quad 0x73d7c36cdba1df27 + .quad 0x26b44cd91f28777d + + // 2^236 * 1 * G + + .quad 0x300a9035393aa6d8 + .quad 0x2b501131a12bb1cd + .quad 0x7b1ff677f093c222 + .quad 0x4309c1f8cab82bad + .quad 0xaf44842db0285f37 + .quad 0x8753189047efc8df + .quad 0x9574e091f820979a + .quad 0x0e378d6069615579 + .quad 0xd9fa917183075a55 + .quad 0x4bdb5ad26b009fdc + .quad 0x7829ad2cd63def0e + .quad 0x078fc54975fd3877 + + // 2^236 * 2 * G + + .quad 0x87dfbd1428878f2d + .quad 0x134636dd1e9421a1 + .quad 0x4f17c951257341a3 + .quad 0x5df98d4bad296cb8 + .quad 0xe2004b5bb833a98a + .quad 0x44775dec2d4c3330 + .quad 0x3aa244067eace913 + .quad 0x272630e3d58e00a9 + .quad 0xf3678fd0ecc90b54 + .quad 0xf001459b12043599 + .quad 0x26725fbc3758b89b + .quad 0x4325e4aa73a719ae + + // 2^236 * 3 * G + + .quad 0x657dc6ef433c3493 + .quad 0x65375e9f80dbf8c3 + .quad 0x47fd2d465b372dae + .quad 0x4966ab79796e7947 + .quad 0xed24629acf69f59d + .quad 0x2a4a1ccedd5abbf4 + .quad 0x3535ca1f56b2d67b + .quad 0x5d8c68d043b1b42d + .quad 0xee332d4de3b42b0a + .quad 0xd84e5a2b16a4601c + .quad 0x78243877078ba3e4 + .quad 0x77ed1eb4184ee437 + + // 2^236 * 4 * G + + .quad 0xbfd4e13f201839a0 + .quad 0xaeefffe23e3df161 + .quad 0xb65b04f06b5d1fe3 + .quad 0x52e085fb2b62fbc0 + .quad 0x185d43f89e92ed1a + .quad 0xb04a1eeafe4719c6 + .quad 0x499fbe88a6f03f4f + .quad 0x5d8b0d2f3c859bdd + .quad 0x124079eaa54cf2ba + .quad 0xd72465eb001b26e7 + .quad 0x6843bcfdc97af7fd + .quad 0x0524b42b55eacd02 + + // 2^236 * 5 * G + + .quad 0xfd0d5dbee45447b0 + .quad 0x6cec351a092005ee + .quad 0x99a47844567579cb + .quad 0x59d242a216e7fa45 + .quad 0xbc18dcad9b829eac + .quad 0x23ae7d28b5f579d0 + .quad 0xc346122a69384233 + .quad 0x1a6110b2e7d4ac89 + .quad 0x4f833f6ae66997ac + .quad 0x6849762a361839a4 + .quad 0x6985dec1970ab525 + .quad 0x53045e89dcb1f546 + + // 2^236 * 6 * G + + .quad 0xcb8bb346d75353db + .quad 0xfcfcb24bae511e22 + .quad 0xcba48d40d50ae6ef + .quad 0x26e3bae5f4f7cb5d + .quad 0x84da3cde8d45fe12 + .quad 0xbd42c218e444e2d2 + .quad 0xa85196781f7e3598 + .quad 0x7642c93f5616e2b2 + .quad 0x2323daa74595f8e4 + .quad 0xde688c8b857abeb4 + .quad 0x3fc48e961c59326e + .quad 0x0b2e73ca15c9b8ba + + // 2^236 * 7 * G + + .quad 0xd6bb4428c17f5026 + .quad 0x9eb27223fb5a9ca7 + .quad 0xe37ba5031919c644 + .quad 0x21ce380db59a6602 + .quad 0x0e3fbfaf79c03a55 + .quad 0x3077af054cbb5acf + .quad 0xd5c55245db3de39f + .quad 0x015e68c1476a4af7 + .quad 0xc1d5285220066a38 + .quad 0x95603e523570aef3 + .quad 0x832659a7226b8a4d + .quad 
0x5dd689091f8eedc9 + + // 2^236 * 8 * G + + .quad 0xcbac84debfd3c856 + .quad 0x1624c348b35ff244 + .quad 0xb7f88dca5d9cad07 + .quad 0x3b0e574da2c2ebe8 + .quad 0x1d022591a5313084 + .quad 0xca2d4aaed6270872 + .quad 0x86a12b852f0bfd20 + .quad 0x56e6c439ad7da748 + .quad 0xc704ff4942bdbae6 + .quad 0x5e21ade2b2de1f79 + .quad 0xe95db3f35652fad8 + .quad 0x0822b5378f08ebc1 + + // 2^240 * 1 * G + + .quad 0x51f048478f387475 + .quad 0xb25dbcf49cbecb3c + .quad 0x9aab1244d99f2055 + .quad 0x2c709e6c1c10a5d6 + .quad 0xe1b7f29362730383 + .quad 0x4b5279ffebca8a2c + .quad 0xdafc778abfd41314 + .quad 0x7deb10149c72610f + .quad 0xcb62af6a8766ee7a + .quad 0x66cbec045553cd0e + .quad 0x588001380f0be4b5 + .quad 0x08e68e9ff62ce2ea + + // 2^240 * 2 * G + + .quad 0x34ad500a4bc130ad + .quad 0x8d38db493d0bd49c + .quad 0xa25c3d98500a89be + .quad 0x2f1f3f87eeba3b09 + .quad 0x2f2d09d50ab8f2f9 + .quad 0xacb9218dc55923df + .quad 0x4a8f342673766cb9 + .quad 0x4cb13bd738f719f5 + .quad 0xf7848c75e515b64a + .quad 0xa59501badb4a9038 + .quad 0xc20d313f3f751b50 + .quad 0x19a1e353c0ae2ee8 + + // 2^240 * 3 * G + + .quad 0x7d1c7560bafa05c3 + .quad 0xb3e1a0a0c6e55e61 + .quad 0xe3529718c0d66473 + .quad 0x41546b11c20c3486 + .quad 0xb42172cdd596bdbd + .quad 0x93e0454398eefc40 + .quad 0x9fb15347b44109b5 + .quad 0x736bd3990266ae34 + .quad 0x85532d509334b3b4 + .quad 0x46fd114b60816573 + .quad 0xcc5f5f30425c8375 + .quad 0x412295a2b87fab5c + + // 2^240 * 4 * G + + .quad 0x19c99b88f57ed6e9 + .quad 0x5393cb266df8c825 + .quad 0x5cee3213b30ad273 + .quad 0x14e153ebb52d2e34 + .quad 0x2e655261e293eac6 + .quad 0x845a92032133acdb + .quad 0x460975cb7900996b + .quad 0x0760bb8d195add80 + .quad 0x413e1a17cde6818a + .quad 0x57156da9ed69a084 + .quad 0x2cbf268f46caccb1 + .quad 0x6b34be9bc33ac5f2 + + // 2^240 * 5 * G + + .quad 0xf3df2f643a78c0b2 + .quad 0x4c3e971ef22e027c + .quad 0xec7d1c5e49c1b5a3 + .quad 0x2012c18f0922dd2d + .quad 0x11fc69656571f2d3 + .quad 0xc6c9e845530e737a + .quad 0xe33ae7a2d4fe5035 + .quad 0x01b9c7b62e6dd30b + .quad 0x880b55e55ac89d29 + .quad 0x1483241f45a0a763 + .quad 0x3d36efdfc2e76c1f + .quad 0x08af5b784e4bade8 + + // 2^240 * 6 * G + + .quad 0x283499dc881f2533 + .quad 0x9d0525da779323b6 + .quad 0x897addfb673441f4 + .quad 0x32b79d71163a168d + .quad 0xe27314d289cc2c4b + .quad 0x4be4bd11a287178d + .quad 0x18d528d6fa3364ce + .quad 0x6423c1d5afd9826e + .quad 0xcc85f8d9edfcb36a + .quad 0x22bcc28f3746e5f9 + .quad 0xe49de338f9e5d3cd + .quad 0x480a5efbc13e2dcc + + // 2^240 * 7 * G + + .quad 0x0b51e70b01622071 + .quad 0x06b505cf8b1dafc5 + .quad 0x2c6bb061ef5aabcd + .quad 0x47aa27600cb7bf31 + .quad 0xb6614ce442ce221f + .quad 0x6e199dcc4c053928 + .quad 0x663fb4a4dc1cbe03 + .quad 0x24b31d47691c8e06 + .quad 0x2a541eedc015f8c3 + .quad 0x11a4fe7e7c693f7c + .quad 0xf0af66134ea278d6 + .quad 0x545b585d14dda094 + + // 2^240 * 8 * G + + .quad 0x67bf275ea0d43a0f + .quad 0xade68e34089beebe + .quad 0x4289134cd479e72e + .quad 0x0f62f9c332ba5454 + .quad 0x6204e4d0e3b321e1 + .quad 0x3baa637a28ff1e95 + .quad 0x0b0ccffd5b99bd9e + .quad 0x4d22dc3e64c8d071 + .quad 0xfcb46589d63b5f39 + .quad 0x5cae6a3f57cbcf61 + .quad 0xfebac2d2953afa05 + .quad 0x1c0fa01a36371436 + + // 2^244 * 1 * G + + .quad 0xe7547449bc7cd692 + .quad 0x0f9abeaae6f73ddf + .quad 0x4af01ca700837e29 + .quad 0x63ab1b5d3f1bc183 + .quad 0xc11ee5e854c53fae + .quad 0x6a0b06c12b4f3ff4 + .quad 0x33540f80e0b67a72 + .quad 0x15f18fc3cd07e3ef + .quad 0x32750763b028f48c + .quad 0x06020740556a065f + .quad 0xd53bd812c3495b58 + .quad 0x08706c9b865f508d + + // 2^244 * 2 * G + + .quad 0xf37ca2ab3d343dff + .quad 
0x1a8c6a2d80abc617 + .quad 0x8e49e035d4ccffca + .quad 0x48b46beebaa1d1b9 + .quad 0xcc991b4138b41246 + .quad 0x243b9c526f9ac26b + .quad 0xb9ef494db7cbabbd + .quad 0x5fba433dd082ed00 + .quad 0x9c49e355c9941ad0 + .quad 0xb9734ade74498f84 + .quad 0x41c3fed066663e5c + .quad 0x0ecfedf8e8e710b3 + + // 2^244 * 3 * G + + .quad 0x76430f9f9cd470d9 + .quad 0xb62acc9ba42f6008 + .quad 0x1898297c59adad5e + .quad 0x7789dd2db78c5080 + .quad 0x744f7463e9403762 + .quad 0xf79a8dee8dfcc9c9 + .quad 0x163a649655e4cde3 + .quad 0x3b61788db284f435 + .quad 0xb22228190d6ef6b2 + .quad 0xa94a66b246ce4bfa + .quad 0x46c1a77a4f0b6cc7 + .quad 0x4236ccffeb7338cf + + // 2^244 * 4 * G + + .quad 0x8497404d0d55e274 + .quad 0x6c6663d9c4ad2b53 + .quad 0xec2fb0d9ada95734 + .quad 0x2617e120cdb8f73c + .quad 0x3bd82dbfda777df6 + .quad 0x71b177cc0b98369e + .quad 0x1d0e8463850c3699 + .quad 0x5a71945b48e2d1f1 + .quad 0x6f203dd5405b4b42 + .quad 0x327ec60410b24509 + .quad 0x9c347230ac2a8846 + .quad 0x77de29fc11ffeb6a + + // 2^244 * 5 * G + + .quad 0xb0ac57c983b778a8 + .quad 0x53cdcca9d7fe912c + .quad 0x61c2b854ff1f59dc + .quad 0x3a1a2cf0f0de7dac + .quad 0x835e138fecced2ca + .quad 0x8c9eaf13ea963b9a + .quad 0xc95fbfc0b2160ea6 + .quad 0x575e66f3ad877892 + .quad 0x99803a27c88fcb3a + .quad 0x345a6789275ec0b0 + .quad 0x459789d0ff6c2be5 + .quad 0x62f882651e70a8b2 + + // 2^244 * 6 * G + + .quad 0x085ae2c759ff1be4 + .quad 0x149145c93b0e40b7 + .quad 0xc467e7fa7ff27379 + .quad 0x4eeecf0ad5c73a95 + .quad 0x6d822986698a19e0 + .quad 0xdc9821e174d78a71 + .quad 0x41a85f31f6cb1f47 + .quad 0x352721c2bcda9c51 + .quad 0x48329952213fc985 + .quad 0x1087cf0d368a1746 + .quad 0x8e5261b166c15aa5 + .quad 0x2d5b2d842ed24c21 + + // 2^244 * 7 * G + + .quad 0x02cfebd9ebd3ded1 + .quad 0xd45b217739021974 + .quad 0x7576f813fe30a1b7 + .quad 0x5691b6f9a34ef6c2 + .quad 0x5eb7d13d196ac533 + .quad 0x377234ecdb80be2b + .quad 0xe144cffc7cf5ae24 + .quad 0x5226bcf9c441acec + .quad 0x79ee6c7223e5b547 + .quad 0x6f5f50768330d679 + .quad 0xed73e1e96d8adce9 + .quad 0x27c3da1e1d8ccc03 + + // 2^244 * 8 * G + + .quad 0x7eb9efb23fe24c74 + .quad 0x3e50f49f1651be01 + .quad 0x3ea732dc21858dea + .quad 0x17377bd75bb810f9 + .quad 0x28302e71630ef9f6 + .quad 0xc2d4a2032b64cee0 + .quad 0x090820304b6292be + .quad 0x5fca747aa82adf18 + .quad 0x232a03c35c258ea5 + .quad 0x86f23a2c6bcb0cf1 + .quad 0x3dad8d0d2e442166 + .quad 0x04a8933cab76862b + + // 2^248 * 1 * G + + .quad 0xd2c604b622943dff + .quad 0xbc8cbece44cfb3a0 + .quad 0x5d254ff397808678 + .quad 0x0fa3614f3b1ca6bf + .quad 0x69082b0e8c936a50 + .quad 0xf9c9a035c1dac5b6 + .quad 0x6fb73e54c4dfb634 + .quad 0x4005419b1d2bc140 + .quad 0xa003febdb9be82f0 + .quad 0x2089c1af3a44ac90 + .quad 0xf8499f911954fa8e + .quad 0x1fba218aef40ab42 + + // 2^248 * 2 * G + + .quad 0xab549448fac8f53e + .quad 0x81f6e89a7ba63741 + .quad 0x74fd6c7d6c2b5e01 + .quad 0x392e3acaa8c86e42 + .quad 0x4f3e57043e7b0194 + .quad 0xa81d3eee08daaf7f + .quad 0xc839c6ab99dcdef1 + .quad 0x6c535d13ff7761d5 + .quad 0x4cbd34e93e8a35af + .quad 0x2e0781445887e816 + .quad 0x19319c76f29ab0ab + .quad 0x25e17fe4d50ac13b + + // 2^248 * 3 * G + + .quad 0x0a289bd71e04f676 + .quad 0x208e1c52d6420f95 + .quad 0x5186d8b034691fab + .quad 0x255751442a9fb351 + .quad 0x915f7ff576f121a7 + .quad 0xc34a32272fcd87e3 + .quad 0xccba2fde4d1be526 + .quad 0x6bba828f8969899b + .quad 0xe2d1bc6690fe3901 + .quad 0x4cb54a18a0997ad5 + .quad 0x971d6914af8460d4 + .quad 0x559d504f7f6b7be4 + + // 2^248 * 4 * G + + .quad 0xa7738378b3eb54d5 + .quad 0x1d69d366a5553c7c + .quad 0x0a26cf62f92800ba + .quad 0x01ab12d5807e3217 + .quad 
0x9c4891e7f6d266fd + .quad 0x0744a19b0307781b + .quad 0x88388f1d6061e23b + .quad 0x123ea6a3354bd50e + .quad 0x118d189041e32d96 + .quad 0xb9ede3c2d8315848 + .quad 0x1eab4271d83245d9 + .quad 0x4a3961e2c918a154 + + // 2^248 * 5 * G + + .quad 0x71dc3be0f8e6bba0 + .quad 0xd6cef8347effe30a + .quad 0xa992425fe13a476a + .quad 0x2cd6bce3fb1db763 + .quad 0x0327d644f3233f1e + .quad 0x499a260e34fcf016 + .quad 0x83b5a716f2dab979 + .quad 0x68aceead9bd4111f + .quad 0x38b4c90ef3d7c210 + .quad 0x308e6e24b7ad040c + .quad 0x3860d9f1b7e73e23 + .quad 0x595760d5b508f597 + + // 2^248 * 6 * G + + .quad 0x6129bfe104aa6397 + .quad 0x8f960008a4a7fccb + .quad 0x3f8bc0897d909458 + .quad 0x709fa43edcb291a9 + .quad 0x882acbebfd022790 + .quad 0x89af3305c4115760 + .quad 0x65f492e37d3473f4 + .quad 0x2cb2c5df54515a2b + .quad 0xeb0a5d8c63fd2aca + .quad 0xd22bc1662e694eff + .quad 0x2723f36ef8cbb03a + .quad 0x70f029ecf0c8131f + + // 2^248 * 7 * G + + .quad 0x461307b32eed3e33 + .quad 0xae042f33a45581e7 + .quad 0xc94449d3195f0366 + .quad 0x0b7d5d8a6c314858 + .quad 0x2a6aafaa5e10b0b9 + .quad 0x78f0a370ef041aa9 + .quad 0x773efb77aa3ad61f + .quad 0x44eca5a2a74bd9e1 + .quad 0x25d448327b95d543 + .quad 0x70d38300a3340f1d + .quad 0xde1c531c60e1c52b + .quad 0x272224512c7de9e4 + + // 2^248 * 8 * G + + .quad 0x1abc92af49c5342e + .quad 0xffeed811b2e6fad0 + .quad 0xefa28c8dfcc84e29 + .quad 0x11b5df18a44cc543 + .quad 0xbf7bbb8a42a975fc + .quad 0x8c5c397796ada358 + .quad 0xe27fc76fcdedaa48 + .quad 0x19735fd7f6bc20a6 + .quad 0xe3ab90d042c84266 + .quad 0xeb848e0f7f19547e + .quad 0x2503a1d065a497b9 + .quad 0x0fef911191df895f + + // 2^252 * 1 * G + + .quad 0xb1507ca1ab1c6eb9 + .quad 0xbd448f3e16b687b3 + .quad 0x3455fb7f2c7a91ab + .quad 0x7579229e2f2adec1 + .quad 0x6ab5dcb85b1c16b7 + .quad 0x94c0fce83c7b27a5 + .quad 0xa4b11c1a735517be + .quad 0x499238d0ba0eafaa + .quad 0xecf46e527aba8b57 + .quad 0x15a08c478bd1647b + .quad 0x7af1c6a65f706fef + .quad 0x6345fa78f03a30d5 + + // 2^252 * 2 * G + + .quad 0xdf02f95f1015e7a1 + .quad 0x790ec41da9b40263 + .quad 0x4d3a0ea133ea1107 + .quad 0x54f70be7e33af8c9 + .quad 0x93d3cbe9bdd8f0a4 + .quad 0xdb152c1bfd177302 + .quad 0x7dbddc6d7f17a875 + .quad 0x3e1a71cc8f426efe + .quad 0xc83ca3e390babd62 + .quad 0x80ede3670291c833 + .quad 0xc88038ccd37900c4 + .quad 0x2c5fc0231ec31fa1 + + // 2^252 * 3 * G + + .quad 0xfeba911717038b4f + .quad 0xe5123721c9deef81 + .quad 0x1c97e4e75d0d8834 + .quad 0x68afae7a23dc3bc6 + .quad 0xc422e4d102456e65 + .quad 0x87414ac1cad47b91 + .quad 0x1592e2bba2b6ffdd + .quad 0x75d9d2bff5c2100f + .quad 0x5bd9b4763626e81c + .quad 0x89966936bca02edd + .quad 0x0a41193d61f077b3 + .quad 0x3097a24200ce5471 + + // 2^252 * 4 * G + + .quad 0x57427734c7f8b84c + .quad 0xf141a13e01b270e9 + .quad 0x02d1adfeb4e564a6 + .quad 0x4bb23d92ce83bd48 + .quad 0xa162e7246695c486 + .quad 0x131d633435a89607 + .quad 0x30521561a0d12a37 + .quad 0x56704bada6afb363 + .quad 0xaf6c4aa752f912b9 + .quad 0x5e665f6cd86770c8 + .quad 0x4c35ac83a3c8cd58 + .quad 0x2b7a29c010a58a7e + + // 2^252 * 5 * G + + .quad 0xc4007f77d0c1cec3 + .quad 0x8d1020b6bac492f8 + .quad 0x32ec29d57e69daaf + .quad 0x599408759d95fce0 + .quad 0x33810a23bf00086e + .quad 0xafce925ee736ff7c + .quad 0x3d60e670e24922d4 + .quad 0x11ce9e714f96061b + .quad 0x219ef713d815bac1 + .quad 0xf141465d485be25c + .quad 0x6d5447cc4e513c51 + .quad 0x174926be5ef44393 + + // 2^252 * 6 * G + + .quad 0xb5deb2f9fc5bd5bb + .quad 0x92daa72ae1d810e1 + .quad 0xafc4cfdcb72a1c59 + .quad 0x497d78813fc22a24 + .quad 0x3ef5d41593ea022e + .quad 0x5cbcc1a20ed0eed6 + .quad 0x8fd24ecf07382c8c + .quad 
0x6fa42ead06d8e1ad + .quad 0xe276824a1f73371f + .quad 0x7f7cf01c4f5b6736 + .quad 0x7e201fe304fa46e7 + .quad 0x785a36a357808c96 + + // 2^252 * 7 * G + + .quad 0x825fbdfd63014d2b + .quad 0xc852369c6ca7578b + .quad 0x5b2fcd285c0b5df0 + .quad 0x12ab214c58048c8f + .quad 0x070442985d517bc3 + .quad 0x6acd56c7ae653678 + .quad 0x00a27983985a7763 + .quad 0x5167effae512662b + .quad 0xbd4ea9e10f53c4b6 + .quad 0x1673dc5f8ac91a14 + .quad 0xa8f81a4e2acc1aba + .quad 0x33a92a7924332a25 + + // 2^252 * 8 * G + + .quad 0x9dd1f49927996c02 + .quad 0x0cb3b058e04d1752 + .quad 0x1f7e88967fd02c3e + .quad 0x2f964268cb8b3eb1 + .quad 0x7ba95ba0218f2ada + .quad 0xcff42287330fb9ca + .quad 0xdada496d56c6d907 + .quad 0x5380c296f4beee54 + .quad 0x9d4f270466898d0a + .quad 0x3d0987990aff3f7a + .quad 0xd09ef36267daba45 + .quad 0x7761455e7b1c669c + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/curve25519/curve25519_x25519base_alt.S b/arm/curve25519/curve25519_x25519base_alt.S new file mode 100644 index 0000000000..1ab9551f53 --- /dev/null +++ b/arm/curve25519/curve25519_x25519base_alt.S @@ -0,0 +1,8762 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// The x25519 function for curve25519 on base element 9 +// Input scalar[4]; output res[4] +// +// extern void curve25519_x25519base_alt +// (uint64_t res[static 4],uint64_t scalar[static 4]); +// +// Given a scalar n, returns the X coordinate of n * G where G = (9,...) is +// the standard generator. The scalar is first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748). +// +// Standard ARM ABI: X0 = res, X1 = scalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519base_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519base_alt) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable home for the input result argument during the whole body + +#define res x23 + +// Other variables that are only needed prior to the modular inverse. + +#define tab x19 + +#define i x20 + +#define bias x21 + +#define bf x22 +#define ix x22 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. + +#define resx res, #(0*NUMSIZE) + +#define scalar sp, #(0*NUMSIZE) + +#define tabent sp, #(1*NUMSIZE) +#define ymx_2 sp, #(1*NUMSIZE) +#define xpy_2 sp, #(2*NUMSIZE) +#define kxy_2 sp, #(3*NUMSIZE) + +#define acc sp, #(4*NUMSIZE) +#define x_1 sp, #(4*NUMSIZE) +#define y_1 sp, #(5*NUMSIZE) +#define z_1 sp, #(6*NUMSIZE) +#define w_1 sp, #(7*NUMSIZE) +#define x_3 sp, #(4*NUMSIZE) +#define y_3 sp, #(5*NUMSIZE) +#define z_3 sp, #(6*NUMSIZE) +#define w_3 sp, #(7*NUMSIZE) + +#define tmpspace sp, #(8*NUMSIZE) +#define t0 sp, #(8*NUMSIZE) +#define t1 sp, #(9*NUMSIZE) +#define t2 sp, #(10*NUMSIZE) +#define t3 sp, #(11*NUMSIZE) +#define t4 sp, #(12*NUMSIZE) +#define t5 sp, #(13*NUMSIZE) + +// Total size to reserve on the stack + +#define NSPACE (14*NUMSIZE) + +// Macro wrapping up the basic field multiplication, only trivially +// different from a pure function call to bignum_mul_p25519_alt. 
+ +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [P2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [P1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #38; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, hs; \ + adds x13, x13, x9; \ + adcs x14, x14, x3; \ + adcs x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + orr x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #19; \ + madd x11, x7, x8, x7; \ + adds x12, x12, x11; \ + adcs x13, x13, xzr; \ + adcs x14, x14, xzr; \ + adcs x15, x15, xzr; \ + csel x7, x7, xzr, lo; \ + subs x12, x12, x7; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbc x15, x15, xzr; \ + and x15, x15, #0x7fffffffffffffff; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. 
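To make the two reduction strategies concrete, here is a small Python model (illustrative only, not part of the patch) that computes the same results. Both macros fold the 512-bit product back using 2^256 == 38 (mod p_25519) and 2^255 == 19 (mod p_25519); mul_p25519 additionally performs the strict final reduction, which the model expresses as a single conditional subtraction where the macro instead uses a quotient estimate of the top bits plus one, followed by an optional correction.

    P25519 = 2**255 - 19
    MASK256 = 2**256 - 1

    def mul_4_model(x, y):
        # Weak reduction: result only guaranteed < 2 * p_25519, but
        # congruent to x * y modulo p_25519.
        t = x * y                              # 512-bit product
        t = (t & MASK256) + 38 * (t >> 256)    # fold: now t < 39 * 2^256
        t = (t & MASK256) + 38 * (t >> 256)    # fold: now t < 2^256 + 38 * 39
        return (t % 2**255) + 19 * (t >> 255)  # fold top bits: t < 2 * p_25519

    def mul_p25519_model(x, y):
        # Strict reduction: starting from < 2 * p_25519, one conditional
        # subtraction of the modulus suffices.
        t = mul_4_model(x, y)
        return t - P25519 if t >= P25519 else t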
+ +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [P2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [P1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #38; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, hs; \ + adds x13, x13, x9; \ + adcs x14, x14, x3; \ + adcs x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + bic x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #19; \ + mul x11, x7, x8; \ + adds x12, x12, x11; \ + adcs x13, x13, xzr; \ + adcs x14, x14, xzr; \ + adc x15, x15, xzr; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] + +// Plain 4-digit add and doubling without any normalization +// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result, +// indeed one < 2 * p_25519 for normalized inputs. + +#define add_4(P0,P1,P2) \ + ldp x0, x1, [P1]; \ + ldp x4, x5, [P2]; \ + adds x0, x0, x4; \ + adcs x1, x1, x5; \ + ldp x2, x3, [P1+16]; \ + ldp x6, x7, [P2+16]; \ + adcs x2, x2, x6; \ + adc x3, x3, x7; \ + stp x0, x1, [P0]; \ + stp x2, x3, [P0+16] + +#define double_4(P0,P1) \ + ldp x0, x1, [P1]; \ + adds x0, x0, x0; \ + adcs x1, x1, x1; \ + ldp x2, x3, [P1+16]; \ + adcs x2, x2, x2; \ + adc x3, x3, x3; \ + stp x0, x1, [P0]; \ + stp x2, x3, [P0+16] + +// Subtraction of a pair of numbers < p_25519 just sufficient +// to give a 4-digit result. 
It actually always does (x - z) + (2^255-19) +// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 +// implicitly + +#define sub_4(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x3, #19; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + mov x4, #0x8000000000000000; \ + sbc x8, x8, x4; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16] + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x4, #38; \ + csel x3, x4, xzr, lo; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbc x8, x8, xzr; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16] + +// Modular addition with inputs double modulus 2 * p_25519 = 2^256 - 38 +// and in general only guaranteeing a 4-digit result, not even < 2 * p_25519. + +#define add_twice4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x5, x6, [P1+16]; \ + ldp x7, x8, [P2+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +S2N_BN_SYMBOL(curve25519_x25519base_alt): + +// Save regs and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + sub sp, sp, #NSPACE + +// Move the output pointer to a stable place + + mov res, x0 + +// Copy the input scalar to its local variable while mangling it. +// In principle the mangling is into 01xxx...xxx000, but actually +// we only clear the top two bits so 00xxx...xxxxxx. The additional +// 2^254 * G is taken care of by the starting value for the addition +// chain below, while we never look at the three low bits at all. + + ldp x10, x11, [x1] + stp x10, x11, [scalar] + ldp x12, x13, [x1, #16] + bic x13, x13, #0xc000000000000000 + stp x12, x13, [scalar+16] + +// The main part of the computation is on the edwards25519 curve in +// extended-projective coordinates (X,Y,Z,T), representing a point +// (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z). +// Only at the very end do we translate back to curve25519. So G +// below means the generator within edwards25519 corresponding to +// (9,...) for curve25519, via the standard isomorphism. +// +// Initialize accumulator "acc" to either (2^254 + 8) * G or just 2^254 * G +// depending on bit 3 of the scalar, the only nonzero bit of the bottom 4. +// Thus, we have effectively dealt with bits 0, 1, 2, 3, 254 and 255. 
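As a cross-check of the bit accounting just described, the following Python sketch (not part of the patch) confirms that clearing only the top two bits, starting the chain at (2^254 + 8 * (bit 3 of n)) * G and never consulting bits 0-2 yields exactly the multiple of G mandated by RFC 7748 clamping:

    def check_mangling(n):
        # n = raw 256-bit input scalar
        n &= 2**254 - 1                  # the code clears only the top two bits
        n_rfc = (n & ~7) | (1 << 254)    # RFC 7748 clamped scalar
        # initial accumulator (2^254 + (n & 8)) * G, plus bits 4..253
        # absorbed by the main loop, gives the same multiple of G
        assert (1 << 254) + (n & 8) + (n & ~15) == n_rfc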
+ + ldr x0, [scalar] + ands xzr, x0, #8 + + adr x10, edwards25519_0g + adr x11, edwards25519_8g + ldp x0, x1, [x10] + ldp x2, x3, [x11] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc] + + ldp x0, x1, [x10, 1*16] + ldp x2, x3, [x11, 1*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+1*16] + + ldp x0, x1, [x10, 2*16] + ldp x2, x3, [x11, 2*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+2*16] + + ldp x0, x1, [x10, 3*16] + ldp x2, x3, [x11, 3*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+3*16] + + mov x0, #1 + stp x0, xzr, [acc+4*16] + stp xzr, xzr, [acc+5*16] + + ldp x0, x1, [x10, 4*16] + ldp x2, x3, [x11, 4*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+6*16] + + ldp x0, x1, [x10, 5*16] + ldp x2, x3, [x11, 5*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+7*16] + +// The counter "i" tracks the bit position for which the scalar has +// already been absorbed, starting at 4 and going up in chunks of 4. +// +// The pointer "tab" points at the current block of the table for +// multiples (2^i * j) * G at the current bit position i; 1 <= j <= 8. +// +// The bias is always either 0 or 1 and needs to be added to the +// partially processed scalar implicitly. This is used to absorb 4 bits +// of scalar per iteration from 3-bit table indexing by exploiting +// negation: (16 * h + l) * G = (16 * (h + 1) - (16 - l)) * G is used +// when l >= 9. Note that we can't have any bias left over at the +// end because of the clearing of bit 255 of the scalar, meaning the +// l >= 9 case cannot arise on the last iteration. + + mov i, 4 + adr tab, edwards25519_gtable + mov bias, xzr + +// Start of the main loop, repeated 63 times for i = 4, 8, ..., 252 + +scalarloop: + +// Look at the next 4-bit field "bf", adding the previous bias as well. +// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, +// setting the bias to 1 for the next iteration in the latter case. + + lsr x0, i, #6 + ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly + lsr x2, x2, i + and x2, x2, #15 + add bf, x2, bias + + cmp bf, 9 + cset bias, cs + + mov x0, 16 + sub x0, x0, bf + cmp bias, xzr + csel ix, x0, bf, ne + +// Perform constant-time lookup in the table to get element number "ix". +// The table entry for the affine point (x,y) is actually a triple +// (y - x,x + y,2 * d * x * y) to precompute parts of the addition. +// Note that "ix" can be 0, so we set up the appropriate identity first.
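The recoding described in these comments can be modelled directly; in the Python sketch below (not part of the patch), each 4-bit window plus the incoming bias becomes a signed digit in {-8,...,8} whose absolute value is the table index "ix". The assertion that no bias is left over depends on bit 255 of the scalar being clear. Note also that the lookup code that follows reads all eight table entries unconditionally and selects with csel, so the memory access pattern is independent of the scalar value.

    def recode(n):
        # n = mangled scalar with its top two bits clear
        digits = []
        bias = 0
        for i in range(4, 256, 4):        # 63 iterations: i = 4, 8, ..., 252
            bf = ((n >> i) & 15) + bias
            bias = 1 if bf >= 9 else 0    # carry a bias into the next window
            ix = 16 - bf if bias else bf  # table index, always 0 <= ix <= 8
            digits.append(-ix if bias else ix)
        assert bias == 0                  # no leftover bias since bit 255 is clear
        # the signed digits recover the scalar above its low 4 bits
        assert sum(d << i for d, i in zip(digits, range(4, 256, 4))) == n & ~15
        return digits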
+ + mov x0, #1 + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + + cmp ix, #1 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #2 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #3 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #4 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #5 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #6 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #7 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #8 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, 
ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + +// We now have the triple from the table in registers as follows +// +// [x3;x2;x1;x0] = y - x +// [x7;x6;x5;x4] = x + y +// [x11;x10;x9;x8] = 2 * d * x * y +// +// In case bias = 1 we need to negate this. For Edwards curves +// -(x,y) = (-x,y), i.e. we need to negate the x coordinate. +// In this processed encoding, that amounts to swapping the +// first two fields and negating the third. +// +// The optional negation here also pretends bias = 0 whenever +// ix = 0 so that it doesn't need to handle the case of zero +// inputs, since no non-trivial table entries are zero. Note +// that in the zero case the whole negation is trivial, and +// so indeed is the swapping. + + cmp bias, #0 + + csel x12, x0, x4, eq + csel x13, x1, x5, eq + csel x14, x2, x6, eq + csel x15, x3, x7, eq + stp x12, x13, [tabent] + stp x14, x15, [tabent+16] + + csel x12, x0, x4, ne + csel x13, x1, x5, ne + csel x14, x2, x6, ne + csel x15, x3, x7, ne + stp x12, x13, [tabent+32] + stp x14, x15, [tabent+48] + + mov x0, #-19 + subs x0, x0, x8 + mov x2, #-1 + sbcs x1, x2, x9 + sbcs x2, x2, x10 + mov x3, #0x7FFFFFFFFFFFFFFF + sbc x3, x3, x11 + + cmp ix, xzr + ccmp bias, xzr, #4, ne + + csel x0, x0, x8, ne + csel x1, x1, x9, ne + stp x0, x1, [tabent+64] + csel x2, x2, x10, ne + csel x3, x3, x11, ne + stp x2, x3, [tabent+80] + +// Extended-projective and precomputed mixed addition. +// This is effectively the same as calling the standalone +// function edwards25519_pepadd_alt(acc,acc,tabent) + + double_4(t0,z_1) + sub_4(t1,y_1,x_1) + add_4(t2,y_1,x_1) + mul_4(t3,w_1,kxy_2) + mul_4(t1,t1,ymx_2) + mul_4(t2,t2,xpy_2) + sub_twice4(t4,t0,t3) + add_twice4(t0,t0,t3) + sub_twice4(t5,t2,t1) + add_twice4(t1,t2,t1) + mul_p25519(z_3,t4,t0) + mul_p25519(x_3,t5,t4) + mul_p25519(y_3,t0,t1) + mul_p25519(w_3,t5,t1) + +// End of the main loop; move on by 4 bits. + + add i, i, 4 + cmp i, 256 + bcc scalarloop + +// Now we need to translate from Edwards curve edwards25519 back +// to the Montgomery form curve25519. The mapping in the affine +// representations is +// +// (x,y) |-> ((1 + y) / (1 - y), c * (1 + y) / ((1 - y) * x)) +// +// For x25519, we only need the x coordinate, and we compute this as +// +// (1 + y) / (1 - y) = (x + x * y) / (x - x * y) +// = (X/Z + T/Z) / (X/Z - T/Z) +// = (X + T) / (X - T) +// = (X + T) * inverse(X - T) +// +// We could equally well use (Z + Y) / (Z - Y), but the above has the +// same cost, and it more explicitly forces zero output whenever X = 0, +// regardless of how the modular inverse behaves on zero inputs. In +// the present setting (base point 9, mangled scalar) that doesn't +// really matter anyway since X = 0 never arises, but it seems a +// little bit tidier. Note that the Edwards point (0,1), which maps to +// the Montgomery point at infinity, and the Edwards point (0,-1), which +// maps to Montgomery (0,0) [this is the 2-torsion point], are by +// definition both mapped to 0 by the X coordinate mapping used to +// define curve25519.
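In Python (not part of the patch) the conversion amounts to the line below, with Fermat inversion standing in for the constant-time bignum_modinv that is inlined further down; as noted, X = 0 forces T = 0 and hence a zero result regardless of how the inverse treats a zero input:

    P25519 = 2**255 - 19

    def edwards_to_montgomery_x(X, T):
        # u = (X + T) / (X - T) mod p_25519, with 0 |-> 0
        return (X + T) * pow((X - T) % P25519, P25519 - 2, P25519) % P25519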
+// +// First the addition and subtraction: + + add_4(y_3,x_3,w_3) + sub_4(z_3,x_3,w_3) + +// Prepare to call the modular inverse function to get x_3 = 1/z_3 + + mov x0, 4 + add x1, x_3 + add x2, z_3 + adr x3, p_25519 + add x4, tmpspace + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "arm/generic/bignum_modinv.S". + + lsl x10, x0, #3 + add x21, x4, x10 + add x22, x21, x10 + mov x10, xzr +copyloop: + ldr x11, [x2, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + str x11, [x21, x10, lsl #3] + str x12, [x22, x10, lsl #3] + str x12, [x4, x10, lsl #3] + str xzr, [x1, x10, lsl #3] + add x10, x10, #0x1 + cmp x10, x0 + b.cc copyloop + ldr x11, [x4] + sub x12, x11, #0x1 + str x12, [x4] + lsl x20, x11, #2 + sub x20, x11, x20 + eor x20, x20, #0x2 + mov x12, #0x1 + madd x12, x11, x20, x12 + mul x11, x12, x12 + madd x20, x12, x20, x20 + mul x12, x11, x11 + madd x20, x11, x20, x20 + mul x11, x12, x12 + madd x20, x12, x20, x20 + madd x20, x11, x20, x20 + lsl x2, x0, #7 +outerloop: + add x10, x2, #0x3f + lsr x5, x10, #6 + cmp x5, x0 + csel x5, x0, x5, cs + mov x13, xzr + mov x15, xzr + mov x14, xzr + mov x16, xzr + mov x19, xzr + mov x10, xzr +toploop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + orr x17, x11, x12 + cmp x17, xzr + and x17, x19, x13 + csel x15, x17, x15, ne + and x17, x19, x14 + csel x16, x17, x16, ne + csel x13, x11, x13, ne + csel x14, x12, x14, ne + csetm x19, ne + add x10, x10, #0x1 + cmp x10, x5 + b.cc toploop + orr x11, x13, x14 + clz x12, x11 + negs x17, x12 + lsl x13, x13, x12 + csel x15, x15, xzr, ne + lsl x14, x14, x12 + csel x16, x16, xzr, ne + lsr x15, x15, x17 + lsr x16, x16, x17 + orr x13, x13, x15 + orr x14, x14, x16 + ldr x15, [x21] + ldr x16, [x22] + mov x6, #0x1 + mov x7, xzr + mov x8, xzr + mov x9, #0x1 + mov x10, #0x3a + tst x15, #0x1 +innerloop: + csel x11, x14, xzr, ne + csel x12, x16, xzr, ne + csel x17, x8, xzr, ne + csel x19, x9, xzr, ne + ccmp x13, x14, #0x2, ne + sub x11, x13, x11 + sub x12, x15, x12 + csel x14, x14, x13, cs + cneg x11, x11, cc + csel x16, x16, x15, cs + cneg x15, x12, cc + csel x8, x8, x6, cs + csel x9, x9, x7, cs + tst x12, #0x2 + add x6, x6, x17 + add x7, x7, x19 + lsr x13, x11, #1 + lsr x15, x15, #1 + add x8, x8, x8 + add x9, x9, x9 + sub x10, x10, #0x1 + cbnz x10, innerloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +congloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + adds x15, x15, x16 + extr x17, x15, x17, #58 + str x17, [x4, x10, lsl #3] + mov x17, x15 + umulh x15, x7, x12 + adc x13, x13, x15 + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + adds x15, x15, x16 + extr x19, x15, x19, #58 + str x19, [x1, x10, lsl #3] + mov x19, x15 + umulh x15, x9, x12 + adc x14, x14, x15 + add x10, x10, #0x1 + cmp x10, x0 + b.cc congloop + extr x13, x13, x17, #58 + extr x14, x14, x19, #58 + ldr x11, [x4] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, wmontend +wmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, 
#0x1 + str x12, [x4, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wmontloop +wmontend: + adcs x16, x16, x13 + adc x13, xzr, xzr + sub x15, x10, #0x1 + str x16, [x4, x15, lsl #3] + negs x10, xzr +wcmploop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcmploop + sbcs xzr, x13, xzr + csetm x13, cs + negs x10, xzr +wcorrloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x13 + sbcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcorrloop + ldr x11, [x1] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, zmontend +zmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x1, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zmontloop +zmontend: + adcs x16, x16, x14 + adc x14, xzr, xzr + sub x15, x10, #0x1 + str x16, [x1, x15, lsl #3] + negs x10, xzr +zcmploop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcmploop + sbcs xzr, x14, xzr + csetm x14, cs + negs x10, xzr +zcorrloop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x14 + sbcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcorrloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +crossloop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + subs x15, x15, x16 + str x15, [x21, x10, lsl #3] + umulh x15, x7, x12 + sub x17, x15, x17 + sbcs x13, x13, x17 + csetm x17, cc + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + subs x15, x15, x16 + str x15, [x22, x10, lsl #3] + umulh x15, x9, x12 + sub x19, x15, x19 + sbcs x14, x14, x19 + csetm x19, cc + add x10, x10, #0x1 + cmp x10, x5 + b.cc crossloop + cmn x17, x17 + ldr x15, [x21] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip1 +negloop1: + add x11, x10, #0x8 + ldr x12, [x21, x11] + extr x15, x12, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop1 +negskip1: + extr x15, x13, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + cmn x19, x19 + ldr x15, [x22] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip2 +negloop2: + add x11, x10, #0x8 + ldr x12, [x22, x11] + extr x15, x12, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop2 +negskip2: + extr x15, x14, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x10, xzr + cmn x17, x17 +wfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + and x11, x11, x17 + eor x12, x12, x17 + adcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wfliploop + mvn x19, x19 + mov x10, xzr + cmn x19, x19 +zfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + and x11, x11, x19 + eor x12, x12, x19 + adcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, 
x10, x0 + cbnz x11, zfliploop + subs x2, x2, #0x3a + b.hi outerloop + +// The final result is (X + T) / (X - T) + + mul_p25519(resx,y_3,x_3) + +// Restore stack and registers + + add sp, sp, #NSPACE + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// The modulus p_25519 = 2^255 - 19, for the modular inverse + +p_25519: + .quad 0xffffffffffffffed + .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x7fffffffffffffff + +// 2^254 * G and (2^254 + 8) * G in extended-projective coordinates +// but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. + +edwards25519_0g: + + .quad 0x251037f7cf4e861d + .quad 0x10ede0fb19fb128f + .quad 0x96c033b175f5e2c8 + .quad 0x055f070d6c15fb0d + + .quad 0x7c52af2c97473e69 + .quad 0x022f82391bad8378 + .quad 0x9991e1b02adb476f + .quad 0x511144a03a99b855 + + .quad 0x5fafc3b88ff2e4ae + .quad 0x855e4ff0de1230ff + .quad 0x72e302a348492870 + .quad 0x1253c19e53dbe1bc + +edwards25519_8g: + + .quad 0x331d086e0d9abcaa + .quad 0x1e23c96d311a10c9 + .quad 0x96d0f95e58c13478 + .quad 0x2f72f7384fcfcc59 + + .quad 0x39a6cd1cfd7d87c9 + .quad 0x9867a0abd8ae153a + .quad 0xa49d2a5f35986745 + .quad 0x57012940cdfe82e1 + + .quad 0x5046a6532ec5544a + .quad 0x6d674004739ff6c9 + .quad 0x9bbaa44b234a70e3 + .quad 0x5e6d8901138cf386 + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
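For reference, the way the mixed addition in the main loop consumes one such (y - x, x + y, 2 * d * x * y) triple can be written out in Python (not part of the patch), mirroring the double_4/sub_4/.../mul_p25519 sequence above, with (X1, Y1, Z1, W1) the extended-projective accumulator and W1 playing the role of T1:

    P25519 = 2**255 - 19

    def pepadd(X1, Y1, Z1, W1, ymx2, xpy2, kxy2):
        t0 = 2 * Z1 % P25519
        t1 = (Y1 - X1) * ymx2 % P25519
        t2 = (Y1 + X1) * xpy2 % P25519
        t3 = W1 * kxy2 % P25519
        t4, t0 = (t0 - t3) % P25519, (t0 + t3) % P25519
        t5, t1 = (t2 - t1) % P25519, (t2 + t1) % P25519
        # results as written back by the macro sequence
        return (t5 * t4 % P25519,   # X3
                t0 * t1 % P25519,   # Y3
                t4 * t0 % P25519,   # Z3
                t5 * t1 % P25519)   # W3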
+ +edwards25519_gtable: + + // 2^4 * 1 * G + + .quad 0x7ec851ca553e2df3 + .quad 0xa71284cba64878b3 + .quad 0xe6b5e4193288d1e7 + .quad 0x4cf210ec5a9a8883 + .quad 0x322d04a52d9021f6 + .quad 0xb9c19f3375c6bf9c + .quad 0x587a3a4342d20b09 + .quad 0x143b1cf8aa64fe61 + .quad 0x9f867c7d968acaab + .quad 0x5f54258e27092729 + .quad 0xd0a7d34bea180975 + .quad 0x21b546a3374126e1 + + // 2^4 * 2 * G + + .quad 0xa94ff858a2888343 + .quad 0xce0ed4565313ed3c + .quad 0xf55c3dcfb5bf34fa + .quad 0x0a653ca5c9eab371 + .quad 0x490a7a45d185218f + .quad 0x9a15377846049335 + .quad 0x0060ea09cc31e1f6 + .quad 0x7e041577f86ee965 + .quad 0x66b2a496ce5b67f3 + .quad 0xff5492d8bd569796 + .quad 0x503cec294a592cd0 + .quad 0x566943650813acb2 + + // 2^4 * 3 * G + + .quad 0xb818db0c26620798 + .quad 0x5d5c31d9606e354a + .quad 0x0982fa4f00a8cdc7 + .quad 0x17e12bcd4653e2d4 + .quad 0x5672f9eb1dabb69d + .quad 0xba70b535afe853fc + .quad 0x47ac0f752796d66d + .quad 0x32a5351794117275 + .quad 0xd3a644a6df648437 + .quad 0x703b6559880fbfdd + .quad 0xcb852540ad3a1aa5 + .quad 0x0900b3f78e4c6468 + + // 2^4 * 4 * G + + .quad 0x0a851b9f679d651b + .quad 0xe108cb61033342f2 + .quad 0xd601f57fe88b30a3 + .quad 0x371f3acaed2dd714 + .quad 0xed280fbec816ad31 + .quad 0x52d9595bd8e6efe3 + .quad 0x0fe71772f6c623f5 + .quad 0x4314030b051e293c + .quad 0xd560005efbf0bcad + .quad 0x8eb70f2ed1870c5e + .quad 0x201f9033d084e6a0 + .quad 0x4c3a5ae1ce7b6670 + + // 2^4 * 5 * G + + .quad 0x4138a434dcb8fa95 + .quad 0x870cf67d6c96840b + .quad 0xde388574297be82c + .quad 0x7c814db27262a55a + .quad 0xbaf875e4c93da0dd + .quad 0xb93282a771b9294d + .quad 0x80d63fb7f4c6c460 + .quad 0x6de9c73dea66c181 + .quad 0x478904d5a04df8f2 + .quad 0xfafbae4ab10142d3 + .quad 0xf6c8ac63555d0998 + .quad 0x5aac4a412f90b104 + + // 2^4 * 6 * G + + .quad 0xc64f326b3ac92908 + .quad 0x5551b282e663e1e0 + .quad 0x476b35f54a1a4b83 + .quad 0x1b9da3fe189f68c2 + .quad 0x603a0d0abd7f5134 + .quad 0x8089c932e1d3ae46 + .quad 0xdf2591398798bd63 + .quad 0x1c145cd274ba0235 + .quad 0x32e8386475f3d743 + .quad 0x365b8baf6ae5d9ef + .quad 0x825238b6385b681e + .quad 0x234929c1167d65e1 + + // 2^4 * 7 * G + + .quad 0x984decaba077ade8 + .quad 0x383f77ad19eb389d + .quad 0xc7ec6b7e2954d794 + .quad 0x59c77b3aeb7c3a7a + .quad 0x48145cc21d099fcf + .quad 0x4535c192cc28d7e5 + .quad 0x80e7c1e548247e01 + .quad 0x4a5f28743b2973ee + .quad 0xd3add725225ccf62 + .quad 0x911a3381b2152c5d + .quad 0xd8b39fad5b08f87d + .quad 0x6f05606b4799fe3b + + // 2^4 * 8 * G + + .quad 0x9ffe9e92177ba962 + .quad 0x98aee71d0de5cae1 + .quad 0x3ff4ae942d831044 + .quad 0x714de12e58533ac8 + .quad 0x5b433149f91b6483 + .quad 0xadb5dc655a2cbf62 + .quad 0x87fa8412632827b3 + .quad 0x60895e91ab49f8d8 + .quad 0xe9ecf2ed0cf86c18 + .quad 0xb46d06120735dfd4 + .quad 0xbc9da09804b96be7 + .quad 0x73e2e62fd96dc26b + + // 2^8 * 1 * G + + .quad 0xed5b635449aa515e + .quad 0xa865c49f0bc6823a + .quad 0x850c1fe95b42d1c4 + .quad 0x30d76d6f03d315b9 + .quad 0x2eccdd0e632f9c1d + .quad 0x51d0b69676893115 + .quad 0x52dfb76ba8637a58 + .quad 0x6dd37d49a00eef39 + .quad 0x6c4444172106e4c7 + .quad 0xfb53d680928d7f69 + .quad 0xb4739ea4694d3f26 + .quad 0x10c697112e864bb0 + + // 2^8 * 2 * G + + .quad 0x6493c4277dbe5fde + .quad 0x265d4fad19ad7ea2 + .quad 0x0e00dfc846304590 + .quad 0x25e61cabed66fe09 + .quad 0x0ca62aa08358c805 + .quad 0x6a3d4ae37a204247 + .quad 0x7464d3a63b11eddc + .quad 0x03bf9baf550806ef + .quad 0x3f13e128cc586604 + .quad 0x6f5873ecb459747e + .quad 0xa0b63dedcc1268f5 + .quad 0x566d78634586e22c + + // 2^8 * 3 * G + + .quad 0x1637a49f9cc10834 + .quad 0xbc8e56d5a89bc451 + 
.quad 0x1cb5ec0f7f7fd2db + .quad 0x33975bca5ecc35d9 + .quad 0xa1054285c65a2fd0 + .quad 0x6c64112af31667c3 + .quad 0x680ae240731aee58 + .quad 0x14fba5f34793b22a + .quad 0x3cd746166985f7d4 + .quad 0x593e5e84c9c80057 + .quad 0x2fc3f2b67b61131e + .quad 0x14829cea83fc526c + + // 2^8 * 4 * G + + .quad 0xff437b8497dd95c2 + .quad 0x6c744e30aa4eb5a7 + .quad 0x9e0c5d613c85e88b + .quad 0x2fd9c71e5f758173 + .quad 0x21e70b2f4e71ecb8 + .quad 0xe656ddb940a477e3 + .quad 0xbf6556cece1d4f80 + .quad 0x05fc3bc4535d7b7e + .quad 0x24b8b3ae52afdedd + .quad 0x3495638ced3b30cf + .quad 0x33a4bc83a9be8195 + .quad 0x373767475c651f04 + + // 2^8 * 5 * G + + .quad 0x2fba99fd40d1add9 + .quad 0xb307166f96f4d027 + .quad 0x4363f05215f03bae + .quad 0x1fbea56c3b18f999 + .quad 0x634095cb14246590 + .quad 0xef12144016c15535 + .quad 0x9e38140c8910bc60 + .quad 0x6bf5905730907c8c + .quad 0x0fa778f1e1415b8a + .quad 0x06409ff7bac3a77e + .quad 0x6f52d7b89aa29a50 + .quad 0x02521cf67a635a56 + + // 2^8 * 6 * G + + .quad 0x513fee0b0a9d5294 + .quad 0x8f98e75c0fdf5a66 + .quad 0xd4618688bfe107ce + .quad 0x3fa00a7e71382ced + .quad 0xb1146720772f5ee4 + .quad 0xe8f894b196079ace + .quad 0x4af8224d00ac824a + .quad 0x001753d9f7cd6cc4 + .quad 0x3c69232d963ddb34 + .quad 0x1dde87dab4973858 + .quad 0xaad7d1f9a091f285 + .quad 0x12b5fe2fa048edb6 + + // 2^8 * 7 * G + + .quad 0x71f0fbc496fce34d + .quad 0x73b9826badf35bed + .quad 0xd2047261ff28c561 + .quad 0x749b76f96fb1206f + .quad 0xdf2b7c26ad6f1e92 + .quad 0x4b66d323504b8913 + .quad 0x8c409dc0751c8bc3 + .quad 0x6f7e93c20796c7b8 + .quad 0x1f5af604aea6ae05 + .quad 0xc12351f1bee49c99 + .quad 0x61a808b5eeff6b66 + .quad 0x0fcec10f01e02151 + + // 2^8 * 8 * G + + .quad 0x644d58a649fe1e44 + .quad 0x21fcaea231ad777e + .quad 0x02441c5a887fd0d2 + .quad 0x4901aa7183c511f3 + .quad 0x3df2d29dc4244e45 + .quad 0x2b020e7493d8de0a + .quad 0x6cc8067e820c214d + .quad 0x413779166feab90a + .quad 0x08b1b7548c1af8f0 + .quad 0xce0f7a7c246299b4 + .quad 0xf760b0f91e06d939 + .quad 0x41bb887b726d1213 + + // 2^12 * 1 * G + + .quad 0x9267806c567c49d8 + .quad 0x066d04ccca791e6a + .quad 0xa69f5645e3cc394b + .quad 0x5c95b686a0788cd2 + .quad 0x97d980e0aa39f7d2 + .quad 0x35d0384252c6b51c + .quad 0x7d43f49307cd55aa + .quad 0x56bd36cfb78ac362 + .quad 0x2ac519c10d14a954 + .quad 0xeaf474b494b5fa90 + .quad 0xe6af8382a9f87a5a + .quad 0x0dea6db1879be094 + + // 2^12 * 2 * G + + .quad 0xaa66bf547344e5ab + .quad 0xda1258888f1b4309 + .quad 0x5e87d2b3fd564b2f + .quad 0x5b2c78885483b1dd + .quad 0x15baeb74d6a8797a + .quad 0x7ef55cf1fac41732 + .quad 0x29001f5a3c8b05c5 + .quad 0x0ad7cc8752eaccfb + .quad 0x52151362793408cf + .quad 0xeb0f170319963d94 + .quad 0xa833b2fa883d9466 + .quad 0x093a7fa775003c78 + + // 2^12 * 3 * G + + .quad 0xe5107de63a16d7be + .quad 0xa377ffdc9af332cf + .quad 0x70d5bf18440b677f + .quad 0x6a252b19a4a31403 + .quad 0xb8e9604460a91286 + .quad 0x7f3fd8047778d3de + .quad 0x67d01e31bf8a5e2d + .quad 0x7b038a06c27b653e + .quad 0x9ed919d5d36990f3 + .quad 0x5213aebbdb4eb9f2 + .quad 0xc708ea054cb99135 + .quad 0x58ded57f72260e56 + + // 2^12 * 4 * G + + .quad 0x78e79dade9413d77 + .quad 0xf257f9d59729e67d + .quad 0x59db910ee37aa7e6 + .quad 0x6aa11b5bbb9e039c + .quad 0xda6d53265b0fd48b + .quad 0x8960823193bfa988 + .quad 0xd78ac93261d57e28 + .quad 0x79f2942d3a5c8143 + .quad 0x97da2f25b6c88de9 + .quad 0x251ba7eaacf20169 + .quad 0x09b44f87ef4eb4e4 + .quad 0x7d90ab1bbc6a7da5 + + // 2^12 * 5 * G + + .quad 0x9acca683a7016bfe + .quad 0x90505f4df2c50b6d + .quad 0x6b610d5fcce435aa + .quad 0x19a10d446198ff96 + .quad 0x1a07a3f496b3c397 + .quad 
0x11ceaa188f4e2532 + .quad 0x7d9498d5a7751bf0 + .quad 0x19ed161f508dd8a0 + .quad 0x560a2cd687dce6ca + .quad 0x7f3568c48664cf4d + .quad 0x8741e95222803a38 + .quad 0x483bdab1595653fc + + // 2^12 * 6 * G + + .quad 0xfa780f148734fa49 + .quad 0x106f0b70360534e0 + .quad 0x2210776fe3e307bd + .quad 0x3286c109dde6a0fe + .quad 0xd6cf4d0ab4da80f6 + .quad 0x82483e45f8307fe0 + .quad 0x05005269ae6f9da4 + .quad 0x1c7052909cf7877a + .quad 0x32ee7de2874e98d4 + .quad 0x14c362e9b97e0c60 + .quad 0x5781dcde6a60a38a + .quad 0x217dd5eaaa7aa840 + + // 2^12 * 7 * G + + .quad 0x9db7c4d0248e1eb0 + .quad 0xe07697e14d74bf52 + .quad 0x1e6a9b173c562354 + .quad 0x7fa7c21f795a4965 + .quad 0x8bdf1fb9be8c0ec8 + .quad 0x00bae7f8e30a0282 + .quad 0x4963991dad6c4f6c + .quad 0x07058a6e5df6f60a + .quad 0xe9eb02c4db31f67f + .quad 0xed25fd8910bcfb2b + .quad 0x46c8131f5c5cddb4 + .quad 0x33b21c13a0cb9bce + + // 2^12 * 8 * G + + .quad 0x360692f8087d8e31 + .quad 0xf4dcc637d27163f7 + .quad 0x25a4e62065ea5963 + .quad 0x659bf72e5ac160d9 + .quad 0x9aafb9b05ee38c5b + .quad 0xbf9d2d4e071a13c7 + .quad 0x8eee6e6de933290a + .quad 0x1c3bab17ae109717 + .quad 0x1c9ab216c7cab7b0 + .quad 0x7d65d37407bbc3cc + .quad 0x52744750504a58d5 + .quad 0x09f2606b131a2990 + + // 2^16 * 1 * G + + .quad 0x40e87d44744346be + .quad 0x1d48dad415b52b25 + .quad 0x7c3a8a18a13b603e + .quad 0x4eb728c12fcdbdf7 + .quad 0x7e234c597c6691ae + .quad 0x64889d3d0a85b4c8 + .quad 0xdae2c90c354afae7 + .quad 0x0a871e070c6a9e1d + .quad 0x3301b5994bbc8989 + .quad 0x736bae3a5bdd4260 + .quad 0x0d61ade219d59e3c + .quad 0x3ee7300f2685d464 + + // 2^16 * 2 * G + + .quad 0xf5d255e49e7dd6b7 + .quad 0x8016115c610b1eac + .quad 0x3c99975d92e187ca + .quad 0x13815762979125c2 + .quad 0x43fa7947841e7518 + .quad 0xe5c6fa59639c46d7 + .quad 0xa1065e1de3052b74 + .quad 0x7d47c6a2cfb89030 + .quad 0x3fdad0148ef0d6e0 + .quad 0x9d3e749a91546f3c + .quad 0x71ec621026bb8157 + .quad 0x148cf58d34c9ec80 + + // 2^16 * 3 * G + + .quad 0x46a492f67934f027 + .quad 0x469984bef6840aa9 + .quad 0x5ca1bc2a89611854 + .quad 0x3ff2fa1ebd5dbbd4 + .quad 0xe2572f7d9ae4756d + .quad 0x56c345bb88f3487f + .quad 0x9fd10b6d6960a88d + .quad 0x278febad4eaea1b9 + .quad 0xb1aa681f8c933966 + .quad 0x8c21949c20290c98 + .quad 0x39115291219d3c52 + .quad 0x4104dd02fe9c677b + + // 2^16 * 4 * G + + .quad 0x72b2bf5e1124422a + .quad 0xa1fa0c3398a33ab5 + .quad 0x94cb6101fa52b666 + .quad 0x2c863b00afaf53d5 + .quad 0x81214e06db096ab8 + .quad 0x21a8b6c90ce44f35 + .quad 0x6524c12a409e2af5 + .quad 0x0165b5a48efca481 + .quad 0xf190a474a0846a76 + .quad 0x12eff984cd2f7cc0 + .quad 0x695e290658aa2b8f + .quad 0x591b67d9bffec8b8 + + // 2^16 * 5 * G + + .quad 0x312f0d1c80b49bfa + .quad 0x5979515eabf3ec8a + .quad 0x727033c09ef01c88 + .quad 0x3de02ec7ca8f7bcb + .quad 0x99b9b3719f18b55d + .quad 0xe465e5faa18c641e + .quad 0x61081136c29f05ed + .quad 0x489b4f867030128b + .quad 0xd232102d3aeb92ef + .quad 0xe16253b46116a861 + .quad 0x3d7eabe7190baa24 + .quad 0x49f5fbba496cbebf + + // 2^16 * 6 * G + + .quad 0x30949a108a5bcfd4 + .quad 0xdc40dd70bc6473eb + .quad 0x92c294c1307c0d1c + .quad 0x5604a86dcbfa6e74 + .quad 0x155d628c1e9c572e + .quad 0x8a4d86acc5884741 + .quad 0x91a352f6515763eb + .quad 0x06a1a6c28867515b + .quad 0x7288d1d47c1764b6 + .quad 0x72541140e0418b51 + .quad 0x9f031a6018acf6d1 + .quad 0x20989e89fe2742c6 + + // 2^16 * 7 * G + + .quad 0x499777fd3a2dcc7f + .quad 0x32857c2ca54fd892 + .quad 0xa279d864d207e3a0 + .quad 0x0403ed1d0ca67e29 + .quad 0x1674278b85eaec2e + .quad 0x5621dc077acb2bdf + .quad 0x640a4c1661cbf45a + .quad 0x730b9950f70595d3 + .quad 
0xc94b2d35874ec552 + .quad 0xc5e6c8cf98246f8d + .quad 0xf7cb46fa16c035ce + .quad 0x5bd7454308303dcc + + // 2^16 * 8 * G + + .quad 0x7f9ad19528b24cc2 + .quad 0x7f6b54656335c181 + .quad 0x66b8b66e4fc07236 + .quad 0x133a78007380ad83 + .quad 0x85c4932115e7792a + .quad 0xc64c89a2bdcdddc9 + .quad 0x9d1e3da8ada3d762 + .quad 0x5bb7db123067f82c + .quad 0x0961f467c6ca62be + .quad 0x04ec21d6211952ee + .quad 0x182360779bd54770 + .quad 0x740dca6d58f0e0d2 + + // 2^20 * 1 * G + + .quad 0x50b70bf5d3f0af0b + .quad 0x4feaf48ae32e71f7 + .quad 0x60e84ed3a55bbd34 + .quad 0x00ed489b3f50d1ed + .quad 0x3906c72aed261ae5 + .quad 0x9ab68fd988e100f7 + .quad 0xf5e9059af3360197 + .quad 0x0e53dc78bf2b6d47 + .quad 0xb90829bf7971877a + .quad 0x5e4444636d17e631 + .quad 0x4d05c52e18276893 + .quad 0x27632d9a5a4a4af5 + + // 2^20 * 2 * G + + .quad 0xd11ff05154b260ce + .quad 0xd86dc38e72f95270 + .quad 0x601fcd0d267cc138 + .quad 0x2b67916429e90ccd + .quad 0xa98285d187eaffdb + .quad 0xa5b4fbbbd8d0a864 + .quad 0xb658f27f022663f7 + .quad 0x3bbc2b22d99ce282 + .quad 0xb917c952583c0a58 + .quad 0x653ff9b80fe4c6f3 + .quad 0x9b0da7d7bcdf3c0c + .quad 0x43a0eeb6ab54d60e + + // 2^20 * 3 * G + + .quad 0x396966a46d4a5487 + .quad 0xf811a18aac2bb3ba + .quad 0x66e4685b5628b26b + .quad 0x70a477029d929b92 + .quad 0x3ac6322357875fe8 + .quad 0xd9d4f4ecf5fbcb8f + .quad 0x8dee8493382bb620 + .quad 0x50c5eaa14c799fdc + .quad 0xdd0edc8bd6f2fb3c + .quad 0x54c63aa79cc7b7a0 + .quad 0xae0b032b2c8d9f1a + .quad 0x6f9ce107602967fb + + // 2^20 * 4 * G + + .quad 0xad1054b1cde1c22a + .quad 0xc4a8e90248eb32df + .quad 0x5f3e7b33accdc0ea + .quad 0x72364713fc79963e + .quad 0x139693063520e0b5 + .quad 0x437fcf7c88ea03fe + .quad 0xf7d4c40bd3c959bc + .quad 0x699154d1f893ded9 + .quad 0x315d5c75b4b27526 + .quad 0xcccb842d0236daa5 + .quad 0x22f0c8a3345fee8e + .quad 0x73975a617d39dbed + + // 2^20 * 5 * G + + .quad 0xe4024df96375da10 + .quad 0x78d3251a1830c870 + .quad 0x902b1948658cd91c + .quad 0x7e18b10b29b7438a + .quad 0x6f37f392f4433e46 + .quad 0x0e19b9a11f566b18 + .quad 0x220fb78a1fd1d662 + .quad 0x362a4258a381c94d + .quad 0x9071d9132b6beb2f + .quad 0x0f26e9ad28418247 + .quad 0xeab91ec9bdec925d + .quad 0x4be65bc8f48af2de + + // 2^20 * 6 * G + + .quad 0x78487feba36e7028 + .quad 0x5f3f13001dd8ce34 + .quad 0x934fb12d4b30c489 + .quad 0x056c244d397f0a2b + .quad 0x1d50fba257c26234 + .quad 0x7bd4823adeb0678b + .quad 0xc2b0dc6ea6538af5 + .quad 0x5665eec6351da73e + .quad 0xdb3ee00943bfb210 + .quad 0x4972018720800ac2 + .quad 0x26ab5d6173bd8667 + .quad 0x20b209c2ab204938 + + // 2^20 * 7 * G + + .quad 0x549e342ac07fb34b + .quad 0x02d8220821373d93 + .quad 0xbc262d70acd1f567 + .quad 0x7a92c9fdfbcac784 + .quad 0x1fcca94516bd3289 + .quad 0x448d65aa41420428 + .quad 0x59c3b7b216a55d62 + .quad 0x49992cc64e612cd8 + .quad 0x65bd1bea70f801de + .quad 0x1befb7c0fe49e28a + .quad 0xa86306cdb1b2ae4a + .quad 0x3b7ac0cd265c2a09 + + // 2^20 * 8 * G + + .quad 0x822bee438c01bcec + .quad 0x530cb525c0fbc73b + .quad 0x48519034c1953fe9 + .quad 0x265cc261e09a0f5b + .quad 0xf0d54e4f22ed39a7 + .quad 0xa2aae91e5608150a + .quad 0xf421b2e9eddae875 + .quad 0x31bc531d6b7de992 + .quad 0xdf3d134da980f971 + .quad 0x7a4fb8d1221a22a7 + .quad 0x3df7d42035aad6d8 + .quad 0x2a14edcc6a1a125e + + // 2^24 * 1 * G + + .quad 0xdf48ee0752cfce4e + .quad 0xc3fffaf306ec08b7 + .quad 0x05710b2ab95459c4 + .quad 0x161d25fa963ea38d + .quad 0x231a8c570478433c + .quad 0xb7b5270ec281439d + .quad 0xdbaa99eae3d9079f + .quad 0x2c03f5256c2b03d9 + .quad 0x790f18757b53a47d + .quad 0x307b0130cf0c5879 + .quad 0x31903d77257ef7f9 + .quad 
0x699468bdbd96bbaf + + // 2^24 * 2 * G + + .quad 0xbd1f2f46f4dafecf + .quad 0x7cef0114a47fd6f7 + .quad 0xd31ffdda4a47b37f + .quad 0x525219a473905785 + .quad 0xd8dd3de66aa91948 + .quad 0x485064c22fc0d2cc + .quad 0x9b48246634fdea2f + .quad 0x293e1c4e6c4a2e3a + .quad 0x376e134b925112e1 + .quad 0x703778b5dca15da0 + .quad 0xb04589af461c3111 + .quad 0x5b605c447f032823 + + // 2^24 * 3 * G + + .quad 0xb965805920c47c89 + .quad 0xe7f0100c923b8fcc + .quad 0x0001256502e2ef77 + .quad 0x24a76dcea8aeb3ee + .quad 0x3be9fec6f0e7f04c + .quad 0x866a579e75e34962 + .quad 0x5542ef161e1de61a + .quad 0x2f12fef4cc5abdd5 + .quad 0x0a4522b2dfc0c740 + .quad 0x10d06e7f40c9a407 + .quad 0xc6cf144178cff668 + .quad 0x5e607b2518a43790 + + // 2^24 * 4 * G + + .quad 0x58b31d8f6cdf1818 + .quad 0x35cfa74fc36258a2 + .quad 0xe1b3ff4f66e61d6e + .quad 0x5067acab6ccdd5f7 + .quad 0xa02c431ca596cf14 + .quad 0xe3c42d40aed3e400 + .quad 0xd24526802e0f26db + .quad 0x201f33139e457068 + .quad 0xfd527f6b08039d51 + .quad 0x18b14964017c0006 + .quad 0xd5220eb02e25a4a8 + .quad 0x397cba8862460375 + + // 2^24 * 5 * G + + .quad 0x30c13093f05959b2 + .quad 0xe23aa18de9a97976 + .quad 0x222fd491721d5e26 + .quad 0x2339d320766e6c3a + .quad 0x7815c3fbc81379e7 + .quad 0xa6619420dde12af1 + .quad 0xffa9c0f885a8fdd5 + .quad 0x771b4022c1e1c252 + .quad 0xd87dd986513a2fa7 + .quad 0xf5ac9b71f9d4cf08 + .quad 0xd06bc31b1ea283b3 + .quad 0x331a189219971a76 + + // 2^24 * 6 * G + + .quad 0xf5166f45fb4f80c6 + .quad 0x9c36c7de61c775cf + .quad 0xe3d4e81b9041d91c + .quad 0x31167c6b83bdfe21 + .quad 0x26512f3a9d7572af + .quad 0x5bcbe28868074a9e + .quad 0x84edc1c11180f7c4 + .quad 0x1ac9619ff649a67b + .quad 0xf22b3842524b1068 + .quad 0x5068343bee9ce987 + .quad 0xfc9d71844a6250c8 + .quad 0x612436341f08b111 + + // 2^24 * 7 * G + + .quad 0xd99d41db874e898d + .quad 0x09fea5f16c07dc20 + .quad 0x793d2c67d00f9bbc + .quad 0x46ebe2309e5eff40 + .quad 0x8b6349e31a2d2638 + .quad 0x9ddfb7009bd3fd35 + .quad 0x7f8bf1b8a3a06ba4 + .quad 0x1522aa3178d90445 + .quad 0x2c382f5369614938 + .quad 0xdafe409ab72d6d10 + .quad 0xe8c83391b646f227 + .quad 0x45fe70f50524306c + + // 2^24 * 8 * G + + .quad 0xda4875a6960c0b8c + .quad 0x5b68d076ef0e2f20 + .quad 0x07fb51cf3d0b8fd4 + .quad 0x428d1623a0e392d4 + .quad 0x62f24920c8951491 + .quad 0x05f007c83f630ca2 + .quad 0x6fbb45d2f5c9d4b8 + .quad 0x16619f6db57a2245 + .quad 0x084f4a4401a308fd + .quad 0xa82219c376a5caac + .quad 0xdeb8de4643d1bc7d + .quad 0x1d81592d60bd38c6 + + // 2^28 * 1 * G + + .quad 0xd833d7beec2a4c38 + .quad 0x2c9162830acc20ed + .quad 0xe93a47aa92df7581 + .quad 0x702d67a3333c4a81 + .quad 0x3a4a369a2f89c8a1 + .quad 0x63137a1d7c8de80d + .quad 0xbcac008a78eda015 + .quad 0x2cb8b3a5b483b03f + .quad 0x36e417cbcb1b90a1 + .quad 0x33b3ddaa7f11794e + .quad 0x3f510808885bc607 + .quad 0x24141dc0e6a8020d + + // 2^28 * 2 * G + + .quad 0x59f73c773fefee9d + .quad 0xb3f1ef89c1cf989d + .quad 0xe35dfb42e02e545f + .quad 0x5766120b47a1b47c + .quad 0x91925dccbd83157d + .quad 0x3ca1205322cc8094 + .quad 0x28e57f183f90d6e4 + .quad 0x1a4714cede2e767b + .quad 0xdb20ba0fb8b6b7ff + .quad 0xb732c3b677511fa1 + .quad 0xa92b51c099f02d89 + .quad 0x4f3875ad489ca5f1 + + // 2^28 * 3 * G + + .quad 0xc7fc762f4932ab22 + .quad 0x7ac0edf72f4c3c1b + .quad 0x5f6b55aa9aa895e8 + .quad 0x3680274dad0a0081 + .quad 0x79ed13f6ee73eec0 + .quad 0xa5c6526d69110bb1 + .quad 0xe48928c38603860c + .quad 0x722a1446fd7059f5 + .quad 0xd0959fe9a8cf8819 + .quad 0xd0a995508475a99c + .quad 0x6eac173320b09cc5 + .quad 0x628ecf04331b1095 + + // 2^28 * 4 * G + + .quad 0x98bcb118a9d0ddbc + .quad 0xee449e3408b4802b 
+ .quad 0x87089226b8a6b104 + .quad 0x685f349a45c7915d + .quad 0x9b41acf85c74ccf1 + .quad 0xb673318108265251 + .quad 0x99c92aed11adb147 + .quad 0x7a47d70d34ecb40f + .quad 0x60a0c4cbcc43a4f5 + .quad 0x775c66ca3677bea9 + .quad 0xa17aa1752ff8f5ed + .quad 0x11ded9020e01fdc0 + + // 2^28 * 5 * G + + .quad 0x890e7809caefe704 + .quad 0x8728296de30e8c6c + .quad 0x4c5cd2a392aeb1c9 + .quad 0x194263d15771531f + .quad 0x471f95b03bea93b7 + .quad 0x0552d7d43313abd3 + .quad 0xbd9370e2e17e3f7b + .quad 0x7b120f1db20e5bec + .quad 0x17d2fb3d86502d7a + .quad 0xb564d84450a69352 + .quad 0x7da962c8a60ed75d + .quad 0x00d0f85b318736aa + + // 2^28 * 6 * G + + .quad 0x978b142e777c84fd + .quad 0xf402644705a8c062 + .quad 0xa67ad51be7e612c7 + .quad 0x2f7b459698dd6a33 + .quad 0xa6753c1efd7621c1 + .quad 0x69c0b4a7445671f5 + .quad 0x971f527405b23c11 + .quad 0x387bc74851a8c7cd + .quad 0x81894b4d4a52a9a8 + .quad 0xadd93e12f6b8832f + .quad 0x184d8548b61bd638 + .quad 0x3f1c62dbd6c9f6cd + + // 2^28 * 7 * G + + .quad 0x2e8f1f0091910c1f + .quad 0xa4df4fe0bff2e12c + .quad 0x60c6560aee927438 + .quad 0x6338283facefc8fa + .quad 0x3fad3e40148f693d + .quad 0x052656e194eb9a72 + .quad 0x2f4dcbfd184f4e2f + .quad 0x406f8db1c482e18b + .quad 0x9e630d2c7f191ee4 + .quad 0x4fbf8301bc3ff670 + .quad 0x787d8e4e7afb73c4 + .quad 0x50d83d5be8f58fa5 + + // 2^28 * 8 * G + + .quad 0x85683916c11a1897 + .quad 0x2d69a4efe506d008 + .quad 0x39af1378f664bd01 + .quad 0x65942131361517c6 + .quad 0xc0accf90b4d3b66d + .quad 0xa7059de561732e60 + .quad 0x033d1f7870c6b0ba + .quad 0x584161cd26d946e4 + .quad 0xbbf2b1a072d27ca2 + .quad 0xbf393c59fbdec704 + .quad 0xe98dbbcee262b81e + .quad 0x02eebd0b3029b589 + + // 2^32 * 1 * G + + .quad 0x61368756a60dac5f + .quad 0x17e02f6aebabdc57 + .quad 0x7f193f2d4cce0f7d + .quad 0x20234a7789ecdcf0 + .quad 0x8765b69f7b85c5e8 + .quad 0x6ff0678bd168bab2 + .quad 0x3a70e77c1d330f9b + .quad 0x3a5f6d51b0af8e7c + .quad 0x76d20db67178b252 + .quad 0x071c34f9d51ed160 + .quad 0xf62a4a20b3e41170 + .quad 0x7cd682353cffe366 + + // 2^32 * 2 * G + + .quad 0x0be1a45bd887fab6 + .quad 0x2a846a32ba403b6e + .quad 0xd9921012e96e6000 + .quad 0x2838c8863bdc0943 + .quad 0xa665cd6068acf4f3 + .quad 0x42d92d183cd7e3d3 + .quad 0x5759389d336025d9 + .quad 0x3ef0253b2b2cd8ff + .quad 0xd16bb0cf4a465030 + .quad 0xfa496b4115c577ab + .quad 0x82cfae8af4ab419d + .quad 0x21dcb8a606a82812 + + // 2^32 * 3 * G + + .quad 0x5c6004468c9d9fc8 + .quad 0x2540096ed42aa3cb + .quad 0x125b4d4c12ee2f9c + .quad 0x0bc3d08194a31dab + .quad 0x9a8d00fabe7731ba + .quad 0x8203607e629e1889 + .quad 0xb2cc023743f3d97f + .quad 0x5d840dbf6c6f678b + .quad 0x706e380d309fe18b + .quad 0x6eb02da6b9e165c7 + .quad 0x57bbba997dae20ab + .quad 0x3a4276232ac196dd + + // 2^32 * 4 * G + + .quad 0x4b42432c8a7084fa + .quad 0x898a19e3dfb9e545 + .quad 0xbe9f00219c58e45d + .quad 0x1ff177cea16debd1 + .quad 0x3bf8c172db447ecb + .quad 0x5fcfc41fc6282dbd + .quad 0x80acffc075aa15fe + .quad 0x0770c9e824e1a9f9 + .quad 0xcf61d99a45b5b5fd + .quad 0x860984e91b3a7924 + .quad 0xe7300919303e3e89 + .quad 0x39f264fd41500b1e + + // 2^32 * 5 * G + + .quad 0xa7ad3417dbe7e29c + .quad 0xbd94376a2b9c139c + .quad 0xa0e91b8e93597ba9 + .quad 0x1712d73468889840 + .quad 0xd19b4aabfe097be1 + .quad 0xa46dfce1dfe01929 + .quad 0xc3c908942ca6f1ff + .quad 0x65c621272c35f14e + .quad 0xe72b89f8ce3193dd + .quad 0x4d103356a125c0bb + .quad 0x0419a93d2e1cfe83 + .quad 0x22f9800ab19ce272 + + // 2^32 * 6 * G + + .quad 0x605a368a3e9ef8cb + .quad 0xe3e9c022a5504715 + .quad 0x553d48b05f24248f + .quad 0x13f416cd647626e5 + .quad 0x42029fdd9a6efdac + .quad 
0xb912cebe34a54941 + .quad 0x640f64b987bdf37b + .quad 0x4171a4d38598cab4 + .quad 0xfa2758aa99c94c8c + .quad 0x23006f6fb000b807 + .quad 0xfbd291ddadda5392 + .quad 0x508214fa574bd1ab + + // 2^32 * 7 * G + + .quad 0xc20269153ed6fe4b + .quad 0xa65a6739511d77c4 + .quad 0xcbde26462c14af94 + .quad 0x22f960ec6faba74b + .quad 0x461a15bb53d003d6 + .quad 0xb2102888bcf3c965 + .quad 0x27c576756c683a5a + .quad 0x3a7758a4c86cb447 + .quad 0x548111f693ae5076 + .quad 0x1dae21df1dfd54a6 + .quad 0x12248c90f3115e65 + .quad 0x5d9fd15f8de7f494 + + // 2^32 * 8 * G + + .quad 0x031408d36d63727f + .quad 0x6a379aefd7c7b533 + .quad 0xa9e18fc5ccaee24b + .quad 0x332f35914f8fbed3 + .quad 0x3f244d2aeed7521e + .quad 0x8e3a9028432e9615 + .quad 0xe164ba772e9c16d4 + .quad 0x3bc187fa47eb98d8 + .quad 0x6d470115ea86c20c + .quad 0x998ab7cb6c46d125 + .quad 0xd77832b53a660188 + .quad 0x450d81ce906fba03 + + // 2^36 * 1 * G + + .quad 0xf8ae4d2ad8453902 + .quad 0x7018058ee8db2d1d + .quad 0xaab3995fc7d2c11e + .quad 0x53b16d2324ccca79 + .quad 0x23264d66b2cae0b5 + .quad 0x7dbaed33ebca6576 + .quad 0x030ebed6f0d24ac8 + .quad 0x2a887f78f7635510 + .quad 0x2a23b9e75c012d4f + .quad 0x0c974651cae1f2ea + .quad 0x2fb63273675d70ca + .quad 0x0ba7250b864403f5 + + // 2^36 * 2 * G + + .quad 0xbb0d18fd029c6421 + .quad 0xbc2d142189298f02 + .quad 0x8347f8e68b250e96 + .quad 0x7b9f2fe8032d71c9 + .quad 0xdd63589386f86d9c + .quad 0x61699176e13a85a4 + .quad 0x2e5111954eaa7d57 + .quad 0x32c21b57fb60bdfb + .quad 0xd87823cd319e0780 + .quad 0xefc4cfc1897775c5 + .quad 0x4854fb129a0ab3f7 + .quad 0x12c49d417238c371 + + // 2^36 * 3 * G + + .quad 0x0950b533ffe83769 + .quad 0x21861c1d8e1d6bd1 + .quad 0xf022d8381302e510 + .quad 0x2509200c6391cab4 + .quad 0x09b3a01783799542 + .quad 0x626dd08faad5ee3f + .quad 0xba00bceeeb70149f + .quad 0x1421b246a0a444c9 + .quad 0x4aa43a8e8c24a7c7 + .quad 0x04c1f540d8f05ef5 + .quad 0xadba5e0c0b3eb9dc + .quad 0x2ab5504448a49ce3 + + // 2^36 * 4 * G + + .quad 0x2ed227266f0f5dec + .quad 0x9824ee415ed50824 + .quad 0x807bec7c9468d415 + .quad 0x7093bae1b521e23f + .quad 0xdc07ac631c5d3afa + .quad 0x58615171f9df8c6c + .quad 0x72a079d89d73e2b0 + .quad 0x7301f4ceb4eae15d + .quad 0x6409e759d6722c41 + .quad 0xa674e1cf72bf729b + .quad 0xbc0a24eb3c21e569 + .quad 0x390167d24ebacb23 + + // 2^36 * 5 * G + + .quad 0x27f58e3bba353f1c + .quad 0x4c47764dbf6a4361 + .quad 0xafbbc4e56e562650 + .quad 0x07db2ee6aae1a45d + .quad 0xd7bb054ba2f2120b + .quad 0xe2b9ceaeb10589b7 + .quad 0x3fe8bac8f3c0edbe + .quad 0x4cbd40767112cb69 + .quad 0x0b603cc029c58176 + .quad 0x5988e3825cb15d61 + .quad 0x2bb61413dcf0ad8d + .quad 0x7b8eec6c74183287 + + // 2^36 * 6 * G + + .quad 0xe4ca40782cd27cb0 + .quad 0xdaf9c323fbe967bd + .quad 0xb29bd34a8ad41e9e + .quad 0x72810497626ede4d + .quad 0x32fee570fc386b73 + .quad 0xda8b0141da3a8cc7 + .quad 0x975ffd0ac8968359 + .quad 0x6ee809a1b132a855 + .quad 0x9444bb31fcfd863a + .quad 0x2fe3690a3e4e48c5 + .quad 0xdc29c867d088fa25 + .quad 0x13bd1e38d173292e + + // 2^36 * 7 * G + + .quad 0xd32b4cd8696149b5 + .quad 0xe55937d781d8aab7 + .quad 0x0bcb2127ae122b94 + .quad 0x41e86fcfb14099b0 + .quad 0x223fb5cf1dfac521 + .quad 0x325c25316f554450 + .quad 0x030b98d7659177ac + .quad 0x1ed018b64f88a4bd + .quad 0x3630dfa1b802a6b0 + .quad 0x880f874742ad3bd5 + .quad 0x0af90d6ceec5a4d4 + .quad 0x746a247a37cdc5d9 + + // 2^36 * 8 * G + + .quad 0xd531b8bd2b7b9af6 + .quad 0x5005093537fc5b51 + .quad 0x232fcf25c593546d + .quad 0x20a365142bb40f49 + .quad 0x6eccd85278d941ed + .quad 0x2254ae83d22f7843 + .quad 0xc522d02e7bbfcdb7 + .quad 0x681e3351bff0e4e2 + .quad 
0x8b64b59d83034f45 + .quad 0x2f8b71f21fa20efb + .quad 0x69249495ba6550e4 + .quad 0x539ef98e45d5472b + + // 2^40 * 1 * G + + .quad 0x6e7bb6a1a6205275 + .quad 0xaa4f21d7413c8e83 + .quad 0x6f56d155e88f5cb2 + .quad 0x2de25d4ba6345be1 + .quad 0xd074d8961cae743f + .quad 0xf86d18f5ee1c63ed + .quad 0x97bdc55be7f4ed29 + .quad 0x4cbad279663ab108 + .quad 0x80d19024a0d71fcd + .quad 0xc525c20afb288af8 + .quad 0xb1a3974b5f3a6419 + .quad 0x7d7fbcefe2007233 + + // 2^40 * 2 * G + + .quad 0xfaef1e6a266b2801 + .quad 0x866c68c4d5739f16 + .quad 0xf68a2fbc1b03762c + .quad 0x5975435e87b75a8d + .quad 0xcd7c5dc5f3c29094 + .quad 0xc781a29a2a9105ab + .quad 0x80c61d36421c3058 + .quad 0x4f9cd196dcd8d4d7 + .quad 0x199297d86a7b3768 + .quad 0xd0d058241ad17a63 + .quad 0xba029cad5c1c0c17 + .quad 0x7ccdd084387a0307 + + // 2^40 * 3 * G + + .quad 0xdca6422c6d260417 + .quad 0xae153d50948240bd + .quad 0xa9c0c1b4fb68c677 + .quad 0x428bd0ed61d0cf53 + .quad 0x9b0c84186760cc93 + .quad 0xcdae007a1ab32a99 + .quad 0xa88dec86620bda18 + .quad 0x3593ca848190ca44 + .quad 0x9213189a5e849aa7 + .quad 0xd4d8c33565d8facd + .quad 0x8c52545b53fdbbd1 + .quad 0x27398308da2d63e6 + + // 2^40 * 4 * G + + .quad 0x42c38d28435ed413 + .quad 0xbd50f3603278ccc9 + .quad 0xbb07ab1a79da03ef + .quad 0x269597aebe8c3355 + .quad 0xb9a10e4c0a702453 + .quad 0x0fa25866d57d1bde + .quad 0xffb9d9b5cd27daf7 + .quad 0x572c2945492c33fd + .quad 0xc77fc745d6cd30be + .quad 0xe4dfe8d3e3baaefb + .quad 0xa22c8830aa5dda0c + .quad 0x7f985498c05bca80 + + // 2^40 * 5 * G + + .quad 0x3849ce889f0be117 + .quad 0x8005ad1b7b54a288 + .quad 0x3da3c39f23fc921c + .quad 0x76c2ec470a31f304 + .quad 0xd35615520fbf6363 + .quad 0x08045a45cf4dfba6 + .quad 0xeec24fbc873fa0c2 + .quad 0x30f2653cd69b12e7 + .quad 0x8a08c938aac10c85 + .quad 0x46179b60db276bcb + .quad 0xa920c01e0e6fac70 + .quad 0x2f1273f1596473da + + // 2^40 * 6 * G + + .quad 0x4739fc7c8ae01e11 + .quad 0xfd5274904a6aab9f + .quad 0x41d98a8287728f2e + .quad 0x5d9e572ad85b69f2 + .quad 0x30488bd755a70bc0 + .quad 0x06d6b5a4f1d442e7 + .quad 0xead1a69ebc596162 + .quad 0x38ac1997edc5f784 + .quad 0x0666b517a751b13b + .quad 0x747d06867e9b858c + .quad 0xacacc011454dde49 + .quad 0x22dfcd9cbfe9e69c + + // 2^40 * 7 * G + + .quad 0x8ddbd2e0c30d0cd9 + .quad 0xad8e665facbb4333 + .quad 0x8f6b258c322a961f + .quad 0x6b2916c05448c1c7 + .quad 0x56ec59b4103be0a1 + .quad 0x2ee3baecd259f969 + .quad 0x797cb29413f5cd32 + .quad 0x0fe9877824cde472 + .quad 0x7edb34d10aba913b + .quad 0x4ea3cd822e6dac0e + .quad 0x66083dff6578f815 + .quad 0x4c303f307ff00a17 + + // 2^40 * 8 * G + + .quad 0xd30a3bd617b28c85 + .quad 0xc5d377b739773bea + .quad 0xc6c6e78c1e6a5cbf + .quad 0x0d61b8f78b2ab7c4 + .quad 0x29fc03580dd94500 + .quad 0xecd27aa46fbbec93 + .quad 0x130a155fc2e2a7f8 + .quad 0x416b151ab706a1d5 + .quad 0x56a8d7efe9c136b0 + .quad 0xbd07e5cd58e44b20 + .quad 0xafe62fda1b57e0ab + .quad 0x191a2af74277e8d2 + + // 2^44 * 1 * G + + .quad 0xd550095bab6f4985 + .quad 0x04f4cd5b4fbfaf1a + .quad 0x9d8e2ed12a0c7540 + .quad 0x2bc24e04b2212286 + .quad 0x09d4b60b2fe09a14 + .quad 0xc384f0afdbb1747e + .quad 0x58e2ea8978b5fd6e + .quad 0x519ef577b5e09b0a + .quad 0x1863d7d91124cca9 + .quad 0x7ac08145b88a708e + .quad 0x2bcd7309857031f5 + .quad 0x62337a6e8ab8fae5 + + // 2^44 * 2 * G + + .quad 0x4bcef17f06ffca16 + .quad 0xde06e1db692ae16a + .quad 0x0753702d614f42b0 + .quad 0x5f6041b45b9212d0 + .quad 0xd1ab324e1b3a1273 + .quad 0x18947cf181055340 + .quad 0x3b5d9567a98c196e + .quad 0x7fa00425802e1e68 + .quad 0x7d531574028c2705 + .quad 0x80317d69db0d75fe + .quad 0x30fface8ef8c8ddd + .quad 
+
+// 2^44 * 3 * G
+
+        .quad 0x1558967b9e6585a3
+        .quad 0x97c99ce098e98b92
+        .quad 0x10af149b6eb3adad
+        .quad 0x42181fe8f4d38cfa
+        .quad 0xf004be62a24d40dd
+        .quad 0xba0659910452d41f
+        .quad 0x81c45ee162a44234
+        .quad 0x4cb829d8a22266ef
+        .quad 0x1dbcaa8407b86681
+        .quad 0x081f001e8b26753b
+        .quad 0x3cd7ce6a84048e81
+        .quad 0x78af11633f25f22c
+
+// 2^44 * 4 * G
+
+        .quad 0x8416ebd40b50babc
+        .quad 0x1508722628208bee
+        .quad 0xa3148fafb9c1c36d
+        .quad 0x0d07daacd32d7d5d
+        .quad 0x3241c00e7d65318c
+        .quad 0xe6bee5dcd0e86de7
+        .quad 0x118b2dc2fbc08c26
+        .quad 0x680d04a7fc603dc3
+        .quad 0xf9c2414a695aa3eb
+        .quad 0xdaa42c4c05a68f21
+        .quad 0x7c6c23987f93963e
+        .quad 0x210e8cd30c3954e3
+
+// 2^44 * 5 * G
+
+        .quad 0xac4201f210a71c06
+        .quad 0x6a65e0aef3bfb021
+        .quad 0xbc42c35c393632f7
+        .quad 0x56ea8db1865f0742
+        .quad 0x2b50f16137fe6c26
+        .quad 0xe102bcd856e404d8
+        .quad 0x12b0f1414c561f6b
+        .quad 0x51b17bc8d028ec91
+        .quad 0xfff5fb4bcf535119
+        .quad 0xf4989d79df1108a0
+        .quad 0xbdfcea659a3ba325
+        .quad 0x18a11f1174d1a6f2
+
+// 2^44 * 6 * G
+
+        .quad 0x407375ab3f6bba29
+        .quad 0x9ec3b6d8991e482e
+        .quad 0x99c80e82e55f92e9
+        .quad 0x307c13b6fb0c0ae1
+        .quad 0xfbd63cdad27a5f2c
+        .quad 0xf00fc4bc8aa106d7
+        .quad 0x53fb5c1a8e64a430
+        .quad 0x04eaabe50c1a2e85
+        .quad 0x24751021cb8ab5e7
+        .quad 0xfc2344495c5010eb
+        .quad 0x5f1e717b4e5610a1
+        .quad 0x44da5f18c2710cd5
+
+// 2^44 * 7 * G
+
+        .quad 0x033cc55ff1b82eb5
+        .quad 0xb15ae36d411cae52
+        .quad 0xba40b6198ffbacd3
+        .quad 0x768edce1532e861f
+        .quad 0x9156fe6b89d8eacc
+        .quad 0xe6b79451e23126a1
+        .quad 0xbd7463d93944eb4e
+        .quad 0x726373f6767203ae
+        .quad 0xe305ca72eb7ef68a
+        .quad 0x662cf31f70eadb23
+        .quad 0x18f026fdb4c45b68
+        .quad 0x513b5384b5d2ecbd
+
+// 2^44 * 8 * G
+
+        .quad 0x46d46280c729989e
+        .quad 0x4b93fbd05368a5dd
+        .quad 0x63df3f81d1765a89
+        .quad 0x34cebd64b9a0a223
+        .quad 0x5e2702878af34ceb
+        .quad 0x900b0409b946d6ae
+        .quad 0x6512ebf7dabd8512
+        .quad 0x61d9b76988258f81
+        .quad 0xa6c5a71349b7d94b
+        .quad 0xa3f3d15823eb9446
+        .quad 0x0416fbd277484834
+        .quad 0x69d45e6f2c70812f
+
+// 2^48 * 1 * G
+
+        .quad 0xce16f74bc53c1431
+        .quad 0x2b9725ce2072edde
+        .quad 0xb8b9c36fb5b23ee7
+        .quad 0x7e2e0e450b5cc908
+        .quad 0x9fe62b434f460efb
+        .quad 0xded303d4a63607d6
+        .quad 0xf052210eb7a0da24
+        .quad 0x237e7dbe00545b93
+        .quad 0x013575ed6701b430
+        .quad 0x231094e69f0bfd10
+        .quad 0x75320f1583e47f22
+        .quad 0x71afa699b11155e3
+
+// 2^48 * 2 * G
+
+        .quad 0x65ce6f9b3953b61d
+        .quad 0xc65839eaafa141e6
+        .quad 0x0f435ffda9f759fe
+        .quad 0x021142e9c2b1c28e
+        .quad 0xea423c1c473b50d6
+        .quad 0x51e87a1f3b38ef10
+        .quad 0x9b84bf5fb2c9be95
+        .quad 0x00731fbc78f89a1c
+        .quad 0xe430c71848f81880
+        .quad 0xbf960c225ecec119
+        .quad 0xb6dae0836bba15e3
+        .quad 0x4c4d6f3347e15808
+
+// 2^48 * 3 * G
+
+        .quad 0x18f7eccfc17d1fc9
+        .quad 0x6c75f5a651403c14
+        .quad 0xdbde712bf7ee0cdf
+        .quad 0x193fddaaa7e47a22
+        .quad 0x2f0cddfc988f1970
+        .quad 0x6b916227b0b9f51b
+        .quad 0x6ec7b6c4779176be
+        .quad 0x38bf9500a88f9fa8
+        .quad 0x1fd2c93c37e8876f
+        .quad 0xa2f61e5a18d1462c
+        .quad 0x5080f58239241276
+        .quad 0x6a6fb99ebf0d4969
+
+// 2^48 * 4 * G
+
+        .quad 0x6a46c1bb560855eb
+        .quad 0x2416bb38f893f09d
+        .quad 0xd71d11378f71acc1
+        .quad 0x75f76914a31896ea
+        .quad 0xeeb122b5b6e423c6
+        .quad 0x939d7010f286ff8e
+        .quad 0x90a92a831dcf5d8c
+        .quad 0x136fda9f42c5eb10
+        .quad 0xf94cdfb1a305bdd1
+        .quad 0x0f364b9d9ff82c08
+        .quad 0x2a87d8a5c3bb588a
+        .quad 0x022183510be8dcba
+
+// 2^48 * 5 * G
+
+        .quad 0x4af766385ead2d14
+        .quad 0xa08ed880ca7c5830
+        .quad 0x0d13a6e610211e3d
+        .quad 0x6a071ce17b806c03
+        .quad 0x9d5a710143307a7f
+        .quad 0xb063de9ec47da45f
+        .quad 0x22bbfe52be927ad3
+        .quad 0x1387c441fd40426c
+        .quad 0xb5d3c3d187978af8
+        .quad 0x722b5a3d7f0e4413
+        .quad 0x0d7b4848bb477ca0
+        .quad 0x3171b26aaf1edc92
+
+// 2^48 * 6 * G
+
+        .quad 0xa92f319097564ca8
+        .quad 0xff7bb84c2275e119
+        .quad 0x4f55fe37a4875150
+        .quad 0x221fd4873cf0835a
+        .quad 0xa60db7d8b28a47d1
+        .quad 0xa6bf14d61770a4f1
+        .quad 0xd4a1f89353ddbd58
+        .quad 0x6c514a63344243e9
+        .quad 0x2322204f3a156341
+        .quad 0xfb73e0e9ba0a032d
+        .quad 0xfce0dd4c410f030e
+        .quad 0x48daa596fb924aaa
+
+// 2^48 * 7 * G
+
+        .quad 0x6eca8e665ca59cc7
+        .quad 0xa847254b2e38aca0
+        .quad 0x31afc708d21e17ce
+        .quad 0x676dd6fccad84af7
+        .quad 0x14f61d5dc84c9793
+        .quad 0x9941f9e3ef418206
+        .quad 0xcdf5b88f346277ac
+        .quad 0x58c837fa0e8a79a9
+        .quad 0x0cf9688596fc9058
+        .quad 0x1ddcbbf37b56a01b
+        .quad 0xdcc2e77d4935d66a
+        .quad 0x1c4f73f2c6a57f0a
+
+// 2^48 * 8 * G
+
+        .quad 0x0e7a4fbd305fa0bb
+        .quad 0x829d4ce054c663ad
+        .quad 0xf421c3832fe33848
+        .quad 0x795ac80d1bf64c42
+        .quad 0xb36e706efc7c3484
+        .quad 0x73dfc9b4c3c1cf61
+        .quad 0xeb1d79c9781cc7e5
+        .quad 0x70459adb7daf675c
+        .quad 0x1b91db4991b42bb3
+        .quad 0x572696234b02dcca
+        .quad 0x9fdf9ee51f8c78dc
+        .quad 0x5fe162848ce21fd3
+
+// 2^52 * 1 * G
+
+        .quad 0xe2790aae4d077c41
+        .quad 0x8b938270db7469a3
+        .quad 0x6eb632dc8abd16a2
+        .quad 0x720814ecaa064b72
+        .quad 0x315c29c795115389
+        .quad 0xd7e0e507862f74ce
+        .quad 0x0c4a762185927432
+        .quad 0x72de6c984a25a1e4
+        .quad 0xae9ab553bf6aa310
+        .quad 0x050a50a9806d6e1b
+        .quad 0x92bb7403adff5139
+        .quad 0x0394d27645be618b
+
+// 2^52 * 2 * G
+
+        .quad 0x4d572251857eedf4
+        .quad 0xe3724edde19e93c5
+        .quad 0x8a71420e0b797035
+        .quad 0x3b3c833687abe743
+        .quad 0xf5396425b23545a4
+        .quad 0x15a7a27e98fbb296
+        .quad 0xab6c52bc636fdd86
+        .quad 0x79d995a8419334ee
+        .quad 0xcd8a8ea61195dd75
+        .quad 0xa504d8a81dd9a82f
+        .quad 0x540dca81a35879b6
+        .quad 0x60dd16a379c86a8a
+
+// 2^52 * 3 * G
+
+        .quad 0x35a2c8487381e559
+        .quad 0x596ffea6d78082cb
+        .quad 0xcb9771ebdba7b653
+        .quad 0x5a08b5019b4da685
+        .quad 0x3501d6f8153e47b8
+        .quad 0xb7a9675414a2f60c
+        .quad 0x112ee8b6455d9523
+        .quad 0x4e62a3c18112ea8a
+        .quad 0xc8d4ac04516ab786
+        .quad 0x595af3215295b23d
+        .quad 0xd6edd234db0230c1
+        .quad 0x0929efe8825b41cc
+
+// 2^52 * 4 * G
+
+        .quad 0x5f0601d1cbd0f2d3
+        .quad 0x736e412f6132bb7f
+        .quad 0x83604432238dde87
+        .quad 0x1e3a5272f5c0753c
+        .quad 0x8b3172b7ad56651d
+        .quad 0x01581b7a3fabd717
+        .quad 0x2dc94df6424df6e4
+        .quad 0x30376e5d2c29284f
+        .quad 0xd2918da78159a59c
+        .quad 0x6bdc1cd93f0713f3
+        .quad 0x565f7a934acd6590
+        .quad 0x53daacec4cb4c128
+
+// 2^52 * 5 * G
+
+        .quad 0x4ca73bd79cc8a7d6
+        .quad 0x4d4a738f47e9a9b2
+        .quad 0xf4cbf12942f5fe00
+        .quad 0x01a13ff9bdbf0752
+        .quad 0x99852bc3852cfdb0
+        .quad 0x2cc12e9559d6ed0b
+        .quad 0x70f9e2bf9b5ac27b
+        .quad 0x4f3b8c117959ae99
+        .quad 0x55b6c9c82ff26412
+        .quad 0x1ac4a8c91fb667a8
+        .quad 0xd527bfcfeb778bf2
+        .quad 0x303337da7012a3be
+
+// 2^52 * 6 * G
+
+        .quad 0x955422228c1c9d7c
+        .quad 0x01fac1371a9b340f
+        .quad 0x7e8d9177925b48d7
+        .quad 0x53f8ad5661b3e31b
+        .quad 0x976d3ccbfad2fdd1
+        .quad 0xcb88839737a640a8
+        .quad 0x2ff00c1d6734cb25
+        .quad 0x269ff4dc789c2d2b
+        .quad 0x0c003fbdc08d678d
+        .quad 0x4d982fa37ead2b17
+        .quad 0xc07e6bcdb2e582f1
+        .quad 0x296c7291df412a44
+
+// 2^52 * 7 * G
+
+        .quad 0x7903de2b33daf397
+        .quad 0xd0ff0619c9a624b3
+        .quad 0x8a1d252b555b3e18
+        .quad 0x2b6d581c52e0b7c0
+        .quad 0xdfb23205dab8b59e
+        .quad 0x465aeaa0c8092250
+        .quad 0xd133c1189a725d18
+        .quad 0x2327370261f117d1
+        .quad 0x3d0543d3623e7986
+        .quad 0x679414c2c278a354
+        .quad 0xae43f0cc726196f6
+        .quad 0x7836c41f8245eaba
+
+// 2^52 * 8 * G
+
+        .quad 0xe7a254db49e95a81
+        .quad 0x5192d5d008b0ad73
+        .quad 0x4d20e5b1d00afc07
+        .quad 0x5d55f8012cf25f38
+        .quad 0xca651e848011937c
+        .quad 0xc6b0c46e6ef41a28
+        .quad 0xb7021ba75f3f8d52
+        .quad 0x119dff99ead7b9fd
+        .quad 0x43eadfcbf4b31d4d
+        .quad 0xc6503f7411148892
+        .quad 0xfeee68c5060d3b17
+        .quad 0x329293b3dd4a0ac8
+
+// 2^56 * 1 * G
+
+        .quad 0x4e59214fe194961a
+        .quad 0x49be7dc70d71cd4f
+        .quad 0x9300cfd23b50f22d
+        .quad 0x4789d446fc917232
+        .quad 0x2879852d5d7cb208
+        .quad 0xb8dedd70687df2e7
+        .quad 0xdc0bffab21687891
+        .quad 0x2b44c043677daa35
+        .quad 0x1a1c87ab074eb78e
+        .quad 0xfac6d18e99daf467
+        .quad 0x3eacbbcd484f9067
+        .quad 0x60c52eef2bb9a4e4
+
+// 2^56 * 2 * G
+
+        .quad 0x0b5d89bc3bfd8bf1
+        .quad 0xb06b9237c9f3551a
+        .quad 0x0e4c16b0d53028f5
+        .quad 0x10bc9c312ccfcaab
+        .quad 0x702bc5c27cae6d11
+        .quad 0x44c7699b54a48cab
+        .quad 0xefbc4056ba492eb2
+        .quad 0x70d77248d9b6676d
+        .quad 0xaa8ae84b3ec2a05b
+        .quad 0x98699ef4ed1781e0
+        .quad 0x794513e4708e85d1
+        .quad 0x63755bd3a976f413
+
+// 2^56 * 3 * G
+
+        .quad 0xb55fa03e2ad10853
+        .quad 0x356f75909ee63569
+        .quad 0x9ff9f1fdbe69b890
+        .quad 0x0d8cc1c48bc16f84
+        .quad 0x3dc7101897f1acb7
+        .quad 0x5dda7d5ec165bbd8
+        .quad 0x508e5b9c0fa1020f
+        .quad 0x2763751737c52a56
+        .quad 0x029402d36eb419a9
+        .quad 0xf0b44e7e77b460a5
+        .quad 0xcfa86230d43c4956
+        .quad 0x70c2dd8a7ad166e7
+
+// 2^56 * 4 * G
+
+        .quad 0x656194509f6fec0e
+        .quad 0xee2e7ea946c6518d
+        .quad 0x9733c1f367e09b5c
+        .quad 0x2e0fac6363948495
+        .quad 0x91d4967db8ed7e13
+        .quad 0x74252f0ad776817a
+        .quad 0xe40982e00d852564
+        .quad 0x32b8613816a53ce5
+        .quad 0x79e7f7bee448cd64
+        .quad 0x6ac83a67087886d0
+        .quad 0xf89fd4d9a0e4db2e
+        .quad 0x4179215c735a4f41
+
+// 2^56 * 5 * G
+
+        .quad 0x8c7094e7d7dced2a
+        .quad 0x97fb8ac347d39c70
+        .quad 0xe13be033a906d902
+        .quad 0x700344a30cd99d76
+        .quad 0xe4ae33b9286bcd34
+        .quad 0xb7ef7eb6559dd6dc
+        .quad 0x278b141fb3d38e1f
+        .quad 0x31fa85662241c286
+        .quad 0xaf826c422e3622f4
+        .quad 0xc12029879833502d
+        .quad 0x9bc1b7e12b389123
+        .quad 0x24bb2312a9952489
+
+// 2^56 * 6 * G
+
+        .quad 0xb1a8ed1732de67c3
+        .quad 0x3cb49418461b4948
+        .quad 0x8ebd434376cfbcd2
+        .quad 0x0fee3e871e188008
+        .quad 0x41f80c2af5f85c6b
+        .quad 0x687284c304fa6794
+        .quad 0x8945df99a3ba1bad
+        .quad 0x0d1d2af9ffeb5d16
+        .quad 0xa9da8aa132621edf
+        .quad 0x30b822a159226579
+        .quad 0x4004197ba79ac193
+        .quad 0x16acd79718531d76
+
+// 2^56 * 7 * G
+
+        .quad 0x72df72af2d9b1d3d
+        .quad 0x63462a36a432245a
+        .quad 0x3ecea07916b39637
+        .quad 0x123e0ef6b9302309
+        .quad 0xc959c6c57887b6ad
+        .quad 0x94e19ead5f90feba
+        .quad 0x16e24e62a342f504
+        .quad 0x164ed34b18161700
+        .quad 0x487ed94c192fe69a
+        .quad 0x61ae2cea3a911513
+        .quad 0x877bf6d3b9a4de27
+        .quad 0x78da0fc61073f3eb
+
+// 2^56 * 8 * G
+
+        .quad 0x5bf15d28e52bc66a
+        .quad 0x2c47e31870f01a8e
+        .quad 0x2419afbc06c28bdd
+        .quad 0x2d25deeb256b173a
+        .quad 0xa29f80f1680c3a94
+        .quad 0x71f77e151ae9e7e6
+        .quad 0x1100f15848017973
+        .quad 0x054aa4b316b38ddd
+        .quad 0xdfc8468d19267cb8
+        .quad 0x0b28789c66e54daf
+        .quad 0x2aeb1d2a666eec17
+        .quad 0x134610a6ab7da760
+
+// 2^60 * 1 * G
+
+        .quad 0xcaf55ec27c59b23f
+        .quad 0x99aeed3e154d04f2
+        .quad 0x68441d72e14141f4
+        .quad 0x140345133932a0a2
+        .quad 0xd91430e0dc028c3c
+        .quad 0x0eb955a85217c771
+        .quad 0x4b09e1ed2c99a1fa
+        .quad 0x42881af2bd6a743c
+        .quad
0x7bfec69aab5cad3d + .quad 0xc23e8cd34cb2cfad + .quad 0x685dd14bfb37d6a2 + .quad 0x0ad6d64415677a18 + + // 2^60 * 2 * G + + .quad 0x781a439e417becb5 + .quad 0x4ac5938cd10e0266 + .quad 0x5da385110692ac24 + .quad 0x11b065a2ade31233 + .quad 0x7914892847927e9f + .quad 0x33dad6ef370aa877 + .quad 0x1f8f24fa11122703 + .quad 0x5265ac2f2adf9592 + .quad 0x405fdd309afcb346 + .quad 0xd9723d4428e63f54 + .quad 0x94c01df05f65aaae + .quad 0x43e4dc3ae14c0809 + + // 2^60 * 3 * G + + .quad 0xbc12c7f1a938a517 + .quad 0x473028ab3180b2e1 + .quad 0x3f78571efbcd254a + .quad 0x74e534426ff6f90f + .quad 0xea6f7ac3adc2c6a3 + .quad 0xd0e928f6e9717c94 + .quad 0xe2d379ead645eaf5 + .quad 0x46dd8785c51ffbbe + .quad 0x709801be375c8898 + .quad 0x4b06dab5e3fd8348 + .quad 0x75880ced27230714 + .quad 0x2b09468fdd2f4c42 + + // 2^60 * 4 * G + + .quad 0x97c749eeb701cb96 + .quad 0x83f438d4b6a369c3 + .quad 0x62962b8b9a402cd9 + .quad 0x6976c7509888df7b + .quad 0x5b97946582ffa02a + .quad 0xda096a51fea8f549 + .quad 0xa06351375f77af9b + .quad 0x1bcfde61201d1e76 + .quad 0x4a4a5490246a59a2 + .quad 0xd63ebddee87fdd90 + .quad 0xd9437c670d2371fa + .quad 0x69e87308d30f8ed6 + + // 2^60 * 5 * G + + .quad 0x435a8bb15656beb0 + .quad 0xf8fac9ba4f4d5bca + .quad 0xb9b278c41548c075 + .quad 0x3eb0ef76e892b622 + .quad 0x0f80bf028bc80303 + .quad 0x6aae16b37a18cefb + .quad 0xdd47ea47d72cd6a3 + .quad 0x61943588f4ed39aa + .quad 0xd26e5c3e91039f85 + .quad 0xc0e9e77df6f33aa9 + .quad 0xe8968c5570066a93 + .quad 0x3c34d1881faaaddd + + // 2^60 * 6 * G + + .quad 0x3f9d2b5ea09f9ec0 + .quad 0x1dab3b6fb623a890 + .quad 0xa09ba3ea72d926c4 + .quad 0x374193513fd8b36d + .quad 0xbd5b0b8f2fffe0d9 + .quad 0x6aa254103ed24fb9 + .quad 0x2ac7d7bcb26821c4 + .quad 0x605b394b60dca36a + .quad 0xb4e856e45a9d1ed2 + .quad 0xefe848766c97a9a2 + .quad 0xb104cf641e5eee7d + .quad 0x2f50b81c88a71c8f + + // 2^60 * 7 * G + + .quad 0x31723c61fc6811bb + .quad 0x9cb450486211800f + .quad 0x768933d347995753 + .quad 0x3491a53502752fcd + .quad 0x2b552ca0a7da522a + .quad 0x3230b336449b0250 + .quad 0xf2c4c5bca4b99fb9 + .quad 0x7b2c674958074a22 + .quad 0xd55165883ed28cdf + .quad 0x12d84fd2d362de39 + .quad 0x0a874ad3e3378e4f + .quad 0x000d2b1f7c763e74 + + // 2^60 * 8 * G + + .quad 0x3d420811d06d4a67 + .quad 0xbefc048590e0ffe3 + .quad 0xf870c6b7bd487bde + .quad 0x6e2a7316319afa28 + .quad 0x9624778c3e94a8ab + .quad 0x0ad6f3cee9a78bec + .quad 0x948ac7810d743c4f + .quad 0x76627935aaecfccc + .quad 0x56a8ac24d6d59a9f + .quad 0xc8db753e3096f006 + .quad 0x477f41e68f4c5299 + .quad 0x588d851cf6c86114 + + // 2^64 * 1 * G + + .quad 0x51138ec78df6b0fe + .quad 0x5397da89e575f51b + .quad 0x09207a1d717af1b9 + .quad 0x2102fdba2b20d650 + .quad 0xcd2a65e777d1f515 + .quad 0x548991878faa60f1 + .quad 0xb1b73bbcdabc06e5 + .quad 0x654878cba97cc9fb + .quad 0x969ee405055ce6a1 + .quad 0x36bca7681251ad29 + .quad 0x3a1af517aa7da415 + .quad 0x0ad725db29ecb2ba + + // 2^64 * 2 * G + + .quad 0xdc4267b1834e2457 + .quad 0xb67544b570ce1bc5 + .quad 0x1af07a0bf7d15ed7 + .quad 0x4aefcffb71a03650 + .quad 0xfec7bc0c9b056f85 + .quad 0x537d5268e7f5ffd7 + .quad 0x77afc6624312aefa + .quad 0x4f675f5302399fd9 + .quad 0xc32d36360415171e + .quad 0xcd2bef118998483b + .quad 0x870a6eadd0945110 + .quad 0x0bccbb72a2a86561 + + // 2^64 * 3 * G + + .quad 0x185e962feab1a9c8 + .quad 0x86e7e63565147dcd + .quad 0xb092e031bb5b6df2 + .quad 0x4024f0ab59d6b73e + .quad 0x186d5e4c50fe1296 + .quad 0xe0397b82fee89f7e + .quad 0x3bc7f6c5507031b0 + .quad 0x6678fd69108f37c2 + .quad 0x1586fa31636863c2 + .quad 0x07f68c48572d33f2 + .quad 0x4f73cc9f789eaefc + .quad 
0x2d42e2108ead4701 + + // 2^64 * 4 * G + + .quad 0x97f5131594dfd29b + .quad 0x6155985d313f4c6a + .quad 0xeba13f0708455010 + .quad 0x676b2608b8d2d322 + .quad 0x21717b0d0f537593 + .quad 0x914e690b131e064c + .quad 0x1bb687ae752ae09f + .quad 0x420bf3a79b423c6e + .quad 0x8138ba651c5b2b47 + .quad 0x8671b6ec311b1b80 + .quad 0x7bff0cb1bc3135b0 + .quad 0x745d2ffa9c0cf1e0 + + // 2^64 * 5 * G + + .quad 0xbf525a1e2bc9c8bd + .quad 0xea5b260826479d81 + .quad 0xd511c70edf0155db + .quad 0x1ae23ceb960cf5d0 + .quad 0x6036df5721d34e6a + .quad 0xb1db8827997bb3d0 + .quad 0xd3c209c3c8756afa + .quad 0x06e15be54c1dc839 + .quad 0x5b725d871932994a + .quad 0x32351cb5ceb1dab0 + .quad 0x7dc41549dab7ca05 + .quad 0x58ded861278ec1f7 + + // 2^64 * 6 * G + + .quad 0xd8173793f266c55c + .quad 0xc8c976c5cc454e49 + .quad 0x5ce382f8bc26c3a8 + .quad 0x2ff39de85485f6f9 + .quad 0x2dfb5ba8b6c2c9a8 + .quad 0x48eeef8ef52c598c + .quad 0x33809107f12d1573 + .quad 0x08ba696b531d5bd8 + .quad 0x77ed3eeec3efc57a + .quad 0x04e05517d4ff4811 + .quad 0xea3d7a3ff1a671cb + .quad 0x120633b4947cfe54 + + // 2^64 * 7 * G + + .quad 0x0b94987891610042 + .quad 0x4ee7b13cecebfae8 + .quad 0x70be739594f0a4c0 + .quad 0x35d30a99b4d59185 + .quad 0x82bd31474912100a + .quad 0xde237b6d7e6fbe06 + .quad 0xe11e761911ea79c6 + .quad 0x07433be3cb393bde + .quad 0xff7944c05ce997f4 + .quad 0x575d3de4b05c51a3 + .quad 0x583381fd5a76847c + .quad 0x2d873ede7af6da9f + + // 2^64 * 8 * G + + .quad 0x157a316443373409 + .quad 0xfab8b7eef4aa81d9 + .quad 0xb093fee6f5a64806 + .quad 0x2e773654707fa7b6 + .quad 0xaa6202e14e5df981 + .quad 0xa20d59175015e1f5 + .quad 0x18a275d3bae21d6c + .quad 0x0543618a01600253 + .quad 0x0deabdf4974c23c1 + .quad 0xaa6f0a259dce4693 + .quad 0x04202cb8a29aba2c + .quad 0x4b1443362d07960d + + // 2^68 * 1 * G + + .quad 0x47b837f753242cec + .quad 0x256dc48cc04212f2 + .quad 0xe222fbfbe1d928c5 + .quad 0x48ea295bad8a2c07 + .quad 0x299b1c3f57c5715e + .quad 0x96cb929e6b686d90 + .quad 0x3004806447235ab3 + .quad 0x2c435c24a44d9fe1 + .quad 0x0607c97c80f8833f + .quad 0x0e851578ca25ec5b + .quad 0x54f7450b161ebb6f + .quad 0x7bcb4792a0def80e + + // 2^68 * 2 * G + + .quad 0x8487e3d02bc73659 + .quad 0x4baf8445059979df + .quad 0xd17c975adcad6fbf + .quad 0x57369f0bdefc96b6 + .quad 0x1cecd0a0045224c2 + .quad 0x757f1b1b69e53952 + .quad 0x775b7a925289f681 + .quad 0x1b6cc62016736148 + .quad 0xf1a9990175638698 + .quad 0x353dd1beeeaa60d3 + .quad 0x849471334c9ba488 + .quad 0x63fa6e6843ade311 + + // 2^68 * 3 * G + + .quad 0xd15c20536597c168 + .quad 0x9f73740098d28789 + .quad 0x18aee7f13257ba1f + .quad 0x3418bfda07346f14 + .quad 0x2195becdd24b5eb7 + .quad 0x5e41f18cc0cd44f9 + .quad 0xdf28074441ca9ede + .quad 0x07073b98f35b7d67 + .quad 0xd03c676c4ce530d4 + .quad 0x0b64c0473b5df9f4 + .quad 0x065cef8b19b3a31e + .quad 0x3084d661533102c9 + + // 2^68 * 4 * G + + .quad 0xe1f6b79ebf8469ad + .quad 0x15801004e2663135 + .quad 0x9a498330af74181b + .quad 0x3ba2504f049b673c + .quad 0x9a6ce876760321fd + .quad 0x7fe2b5109eb63ad8 + .quad 0x00e7d4ae8ac80592 + .quad 0x73d86b7abb6f723a + .quad 0x0b52b5606dba5ab6 + .quad 0xa9134f0fbbb1edab + .quad 0x30a9520d9b04a635 + .quad 0x6813b8f37973e5db + + // 2^68 * 5 * G + + .quad 0x9854b054334127c1 + .quad 0x105d047882fbff25 + .quad 0xdb49f7f944186f4f + .quad 0x1768e838bed0b900 + .quad 0xf194ca56f3157e29 + .quad 0x136d35705ef528a5 + .quad 0xdd4cef778b0599bc + .quad 0x7d5472af24f833ed + .quad 0xd0ef874daf33da47 + .quad 0x00d3be5db6e339f9 + .quad 0x3f2a8a2f9c9ceece + .quad 0x5d1aeb792352435a + + // 2^68 * 6 * G + + .quad 0xf59e6bb319cd63ca + .quad 0x670c159221d06839 
+ .quad 0xb06d565b2150cab6 + .quad 0x20fb199d104f12a3 + .quad 0x12c7bfaeb61ba775 + .quad 0xb84e621fe263bffd + .quad 0x0b47a5c35c840dcf + .quad 0x7e83be0bccaf8634 + .quad 0x61943dee6d99c120 + .quad 0x86101f2e460b9fe0 + .quad 0x6bb2f1518ee8598d + .quad 0x76b76289fcc475cc + + // 2^68 * 7 * G + + .quad 0x791b4cc1756286fa + .quad 0xdbced317d74a157c + .quad 0x7e732421ea72bde6 + .quad 0x01fe18491131c8e9 + .quad 0x4245f1a1522ec0b3 + .quad 0x558785b22a75656d + .quad 0x1d485a2548a1b3c0 + .quad 0x60959eccd58fe09f + .quad 0x3ebfeb7ba8ed7a09 + .quad 0x49fdc2bbe502789c + .quad 0x44ebce5d3c119428 + .quad 0x35e1eb55be947f4a + + // 2^68 * 8 * G + + .quad 0xdbdae701c5738dd3 + .quad 0xf9c6f635b26f1bee + .quad 0x61e96a8042f15ef4 + .quad 0x3aa1d11faf60a4d8 + .quad 0x14fd6dfa726ccc74 + .quad 0x3b084cfe2f53b965 + .quad 0xf33ae4f552a2c8b4 + .quad 0x59aab07a0d40166a + .quad 0x77bcec4c925eac25 + .quad 0x1848718460137738 + .quad 0x5b374337fea9f451 + .quad 0x1865e78ec8e6aa46 + + // 2^72 * 1 * G + + .quad 0xccc4b7c7b66e1f7a + .quad 0x44157e25f50c2f7e + .quad 0x3ef06dfc713eaf1c + .quad 0x582f446752da63f7 + .quad 0x967c54e91c529ccb + .quad 0x30f6269264c635fb + .quad 0x2747aff478121965 + .quad 0x17038418eaf66f5c + .quad 0xc6317bd320324ce4 + .quad 0xa81042e8a4488bc4 + .quad 0xb21ef18b4e5a1364 + .quad 0x0c2a1c4bcda28dc9 + + // 2^72 * 2 * G + + .quad 0xd24dc7d06f1f0447 + .quad 0xb2269e3edb87c059 + .quad 0xd15b0272fbb2d28f + .quad 0x7c558bd1c6f64877 + .quad 0xedc4814869bd6945 + .quad 0x0d6d907dbe1c8d22 + .quad 0xc63bd212d55cc5ab + .quad 0x5a6a9b30a314dc83 + .quad 0xd0ec1524d396463d + .quad 0x12bb628ac35a24f0 + .quad 0xa50c3a791cbc5fa4 + .quad 0x0404a5ca0afbafc3 + + // 2^72 * 3 * G + + .quad 0x8c1f40070aa743d6 + .quad 0xccbad0cb5b265ee8 + .quad 0x574b046b668fd2de + .quad 0x46395bfdcadd9633 + .quad 0x62bc9e1b2a416fd1 + .quad 0xb5c6f728e350598b + .quad 0x04343fd83d5d6967 + .quad 0x39527516e7f8ee98 + .quad 0x117fdb2d1a5d9a9c + .quad 0x9c7745bcd1005c2a + .quad 0xefd4bef154d56fea + .quad 0x76579a29e822d016 + + // 2^72 * 4 * G + + .quad 0x45b68e7e49c02a17 + .quad 0x23cd51a2bca9a37f + .quad 0x3ed65f11ec224c1b + .quad 0x43a384dc9e05bdb1 + .quad 0x333cb51352b434f2 + .quad 0xd832284993de80e1 + .quad 0xb5512887750d35ce + .quad 0x02c514bb2a2777c1 + .quad 0x684bd5da8bf1b645 + .quad 0xfb8bd37ef6b54b53 + .quad 0x313916d7a9b0d253 + .quad 0x1160920961548059 + + // 2^72 * 5 * G + + .quad 0xb44d166929dacfaa + .quad 0xda529f4c8413598f + .quad 0xe9ef63ca453d5559 + .quad 0x351e125bc5698e0b + .quad 0x7a385616369b4dcd + .quad 0x75c02ca7655c3563 + .quad 0x7dc21bf9d4f18021 + .quad 0x2f637d7491e6e042 + .quad 0xd4b49b461af67bbe + .quad 0xd603037ac8ab8961 + .quad 0x71dee19ff9a699fb + .quad 0x7f182d06e7ce2a9a + + // 2^72 * 6 * G + + .quad 0x7a7c8e64ab0168ec + .quad 0xcb5a4a5515edc543 + .quad 0x095519d347cd0eda + .quad 0x67d4ac8c343e93b0 + .quad 0x09454b728e217522 + .quad 0xaa58e8f4d484b8d8 + .quad 0xd358254d7f46903c + .quad 0x44acc043241c5217 + .quad 0x1c7d6bbb4f7a5777 + .quad 0x8b35fed4918313e1 + .quad 0x4adca1c6c96b4684 + .quad 0x556d1c8312ad71bd + + // 2^72 * 7 * G + + .quad 0x17ef40e30c8d3982 + .quad 0x31f7073e15a3fa34 + .quad 0x4f21f3cb0773646e + .quad 0x746c6c6d1d824eff + .quad 0x81f06756b11be821 + .quad 0x0faff82310a3f3dd + .quad 0xf8b2d0556a99465d + .quad 0x097abe38cc8c7f05 + .quad 0x0c49c9877ea52da4 + .quad 0x4c4369559bdc1d43 + .quad 0x022c3809f7ccebd2 + .quad 0x577e14a34bee84bd + + // 2^72 * 8 * G + + .quad 0xf0e268ac61a73b0a + .quad 0xf2fafa103791a5f5 + .quad 0xc1e13e826b6d00e9 + .quad 0x60fa7ee96fd78f42 + .quad 0x94fecebebd4dd72b + .quad 
0xf46a4fda060f2211 + .quad 0x124a5977c0c8d1ff + .quad 0x705304b8fb009295 + .quad 0xb63d1d354d296ec6 + .quad 0xf3c3053e5fad31d8 + .quad 0x670b958cb4bd42ec + .quad 0x21398e0ca16353fd + + // 2^76 * 1 * G + + .quad 0x216ab2ca8da7d2ef + .quad 0x366ad9dd99f42827 + .quad 0xae64b9004fdd3c75 + .quad 0x403a395b53909e62 + .quad 0x86c5fc16861b7e9a + .quad 0xf6a330476a27c451 + .quad 0x01667267a1e93597 + .quad 0x05ffb9cd6082dfeb + .quad 0xa617fa9ff53f6139 + .quad 0x60f2b5e513e66cb6 + .quad 0xd7a8beefb3448aa4 + .quad 0x7a2932856f5ea192 + + // 2^76 * 2 * G + + .quad 0x0b39d761b02de888 + .quad 0x5f550e7ed2414e1f + .quad 0xa6bfa45822e1a940 + .quad 0x050a2f7dfd447b99 + .quad 0xb89c444879639302 + .quad 0x4ae4f19350c67f2c + .quad 0xf0b35da8c81af9c6 + .quad 0x39d0003546871017 + .quad 0x437c3b33a650db77 + .quad 0x6bafe81dbac52bb2 + .quad 0xfe99402d2db7d318 + .quad 0x2b5b7eec372ba6ce + + // 2^76 * 3 * G + + .quad 0xb3bc4bbd83f50eef + .quad 0x508f0c998c927866 + .quad 0x43e76587c8b7e66e + .quad 0x0f7655a3a47f98d9 + .quad 0xa694404d613ac8f4 + .quad 0x500c3c2bfa97e72c + .quad 0x874104d21fcec210 + .quad 0x1b205fb38604a8ee + .quad 0x55ecad37d24b133c + .quad 0x441e147d6038c90b + .quad 0x656683a1d62c6fee + .quad 0x0157d5dc87e0ecae + + // 2^76 * 4 * G + + .quad 0xf2a7af510354c13d + .quad 0xd7a0b145aa372b60 + .quad 0x2869b96a05a3d470 + .quad 0x6528e42d82460173 + .quad 0x95265514d71eb524 + .quad 0xe603d8815df14593 + .quad 0x147cdf410d4de6b7 + .quad 0x5293b1730437c850 + .quad 0x23d0e0814bccf226 + .quad 0x92c745cd8196fb93 + .quad 0x8b61796c59541e5b + .quad 0x40a44df0c021f978 + + // 2^76 * 5 * G + + .quad 0xdaa869894f20ea6a + .quad 0xea14a3d14c620618 + .quad 0x6001fccb090bf8be + .quad 0x35f4e822947e9cf0 + .quad 0x86c96e514bc5d095 + .quad 0xf20d4098fca6804a + .quad 0x27363d89c826ea5d + .quad 0x39ca36565719cacf + .quad 0x97506f2f6f87b75c + .quad 0xc624aea0034ae070 + .quad 0x1ec856e3aad34dd6 + .quad 0x055b0be0e440e58f + + // 2^76 * 6 * G + + .quad 0x6469a17d89735d12 + .quad 0xdb6f27d5e662b9f1 + .quad 0x9fcba3286a395681 + .quad 0x363b8004d269af25 + .quad 0x4d12a04b6ea33da2 + .quad 0x57cf4c15e36126dd + .quad 0x90ec9675ee44d967 + .quad 0x64ca348d2a985aac + .quad 0x99588e19e4c4912d + .quad 0xefcc3b4e1ca5ce6b + .quad 0x4522ea60fa5b98d5 + .quad 0x7064bbab1de4a819 + + // 2^76 * 7 * G + + .quad 0xb919e1515a770641 + .quad 0xa9a2e2c74e7f8039 + .quad 0x7527250b3df23109 + .quad 0x756a7330ac27b78b + .quad 0xa290c06142542129 + .quad 0xf2e2c2aebe8d5b90 + .quad 0xcf2458db76abfe1b + .quad 0x02157ade83d626bf + .quad 0x3e46972a1b9a038b + .quad 0x2e4ee66a7ee03fb4 + .quad 0x81a248776edbb4ca + .quad 0x1a944ee88ecd0563 + + // 2^76 * 8 * G + + .quad 0xd5a91d1151039372 + .quad 0x2ed377b799ca26de + .quad 0xa17202acfd366b6b + .quad 0x0730291bd6901995 + .quad 0xbb40a859182362d6 + .quad 0xb99f55778a4d1abb + .quad 0x8d18b427758559f6 + .quad 0x26c20fe74d26235a + .quad 0x648d1d9fe9cc22f5 + .quad 0x66bc561928dd577c + .quad 0x47d3ed21652439d1 + .quad 0x49d271acedaf8b49 + + // 2^80 * 1 * G + + .quad 0x89f5058a382b33f3 + .quad 0x5ae2ba0bad48c0b4 + .quad 0x8f93b503a53db36e + .quad 0x5aa3ed9d95a232e6 + .quad 0x2798aaf9b4b75601 + .quad 0x5eac72135c8dad72 + .quad 0xd2ceaa6161b7a023 + .quad 0x1bbfb284e98f7d4e + .quad 0x656777e9c7d96561 + .quad 0xcb2b125472c78036 + .quad 0x65053299d9506eee + .quad 0x4a07e14e5e8957cc + + // 2^80 * 2 * G + + .quad 0x4ee412cb980df999 + .quad 0xa315d76f3c6ec771 + .quad 0xbba5edde925c77fd + .quad 0x3f0bac391d313402 + .quad 0x240b58cdc477a49b + .quad 0xfd38dade6447f017 + .quad 0x19928d32a7c86aad + .quad 0x50af7aed84afa081 + .quad 
0x6e4fde0115f65be5 + .quad 0x29982621216109b2 + .quad 0x780205810badd6d9 + .quad 0x1921a316baebd006 + + // 2^80 * 3 * G + + .quad 0x89422f7edfb870fc + .quad 0x2c296beb4f76b3bd + .quad 0x0738f1d436c24df7 + .quad 0x6458df41e273aeb0 + .quad 0xd75aad9ad9f3c18b + .quad 0x566a0eef60b1c19c + .quad 0x3e9a0bac255c0ed9 + .quad 0x7b049deca062c7f5 + .quad 0xdccbe37a35444483 + .quad 0x758879330fedbe93 + .quad 0x786004c312c5dd87 + .quad 0x6093dccbc2950e64 + + // 2^80 * 4 * G + + .quad 0x1ff39a8585e0706d + .quad 0x36d0a5d8b3e73933 + .quad 0x43b9f2e1718f453b + .quad 0x57d1ea084827a97c + .quad 0x6bdeeebe6084034b + .quad 0x3199c2b6780fb854 + .quad 0x973376abb62d0695 + .quad 0x6e3180c98b647d90 + .quad 0xee7ab6e7a128b071 + .quad 0xa4c1596d93a88baa + .quad 0xf7b4de82b2216130 + .quad 0x363e999ddd97bd18 + + // 2^80 * 5 * G + + .quad 0x96a843c135ee1fc4 + .quad 0x976eb35508e4c8cf + .quad 0xb42f6801b58cd330 + .quad 0x48ee9b78693a052b + .quad 0x2f1848dce24baec6 + .quad 0x769b7255babcaf60 + .quad 0x90cb3c6e3cefe931 + .quad 0x231f979bc6f9b355 + .quad 0x5c31de4bcc2af3c6 + .quad 0xb04bb030fe208d1f + .quad 0xb78d7009c14fb466 + .quad 0x079bfa9b08792413 + + // 2^80 * 6 * G + + .quad 0xe3903a51da300df4 + .quad 0x843964233da95ab0 + .quad 0xed3cf12d0b356480 + .quad 0x038c77f684817194 + .quad 0xf3c9ed80a2d54245 + .quad 0x0aa08b7877f63952 + .quad 0xd76dac63d1085475 + .quad 0x1ef4fb159470636b + .quad 0x854e5ee65b167bec + .quad 0x59590a4296d0cdc2 + .quad 0x72b2df3498102199 + .quad 0x575ee92a4a0bff56 + + // 2^80 * 7 * G + + .quad 0xd4c080908a182fcf + .quad 0x30e170c299489dbd + .quad 0x05babd5752f733de + .quad 0x43d4e7112cd3fd00 + .quad 0x5d46bc450aa4d801 + .quad 0xc3af1227a533b9d8 + .quad 0x389e3b262b8906c2 + .quad 0x200a1e7e382f581b + .quad 0x518db967eaf93ac5 + .quad 0x71bc989b056652c0 + .quad 0xfe2b85d9567197f5 + .quad 0x050eca52651e4e38 + + // 2^80 * 8 * G + + .quad 0xc3431ade453f0c9c + .quad 0xe9f5045eff703b9b + .quad 0xfcd97ac9ed847b3d + .quad 0x4b0ee6c21c58f4c6 + .quad 0x97ac397660e668ea + .quad 0x9b19bbfe153ab497 + .quad 0x4cb179b534eca79f + .quad 0x6151c09fa131ae57 + .quad 0x3af55c0dfdf05d96 + .quad 0xdd262ee02ab4ee7a + .quad 0x11b2bb8712171709 + .quad 0x1fef24fa800f030b + + // 2^84 * 1 * G + + .quad 0xb496123a6b6c6609 + .quad 0xa750fe8580ab5938 + .quad 0xf471bf39b7c27a5f + .quad 0x507903ce77ac193c + .quad 0xff91a66a90166220 + .quad 0xf22552ae5bf1e009 + .quad 0x7dff85d87f90df7c + .quad 0x4f620ffe0c736fb9 + .quad 0x62f90d65dfde3e34 + .quad 0xcf28c592b9fa5fad + .quad 0x99c86ef9c6164510 + .quad 0x25d448044a256c84 + + // 2^84 * 2 * G + + .quad 0xbd68230ec7e9b16f + .quad 0x0eb1b9c1c1c5795d + .quad 0x7943c8c495b6b1ff + .quad 0x2f9faf620bbacf5e + .quad 0x2c7c4415c9022b55 + .quad 0x56a0d241812eb1fe + .quad 0xf02ea1c9d7b65e0d + .quad 0x4180512fd5323b26 + .quad 0xa4ff3e698a48a5db + .quad 0xba6a3806bd95403b + .quad 0x9f7ce1af47d5b65d + .quad 0x15e087e55939d2fb + + // 2^84 * 3 * G + + .quad 0x12207543745c1496 + .quad 0xdaff3cfdda38610c + .quad 0xe4e797272c71c34f + .quad 0x39c07b1934bdede9 + .quad 0x8894186efb963f38 + .quad 0x48a00e80dc639bd5 + .quad 0xa4e8092be96c1c99 + .quad 0x5a097d54ca573661 + .quad 0x2d45892b17c9e755 + .quad 0xd033fd7289308df8 + .quad 0x6c2fe9d9525b8bd9 + .quad 0x2edbecf1c11cc079 + + // 2^84 * 4 * G + + .quad 0x1616a4e3c715a0d2 + .quad 0x53623cb0f8341d4d + .quad 0x96ef5329c7e899cb + .quad 0x3d4e8dbba668baa6 + .quad 0xee0f0fddd087a25f + .quad 0x9c7531555c3e34ee + .quad 0x660c572e8fab3ab5 + .quad 0x0854fc44544cd3b2 + .quad 0x61eba0c555edad19 + .quad 0x24b533fef0a83de6 + .quad 0x3b77042883baa5f8 + .quad 
0x678f82b898a47e8d + + // 2^84 * 5 * G + + .quad 0xb1491d0bd6900c54 + .quad 0x3539722c9d132636 + .quad 0x4db928920b362bc9 + .quad 0x4d7cd1fea68b69df + .quad 0x1e09d94057775696 + .quad 0xeed1265c3cd951db + .quad 0xfa9dac2b20bce16f + .quad 0x0f7f76e0e8d089f4 + .quad 0x36d9ebc5d485b00c + .quad 0xa2596492e4adb365 + .quad 0xc1659480c2119ccd + .quad 0x45306349186e0d5f + + // 2^84 * 6 * G + + .quad 0x94ddd0c1a6cdff1d + .quad 0x55f6f115e84213ae + .quad 0x6c935f85992fcf6a + .quad 0x067ee0f54a37f16f + .quad 0x96a414ec2b072491 + .quad 0x1bb2218127a7b65b + .quad 0x6d2849596e8a4af0 + .quad 0x65f3b08ccd27765f + .quad 0xecb29fff199801f7 + .quad 0x9d361d1fa2a0f72f + .quad 0x25f11d2375fd2f49 + .quad 0x124cefe80fe10fe2 + + // 2^84 * 7 * G + + .quad 0x4c126cf9d18df255 + .quad 0xc1d471e9147a63b6 + .quad 0x2c6d3c73f3c93b5f + .quad 0x6be3a6a2e3ff86a2 + .quad 0x1518e85b31b16489 + .quad 0x8faadcb7db710bfb + .quad 0x39b0bdf4a14ae239 + .quad 0x05f4cbea503d20c1 + .quad 0xce040e9ec04145bc + .quad 0xc71ff4e208f6834c + .quad 0xbd546e8dab8847a3 + .quad 0x64666aa0a4d2aba5 + + // 2^84 * 8 * G + + .quad 0x6841435a7c06d912 + .quad 0xca123c21bb3f830b + .quad 0xd4b37b27b1cbe278 + .quad 0x1d753b84c76f5046 + .quad 0xb0c53bf73337e94c + .quad 0x7cb5697e11e14f15 + .quad 0x4b84abac1930c750 + .quad 0x28dd4abfe0640468 + .quad 0x7dc0b64c44cb9f44 + .quad 0x18a3e1ace3925dbf + .quad 0x7a3034862d0457c4 + .quad 0x4c498bf78a0c892e + + // 2^88 * 1 * G + + .quad 0x37d653fb1aa73196 + .quad 0x0f9495303fd76418 + .quad 0xad200b09fb3a17b2 + .quad 0x544d49292fc8613e + .quad 0x22d2aff530976b86 + .quad 0x8d90b806c2d24604 + .quad 0xdca1896c4de5bae5 + .quad 0x28005fe6c8340c17 + .quad 0x6aefba9f34528688 + .quad 0x5c1bff9425107da1 + .quad 0xf75bbbcd66d94b36 + .quad 0x72e472930f316dfa + + // 2^88 * 2 * G + + .quad 0x2695208c9781084f + .quad 0xb1502a0b23450ee1 + .quad 0xfd9daea603efde02 + .quad 0x5a9d2e8c2733a34c + .quad 0x07f3f635d32a7627 + .quad 0x7aaa4d865f6566f0 + .quad 0x3c85e79728d04450 + .quad 0x1fee7f000fe06438 + .quad 0x765305da03dbf7e5 + .quad 0xa4daf2491434cdbd + .quad 0x7b4ad5cdd24a88ec + .quad 0x00f94051ee040543 + + // 2^88 * 3 * G + + .quad 0x8d356b23c3d330b2 + .quad 0xf21c8b9bb0471b06 + .quad 0xb36c316c6e42b83c + .quad 0x07d79c7e8beab10d + .quad 0xd7ef93bb07af9753 + .quad 0x583ed0cf3db766a7 + .quad 0xce6998bf6e0b1ec5 + .quad 0x47b7ffd25dd40452 + .quad 0x87fbfb9cbc08dd12 + .quad 0x8a066b3ae1eec29b + .quad 0x0d57242bdb1fc1bf + .quad 0x1c3520a35ea64bb6 + + // 2^88 * 4 * G + + .quad 0x80d253a6bccba34a + .quad 0x3e61c3a13838219b + .quad 0x90c3b6019882e396 + .quad 0x1c3d05775d0ee66f + .quad 0xcda86f40216bc059 + .quad 0x1fbb231d12bcd87e + .quad 0xb4956a9e17c70990 + .quad 0x38750c3b66d12e55 + .quad 0x692ef1409422e51a + .quad 0xcbc0c73c2b5df671 + .quad 0x21014fe7744ce029 + .quad 0x0621e2c7d330487c + + // 2^88 * 5 * G + + .quad 0xaf9860cc8259838d + .quad 0x90ea48c1c69f9adc + .quad 0x6526483765581e30 + .quad 0x0007d6097bd3a5bc + .quad 0xb7ae1796b0dbf0f3 + .quad 0x54dfafb9e17ce196 + .quad 0x25923071e9aaa3b4 + .quad 0x5d8e589ca1002e9d + .quad 0xc0bf1d950842a94b + .quad 0xb2d3c363588f2e3e + .quad 0x0a961438bb51e2ef + .quad 0x1583d7783c1cbf86 + + // 2^88 * 6 * G + + .quad 0xeceea2ef5da27ae1 + .quad 0x597c3a1455670174 + .quad 0xc9a62a126609167a + .quad 0x252a5f2e81ed8f70 + .quad 0x90034704cc9d28c7 + .quad 0x1d1b679ef72cc58f + .quad 0x16e12b5fbe5b8726 + .quad 0x4958064e83c5580a + .quad 0x0d2894265066e80d + .quad 0xfcc3f785307c8c6b + .quad 0x1b53da780c1112fd + .quad 0x079c170bd843b388 + + // 2^88 * 7 * G + + .quad 0x0506ece464fa6fff + .quad 0xbee3431e6205e523 
+ .quad 0x3579422451b8ea42 + .quad 0x6dec05e34ac9fb00 + .quad 0xcdd6cd50c0d5d056 + .quad 0x9af7686dbb03573b + .quad 0x3ca6723ff3c3ef48 + .quad 0x6768c0d7317b8acc + .quad 0x94b625e5f155c1b3 + .quad 0x417bf3a7997b7b91 + .quad 0xc22cbddc6d6b2600 + .quad 0x51445e14ddcd52f4 + + // 2^88 * 8 * G + + .quad 0x57502b4b3b144951 + .quad 0x8e67ff6b444bbcb3 + .quad 0xb8bd6927166385db + .quad 0x13186f31e39295c8 + .quad 0x893147ab2bbea455 + .quad 0x8c53a24f92079129 + .quad 0x4b49f948be30f7a7 + .quad 0x12e990086e4fd43d + .quad 0xf10c96b37fdfbb2e + .quad 0x9f9a935e121ceaf9 + .quad 0xdf1136c43a5b983f + .quad 0x77b2e3f05d3e99af + + // 2^92 * 1 * G + + .quad 0xfd0d75879cf12657 + .quad 0xe82fef94e53a0e29 + .quad 0xcc34a7f05bbb4be7 + .quad 0x0b251172a50c38a2 + .quad 0x9532f48fcc5cd29b + .quad 0x2ba851bea3ce3671 + .quad 0x32dacaa051122941 + .quad 0x478d99d9350004f2 + .quad 0x1d5ad94890bb02c0 + .quad 0x50e208b10ec25115 + .quad 0xa26a22894ef21702 + .quad 0x4dc923343b524805 + + // 2^92 * 2 * G + + .quad 0xe3828c400f8086b6 + .quad 0x3f77e6f7979f0dc8 + .quad 0x7ef6de304df42cb4 + .quad 0x5265797cb6abd784 + .quad 0x3ad3e3ebf36c4975 + .quad 0xd75d25a537862125 + .quad 0xe873943da025a516 + .quad 0x6bbc7cb4c411c847 + .quad 0x3c6f9cd1d4a50d56 + .quad 0xb6244077c6feab7e + .quad 0x6ff9bf483580972e + .quad 0x00375883b332acfb + + // 2^92 * 3 * G + + .quad 0x0001b2cd28cb0940 + .quad 0x63fb51a06f1c24c9 + .quad 0xb5ad8691dcd5ca31 + .quad 0x67238dbd8c450660 + .quad 0xc98bec856c75c99c + .quad 0xe44184c000e33cf4 + .quad 0x0a676b9bba907634 + .quad 0x669e2cb571f379d7 + .quad 0xcb116b73a49bd308 + .quad 0x025aad6b2392729e + .quad 0xb4793efa3f55d9b1 + .quad 0x72a1056140678bb9 + + // 2^92 * 4 * G + + .quad 0xa2b6812b1cc9249d + .quad 0x62866eee21211f58 + .quad 0x2cb5c5b85df10ece + .quad 0x03a6b259e263ae00 + .quad 0x0d8d2909e2e505b6 + .quad 0x98ca78abc0291230 + .quad 0x77ef5569a9b12327 + .quad 0x7c77897b81439b47 + .quad 0xf1c1b5e2de331cb5 + .quad 0x5a9f5d8e15fca420 + .quad 0x9fa438f17bd932b1 + .quad 0x2a381bf01c6146e7 + + // 2^92 * 5 * G + + .quad 0xac9b9879cfc811c1 + .quad 0x8b7d29813756e567 + .quad 0x50da4e607c70edfc + .quad 0x5dbca62f884400b6 + .quad 0xf7c0be32b534166f + .quad 0x27e6ca6419cf70d4 + .quad 0x934df7d7a957a759 + .quad 0x5701461dabdec2aa + .quad 0x2c6747402c915c25 + .quad 0x1bdcd1a80b0d340a + .quad 0x5e5601bd07b43f5f + .quad 0x2555b4e05539a242 + + // 2^92 * 6 * G + + .quad 0x6fc09f5266ddd216 + .quad 0xdce560a7c8e37048 + .quad 0xec65939da2df62fd + .quad 0x7a869ae7e52ed192 + .quad 0x78409b1d87e463d4 + .quad 0xad4da95acdfb639d + .quad 0xec28773755259b9c + .quad 0x69c806e9c31230ab + .quad 0x7b48f57414bb3f22 + .quad 0x68c7cee4aedccc88 + .quad 0xed2f936179ed80be + .quad 0x25d70b885f77bc4b + + // 2^92 * 7 * G + + .quad 0x4151c3d9762bf4de + .quad 0x083f435f2745d82b + .quad 0x29775a2e0d23ddd5 + .quad 0x138e3a6269a5db24 + .quad 0x98459d29bb1ae4d4 + .quad 0x56b9c4c739f954ec + .quad 0x832743f6c29b4b3e + .quad 0x21ea8e2798b6878a + .quad 0x87bef4b46a5a7b9c + .quad 0xd2299d1b5fc1d062 + .quad 0x82409818dd321648 + .quad 0x5c5abeb1e5a2e03d + + // 2^92 * 8 * G + + .quad 0x14722af4b73c2ddb + .quad 0xbc470c5f5a05060d + .quad 0x00943eac2581b02e + .quad 0x0e434b3b1f499c8f + .quad 0x02cde6de1306a233 + .quad 0x7b5a52a2116f8ec7 + .quad 0xe1c681f4c1163b5b + .quad 0x241d350660d32643 + .quad 0x6be4404d0ebc52c7 + .quad 0xae46233bb1a791f5 + .quad 0x2aec170ed25db42b + .quad 0x1d8dfd966645d694 + + // 2^96 * 1 * G + + .quad 0x296fa9c59c2ec4de + .quad 0xbc8b61bf4f84f3cb + .quad 0x1c7706d917a8f908 + .quad 0x63b795fc7ad3255d + .quad 0xd598639c12ddb0a4 + .quad 
0xa5d19f30c024866b + .quad 0xd17c2f0358fce460 + .quad 0x07a195152e095e8a + .quad 0xa8368f02389e5fc8 + .quad 0x90433b02cf8de43b + .quad 0xafa1fd5dc5412643 + .quad 0x3e8fe83d032f0137 + + // 2^96 * 2 * G + + .quad 0x2f8b15b90570a294 + .quad 0x94f2427067084549 + .quad 0xde1c5ae161bbfd84 + .quad 0x75ba3b797fac4007 + .quad 0x08704c8de8efd13c + .quad 0xdfc51a8e33e03731 + .quad 0xa59d5da51260cde3 + .quad 0x22d60899a6258c86 + .quad 0x6239dbc070cdd196 + .quad 0x60fe8a8b6c7d8a9a + .quad 0xb38847bceb401260 + .quad 0x0904d07b87779e5e + + // 2^96 * 3 * G + + .quad 0xb4ce1fd4ddba919c + .quad 0xcf31db3ec74c8daa + .quad 0x2c63cc63ad86cc51 + .quad 0x43e2143fbc1dde07 + .quad 0xf4322d6648f940b9 + .quad 0x06952f0cbd2d0c39 + .quad 0x167697ada081f931 + .quad 0x6240aacebaf72a6c + .quad 0xf834749c5ba295a0 + .quad 0xd6947c5bca37d25a + .quad 0x66f13ba7e7c9316a + .quad 0x56bdaf238db40cac + + // 2^96 * 4 * G + + .quad 0x362ab9e3f53533eb + .quad 0x338568d56eb93d40 + .quad 0x9e0e14521d5a5572 + .quad 0x1d24a86d83741318 + .quad 0x1310d36cc19d3bb2 + .quad 0x062a6bb7622386b9 + .quad 0x7c9b8591d7a14f5c + .quad 0x03aa31507e1e5754 + .quad 0xf4ec7648ffd4ce1f + .quad 0xe045eaf054ac8c1c + .quad 0x88d225821d09357c + .quad 0x43b261dc9aeb4859 + + // 2^96 * 5 * G + + .quad 0xe55b1e1988bb79bb + .quad 0xa09ed07dc17a359d + .quad 0xb02c2ee2603dea33 + .quad 0x326055cf5b276bc2 + .quad 0x19513d8b6c951364 + .quad 0x94fe7126000bf47b + .quad 0x028d10ddd54f9567 + .quad 0x02b4d5e242940964 + .quad 0xb4a155cb28d18df2 + .quad 0xeacc4646186ce508 + .quad 0xc49cf4936c824389 + .quad 0x27a6c809ae5d3410 + + // 2^96 * 6 * G + + .quad 0x8ba6ebcd1f0db188 + .quad 0x37d3d73a675a5be8 + .quad 0xf22edfa315f5585a + .quad 0x2cb67174ff60a17e + .quad 0xcd2c270ac43d6954 + .quad 0xdd4a3e576a66cab2 + .quad 0x79fa592469d7036c + .quad 0x221503603d8c2599 + .quad 0x59eecdf9390be1d0 + .quad 0xa9422044728ce3f1 + .quad 0x82891c667a94f0f4 + .quad 0x7b1df4b73890f436 + + // 2^96 * 7 * G + + .quad 0xe492f2e0b3b2a224 + .quad 0x7c6c9e062b551160 + .quad 0x15eb8fe20d7f7b0e + .quad 0x61fcef2658fc5992 + .quad 0x5f2e221807f8f58c + .quad 0xe3555c9fd49409d4 + .quad 0xb2aaa88d1fb6a630 + .quad 0x68698245d352e03d + .quad 0xdbb15d852a18187a + .quad 0xf3e4aad386ddacd7 + .quad 0x44bae2810ff6c482 + .quad 0x46cf4c473daf01cf + + // 2^96 * 8 * G + + .quad 0x426525ed9ec4e5f9 + .quad 0x0e5eda0116903303 + .quad 0x72b1a7f2cbe5cadc + .quad 0x29387bcd14eb5f40 + .quad 0x213c6ea7f1498140 + .quad 0x7c1e7ef8392b4854 + .quad 0x2488c38c5629ceba + .quad 0x1065aae50d8cc5bb + .quad 0x1c2c4525df200d57 + .quad 0x5c3b2dd6bfca674a + .quad 0x0a07e7b1e1834030 + .quad 0x69a198e64f1ce716 + + // 2^100 * 1 * G + + .quad 0x7afcd613efa9d697 + .quad 0x0cc45aa41c067959 + .quad 0xa56fe104c1fada96 + .quad 0x3a73b70472e40365 + .quad 0x7b26e56b9e2d4734 + .quad 0xc4c7132b81c61675 + .quad 0xef5c9525ec9cde7f + .quad 0x39c80b16e71743ad + .quad 0x0f196e0d1b826c68 + .quad 0xf71ff0e24960e3db + .quad 0x6113167023b7436c + .quad 0x0cf0ea5877da7282 + + // 2^100 * 2 * G + + .quad 0x196c80a4ddd4ccbd + .quad 0x22e6f55d95f2dd9d + .quad 0xc75e33c740d6c71b + .quad 0x7bb51279cb3c042f + .quad 0xe332ced43ba6945a + .quad 0xde0b1361e881c05d + .quad 0x1ad40f095e67ed3b + .quad 0x5da8acdab8c63d5d + .quad 0xc4b6664a3a70159f + .quad 0x76194f0f0a904e14 + .quad 0xa5614c39a4096c13 + .quad 0x6cd0ff50979feced + + // 2^100 * 3 * G + + .quad 0xc0e067e78f4428ac + .quad 0x14835ab0a61135e3 + .quad 0xf21d14f338062935 + .quad 0x6390a4c8df04849c + .quad 0x7fecfabdb04ba18e + .quad 0xd0fc7bfc3bddbcf7 + .quad 0xa41d486e057a131c + .quad 0x641a4391f2223a61 + .quad 
0xc5c6b95aa606a8db + .quad 0x914b7f9eb06825f1 + .quad 0x2a731f6b44fc9eff + .quad 0x30ddf38562705cfc + + // 2^100 * 4 * G + + .quad 0x4e3dcbdad1bff7f9 + .quad 0xc9118e8220645717 + .quad 0xbacccebc0f189d56 + .quad 0x1b4822e9d4467668 + .quad 0x33bef2bd68bcd52c + .quad 0xc649dbb069482ef2 + .quad 0xb5b6ee0c41cb1aee + .quad 0x5c294d270212a7e5 + .quad 0xab360a7f25563781 + .quad 0x2512228a480f7958 + .quad 0xc75d05276114b4e3 + .quad 0x222d9625d976fe2a + + // 2^100 * 5 * G + + .quad 0x1c717f85b372ace1 + .quad 0x81930e694638bf18 + .quad 0x239cad056bc08b58 + .quad 0x0b34271c87f8fff4 + .quad 0x0f94be7e0a344f85 + .quad 0xeb2faa8c87f22c38 + .quad 0x9ce1e75e4ee16f0f + .quad 0x43e64e5418a08dea + .quad 0x8155e2521a35ce63 + .quad 0xbe100d4df912028e + .quad 0xbff80bf8a57ddcec + .quad 0x57342dc96d6bc6e4 + + // 2^100 * 6 * G + + .quad 0xefeef065c8ce5998 + .quad 0xbf029510b5cbeaa2 + .quad 0x8c64a10620b7c458 + .quad 0x35134fb231c24855 + .quad 0xf3c3bcb71e707bf6 + .quad 0x351d9b8c7291a762 + .quad 0x00502e6edad69a33 + .quad 0x522f521f1ec8807f + .quad 0x272c1f46f9a3902b + .quad 0xc91ba3b799657bcc + .quad 0xae614b304f8a1c0e + .quad 0x7afcaad70b99017b + + // 2^100 * 7 * G + + .quad 0xc25ded54a4b8be41 + .quad 0x902d13e11bb0e2dd + .quad 0x41f43233cde82ab2 + .quad 0x1085faa5c3aae7cb + .quad 0xa88141ecef842b6b + .quad 0x55e7b14797abe6c5 + .quad 0x8c748f9703784ffe + .quad 0x5b50a1f7afcd00b7 + .quad 0x9b840f66f1361315 + .quad 0x18462242701003e9 + .quad 0x65ed45fae4a25080 + .quad 0x0a2862393fda7320 + + // 2^100 * 8 * G + + .quad 0x46ab13c8347cbc9d + .quad 0x3849e8d499c12383 + .quad 0x4cea314087d64ac9 + .quad 0x1f354134b1a29ee7 + .quad 0x960e737b6ecb9d17 + .quad 0xfaf24948d67ceae1 + .quad 0x37e7a9b4d55e1b89 + .quad 0x5cb7173cb46c59eb + .quad 0x4a89e68b82b7abf0 + .quad 0xf41cd9279ba6b7b9 + .quad 0x16e6c210e18d876f + .quad 0x7cacdb0f7f1b09c6 + + // 2^104 * 1 * G + + .quad 0x9062b2e0d91a78bc + .quad 0x47c9889cc8509667 + .quad 0x9df54a66405070b8 + .quad 0x7369e6a92493a1bf + .quad 0xe1014434dcc5caed + .quad 0x47ed5d963c84fb33 + .quad 0x70019576ed86a0e7 + .quad 0x25b2697bd267f9e4 + .quad 0x9d673ffb13986864 + .quad 0x3ca5fbd9415dc7b8 + .quad 0xe04ecc3bdf273b5e + .quad 0x1420683db54e4cd2 + + // 2^104 * 2 * G + + .quad 0xb478bd1e249dd197 + .quad 0x620c35005e58c102 + .quad 0xfb02d32fccbaac5c + .quad 0x60b63bebf508a72d + .quad 0x34eebb6fc1cc5ad0 + .quad 0x6a1b0ce99646ac8b + .quad 0xd3b0da49a66bde53 + .quad 0x31e83b4161d081c1 + .quad 0x97e8c7129e062b4f + .quad 0x49e48f4f29320ad8 + .quad 0x5bece14b6f18683f + .quad 0x55cf1eb62d550317 + + // 2^104 * 3 * G + + .quad 0x5879101065c23d58 + .quad 0x8b9d086d5094819c + .quad 0xe2402fa912c55fa7 + .quad 0x669a6564570891d4 + .quad 0x3076b5e37df58c52 + .quad 0xd73ab9dde799cc36 + .quad 0xbd831ce34913ee20 + .quad 0x1a56fbaa62ba0133 + .quad 0x943e6b505c9dc9ec + .quad 0x302557bba77c371a + .quad 0x9873ae5641347651 + .quad 0x13c4836799c58a5c + + // 2^104 * 4 * G + + .quad 0x423a5d465ab3e1b9 + .quad 0xfc13c187c7f13f61 + .quad 0x19f83664ecb5b9b6 + .quad 0x66f80c93a637b607 + .quad 0xc4dcfb6a5d8bd080 + .quad 0xdeebc4ec571a4842 + .quad 0xd4b2e883b8e55365 + .quad 0x50bdc87dc8e5b827 + .quad 0x606d37836edfe111 + .quad 0x32353e15f011abd9 + .quad 0x64b03ac325b73b96 + .quad 0x1dd56444725fd5ae + + // 2^104 * 5 * G + + .quad 0x8fa47ff83362127d + .quad 0xbc9f6ac471cd7c15 + .quad 0x6e71454349220c8b + .quad 0x0e645912219f732e + .quad 0xc297e60008bac89a + .quad 0x7d4cea11eae1c3e0 + .quad 0xf3e38be19fe7977c + .quad 0x3a3a450f63a305cd + .quad 0x078f2f31d8394627 + .quad 0x389d3183de94a510 + .quad 0xd1e36c6d17996f80 + .quad 
0x318c8d9393a9a87b + + // 2^104 * 6 * G + + .quad 0xf2745d032afffe19 + .quad 0x0c9f3c497f24db66 + .quad 0xbc98d3e3ba8598ef + .quad 0x224c7c679a1d5314 + .quad 0x5d669e29ab1dd398 + .quad 0xfc921658342d9e3b + .quad 0x55851dfdf35973cd + .quad 0x509a41c325950af6 + .quad 0xbdc06edca6f925e9 + .quad 0x793ef3f4641b1f33 + .quad 0x82ec12809d833e89 + .quad 0x05bff02328a11389 + + // 2^104 * 7 * G + + .quad 0x3632137023cae00b + .quad 0x544acf0ad1accf59 + .quad 0x96741049d21a1c88 + .quad 0x780b8cc3fa2a44a7 + .quad 0x6881a0dd0dc512e4 + .quad 0x4fe70dc844a5fafe + .quad 0x1f748e6b8f4a5240 + .quad 0x576277cdee01a3ea + .quad 0x1ef38abc234f305f + .quad 0x9a577fbd1405de08 + .quad 0x5e82a51434e62a0d + .quad 0x5ff418726271b7a1 + + // 2^104 * 8 * G + + .quad 0x398e080c1789db9d + .quad 0xa7602025f3e778f5 + .quad 0xfa98894c06bd035d + .quad 0x106a03dc25a966be + .quad 0xe5db47e813b69540 + .quad 0xf35d2a3b432610e1 + .quad 0xac1f26e938781276 + .quad 0x29d4db8ca0a0cb69 + .quad 0xd9ad0aaf333353d0 + .quad 0x38669da5acd309e5 + .quad 0x3c57658ac888f7f0 + .quad 0x4ab38a51052cbefa + + // 2^108 * 1 * G + + .quad 0xdfdacbee4324c0e9 + .quad 0x054442883f955bb7 + .quad 0xdef7aaa8ea31609f + .quad 0x68aee70642287cff + .quad 0xf68fe2e8809de054 + .quad 0xe3bc096a9c82bad1 + .quad 0x076353d40aadbf45 + .quad 0x7b9b1fb5dea1959e + .quad 0xf01cc8f17471cc0c + .quad 0x95242e37579082bb + .quad 0x27776093d3e46b5f + .quad 0x2d13d55a28bd85fb + + // 2^108 * 2 * G + + .quad 0xfac5d2065b35b8da + .quad 0xa8da8a9a85624bb7 + .quad 0xccd2ca913d21cd0f + .quad 0x6b8341ee8bf90d58 + .quad 0xbf019cce7aee7a52 + .quad 0xa8ded2b6e454ead3 + .quad 0x3c619f0b87a8bb19 + .quad 0x3619b5d7560916d8 + .quad 0x3579f26b0282c4b2 + .quad 0x64d592f24fafefae + .quad 0xb7cded7b28c8c7c0 + .quad 0x6a927b6b7173a8d7 + + // 2^108 * 3 * G + + .quad 0x1f6db24f986e4656 + .quad 0x1021c02ed1e9105b + .quad 0xf8ff3fff2cc0a375 + .quad 0x1d2a6bf8c6c82592 + .quad 0x8d7040863ece88eb + .quad 0xf0e307a980eec08c + .quad 0xac2250610d788fda + .quad 0x056d92a43a0d478d + .quad 0x1b05a196fc3da5a1 + .quad 0x77d7a8c243b59ed0 + .quad 0x06da3d6297d17918 + .quad 0x66fbb494f12353f7 + + // 2^108 * 4 * G + + .quad 0x751a50b9d85c0fb8 + .quad 0xd1afdc258bcf097b + .quad 0x2f16a6a38309a969 + .quad 0x14ddff9ee5b00659 + .quad 0xd6d70996f12309d6 + .quad 0xdbfb2385e9c3d539 + .quad 0x46d602b0f7552411 + .quad 0x270a0b0557843e0c + .quad 0x61ff0640a7862bcc + .quad 0x81cac09a5f11abfe + .quad 0x9047830455d12abb + .quad 0x19a4bde1945ae873 + + // 2^108 * 5 * G + + .quad 0x9b9f26f520a6200a + .quad 0x64804443cf13eaf8 + .quad 0x8a63673f8631edd3 + .quad 0x72bbbce11ed39dc1 + .quad 0x40c709dec076c49f + .quad 0x657bfaf27f3e53f6 + .quad 0x40662331eca042c4 + .quad 0x14b375487eb4df04 + .quad 0xae853c94ab66dc47 + .quad 0xeb62343edf762d6e + .quad 0xf08e0e186fb2f7d1 + .quad 0x4f0b1c02700ab37a + + // 2^108 * 6 * G + + .quad 0xe1706787d81951fa + .quad 0xa10a2c8eb290c77b + .quad 0xe7382fa03ed66773 + .quad 0x0a4d84710bcc4b54 + .quad 0x79fd21ccc1b2e23f + .quad 0x4ae7c281453df52a + .quad 0xc8172ec9d151486b + .quad 0x68abe9443e0a7534 + .quad 0xda12c6c407831dcb + .quad 0x0da230d74d5c510d + .quad 0x4ab1531e6bd404e1 + .quad 0x4106b166bcf440ef + + // 2^108 * 7 * G + + .quad 0x02e57a421cd23668 + .quad 0x4ad9fb5d0eaef6fd + .quad 0x954e6727b1244480 + .quad 0x7f792f9d2699f331 + .quad 0xa485ccd539e4ecf2 + .quad 0x5aa3f3ad0555bab5 + .quad 0x145e3439937df82d + .quad 0x1238b51e1214283f + .quad 0x0b886b925fd4d924 + .quad 0x60906f7a3626a80d + .quad 0xecd367b4b98abd12 + .quad 0x2876beb1def344cf + + // 2^108 * 8 * G + + .quad 0xdc84e93563144691 + .quad 
0x632fe8a0d61f23f4 + .quad 0x4caa800612a9a8d5 + .quad 0x48f9dbfa0e9918d3 + .quad 0xd594b3333a8a85f8 + .quad 0x4ea37689e78d7d58 + .quad 0x73bf9f455e8e351f + .quad 0x5507d7d2bc41ebb4 + .quad 0x1ceb2903299572fc + .quad 0x7c8ccaa29502d0ee + .quad 0x91bfa43411cce67b + .quad 0x5784481964a831e7 + + // 2^112 * 1 * G + + .quad 0xda7c2b256768d593 + .quad 0x98c1c0574422ca13 + .quad 0xf1a80bd5ca0ace1d + .quad 0x29cdd1adc088a690 + .quad 0xd6cfd1ef5fddc09c + .quad 0xe82b3efdf7575dce + .quad 0x25d56b5d201634c2 + .quad 0x3041c6bb04ed2b9b + .quad 0x0ff2f2f9d956e148 + .quad 0xade797759f356b2e + .quad 0x1a4698bb5f6c025c + .quad 0x104bbd6814049a7b + + // 2^112 * 2 * G + + .quad 0x51f0fd3168f1ed67 + .quad 0x2c811dcdd86f3bc2 + .quad 0x44dc5c4304d2f2de + .quad 0x5be8cc57092a7149 + .quad 0xa95d9a5fd67ff163 + .quad 0xe92be69d4cc75681 + .quad 0xb7f8024cde20f257 + .quad 0x204f2a20fb072df5 + .quad 0xc8143b3d30ebb079 + .quad 0x7589155abd652e30 + .quad 0x653c3c318f6d5c31 + .quad 0x2570fb17c279161f + + // 2^112 * 3 * G + + .quad 0x3efa367f2cb61575 + .quad 0xf5f96f761cd6026c + .quad 0xe8c7142a65b52562 + .quad 0x3dcb65ea53030acd + .quad 0x192ea9550bb8245a + .quad 0xc8e6fba88f9050d1 + .quad 0x7986ea2d88a4c935 + .quad 0x241c5f91de018668 + .quad 0x28d8172940de6caa + .quad 0x8fbf2cf022d9733a + .quad 0x16d7fcdd235b01d1 + .quad 0x08420edd5fcdf0e5 + + // 2^112 * 4 * G + + .quad 0xcdff20ab8362fa4a + .quad 0x57e118d4e21a3e6e + .quad 0xe3179617fc39e62b + .quad 0x0d9a53efbc1769fd + .quad 0x0358c34e04f410ce + .quad 0xb6135b5a276e0685 + .quad 0x5d9670c7ebb91521 + .quad 0x04d654f321db889c + .quad 0x5e7dc116ddbdb5d5 + .quad 0x2954deb68da5dd2d + .quad 0x1cb608173334a292 + .quad 0x4a7a4f2618991ad7 + + // 2^112 * 5 * G + + .quad 0xf4a718025fb15f95 + .quad 0x3df65f346b5c1b8f + .quad 0xcdfcf08500e01112 + .quad 0x11b50c4cddd31848 + .quad 0x24c3b291af372a4b + .quad 0x93da8270718147f2 + .quad 0xdd84856486899ef2 + .quad 0x4a96314223e0ee33 + .quad 0xa6e8274408a4ffd6 + .quad 0x738e177e9c1576d9 + .quad 0x773348b63d02b3f2 + .quad 0x4f4bce4dce6bcc51 + + // 2^112 * 6 * G + + .quad 0xa71fce5ae2242584 + .quad 0x26ea725692f58a9e + .quad 0xd21a09d71cea3cf4 + .quad 0x73fcdd14b71c01e6 + .quad 0x30e2616ec49d0b6f + .quad 0xe456718fcaec2317 + .quad 0x48eb409bf26b4fa6 + .quad 0x3042cee561595f37 + .quad 0x427e7079449bac41 + .quad 0x855ae36dbce2310a + .quad 0x4cae76215f841a7c + .quad 0x389e740c9a9ce1d6 + + // 2^112 * 7 * G + + .quad 0x64fcb3ae34dcb9ce + .quad 0x97500323e348d0ad + .quad 0x45b3f07d62c6381b + .quad 0x61545379465a6788 + .quad 0xc9bd78f6570eac28 + .quad 0xe55b0b3227919ce1 + .quad 0x65fc3eaba19b91ed + .quad 0x25c425e5d6263690 + .quad 0x3f3e06a6f1d7de6e + .quad 0x3ef976278e062308 + .quad 0x8c14f6264e8a6c77 + .quad 0x6539a08915484759 + + // 2^112 * 8 * G + + .quad 0xe9d21f74c3d2f773 + .quad 0xc150544125c46845 + .quad 0x624e5ce8f9b99e33 + .quad 0x11c5e4aac5cd186c + .quad 0xddc4dbd414bb4a19 + .quad 0x19b2bc3c98424f8e + .quad 0x48a89fd736ca7169 + .quad 0x0f65320ef019bd90 + .quad 0xd486d1b1cafde0c6 + .quad 0x4f3fe6e3163b5181 + .quad 0x59a8af0dfaf2939a + .quad 0x4cabc7bdec33072a + + // 2^116 * 1 * G + + .quad 0x16faa8fb532f7428 + .quad 0xdbd42ea046a4e272 + .quad 0x5337653b8b9ea480 + .quad 0x4065947223973f03 + .quad 0xf7c0a19c1a54a044 + .quad 0x4a1c5e2477bd9fbb + .quad 0xa6e3ca115af22972 + .quad 0x1819bb953f2e9e0d + .quad 0x498fbb795e042e84 + .quad 0x7d0dd89a7698b714 + .quad 0x8bfb0ba427fe6295 + .quad 0x36ba82e721200524 + + // 2^116 * 2 * G + + .quad 0xd60ecbb74245ec41 + .quad 0xfd9be89e34348716 + .quad 0xc9240afee42284de + .quad 0x4472f648d0531db4 + .quad 
0xc8d69d0a57274ed5 + .quad 0x45ba803260804b17 + .quad 0xdf3cda102255dfac + .quad 0x77d221232709b339 + .quad 0x498a6d7064ad94d8 + .quad 0xa5b5c8fd9af62263 + .quad 0x8ca8ed0545c141f4 + .quad 0x2c63bec3662d358c + + // 2^116 * 3 * G + + .quad 0x7fe60d8bea787955 + .quad 0xb9dc117eb5f401b7 + .quad 0x91c7c09a19355cce + .quad 0x22692ef59442bedf + .quad 0x9a518b3a8586f8bf + .quad 0x9ee71af6cbb196f0 + .quad 0xaa0625e6a2385cf2 + .quad 0x1deb2176ddd7c8d1 + .quad 0x8563d19a2066cf6c + .quad 0x401bfd8c4dcc7cd7 + .quad 0xd976a6becd0d8f62 + .quad 0x67cfd773a278b05e + + // 2^116 * 4 * G + + .quad 0x8dec31faef3ee475 + .quad 0x99dbff8a9e22fd92 + .quad 0x512d11594e26cab1 + .quad 0x0cde561eec4310b9 + .quad 0x2d5fa9855a4e586a + .quad 0x65f8f7a449beab7e + .quad 0xaa074dddf21d33d3 + .quad 0x185cba721bcb9dee + .quad 0x93869da3f4e3cb41 + .quad 0xbf0392f540f7977e + .quad 0x026204fcd0463b83 + .quad 0x3ec91a769eec6eed + + // 2^116 * 5 * G + + .quad 0x1e9df75bf78166ad + .quad 0x4dfda838eb0cd7af + .quad 0xba002ed8c1eaf988 + .quad 0x13fedb3e11f33cfc + .quad 0x0fad2fb7b0a3402f + .quad 0x46615ecbfb69f4a8 + .quad 0xf745bcc8c5f8eaa6 + .quad 0x7a5fa8794a94e896 + .quad 0x52958faa13cd67a1 + .quad 0x965ee0818bdbb517 + .quad 0x16e58daa2e8845b3 + .quad 0x357d397d5499da8f + + // 2^116 * 6 * G + + .quad 0x1ebfa05fb0bace6c + .quad 0xc934620c1caf9a1e + .quad 0xcc771cc41d82b61a + .quad 0x2d94a16aa5f74fec + .quad 0x481dacb4194bfbf8 + .quad 0x4d77e3f1bae58299 + .quad 0x1ef4612e7d1372a0 + .quad 0x3a8d867e70ff69e1 + .quad 0x6f58cd5d55aff958 + .quad 0xba3eaa5c75567721 + .quad 0x75c123999165227d + .quad 0x69be1343c2f2b35e + + // 2^116 * 7 * G + + .quad 0x0e091d5ee197c92a + .quad 0x4f51019f2945119f + .quad 0x143679b9f034e99c + .quad 0x7d88112e4d24c696 + .quad 0x82bbbdac684b8de3 + .quad 0xa2f4c7d03fca0718 + .quad 0x337f92fbe096aaa8 + .quad 0x200d4d8c63587376 + .quad 0x208aed4b4893b32b + .quad 0x3efbf23ebe59b964 + .quad 0xd762deb0dba5e507 + .quad 0x69607bd681bd9d94 + + // 2^116 * 8 * G + + .quad 0xf6be021068de1ce1 + .quad 0xe8d518e70edcbc1f + .quad 0xe3effdd01b5505a5 + .quad 0x35f63353d3ec3fd0 + .quad 0x3b7f3bd49323a902 + .quad 0x7c21b5566b2c6e53 + .quad 0xe5ba8ff53a7852a7 + .quad 0x28bc77a5838ece00 + .quad 0x63ba78a8e25d8036 + .quad 0x63651e0094333490 + .quad 0x48d82f20288ce532 + .quad 0x3a31abfa36b57524 + + // 2^120 * 1 * G + + .quad 0x239e9624089c0a2e + .quad 0xc748c4c03afe4738 + .quad 0x17dbed2a764fa12a + .quad 0x639b93f0321c8582 + .quad 0xc08f788f3f78d289 + .quad 0xfe30a72ca1404d9f + .quad 0xf2778bfccf65cc9d + .quad 0x7ee498165acb2021 + .quad 0x7bd508e39111a1c3 + .quad 0x2b2b90d480907489 + .quad 0xe7d2aec2ae72fd19 + .quad 0x0edf493c85b602a6 + + // 2^120 * 2 * G + + .quad 0xaecc8158599b5a68 + .quad 0xea574f0febade20e + .quad 0x4fe41d7422b67f07 + .quad 0x403b92e3019d4fb4 + .quad 0x6767c4d284764113 + .quad 0xa090403ff7f5f835 + .quad 0x1c8fcffacae6bede + .quad 0x04c00c54d1dfa369 + .quad 0x4dc22f818b465cf8 + .quad 0x71a0f35a1480eff8 + .quad 0xaee8bfad04c7d657 + .quad 0x355bb12ab26176f4 + + // 2^120 * 3 * G + + .quad 0xa71e64cc7493bbf4 + .quad 0xe5bd84d9eca3b0c3 + .quad 0x0a6bc50cfa05e785 + .quad 0x0f9b8132182ec312 + .quad 0xa301dac75a8c7318 + .quad 0xed90039db3ceaa11 + .quad 0x6f077cbf3bae3f2d + .quad 0x7518eaf8e052ad8e + .quad 0xa48859c41b7f6c32 + .quad 0x0f2d60bcf4383298 + .quad 0x1815a929c9b1d1d9 + .quad 0x47c3871bbb1755c4 + + // 2^120 * 4 * G + + .quad 0x5144539771ec4f48 + .quad 0xf805b17dc98c5d6e + .quad 0xf762c11a47c3c66b + .quad 0x00b89b85764699dc + .quad 0xfbe65d50c85066b0 + .quad 0x62ecc4b0b3a299b0 + .quad 0xe53754ea441ae8e0 + .quad 
0x08fea02ce8d48d5f + .quad 0x824ddd7668deead0 + .quad 0xc86445204b685d23 + .quad 0xb514cfcd5d89d665 + .quad 0x473829a74f75d537 + + // 2^120 * 5 * G + + .quad 0x82d2da754679c418 + .quad 0xe63bd7d8b2618df0 + .quad 0x355eef24ac47eb0a + .quad 0x2078684c4833c6b4 + .quad 0x23d9533aad3902c9 + .quad 0x64c2ddceef03588f + .quad 0x15257390cfe12fb4 + .quad 0x6c668b4d44e4d390 + .quad 0x3b48cf217a78820c + .quad 0xf76a0ab281273e97 + .quad 0xa96c65a78c8eed7b + .quad 0x7411a6054f8a433f + + // 2^120 * 6 * G + + .quad 0x4d659d32b99dc86d + .quad 0x044cdc75603af115 + .quad 0xb34c712cdcc2e488 + .quad 0x7c136574fb8134ff + .quad 0x579ae53d18b175b4 + .quad 0x68713159f392a102 + .quad 0x8455ecba1eef35f5 + .quad 0x1ec9a872458c398f + .quad 0xb8e6a4d400a2509b + .quad 0x9b81d7020bc882b4 + .quad 0x57e7cc9bf1957561 + .quad 0x3add88a5c7cd6460 + + // 2^120 * 7 * G + + .quad 0xab895770b635dcf2 + .quad 0x02dfef6cf66c1fbc + .quad 0x85530268beb6d187 + .quad 0x249929fccc879e74 + .quad 0x85c298d459393046 + .quad 0x8f7e35985ff659ec + .quad 0x1d2ca22af2f66e3a + .quad 0x61ba1131a406a720 + .quad 0xa3d0a0f116959029 + .quad 0x023b6b6cba7ebd89 + .quad 0x7bf15a3e26783307 + .quad 0x5620310cbbd8ece7 + + // 2^120 * 8 * G + + .quad 0x528993434934d643 + .quad 0xb9dbf806a51222f5 + .quad 0x8f6d878fc3f41c22 + .quad 0x37676a2a4d9d9730 + .quad 0x6646b5f477e285d6 + .quad 0x40e8ff676c8f6193 + .quad 0xa6ec7311abb594dd + .quad 0x7ec846f3658cec4d + .quad 0x9b5e8f3f1da22ec7 + .quad 0x130f1d776c01cd13 + .quad 0x214c8fcfa2989fb8 + .quad 0x6daaf723399b9dd5 + + // 2^124 * 1 * G + + .quad 0x591e4a5610628564 + .quad 0x2a4bb87ca8b4df34 + .quad 0xde2a2572e7a38e43 + .quad 0x3cbdabd9fee5046e + .quad 0x81aebbdd2cd13070 + .quad 0x962e4325f85a0e9e + .quad 0xde9391aacadffecb + .quad 0x53177fda52c230e6 + .quad 0xa7bc970650b9de79 + .quad 0x3d12a7fbc301b59b + .quad 0x02652e68d36ae38c + .quad 0x79d739835a6199dc + + // 2^124 * 2 * G + + .quad 0xd9354df64131c1bd + .quad 0x758094a186ec5822 + .quad 0x4464ee12e459f3c2 + .quad 0x6c11fce4cb133282 + .quad 0x21c9d9920d591737 + .quad 0x9bea41d2e9b46cd6 + .quad 0xe20e84200d89bfca + .quad 0x79d99f946eae5ff8 + .quad 0xf17b483568673205 + .quad 0x387deae83caad96c + .quad 0x61b471fd56ffe386 + .quad 0x31741195b745a599 + + // 2^124 * 3 * G + + .quad 0xe8d10190b77a360b + .quad 0x99b983209995e702 + .quad 0xbd4fdff8fa0247aa + .quad 0x2772e344e0d36a87 + .quad 0x17f8ba683b02a047 + .quad 0x50212096feefb6c8 + .quad 0x70139be21556cbe2 + .quad 0x203e44a11d98915b + .quad 0xd6863eba37b9e39f + .quad 0x105bc169723b5a23 + .quad 0x104f6459a65c0762 + .quad 0x567951295b4d38d4 + + // 2^124 * 4 * G + + .quad 0x535fd60613037524 + .quad 0xe210adf6b0fbc26a + .quad 0xac8d0a9b23e990ae + .quad 0x47204d08d72fdbf9 + .quad 0x07242eb30d4b497f + .quad 0x1ef96306b9bccc87 + .quad 0x37950934d8116f45 + .quad 0x05468d6201405b04 + .quad 0x00f565a9f93267de + .quad 0xcecfd78dc0d58e8a + .quad 0xa215e2dcf318e28e + .quad 0x4599ee919b633352 + + // 2^124 * 5 * G + + .quad 0xd3c220ca70e0e76b + .quad 0xb12bea58ea9f3094 + .quad 0x294ddec8c3271282 + .quad 0x0c3539e1a1d1d028 + .quad 0xac746d6b861ae579 + .quad 0x31ab0650f6aea9dc + .quad 0x241d661140256d4c + .quad 0x2f485e853d21a5de + .quad 0x329744839c0833f3 + .quad 0x6fe6257fd2abc484 + .quad 0x5327d1814b358817 + .quad 0x65712585893fe9bc + + // 2^124 * 6 * G + + .quad 0x9c102fb732a61161 + .quad 0xe48e10dd34d520a8 + .quad 0x365c63546f9a9176 + .quad 0x32f6fe4c046f6006 + .quad 0x81c29f1bd708ee3f + .quad 0xddcb5a05ae6407d0 + .quad 0x97aec1d7d2a3eba7 + .quad 0x1590521a91d50831 + .quad 0x40a3a11ec7910acc + .quad 0x9013dff8f16d27ae + .quad 
0x1a9720d8abb195d4 + .quad 0x1bb9fe452ea98463 + + // 2^124 * 7 * G + + .quad 0xe9d1d950b3d54f9e + .quad 0x2d5f9cbee00d33c1 + .quad 0x51c2c656a04fc6ac + .quad 0x65c091ee3c1cbcc9 + .quad 0xcf5e6c95cc36747c + .quad 0x294201536b0bc30d + .quad 0x453ac67cee797af0 + .quad 0x5eae6ab32a8bb3c9 + .quad 0x7083661114f118ea + .quad 0x2b37b87b94349cad + .quad 0x7273f51cb4e99f40 + .quad 0x78a2a95823d75698 + + // 2^124 * 8 * G + + .quad 0xa2b072e95c8c2ace + .quad 0x69cffc96651e9c4b + .quad 0x44328ef842e7b42b + .quad 0x5dd996c122aadeb3 + .quad 0xb4f23c425ef83207 + .quad 0xabf894d3c9a934b5 + .quad 0xd0708c1339fd87f7 + .quad 0x1876789117166130 + .quad 0x925b5ef0670c507c + .quad 0x819bc842b93c33bf + .quad 0x10792e9a70dd003f + .quad 0x59ad4b7a6e28dc74 + + // 2^128 * 1 * G + + .quad 0x5f3a7562eb3dbe47 + .quad 0xf7ea38548ebda0b8 + .quad 0x00c3e53145747299 + .quad 0x1304e9e71627d551 + .quad 0x583b04bfacad8ea2 + .quad 0x29b743e8148be884 + .quad 0x2b1e583b0810c5db + .quad 0x2b5449e58eb3bbaa + .quad 0x789814d26adc9cfe + .quad 0x3c1bab3f8b48dd0b + .quad 0xda0fe1fff979c60a + .quad 0x4468de2d7c2dd693 + + // 2^128 * 2 * G + + .quad 0x51bb355e9419469e + .quad 0x33e6dc4c23ddc754 + .quad 0x93a5b6d6447f9962 + .quad 0x6cce7c6ffb44bd63 + .quad 0x4b9ad8c6f86307ce + .quad 0x21113531435d0c28 + .quad 0xd4a866c5657a772c + .quad 0x5da6427e63247352 + .quad 0x1a94c688deac22ca + .quad 0xb9066ef7bbae1ff8 + .quad 0x88ad8c388d59580f + .quad 0x58f29abfe79f2ca8 + + // 2^128 * 3 * G + + .quad 0xe90ecfab8de73e68 + .quad 0x54036f9f377e76a5 + .quad 0xf0495b0bbe015982 + .quad 0x577629c4a7f41e36 + .quad 0x4b5a64bf710ecdf6 + .quad 0xb14ce538462c293c + .quad 0x3643d056d50b3ab9 + .quad 0x6af93724185b4870 + .quad 0x3220024509c6a888 + .quad 0xd2e036134b558973 + .quad 0x83e236233c33289f + .quad 0x701f25bb0caec18f + + // 2^128 * 4 * G + + .quad 0xc3a8b0f8e4616ced + .quad 0xf700660e9e25a87d + .quad 0x61e3061ff4bca59c + .quad 0x2e0c92bfbdc40be9 + .quad 0x9d18f6d97cbec113 + .quad 0x844a06e674bfdbe4 + .quad 0x20f5b522ac4e60d6 + .quad 0x720a5bc050955e51 + .quad 0x0c3f09439b805a35 + .quad 0xe84e8b376242abfc + .quad 0x691417f35c229346 + .quad 0x0e9b9cbb144ef0ec + + // 2^128 * 5 * G + + .quad 0xfbbad48ffb5720ad + .quad 0xee81916bdbf90d0e + .quad 0xd4813152635543bf + .quad 0x221104eb3f337bd8 + .quad 0x8dee9bd55db1beee + .quad 0xc9c3ab370a723fb9 + .quad 0x44a8f1bf1c68d791 + .quad 0x366d44191cfd3cde + .quad 0x9e3c1743f2bc8c14 + .quad 0x2eda26fcb5856c3b + .quad 0xccb82f0e68a7fb97 + .quad 0x4167a4e6bc593244 + + // 2^128 * 6 * G + + .quad 0x643b9d2876f62700 + .quad 0x5d1d9d400e7668eb + .quad 0x1b4b430321fc0684 + .quad 0x7938bb7e2255246a + .quad 0xc2be2665f8ce8fee + .quad 0xe967ff14e880d62c + .quad 0xf12e6e7e2f364eee + .quad 0x34b33370cb7ed2f6 + .quad 0xcdc591ee8681d6cc + .quad 0xce02109ced85a753 + .quad 0xed7485c158808883 + .quad 0x1176fc6e2dfe65e4 + + // 2^128 * 7 * G + + .quad 0xb4af6cd05b9c619b + .quad 0x2ddfc9f4b2a58480 + .quad 0x3d4fa502ebe94dc4 + .quad 0x08fc3a4c677d5f34 + .quad 0xdb90e28949770eb8 + .quad 0x98fbcc2aacf440a3 + .quad 0x21354ffeded7879b + .quad 0x1f6a3e54f26906b6 + .quad 0x60a4c199d30734ea + .quad 0x40c085b631165cd6 + .quad 0xe2333e23f7598295 + .quad 0x4f2fad0116b900d1 + + // 2^128 * 8 * G + + .quad 0x44beb24194ae4e54 + .quad 0x5f541c511857ef6c + .quad 0xa61e6b2d368d0498 + .quad 0x445484a4972ef7ab + .quad 0x962cd91db73bb638 + .quad 0xe60577aafc129c08 + .quad 0x6f619b39f3b61689 + .quad 0x3451995f2944ee81 + .quad 0x9152fcd09fea7d7c + .quad 0x4a816c94b0935cf6 + .quad 0x258e9aaa47285c40 + .quad 0x10b89ca6042893b7 + + // 2^132 * 1 * G + + .quad 
0x9b2a426e3b646025 + .quad 0x32127190385ce4cf + .quad 0xa25cffc2dd6dea45 + .quad 0x06409010bea8de75 + .quad 0xd67cded679d34aa0 + .quad 0xcc0b9ec0cc4db39f + .quad 0xa535a456e35d190f + .quad 0x2e05d9eaf61f6fef + .quad 0xc447901ad61beb59 + .quad 0x661f19bce5dc880a + .quad 0x24685482b7ca6827 + .quad 0x293c778cefe07f26 + + // 2^132 * 2 * G + + .quad 0x86809e7007069096 + .quad 0xaad75b15e4e50189 + .quad 0x07f35715a21a0147 + .quad 0x0487f3f112815d5e + .quad 0x16c795d6a11ff200 + .quad 0xcb70d0e2b15815c9 + .quad 0x89f293209b5395b5 + .quad 0x50b8c2d031e47b4f + .quad 0x48350c08068a4962 + .quad 0x6ffdd05351092c9a + .quad 0x17af4f4aaf6fc8dd + .quad 0x4b0553b53cdba58b + + // 2^132 * 3 * G + + .quad 0x9c65fcbe1b32ff79 + .quad 0xeb75ea9f03b50f9b + .quad 0xfced2a6c6c07e606 + .quad 0x35106cd551717908 + .quad 0xbf05211b27c152d4 + .quad 0x5ec26849bd1af639 + .quad 0x5e0b2caa8e6fab98 + .quad 0x054c8bdd50bd0840 + .quad 0x38a0b12f1dcf073d + .quad 0x4b60a8a3b7f6a276 + .quad 0xfed5ac25d3404f9a + .quad 0x72e82d5e5505c229 + + // 2^132 * 4 * G + + .quad 0x6b0b697ff0d844c8 + .quad 0xbb12f85cd979cb49 + .quad 0xd2a541c6c1da0f1f + .quad 0x7b7c242958ce7211 + .quad 0x00d9cdfd69771d02 + .quad 0x410276cd6cfbf17e + .quad 0x4c45306c1cb12ec7 + .quad 0x2857bf1627500861 + .quad 0x9f21903f0101689e + .quad 0xd779dfd3bf861005 + .quad 0xa122ee5f3deb0f1b + .quad 0x510df84b485a00d4 + + // 2^132 * 5 * G + + .quad 0xa54133bb9277a1fa + .quad 0x74ec3b6263991237 + .quad 0x1a3c54dc35d2f15a + .quad 0x2d347144e482ba3a + .quad 0x24b3c887c70ac15e + .quad 0xb0f3a557fb81b732 + .quad 0x9b2cde2fe578cc1b + .quad 0x4cf7ed0703b54f8e + .quad 0x6bd47c6598fbee0f + .quad 0x9e4733e2ab55be2d + .quad 0x1093f624127610c5 + .quad 0x4e05e26ad0a1eaa4 + + // 2^132 * 6 * G + + .quad 0xda9b6b624b531f20 + .quad 0x429a760e77509abb + .quad 0xdbe9f522e823cb80 + .quad 0x618f1856880c8f82 + .quad 0x1833c773e18fe6c0 + .quad 0xe3c4711ad3c87265 + .quad 0x3bfd3c4f0116b283 + .quad 0x1955875eb4cd4db8 + .quad 0x6da6de8f0e399799 + .quad 0x7ad61aa440fda178 + .quad 0xb32cd8105e3563dd + .quad 0x15f6beae2ae340ae + + // 2^132 * 7 * G + + .quad 0x862bcb0c31ec3a62 + .quad 0x810e2b451138f3c2 + .quad 0x788ec4b839dac2a4 + .quad 0x28f76867ae2a9281 + .quad 0xba9a0f7b9245e215 + .quad 0xf368612dd98c0dbb + .quad 0x2e84e4cbf220b020 + .quad 0x6ba92fe962d90eda + .quad 0x3e4df9655884e2aa + .quad 0xbd62fbdbdbd465a5 + .quad 0xd7596caa0de9e524 + .quad 0x6e8042ccb2b1b3d7 + + // 2^132 * 8 * G + + .quad 0xf10d3c29ce28ca6e + .quad 0xbad34540fcb6093d + .quad 0xe7426ed7a2ea2d3f + .quad 0x08af9d4e4ff298b9 + .quad 0x1530653616521f7e + .quad 0x660d06b896203dba + .quad 0x2d3989bc545f0879 + .quad 0x4b5303af78ebd7b0 + .quad 0x72f8a6c3bebcbde8 + .quad 0x4f0fca4adc3a8e89 + .quad 0x6fa9d4e8c7bfdf7a + .quad 0x0dcf2d679b624eb7 + + // 2^136 * 1 * G + + .quad 0x3d5947499718289c + .quad 0x12ebf8c524533f26 + .quad 0x0262bfcb14c3ef15 + .quad 0x20b878d577b7518e + .quad 0x753941be5a45f06e + .quad 0xd07caeed6d9c5f65 + .quad 0x11776b9c72ff51b6 + .quad 0x17d2d1d9ef0d4da9 + .quad 0x27f2af18073f3e6a + .quad 0xfd3fe519d7521069 + .quad 0x22e3b72c3ca60022 + .quad 0x72214f63cc65c6a7 + + // 2^136 * 2 * G + + .quad 0xb4e37f405307a693 + .quad 0xaba714d72f336795 + .quad 0xd6fbd0a773761099 + .quad 0x5fdf48c58171cbc9 + .quad 0x1d9db7b9f43b29c9 + .quad 0xd605824a4f518f75 + .quad 0xf2c072bd312f9dc4 + .quad 0x1f24ac855a1545b0 + .quad 0x24d608328e9505aa + .quad 0x4748c1d10c1420ee + .quad 0xc7ffe45c06fb25a2 + .quad 0x00ba739e2ae395e6 + + // 2^136 * 3 * G + + .quad 0x592e98de5c8790d6 + .quad 0xe5bfb7d345c2a2df + .quad 0x115a3b60f9b49922 + .quad 
0x03283a3e67ad78f3 + .quad 0xae4426f5ea88bb26 + .quad 0x360679d984973bfb + .quad 0x5c9f030c26694e50 + .quad 0x72297de7d518d226 + .quad 0x48241dc7be0cb939 + .quad 0x32f19b4d8b633080 + .quad 0xd3dfc90d02289308 + .quad 0x05e1296846271945 + + // 2^136 * 4 * G + + .quad 0xba82eeb32d9c495a + .quad 0xceefc8fcf12bb97c + .quad 0xb02dabae93b5d1e0 + .quad 0x39c00c9c13698d9b + .quad 0xadbfbbc8242c4550 + .quad 0xbcc80cecd03081d9 + .quad 0x843566a6f5c8df92 + .quad 0x78cf25d38258ce4c + .quad 0x15ae6b8e31489d68 + .quad 0xaa851cab9c2bf087 + .quad 0xc9a75a97f04efa05 + .quad 0x006b52076b3ff832 + + // 2^136 * 5 * G + + .quad 0x29e0cfe19d95781c + .quad 0xb681df18966310e2 + .quad 0x57df39d370516b39 + .quad 0x4d57e3443bc76122 + .quad 0xf5cb7e16b9ce082d + .quad 0x3407f14c417abc29 + .quad 0xd4b36bce2bf4a7ab + .quad 0x7de2e9561a9f75ce + .quad 0xde70d4f4b6a55ecb + .quad 0x4801527f5d85db99 + .quad 0xdbc9c440d3ee9a81 + .quad 0x6b2a90af1a6029ed + + // 2^136 * 6 * G + + .quad 0x6923f4fc9ae61e97 + .quad 0x5735281de03f5fd1 + .quad 0xa764ae43e6edd12d + .quad 0x5fd8f4e9d12d3e4a + .quad 0x77ebf3245bb2d80a + .quad 0xd8301b472fb9079b + .quad 0xc647e6f24cee7333 + .quad 0x465812c8276c2109 + .quad 0x4d43beb22a1062d9 + .quad 0x7065fb753831dc16 + .quad 0x180d4a7bde2968d7 + .quad 0x05b32c2b1cb16790 + + // 2^136 * 7 * G + + .quad 0xc8c05eccd24da8fd + .quad 0xa1cf1aac05dfef83 + .quad 0xdbbeeff27df9cd61 + .quad 0x3b5556a37b471e99 + .quad 0xf7fca42c7ad58195 + .quad 0x3214286e4333f3cc + .quad 0xb6c29d0d340b979d + .quad 0x31771a48567307e1 + .quad 0x32b0c524e14dd482 + .quad 0xedb351541a2ba4b6 + .quad 0xa3d16048282b5af3 + .quad 0x4fc079d27a7336eb + + // 2^136 * 8 * G + + .quad 0x51c938b089bf2f7f + .quad 0x2497bd6502dfe9a7 + .quad 0xffffc09c7880e453 + .quad 0x124567cecaf98e92 + .quad 0xdc348b440c86c50d + .quad 0x1337cbc9cc94e651 + .quad 0x6422f74d643e3cb9 + .quad 0x241170c2bae3cd08 + .quad 0x3ff9ab860ac473b4 + .quad 0xf0911dee0113e435 + .quad 0x4ae75060ebc6c4af + .quad 0x3f8612966c87000d + + // 2^140 * 1 * G + + .quad 0x0c9c5303f7957be4 + .quad 0xa3c31a20e085c145 + .quad 0xb0721d71d0850050 + .quad 0x0aba390eab0bf2da + .quad 0x529fdffe638c7bf3 + .quad 0xdf2b9e60388b4995 + .quad 0xe027b34f1bad0249 + .quad 0x7bc92fc9b9fa74ed + .quad 0x9f97ef2e801ad9f9 + .quad 0x83697d5479afda3a + .quad 0xe906b3ffbd596b50 + .quad 0x02672b37dd3fb8e0 + + // 2^140 * 2 * G + + .quad 0x48b2ca8b260885e4 + .quad 0xa4286bec82b34c1c + .quad 0x937e1a2617f58f74 + .quad 0x741d1fcbab2ca2a5 + .quad 0xee9ba729398ca7f5 + .quad 0xeb9ca6257a4849db + .quad 0x29eb29ce7ec544e1 + .quad 0x232ca21ef736e2c8 + .quad 0xbf61423d253fcb17 + .quad 0x08803ceafa39eb14 + .quad 0xf18602df9851c7af + .quad 0x0400f3a049e3414b + + // 2^140 * 3 * G + + .quad 0xabce0476ba61c55b + .quad 0x36a3d6d7c4d39716 + .quad 0x6eb259d5e8d82d09 + .quad 0x0c9176e984d756fb + .quad 0x2efba412a06e7b06 + .quad 0x146785452c8d2560 + .quad 0xdf9713ebd67a91c7 + .quad 0x32830ac7157eadf3 + .quad 0x0e782a7ab73769e8 + .quad 0x04a05d7875b18e2c + .quad 0x29525226ebcceae1 + .quad 0x0d794f8383eba820 + + // 2^140 * 4 * G + + .quad 0xff35f5cb9e1516f4 + .quad 0xee805bcf648aae45 + .quad 0xf0d73c2bb93a9ef3 + .quad 0x097b0bf22092a6c2 + .quad 0x7be44ce7a7a2e1ac + .quad 0x411fd93efad1b8b7 + .quad 0x1734a1d70d5f7c9b + .quad 0x0d6592233127db16 + .quad 0xc48bab1521a9d733 + .quad 0xa6c2eaead61abb25 + .quad 0x625c6c1cc6cb4305 + .quad 0x7fc90fea93eb3a67 + + // 2^140 * 5 * G + + .quad 0x0408f1fe1f5c5926 + .quad 0x1a8f2f5e3b258bf4 + .quad 0x40a951a2fdc71669 + .quad 0x6598ee93c98b577e + .quad 0xc527deb59c7cb23d + .quad 0x955391695328404e + .quad 
0xd64392817ccf2c7a + .quad 0x6ce97dabf7d8fa11 + .quad 0x25b5a8e50ef7c48f + .quad 0xeb6034116f2ce532 + .quad 0xc5e75173e53de537 + .quad 0x73119fa08c12bb03 + + // 2^140 * 6 * G + + .quad 0xed30129453f1a4cb + .quad 0xbce621c9c8f53787 + .quad 0xfacb2b1338bee7b9 + .quad 0x3025798a9ea8428c + .quad 0x7845b94d21f4774d + .quad 0xbf62f16c7897b727 + .quad 0x671857c03c56522b + .quad 0x3cd6a85295621212 + .quad 0x3fecde923aeca999 + .quad 0xbdaa5b0062e8c12f + .quad 0x67b99dfc96988ade + .quad 0x3f52c02852661036 + + // 2^140 * 7 * G + + .quad 0xffeaa48e2a1351c6 + .quad 0x28624754fa7f53d7 + .quad 0x0b5ba9e57582ddf1 + .quad 0x60c0104ba696ac59 + .quad 0x9258bf99eec416c6 + .quad 0xac8a5017a9d2f671 + .quad 0x629549ab16dea4ab + .quad 0x05d0e85c99091569 + .quad 0x051de020de9cbe97 + .quad 0xfa07fc56b50bcf74 + .quad 0x378cec9f0f11df65 + .quad 0x36853c69ab96de4d + + // 2^140 * 8 * G + + .quad 0x36d9b8de78f39b2d + .quad 0x7f42ed71a847b9ec + .quad 0x241cd1d679bd3fde + .quad 0x6a704fec92fbce6b + .quad 0x4433c0b0fac5e7be + .quad 0x724bae854c08dcbe + .quad 0xf1f24cc446978f9b + .quad 0x4a0aff6d62825fc8 + .quad 0xe917fb9e61095301 + .quad 0xc102df9402a092f8 + .quad 0xbf09e2f5fa66190b + .quad 0x681109bee0dcfe37 + + // 2^144 * 1 * G + + .quad 0x559a0cc9782a0dde + .quad 0x551dcdb2ea718385 + .quad 0x7f62865b31ef238c + .quad 0x504aa7767973613d + .quad 0x9c18fcfa36048d13 + .quad 0x29159db373899ddd + .quad 0xdc9f350b9f92d0aa + .quad 0x26f57eee878a19d4 + .quad 0x0cab2cd55687efb1 + .quad 0x5180d162247af17b + .quad 0x85c15a344f5a2467 + .quad 0x4041943d9dba3069 + + // 2^144 * 2 * G + + .quad 0xc3c0eeba43ebcc96 + .quad 0x8d749c9c26ea9caf + .quad 0xd9fa95ee1c77ccc6 + .quad 0x1420a1d97684340f + .quad 0x4b217743a26caadd + .quad 0x47a6b424648ab7ce + .quad 0xcb1d4f7a03fbc9e3 + .quad 0x12d931429800d019 + .quad 0x00c67799d337594f + .quad 0x5e3c5140b23aa47b + .quad 0x44182854e35ff395 + .quad 0x1b4f92314359a012 + + // 2^144 * 3 * G + + .quad 0x3e5c109d89150951 + .quad 0x39cefa912de9696a + .quad 0x20eae43f975f3020 + .quad 0x239b572a7f132dae + .quad 0x33cf3030a49866b1 + .quad 0x251f73d2215f4859 + .quad 0xab82aa4051def4f6 + .quad 0x5ff191d56f9a23f6 + .quad 0x819ed433ac2d9068 + .quad 0x2883ab795fc98523 + .quad 0xef4572805593eb3d + .quad 0x020c526a758f36cb + + // 2^144 * 4 * G + + .quad 0x779834f89ed8dbbc + .quad 0xc8f2aaf9dc7ca46c + .quad 0xa9524cdca3e1b074 + .quad 0x02aacc4615313877 + .quad 0xe931ef59f042cc89 + .quad 0x2c589c9d8e124bb6 + .quad 0xadc8e18aaec75997 + .quad 0x452cfe0a5602c50c + .quad 0x86a0f7a0647877df + .quad 0xbbc464270e607c9f + .quad 0xab17ea25f1fb11c9 + .quad 0x4cfb7d7b304b877b + + // 2^144 * 5 * G + + .quad 0x72b43d6cb89b75fe + .quad 0x54c694d99c6adc80 + .quad 0xb8c3aa373ee34c9f + .quad 0x14b4622b39075364 + .quad 0xe28699c29789ef12 + .quad 0x2b6ecd71df57190d + .quad 0xc343c857ecc970d0 + .quad 0x5b1d4cbc434d3ac5 + .quad 0xb6fb2615cc0a9f26 + .quad 0x3a4f0e2bb88dcce5 + .quad 0x1301498b3369a705 + .quad 0x2f98f71258592dd1 + + // 2^144 * 6 * G + + .quad 0x0c94a74cb50f9e56 + .quad 0x5b1ff4a98e8e1320 + .quad 0x9a2acc2182300f67 + .quad 0x3a6ae249d806aaf9 + .quad 0x2e12ae444f54a701 + .quad 0xfcfe3ef0a9cbd7de + .quad 0xcebf890d75835de0 + .quad 0x1d8062e9e7614554 + .quad 0x657ada85a9907c5a + .quad 0x1a0ea8b591b90f62 + .quad 0x8d0e1dfbdf34b4e9 + .quad 0x298b8ce8aef25ff3 + + // 2^144 * 7 * G + + .quad 0x2a927953eff70cb2 + .quad 0x4b89c92a79157076 + .quad 0x9418457a30a7cf6a + .quad 0x34b8a8404d5ce485 + .quad 0x837a72ea0a2165de + .quad 0x3fab07b40bcf79f6 + .quad 0x521636c77738ae70 + .quad 0x6ba6271803a7d7dc + .quad 0xc26eecb583693335 + .quad 
0xd5a813df63b5fefd + .quad 0xa293aa9aa4b22573 + .quad 0x71d62bdd465e1c6a + + // 2^144 * 8 * G + + .quad 0x6533cc28d378df80 + .quad 0xf6db43790a0fa4b4 + .quad 0xe3645ff9f701da5a + .quad 0x74d5f317f3172ba4 + .quad 0xcd2db5dab1f75ef5 + .quad 0xd77f95cf16b065f5 + .quad 0x14571fea3f49f085 + .quad 0x1c333621262b2b3d + .quad 0xa86fe55467d9ca81 + .quad 0x398b7c752b298c37 + .quad 0xda6d0892e3ac623b + .quad 0x4aebcc4547e9d98c + + // 2^148 * 1 * G + + .quad 0x53175a7205d21a77 + .quad 0xb0c04422d3b934d4 + .quad 0xadd9f24bdd5deadc + .quad 0x074f46e69f10ff8c + .quad 0x0de9b204a059a445 + .quad 0xe15cb4aa4b17ad0f + .quad 0xe1bbec521f79c557 + .quad 0x2633f1b9d071081b + .quad 0xc1fb4177018b9910 + .quad 0xa6ea20dc6c0fe140 + .quad 0xd661f3e74354c6ff + .quad 0x5ecb72e6f1a3407a + + // 2^148 * 2 * G + + .quad 0xa515a31b2259fb4e + .quad 0x0960f3972bcac52f + .quad 0xedb52fec8d3454cb + .quad 0x382e2720c476c019 + .quad 0xfeeae106e8e86997 + .quad 0x9863337f98d09383 + .quad 0x9470480eaa06ebef + .quad 0x038b6898d4c5c2d0 + .quad 0xf391c51d8ace50a6 + .quad 0x3142d0b9ae2d2948 + .quad 0xdb4d5a1a7f24ca80 + .quad 0x21aeba8b59250ea8 + + // 2^148 * 3 * G + + .quad 0x24f13b34cf405530 + .quad 0x3c44ea4a43088af7 + .quad 0x5dd5c5170006a482 + .quad 0x118eb8f8890b086d + .quad 0x53853600f0087f23 + .quad 0x4c461879da7d5784 + .quad 0x6af303deb41f6860 + .quad 0x0a3c16c5c27c18ed + .quad 0x17e49c17cc947f3d + .quad 0xccc6eda6aac1d27b + .quad 0xdf6092ceb0f08e56 + .quad 0x4909b3e22c67c36b + + // 2^148 * 4 * G + + .quad 0x9c9c85ea63fe2e89 + .quad 0xbe1baf910e9412ec + .quad 0x8f7baa8a86fbfe7b + .quad 0x0fb17f9fef968b6c + .quad 0x59a16676706ff64e + .quad 0x10b953dd0d86a53d + .quad 0x5848e1e6ce5c0b96 + .quad 0x2d8b78e712780c68 + .quad 0x79d5c62eafc3902b + .quad 0x773a215289e80728 + .quad 0xc38ae640e10120b9 + .quad 0x09ae23717b2b1a6d + + // 2^148 * 5 * G + + .quad 0xbb6a192a4e4d083c + .quad 0x34ace0630029e192 + .quad 0x98245a59aafabaeb + .quad 0x6d9c8a9ada97faac + .quad 0x10ab8fa1ad32b1d0 + .quad 0xe9aced1be2778b24 + .quad 0xa8856bc0373de90f + .quad 0x66f35ddddda53996 + .quad 0xd27d9afb24997323 + .quad 0x1bb7e07ef6f01d2e + .quad 0x2ba7472df52ecc7f + .quad 0x03019b4f646f9dc8 + + // 2^148 * 6 * G + + .quad 0x04a186b5565345cd + .quad 0xeee76610bcc4116a + .quad 0x689c73b478fb2a45 + .quad 0x387dcbff65697512 + .quad 0xaf09b214e6b3dc6b + .quad 0x3f7573b5ad7d2f65 + .quad 0xd019d988100a23b0 + .quad 0x392b63a58b5c35f7 + .quad 0x4093addc9c07c205 + .quad 0xc565be15f532c37e + .quad 0x63dbecfd1583402a + .quad 0x61722b4aef2e032e + + // 2^148 * 7 * G + + .quad 0x0012aafeecbd47af + .quad 0x55a266fb1cd46309 + .quad 0xf203eb680967c72c + .quad 0x39633944ca3c1429 + .quad 0xd6b07a5581cb0e3c + .quad 0x290ff006d9444969 + .quad 0x08680b6a16dcda1f + .quad 0x5568d2b75a06de59 + .quad 0x8d0cb88c1b37cfe1 + .quad 0x05b6a5a3053818f3 + .quad 0xf2e9bc04b787d959 + .quad 0x6beba1249add7f64 + + // 2^148 * 8 * G + + .quad 0x1d06005ca5b1b143 + .quad 0x6d4c6bb87fd1cda2 + .quad 0x6ef5967653fcffe7 + .quad 0x097c29e8c1ce1ea5 + .quad 0x5c3cecb943f5a53b + .quad 0x9cc9a61d06c08df2 + .quad 0xcfba639a85895447 + .quad 0x5a845ae80df09fd5 + .quad 0x4ce97dbe5deb94ca + .quad 0x38d0a4388c709c48 + .quad 0xc43eced4a169d097 + .quad 0x0a1249fff7e587c3 + + // 2^152 * 1 * G + + .quad 0x12f0071b276d01c9 + .quad 0xe7b8bac586c48c70 + .quad 0x5308129b71d6fba9 + .quad 0x5d88fbf95a3db792 + .quad 0x0b408d9e7354b610 + .quad 0x806b32535ba85b6e + .quad 0xdbe63a034a58a207 + .quad 0x173bd9ddc9a1df2c + .quad 0x2b500f1efe5872df + .quad 0x58d6582ed43918c1 + .quad 0xe6ed278ec9673ae0 + .quad 0x06e1cd13b19ea319 + + // 
2^152 * 2 * G + + .quad 0x40d0ad516f166f23 + .quad 0x118e32931fab6abe + .quad 0x3fe35e14a04d088e + .quad 0x3080603526e16266 + .quad 0x472baf629e5b0353 + .quad 0x3baa0b90278d0447 + .quad 0x0c785f469643bf27 + .quad 0x7f3a6a1a8d837b13 + .quad 0xf7e644395d3d800b + .quad 0x95a8d555c901edf6 + .quad 0x68cd7830592c6339 + .quad 0x30d0fded2e51307e + + // 2^152 * 3 * G + + .quad 0xe0594d1af21233b3 + .quad 0x1bdbe78ef0cc4d9c + .quad 0x6965187f8f499a77 + .quad 0x0a9214202c099868 + .quad 0x9cb4971e68b84750 + .quad 0xa09572296664bbcf + .quad 0x5c8de72672fa412b + .quad 0x4615084351c589d9 + .quad 0xbc9019c0aeb9a02e + .quad 0x55c7110d16034cae + .quad 0x0e6df501659932ec + .quad 0x3bca0d2895ca5dfe + + // 2^152 * 4 * G + + .quad 0x40f031bc3c5d62a4 + .quad 0x19fc8b3ecff07a60 + .quad 0x98183da2130fb545 + .quad 0x5631deddae8f13cd + .quad 0x9c688eb69ecc01bf + .quad 0xf0bc83ada644896f + .quad 0xca2d955f5f7a9fe2 + .quad 0x4ea8b4038df28241 + .quad 0x2aed460af1cad202 + .quad 0x46305305a48cee83 + .quad 0x9121774549f11a5f + .quad 0x24ce0930542ca463 + + // 2^152 * 5 * G + + .quad 0x1fe890f5fd06c106 + .quad 0xb5c468355d8810f2 + .quad 0x827808fe6e8caf3e + .quad 0x41d4e3c28a06d74b + .quad 0x3fcfa155fdf30b85 + .quad 0xd2f7168e36372ea4 + .quad 0xb2e064de6492f844 + .quad 0x549928a7324f4280 + .quad 0xf26e32a763ee1a2e + .quad 0xae91e4b7d25ffdea + .quad 0xbc3bd33bd17f4d69 + .quad 0x491b66dec0dcff6a + + // 2^152 * 6 * G + + .quad 0x98f5b13dc7ea32a7 + .quad 0xe3d5f8cc7e16db98 + .quad 0xac0abf52cbf8d947 + .quad 0x08f338d0c85ee4ac + .quad 0x75f04a8ed0da64a1 + .quad 0xed222caf67e2284b + .quad 0x8234a3791f7b7ba4 + .quad 0x4cf6b8b0b7018b67 + .quad 0xc383a821991a73bd + .quad 0xab27bc01df320c7a + .quad 0xc13d331b84777063 + .quad 0x530d4a82eb078a99 + + // 2^152 * 7 * G + + .quad 0x004c3630e1f94825 + .quad 0x7e2d78268cab535a + .quad 0xc7482323cc84ff8b + .quad 0x65ea753f101770b9 + .quad 0x6d6973456c9abf9e + .quad 0x257fb2fc4900a880 + .quad 0x2bacf412c8cfb850 + .quad 0x0db3e7e00cbfbd5b + .quad 0x3d66fc3ee2096363 + .quad 0x81d62c7f61b5cb6b + .quad 0x0fbe044213443b1a + .quad 0x02a4ec1921e1a1db + + // 2^152 * 8 * G + + .quad 0x5ce6259a3b24b8a2 + .quad 0xb8577acc45afa0b8 + .quad 0xcccbe6e88ba07037 + .quad 0x3d143c51127809bf + .quad 0xf5c86162f1cf795f + .quad 0x118c861926ee57f2 + .quad 0x172124851c063578 + .quad 0x36d12b5dec067fcf + .quad 0x126d279179154557 + .quad 0xd5e48f5cfc783a0a + .quad 0x36bdb6e8df179bac + .quad 0x2ef517885ba82859 + + // 2^156 * 1 * G + + .quad 0x88bd438cd11e0d4a + .quad 0x30cb610d43ccf308 + .quad 0xe09a0e3791937bcc + .quad 0x4559135b25b1720c + .quad 0x1ea436837c6da1e9 + .quad 0xf9c189af1fb9bdbe + .quad 0x303001fcce5dd155 + .quad 0x28a7c99ebc57be52 + .quad 0xb8fd9399e8d19e9d + .quad 0x908191cb962423ff + .quad 0xb2b948d747c742a3 + .quad 0x37f33226d7fb44c4 + + // 2^156 * 2 * G + + .quad 0x0dae8767b55f6e08 + .quad 0x4a43b3b35b203a02 + .quad 0xe3725a6e80af8c79 + .quad 0x0f7a7fd1705fa7a3 + .quad 0x33912553c821b11d + .quad 0x66ed42c241e301df + .quad 0x066fcc11104222fd + .quad 0x307a3b41c192168f + .quad 0x8eeb5d076eb55ce0 + .quad 0x2fc536bfaa0d925a + .quad 0xbe81830fdcb6c6e8 + .quad 0x556c7045827baf52 + + // 2^156 * 3 * G + + .quad 0x8e2b517302e9d8b7 + .quad 0xe3e52269248714e8 + .quad 0xbd4fbd774ca960b5 + .quad 0x6f4b4199c5ecada9 + .quad 0xb94b90022bf44406 + .quad 0xabd4237eff90b534 + .quad 0x7600a960faf86d3a + .quad 0x2f45abdac2322ee3 + .quad 0x61af4912c8ef8a6a + .quad 0xe58fa4fe43fb6e5e + .quad 0xb5afcc5d6fd427cf + .quad 0x6a5393281e1e11eb + + // 2^156 * 4 * G + + .quad 0xf3da5139a5d1ee89 + .quad 0x8145457cff936988 + .quad 
0x3f622fed00e188c4 + .quad 0x0f513815db8b5a3d + .quad 0x0fff04fe149443cf + .quad 0x53cac6d9865cddd7 + .quad 0x31385b03531ed1b7 + .quad 0x5846a27cacd1039d + .quad 0x4ff5cdac1eb08717 + .quad 0x67e8b29590f2e9bc + .quad 0x44093b5e237afa99 + .quad 0x0d414bed8708b8b2 + + // 2^156 * 5 * G + + .quad 0xcfb68265fd0e75f6 + .quad 0xe45b3e28bb90e707 + .quad 0x7242a8de9ff92c7a + .quad 0x685b3201933202dd + .quad 0x81886a92294ac9e8 + .quad 0x23162b45d55547be + .quad 0x94cfbc4403715983 + .quad 0x50eb8fdb134bc401 + .quad 0xc0b73ec6d6b330cd + .quad 0x84e44807132faff1 + .quad 0x732b7352c4a5dee1 + .quad 0x5d7c7cf1aa7cd2d2 + + // 2^156 * 6 * G + + .quad 0xaf3b46bf7a4aafa2 + .quad 0xb78705ec4d40d411 + .quad 0x114f0c6aca7c15e3 + .quad 0x3f364faaa9489d4d + .quad 0x33d1013e9b73a562 + .quad 0x925cef5748ec26e1 + .quad 0xa7fce614dd468058 + .quad 0x78b0fad41e9aa438 + .quad 0xbf56a431ed05b488 + .quad 0xa533e66c9c495c7e + .quad 0xe8652baf87f3651a + .quad 0x0241800059d66c33 + + // 2^156 * 7 * G + + .quad 0xceb077fea37a5be4 + .quad 0xdb642f02e5a5eeb7 + .quad 0xc2e6d0c5471270b8 + .quad 0x4771b65538e4529c + .quad 0x28350c7dcf38ea01 + .quad 0x7c6cdbc0b2917ab6 + .quad 0xace7cfbe857082f7 + .quad 0x4d2845aba2d9a1e0 + .quad 0xbb537fe0447070de + .quad 0xcba744436dd557df + .quad 0xd3b5a3473600dbcb + .quad 0x4aeabbe6f9ffd7f8 + + // 2^156 * 8 * G + + .quad 0x4630119e40d8f78c + .quad 0xa01a9bc53c710e11 + .quad 0x486d2b258910dd79 + .quad 0x1e6c47b3db0324e5 + .quad 0x6a2134bcc4a9c8f2 + .quad 0xfbf8fd1c8ace2e37 + .quad 0x000ae3049911a0ba + .quad 0x046e3a616bc89b9e + .quad 0x14e65442f03906be + .quad 0x4a019d54e362be2a + .quad 0x68ccdfec8dc230c7 + .quad 0x7cfb7e3faf6b861c + + // 2^160 * 1 * G + + .quad 0x4637974e8c58aedc + .quad 0xb9ef22fbabf041a4 + .quad 0xe185d956e980718a + .quad 0x2f1b78fab143a8a6 + .quad 0x96eebffb305b2f51 + .quad 0xd3f938ad889596b8 + .quad 0xf0f52dc746d5dd25 + .quad 0x57968290bb3a0095 + .quad 0xf71ab8430a20e101 + .quad 0xf393658d24f0ec47 + .quad 0xcf7509a86ee2eed1 + .quad 0x7dc43e35dc2aa3e1 + + // 2^160 * 2 * G + + .quad 0x85966665887dd9c3 + .quad 0xc90f9b314bb05355 + .quad 0xc6e08df8ef2079b1 + .quad 0x7ef72016758cc12f + .quad 0x5a782a5c273e9718 + .quad 0x3576c6995e4efd94 + .quad 0x0f2ed8051f237d3e + .quad 0x044fb81d82d50a99 + .quad 0xc1df18c5a907e3d9 + .quad 0x57b3371dce4c6359 + .quad 0xca704534b201bb49 + .quad 0x7f79823f9c30dd2e + + // 2^160 * 3 * G + + .quad 0x8334d239a3b513e8 + .quad 0xc13670d4b91fa8d8 + .quad 0x12b54136f590bd33 + .quad 0x0a4e0373d784d9b4 + .quad 0x6a9c1ff068f587ba + .quad 0x0827894e0050c8de + .quad 0x3cbf99557ded5be7 + .quad 0x64a9b0431c06d6f0 + .quad 0x2eb3d6a15b7d2919 + .quad 0xb0b4f6a0d53a8235 + .quad 0x7156ce4389a45d47 + .quad 0x071a7d0ace18346c + + // 2^160 * 4 * G + + .quad 0xd3072daac887ba0b + .quad 0x01262905bfa562ee + .quad 0xcf543002c0ef768b + .quad 0x2c3bcc7146ea7e9c + .quad 0xcc0c355220e14431 + .quad 0x0d65950709b15141 + .quad 0x9af5621b209d5f36 + .quad 0x7c69bcf7617755d3 + .quad 0x07f0d7eb04e8295f + .quad 0x10db18252f50f37d + .quad 0xe951a9a3171798d7 + .quad 0x6f5a9a7322aca51d + + // 2^160 * 5 * G + + .quad 0x8ba1000c2f41c6c5 + .quad 0xc49f79c10cfefb9b + .quad 0x4efa47703cc51c9f + .quad 0x494e21a2e147afca + .quad 0xe729d4eba3d944be + .quad 0x8d9e09408078af9e + .quad 0x4525567a47869c03 + .quad 0x02ab9680ee8d3b24 + .quad 0xefa48a85dde50d9a + .quad 0x219a224e0fb9a249 + .quad 0xfa091f1dd91ef6d9 + .quad 0x6b5d76cbea46bb34 + + // 2^160 * 6 * G + + .quad 0x8857556cec0cd994 + .quad 0x6472dc6f5cd01dba + .quad 0xaf0169148f42b477 + .quad 0x0ae333f685277354 + .quad 0xe0f941171e782522 + .quad 
0xf1e6ae74036936d3 + .quad 0x408b3ea2d0fcc746 + .quad 0x16fb869c03dd313e + .quad 0x288e199733b60962 + .quad 0x24fc72b4d8abe133 + .quad 0x4811f7ed0991d03e + .quad 0x3f81e38b8f70d075 + + // 2^160 * 7 * G + + .quad 0x7f910fcc7ed9affe + .quad 0x545cb8a12465874b + .quad 0xa8397ed24b0c4704 + .quad 0x50510fc104f50993 + .quad 0x0adb7f355f17c824 + .quad 0x74b923c3d74299a4 + .quad 0xd57c3e8bcbf8eaf7 + .quad 0x0ad3e2d34cdedc3d + .quad 0x6f0c0fc5336e249d + .quad 0x745ede19c331cfd9 + .quad 0xf2d6fd0009eefe1c + .quad 0x127c158bf0fa1ebe + + // 2^160 * 8 * G + + .quad 0xf6197c422e9879a2 + .quad 0xa44addd452ca3647 + .quad 0x9b413fc14b4eaccb + .quad 0x354ef87d07ef4f68 + .quad 0xdea28fc4ae51b974 + .quad 0x1d9973d3744dfe96 + .quad 0x6240680b873848a8 + .quad 0x4ed82479d167df95 + .quad 0xfee3b52260c5d975 + .quad 0x50352efceb41b0b8 + .quad 0x8808ac30a9f6653c + .quad 0x302d92d20539236d + + // 2^164 * 1 * G + + .quad 0x4c59023fcb3efb7c + .quad 0x6c2fcb99c63c2a94 + .quad 0xba4190e2c3c7e084 + .quad 0x0e545daea51874d9 + .quad 0x957b8b8b0df53c30 + .quad 0x2a1c770a8e60f098 + .quad 0xbbc7a670345796de + .quad 0x22a48f9a90c99bc9 + .quad 0x6b7dc0dc8d3fac58 + .quad 0x5497cd6ce6e42bfd + .quad 0x542f7d1bf400d305 + .quad 0x4159f47f048d9136 + + // 2^164 * 2 * G + + .quad 0x20ad660839e31e32 + .quad 0xf81e1bd58405be50 + .quad 0xf8064056f4dabc69 + .quad 0x14d23dd4ce71b975 + .quad 0x748515a8bbd24839 + .quad 0x77128347afb02b55 + .quad 0x50ba2ac649a2a17f + .quad 0x060525513ad730f1 + .quad 0xf2398e098aa27f82 + .quad 0x6d7982bb89a1b024 + .quad 0xfa694084214dd24c + .quad 0x71ab966fa32301c3 + + // 2^164 * 3 * G + + .quad 0x2dcbd8e34ded02fc + .quad 0x1151f3ec596f22aa + .quad 0xbca255434e0328da + .quad 0x35768fbe92411b22 + .quad 0xb1088a0702809955 + .quad 0x43b273ea0b43c391 + .quad 0xca9b67aefe0686ed + .quad 0x605eecbf8335f4ed + .quad 0x83200a656c340431 + .quad 0x9fcd71678ee59c2f + .quad 0x75d4613f71300f8a + .quad 0x7a912faf60f542f9 + + // 2^164 * 4 * G + + .quad 0xb204585e5edc1a43 + .quad 0x9f0e16ee5897c73c + .quad 0x5b82c0ae4e70483c + .quad 0x624a170e2bddf9be + .quad 0x253f4f8dfa2d5597 + .quad 0x25e49c405477130c + .quad 0x00c052e5996b1102 + .quad 0x33cb966e33bb6c4a + .quad 0x597028047f116909 + .quad 0x828ac41c1e564467 + .quad 0x70417dbde6217387 + .quad 0x721627aefbac4384 + + // 2^164 * 5 * G + + .quad 0x97d03bc38736add5 + .quad 0x2f1422afc532b130 + .quad 0x3aa68a057101bbc4 + .quad 0x4c946cf7e74f9fa7 + .quad 0xfd3097bc410b2f22 + .quad 0xf1a05da7b5cfa844 + .quad 0x61289a1def57ca74 + .quad 0x245ea199bb821902 + .quad 0xaedca66978d477f8 + .quad 0x1898ba3c29117fe1 + .quad 0xcf73f983720cbd58 + .quad 0x67da12e6b8b56351 + + // 2^164 * 6 * G + + .quad 0x7067e187b4bd6e07 + .quad 0x6e8f0203c7d1fe74 + .quad 0x93c6aa2f38c85a30 + .quad 0x76297d1f3d75a78a + .quad 0x2b7ef3d38ec8308c + .quad 0x828fd7ec71eb94ab + .quad 0x807c3b36c5062abd + .quad 0x0cb64cb831a94141 + .quad 0x3030fc33534c6378 + .quad 0xb9635c5ce541e861 + .quad 0x15d9a9bed9b2c728 + .quad 0x49233ea3f3775dcb + + // 2^164 * 7 * G + + .quad 0x629398fa8dbffc3a + .quad 0xe12fe52dd54db455 + .quad 0xf3be11dfdaf25295 + .quad 0x628b140dce5e7b51 + .quad 0x7b3985fe1c9f249b + .quad 0x4fd6b2d5a1233293 + .quad 0xceb345941adf4d62 + .quad 0x6987ff6f542de50c + .quad 0x47e241428f83753c + .quad 0x6317bebc866af997 + .quad 0xdabb5b433d1a9829 + .quad 0x074d8d245287fb2d + + // 2^164 * 8 * G + + .quad 0x8337d9cd440bfc31 + .quad 0x729d2ca1af318fd7 + .quad 0xa040a4a4772c2070 + .quad 0x46002ef03a7349be + .quad 0x481875c6c0e31488 + .quad 0x219429b2e22034b4 + .quad 0x7223c98a31283b65 + .quad 0x3420d60b342277f9 + .quad 
0xfaa23adeaffe65f7 + .quad 0x78261ed45be0764c + .quad 0x441c0a1e2f164403 + .quad 0x5aea8e567a87d395 + + // 2^168 * 1 * G + + .quad 0x7813c1a2bca4283d + .quad 0xed62f091a1863dd9 + .quad 0xaec7bcb8c268fa86 + .quad 0x10e5d3b76f1cae4c + .quad 0x2dbc6fb6e4e0f177 + .quad 0x04e1bf29a4bd6a93 + .quad 0x5e1966d4787af6e8 + .quad 0x0edc5f5eb426d060 + .quad 0x5453bfd653da8e67 + .quad 0xe9dc1eec24a9f641 + .quad 0xbf87263b03578a23 + .quad 0x45b46c51361cba72 + + // 2^168 * 2 * G + + .quad 0xa9402abf314f7fa1 + .quad 0xe257f1dc8e8cf450 + .quad 0x1dbbd54b23a8be84 + .quad 0x2177bfa36dcb713b + .quad 0xce9d4ddd8a7fe3e4 + .quad 0xab13645676620e30 + .quad 0x4b594f7bb30e9958 + .quad 0x5c1c0aef321229df + .quad 0x37081bbcfa79db8f + .quad 0x6048811ec25f59b3 + .quad 0x087a76659c832487 + .quad 0x4ae619387d8ab5bb + + // 2^168 * 3 * G + + .quad 0x8ddbf6aa5344a32e + .quad 0x7d88eab4b41b4078 + .quad 0x5eb0eb974a130d60 + .quad 0x1a00d91b17bf3e03 + .quad 0x61117e44985bfb83 + .quad 0xfce0462a71963136 + .quad 0x83ac3448d425904b + .quad 0x75685abe5ba43d64 + .quad 0x6e960933eb61f2b2 + .quad 0x543d0fa8c9ff4952 + .quad 0xdf7275107af66569 + .quad 0x135529b623b0e6aa + + // 2^168 * 4 * G + + .quad 0x18f0dbd7add1d518 + .quad 0x979f7888cfc11f11 + .quad 0x8732e1f07114759b + .quad 0x79b5b81a65ca3a01 + .quad 0xf5c716bce22e83fe + .quad 0xb42beb19e80985c1 + .quad 0xec9da63714254aae + .quad 0x5972ea051590a613 + .quad 0x0fd4ac20dc8f7811 + .quad 0x9a9ad294ac4d4fa8 + .quad 0xc01b2d64b3360434 + .quad 0x4f7e9c95905f3bdb + + // 2^168 * 5 * G + + .quad 0x62674bbc5781302e + .quad 0xd8520f3989addc0f + .quad 0x8c2999ae53fbd9c6 + .quad 0x31993ad92e638e4c + .quad 0x71c8443d355299fe + .quad 0x8bcd3b1cdbebead7 + .quad 0x8092499ef1a49466 + .quad 0x1942eec4a144adc8 + .quad 0x7dac5319ae234992 + .quad 0x2c1b3d910cea3e92 + .quad 0x553ce494253c1122 + .quad 0x2a0a65314ef9ca75 + + // 2^168 * 6 * G + + .quad 0x2db7937ff7f927c2 + .quad 0xdb741f0617d0a635 + .quad 0x5982f3a21155af76 + .quad 0x4cf6e218647c2ded + .quad 0xcf361acd3c1c793a + .quad 0x2f9ebcac5a35bc3b + .quad 0x60e860e9a8cda6ab + .quad 0x055dc39b6dea1a13 + .quad 0xb119227cc28d5bb6 + .quad 0x07e24ebc774dffab + .quad 0xa83c78cee4a32c89 + .quad 0x121a307710aa24b6 + + // 2^168 * 7 * G + + .quad 0xe4db5d5e9f034a97 + .quad 0xe153fc093034bc2d + .quad 0x460546919551d3b1 + .quad 0x333fc76c7a40e52d + .quad 0xd659713ec77483c9 + .quad 0x88bfe077b82b96af + .quad 0x289e28231097bcd3 + .quad 0x527bb94a6ced3a9b + .quad 0x563d992a995b482e + .quad 0x3405d07c6e383801 + .quad 0x485035de2f64d8e5 + .quad 0x6b89069b20a7a9f7 + + // 2^168 * 8 * G + + .quad 0x812aa0416270220d + .quad 0x995a89faf9245b4e + .quad 0xffadc4ce5072ef05 + .quad 0x23bc2103aa73eb73 + .quad 0x4082fa8cb5c7db77 + .quad 0x068686f8c734c155 + .quad 0x29e6c8d9f6e7a57e + .quad 0x0473d308a7639bcf + .quad 0xcaee792603589e05 + .quad 0x2b4b421246dcc492 + .quad 0x02a1ef74e601a94f + .quad 0x102f73bfde04341a + + // 2^172 * 1 * G + + .quad 0xb5a2d50c7ec20d3e + .quad 0xc64bdd6ea0c97263 + .quad 0x56e89052c1ff734d + .quad 0x4929c6f72b2ffaba + .quad 0x358ecba293a36247 + .quad 0xaf8f9862b268fd65 + .quad 0x412f7e9968a01c89 + .quad 0x5786f312cd754524 + .quad 0x337788ffca14032c + .quad 0xf3921028447f1ee3 + .quad 0x8b14071f231bccad + .quad 0x4c817b4bf2344783 + + // 2^172 * 2 * G + + .quad 0x0ff853852871b96e + .quad 0xe13e9fab60c3f1bb + .quad 0xeefd595325344402 + .quad 0x0a37c37075b7744b + .quad 0x413ba057a40b4484 + .quad 0xba4c2e1a4f5f6a43 + .quad 0x614ba0a5aee1d61c + .quad 0x78a1531a8b05dc53 + .quad 0x6cbdf1703ad0562b + .quad 0x8ecf4830c92521a3 + .quad 0xdaebd303fd8424e7 + .quad 
0x72ad82a42e5ec56f + + // 2^172 * 3 * G + + .quad 0x3f9e8e35bafb65f6 + .quad 0x39d69ec8f27293a1 + .quad 0x6cb8cd958cf6a3d0 + .quad 0x1734778173adae6d + .quad 0xc368939167024bc3 + .quad 0x8e69d16d49502fda + .quad 0xfcf2ec3ce45f4b29 + .quad 0x065f669ea3b4cbc4 + .quad 0x8a00aec75532db4d + .quad 0xb869a4e443e31bb1 + .quad 0x4a0f8552d3a7f515 + .quad 0x19adeb7c303d7c08 + + // 2^172 * 4 * G + + .quad 0xc720cb6153ead9a3 + .quad 0x55b2c97f512b636e + .quad 0xb1e35b5fd40290b1 + .quad 0x2fd9ccf13b530ee2 + .quad 0x9d05ba7d43c31794 + .quad 0x2470c8ff93322526 + .quad 0x8323dec816197438 + .quad 0x2852709881569b53 + .quad 0x07bd475b47f796b8 + .quad 0xd2c7b013542c8f54 + .quad 0x2dbd23f43b24f87e + .quad 0x6551afd77b0901d6 + + // 2^172 * 5 * G + + .quad 0x4546baaf54aac27f + .quad 0xf6f66fecb2a45a28 + .quad 0x582d1b5b562bcfe8 + .quad 0x44b123f3920f785f + .quad 0x68a24ce3a1d5c9ac + .quad 0xbb77a33d10ff6461 + .quad 0x0f86ce4425d3166e + .quad 0x56507c0950b9623b + .quad 0x1206f0b7d1713e63 + .quad 0x353fe3d915bafc74 + .quad 0x194ceb970ad9d94d + .quad 0x62fadd7cf9d03ad3 + + // 2^172 * 6 * G + + .quad 0xc6b5967b5598a074 + .quad 0x5efe91ce8e493e25 + .quad 0xd4b72c4549280888 + .quad 0x20ef1149a26740c2 + .quad 0x3cd7bc61e7ce4594 + .quad 0xcd6b35a9b7dd267e + .quad 0xa080abc84366ef27 + .quad 0x6ec7c46f59c79711 + .quad 0x2f07ad636f09a8a2 + .quad 0x8697e6ce24205e7d + .quad 0xc0aefc05ee35a139 + .quad 0x15e80958b5f9d897 + + // 2^172 * 7 * G + + .quad 0x25a5ef7d0c3e235b + .quad 0x6c39c17fbe134ee7 + .quad 0xc774e1342dc5c327 + .quad 0x021354b892021f39 + .quad 0x4dd1ed355bb061c4 + .quad 0x42dc0cef941c0700 + .quad 0x61305dc1fd86340e + .quad 0x56b2cc930e55a443 + .quad 0x1df79da6a6bfc5a2 + .quad 0x02f3a2749fde4369 + .quad 0xb323d9f2cda390a7 + .quad 0x7be0847b8774d363 + + // 2^172 * 8 * G + + .quad 0x8c99cc5a8b3f55c3 + .quad 0x0611d7253fded2a0 + .quad 0xed2995ff36b70a36 + .quad 0x1f699a54d78a2619 + .quad 0x1466f5af5307fa11 + .quad 0x817fcc7ded6c0af2 + .quad 0x0a6de44ec3a4a3fb + .quad 0x74071475bc927d0b + .quad 0xe77292f373e7ea8a + .quad 0x296537d2cb045a31 + .quad 0x1bd0653ed3274fde + .quad 0x2f9a2c4476bd2966 + + // 2^176 * 1 * G + + .quad 0xeb18b9ab7f5745c6 + .quad 0x023a8aee5787c690 + .quad 0xb72712da2df7afa9 + .quad 0x36597d25ea5c013d + .quad 0xa2b4dae0b5511c9a + .quad 0x7ac860292bffff06 + .quad 0x981f375df5504234 + .quad 0x3f6bd725da4ea12d + .quad 0x734d8d7b106058ac + .quad 0xd940579e6fc6905f + .quad 0x6466f8f99202932d + .quad 0x7b7ecc19da60d6d0 + + // 2^176 * 2 * G + + .quad 0x78c2373c695c690d + .quad 0xdd252e660642906e + .quad 0x951d44444ae12bd2 + .quad 0x4235ad7601743956 + .quad 0x6dae4a51a77cfa9b + .quad 0x82263654e7a38650 + .quad 0x09bbffcd8f2d82db + .quad 0x03bedc661bf5caba + .quad 0x6258cb0d078975f5 + .quad 0x492942549189f298 + .quad 0xa0cab423e2e36ee4 + .quad 0x0e7ce2b0cdf066a1 + + // 2^176 * 3 * G + + .quad 0xc494643ac48c85a3 + .quad 0xfd361df43c6139ad + .quad 0x09db17dd3ae94d48 + .quad 0x666e0a5d8fb4674a + .quad 0xfea6fedfd94b70f9 + .quad 0xf130c051c1fcba2d + .quad 0x4882d47e7f2fab89 + .quad 0x615256138aeceeb5 + .quad 0x2abbf64e4870cb0d + .quad 0xcd65bcf0aa458b6b + .quad 0x9abe4eba75e8985d + .quad 0x7f0bc810d514dee4 + + // 2^176 * 4 * G + + .quad 0xb9006ba426f4136f + .quad 0x8d67369e57e03035 + .quad 0xcbc8dfd94f463c28 + .quad 0x0d1f8dbcf8eedbf5 + .quad 0x83ac9dad737213a0 + .quad 0x9ff6f8ba2ef72e98 + .quad 0x311e2edd43ec6957 + .quad 0x1d3a907ddec5ab75 + .quad 0xba1693313ed081dc + .quad 0x29329fad851b3480 + .quad 0x0128013c030321cb + .quad 0x00011b44a31bfde3 + + // 2^176 * 5 * G + + .quad 0x3fdfa06c3fc66c0c + .quad 
0x5d40e38e4dd60dd2 + .quad 0x7ae38b38268e4d71 + .quad 0x3ac48d916e8357e1 + .quad 0x16561f696a0aa75c + .quad 0xc1bf725c5852bd6a + .quad 0x11a8dd7f9a7966ad + .quad 0x63d988a2d2851026 + .quad 0x00120753afbd232e + .quad 0xe92bceb8fdd8f683 + .quad 0xf81669b384e72b91 + .quad 0x33fad52b2368a066 + + // 2^176 * 6 * G + + .quad 0x540649c6c5e41e16 + .quad 0x0af86430333f7735 + .quad 0xb2acfcd2f305e746 + .quad 0x16c0f429a256dca7 + .quad 0x8d2cc8d0c422cfe8 + .quad 0x072b4f7b05a13acb + .quad 0xa3feb6e6ecf6a56f + .quad 0x3cc355ccb90a71e2 + .quad 0xe9b69443903e9131 + .quad 0xb8a494cb7a5637ce + .quad 0xc87cd1a4baba9244 + .quad 0x631eaf426bae7568 + + // 2^176 * 7 * G + + .quad 0xb3e90410da66fe9f + .quad 0x85dd4b526c16e5a6 + .quad 0xbc3d97611ef9bf83 + .quad 0x5599648b1ea919b5 + .quad 0x47d975b9a3700de8 + .quad 0x7280c5fbe2f80552 + .quad 0x53658f2732e45de1 + .quad 0x431f2c7f665f80b5 + .quad 0xd6026344858f7b19 + .quad 0x14ab352fa1ea514a + .quad 0x8900441a2090a9d7 + .quad 0x7b04715f91253b26 + + // 2^176 * 8 * G + + .quad 0x83edbd28acf6ae43 + .quad 0x86357c8b7d5c7ab4 + .quad 0xc0404769b7eb2c44 + .quad 0x59b37bf5c2f6583f + .quad 0xb376c280c4e6bac6 + .quad 0x970ed3dd6d1d9b0b + .quad 0xb09a9558450bf944 + .quad 0x48d0acfa57cde223 + .quad 0xb60f26e47dabe671 + .quad 0xf1d1a197622f3a37 + .quad 0x4208ce7ee9960394 + .quad 0x16234191336d3bdb + + // 2^180 * 1 * G + + .quad 0xf19aeac733a63aef + .quad 0x2c7fba5d4442454e + .quad 0x5da87aa04795e441 + .quad 0x413051e1a4e0b0f5 + .quad 0x852dd1fd3d578bbe + .quad 0x2b65ce72c3286108 + .quad 0x658c07f4eace2273 + .quad 0x0933f804ec38ab40 + .quad 0xa7ab69798d496476 + .quad 0x8121aadefcb5abc8 + .quad 0xa5dc12ef7b539472 + .quad 0x07fd47065e45351a + + // 2^180 * 2 * G + + .quad 0xc8583c3d258d2bcd + .quad 0x17029a4daf60b73f + .quad 0xfa0fc9d6416a3781 + .quad 0x1c1e5fba38b3fb23 + .quad 0x304211559ae8e7c3 + .quad 0xf281b229944882a5 + .quad 0x8a13ac2e378250e4 + .quad 0x014afa0954ba48f4 + .quad 0xcb3197001bb3666c + .quad 0x330060524bffecb9 + .quad 0x293711991a88233c + .quad 0x291884363d4ed364 + + // 2^180 * 3 * G + + .quad 0x033c6805dc4babfa + .quad 0x2c15bf5e5596ecc1 + .quad 0x1bc70624b59b1d3b + .quad 0x3ede9850a19f0ec5 + .quad 0xfb9d37c3bc1ab6eb + .quad 0x02be14534d57a240 + .quad 0xf4d73415f8a5e1f6 + .quad 0x5964f4300ccc8188 + .quad 0xe44a23152d096800 + .quad 0x5c08c55970866996 + .quad 0xdf2db60a46affb6e + .quad 0x579155c1f856fd89 + + // 2^180 * 4 * G + + .quad 0x96324edd12e0c9ef + .quad 0x468b878df2420297 + .quad 0x199a3776a4f573be + .quad 0x1e7fbcf18e91e92a + .quad 0xb5f16b630817e7a6 + .quad 0x808c69233c351026 + .quad 0x324a983b54cef201 + .quad 0x53c092084a485345 + .quad 0xd2d41481f1cbafbf + .quad 0x231d2db6716174e5 + .quad 0x0b7d7656e2a55c98 + .quad 0x3e955cd82aa495f6 + + // 2^180 * 5 * G + + .quad 0xe48f535e3ed15433 + .quad 0xd075692a0d7270a3 + .quad 0x40fbd21daade6387 + .quad 0x14264887cf4495f5 + .quad 0xab39f3ef61bb3a3f + .quad 0x8eb400652eb9193e + .quad 0xb5de6ecc38c11f74 + .quad 0x654d7e9626f3c49f + .quad 0xe564cfdd5c7d2ceb + .quad 0x82eeafded737ccb9 + .quad 0x6107db62d1f9b0ab + .quad 0x0b6baac3b4358dbb + + // 2^180 * 6 * G + + .quad 0x7ae62bcb8622fe98 + .quad 0x47762256ceb891af + .quad 0x1a5a92bcf2e406b4 + .quad 0x7d29401784e41501 + .quad 0x204abad63700a93b + .quad 0xbe0023d3da779373 + .quad 0xd85f0346633ab709 + .quad 0x00496dc490820412 + .quad 0x1c74b88dc27e6360 + .quad 0x074854268d14850c + .quad 0xa145fb7b3e0dcb30 + .quad 0x10843f1b43803b23 + + // 2^180 * 7 * G + + .quad 0xc5f90455376276dd + .quad 0xce59158dd7645cd9 + .quad 0x92f65d511d366b39 + .quad 0x11574b6e526996c4 + .quad 
0xd56f672de324689b + .quad 0xd1da8aedb394a981 + .quad 0xdd7b58fe9168cfed + .quad 0x7ce246cd4d56c1e8 + .quad 0xb8f4308e7f80be53 + .quad 0x5f3cb8cb34a9d397 + .quad 0x18a961bd33cc2b2c + .quad 0x710045fb3a9af671 + + // 2^180 * 8 * G + + .quad 0x73f93d36101b95eb + .quad 0xfaef33794f6f4486 + .quad 0x5651735f8f15e562 + .quad 0x7fa3f19058b40da1 + .quad 0xa03fc862059d699e + .quad 0x2370cfa19a619e69 + .quad 0xc4fe3b122f823deb + .quad 0x1d1b056fa7f0844e + .quad 0x1bc64631e56bf61f + .quad 0xd379ab106e5382a3 + .quad 0x4d58c57e0540168d + .quad 0x566256628442d8e4 + + // 2^184 * 1 * G + + .quad 0xb9e499def6267ff6 + .quad 0x7772ca7b742c0843 + .quad 0x23a0153fe9a4f2b1 + .quad 0x2cdfdfecd5d05006 + .quad 0xdd499cd61ff38640 + .quad 0x29cd9bc3063625a0 + .quad 0x51e2d8023dd73dc3 + .quad 0x4a25707a203b9231 + .quad 0x2ab7668a53f6ed6a + .quad 0x304242581dd170a1 + .quad 0x4000144c3ae20161 + .quad 0x5721896d248e49fc + + // 2^184 * 2 * G + + .quad 0x0b6e5517fd181bae + .quad 0x9022629f2bb963b4 + .quad 0x5509bce932064625 + .quad 0x578edd74f63c13da + .quad 0x285d5091a1d0da4e + .quad 0x4baa6fa7b5fe3e08 + .quad 0x63e5177ce19393b3 + .quad 0x03c935afc4b030fd + .quad 0x997276c6492b0c3d + .quad 0x47ccc2c4dfe205fc + .quad 0xdcd29b84dd623a3c + .quad 0x3ec2ab590288c7a2 + + // 2^184 * 3 * G + + .quad 0xa1a0d27be4d87bb9 + .quad 0xa98b4deb61391aed + .quad 0x99a0ddd073cb9b83 + .quad 0x2dd5c25a200fcace + .quad 0xa7213a09ae32d1cb + .quad 0x0f2b87df40f5c2d5 + .quad 0x0baea4c6e81eab29 + .quad 0x0e1bf66c6adbac5e + .quad 0xe2abd5e9792c887e + .quad 0x1a020018cb926d5d + .quad 0xbfba69cdbaae5f1e + .quad 0x730548b35ae88f5f + + // 2^184 * 4 * G + + .quad 0xc43551a3cba8b8ee + .quad 0x65a26f1db2115f16 + .quad 0x760f4f52ab8c3850 + .quad 0x3043443b411db8ca + .quad 0x805b094ba1d6e334 + .quad 0xbf3ef17709353f19 + .quad 0x423f06cb0622702b + .quad 0x585a2277d87845dd + .quad 0xa18a5f8233d48962 + .quad 0x6698c4b5ec78257f + .quad 0xa78e6fa5373e41ff + .quad 0x7656278950ef981f + + // 2^184 * 5 * G + + .quad 0x38c3cf59d51fc8c0 + .quad 0x9bedd2fd0506b6f2 + .quad 0x26bf109fab570e8f + .quad 0x3f4160a8c1b846a6 + .quad 0xe17073a3ea86cf9d + .quad 0x3a8cfbb707155fdc + .quad 0x4853e7fc31838a8e + .quad 0x28bbf484b613f616 + .quad 0xf2612f5c6f136c7c + .quad 0xafead107f6dd11be + .quad 0x527e9ad213de6f33 + .quad 0x1e79cb358188f75d + + // 2^184 * 6 * G + + .quad 0x013436c3eef7e3f1 + .quad 0x828b6a7ffe9e10f8 + .quad 0x7ff908e5bcf9defc + .quad 0x65d7951b3a3b3831 + .quad 0x77e953d8f5e08181 + .quad 0x84a50c44299dded9 + .quad 0xdc6c2d0c864525e5 + .quad 0x478ab52d39d1f2f4 + .quad 0x66a6a4d39252d159 + .quad 0xe5dde1bc871ac807 + .quad 0xb82c6b40a6c1c96f + .quad 0x16d87a411a212214 + + // 2^184 * 7 * G + + .quad 0xb3bd7e5a42066215 + .quad 0x879be3cd0c5a24c1 + .quad 0x57c05db1d6f994b7 + .quad 0x28f87c8165f38ca6 + .quad 0xfba4d5e2d54e0583 + .quad 0xe21fafd72ebd99fa + .quad 0x497ac2736ee9778f + .quad 0x1f990b577a5a6dde + .quad 0xa3344ead1be8f7d6 + .quad 0x7d1e50ebacea798f + .quad 0x77c6569e520de052 + .quad 0x45882fe1534d6d3e + + // 2^184 * 8 * G + + .quad 0x6669345d757983d6 + .quad 0x62b6ed1117aa11a6 + .quad 0x7ddd1857985e128f + .quad 0x688fe5b8f626f6dd + .quad 0xd8ac9929943c6fe4 + .quad 0xb5f9f161a38392a2 + .quad 0x2699db13bec89af3 + .quad 0x7dcf843ce405f074 + .quad 0x6c90d6484a4732c0 + .quad 0xd52143fdca563299 + .quad 0xb3be28c3915dc6e1 + .quad 0x6739687e7327191b + + // 2^188 * 1 * G + + .quad 0x9f65c5ea200814cf + .quad 0x840536e169a31740 + .quad 0x8b0ed13925c8b4ad + .quad 0x0080dbafe936361d + .quad 0x8ce5aad0c9cb971f + .quad 0x1156aaa99fd54a29 + .quad 0x41f7247015af9b78 + .quad 
0x1fe8cca8420f49aa + .quad 0x72a1848f3c0cc82a + .quad 0x38c560c2877c9e54 + .quad 0x5004e228ce554140 + .quad 0x042418a103429d71 + + // 2^188 * 2 * G + + .quad 0x899dea51abf3ff5f + .quad 0x9b93a8672fc2d8ba + .quad 0x2c38cb97be6ebd5c + .quad 0x114d578497263b5d + .quad 0x58e84c6f20816247 + .quad 0x8db2b2b6e36fd793 + .quad 0x977182561d484d85 + .quad 0x0822024f8632abd7 + .quad 0xb301bb7c6b1beca3 + .quad 0x55393f6dc6eb1375 + .quad 0x910d281097b6e4eb + .quad 0x1ad4548d9d479ea3 + + // 2^188 * 3 * G + + .quad 0xcd5a7da0389a48fd + .quad 0xb38fa4aa9a78371e + .quad 0xc6d9761b2cdb8e6c + .quad 0x35cf51dbc97e1443 + .quad 0xa06fe66d0fe9fed3 + .quad 0xa8733a401c587909 + .quad 0x30d14d800df98953 + .quad 0x41ce5876c7b30258 + .quad 0x59ac3bc5d670c022 + .quad 0xeae67c109b119406 + .quad 0x9798bdf0b3782fda + .quad 0x651e3201fd074092 + + // 2^188 * 4 * G + + .quad 0xd63d8483ef30c5cf + .quad 0x4cd4b4962361cc0c + .quad 0xee90e500a48426ac + .quad 0x0af51d7d18c14eeb + .quad 0xa57ba4a01efcae9e + .quad 0x769f4beedc308a94 + .quad 0xd1f10eeb3603cb2e + .quad 0x4099ce5e7e441278 + .quad 0x1ac98e4f8a5121e9 + .quad 0x7dae9544dbfa2fe0 + .quad 0x8320aa0dd6430df9 + .quad 0x667282652c4a2fb5 + + // 2^188 * 5 * G + + .quad 0x874621f4d86bc9ab + .quad 0xb54c7bbe56fe6fea + .quad 0x077a24257fadc22c + .quad 0x1ab53be419b90d39 + .quad 0xada8b6e02946db23 + .quad 0x1c0ce51a7b253ab7 + .quad 0x8448c85a66dd485b + .quad 0x7f1fc025d0675adf + .quad 0xd8ee1b18319ea6aa + .quad 0x004d88083a21f0da + .quad 0x3bd6aa1d883a4f4b + .quad 0x4db9a3a6dfd9fd14 + + // 2^188 * 6 * G + + .quad 0x8ce7b23bb99c0755 + .quad 0x35c5d6edc4f50f7a + .quad 0x7e1e2ed2ed9b50c3 + .quad 0x36305f16e8934da1 + .quad 0xd95b00bbcbb77c68 + .quad 0xddbc846a91f17849 + .quad 0x7cf700aebe28d9b3 + .quad 0x5ce1285c85d31f3e + .quad 0x31b6972d98b0bde8 + .quad 0x7d920706aca6de5b + .quad 0xe67310f8908a659f + .quad 0x50fac2a6efdf0235 + + // 2^188 * 7 * G + + .quad 0xf3d3a9f35b880f5a + .quad 0xedec050cdb03e7c2 + .quad 0xa896981ff9f0b1a2 + .quad 0x49a4ae2bac5e34a4 + .quad 0x295b1c86f6f449bc + .quad 0x51b2e84a1f0ab4dd + .quad 0xc001cb30aa8e551d + .quad 0x6a28d35944f43662 + .quad 0x28bb12ee04a740e0 + .quad 0x14313bbd9bce8174 + .quad 0x72f5b5e4e8c10c40 + .quad 0x7cbfb19936adcd5b + + // 2^188 * 8 * G + + .quad 0xa311ddc26b89792d + .quad 0x1b30b4c6da512664 + .quad 0x0ca77b4ccf150859 + .quad 0x1de443df1b009408 + .quad 0x8e793a7acc36e6e0 + .quad 0xf9fab7a37d586eed + .quad 0x3a4f9692bae1f4e4 + .quad 0x1c14b03eff5f447e + .quad 0x19647bd114a85291 + .quad 0x57b76cb21034d3af + .quad 0x6329db440f9d6dfa + .quad 0x5ef43e586a571493 + + // 2^192 * 1 * G + + .quad 0xef782014385675a6 + .quad 0xa2649f30aafda9e8 + .quad 0x4cd1eb505cdfa8cb + .quad 0x46115aba1d4dc0b3 + .quad 0xa66dcc9dc80c1ac0 + .quad 0x97a05cf41b38a436 + .quad 0xa7ebf3be95dbd7c6 + .quad 0x7da0b8f68d7e7dab + .quad 0xd40f1953c3b5da76 + .quad 0x1dac6f7321119e9b + .quad 0x03cc6021feb25960 + .quad 0x5a5f887e83674b4b + + // 2^192 * 2 * G + + .quad 0x8f6301cf70a13d11 + .quad 0xcfceb815350dd0c4 + .quad 0xf70297d4a4bca47e + .quad 0x3669b656e44d1434 + .quad 0x9e9628d3a0a643b9 + .quad 0xb5c3cb00e6c32064 + .quad 0x9b5302897c2dec32 + .quad 0x43e37ae2d5d1c70c + .quad 0x387e3f06eda6e133 + .quad 0x67301d5199a13ac0 + .quad 0xbd5ad8f836263811 + .quad 0x6a21e6cd4fd5e9be + + // 2^192 * 3 * G + + .quad 0xf1c6170a3046e65f + .quad 0x58712a2a00d23524 + .quad 0x69dbbd3c8c82b755 + .quad 0x586bf9f1a195ff57 + .quad 0xef4129126699b2e3 + .quad 0x71d30847708d1301 + .quad 0x325432d01182b0bd + .quad 0x45371b07001e8b36 + .quad 0xa6db088d5ef8790b + .quad 0x5278f0dc610937e5 + .quad 
0xac0349d261a16eb8 + .quad 0x0eafb03790e52179 + + // 2^192 * 4 * G + + .quad 0x960555c13748042f + .quad 0x219a41e6820baa11 + .quad 0x1c81f73873486d0c + .quad 0x309acc675a02c661 + .quad 0x5140805e0f75ae1d + .quad 0xec02fbe32662cc30 + .quad 0x2cebdf1eea92396d + .quad 0x44ae3344c5435bb3 + .quad 0x9cf289b9bba543ee + .quad 0xf3760e9d5ac97142 + .quad 0x1d82e5c64f9360aa + .quad 0x62d5221b7f94678f + + // 2^192 * 5 * G + + .quad 0x524c299c18d0936d + .quad 0xc86bb56c8a0c1a0c + .quad 0xa375052edb4a8631 + .quad 0x5c0efde4bc754562 + .quad 0x7585d4263af77a3c + .quad 0xdfae7b11fee9144d + .quad 0xa506708059f7193d + .quad 0x14f29a5383922037 + .quad 0xdf717edc25b2d7f5 + .quad 0x21f970db99b53040 + .quad 0xda9234b7c3ed4c62 + .quad 0x5e72365c7bee093e + + // 2^192 * 6 * G + + .quad 0x575bfc074571217f + .quad 0x3779675d0694d95b + .quad 0x9a0a37bbf4191e33 + .quad 0x77f1104c47b4eabc + .quad 0x7d9339062f08b33e + .quad 0x5b9659e5df9f32be + .quad 0xacff3dad1f9ebdfd + .quad 0x70b20555cb7349b7 + .quad 0xbe5113c555112c4c + .quad 0x6688423a9a881fcd + .quad 0x446677855e503b47 + .quad 0x0e34398f4a06404a + + // 2^192 * 7 * G + + .quad 0xb67d22d93ecebde8 + .quad 0x09b3e84127822f07 + .quad 0x743fa61fb05b6d8d + .quad 0x5e5405368a362372 + .quad 0x18930b093e4b1928 + .quad 0x7de3e10e73f3f640 + .quad 0xf43217da73395d6f + .quad 0x6f8aded6ca379c3e + .quad 0xe340123dfdb7b29a + .quad 0x487b97e1a21ab291 + .quad 0xf9967d02fde6949e + .quad 0x780de72ec8d3de97 + + // 2^192 * 8 * G + + .quad 0x0ae28545089ae7bc + .quad 0x388ddecf1c7f4d06 + .quad 0x38ac15510a4811b8 + .quad 0x0eb28bf671928ce4 + .quad 0x671feaf300f42772 + .quad 0x8f72eb2a2a8c41aa + .quad 0x29a17fd797373292 + .quad 0x1defc6ad32b587a6 + .quad 0xaf5bbe1aef5195a7 + .quad 0x148c1277917b15ed + .quad 0x2991f7fb7ae5da2e + .quad 0x467d201bf8dd2867 + + // 2^196 * 1 * G + + .quad 0x7906ee72f7bd2e6b + .quad 0x05d270d6109abf4e + .quad 0x8d5cfe45b941a8a4 + .quad 0x44c218671c974287 + .quad 0x745f9d56296bc318 + .quad 0x993580d4d8152e65 + .quad 0xb0e5b13f5839e9ce + .quad 0x51fc2b28d43921c0 + .quad 0x1b8fd11795e2a98c + .quad 0x1c4e5ee12b6b6291 + .quad 0x5b30e7107424b572 + .quad 0x6e6b9de84c4f4ac6 + + // 2^196 * 2 * G + + .quad 0xdff25fce4b1de151 + .quad 0xd841c0c7e11c4025 + .quad 0x2554b3c854749c87 + .quad 0x2d292459908e0df9 + .quad 0x6b7c5f10f80cb088 + .quad 0x736b54dc56e42151 + .quad 0xc2b620a5c6ef99c4 + .quad 0x5f4c802cc3a06f42 + .quad 0x9b65c8f17d0752da + .quad 0x881ce338c77ee800 + .quad 0xc3b514f05b62f9e3 + .quad 0x66ed5dd5bec10d48 + + // 2^196 * 3 * G + + .quad 0x7d38a1c20bb2089d + .quad 0x808334e196ccd412 + .quad 0xc4a70b8c6c97d313 + .quad 0x2eacf8bc03007f20 + .quad 0xf0adf3c9cbca047d + .quad 0x81c3b2cbf4552f6b + .quad 0xcfda112d44735f93 + .quad 0x1f23a0c77e20048c + .quad 0xf235467be5bc1570 + .quad 0x03d2d9020dbab38c + .quad 0x27529aa2fcf9e09e + .quad 0x0840bef29d34bc50 + + // 2^196 * 4 * G + + .quad 0x796dfb35dc10b287 + .quad 0x27176bcd5c7ff29d + .quad 0x7f3d43e8c7b24905 + .quad 0x0304f5a191c54276 + .quad 0xcd54e06b7f37e4eb + .quad 0x8cc15f87f5e96cca + .quad 0xb8248bb0d3597dce + .quad 0x246affa06074400c + .quad 0x37d88e68fbe45321 + .quad 0x86097548c0d75032 + .quad 0x4e9b13ef894a0d35 + .quad 0x25a83cac5753d325 + + // 2^196 * 5 * G + + .quad 0x10222f48eed8165e + .quad 0x623fc1234b8bcf3a + .quad 0x1e145c09c221e8f0 + .quad 0x7ccfa59fca782630 + .quad 0x9f0f66293952b6e2 + .quad 0x33db5e0e0934267b + .quad 0xff45252bd609fedc + .quad 0x06be10f5c506e0c9 + .quad 0x1a9615a9b62a345f + .quad 0x22050c564a52fecc + .quad 0xa7a2788528bc0dfe + .quad 0x5e82770a1a1ee71d + + // 2^196 * 6 * G + + .quad 
0x35425183ad896a5c + .quad 0xe8673afbe78d52f6 + .quad 0x2c66f25f92a35f64 + .quad 0x09d04f3b3b86b102 + .quad 0xe802e80a42339c74 + .quad 0x34175166a7fffae5 + .quad 0x34865d1f1c408cae + .quad 0x2cca982c605bc5ee + .quad 0xfd2d5d35197dbe6e + .quad 0x207c2eea8be4ffa3 + .quad 0x2613d8db325ae918 + .quad 0x7a325d1727741d3e + + // 2^196 * 7 * G + + .quad 0xd036b9bbd16dfde2 + .quad 0xa2055757c497a829 + .quad 0x8e6cc966a7f12667 + .quad 0x4d3b1a791239c180 + .quad 0xecd27d017e2a076a + .quad 0xd788689f1636495e + .quad 0x52a61af0919233e5 + .quad 0x2a479df17bb1ae64 + .quad 0x9e5eee8e33db2710 + .quad 0x189854ded6c43ca5 + .quad 0xa41c22c592718138 + .quad 0x27ad5538a43a5e9b + + // 2^196 * 8 * G + + .quad 0x2746dd4b15350d61 + .quad 0xd03fcbc8ee9521b7 + .quad 0xe86e365a138672ca + .quad 0x510e987f7e7d89e2 + .quad 0xcb5a7d638e47077c + .quad 0x8db7536120a1c059 + .quad 0x549e1e4d8bedfdcc + .quad 0x080153b7503b179d + .quad 0xdda69d930a3ed3e3 + .quad 0x3d386ef1cd60a722 + .quad 0xc817ad58bdaa4ee6 + .quad 0x23be8d554fe7372a + + // 2^200 * 1 * G + + .quad 0x95fe919a74ef4fad + .quad 0x3a827becf6a308a2 + .quad 0x964e01d309a47b01 + .quad 0x71c43c4f5ba3c797 + .quad 0xbc1ef4bd567ae7a9 + .quad 0x3f624cb2d64498bd + .quad 0xe41064d22c1f4ec8 + .quad 0x2ef9c5a5ba384001 + .quad 0xb6fd6df6fa9e74cd + .quad 0xf18278bce4af267a + .quad 0x8255b3d0f1ef990e + .quad 0x5a758ca390c5f293 + + // 2^200 * 2 * G + + .quad 0xa2b72710d9462495 + .quad 0x3aa8c6d2d57d5003 + .quad 0xe3d400bfa0b487ca + .quad 0x2dbae244b3eb72ec + .quad 0x8ce0918b1d61dc94 + .quad 0x8ded36469a813066 + .quad 0xd4e6a829afe8aad3 + .quad 0x0a738027f639d43f + .quad 0x980f4a2f57ffe1cc + .quad 0x00670d0de1839843 + .quad 0x105c3f4a49fb15fd + .quad 0x2698ca635126a69c + + // 2^200 * 3 * G + + .quad 0xe765318832b0ba78 + .quad 0x381831f7925cff8b + .quad 0x08a81b91a0291fcc + .quad 0x1fb43dcc49caeb07 + .quad 0x2e3d702f5e3dd90e + .quad 0x9e3f0918e4d25386 + .quad 0x5e773ef6024da96a + .quad 0x3c004b0c4afa3332 + .quad 0x9aa946ac06f4b82b + .quad 0x1ca284a5a806c4f3 + .quad 0x3ed3265fc6cd4787 + .quad 0x6b43fd01cd1fd217 + + // 2^200 * 4 * G + + .quad 0xc7a75d4b4697c544 + .quad 0x15fdf848df0fffbf + .quad 0x2868b9ebaa46785a + .quad 0x5a68d7105b52f714 + .quad 0xb5c742583e760ef3 + .quad 0x75dc52b9ee0ab990 + .quad 0xbf1427c2072b923f + .quad 0x73420b2d6ff0d9f0 + .quad 0xaf2cf6cb9e851e06 + .quad 0x8f593913c62238c4 + .quad 0xda8ab89699fbf373 + .quad 0x3db5632fea34bc9e + + // 2^200 * 5 * G + + .quad 0xf46eee2bf75dd9d8 + .quad 0x0d17b1f6396759a5 + .quad 0x1bf2d131499e7273 + .quad 0x04321adf49d75f13 + .quad 0x2e4990b1829825d5 + .quad 0xedeaeb873e9a8991 + .quad 0xeef03d394c704af8 + .quad 0x59197ea495df2b0e + .quad 0x04e16019e4e55aae + .quad 0xe77b437a7e2f92e9 + .quad 0xc7ce2dc16f159aa4 + .quad 0x45eafdc1f4d70cc0 + + // 2^200 * 6 * G + + .quad 0x698401858045d72b + .quad 0x4c22faa2cf2f0651 + .quad 0x941a36656b222dc6 + .quad 0x5a5eebc80362dade + .quad 0xb60e4624cfccb1ed + .quad 0x59dbc292bd5c0395 + .quad 0x31a09d1ddc0481c9 + .quad 0x3f73ceea5d56d940 + .quad 0xb7a7bfd10a4e8dc6 + .quad 0xbe57007e44c9b339 + .quad 0x60c1207f1557aefa + .quad 0x26058891266218db + + // 2^200 * 7 * G + + .quad 0x59f704a68360ff04 + .quad 0xc3d93fde7661e6f4 + .quad 0x831b2a7312873551 + .quad 0x54ad0c2e4e615d57 + .quad 0x4c818e3cc676e542 + .quad 0x5e422c9303ceccad + .quad 0xec07cccab4129f08 + .quad 0x0dedfa10b24443b8 + .quad 0xee3b67d5b82b522a + .quad 0x36f163469fa5c1eb + .quad 0xa5b4d2f26ec19fd3 + .quad 0x62ecb2baa77a9408 + + // 2^200 * 8 * G + + .quad 0xe5ed795261152b3d + .quad 0x4962357d0eddd7d1 + .quad 0x7482c8d0b96b4c71 + .quad 
0x2e59f919a966d8be + .quad 0x92072836afb62874 + .quad 0x5fcd5e8579e104a5 + .quad 0x5aad01adc630a14a + .quad 0x61913d5075663f98 + .quad 0x0dc62d361a3231da + .quad 0xfa47583294200270 + .quad 0x02d801513f9594ce + .quad 0x3ddbc2a131c05d5c + + // 2^204 * 1 * G + + .quad 0x3f50a50a4ffb81ef + .quad 0xb1e035093bf420bf + .quad 0x9baa8e1cc6aa2cd0 + .quad 0x32239861fa237a40 + .quad 0xfb735ac2004a35d1 + .quad 0x31de0f433a6607c3 + .quad 0x7b8591bfc528d599 + .quad 0x55be9a25f5bb050c + .quad 0x0d005acd33db3dbf + .quad 0x0111b37c80ac35e2 + .quad 0x4892d66c6f88ebeb + .quad 0x770eadb16508fbcd + + // 2^204 * 2 * G + + .quad 0x8451f9e05e4e89dd + .quad 0xc06302ffbc793937 + .quad 0x5d22749556a6495c + .quad 0x09a6755ca05603fb + .quad 0xf1d3b681a05071b9 + .quad 0x2207659a3592ff3a + .quad 0x5f0169297881e40e + .quad 0x16bedd0e86ba374e + .quad 0x5ecccc4f2c2737b5 + .quad 0x43b79e0c2dccb703 + .quad 0x33e008bc4ec43df3 + .quad 0x06c1b840f07566c0 + + // 2^204 * 3 * G + + .quad 0x7688a5c6a388f877 + .quad 0x02a96c14deb2b6ac + .quad 0x64c9f3431b8c2af8 + .quad 0x3628435554a1eed6 + .quad 0x69ee9e7f9b02805c + .quad 0xcbff828a547d1640 + .quad 0x3d93a869b2430968 + .quad 0x46b7b8cd3fe26972 + .quad 0xe9812086fe7eebe0 + .quad 0x4cba6be72f515437 + .quad 0x1d04168b516efae9 + .quad 0x5ea1391043982cb9 + + // 2^204 * 4 * G + + .quad 0x49125c9cf4702ee1 + .quad 0x4520b71f8b25b32d + .quad 0x33193026501fef7e + .quad 0x656d8997c8d2eb2b + .quad 0x6f2b3be4d5d3b002 + .quad 0xafec33d96a09c880 + .quad 0x035f73a4a8bcc4cc + .quad 0x22c5b9284662198b + .quad 0xcb58c8fe433d8939 + .quad 0x89a0cb2e6a8d7e50 + .quad 0x79ca955309fbbe5a + .quad 0x0c626616cd7fc106 + + // 2^204 * 5 * G + + .quad 0x1ffeb80a4879b61f + .quad 0x6396726e4ada21ed + .quad 0x33c7b093368025ba + .quad 0x471aa0c6f3c31788 + .quad 0x8fdfc379fbf454b1 + .quad 0x45a5a970f1a4b771 + .quad 0xac921ef7bad35915 + .quad 0x42d088dca81c2192 + .quad 0x8fda0f37a0165199 + .quad 0x0adadb77c8a0e343 + .quad 0x20fbfdfcc875e820 + .quad 0x1cf2bea80c2206e7 + + // 2^204 * 6 * G + + .quad 0xc2ddf1deb36202ac + .quad 0x92a5fe09d2e27aa5 + .quad 0x7d1648f6fc09f1d3 + .quad 0x74c2cc0513bc4959 + .quad 0x982d6e1a02c0412f + .quad 0x90fa4c83db58e8fe + .quad 0x01c2f5bcdcb18bc0 + .quad 0x686e0c90216abc66 + .quad 0x1fadbadba54395a7 + .quad 0xb41a02a0ae0da66a + .quad 0xbf19f598bba37c07 + .quad 0x6a12b8acde48430d + + // 2^204 * 7 * G + + .quad 0xf8daea1f39d495d9 + .quad 0x592c190e525f1dfc + .quad 0xdb8cbd04c9991d1b + .quad 0x11f7fda3d88f0cb7 + .quad 0x793bdd801aaeeb5f + .quad 0x00a2a0aac1518871 + .quad 0xe8a373a31f2136b4 + .quad 0x48aab888fc91ef19 + .quad 0x041f7e925830f40e + .quad 0x002d6ca979661c06 + .quad 0x86dc9ff92b046a2e + .quad 0x760360928b0493d1 + + // 2^204 * 8 * G + + .quad 0x21bb41c6120cf9c6 + .quad 0xeab2aa12decda59b + .quad 0xc1a72d020aa48b34 + .quad 0x215d4d27e87d3b68 + .quad 0xb43108e5695a0b05 + .quad 0x6cb00ee8ad37a38b + .quad 0x5edad6eea3537381 + .quad 0x3f2602d4b6dc3224 + .quad 0xc8b247b65bcaf19c + .quad 0x49779dc3b1b2c652 + .quad 0x89a180bbd5ece2e2 + .quad 0x13f098a3cec8e039 + + // 2^208 * 1 * G + + .quad 0x9adc0ff9ce5ec54b + .quad 0x039c2a6b8c2f130d + .quad 0x028007c7f0f89515 + .quad 0x78968314ac04b36b + .quad 0xf3aa57a22796bb14 + .quad 0x883abab79b07da21 + .quad 0xe54be21831a0391c + .quad 0x5ee7fb38d83205f9 + .quad 0x538dfdcb41446a8e + .quad 0xa5acfda9434937f9 + .quad 0x46af908d263c8c78 + .quad 0x61d0633c9bca0d09 + + // 2^208 * 2 * G + + .quad 0x63744935ffdb2566 + .quad 0xc5bd6b89780b68bb + .quad 0x6f1b3280553eec03 + .quad 0x6e965fd847aed7f5 + .quad 0xada328bcf8fc73df + .quad 0xee84695da6f037fc + .quad 
0x637fb4db38c2a909 + .quad 0x5b23ac2df8067bdc + .quad 0x9ad2b953ee80527b + .quad 0xe88f19aafade6d8d + .quad 0x0e711704150e82cf + .quad 0x79b9bbb9dd95dedc + + // 2^208 * 3 * G + + .quad 0xebb355406a3126c2 + .quad 0xd26383a868c8c393 + .quad 0x6c0c6429e5b97a82 + .quad 0x5065f158c9fd2147 + .quad 0xd1997dae8e9f7374 + .quad 0xa032a2f8cfbb0816 + .quad 0xcd6cba126d445f0a + .quad 0x1ba811460accb834 + .quad 0x708169fb0c429954 + .quad 0xe14600acd76ecf67 + .quad 0x2eaab98a70e645ba + .quad 0x3981f39e58a4faf2 + + // 2^208 * 4 * G + + .quad 0x18fb8a7559230a93 + .quad 0x1d168f6960e6f45d + .quad 0x3a85a94514a93cb5 + .quad 0x38dc083705acd0fd + .quad 0xc845dfa56de66fde + .quad 0xe152a5002c40483a + .quad 0xe9d2e163c7b4f632 + .quad 0x30f4452edcbc1b65 + .quad 0x856d2782c5759740 + .quad 0xfa134569f99cbecc + .quad 0x8844fc73c0ea4e71 + .quad 0x632d9a1a593f2469 + + // 2^208 * 5 * G + + .quad 0xf6bb6b15b807cba6 + .quad 0x1823c7dfbc54f0d7 + .quad 0xbb1d97036e29670b + .quad 0x0b24f48847ed4a57 + .quad 0xbf09fd11ed0c84a7 + .quad 0x63f071810d9f693a + .quad 0x21908c2d57cf8779 + .quad 0x3a5a7df28af64ba2 + .quad 0xdcdad4be511beac7 + .quad 0xa4538075ed26ccf2 + .quad 0xe19cff9f005f9a65 + .quad 0x34fcf74475481f63 + + // 2^208 * 6 * G + + .quad 0xc197e04c789767ca + .quad 0xb8714dcb38d9467d + .quad 0x55de888283f95fa8 + .quad 0x3d3bdc164dfa63f7 + .quad 0xa5bb1dab78cfaa98 + .quad 0x5ceda267190b72f2 + .quad 0x9309c9110a92608e + .quad 0x0119a3042fb374b0 + .quad 0x67a2d89ce8c2177d + .quad 0x669da5f66895d0c1 + .quad 0xf56598e5b282a2b0 + .quad 0x56c088f1ede20a73 + + // 2^208 * 7 * G + + .quad 0x336d3d1110a86e17 + .quad 0xd7f388320b75b2fa + .quad 0xf915337625072988 + .quad 0x09674c6b99108b87 + .quad 0x581b5fac24f38f02 + .quad 0xa90be9febae30cbd + .quad 0x9a2169028acf92f0 + .quad 0x038b7ea48359038f + .quad 0x9f4ef82199316ff8 + .quad 0x2f49d282eaa78d4f + .quad 0x0971a5ab5aef3174 + .quad 0x6e5e31025969eb65 + + // 2^208 * 8 * G + + .quad 0xb16c62f587e593fb + .quad 0x4999eddeca5d3e71 + .quad 0xb491c1e014cc3e6d + .quad 0x08f5114789a8dba8 + .quad 0x3304fb0e63066222 + .quad 0xfb35068987acba3f + .quad 0xbd1924778c1061a3 + .quad 0x3058ad43d1838620 + .quad 0x323c0ffde57663d0 + .quad 0x05c3df38a22ea610 + .quad 0xbdc78abdac994f9a + .quad 0x26549fa4efe3dc99 + + // 2^212 * 1 * G + + .quad 0x738b38d787ce8f89 + .quad 0xb62658e24179a88d + .quad 0x30738c9cf151316d + .quad 0x49128c7f727275c9 + .quad 0x04dbbc17f75396b9 + .quad 0x69e6a2d7d2f86746 + .quad 0xc6409d99f53eabc6 + .quad 0x606175f6332e25d2 + .quad 0x4021370ef540e7dd + .quad 0x0910d6f5a1f1d0a5 + .quad 0x4634aacd5b06b807 + .quad 0x6a39e6356944f235 + + // 2^212 * 2 * G + + .quad 0x96cd5640df90f3e7 + .quad 0x6c3a760edbfa25ea + .quad 0x24f3ef0959e33cc4 + .quad 0x42889e7e530d2e58 + .quad 0x1da1965774049e9d + .quad 0xfbcd6ea198fe352b + .quad 0xb1cbcd50cc5236a6 + .quad 0x1f5ec83d3f9846e2 + .quad 0x8efb23c3328ccb75 + .quad 0xaf42a207dd876ee9 + .quad 0x20fbdadc5dfae796 + .quad 0x241e246b06bf9f51 + + // 2^212 * 3 * G + + .quad 0x29e68e57ad6e98f6 + .quad 0x4c9260c80b462065 + .quad 0x3f00862ea51ebb4b + .quad 0x5bc2c77fb38d9097 + .quad 0x7eaafc9a6280bbb8 + .quad 0x22a70f12f403d809 + .quad 0x31ce40bb1bfc8d20 + .quad 0x2bc65635e8bd53ee + .quad 0xe8d5dc9fa96bad93 + .quad 0xe58fb17dde1947dc + .quad 0x681532ea65185fa3 + .quad 0x1fdd6c3b034a7830 + + // 2^212 * 4 * G + + .quad 0x0a64e28c55dc18fe + .quad 0xe3df9e993399ebdd + .quad 0x79ac432370e2e652 + .quad 0x35ff7fc33ae4cc0e + .quad 0x9c13a6a52dd8f7a9 + .quad 0x2dbb1f8c3efdcabf + .quad 0x961e32405e08f7b5 + .quad 0x48c8a121bbe6c9e5 + .quad 0xfc415a7c59646445 + .quad 
0xd224b2d7c128b615 + .quad 0x6035c9c905fbb912 + .quad 0x42d7a91274429fab + + // 2^212 * 5 * G + + .quad 0x4e6213e3eaf72ed3 + .quad 0x6794981a43acd4e7 + .quad 0xff547cde6eb508cb + .quad 0x6fed19dd10fcb532 + .quad 0xa9a48947933da5bc + .quad 0x4a58920ec2e979ec + .quad 0x96d8800013e5ac4c + .quad 0x453692d74b48b147 + .quad 0xdd775d99a8559c6f + .quad 0xf42a2140df003e24 + .quad 0x5223e229da928a66 + .quad 0x063f46ba6d38f22c + + // 2^212 * 6 * G + + .quad 0xd2d242895f536694 + .quad 0xca33a2c542939b2c + .quad 0x986fada6c7ddb95c + .quad 0x5a152c042f712d5d + .quad 0x39843cb737346921 + .quad 0xa747fb0738c89447 + .quad 0xcb8d8031a245307e + .quad 0x67810f8e6d82f068 + .quad 0x3eeb8fbcd2287db4 + .quad 0x72c7d3a301a03e93 + .quad 0x5473e88cbd98265a + .quad 0x7324aa515921b403 + + // 2^212 * 7 * G + + .quad 0x857942f46c3cbe8e + .quad 0xa1d364b14730c046 + .quad 0x1c8ed914d23c41bf + .quad 0x0838e161eef6d5d2 + .quad 0xad23f6dae82354cb + .quad 0x6962502ab6571a6d + .quad 0x9b651636e38e37d1 + .quad 0x5cac5005d1a3312f + .quad 0x8cc154cce9e39904 + .quad 0x5b3a040b84de6846 + .quad 0xc4d8a61cb1be5d6e + .quad 0x40fb897bd8861f02 + + // 2^212 * 8 * G + + .quad 0x84c5aa9062de37a1 + .quad 0x421da5000d1d96e1 + .quad 0x788286306a9242d9 + .quad 0x3c5e464a690d10da + .quad 0xe57ed8475ab10761 + .quad 0x71435e206fd13746 + .quad 0x342f824ecd025632 + .quad 0x4b16281ea8791e7b + .quad 0xd1c101d50b813381 + .quad 0xdee60f1176ee6828 + .quad 0x0cb68893383f6409 + .quad 0x6183c565f6ff484a + + // 2^216 * 1 * G + + .quad 0x741d5a461e6bf9d6 + .quad 0x2305b3fc7777a581 + .quad 0xd45574a26474d3d9 + .quad 0x1926e1dc6401e0ff + .quad 0xdb468549af3f666e + .quad 0xd77fcf04f14a0ea5 + .quad 0x3df23ff7a4ba0c47 + .quad 0x3a10dfe132ce3c85 + .quad 0xe07f4e8aea17cea0 + .quad 0x2fd515463a1fc1fd + .quad 0x175322fd31f2c0f1 + .quad 0x1fa1d01d861e5d15 + + // 2^216 * 2 * G + + .quad 0xcc8055947d599832 + .quad 0x1e4656da37f15520 + .quad 0x99f6f7744e059320 + .quad 0x773563bc6a75cf33 + .quad 0x38dcac00d1df94ab + .quad 0x2e712bddd1080de9 + .quad 0x7f13e93efdd5e262 + .quad 0x73fced18ee9a01e5 + .quad 0x06b1e90863139cb3 + .quad 0xa493da67c5a03ecd + .quad 0x8d77cec8ad638932 + .quad 0x1f426b701b864f44 + + // 2^216 * 3 * G + + .quad 0xefc9264c41911c01 + .quad 0xf1a3b7b817a22c25 + .quad 0x5875da6bf30f1447 + .quad 0x4e1af5271d31b090 + .quad 0xf17e35c891a12552 + .quad 0xb76b8153575e9c76 + .quad 0xfa83406f0d9b723e + .quad 0x0b76bb1b3fa7e438 + .quad 0x08b8c1f97f92939b + .quad 0xbe6771cbd444ab6e + .quad 0x22e5646399bb8017 + .quad 0x7b6dd61eb772a955 + + // 2^216 * 4 * G + + .quad 0xb7adc1e850f33d92 + .quad 0x7998fa4f608cd5cf + .quad 0xad962dbd8dfc5bdb + .quad 0x703e9bceaf1d2f4f + .quad 0x5730abf9ab01d2c7 + .quad 0x16fb76dc40143b18 + .quad 0x866cbe65a0cbb281 + .quad 0x53fa9b659bff6afe + .quad 0x6c14c8e994885455 + .quad 0x843a5d6665aed4e5 + .quad 0x181bb73ebcd65af1 + .quad 0x398d93e5c4c61f50 + + // 2^216 * 5 * G + + .quad 0x1c4bd16733e248f3 + .quad 0xbd9e128715bf0a5f + .quad 0xd43f8cf0a10b0376 + .quad 0x53b09b5ddf191b13 + .quad 0xc3877c60d2e7e3f2 + .quad 0x3b34aaa030828bb1 + .quad 0x283e26e7739ef138 + .quad 0x699c9c9002c30577 + .quad 0xf306a7235946f1cc + .quad 0x921718b5cce5d97d + .quad 0x28cdd24781b4e975 + .quad 0x51caf30c6fcdd907 + + // 2^216 * 6 * G + + .quad 0xa60ba7427674e00a + .quad 0x630e8570a17a7bf3 + .quad 0x3758563dcf3324cc + .quad 0x5504aa292383fdaa + .quad 0x737af99a18ac54c7 + .quad 0x903378dcc51cb30f + .quad 0x2b89bc334ce10cc7 + .quad 0x12ae29c189f8e99a + .quad 0xa99ec0cb1f0d01cf + .quad 0x0dd1efcc3a34f7ae + .quad 0x55ca7521d09c4e22 + .quad 0x5fd14fe958eba5ea + + // 
2^216 * 7 * G + + .quad 0xb5dc2ddf2845ab2c + .quad 0x069491b10a7fe993 + .quad 0x4daaf3d64002e346 + .quad 0x093ff26e586474d1 + .quad 0x3c42fe5ebf93cb8e + .quad 0xbedfa85136d4565f + .quad 0xe0f0859e884220e8 + .quad 0x7dd73f960725d128 + .quad 0xb10d24fe68059829 + .quad 0x75730672dbaf23e5 + .quad 0x1367253ab457ac29 + .quad 0x2f59bcbc86b470a4 + + // 2^216 * 8 * G + + .quad 0x83847d429917135f + .quad 0xad1b911f567d03d7 + .quad 0x7e7748d9be77aad1 + .quad 0x5458b42e2e51af4a + .quad 0x7041d560b691c301 + .quad 0x85201b3fadd7e71e + .quad 0x16c2e16311335585 + .quad 0x2aa55e3d010828b1 + .quad 0xed5192e60c07444f + .quad 0x42c54e2d74421d10 + .quad 0x352b4c82fdb5c864 + .quad 0x13e9004a8a768664 + + // 2^220 * 1 * G + + .quad 0xcbb5b5556c032bff + .quad 0xdf7191b729297a3a + .quad 0xc1ff7326aded81bb + .quad 0x71ade8bb68be03f5 + .quad 0x1e6284c5806b467c + .quad 0xc5f6997be75d607b + .quad 0x8b67d958b378d262 + .quad 0x3d88d66a81cd8b70 + .quad 0x8b767a93204ed789 + .quad 0x762fcacb9fa0ae2a + .quad 0x771febcc6dce4887 + .quad 0x343062158ff05fb3 + + // 2^220 * 2 * G + + .quad 0xe05da1a7e1f5bf49 + .quad 0x26457d6dd4736092 + .quad 0x77dcb07773cc32f6 + .quad 0x0a5d94969cdd5fcd + .quad 0xfce219072a7b31b4 + .quad 0x4d7adc75aa578016 + .quad 0x0ec276a687479324 + .quad 0x6d6d9d5d1fda4beb + .quad 0x22b1a58ae9b08183 + .quad 0xfd95d071c15c388b + .quad 0xa9812376850a0517 + .quad 0x33384cbabb7f335e + + // 2^220 * 3 * G + + .quad 0x3c6fa2680ca2c7b5 + .quad 0x1b5082046fb64fda + .quad 0xeb53349c5431d6de + .quad 0x5278b38f6b879c89 + .quad 0x33bc627a26218b8d + .quad 0xea80b21fc7a80c61 + .quad 0x9458b12b173e9ee6 + .quad 0x076247be0e2f3059 + .quad 0x52e105f61416375a + .quad 0xec97af3685abeba4 + .quad 0x26e6b50623a67c36 + .quad 0x5cf0e856f3d4fb01 + + // 2^220 * 4 * G + + .quad 0xf6c968731ae8cab4 + .quad 0x5e20741ecb4f92c5 + .quad 0x2da53be58ccdbc3e + .quad 0x2dddfea269970df7 + .quad 0xbeaece313db342a8 + .quad 0xcba3635b842db7ee + .quad 0xe88c6620817f13ef + .quad 0x1b9438aa4e76d5c6 + .quad 0x8a50777e166f031a + .quad 0x067b39f10fb7a328 + .quad 0x1925c9a6010fbd76 + .quad 0x6df9b575cc740905 + + // 2^220 * 5 * G + + .quad 0x42c1192927f6bdcf + .quad 0x8f91917a403d61ca + .quad 0xdc1c5a668b9e1f61 + .quad 0x1596047804ec0f8d + .quad 0xecdfc35b48cade41 + .quad 0x6a88471fb2328270 + .quad 0x740a4a2440a01b6a + .quad 0x471e5796003b5f29 + .quad 0xda96bbb3aced37ac + .quad 0x7a2423b5e9208cea + .quad 0x24cc5c3038aebae2 + .quad 0x50c356afdc5dae2f + + // 2^220 * 6 * G + + .quad 0x09dcbf4341c30318 + .quad 0xeeba061183181dce + .quad 0xc179c0cedc1e29a1 + .quad 0x1dbf7b89073f35b0 + .quad 0xcfed9cdf1b31b964 + .quad 0xf486a9858ca51af3 + .quad 0x14897265ea8c1f84 + .quad 0x784a53dd932acc00 + .quad 0x2d99f9df14fc4920 + .quad 0x76ccb60cc4499fe5 + .quad 0xa4132cbbe5cf0003 + .quad 0x3f93d82354f000ea + + // 2^220 * 7 * G + + .quad 0x8183e7689e04ce85 + .quad 0x678fb71e04465341 + .quad 0xad92058f6688edac + .quad 0x5da350d3532b099a + .quad 0xeaac12d179e14978 + .quad 0xff923ff3bbebff5e + .quad 0x4af663e40663ce27 + .quad 0x0fd381a811a5f5ff + .quad 0xf256aceca436df54 + .quad 0x108b6168ae69d6e8 + .quad 0x20d986cb6b5d036c + .quad 0x655957b9fee2af50 + + // 2^220 * 8 * G + + .quad 0xaea8b07fa902030f + .quad 0xf88c766af463d143 + .quad 0x15b083663c787a60 + .quad 0x08eab1148267a4a8 + .quad 0xbdc1409bd002d0ac + .quad 0x66660245b5ccd9a6 + .quad 0x82317dc4fade85ec + .quad 0x02fe934b6ad7df0d + .quad 0xef5cf100cfb7ea74 + .quad 0x22897633a1cb42ac + .quad 0xd4ce0c54cef285e2 + .quad 0x30408c048a146a55 + + // 2^224 * 1 * G + + .quad 0x739d8845832fcedb + .quad 0xfa38d6c9ae6bf863 + .quad 
0x32bc0dcab74ffef7 + .quad 0x73937e8814bce45e + .quad 0xbb2e00c9193b877f + .quad 0xece3a890e0dc506b + .quad 0xecf3b7c036de649f + .quad 0x5f46040898de9e1a + .quad 0xb9037116297bf48d + .quad 0xa9d13b22d4f06834 + .quad 0xe19715574696bdc6 + .quad 0x2cf8a4e891d5e835 + + // 2^224 * 2 * G + + .quad 0x6d93fd8707110f67 + .quad 0xdd4c09d37c38b549 + .quad 0x7cb16a4cc2736a86 + .quad 0x2049bd6e58252a09 + .quad 0x2cb5487e17d06ba2 + .quad 0x24d2381c3950196b + .quad 0xd7659c8185978a30 + .quad 0x7a6f7f2891d6a4f6 + .quad 0x7d09fd8d6a9aef49 + .quad 0xf0ee60be5b3db90b + .quad 0x4c21b52c519ebfd4 + .quad 0x6011aadfc545941d + + // 2^224 * 3 * G + + .quad 0x5f67926dcf95f83c + .quad 0x7c7e856171289071 + .quad 0xd6a1e7f3998f7a5b + .quad 0x6fc5cc1b0b62f9e0 + .quad 0x63ded0c802cbf890 + .quad 0xfbd098ca0dff6aaa + .quad 0x624d0afdb9b6ed99 + .quad 0x69ce18b779340b1e + .quad 0xd1ef5528b29879cb + .quad 0xdd1aae3cd47e9092 + .quad 0x127e0442189f2352 + .quad 0x15596b3ae57101f1 + + // 2^224 * 4 * G + + .quad 0x462739d23f9179a2 + .quad 0xff83123197d6ddcf + .quad 0x1307deb553f2148a + .quad 0x0d2237687b5f4dda + .quad 0x09ff31167e5124ca + .quad 0x0be4158bd9c745df + .quad 0x292b7d227ef556e5 + .quad 0x3aa4e241afb6d138 + .quad 0x2cc138bf2a3305f5 + .quad 0x48583f8fa2e926c3 + .quad 0x083ab1a25549d2eb + .quad 0x32fcaa6e4687a36c + + // 2^224 * 5 * G + + .quad 0x7bc56e8dc57d9af5 + .quad 0x3e0bd2ed9df0bdf2 + .quad 0xaac014de22efe4a3 + .quad 0x4627e9cefebd6a5c + .quad 0x3207a4732787ccdf + .quad 0x17e31908f213e3f8 + .quad 0xd5b2ecd7f60d964e + .quad 0x746f6336c2600be9 + .quad 0x3f4af345ab6c971c + .quad 0xe288eb729943731f + .quad 0x33596a8a0344186d + .quad 0x7b4917007ed66293 + + // 2^224 * 6 * G + + .quad 0x2d85fb5cab84b064 + .quad 0x497810d289f3bc14 + .quad 0x476adc447b15ce0c + .quad 0x122ba376f844fd7b + .quad 0x54341b28dd53a2dd + .quad 0xaa17905bdf42fc3f + .quad 0x0ff592d94dd2f8f4 + .quad 0x1d03620fe08cd37d + .quad 0xc20232cda2b4e554 + .quad 0x9ed0fd42115d187f + .quad 0x2eabb4be7dd479d9 + .quad 0x02c70bf52b68ec4c + + // 2^224 * 7 * G + + .quad 0xa287ec4b5d0b2fbb + .quad 0x415c5790074882ca + .quad 0xe044a61ec1d0815c + .quad 0x26334f0a409ef5e0 + .quad 0xace532bf458d72e1 + .quad 0x5be768e07cb73cb5 + .quad 0x56cf7d94ee8bbde7 + .quad 0x6b0697e3feb43a03 + .quad 0xb6c8f04adf62a3c0 + .quad 0x3ef000ef076da45d + .quad 0x9c9cb95849f0d2a9 + .quad 0x1cc37f43441b2fae + + // 2^224 * 8 * G + + .quad 0x508f565a5cc7324f + .quad 0xd061c4c0e506a922 + .quad 0xfb18abdb5c45ac19 + .quad 0x6c6809c10380314a + .quad 0xd76656f1c9ceaeb9 + .quad 0x1c5b15f818e5656a + .quad 0x26e72832844c2334 + .quad 0x3a346f772f196838 + .quad 0xd2d55112e2da6ac8 + .quad 0xe9bd0331b1e851ed + .quad 0x960746dd8ec67262 + .quad 0x05911b9f6ef7c5d0 + + // 2^228 * 1 * G + + .quad 0xe9dcd756b637ff2d + .quad 0xec4c348fc987f0c4 + .quad 0xced59285f3fbc7b7 + .quad 0x3305354793e1ea87 + .quad 0x01c18980c5fe9f94 + .quad 0xcd656769716fd5c8 + .quad 0x816045c3d195a086 + .quad 0x6e2b7f3266cc7982 + .quad 0xcc802468f7c3568f + .quad 0x9de9ba8219974cb3 + .quad 0xabb7229cb5b81360 + .quad 0x44e2017a6fbeba62 + + // 2^228 * 2 * G + + .quad 0xc4c2a74354dab774 + .quad 0x8e5d4c3c4eaf031a + .quad 0xb76c23d242838f17 + .quad 0x749a098f68dce4ea + .quad 0x87f82cf3b6ca6ecd + .quad 0x580f893e18f4a0c2 + .quad 0x058930072604e557 + .quad 0x6cab6ac256d19c1d + .quad 0xdcdfe0a02cc1de60 + .quad 0x032665ff51c5575b + .quad 0x2c0c32f1073abeeb + .quad 0x6a882014cd7b8606 + + // 2^228 * 3 * G + + .quad 0xa52a92fea4747fb5 + .quad 0xdc12a4491fa5ab89 + .quad 0xd82da94bb847a4ce + .quad 0x4d77edce9512cc4e + .quad 0xd111d17caf4feb6e + .quad 
0x050bba42b33aa4a3 + .quad 0x17514c3ceeb46c30 + .quad 0x54bedb8b1bc27d75 + .quad 0x77c8e14577e2189c + .quad 0xa3e46f6aff99c445 + .quad 0x3144dfc86d335343 + .quad 0x3a96559e7c4216a9 + + // 2^228 * 4 * G + + .quad 0x12550d37f42ad2ee + .quad 0x8b78e00498a1fbf5 + .quad 0x5d53078233894cb2 + .quad 0x02c84e4e3e498d0c + .quad 0x4493896880baaa52 + .quad 0x4c98afc4f285940e + .quad 0xef4aa79ba45448b6 + .quad 0x5278c510a57aae7f + .quad 0xa54dd074294c0b94 + .quad 0xf55d46b8df18ffb6 + .quad 0xf06fecc58dae8366 + .quad 0x588657668190d165 + + // 2^228 * 5 * G + + .quad 0xd47712311aef7117 + .quad 0x50343101229e92c7 + .quad 0x7a95e1849d159b97 + .quad 0x2449959b8b5d29c9 + .quad 0xbf5834f03de25cc3 + .quad 0xb887c8aed6815496 + .quad 0x5105221a9481e892 + .quad 0x6760ed19f7723f93 + .quad 0x669ba3b7ac35e160 + .quad 0x2eccf73fba842056 + .quad 0x1aec1f17c0804f07 + .quad 0x0d96bc031856f4e7 + + // 2^228 * 6 * G + + .quad 0x3318be7775c52d82 + .quad 0x4cb764b554d0aab9 + .quad 0xabcf3d27cc773d91 + .quad 0x3bf4d1848123288a + .quad 0xb1d534b0cc7505e1 + .quad 0x32cd003416c35288 + .quad 0xcb36a5800762c29d + .quad 0x5bfe69b9237a0bf8 + .quad 0x183eab7e78a151ab + .quad 0xbbe990c999093763 + .quad 0xff717d6e4ac7e335 + .quad 0x4c5cddb325f39f88 + + // 2^228 * 7 * G + + .quad 0xc0f6b74d6190a6eb + .quad 0x20ea81a42db8f4e4 + .quad 0xa8bd6f7d97315760 + .quad 0x33b1d60262ac7c21 + .quad 0x57750967e7a9f902 + .quad 0x2c37fdfc4f5b467e + .quad 0xb261663a3177ba46 + .quad 0x3a375e78dc2d532b + .quad 0x8141e72f2d4dddea + .quad 0xe6eafe9862c607c8 + .quad 0x23c28458573cafd0 + .quad 0x46b9476f4ff97346 + + // 2^228 * 8 * G + + .quad 0x0c1ffea44f901e5c + .quad 0x2b0b6fb72184b782 + .quad 0xe587ff910114db88 + .quad 0x37130f364785a142 + .quad 0x1215505c0d58359f + .quad 0x2a2013c7fc28c46b + .quad 0x24a0a1af89ea664e + .quad 0x4400b638a1130e1f + .quad 0x3a01b76496ed19c3 + .quad 0x31e00ab0ed327230 + .quad 0x520a885783ca15b1 + .quad 0x06aab9875accbec7 + + // 2^232 * 1 * G + + .quad 0xc1339983f5df0ebb + .quad 0xc0f3758f512c4cac + .quad 0x2cf1130a0bb398e1 + .quad 0x6b3cecf9aa270c62 + .quad 0x5349acf3512eeaef + .quad 0x20c141d31cc1cb49 + .quad 0x24180c07a99a688d + .quad 0x555ef9d1c64b2d17 + .quad 0x36a770ba3b73bd08 + .quad 0x624aef08a3afbf0c + .quad 0x5737ff98b40946f2 + .quad 0x675f4de13381749d + + // 2^232 * 2 * G + + .quad 0x0e2c52036b1782fc + .quad 0x64816c816cad83b4 + .quad 0xd0dcbdd96964073e + .quad 0x13d99df70164c520 + .quad 0xa12ff6d93bdab31d + .quad 0x0725d80f9d652dfe + .quad 0x019c4ff39abe9487 + .quad 0x60f450b882cd3c43 + .quad 0x014b5ec321e5c0ca + .quad 0x4fcb69c9d719bfa2 + .quad 0x4e5f1c18750023a0 + .quad 0x1c06de9e55edac80 + + // 2^232 * 3 * G + + .quad 0x990f7ad6a33ec4e2 + .quad 0x6608f938be2ee08e + .quad 0x9ca143c563284515 + .quad 0x4cf38a1fec2db60d + .quad 0xffd52b40ff6d69aa + .quad 0x34530b18dc4049bb + .quad 0x5e4a5c2fa34d9897 + .quad 0x78096f8e7d32ba2d + .quad 0xa0aaaa650dfa5ce7 + .quad 0xf9c49e2a48b5478c + .quad 0x4f09cc7d7003725b + .quad 0x373cad3a26091abe + + // 2^232 * 4 * G + + .quad 0xb294634d82c9f57c + .quad 0x1fcbfde124934536 + .quad 0x9e9c4db3418cdb5a + .quad 0x0040f3d9454419fc + .quad 0xf1bea8fb89ddbbad + .quad 0x3bcb2cbc61aeaecb + .quad 0x8f58a7bb1f9b8d9d + .quad 0x21547eda5112a686 + .quad 0xdefde939fd5986d3 + .quad 0xf4272c89510a380c + .quad 0xb72ba407bb3119b9 + .quad 0x63550a334a254df4 + + // 2^232 * 5 * G + + .quad 0x6507d6edb569cf37 + .quad 0x178429b00ca52ee1 + .quad 0xea7c0090eb6bd65d + .quad 0x3eea62c7daf78f51 + .quad 0x9bba584572547b49 + .quad 0xf305c6fae2c408e0 + .quad 0x60e8fa69c734f18d + .quad 0x39a92bafaa7d767a + .quad 
0x9d24c713e693274e + .quad 0x5f63857768dbd375 + .quad 0x70525560eb8ab39a + .quad 0x68436a0665c9c4cd + + // 2^232 * 6 * G + + .quad 0xbc0235e8202f3f27 + .quad 0xc75c00e264f975b0 + .quad 0x91a4e9d5a38c2416 + .quad 0x17b6e7f68ab789f9 + .quad 0x1e56d317e820107c + .quad 0xc5266844840ae965 + .quad 0xc1e0a1c6320ffc7a + .quad 0x5373669c91611472 + .quad 0x5d2814ab9a0e5257 + .quad 0x908f2084c9cab3fc + .quad 0xafcaf5885b2d1eca + .quad 0x1cb4b5a678f87d11 + + // 2^232 * 7 * G + + .quad 0xb664c06b394afc6c + .quad 0x0c88de2498da5fb1 + .quad 0x4f8d03164bcad834 + .quad 0x330bca78de7434a2 + .quad 0x6b74aa62a2a007e7 + .quad 0xf311e0b0f071c7b1 + .quad 0x5707e438000be223 + .quad 0x2dc0fd2d82ef6eac + .quad 0x982eff841119744e + .quad 0xf9695e962b074724 + .quad 0xc58ac14fbfc953fb + .quad 0x3c31be1b369f1cf5 + + // 2^232 * 8 * G + + .quad 0xb0f4864d08948aee + .quad 0x07dc19ee91ba1c6f + .quad 0x7975cdaea6aca158 + .quad 0x330b61134262d4bb + .quad 0xc168bc93f9cb4272 + .quad 0xaeb8711fc7cedb98 + .quad 0x7f0e52aa34ac8d7a + .quad 0x41cec1097e7d55bb + .quad 0xf79619d7a26d808a + .quad 0xbb1fd49e1d9e156d + .quad 0x73d7c36cdba1df27 + .quad 0x26b44cd91f28777d + + // 2^236 * 1 * G + + .quad 0x300a9035393aa6d8 + .quad 0x2b501131a12bb1cd + .quad 0x7b1ff677f093c222 + .quad 0x4309c1f8cab82bad + .quad 0xaf44842db0285f37 + .quad 0x8753189047efc8df + .quad 0x9574e091f820979a + .quad 0x0e378d6069615579 + .quad 0xd9fa917183075a55 + .quad 0x4bdb5ad26b009fdc + .quad 0x7829ad2cd63def0e + .quad 0x078fc54975fd3877 + + // 2^236 * 2 * G + + .quad 0x87dfbd1428878f2d + .quad 0x134636dd1e9421a1 + .quad 0x4f17c951257341a3 + .quad 0x5df98d4bad296cb8 + .quad 0xe2004b5bb833a98a + .quad 0x44775dec2d4c3330 + .quad 0x3aa244067eace913 + .quad 0x272630e3d58e00a9 + .quad 0xf3678fd0ecc90b54 + .quad 0xf001459b12043599 + .quad 0x26725fbc3758b89b + .quad 0x4325e4aa73a719ae + + // 2^236 * 3 * G + + .quad 0x657dc6ef433c3493 + .quad 0x65375e9f80dbf8c3 + .quad 0x47fd2d465b372dae + .quad 0x4966ab79796e7947 + .quad 0xed24629acf69f59d + .quad 0x2a4a1ccedd5abbf4 + .quad 0x3535ca1f56b2d67b + .quad 0x5d8c68d043b1b42d + .quad 0xee332d4de3b42b0a + .quad 0xd84e5a2b16a4601c + .quad 0x78243877078ba3e4 + .quad 0x77ed1eb4184ee437 + + // 2^236 * 4 * G + + .quad 0xbfd4e13f201839a0 + .quad 0xaeefffe23e3df161 + .quad 0xb65b04f06b5d1fe3 + .quad 0x52e085fb2b62fbc0 + .quad 0x185d43f89e92ed1a + .quad 0xb04a1eeafe4719c6 + .quad 0x499fbe88a6f03f4f + .quad 0x5d8b0d2f3c859bdd + .quad 0x124079eaa54cf2ba + .quad 0xd72465eb001b26e7 + .quad 0x6843bcfdc97af7fd + .quad 0x0524b42b55eacd02 + + // 2^236 * 5 * G + + .quad 0xfd0d5dbee45447b0 + .quad 0x6cec351a092005ee + .quad 0x99a47844567579cb + .quad 0x59d242a216e7fa45 + .quad 0xbc18dcad9b829eac + .quad 0x23ae7d28b5f579d0 + .quad 0xc346122a69384233 + .quad 0x1a6110b2e7d4ac89 + .quad 0x4f833f6ae66997ac + .quad 0x6849762a361839a4 + .quad 0x6985dec1970ab525 + .quad 0x53045e89dcb1f546 + + // 2^236 * 6 * G + + .quad 0xcb8bb346d75353db + .quad 0xfcfcb24bae511e22 + .quad 0xcba48d40d50ae6ef + .quad 0x26e3bae5f4f7cb5d + .quad 0x84da3cde8d45fe12 + .quad 0xbd42c218e444e2d2 + .quad 0xa85196781f7e3598 + .quad 0x7642c93f5616e2b2 + .quad 0x2323daa74595f8e4 + .quad 0xde688c8b857abeb4 + .quad 0x3fc48e961c59326e + .quad 0x0b2e73ca15c9b8ba + + // 2^236 * 7 * G + + .quad 0xd6bb4428c17f5026 + .quad 0x9eb27223fb5a9ca7 + .quad 0xe37ba5031919c644 + .quad 0x21ce380db59a6602 + .quad 0x0e3fbfaf79c03a55 + .quad 0x3077af054cbb5acf + .quad 0xd5c55245db3de39f + .quad 0x015e68c1476a4af7 + .quad 0xc1d5285220066a38 + .quad 0x95603e523570aef3 + .quad 0x832659a7226b8a4d + .quad 
0x5dd689091f8eedc9 + + // 2^236 * 8 * G + + .quad 0xcbac84debfd3c856 + .quad 0x1624c348b35ff244 + .quad 0xb7f88dca5d9cad07 + .quad 0x3b0e574da2c2ebe8 + .quad 0x1d022591a5313084 + .quad 0xca2d4aaed6270872 + .quad 0x86a12b852f0bfd20 + .quad 0x56e6c439ad7da748 + .quad 0xc704ff4942bdbae6 + .quad 0x5e21ade2b2de1f79 + .quad 0xe95db3f35652fad8 + .quad 0x0822b5378f08ebc1 + + // 2^240 * 1 * G + + .quad 0x51f048478f387475 + .quad 0xb25dbcf49cbecb3c + .quad 0x9aab1244d99f2055 + .quad 0x2c709e6c1c10a5d6 + .quad 0xe1b7f29362730383 + .quad 0x4b5279ffebca8a2c + .quad 0xdafc778abfd41314 + .quad 0x7deb10149c72610f + .quad 0xcb62af6a8766ee7a + .quad 0x66cbec045553cd0e + .quad 0x588001380f0be4b5 + .quad 0x08e68e9ff62ce2ea + + // 2^240 * 2 * G + + .quad 0x34ad500a4bc130ad + .quad 0x8d38db493d0bd49c + .quad 0xa25c3d98500a89be + .quad 0x2f1f3f87eeba3b09 + .quad 0x2f2d09d50ab8f2f9 + .quad 0xacb9218dc55923df + .quad 0x4a8f342673766cb9 + .quad 0x4cb13bd738f719f5 + .quad 0xf7848c75e515b64a + .quad 0xa59501badb4a9038 + .quad 0xc20d313f3f751b50 + .quad 0x19a1e353c0ae2ee8 + + // 2^240 * 3 * G + + .quad 0x7d1c7560bafa05c3 + .quad 0xb3e1a0a0c6e55e61 + .quad 0xe3529718c0d66473 + .quad 0x41546b11c20c3486 + .quad 0xb42172cdd596bdbd + .quad 0x93e0454398eefc40 + .quad 0x9fb15347b44109b5 + .quad 0x736bd3990266ae34 + .quad 0x85532d509334b3b4 + .quad 0x46fd114b60816573 + .quad 0xcc5f5f30425c8375 + .quad 0x412295a2b87fab5c + + // 2^240 * 4 * G + + .quad 0x19c99b88f57ed6e9 + .quad 0x5393cb266df8c825 + .quad 0x5cee3213b30ad273 + .quad 0x14e153ebb52d2e34 + .quad 0x2e655261e293eac6 + .quad 0x845a92032133acdb + .quad 0x460975cb7900996b + .quad 0x0760bb8d195add80 + .quad 0x413e1a17cde6818a + .quad 0x57156da9ed69a084 + .quad 0x2cbf268f46caccb1 + .quad 0x6b34be9bc33ac5f2 + + // 2^240 * 5 * G + + .quad 0xf3df2f643a78c0b2 + .quad 0x4c3e971ef22e027c + .quad 0xec7d1c5e49c1b5a3 + .quad 0x2012c18f0922dd2d + .quad 0x11fc69656571f2d3 + .quad 0xc6c9e845530e737a + .quad 0xe33ae7a2d4fe5035 + .quad 0x01b9c7b62e6dd30b + .quad 0x880b55e55ac89d29 + .quad 0x1483241f45a0a763 + .quad 0x3d36efdfc2e76c1f + .quad 0x08af5b784e4bade8 + + // 2^240 * 6 * G + + .quad 0x283499dc881f2533 + .quad 0x9d0525da779323b6 + .quad 0x897addfb673441f4 + .quad 0x32b79d71163a168d + .quad 0xe27314d289cc2c4b + .quad 0x4be4bd11a287178d + .quad 0x18d528d6fa3364ce + .quad 0x6423c1d5afd9826e + .quad 0xcc85f8d9edfcb36a + .quad 0x22bcc28f3746e5f9 + .quad 0xe49de338f9e5d3cd + .quad 0x480a5efbc13e2dcc + + // 2^240 * 7 * G + + .quad 0x0b51e70b01622071 + .quad 0x06b505cf8b1dafc5 + .quad 0x2c6bb061ef5aabcd + .quad 0x47aa27600cb7bf31 + .quad 0xb6614ce442ce221f + .quad 0x6e199dcc4c053928 + .quad 0x663fb4a4dc1cbe03 + .quad 0x24b31d47691c8e06 + .quad 0x2a541eedc015f8c3 + .quad 0x11a4fe7e7c693f7c + .quad 0xf0af66134ea278d6 + .quad 0x545b585d14dda094 + + // 2^240 * 8 * G + + .quad 0x67bf275ea0d43a0f + .quad 0xade68e34089beebe + .quad 0x4289134cd479e72e + .quad 0x0f62f9c332ba5454 + .quad 0x6204e4d0e3b321e1 + .quad 0x3baa637a28ff1e95 + .quad 0x0b0ccffd5b99bd9e + .quad 0x4d22dc3e64c8d071 + .quad 0xfcb46589d63b5f39 + .quad 0x5cae6a3f57cbcf61 + .quad 0xfebac2d2953afa05 + .quad 0x1c0fa01a36371436 + + // 2^244 * 1 * G + + .quad 0xe7547449bc7cd692 + .quad 0x0f9abeaae6f73ddf + .quad 0x4af01ca700837e29 + .quad 0x63ab1b5d3f1bc183 + .quad 0xc11ee5e854c53fae + .quad 0x6a0b06c12b4f3ff4 + .quad 0x33540f80e0b67a72 + .quad 0x15f18fc3cd07e3ef + .quad 0x32750763b028f48c + .quad 0x06020740556a065f + .quad 0xd53bd812c3495b58 + .quad 0x08706c9b865f508d + + // 2^244 * 2 * G + + .quad 0xf37ca2ab3d343dff + .quad 
0x1a8c6a2d80abc617 + .quad 0x8e49e035d4ccffca + .quad 0x48b46beebaa1d1b9 + .quad 0xcc991b4138b41246 + .quad 0x243b9c526f9ac26b + .quad 0xb9ef494db7cbabbd + .quad 0x5fba433dd082ed00 + .quad 0x9c49e355c9941ad0 + .quad 0xb9734ade74498f84 + .quad 0x41c3fed066663e5c + .quad 0x0ecfedf8e8e710b3 + + // 2^244 * 3 * G + + .quad 0x76430f9f9cd470d9 + .quad 0xb62acc9ba42f6008 + .quad 0x1898297c59adad5e + .quad 0x7789dd2db78c5080 + .quad 0x744f7463e9403762 + .quad 0xf79a8dee8dfcc9c9 + .quad 0x163a649655e4cde3 + .quad 0x3b61788db284f435 + .quad 0xb22228190d6ef6b2 + .quad 0xa94a66b246ce4bfa + .quad 0x46c1a77a4f0b6cc7 + .quad 0x4236ccffeb7338cf + + // 2^244 * 4 * G + + .quad 0x8497404d0d55e274 + .quad 0x6c6663d9c4ad2b53 + .quad 0xec2fb0d9ada95734 + .quad 0x2617e120cdb8f73c + .quad 0x3bd82dbfda777df6 + .quad 0x71b177cc0b98369e + .quad 0x1d0e8463850c3699 + .quad 0x5a71945b48e2d1f1 + .quad 0x6f203dd5405b4b42 + .quad 0x327ec60410b24509 + .quad 0x9c347230ac2a8846 + .quad 0x77de29fc11ffeb6a + + // 2^244 * 5 * G + + .quad 0xb0ac57c983b778a8 + .quad 0x53cdcca9d7fe912c + .quad 0x61c2b854ff1f59dc + .quad 0x3a1a2cf0f0de7dac + .quad 0x835e138fecced2ca + .quad 0x8c9eaf13ea963b9a + .quad 0xc95fbfc0b2160ea6 + .quad 0x575e66f3ad877892 + .quad 0x99803a27c88fcb3a + .quad 0x345a6789275ec0b0 + .quad 0x459789d0ff6c2be5 + .quad 0x62f882651e70a8b2 + + // 2^244 * 6 * G + + .quad 0x085ae2c759ff1be4 + .quad 0x149145c93b0e40b7 + .quad 0xc467e7fa7ff27379 + .quad 0x4eeecf0ad5c73a95 + .quad 0x6d822986698a19e0 + .quad 0xdc9821e174d78a71 + .quad 0x41a85f31f6cb1f47 + .quad 0x352721c2bcda9c51 + .quad 0x48329952213fc985 + .quad 0x1087cf0d368a1746 + .quad 0x8e5261b166c15aa5 + .quad 0x2d5b2d842ed24c21 + + // 2^244 * 7 * G + + .quad 0x02cfebd9ebd3ded1 + .quad 0xd45b217739021974 + .quad 0x7576f813fe30a1b7 + .quad 0x5691b6f9a34ef6c2 + .quad 0x5eb7d13d196ac533 + .quad 0x377234ecdb80be2b + .quad 0xe144cffc7cf5ae24 + .quad 0x5226bcf9c441acec + .quad 0x79ee6c7223e5b547 + .quad 0x6f5f50768330d679 + .quad 0xed73e1e96d8adce9 + .quad 0x27c3da1e1d8ccc03 + + // 2^244 * 8 * G + + .quad 0x7eb9efb23fe24c74 + .quad 0x3e50f49f1651be01 + .quad 0x3ea732dc21858dea + .quad 0x17377bd75bb810f9 + .quad 0x28302e71630ef9f6 + .quad 0xc2d4a2032b64cee0 + .quad 0x090820304b6292be + .quad 0x5fca747aa82adf18 + .quad 0x232a03c35c258ea5 + .quad 0x86f23a2c6bcb0cf1 + .quad 0x3dad8d0d2e442166 + .quad 0x04a8933cab76862b + + // 2^248 * 1 * G + + .quad 0xd2c604b622943dff + .quad 0xbc8cbece44cfb3a0 + .quad 0x5d254ff397808678 + .quad 0x0fa3614f3b1ca6bf + .quad 0x69082b0e8c936a50 + .quad 0xf9c9a035c1dac5b6 + .quad 0x6fb73e54c4dfb634 + .quad 0x4005419b1d2bc140 + .quad 0xa003febdb9be82f0 + .quad 0x2089c1af3a44ac90 + .quad 0xf8499f911954fa8e + .quad 0x1fba218aef40ab42 + + // 2^248 * 2 * G + + .quad 0xab549448fac8f53e + .quad 0x81f6e89a7ba63741 + .quad 0x74fd6c7d6c2b5e01 + .quad 0x392e3acaa8c86e42 + .quad 0x4f3e57043e7b0194 + .quad 0xa81d3eee08daaf7f + .quad 0xc839c6ab99dcdef1 + .quad 0x6c535d13ff7761d5 + .quad 0x4cbd34e93e8a35af + .quad 0x2e0781445887e816 + .quad 0x19319c76f29ab0ab + .quad 0x25e17fe4d50ac13b + + // 2^248 * 3 * G + + .quad 0x0a289bd71e04f676 + .quad 0x208e1c52d6420f95 + .quad 0x5186d8b034691fab + .quad 0x255751442a9fb351 + .quad 0x915f7ff576f121a7 + .quad 0xc34a32272fcd87e3 + .quad 0xccba2fde4d1be526 + .quad 0x6bba828f8969899b + .quad 0xe2d1bc6690fe3901 + .quad 0x4cb54a18a0997ad5 + .quad 0x971d6914af8460d4 + .quad 0x559d504f7f6b7be4 + + // 2^248 * 4 * G + + .quad 0xa7738378b3eb54d5 + .quad 0x1d69d366a5553c7c + .quad 0x0a26cf62f92800ba + .quad 0x01ab12d5807e3217 + .quad 
0x9c4891e7f6d266fd + .quad 0x0744a19b0307781b + .quad 0x88388f1d6061e23b + .quad 0x123ea6a3354bd50e + .quad 0x118d189041e32d96 + .quad 0xb9ede3c2d8315848 + .quad 0x1eab4271d83245d9 + .quad 0x4a3961e2c918a154 + + // 2^248 * 5 * G + + .quad 0x71dc3be0f8e6bba0 + .quad 0xd6cef8347effe30a + .quad 0xa992425fe13a476a + .quad 0x2cd6bce3fb1db763 + .quad 0x0327d644f3233f1e + .quad 0x499a260e34fcf016 + .quad 0x83b5a716f2dab979 + .quad 0x68aceead9bd4111f + .quad 0x38b4c90ef3d7c210 + .quad 0x308e6e24b7ad040c + .quad 0x3860d9f1b7e73e23 + .quad 0x595760d5b508f597 + + // 2^248 * 6 * G + + .quad 0x6129bfe104aa6397 + .quad 0x8f960008a4a7fccb + .quad 0x3f8bc0897d909458 + .quad 0x709fa43edcb291a9 + .quad 0x882acbebfd022790 + .quad 0x89af3305c4115760 + .quad 0x65f492e37d3473f4 + .quad 0x2cb2c5df54515a2b + .quad 0xeb0a5d8c63fd2aca + .quad 0xd22bc1662e694eff + .quad 0x2723f36ef8cbb03a + .quad 0x70f029ecf0c8131f + + // 2^248 * 7 * G + + .quad 0x461307b32eed3e33 + .quad 0xae042f33a45581e7 + .quad 0xc94449d3195f0366 + .quad 0x0b7d5d8a6c314858 + .quad 0x2a6aafaa5e10b0b9 + .quad 0x78f0a370ef041aa9 + .quad 0x773efb77aa3ad61f + .quad 0x44eca5a2a74bd9e1 + .quad 0x25d448327b95d543 + .quad 0x70d38300a3340f1d + .quad 0xde1c531c60e1c52b + .quad 0x272224512c7de9e4 + + // 2^248 * 8 * G + + .quad 0x1abc92af49c5342e + .quad 0xffeed811b2e6fad0 + .quad 0xefa28c8dfcc84e29 + .quad 0x11b5df18a44cc543 + .quad 0xbf7bbb8a42a975fc + .quad 0x8c5c397796ada358 + .quad 0xe27fc76fcdedaa48 + .quad 0x19735fd7f6bc20a6 + .quad 0xe3ab90d042c84266 + .quad 0xeb848e0f7f19547e + .quad 0x2503a1d065a497b9 + .quad 0x0fef911191df895f + + // 2^252 * 1 * G + + .quad 0xb1507ca1ab1c6eb9 + .quad 0xbd448f3e16b687b3 + .quad 0x3455fb7f2c7a91ab + .quad 0x7579229e2f2adec1 + .quad 0x6ab5dcb85b1c16b7 + .quad 0x94c0fce83c7b27a5 + .quad 0xa4b11c1a735517be + .quad 0x499238d0ba0eafaa + .quad 0xecf46e527aba8b57 + .quad 0x15a08c478bd1647b + .quad 0x7af1c6a65f706fef + .quad 0x6345fa78f03a30d5 + + // 2^252 * 2 * G + + .quad 0xdf02f95f1015e7a1 + .quad 0x790ec41da9b40263 + .quad 0x4d3a0ea133ea1107 + .quad 0x54f70be7e33af8c9 + .quad 0x93d3cbe9bdd8f0a4 + .quad 0xdb152c1bfd177302 + .quad 0x7dbddc6d7f17a875 + .quad 0x3e1a71cc8f426efe + .quad 0xc83ca3e390babd62 + .quad 0x80ede3670291c833 + .quad 0xc88038ccd37900c4 + .quad 0x2c5fc0231ec31fa1 + + // 2^252 * 3 * G + + .quad 0xfeba911717038b4f + .quad 0xe5123721c9deef81 + .quad 0x1c97e4e75d0d8834 + .quad 0x68afae7a23dc3bc6 + .quad 0xc422e4d102456e65 + .quad 0x87414ac1cad47b91 + .quad 0x1592e2bba2b6ffdd + .quad 0x75d9d2bff5c2100f + .quad 0x5bd9b4763626e81c + .quad 0x89966936bca02edd + .quad 0x0a41193d61f077b3 + .quad 0x3097a24200ce5471 + + // 2^252 * 4 * G + + .quad 0x57427734c7f8b84c + .quad 0xf141a13e01b270e9 + .quad 0x02d1adfeb4e564a6 + .quad 0x4bb23d92ce83bd48 + .quad 0xa162e7246695c486 + .quad 0x131d633435a89607 + .quad 0x30521561a0d12a37 + .quad 0x56704bada6afb363 + .quad 0xaf6c4aa752f912b9 + .quad 0x5e665f6cd86770c8 + .quad 0x4c35ac83a3c8cd58 + .quad 0x2b7a29c010a58a7e + + // 2^252 * 5 * G + + .quad 0xc4007f77d0c1cec3 + .quad 0x8d1020b6bac492f8 + .quad 0x32ec29d57e69daaf + .quad 0x599408759d95fce0 + .quad 0x33810a23bf00086e + .quad 0xafce925ee736ff7c + .quad 0x3d60e670e24922d4 + .quad 0x11ce9e714f96061b + .quad 0x219ef713d815bac1 + .quad 0xf141465d485be25c + .quad 0x6d5447cc4e513c51 + .quad 0x174926be5ef44393 + + // 2^252 * 6 * G + + .quad 0xb5deb2f9fc5bd5bb + .quad 0x92daa72ae1d810e1 + .quad 0xafc4cfdcb72a1c59 + .quad 0x497d78813fc22a24 + .quad 0x3ef5d41593ea022e + .quad 0x5cbcc1a20ed0eed6 + .quad 0x8fd24ecf07382c8c + .quad 
0x6fa42ead06d8e1ad + .quad 0xe276824a1f73371f + .quad 0x7f7cf01c4f5b6736 + .quad 0x7e201fe304fa46e7 + .quad 0x785a36a357808c96 + + // 2^252 * 7 * G + + .quad 0x825fbdfd63014d2b + .quad 0xc852369c6ca7578b + .quad 0x5b2fcd285c0b5df0 + .quad 0x12ab214c58048c8f + .quad 0x070442985d517bc3 + .quad 0x6acd56c7ae653678 + .quad 0x00a27983985a7763 + .quad 0x5167effae512662b + .quad 0xbd4ea9e10f53c4b6 + .quad 0x1673dc5f8ac91a14 + .quad 0xa8f81a4e2acc1aba + .quad 0x33a92a7924332a25 + + // 2^252 * 8 * G + + .quad 0x9dd1f49927996c02 + .quad 0x0cb3b058e04d1752 + .quad 0x1f7e88967fd02c3e + .quad 0x2f964268cb8b3eb1 + .quad 0x7ba95ba0218f2ada + .quad 0xcff42287330fb9ca + .quad 0xdada496d56c6d907 + .quad 0x5380c296f4beee54 + .quad 0x9d4f270466898d0a + .quad 0x3d0987990aff3f7a + .quad 0xd09ef36267daba45 + .quad 0x7761455e7b1c669c + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/curve25519/curve25519_x25519base.S b/x86_att/curve25519/curve25519_x25519base.S new file mode 100644 index 0000000000..177ad685de --- /dev/null +++ b/x86_att/curve25519/curve25519_x25519base.S @@ -0,0 +1,8946 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// The x25519 function for curve25519 on base element 9 +// Input scalar[4]; output res[4] +// +// extern void curve25519_x25519base +// (uint64_t res[static 4],uint64_t scalar[static 4]); +// +// Given a scalar n, returns the X coordinate of n * G where G = (9,...) is +// the standard generator. The scalar is first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748). +// +// Standard x86-64 ABI: RDI = res, RSI = scalar +// Microsoft x64 ABI: RCX = res, RDX = scalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519base) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519base) + .text + +// Size of individual field elements + +#define NUMSIZE 32 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. +// The result "resx" assumes the "res" pointer has been preloaded into %rbp. + +#define resx (0*NUMSIZE)(%rbp) + +#define scalar (0*NUMSIZE)(%rsp) + +#define tabent (1*NUMSIZE)(%rsp) +#define ymx_2 (1*NUMSIZE)(%rsp) +#define xpy_2 (2*NUMSIZE)(%rsp) +#define kxy_2 (3*NUMSIZE)(%rsp) + +#define acc (4*NUMSIZE)(%rsp) +#define x_1 (4*NUMSIZE)(%rsp) +#define y_1 (5*NUMSIZE)(%rsp) +#define z_1 (6*NUMSIZE)(%rsp) +#define w_1 (7*NUMSIZE)(%rsp) +#define x_3 (4*NUMSIZE)(%rsp) +#define y_3 (5*NUMSIZE)(%rsp) +#define z_3 (6*NUMSIZE)(%rsp) +#define w_3 (7*NUMSIZE)(%rsp) + +#define tmpspace (8*NUMSIZE)(%rsp) +#define t0 (8*NUMSIZE)(%rsp) +#define t1 (9*NUMSIZE)(%rsp) +#define t2 (10*NUMSIZE)(%rsp) +#define t3 (11*NUMSIZE)(%rsp) +#define t4 (12*NUMSIZE)(%rsp) +#define t5 (13*NUMSIZE)(%rsp) + +// Stable homes for the input result pointer, and other variables + +#define res 14*NUMSIZE(%rsp) + +#define i 14*NUMSIZE+8(%rsp) + +#define bias 14*NUMSIZE+16(%rsp) + +#define bf 14*NUMSIZE+24(%rsp) +#define ix 14*NUMSIZE+24(%rsp) + +#define tab 15*NUMSIZE(%rsp) + +// Total size to reserve on the stack + +#define NSPACE (15*NUMSIZE+8) + +// Macro wrapping up the basic field multiplication, only trivially +// different from a pure function call to bignum_mul_p25519. 
+ +#define mul_p25519(P0,P1,P2) \ + xorl %esi, %esi ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rax, %r10 ; \ + addq %rax, %r9 ; \ + mulxq 0x10+P1, %rax, %r11 ; \ + adcq %rax, %r10 ; \ + mulxq 0x18+P1, %rax, %r12 ; \ + adcq %rax, %r11 ; \ + adcq %rsi, %r12 ; \ + xorl %esi, %esi ; \ + movq 0x8+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rsi, %r13 ; \ + adcxq %rsi, %r13 ; \ + xorl %esi, %esi ; \ + movq 0x10+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rsi, %r14 ; \ + adcxq %rsi, %r14 ; \ + xorl %esi, %esi ; \ + movq 0x18+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rsi, %r15 ; \ + adcxq %rsi, %r15 ; \ + movl $0x26, %edx ; \ + xorl %esi, %esi ; \ + mulxq %r12, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq %r13, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq %r14, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq %r15, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rsi, %r12 ; \ + adcxq %rsi, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + movl $0x13, %edx ; \ + incq %r12; \ + bts $63, %r11 ; \ + mulxq %r12, %rax, %rbx ; \ + addq %rax, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %rsi, %r10 ; \ + adcq %rsi, %r11 ; \ + sbbq %rax, %rax ; \ + notq %rax; \ + andq %rdx, %rax ; \ + subq %rax, %r8 ; \ + sbbq %rsi, %r9 ; \ + sbbq %rsi, %r10 ; \ + sbbq %rsi, %r11 ; \ + btr $63, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. 
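+//
+// In the same C-style pseudocode (a sketch only, with h again the
+// double-length intermediate), the weak reduction in mul_4 below is two
+// folds using 2^256 == 38 and 2^255 == 19 (mod p_25519):
+//
+//     h = (h & MASK256) + 38 * (h >> 256);    // fold the top four digits
+//     h = (h & MASK255) + 19 * (h >> 255);    // fold the remaining top bits
+//
+// after which h fits in four digits and is < 2 * p_25519, though it need
+// not be fully reduced.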
+ +#define mul_4(P0,P1,P2) \ + xorl %ecx, %ecx ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rax, %r10 ; \ + addq %rax, %r9 ; \ + mulxq 0x10+P1, %rax, %r11 ; \ + adcq %rax, %r10 ; \ + mulxq 0x18+P1, %rax, %r12 ; \ + adcq %rax, %r11 ; \ + adcq %rcx, %r12 ; \ + xorl %ecx, %ecx ; \ + movq 0x8+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rcx, %r13 ; \ + adcxq %rcx, %r13 ; \ + xorl %ecx, %ecx ; \ + movq 0x10+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rcx, %r14 ; \ + adcxq %rcx, %r14 ; \ + xorl %ecx, %ecx ; \ + movq 0x18+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rcx, %r15 ; \ + adcxq %rcx, %r15 ; \ + movl $0x26, %edx ; \ + xorl %ecx, %ecx ; \ + mulxq %r12, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq %r13, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq %r14, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq %r15, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + adcxq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Plain 4-digit add and doubling without any normalization +// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result, +// indeed one < 2 * p_25519 for normalized inputs. + +#define add_4(P0,P1,P2) \ + movq P1, %rax ; \ + addq P2, %rax ; \ + movq %rax, P0 ; \ + movq 8+P1, %rax ; \ + adcq 8+P2, %rax ; \ + movq %rax, 8+P0 ; \ + movq 16+P1, %rax ; \ + adcq 16+P2, %rax ; \ + movq %rax, 16+P0 ; \ + movq 24+P1, %rax ; \ + adcq 24+P2, %rax ; \ + movq %rax, 24+P0 + +#define double_4(P0,P1) \ + movq P1, %rax ; \ + addq %rax, %rax ; \ + movq %rax, P0 ; \ + movq 8+P1, %rax ; \ + adcq %rax, %rax ; \ + movq %rax, 8+P0 ; \ + movq 16+P1, %rax ; \ + adcq %rax, %rax ; \ + movq %rax, 16+P0 ; \ + movq 24+P1, %rax ; \ + adcq %rax, %rax ; \ + movq %rax, 24+P0 + +// Subtraction of a pair of numbers < p_25519 just sufficient +// to give a 4-digit result. 
It actually always does (x - z) + (2^255-19) +// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 +// implicitly + +#define sub_4(P0,P1,P2) \ + movq P1, %r8 ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %rax ; \ + sbbq 24+P2, %rax ; \ + subq $19, %r8 ; \ + movq %r8, P0 ; \ + sbbq $0, %r9 ; \ + movq %r9, 8+P0 ; \ + sbbq $0, %r10 ; \ + movq %r10, 16+P0 ; \ + sbbq $0, %rax ; \ + btc $63, %rax ; \ + movq %rax, 24+P0 + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + movq P1, %r8 ; \ + xorl %ebx, %ebx ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movl $38, %ecx ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %rax ; \ + sbbq 24+P2, %rax ; \ + cmovncq %rbx, %rcx ; \ + subq %rcx, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbx, %rax ; \ + movq %r8, P0 ; \ + movq %r9, 8+P0 ; \ + movq %r10, 16+P0 ; \ + movq %rax, 24+P0 + +// Modular addition with inputs double modulus 2 * p_25519 = 2^256 - 38 +// and in general only guaranteeing a 4-digit result, not even < 2 * p_25519. + +#define add_twice4(P0,P1,P2) \ + movq P1, %r8 ; \ + xorl %ecx, %ecx ; \ + addq P2, %r8 ; \ + movq 0x8+P1, %r9 ; \ + adcq 0x8+P2, %r9 ; \ + movq 0x10+P1, %r10 ; \ + adcq 0x10+P2, %r10 ; \ + movq 0x18+P1, %r11 ; \ + adcq 0x18+P2, %r11 ; \ + movl $38, %eax ; \ + cmovncq %rcx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +S2N_BN_SYMBOL(curve25519_x25519base): + +// In this case the Windows form literally makes a subroutine call. +// This avoids hassle arising from keeping code and data together. + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + callq curve25519_x25519base_standard + popq %rsi + popq %rdi + ret + +curve25519_x25519base_standard: +#endif + +// Save registers, make room for temps, preserve input arguments. + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $NSPACE, %rsp + +// Move the output pointer to a stable place + + movq %rdi, res + +// Copy the input scalar to its local variable while mangling it. +// In principle the mangling is into 01xxx...xxx000, but actually +// we only clear the top two bits so 00xxx...xxxxxx. The additional +// 2^254 * G is taken care of by the starting value for the addition +// chain below, while we never look at the three low bits at all. + + movq (%rsi), %rax + movq %rax, (%rsp) + movq 8(%rsi), %rax + movq %rax, 8(%rsp) + movq 16(%rsi), %rax + movq %rax, 16(%rsp) + movq $0x3fffffffffffffff, %rax + andq 24(%rsi), %rax + movq %rax, 24(%rsp) + +// The main part of the computation is on the edwards25519 curve in +// extended-projective coordinates (X,Y,Z,T), representing a point +// (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z). +// Only at the very end do we translate back to curve25519. So G +// below means the generator within edwards25519 corresponding to +// (9,...) for curve25519, via the standard isomorphism. +// +// Initialize accumulator "acc" to either (2^254 + 8) * G or just 2^254 * G +// depending on bit 3 of the scalar, the only nonzero bit of the bottom 4. +// Thus, we have effectively dealt with bits 0, 1, 2, 3, 254 and 255. 
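+//
+// As a C-level sketch (illustrative only, not code from this file), full
+// RFC 7748 clamping of the scalar n[4] would be
+//
+//     n[0] &= ~(uint64_t)7;                 // clear bits 0..2
+//     n[3] &= 0x7fffffffffffffffULL;        // clear bit 255
+//     n[3] |= 0x4000000000000000ULL;        // set bit 254
+//
+// whereas the copy above performed only
+//
+//     n[3] &= 0x3fffffffffffffffULL;        // clear bits 254 and 255
+//
+// leaving the low bits in place (they are never read) and folding the
+// mandatory 2^254 * G term into the initial accumulator selected below.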
+
+        movq    (%rsp), %rax
+        andq    $8, %rax
+
+        leaq    edwards25519_0g(%rip), %r10
+        leaq    edwards25519_8g(%rip), %r11
+
+        movq    (%r10), %rax
+        movq    (%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*16(%rsp)
+
+        movq    8*1(%r10), %rax
+        movq    8*1(%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*17(%rsp)
+
+        movq    8*2(%r10), %rax
+        movq    8*2(%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*18(%rsp)
+
+        movq    8*3(%r10), %rax
+        movq    8*3(%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*19(%rsp)
+
+        movq    8*4(%r10), %rax
+        movq    8*4(%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*20(%rsp)
+
+        movq    8*5(%r10), %rax
+        movq    8*5(%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*21(%rsp)
+
+        movq    8*6(%r10), %rax
+        movq    8*6(%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*22(%rsp)
+
+        movq    8*7(%r10), %rax
+        movq    8*7(%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*23(%rsp)
+
+        movl    $1, %eax
+        movq    %rax, 8*24(%rsp)
+        movl    $0, %eax
+        movq    %rax, 8*25(%rsp)
+        movq    %rax, 8*26(%rsp)
+        movq    %rax, 8*27(%rsp)
+
+        movq    8*8(%r10), %rax
+        movq    8*8(%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*28(%rsp)
+
+        movq    8*9(%r10), %rax
+        movq    8*9(%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*29(%rsp)
+
+        movq    8*10(%r10), %rax
+        movq    8*10(%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*30(%rsp)
+
+        movq    8*11(%r10), %rax
+        movq    8*11(%r11), %rcx
+        cmovnzq %rcx, %rax
+        movq    %rax, 8*31(%rsp)
+
+// The counter "i" tracks the bit position for which the scalar has
+// already been absorbed, starting at 4 and going up in chunks of 4.
+//
+// The pointer "tab" points at the current block of the table for
+// multiples (2^i * j) * G at the current bit position i; 1 <= j <= 8.
+//
+// The bias is always either 0 or 1 and needs to be added to the
+// partially processed scalar implicitly. This is used to absorb 4 bits
+// of scalar per iteration from 3-bit table indexing by exploiting
+// negation: (16 * h + l) * G = (16 * (h + 1) - (16 - l)) * G is used
+// when l >= 9. Note that we can't have any bias left over at the
+// end because of the clearing of bit 255 of the scalar, meaning the
+// l >= 9 case cannot arise on the last iteration.
+
+        movq    $4, i
+        leaq    edwards25519_gtable(%rip), %rax
+        movq    %rax, tab
+        movq    $0, bias
+
+// Start of the main loop, repeated 63 times for i = 4, 8, ..., 252
+
+scalarloop:
+
+// Look at the next 4-bit field "bf", adding the previous bias as well.
+// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9,
+// setting the bias to 1 for the next iteration in the latter case.
+
+        movq    i, %rax
+        movq    %rax, %rcx
+        shrq    $6, %rax
+        movq    (%rsp,%rax,8), %rax // Exploiting scalar = sp exactly
+        shrq    %cl, %rax
+        andq    $15, %rax
+        addq    bias, %rax
+        movq    %rax, bf
+
+        cmpq    $9, bf
+        sbbq    %rax, %rax
+        incq    %rax
+        movq    %rax, bias
+
+        movq    $16, %rdi
+        subq    bf, %rdi
+        cmpq    $0, bias
+        cmovzq  bf, %rdi
+        movq    %rdi, ix
+
+// Perform constant-time lookup in the table to get element number "ix".
+// The table entry for the affine point (x,y) is actually a triple
+// (y - x,x + y,2 * d * x * y) to precompute parts of the addition.
+// Note that "ix" can be 0, so we set up the appropriate identity first.
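+//
+// A rough C model of one loop iteration (sketch only: "tbl" stands for the
+// eight 12-digit triples of the current window and "e" for the selected
+// entry; neither is a name used in this file):
+//
+//     uint64_t bf = ((n[i >> 6] >> (i & 63)) & 15) + bias;
+//     bias = (bf >= 9);                     // borrow into the next window
+//     uint64_t ix = bias ? 16 - bf : bf;    // signed-digit index in 0..8
+//     uint64_t e[12] = {1,0,0,0, 1,0,0,0, 0,0,0,0};  // identity (1,1,0)
+//     for (int j = 1; j <= 8; j++)          // constant-time selection
+//         for (int k = 0; k < 12; k++)
+//             e[k] = (ix == j) ? tbl[j-1][k] : e[k];  // cmov, not a branch
+//
+// followed, when bias = 1 and ix != 0, by the negation described after
+// the lookup code.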
+ + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + movq tab, %rbp + + cmpq $1, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $2, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $3, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $4, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $5, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $6, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $7, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), 
%rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $8, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + + addq $96, %rbp + movq %rbp, tab + +// We now have the triple from the table in registers as follows +// +// [%rdx;%rcx;%rbx;%rax] = y - x +// [%r11;%r10;%r9;%r8] = x + y +// [%r15;%r14;%r13;%r12] = 2 * d * x * y +// +// In case bias = 1 we need to negate this. For Edwards curves +// -(x,y) = (-x,y), i.e. we need to negate the x coordinate. +// In this processed encoding, that amounts to swapping the +// first two fields and negating the third. +// +// The optional negation here also pretends bias = 0 whenever +// ix = 0 so that it doesn't need to handle the case of zero +// inputs, since no non-trivial table entries are zero. Note +// that in the zero case the whole negation is trivial, and +// so indeed is the swapping. + + cmpq $0, bias + + movq %rax, %rsi + cmovnzq %r8, %rsi + cmovnzq %rax, %r8 + movq %rsi, 32(%rsp) + movq %r8, 64(%rsp) + + movq %rbx, %rsi + cmovnzq %r9, %rsi + cmovnzq %rbx, %r9 + movq %rsi, 40(%rsp) + movq %r9, 72(%rsp) + + movq %rcx, %rsi + cmovnzq %r10, %rsi + cmovnzq %rcx, %r10 + movq %rsi, 48(%rsp) + movq %r10, 80(%rsp) + + movq %rdx, %rsi + cmovnzq %r11, %rsi + cmovnzq %rdx, %r11 + movq %rsi, 56(%rsp) + movq %r11, 88(%rsp) + + movq $-19, %rax + movq $-1, %rbx + movq $-1, %rcx + movq $0x7fffffffffffffff, %rdx + subq %r12, %rax + sbbq %r13, %rbx + sbbq %r14, %rcx + sbbq %r15, %rdx + + movq ix, %r8 + movq bias, %r9 + testq %r8, %r8 + cmovzq %r8, %r9 + testq %r9, %r9 + + cmovzq %r12, %rax + cmovzq %r13, %rbx + cmovzq %r14, %rcx + cmovzq %r15, %rdx + movq %rax, 96(%rsp) + movq %rbx, 104(%rsp) + movq %rcx, 112(%rsp) + movq %rdx, 120(%rsp) + +// Extended-projective and precomputed mixed addition. +// This is effectively the same as calling the standalone +// function edwards25519_pepadd(acc,acc,tabent) + + double_4(t0,z_1) + sub_4(t1,y_1,x_1) + add_4(t2,y_1,x_1) + mul_4(t3,w_1,kxy_2) + mul_4(t1,t1,ymx_2) + mul_4(t2,t2,xpy_2) + sub_twice4(t4,t0,t3) + add_twice4(t0,t0,t3) + sub_twice4(t5,t2,t1) + add_twice4(t1,t2,t1) + mul_p25519(z_3,t4,t0) + mul_p25519(x_3,t5,t4) + mul_p25519(y_3,t0,t1) + mul_p25519(w_3,t5,t1) + +// End of the main loop; move on by 4 bits. + + addq $4, i + cmpq $256, i + jc scalarloop + +// Now we need to translate from Edwards curve edwards25519 back +// to the Montgomery form curve25519. 
The mapping in the affine +// representations is +// +// (x,y) |-> ((1 + y) / (1 - y), c * (1 + y) / ((1 - y) * x)) +// +// For x25519, we only need the x coordinate, and we compute this as +// +// (1 + y) / (1 - y) = (x + x * y) / (x - x * y) +// = (X/Z + T/Z) / (X/Z - T/Z) +// = (X + T) / (X - T) +// = (X + T) * inverse(X - T) +// +// We could equally well use (Z + Y) / (Z - Y), but the above has the +// same cost, and it more explicitly forces zero output whenever X = 0, +// regardless of how the modular inverse behaves on zero inputs. In +// the present setting (base point 9, mangled scalar) that doesn't +// really matter anyway since X = 0 never arises, but it seems a +// little bit tidier. Note that both Edwards point (0,1) which maps to +// the Montgomery point at infinity, and Edwards (0,-1) which maps to +// Montgomery (0,0) [this is the 2-torsion point] are both by definition +// mapped to 0 by the X coordinate mapping used to define curve25519. +// +// First the addition and subtraction: + + add_4(y_3,x_3,w_3) + sub_4(z_3,x_3,w_3) + +// Prepare to call the modular inverse function to get x_3 = 1/z_3 + + movq $4, %rdi + leaq 128(%rsp), %rsi + leaq 192(%rsp), %rdx + leaq p_25519(%rip), %rcx + leaq 256(%rsp), %r8 + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "x86/generic/bignum_modinv.S". Note +// that the stack it uses for its own temporaries is 80 bytes so it +// only overwrites local variables that are no longer needed. + + movq %rsi, 0x40(%rsp) + movq %r8, 0x38(%rsp) + movq %rcx, 0x48(%rsp) + leaq (%r8,%rdi,8), %r10 + movq %r10, 0x30(%rsp) + leaq (%r10,%rdi,8), %r15 + xorq %r11, %r11 + xorq %r9, %r9 +copyloop: + movq (%rdx,%r9,8), %rax + movq (%rcx,%r9,8), %rbx + movq %rax, (%r10,%r9,8) + movq %rbx, (%r15,%r9,8) + movq %rbx, (%r8,%r9,8) + movq %r11, (%rsi,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb copyloop + movq (%r8), %rax + movq %rax, %rbx + decq %rbx + movq %rbx, (%r8) + movq %rax, %rbp + movq %rax, %r12 + shlq $0x2, %rbp + subq %rbp, %r12 + xorq $0x2, %r12 + movq %r12, %rbp + imulq %rax, %rbp + movl $0x2, %eax + addq %rbp, %rax + addq $0x1, %rbp + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + movq %r12, 0x28(%rsp) + movq %rdi, %rax + shlq $0x7, %rax + movq %rax, 0x20(%rsp) +outerloop: + movq 0x20(%rsp), %r13 + addq $0x3f, %r13 + shrq $0x6, %r13 + cmpq %rdi, %r13 + cmovaeq %rdi, %r13 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi + xorq %r11, %r11 + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 +toploop: + movq (%r8,%r9,8), %rbx + movq (%r15,%r9,8), %rcx + movq %r11, %r10 + andq %r12, %r10 + andq %rbp, %r11 + movq %rbx, %rax + orq %rcx, %rax + negq %rax + cmovbq %r10, %r14 + cmovbq %r11, %rsi + cmovbq %rbx, %r12 + cmovbq %rcx, %rbp + sbbq %r11, %r11 + incq %r9 + cmpq %r13, %r9 + jb toploop + movq %r12, %rax + orq %rbp, %rax + bsrq %rax, %rcx + xorq $0x3f, %rcx + shldq %cl, %r14, %r12 + shldq %cl, %rsi, %rbp + movq (%r8), %rax + movq %rax, %r14 + movq (%r15), %rax + movq %rax, %rsi + movl $0x1, %r10d + movl $0x0, %r11d + movl $0x0, %ecx + movl $0x1, %edx + movl $0x3a, %r9d + movq %rdi, 0x8(%rsp) + movq %r13, 0x10(%rsp) + movq %r8, (%rsp) + 
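+
+// The next 58 (= 0x3a) iterations accumulate a 2 x 2 update matrix in
+// %r10, %r11, %rcx, %rdx (initialized to the identity just above), which
+// congloop/crossloop below then apply to the long variables. The bit
+// budget at 0x20(%rsp) starts at 128 * k = 512 and drops by 58 per outer
+// pass (the "subq $0x3a" at the bottom); see "x86/generic/bignum_modinv.S"
+// for the fully commented original.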
movq %r15, 0x18(%rsp) +innerloop: + movq %rbp, %rax + movq %rsi, %rdi + movq %rcx, %r13 + movq %rdx, %r15 + movq $0x1, %rbx + negq %rdi + andq %r14, %rbx + cmoveq %rbx, %rax + cmoveq %rbx, %rdi + cmoveq %rbx, %r13 + cmoveq %rbx, %r15 + movq %r12, %rbx + addq %r14, %rdi + movq %rdi, %r8 + negq %rdi + subq %rax, %rbx + cmovbq %r12, %rbp + cmovbq %r14, %rsi + cmovbq %r10, %rcx + cmovbq %r11, %rdx + cmovaeq %r8, %rdi + movq %rbx, %r12 + notq %rbx + incq %rbx + cmovbq %rbx, %r12 + movq %rdi, %r14 + addq %r13, %r10 + addq %r15, %r11 + shrq $1, %r12 + shrq $1, %r14 + leaq (%rcx,%rcx), %rcx + leaq (%rdx,%rdx), %rdx + decq %r9 + jne innerloop + movq 0x8(%rsp), %rdi + movq 0x10(%rsp), %r13 + movq (%rsp), %r8 + movq 0x18(%rsp), %r15 + movq %r10, (%rsp) + movq %r11, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rdx, 0x18(%rsp) + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + xorq %r14, %r14 + xorq %rsi, %rsi + xorq %r10, %r10 + xorq %r11, %r11 + xorq %r9, %r9 +congloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r12 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %rbp + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq %rdx, %r12 + shrdq $0x3a, %r14, %r10 + movq %r10, (%r8,%r9,8) + movq %r14, %r10 + movq %r12, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3a, %rsi, %r11 + movq %r11, (%r15,%r9,8) + movq %rsi, %r11 + movq %rbp, %rsi + incq %r9 + cmpq %rdi, %r9 + jb congloop + shldq $0x6, %r10, %r14 + shldq $0x6, %r11, %rsi + movq 0x48(%rsp), %r15 + movq (%r8), %rbx + movq 0x28(%rsp), %r12 + imulq %rbx, %r12 + movq (%r15), %rax + mulq %r12 + addq %rbx, %rax + movq %rdx, %r10 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je wmontend +wmontloop: + adcq (%r8,%r9,8), %r10 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %r12 + subq %rbx, %rdx + addq %r10, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r10 + incq %r9 + decq %rcx + jne wmontloop +wmontend: + adcq %r14, %r10 + movq %r10, -0x8(%r8,%rdi,8) + sbbq %r10, %r10 + negq %r10 + movq %rdi, %rcx + xorq %r9, %r9 +wcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne wcmploop + sbbq $0x0, %r10 + sbbq %r10, %r10 + notq %r10 + xorq %rcx, %rcx + xorq %r9, %r9 +wcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r10, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb wcorrloop + movq 0x40(%rsp), %r8 + movq (%r8), %rbx + movq 0x28(%rsp), %rbp + imulq %rbx, %rbp + movq (%r15), %rax + mulq %rbp + addq %rbx, %rax + movq %rdx, %r11 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je zmontend +zmontloop: + adcq (%r8,%r9,8), %r11 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %rbp + subq %rbx, %rdx + addq %r11, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r11 + incq %r9 + decq %rcx + jne zmontloop +zmontend: + adcq %rsi, %r11 + movq %r11, -0x8(%r8,%rdi,8) + sbbq %r11, %r11 + negq %r11 + movq %rdi, %rcx + xorq %r9, %r9 +zcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne zcmploop + sbbq $0x0, %r11 + sbbq %r11, %r11 + notq %r11 + xorq %rcx, %rcx + xorq %r9, %r9 +zcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r11, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb zcorrloop + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 + xorq %r12, %r12 
+ xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi +crossloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %r11 + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + subq %r12, %rdx + subq %rax, %r14 + sbbq %rdx, %r10 + sbbq %r12, %r12 + movq %r14, (%r8,%r9,8) + movq %r10, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + subq %rbp, %rdx + subq %rax, %rsi + sbbq %rdx, %r11 + sbbq %rbp, %rbp + movq %rsi, (%r15,%r9,8) + movq %r11, %rsi + incq %r9 + cmpq %r13, %r9 + jb crossloop + xorq %r9, %r9 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r12, %r14 + xorq %rbp, %rsi +optnegloop: + movq (%r8,%r9,8), %rax + xorq %r12, %rax + negq %r10 + adcq $0x0, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rax + xorq %rbp, %rax + negq %r11 + adcq $0x0, %rax + sbbq %r11, %r11 + movq %rax, (%r15,%r9,8) + incq %r9 + cmpq %r13, %r9 + jb optnegloop + subq %r10, %r14 + subq %r11, %rsi + movq %r13, %r9 +shiftloop: + movq -0x8(%r8,%r9,8), %rax + movq %rax, %r10 + shrdq $0x3a, %r14, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %r10, %r14 + movq -0x8(%r15,%r9,8), %rax + movq %rax, %r11 + shrdq $0x3a, %rsi, %rax + movq %rax, -0x8(%r15,%r9,8) + movq %r11, %rsi + decq %r9 + jne shiftloop + notq %rbp + movq 0x48(%rsp), %rcx + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r9, %r9 +fliploop: + movq %rbp, %rdx + movq (%rcx,%r9,8), %rax + andq %rax, %rdx + andq %r12, %rax + movq (%r8,%r9,8), %rbx + xorq %r12, %rbx + negq %r10 + adcq %rbx, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rbx + xorq %rbp, %rbx + negq %r11 + adcq %rbx, %rdx + sbbq %r11, %r11 + movq %rdx, (%r15,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb fliploop + subq $0x3a, 0x20(%rsp) + ja outerloop + +// The final result is (X + T) / (X - T) + + movq res, %rbp + mul_p25519(resx,y_3,x_3) + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// The modulus, for the modular inverse + +p_25519: + .quad 0xffffffffffffffed + .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x7fffffffffffffff + +// 2^254 * G and (2^254 + 8) * G in extended-projective coordinates +// but with z = 1 assumed and hence left out, so they are (X,Y,T) only. 
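+//
+// Since X * Y = T * Z and Z = 1 for these entries, each is just the affine
+// pair with its product appended; in C-style pseudocode (a sketch, with a
+// hypothetical mulmod helper):
+//
+//     X = x;  Y = y;  T = mulmod(x, y, p_25519);   // Z = 1 left implicit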
+ +edwards25519_0g: + + .quad 0x251037f7cf4e861d + .quad 0x10ede0fb19fb128f + .quad 0x96c033b175f5e2c8 + .quad 0x055f070d6c15fb0d + + .quad 0x7c52af2c97473e69 + .quad 0x022f82391bad8378 + .quad 0x9991e1b02adb476f + .quad 0x511144a03a99b855 + + .quad 0x5fafc3b88ff2e4ae + .quad 0x855e4ff0de1230ff + .quad 0x72e302a348492870 + .quad 0x1253c19e53dbe1bc + +edwards25519_8g: + + .quad 0x331d086e0d9abcaa + .quad 0x1e23c96d311a10c9 + .quad 0x96d0f95e58c13478 + .quad 0x2f72f7384fcfcc59 + + .quad 0x39a6cd1cfd7d87c9 + .quad 0x9867a0abd8ae153a + .quad 0xa49d2a5f35986745 + .quad 0x57012940cdfe82e1 + + .quad 0x5046a6532ec5544a + .quad 0x6d674004739ff6c9 + .quad 0x9bbaa44b234a70e3 + .quad 0x5e6d8901138cf386 + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. + +edwards25519_gtable: + + // 2^4 * 1 * G + + .quad 0x7ec851ca553e2df3 + .quad 0xa71284cba64878b3 + .quad 0xe6b5e4193288d1e7 + .quad 0x4cf210ec5a9a8883 + .quad 0x322d04a52d9021f6 + .quad 0xb9c19f3375c6bf9c + .quad 0x587a3a4342d20b09 + .quad 0x143b1cf8aa64fe61 + .quad 0x9f867c7d968acaab + .quad 0x5f54258e27092729 + .quad 0xd0a7d34bea180975 + .quad 0x21b546a3374126e1 + + // 2^4 * 2 * G + + .quad 0xa94ff858a2888343 + .quad 0xce0ed4565313ed3c + .quad 0xf55c3dcfb5bf34fa + .quad 0x0a653ca5c9eab371 + .quad 0x490a7a45d185218f + .quad 0x9a15377846049335 + .quad 0x0060ea09cc31e1f6 + .quad 0x7e041577f86ee965 + .quad 0x66b2a496ce5b67f3 + .quad 0xff5492d8bd569796 + .quad 0x503cec294a592cd0 + .quad 0x566943650813acb2 + + // 2^4 * 3 * G + + .quad 0xb818db0c26620798 + .quad 0x5d5c31d9606e354a + .quad 0x0982fa4f00a8cdc7 + .quad 0x17e12bcd4653e2d4 + .quad 0x5672f9eb1dabb69d + .quad 0xba70b535afe853fc + .quad 0x47ac0f752796d66d + .quad 0x32a5351794117275 + .quad 0xd3a644a6df648437 + .quad 0x703b6559880fbfdd + .quad 0xcb852540ad3a1aa5 + .quad 0x0900b3f78e4c6468 + + // 2^4 * 4 * G + + .quad 0x0a851b9f679d651b + .quad 0xe108cb61033342f2 + .quad 0xd601f57fe88b30a3 + .quad 0x371f3acaed2dd714 + .quad 0xed280fbec816ad31 + .quad 0x52d9595bd8e6efe3 + .quad 0x0fe71772f6c623f5 + .quad 0x4314030b051e293c + .quad 0xd560005efbf0bcad + .quad 0x8eb70f2ed1870c5e + .quad 0x201f9033d084e6a0 + .quad 0x4c3a5ae1ce7b6670 + + // 2^4 * 5 * G + + .quad 0x4138a434dcb8fa95 + .quad 0x870cf67d6c96840b + .quad 0xde388574297be82c + .quad 0x7c814db27262a55a + .quad 0xbaf875e4c93da0dd + .quad 0xb93282a771b9294d + .quad 0x80d63fb7f4c6c460 + .quad 0x6de9c73dea66c181 + .quad 0x478904d5a04df8f2 + .quad 0xfafbae4ab10142d3 + .quad 0xf6c8ac63555d0998 + .quad 0x5aac4a412f90b104 + + // 2^4 * 6 * G + + .quad 0xc64f326b3ac92908 + .quad 0x5551b282e663e1e0 + .quad 0x476b35f54a1a4b83 + .quad 0x1b9da3fe189f68c2 + .quad 0x603a0d0abd7f5134 + .quad 0x8089c932e1d3ae46 + .quad 0xdf2591398798bd63 + .quad 0x1c145cd274ba0235 + .quad 0x32e8386475f3d743 + .quad 0x365b8baf6ae5d9ef + .quad 0x825238b6385b681e + .quad 0x234929c1167d65e1 + + // 2^4 * 7 * G + + .quad 0x984decaba077ade8 + .quad 0x383f77ad19eb389d + .quad 0xc7ec6b7e2954d794 + .quad 0x59c77b3aeb7c3a7a + .quad 0x48145cc21d099fcf + .quad 0x4535c192cc28d7e5 + .quad 0x80e7c1e548247e01 + .quad 0x4a5f28743b2973ee + .quad 0xd3add725225ccf62 + .quad 0x911a3381b2152c5d + .quad 0xd8b39fad5b08f87d + .quad 0x6f05606b4799fe3b + + // 2^4 * 8 * G + + .quad 0x9ffe9e92177ba962 + .quad 0x98aee71d0de5cae1 + .quad 0x3ff4ae942d831044 + .quad 0x714de12e58533ac8 + .quad 0x5b433149f91b6483 + .quad 0xadb5dc655a2cbf62 + .quad 0x87fa8412632827b3 + .quad 0x60895e91ab49f8d8 + .quad 0xe9ecf2ed0cf86c18 + .quad 
0xb46d06120735dfd4 + .quad 0xbc9da09804b96be7 + .quad 0x73e2e62fd96dc26b + + // 2^8 * 1 * G + + .quad 0xed5b635449aa515e + .quad 0xa865c49f0bc6823a + .quad 0x850c1fe95b42d1c4 + .quad 0x30d76d6f03d315b9 + .quad 0x2eccdd0e632f9c1d + .quad 0x51d0b69676893115 + .quad 0x52dfb76ba8637a58 + .quad 0x6dd37d49a00eef39 + .quad 0x6c4444172106e4c7 + .quad 0xfb53d680928d7f69 + .quad 0xb4739ea4694d3f26 + .quad 0x10c697112e864bb0 + + // 2^8 * 2 * G + + .quad 0x6493c4277dbe5fde + .quad 0x265d4fad19ad7ea2 + .quad 0x0e00dfc846304590 + .quad 0x25e61cabed66fe09 + .quad 0x0ca62aa08358c805 + .quad 0x6a3d4ae37a204247 + .quad 0x7464d3a63b11eddc + .quad 0x03bf9baf550806ef + .quad 0x3f13e128cc586604 + .quad 0x6f5873ecb459747e + .quad 0xa0b63dedcc1268f5 + .quad 0x566d78634586e22c + + // 2^8 * 3 * G + + .quad 0x1637a49f9cc10834 + .quad 0xbc8e56d5a89bc451 + .quad 0x1cb5ec0f7f7fd2db + .quad 0x33975bca5ecc35d9 + .quad 0xa1054285c65a2fd0 + .quad 0x6c64112af31667c3 + .quad 0x680ae240731aee58 + .quad 0x14fba5f34793b22a + .quad 0x3cd746166985f7d4 + .quad 0x593e5e84c9c80057 + .quad 0x2fc3f2b67b61131e + .quad 0x14829cea83fc526c + + // 2^8 * 4 * G + + .quad 0xff437b8497dd95c2 + .quad 0x6c744e30aa4eb5a7 + .quad 0x9e0c5d613c85e88b + .quad 0x2fd9c71e5f758173 + .quad 0x21e70b2f4e71ecb8 + .quad 0xe656ddb940a477e3 + .quad 0xbf6556cece1d4f80 + .quad 0x05fc3bc4535d7b7e + .quad 0x24b8b3ae52afdedd + .quad 0x3495638ced3b30cf + .quad 0x33a4bc83a9be8195 + .quad 0x373767475c651f04 + + // 2^8 * 5 * G + + .quad 0x2fba99fd40d1add9 + .quad 0xb307166f96f4d027 + .quad 0x4363f05215f03bae + .quad 0x1fbea56c3b18f999 + .quad 0x634095cb14246590 + .quad 0xef12144016c15535 + .quad 0x9e38140c8910bc60 + .quad 0x6bf5905730907c8c + .quad 0x0fa778f1e1415b8a + .quad 0x06409ff7bac3a77e + .quad 0x6f52d7b89aa29a50 + .quad 0x02521cf67a635a56 + + // 2^8 * 6 * G + + .quad 0x513fee0b0a9d5294 + .quad 0x8f98e75c0fdf5a66 + .quad 0xd4618688bfe107ce + .quad 0x3fa00a7e71382ced + .quad 0xb1146720772f5ee4 + .quad 0xe8f894b196079ace + .quad 0x4af8224d00ac824a + .quad 0x001753d9f7cd6cc4 + .quad 0x3c69232d963ddb34 + .quad 0x1dde87dab4973858 + .quad 0xaad7d1f9a091f285 + .quad 0x12b5fe2fa048edb6 + + // 2^8 * 7 * G + + .quad 0x71f0fbc496fce34d + .quad 0x73b9826badf35bed + .quad 0xd2047261ff28c561 + .quad 0x749b76f96fb1206f + .quad 0xdf2b7c26ad6f1e92 + .quad 0x4b66d323504b8913 + .quad 0x8c409dc0751c8bc3 + .quad 0x6f7e93c20796c7b8 + .quad 0x1f5af604aea6ae05 + .quad 0xc12351f1bee49c99 + .quad 0x61a808b5eeff6b66 + .quad 0x0fcec10f01e02151 + + // 2^8 * 8 * G + + .quad 0x644d58a649fe1e44 + .quad 0x21fcaea231ad777e + .quad 0x02441c5a887fd0d2 + .quad 0x4901aa7183c511f3 + .quad 0x3df2d29dc4244e45 + .quad 0x2b020e7493d8de0a + .quad 0x6cc8067e820c214d + .quad 0x413779166feab90a + .quad 0x08b1b7548c1af8f0 + .quad 0xce0f7a7c246299b4 + .quad 0xf760b0f91e06d939 + .quad 0x41bb887b726d1213 + + // 2^12 * 1 * G + + .quad 0x9267806c567c49d8 + .quad 0x066d04ccca791e6a + .quad 0xa69f5645e3cc394b + .quad 0x5c95b686a0788cd2 + .quad 0x97d980e0aa39f7d2 + .quad 0x35d0384252c6b51c + .quad 0x7d43f49307cd55aa + .quad 0x56bd36cfb78ac362 + .quad 0x2ac519c10d14a954 + .quad 0xeaf474b494b5fa90 + .quad 0xe6af8382a9f87a5a + .quad 0x0dea6db1879be094 + + // 2^12 * 2 * G + + .quad 0xaa66bf547344e5ab + .quad 0xda1258888f1b4309 + .quad 0x5e87d2b3fd564b2f + .quad 0x5b2c78885483b1dd + .quad 0x15baeb74d6a8797a + .quad 0x7ef55cf1fac41732 + .quad 0x29001f5a3c8b05c5 + .quad 0x0ad7cc8752eaccfb + .quad 0x52151362793408cf + .quad 0xeb0f170319963d94 + .quad 0xa833b2fa883d9466 + .quad 0x093a7fa775003c78 + + // 2^12 * 3 * G + + .quad 
0xe5107de63a16d7be + .quad 0xa377ffdc9af332cf + .quad 0x70d5bf18440b677f + .quad 0x6a252b19a4a31403 + .quad 0xb8e9604460a91286 + .quad 0x7f3fd8047778d3de + .quad 0x67d01e31bf8a5e2d + .quad 0x7b038a06c27b653e + .quad 0x9ed919d5d36990f3 + .quad 0x5213aebbdb4eb9f2 + .quad 0xc708ea054cb99135 + .quad 0x58ded57f72260e56 + + // 2^12 * 4 * G + + .quad 0x78e79dade9413d77 + .quad 0xf257f9d59729e67d + .quad 0x59db910ee37aa7e6 + .quad 0x6aa11b5bbb9e039c + .quad 0xda6d53265b0fd48b + .quad 0x8960823193bfa988 + .quad 0xd78ac93261d57e28 + .quad 0x79f2942d3a5c8143 + .quad 0x97da2f25b6c88de9 + .quad 0x251ba7eaacf20169 + .quad 0x09b44f87ef4eb4e4 + .quad 0x7d90ab1bbc6a7da5 + + // 2^12 * 5 * G + + .quad 0x9acca683a7016bfe + .quad 0x90505f4df2c50b6d + .quad 0x6b610d5fcce435aa + .quad 0x19a10d446198ff96 + .quad 0x1a07a3f496b3c397 + .quad 0x11ceaa188f4e2532 + .quad 0x7d9498d5a7751bf0 + .quad 0x19ed161f508dd8a0 + .quad 0x560a2cd687dce6ca + .quad 0x7f3568c48664cf4d + .quad 0x8741e95222803a38 + .quad 0x483bdab1595653fc + + // 2^12 * 6 * G + + .quad 0xfa780f148734fa49 + .quad 0x106f0b70360534e0 + .quad 0x2210776fe3e307bd + .quad 0x3286c109dde6a0fe + .quad 0xd6cf4d0ab4da80f6 + .quad 0x82483e45f8307fe0 + .quad 0x05005269ae6f9da4 + .quad 0x1c7052909cf7877a + .quad 0x32ee7de2874e98d4 + .quad 0x14c362e9b97e0c60 + .quad 0x5781dcde6a60a38a + .quad 0x217dd5eaaa7aa840 + + // 2^12 * 7 * G + + .quad 0x9db7c4d0248e1eb0 + .quad 0xe07697e14d74bf52 + .quad 0x1e6a9b173c562354 + .quad 0x7fa7c21f795a4965 + .quad 0x8bdf1fb9be8c0ec8 + .quad 0x00bae7f8e30a0282 + .quad 0x4963991dad6c4f6c + .quad 0x07058a6e5df6f60a + .quad 0xe9eb02c4db31f67f + .quad 0xed25fd8910bcfb2b + .quad 0x46c8131f5c5cddb4 + .quad 0x33b21c13a0cb9bce + + // 2^12 * 8 * G + + .quad 0x360692f8087d8e31 + .quad 0xf4dcc637d27163f7 + .quad 0x25a4e62065ea5963 + .quad 0x659bf72e5ac160d9 + .quad 0x9aafb9b05ee38c5b + .quad 0xbf9d2d4e071a13c7 + .quad 0x8eee6e6de933290a + .quad 0x1c3bab17ae109717 + .quad 0x1c9ab216c7cab7b0 + .quad 0x7d65d37407bbc3cc + .quad 0x52744750504a58d5 + .quad 0x09f2606b131a2990 + + // 2^16 * 1 * G + + .quad 0x40e87d44744346be + .quad 0x1d48dad415b52b25 + .quad 0x7c3a8a18a13b603e + .quad 0x4eb728c12fcdbdf7 + .quad 0x7e234c597c6691ae + .quad 0x64889d3d0a85b4c8 + .quad 0xdae2c90c354afae7 + .quad 0x0a871e070c6a9e1d + .quad 0x3301b5994bbc8989 + .quad 0x736bae3a5bdd4260 + .quad 0x0d61ade219d59e3c + .quad 0x3ee7300f2685d464 + + // 2^16 * 2 * G + + .quad 0xf5d255e49e7dd6b7 + .quad 0x8016115c610b1eac + .quad 0x3c99975d92e187ca + .quad 0x13815762979125c2 + .quad 0x43fa7947841e7518 + .quad 0xe5c6fa59639c46d7 + .quad 0xa1065e1de3052b74 + .quad 0x7d47c6a2cfb89030 + .quad 0x3fdad0148ef0d6e0 + .quad 0x9d3e749a91546f3c + .quad 0x71ec621026bb8157 + .quad 0x148cf58d34c9ec80 + + // 2^16 * 3 * G + + .quad 0x46a492f67934f027 + .quad 0x469984bef6840aa9 + .quad 0x5ca1bc2a89611854 + .quad 0x3ff2fa1ebd5dbbd4 + .quad 0xe2572f7d9ae4756d + .quad 0x56c345bb88f3487f + .quad 0x9fd10b6d6960a88d + .quad 0x278febad4eaea1b9 + .quad 0xb1aa681f8c933966 + .quad 0x8c21949c20290c98 + .quad 0x39115291219d3c52 + .quad 0x4104dd02fe9c677b + + // 2^16 * 4 * G + + .quad 0x72b2bf5e1124422a + .quad 0xa1fa0c3398a33ab5 + .quad 0x94cb6101fa52b666 + .quad 0x2c863b00afaf53d5 + .quad 0x81214e06db096ab8 + .quad 0x21a8b6c90ce44f35 + .quad 0x6524c12a409e2af5 + .quad 0x0165b5a48efca481 + .quad 0xf190a474a0846a76 + .quad 0x12eff984cd2f7cc0 + .quad 0x695e290658aa2b8f + .quad 0x591b67d9bffec8b8 + + // 2^16 * 5 * G + + .quad 0x312f0d1c80b49bfa + .quad 0x5979515eabf3ec8a + .quad 0x727033c09ef01c88 + .quad 
0x3de02ec7ca8f7bcb + .quad 0x99b9b3719f18b55d + .quad 0xe465e5faa18c641e + .quad 0x61081136c29f05ed + .quad 0x489b4f867030128b + .quad 0xd232102d3aeb92ef + .quad 0xe16253b46116a861 + .quad 0x3d7eabe7190baa24 + .quad 0x49f5fbba496cbebf + + // 2^16 * 6 * G + + .quad 0x30949a108a5bcfd4 + .quad 0xdc40dd70bc6473eb + .quad 0x92c294c1307c0d1c + .quad 0x5604a86dcbfa6e74 + .quad 0x155d628c1e9c572e + .quad 0x8a4d86acc5884741 + .quad 0x91a352f6515763eb + .quad 0x06a1a6c28867515b + .quad 0x7288d1d47c1764b6 + .quad 0x72541140e0418b51 + .quad 0x9f031a6018acf6d1 + .quad 0x20989e89fe2742c6 + + // 2^16 * 7 * G + + .quad 0x499777fd3a2dcc7f + .quad 0x32857c2ca54fd892 + .quad 0xa279d864d207e3a0 + .quad 0x0403ed1d0ca67e29 + .quad 0x1674278b85eaec2e + .quad 0x5621dc077acb2bdf + .quad 0x640a4c1661cbf45a + .quad 0x730b9950f70595d3 + .quad 0xc94b2d35874ec552 + .quad 0xc5e6c8cf98246f8d + .quad 0xf7cb46fa16c035ce + .quad 0x5bd7454308303dcc + + // 2^16 * 8 * G + + .quad 0x7f9ad19528b24cc2 + .quad 0x7f6b54656335c181 + .quad 0x66b8b66e4fc07236 + .quad 0x133a78007380ad83 + .quad 0x85c4932115e7792a + .quad 0xc64c89a2bdcdddc9 + .quad 0x9d1e3da8ada3d762 + .quad 0x5bb7db123067f82c + .quad 0x0961f467c6ca62be + .quad 0x04ec21d6211952ee + .quad 0x182360779bd54770 + .quad 0x740dca6d58f0e0d2 + + // 2^20 * 1 * G + + .quad 0x50b70bf5d3f0af0b + .quad 0x4feaf48ae32e71f7 + .quad 0x60e84ed3a55bbd34 + .quad 0x00ed489b3f50d1ed + .quad 0x3906c72aed261ae5 + .quad 0x9ab68fd988e100f7 + .quad 0xf5e9059af3360197 + .quad 0x0e53dc78bf2b6d47 + .quad 0xb90829bf7971877a + .quad 0x5e4444636d17e631 + .quad 0x4d05c52e18276893 + .quad 0x27632d9a5a4a4af5 + + // 2^20 * 2 * G + + .quad 0xd11ff05154b260ce + .quad 0xd86dc38e72f95270 + .quad 0x601fcd0d267cc138 + .quad 0x2b67916429e90ccd + .quad 0xa98285d187eaffdb + .quad 0xa5b4fbbbd8d0a864 + .quad 0xb658f27f022663f7 + .quad 0x3bbc2b22d99ce282 + .quad 0xb917c952583c0a58 + .quad 0x653ff9b80fe4c6f3 + .quad 0x9b0da7d7bcdf3c0c + .quad 0x43a0eeb6ab54d60e + + // 2^20 * 3 * G + + .quad 0x396966a46d4a5487 + .quad 0xf811a18aac2bb3ba + .quad 0x66e4685b5628b26b + .quad 0x70a477029d929b92 + .quad 0x3ac6322357875fe8 + .quad 0xd9d4f4ecf5fbcb8f + .quad 0x8dee8493382bb620 + .quad 0x50c5eaa14c799fdc + .quad 0xdd0edc8bd6f2fb3c + .quad 0x54c63aa79cc7b7a0 + .quad 0xae0b032b2c8d9f1a + .quad 0x6f9ce107602967fb + + // 2^20 * 4 * G + + .quad 0xad1054b1cde1c22a + .quad 0xc4a8e90248eb32df + .quad 0x5f3e7b33accdc0ea + .quad 0x72364713fc79963e + .quad 0x139693063520e0b5 + .quad 0x437fcf7c88ea03fe + .quad 0xf7d4c40bd3c959bc + .quad 0x699154d1f893ded9 + .quad 0x315d5c75b4b27526 + .quad 0xcccb842d0236daa5 + .quad 0x22f0c8a3345fee8e + .quad 0x73975a617d39dbed + + // 2^20 * 5 * G + + .quad 0xe4024df96375da10 + .quad 0x78d3251a1830c870 + .quad 0x902b1948658cd91c + .quad 0x7e18b10b29b7438a + .quad 0x6f37f392f4433e46 + .quad 0x0e19b9a11f566b18 + .quad 0x220fb78a1fd1d662 + .quad 0x362a4258a381c94d + .quad 0x9071d9132b6beb2f + .quad 0x0f26e9ad28418247 + .quad 0xeab91ec9bdec925d + .quad 0x4be65bc8f48af2de + + // 2^20 * 6 * G + + .quad 0x78487feba36e7028 + .quad 0x5f3f13001dd8ce34 + .quad 0x934fb12d4b30c489 + .quad 0x056c244d397f0a2b + .quad 0x1d50fba257c26234 + .quad 0x7bd4823adeb0678b + .quad 0xc2b0dc6ea6538af5 + .quad 0x5665eec6351da73e + .quad 0xdb3ee00943bfb210 + .quad 0x4972018720800ac2 + .quad 0x26ab5d6173bd8667 + .quad 0x20b209c2ab204938 + + // 2^20 * 7 * G + + .quad 0x549e342ac07fb34b + .quad 0x02d8220821373d93 + .quad 0xbc262d70acd1f567 + .quad 0x7a92c9fdfbcac784 + .quad 0x1fcca94516bd3289 + .quad 0x448d65aa41420428 + .quad 
0x59c3b7b216a55d62 + .quad 0x49992cc64e612cd8 + .quad 0x65bd1bea70f801de + .quad 0x1befb7c0fe49e28a + .quad 0xa86306cdb1b2ae4a + .quad 0x3b7ac0cd265c2a09 + + // 2^20 * 8 * G + + .quad 0x822bee438c01bcec + .quad 0x530cb525c0fbc73b + .quad 0x48519034c1953fe9 + .quad 0x265cc261e09a0f5b + .quad 0xf0d54e4f22ed39a7 + .quad 0xa2aae91e5608150a + .quad 0xf421b2e9eddae875 + .quad 0x31bc531d6b7de992 + .quad 0xdf3d134da980f971 + .quad 0x7a4fb8d1221a22a7 + .quad 0x3df7d42035aad6d8 + .quad 0x2a14edcc6a1a125e + + // 2^24 * 1 * G + + .quad 0xdf48ee0752cfce4e + .quad 0xc3fffaf306ec08b7 + .quad 0x05710b2ab95459c4 + .quad 0x161d25fa963ea38d + .quad 0x231a8c570478433c + .quad 0xb7b5270ec281439d + .quad 0xdbaa99eae3d9079f + .quad 0x2c03f5256c2b03d9 + .quad 0x790f18757b53a47d + .quad 0x307b0130cf0c5879 + .quad 0x31903d77257ef7f9 + .quad 0x699468bdbd96bbaf + + // 2^24 * 2 * G + + .quad 0xbd1f2f46f4dafecf + .quad 0x7cef0114a47fd6f7 + .quad 0xd31ffdda4a47b37f + .quad 0x525219a473905785 + .quad 0xd8dd3de66aa91948 + .quad 0x485064c22fc0d2cc + .quad 0x9b48246634fdea2f + .quad 0x293e1c4e6c4a2e3a + .quad 0x376e134b925112e1 + .quad 0x703778b5dca15da0 + .quad 0xb04589af461c3111 + .quad 0x5b605c447f032823 + + // 2^24 * 3 * G + + .quad 0xb965805920c47c89 + .quad 0xe7f0100c923b8fcc + .quad 0x0001256502e2ef77 + .quad 0x24a76dcea8aeb3ee + .quad 0x3be9fec6f0e7f04c + .quad 0x866a579e75e34962 + .quad 0x5542ef161e1de61a + .quad 0x2f12fef4cc5abdd5 + .quad 0x0a4522b2dfc0c740 + .quad 0x10d06e7f40c9a407 + .quad 0xc6cf144178cff668 + .quad 0x5e607b2518a43790 + + // 2^24 * 4 * G + + .quad 0x58b31d8f6cdf1818 + .quad 0x35cfa74fc36258a2 + .quad 0xe1b3ff4f66e61d6e + .quad 0x5067acab6ccdd5f7 + .quad 0xa02c431ca596cf14 + .quad 0xe3c42d40aed3e400 + .quad 0xd24526802e0f26db + .quad 0x201f33139e457068 + .quad 0xfd527f6b08039d51 + .quad 0x18b14964017c0006 + .quad 0xd5220eb02e25a4a8 + .quad 0x397cba8862460375 + + // 2^24 * 5 * G + + .quad 0x30c13093f05959b2 + .quad 0xe23aa18de9a97976 + .quad 0x222fd491721d5e26 + .quad 0x2339d320766e6c3a + .quad 0x7815c3fbc81379e7 + .quad 0xa6619420dde12af1 + .quad 0xffa9c0f885a8fdd5 + .quad 0x771b4022c1e1c252 + .quad 0xd87dd986513a2fa7 + .quad 0xf5ac9b71f9d4cf08 + .quad 0xd06bc31b1ea283b3 + .quad 0x331a189219971a76 + + // 2^24 * 6 * G + + .quad 0xf5166f45fb4f80c6 + .quad 0x9c36c7de61c775cf + .quad 0xe3d4e81b9041d91c + .quad 0x31167c6b83bdfe21 + .quad 0x26512f3a9d7572af + .quad 0x5bcbe28868074a9e + .quad 0x84edc1c11180f7c4 + .quad 0x1ac9619ff649a67b + .quad 0xf22b3842524b1068 + .quad 0x5068343bee9ce987 + .quad 0xfc9d71844a6250c8 + .quad 0x612436341f08b111 + + // 2^24 * 7 * G + + .quad 0xd99d41db874e898d + .quad 0x09fea5f16c07dc20 + .quad 0x793d2c67d00f9bbc + .quad 0x46ebe2309e5eff40 + .quad 0x8b6349e31a2d2638 + .quad 0x9ddfb7009bd3fd35 + .quad 0x7f8bf1b8a3a06ba4 + .quad 0x1522aa3178d90445 + .quad 0x2c382f5369614938 + .quad 0xdafe409ab72d6d10 + .quad 0xe8c83391b646f227 + .quad 0x45fe70f50524306c + + // 2^24 * 8 * G + + .quad 0xda4875a6960c0b8c + .quad 0x5b68d076ef0e2f20 + .quad 0x07fb51cf3d0b8fd4 + .quad 0x428d1623a0e392d4 + .quad 0x62f24920c8951491 + .quad 0x05f007c83f630ca2 + .quad 0x6fbb45d2f5c9d4b8 + .quad 0x16619f6db57a2245 + .quad 0x084f4a4401a308fd + .quad 0xa82219c376a5caac + .quad 0xdeb8de4643d1bc7d + .quad 0x1d81592d60bd38c6 + + // 2^28 * 1 * G + + .quad 0xd833d7beec2a4c38 + .quad 0x2c9162830acc20ed + .quad 0xe93a47aa92df7581 + .quad 0x702d67a3333c4a81 + .quad 0x3a4a369a2f89c8a1 + .quad 0x63137a1d7c8de80d + .quad 0xbcac008a78eda015 + .quad 0x2cb8b3a5b483b03f + .quad 0x36e417cbcb1b90a1 + .quad 
0x33b3ddaa7f11794e + .quad 0x3f510808885bc607 + .quad 0x24141dc0e6a8020d + + // 2^28 * 2 * G + + .quad 0x59f73c773fefee9d + .quad 0xb3f1ef89c1cf989d + .quad 0xe35dfb42e02e545f + .quad 0x5766120b47a1b47c + .quad 0x91925dccbd83157d + .quad 0x3ca1205322cc8094 + .quad 0x28e57f183f90d6e4 + .quad 0x1a4714cede2e767b + .quad 0xdb20ba0fb8b6b7ff + .quad 0xb732c3b677511fa1 + .quad 0xa92b51c099f02d89 + .quad 0x4f3875ad489ca5f1 + + // 2^28 * 3 * G + + .quad 0xc7fc762f4932ab22 + .quad 0x7ac0edf72f4c3c1b + .quad 0x5f6b55aa9aa895e8 + .quad 0x3680274dad0a0081 + .quad 0x79ed13f6ee73eec0 + .quad 0xa5c6526d69110bb1 + .quad 0xe48928c38603860c + .quad 0x722a1446fd7059f5 + .quad 0xd0959fe9a8cf8819 + .quad 0xd0a995508475a99c + .quad 0x6eac173320b09cc5 + .quad 0x628ecf04331b1095 + + // 2^28 * 4 * G + + .quad 0x98bcb118a9d0ddbc + .quad 0xee449e3408b4802b + .quad 0x87089226b8a6b104 + .quad 0x685f349a45c7915d + .quad 0x9b41acf85c74ccf1 + .quad 0xb673318108265251 + .quad 0x99c92aed11adb147 + .quad 0x7a47d70d34ecb40f + .quad 0x60a0c4cbcc43a4f5 + .quad 0x775c66ca3677bea9 + .quad 0xa17aa1752ff8f5ed + .quad 0x11ded9020e01fdc0 + + // 2^28 * 5 * G + + .quad 0x890e7809caefe704 + .quad 0x8728296de30e8c6c + .quad 0x4c5cd2a392aeb1c9 + .quad 0x194263d15771531f + .quad 0x471f95b03bea93b7 + .quad 0x0552d7d43313abd3 + .quad 0xbd9370e2e17e3f7b + .quad 0x7b120f1db20e5bec + .quad 0x17d2fb3d86502d7a + .quad 0xb564d84450a69352 + .quad 0x7da962c8a60ed75d + .quad 0x00d0f85b318736aa + + // 2^28 * 6 * G + + .quad 0x978b142e777c84fd + .quad 0xf402644705a8c062 + .quad 0xa67ad51be7e612c7 + .quad 0x2f7b459698dd6a33 + .quad 0xa6753c1efd7621c1 + .quad 0x69c0b4a7445671f5 + .quad 0x971f527405b23c11 + .quad 0x387bc74851a8c7cd + .quad 0x81894b4d4a52a9a8 + .quad 0xadd93e12f6b8832f + .quad 0x184d8548b61bd638 + .quad 0x3f1c62dbd6c9f6cd + + // 2^28 * 7 * G + + .quad 0x2e8f1f0091910c1f + .quad 0xa4df4fe0bff2e12c + .quad 0x60c6560aee927438 + .quad 0x6338283facefc8fa + .quad 0x3fad3e40148f693d + .quad 0x052656e194eb9a72 + .quad 0x2f4dcbfd184f4e2f + .quad 0x406f8db1c482e18b + .quad 0x9e630d2c7f191ee4 + .quad 0x4fbf8301bc3ff670 + .quad 0x787d8e4e7afb73c4 + .quad 0x50d83d5be8f58fa5 + + // 2^28 * 8 * G + + .quad 0x85683916c11a1897 + .quad 0x2d69a4efe506d008 + .quad 0x39af1378f664bd01 + .quad 0x65942131361517c6 + .quad 0xc0accf90b4d3b66d + .quad 0xa7059de561732e60 + .quad 0x033d1f7870c6b0ba + .quad 0x584161cd26d946e4 + .quad 0xbbf2b1a072d27ca2 + .quad 0xbf393c59fbdec704 + .quad 0xe98dbbcee262b81e + .quad 0x02eebd0b3029b589 + + // 2^32 * 1 * G + + .quad 0x61368756a60dac5f + .quad 0x17e02f6aebabdc57 + .quad 0x7f193f2d4cce0f7d + .quad 0x20234a7789ecdcf0 + .quad 0x8765b69f7b85c5e8 + .quad 0x6ff0678bd168bab2 + .quad 0x3a70e77c1d330f9b + .quad 0x3a5f6d51b0af8e7c + .quad 0x76d20db67178b252 + .quad 0x071c34f9d51ed160 + .quad 0xf62a4a20b3e41170 + .quad 0x7cd682353cffe366 + + // 2^32 * 2 * G + + .quad 0x0be1a45bd887fab6 + .quad 0x2a846a32ba403b6e + .quad 0xd9921012e96e6000 + .quad 0x2838c8863bdc0943 + .quad 0xa665cd6068acf4f3 + .quad 0x42d92d183cd7e3d3 + .quad 0x5759389d336025d9 + .quad 0x3ef0253b2b2cd8ff + .quad 0xd16bb0cf4a465030 + .quad 0xfa496b4115c577ab + .quad 0x82cfae8af4ab419d + .quad 0x21dcb8a606a82812 + + // 2^32 * 3 * G + + .quad 0x5c6004468c9d9fc8 + .quad 0x2540096ed42aa3cb + .quad 0x125b4d4c12ee2f9c + .quad 0x0bc3d08194a31dab + .quad 0x9a8d00fabe7731ba + .quad 0x8203607e629e1889 + .quad 0xb2cc023743f3d97f + .quad 0x5d840dbf6c6f678b + .quad 0x706e380d309fe18b + .quad 0x6eb02da6b9e165c7 + .quad 0x57bbba997dae20ab + .quad 0x3a4276232ac196dd + + // 2^32 * 4 * G + 
+ .quad 0x4b42432c8a7084fa + .quad 0x898a19e3dfb9e545 + .quad 0xbe9f00219c58e45d + .quad 0x1ff177cea16debd1 + .quad 0x3bf8c172db447ecb + .quad 0x5fcfc41fc6282dbd + .quad 0x80acffc075aa15fe + .quad 0x0770c9e824e1a9f9 + .quad 0xcf61d99a45b5b5fd + .quad 0x860984e91b3a7924 + .quad 0xe7300919303e3e89 + .quad 0x39f264fd41500b1e + + // 2^32 * 5 * G + + .quad 0xa7ad3417dbe7e29c + .quad 0xbd94376a2b9c139c + .quad 0xa0e91b8e93597ba9 + .quad 0x1712d73468889840 + .quad 0xd19b4aabfe097be1 + .quad 0xa46dfce1dfe01929 + .quad 0xc3c908942ca6f1ff + .quad 0x65c621272c35f14e + .quad 0xe72b89f8ce3193dd + .quad 0x4d103356a125c0bb + .quad 0x0419a93d2e1cfe83 + .quad 0x22f9800ab19ce272 + + // 2^32 * 6 * G + + .quad 0x605a368a3e9ef8cb + .quad 0xe3e9c022a5504715 + .quad 0x553d48b05f24248f + .quad 0x13f416cd647626e5 + .quad 0x42029fdd9a6efdac + .quad 0xb912cebe34a54941 + .quad 0x640f64b987bdf37b + .quad 0x4171a4d38598cab4 + .quad 0xfa2758aa99c94c8c + .quad 0x23006f6fb000b807 + .quad 0xfbd291ddadda5392 + .quad 0x508214fa574bd1ab + + // 2^32 * 7 * G + + .quad 0xc20269153ed6fe4b + .quad 0xa65a6739511d77c4 + .quad 0xcbde26462c14af94 + .quad 0x22f960ec6faba74b + .quad 0x461a15bb53d003d6 + .quad 0xb2102888bcf3c965 + .quad 0x27c576756c683a5a + .quad 0x3a7758a4c86cb447 + .quad 0x548111f693ae5076 + .quad 0x1dae21df1dfd54a6 + .quad 0x12248c90f3115e65 + .quad 0x5d9fd15f8de7f494 + + // 2^32 * 8 * G + + .quad 0x031408d36d63727f + .quad 0x6a379aefd7c7b533 + .quad 0xa9e18fc5ccaee24b + .quad 0x332f35914f8fbed3 + .quad 0x3f244d2aeed7521e + .quad 0x8e3a9028432e9615 + .quad 0xe164ba772e9c16d4 + .quad 0x3bc187fa47eb98d8 + .quad 0x6d470115ea86c20c + .quad 0x998ab7cb6c46d125 + .quad 0xd77832b53a660188 + .quad 0x450d81ce906fba03 + + // 2^36 * 1 * G + + .quad 0xf8ae4d2ad8453902 + .quad 0x7018058ee8db2d1d + .quad 0xaab3995fc7d2c11e + .quad 0x53b16d2324ccca79 + .quad 0x23264d66b2cae0b5 + .quad 0x7dbaed33ebca6576 + .quad 0x030ebed6f0d24ac8 + .quad 0x2a887f78f7635510 + .quad 0x2a23b9e75c012d4f + .quad 0x0c974651cae1f2ea + .quad 0x2fb63273675d70ca + .quad 0x0ba7250b864403f5 + + // 2^36 * 2 * G + + .quad 0xbb0d18fd029c6421 + .quad 0xbc2d142189298f02 + .quad 0x8347f8e68b250e96 + .quad 0x7b9f2fe8032d71c9 + .quad 0xdd63589386f86d9c + .quad 0x61699176e13a85a4 + .quad 0x2e5111954eaa7d57 + .quad 0x32c21b57fb60bdfb + .quad 0xd87823cd319e0780 + .quad 0xefc4cfc1897775c5 + .quad 0x4854fb129a0ab3f7 + .quad 0x12c49d417238c371 + + // 2^36 * 3 * G + + .quad 0x0950b533ffe83769 + .quad 0x21861c1d8e1d6bd1 + .quad 0xf022d8381302e510 + .quad 0x2509200c6391cab4 + .quad 0x09b3a01783799542 + .quad 0x626dd08faad5ee3f + .quad 0xba00bceeeb70149f + .quad 0x1421b246a0a444c9 + .quad 0x4aa43a8e8c24a7c7 + .quad 0x04c1f540d8f05ef5 + .quad 0xadba5e0c0b3eb9dc + .quad 0x2ab5504448a49ce3 + + // 2^36 * 4 * G + + .quad 0x2ed227266f0f5dec + .quad 0x9824ee415ed50824 + .quad 0x807bec7c9468d415 + .quad 0x7093bae1b521e23f + .quad 0xdc07ac631c5d3afa + .quad 0x58615171f9df8c6c + .quad 0x72a079d89d73e2b0 + .quad 0x7301f4ceb4eae15d + .quad 0x6409e759d6722c41 + .quad 0xa674e1cf72bf729b + .quad 0xbc0a24eb3c21e569 + .quad 0x390167d24ebacb23 + + // 2^36 * 5 * G + + .quad 0x27f58e3bba353f1c + .quad 0x4c47764dbf6a4361 + .quad 0xafbbc4e56e562650 + .quad 0x07db2ee6aae1a45d + .quad 0xd7bb054ba2f2120b + .quad 0xe2b9ceaeb10589b7 + .quad 0x3fe8bac8f3c0edbe + .quad 0x4cbd40767112cb69 + .quad 0x0b603cc029c58176 + .quad 0x5988e3825cb15d61 + .quad 0x2bb61413dcf0ad8d + .quad 0x7b8eec6c74183287 + + // 2^36 * 6 * G + + .quad 0xe4ca40782cd27cb0 + .quad 0xdaf9c323fbe967bd + .quad 0xb29bd34a8ad41e9e + .quad 
0x72810497626ede4d + .quad 0x32fee570fc386b73 + .quad 0xda8b0141da3a8cc7 + .quad 0x975ffd0ac8968359 + .quad 0x6ee809a1b132a855 + .quad 0x9444bb31fcfd863a + .quad 0x2fe3690a3e4e48c5 + .quad 0xdc29c867d088fa25 + .quad 0x13bd1e38d173292e + + // 2^36 * 7 * G + + .quad 0xd32b4cd8696149b5 + .quad 0xe55937d781d8aab7 + .quad 0x0bcb2127ae122b94 + .quad 0x41e86fcfb14099b0 + .quad 0x223fb5cf1dfac521 + .quad 0x325c25316f554450 + .quad 0x030b98d7659177ac + .quad 0x1ed018b64f88a4bd + .quad 0x3630dfa1b802a6b0 + .quad 0x880f874742ad3bd5 + .quad 0x0af90d6ceec5a4d4 + .quad 0x746a247a37cdc5d9 + + // 2^36 * 8 * G + + .quad 0xd531b8bd2b7b9af6 + .quad 0x5005093537fc5b51 + .quad 0x232fcf25c593546d + .quad 0x20a365142bb40f49 + .quad 0x6eccd85278d941ed + .quad 0x2254ae83d22f7843 + .quad 0xc522d02e7bbfcdb7 + .quad 0x681e3351bff0e4e2 + .quad 0x8b64b59d83034f45 + .quad 0x2f8b71f21fa20efb + .quad 0x69249495ba6550e4 + .quad 0x539ef98e45d5472b + + // 2^40 * 1 * G + + .quad 0x6e7bb6a1a6205275 + .quad 0xaa4f21d7413c8e83 + .quad 0x6f56d155e88f5cb2 + .quad 0x2de25d4ba6345be1 + .quad 0xd074d8961cae743f + .quad 0xf86d18f5ee1c63ed + .quad 0x97bdc55be7f4ed29 + .quad 0x4cbad279663ab108 + .quad 0x80d19024a0d71fcd + .quad 0xc525c20afb288af8 + .quad 0xb1a3974b5f3a6419 + .quad 0x7d7fbcefe2007233 + + // 2^40 * 2 * G + + .quad 0xfaef1e6a266b2801 + .quad 0x866c68c4d5739f16 + .quad 0xf68a2fbc1b03762c + .quad 0x5975435e87b75a8d + .quad 0xcd7c5dc5f3c29094 + .quad 0xc781a29a2a9105ab + .quad 0x80c61d36421c3058 + .quad 0x4f9cd196dcd8d4d7 + .quad 0x199297d86a7b3768 + .quad 0xd0d058241ad17a63 + .quad 0xba029cad5c1c0c17 + .quad 0x7ccdd084387a0307 + + // 2^40 * 3 * G + + .quad 0xdca6422c6d260417 + .quad 0xae153d50948240bd + .quad 0xa9c0c1b4fb68c677 + .quad 0x428bd0ed61d0cf53 + .quad 0x9b0c84186760cc93 + .quad 0xcdae007a1ab32a99 + .quad 0xa88dec86620bda18 + .quad 0x3593ca848190ca44 + .quad 0x9213189a5e849aa7 + .quad 0xd4d8c33565d8facd + .quad 0x8c52545b53fdbbd1 + .quad 0x27398308da2d63e6 + + // 2^40 * 4 * G + + .quad 0x42c38d28435ed413 + .quad 0xbd50f3603278ccc9 + .quad 0xbb07ab1a79da03ef + .quad 0x269597aebe8c3355 + .quad 0xb9a10e4c0a702453 + .quad 0x0fa25866d57d1bde + .quad 0xffb9d9b5cd27daf7 + .quad 0x572c2945492c33fd + .quad 0xc77fc745d6cd30be + .quad 0xe4dfe8d3e3baaefb + .quad 0xa22c8830aa5dda0c + .quad 0x7f985498c05bca80 + + // 2^40 * 5 * G + + .quad 0x3849ce889f0be117 + .quad 0x8005ad1b7b54a288 + .quad 0x3da3c39f23fc921c + .quad 0x76c2ec470a31f304 + .quad 0xd35615520fbf6363 + .quad 0x08045a45cf4dfba6 + .quad 0xeec24fbc873fa0c2 + .quad 0x30f2653cd69b12e7 + .quad 0x8a08c938aac10c85 + .quad 0x46179b60db276bcb + .quad 0xa920c01e0e6fac70 + .quad 0x2f1273f1596473da + + // 2^40 * 6 * G + + .quad 0x4739fc7c8ae01e11 + .quad 0xfd5274904a6aab9f + .quad 0x41d98a8287728f2e + .quad 0x5d9e572ad85b69f2 + .quad 0x30488bd755a70bc0 + .quad 0x06d6b5a4f1d442e7 + .quad 0xead1a69ebc596162 + .quad 0x38ac1997edc5f784 + .quad 0x0666b517a751b13b + .quad 0x747d06867e9b858c + .quad 0xacacc011454dde49 + .quad 0x22dfcd9cbfe9e69c + + // 2^40 * 7 * G + + .quad 0x8ddbd2e0c30d0cd9 + .quad 0xad8e665facbb4333 + .quad 0x8f6b258c322a961f + .quad 0x6b2916c05448c1c7 + .quad 0x56ec59b4103be0a1 + .quad 0x2ee3baecd259f969 + .quad 0x797cb29413f5cd32 + .quad 0x0fe9877824cde472 + .quad 0x7edb34d10aba913b + .quad 0x4ea3cd822e6dac0e + .quad 0x66083dff6578f815 + .quad 0x4c303f307ff00a17 + + // 2^40 * 8 * G + + .quad 0xd30a3bd617b28c85 + .quad 0xc5d377b739773bea + .quad 0xc6c6e78c1e6a5cbf + .quad 0x0d61b8f78b2ab7c4 + .quad 0x29fc03580dd94500 + .quad 0xecd27aa46fbbec93 + .quad 
0x130a155fc2e2a7f8 + .quad 0x416b151ab706a1d5 + .quad 0x56a8d7efe9c136b0 + .quad 0xbd07e5cd58e44b20 + .quad 0xafe62fda1b57e0ab + .quad 0x191a2af74277e8d2 + + // 2^44 * 1 * G + + .quad 0xd550095bab6f4985 + .quad 0x04f4cd5b4fbfaf1a + .quad 0x9d8e2ed12a0c7540 + .quad 0x2bc24e04b2212286 + .quad 0x09d4b60b2fe09a14 + .quad 0xc384f0afdbb1747e + .quad 0x58e2ea8978b5fd6e + .quad 0x519ef577b5e09b0a + .quad 0x1863d7d91124cca9 + .quad 0x7ac08145b88a708e + .quad 0x2bcd7309857031f5 + .quad 0x62337a6e8ab8fae5 + + // 2^44 * 2 * G + + .quad 0x4bcef17f06ffca16 + .quad 0xde06e1db692ae16a + .quad 0x0753702d614f42b0 + .quad 0x5f6041b45b9212d0 + .quad 0xd1ab324e1b3a1273 + .quad 0x18947cf181055340 + .quad 0x3b5d9567a98c196e + .quad 0x7fa00425802e1e68 + .quad 0x7d531574028c2705 + .quad 0x80317d69db0d75fe + .quad 0x30fface8ef8c8ddd + .quad 0x7e9de97bb6c3e998 + + // 2^44 * 3 * G + + .quad 0x1558967b9e6585a3 + .quad 0x97c99ce098e98b92 + .quad 0x10af149b6eb3adad + .quad 0x42181fe8f4d38cfa + .quad 0xf004be62a24d40dd + .quad 0xba0659910452d41f + .quad 0x81c45ee162a44234 + .quad 0x4cb829d8a22266ef + .quad 0x1dbcaa8407b86681 + .quad 0x081f001e8b26753b + .quad 0x3cd7ce6a84048e81 + .quad 0x78af11633f25f22c + + // 2^44 * 4 * G + + .quad 0x8416ebd40b50babc + .quad 0x1508722628208bee + .quad 0xa3148fafb9c1c36d + .quad 0x0d07daacd32d7d5d + .quad 0x3241c00e7d65318c + .quad 0xe6bee5dcd0e86de7 + .quad 0x118b2dc2fbc08c26 + .quad 0x680d04a7fc603dc3 + .quad 0xf9c2414a695aa3eb + .quad 0xdaa42c4c05a68f21 + .quad 0x7c6c23987f93963e + .quad 0x210e8cd30c3954e3 + + // 2^44 * 5 * G + + .quad 0xac4201f210a71c06 + .quad 0x6a65e0aef3bfb021 + .quad 0xbc42c35c393632f7 + .quad 0x56ea8db1865f0742 + .quad 0x2b50f16137fe6c26 + .quad 0xe102bcd856e404d8 + .quad 0x12b0f1414c561f6b + .quad 0x51b17bc8d028ec91 + .quad 0xfff5fb4bcf535119 + .quad 0xf4989d79df1108a0 + .quad 0xbdfcea659a3ba325 + .quad 0x18a11f1174d1a6f2 + + // 2^44 * 6 * G + + .quad 0x407375ab3f6bba29 + .quad 0x9ec3b6d8991e482e + .quad 0x99c80e82e55f92e9 + .quad 0x307c13b6fb0c0ae1 + .quad 0xfbd63cdad27a5f2c + .quad 0xf00fc4bc8aa106d7 + .quad 0x53fb5c1a8e64a430 + .quad 0x04eaabe50c1a2e85 + .quad 0x24751021cb8ab5e7 + .quad 0xfc2344495c5010eb + .quad 0x5f1e717b4e5610a1 + .quad 0x44da5f18c2710cd5 + + // 2^44 * 7 * G + + .quad 0x033cc55ff1b82eb5 + .quad 0xb15ae36d411cae52 + .quad 0xba40b6198ffbacd3 + .quad 0x768edce1532e861f + .quad 0x9156fe6b89d8eacc + .quad 0xe6b79451e23126a1 + .quad 0xbd7463d93944eb4e + .quad 0x726373f6767203ae + .quad 0xe305ca72eb7ef68a + .quad 0x662cf31f70eadb23 + .quad 0x18f026fdb4c45b68 + .quad 0x513b5384b5d2ecbd + + // 2^44 * 8 * G + + .quad 0x46d46280c729989e + .quad 0x4b93fbd05368a5dd + .quad 0x63df3f81d1765a89 + .quad 0x34cebd64b9a0a223 + .quad 0x5e2702878af34ceb + .quad 0x900b0409b946d6ae + .quad 0x6512ebf7dabd8512 + .quad 0x61d9b76988258f81 + .quad 0xa6c5a71349b7d94b + .quad 0xa3f3d15823eb9446 + .quad 0x0416fbd277484834 + .quad 0x69d45e6f2c70812f + + // 2^48 * 1 * G + + .quad 0xce16f74bc53c1431 + .quad 0x2b9725ce2072edde + .quad 0xb8b9c36fb5b23ee7 + .quad 0x7e2e0e450b5cc908 + .quad 0x9fe62b434f460efb + .quad 0xded303d4a63607d6 + .quad 0xf052210eb7a0da24 + .quad 0x237e7dbe00545b93 + .quad 0x013575ed6701b430 + .quad 0x231094e69f0bfd10 + .quad 0x75320f1583e47f22 + .quad 0x71afa699b11155e3 + + // 2^48 * 2 * G + + .quad 0x65ce6f9b3953b61d + .quad 0xc65839eaafa141e6 + .quad 0x0f435ffda9f759fe + .quad 0x021142e9c2b1c28e + .quad 0xea423c1c473b50d6 + .quad 0x51e87a1f3b38ef10 + .quad 0x9b84bf5fb2c9be95 + .quad 0x00731fbc78f89a1c + .quad 0xe430c71848f81880 + .quad 
0xbf960c225ecec119 + .quad 0xb6dae0836bba15e3 + .quad 0x4c4d6f3347e15808 + + // 2^48 * 3 * G + + .quad 0x18f7eccfc17d1fc9 + .quad 0x6c75f5a651403c14 + .quad 0xdbde712bf7ee0cdf + .quad 0x193fddaaa7e47a22 + .quad 0x2f0cddfc988f1970 + .quad 0x6b916227b0b9f51b + .quad 0x6ec7b6c4779176be + .quad 0x38bf9500a88f9fa8 + .quad 0x1fd2c93c37e8876f + .quad 0xa2f61e5a18d1462c + .quad 0x5080f58239241276 + .quad 0x6a6fb99ebf0d4969 + + // 2^48 * 4 * G + + .quad 0x6a46c1bb560855eb + .quad 0x2416bb38f893f09d + .quad 0xd71d11378f71acc1 + .quad 0x75f76914a31896ea + .quad 0xeeb122b5b6e423c6 + .quad 0x939d7010f286ff8e + .quad 0x90a92a831dcf5d8c + .quad 0x136fda9f42c5eb10 + .quad 0xf94cdfb1a305bdd1 + .quad 0x0f364b9d9ff82c08 + .quad 0x2a87d8a5c3bb588a + .quad 0x022183510be8dcba + + // 2^48 * 5 * G + + .quad 0x4af766385ead2d14 + .quad 0xa08ed880ca7c5830 + .quad 0x0d13a6e610211e3d + .quad 0x6a071ce17b806c03 + .quad 0x9d5a710143307a7f + .quad 0xb063de9ec47da45f + .quad 0x22bbfe52be927ad3 + .quad 0x1387c441fd40426c + .quad 0xb5d3c3d187978af8 + .quad 0x722b5a3d7f0e4413 + .quad 0x0d7b4848bb477ca0 + .quad 0x3171b26aaf1edc92 + + // 2^48 * 6 * G + + .quad 0xa92f319097564ca8 + .quad 0xff7bb84c2275e119 + .quad 0x4f55fe37a4875150 + .quad 0x221fd4873cf0835a + .quad 0xa60db7d8b28a47d1 + .quad 0xa6bf14d61770a4f1 + .quad 0xd4a1f89353ddbd58 + .quad 0x6c514a63344243e9 + .quad 0x2322204f3a156341 + .quad 0xfb73e0e9ba0a032d + .quad 0xfce0dd4c410f030e + .quad 0x48daa596fb924aaa + + // 2^48 * 7 * G + + .quad 0x6eca8e665ca59cc7 + .quad 0xa847254b2e38aca0 + .quad 0x31afc708d21e17ce + .quad 0x676dd6fccad84af7 + .quad 0x14f61d5dc84c9793 + .quad 0x9941f9e3ef418206 + .quad 0xcdf5b88f346277ac + .quad 0x58c837fa0e8a79a9 + .quad 0x0cf9688596fc9058 + .quad 0x1ddcbbf37b56a01b + .quad 0xdcc2e77d4935d66a + .quad 0x1c4f73f2c6a57f0a + + // 2^48 * 8 * G + + .quad 0x0e7a4fbd305fa0bb + .quad 0x829d4ce054c663ad + .quad 0xf421c3832fe33848 + .quad 0x795ac80d1bf64c42 + .quad 0xb36e706efc7c3484 + .quad 0x73dfc9b4c3c1cf61 + .quad 0xeb1d79c9781cc7e5 + .quad 0x70459adb7daf675c + .quad 0x1b91db4991b42bb3 + .quad 0x572696234b02dcca + .quad 0x9fdf9ee51f8c78dc + .quad 0x5fe162848ce21fd3 + + // 2^52 * 1 * G + + .quad 0xe2790aae4d077c41 + .quad 0x8b938270db7469a3 + .quad 0x6eb632dc8abd16a2 + .quad 0x720814ecaa064b72 + .quad 0x315c29c795115389 + .quad 0xd7e0e507862f74ce + .quad 0x0c4a762185927432 + .quad 0x72de6c984a25a1e4 + .quad 0xae9ab553bf6aa310 + .quad 0x050a50a9806d6e1b + .quad 0x92bb7403adff5139 + .quad 0x0394d27645be618b + + // 2^52 * 2 * G + + .quad 0x4d572251857eedf4 + .quad 0xe3724edde19e93c5 + .quad 0x8a71420e0b797035 + .quad 0x3b3c833687abe743 + .quad 0xf5396425b23545a4 + .quad 0x15a7a27e98fbb296 + .quad 0xab6c52bc636fdd86 + .quad 0x79d995a8419334ee + .quad 0xcd8a8ea61195dd75 + .quad 0xa504d8a81dd9a82f + .quad 0x540dca81a35879b6 + .quad 0x60dd16a379c86a8a + + // 2^52 * 3 * G + + .quad 0x35a2c8487381e559 + .quad 0x596ffea6d78082cb + .quad 0xcb9771ebdba7b653 + .quad 0x5a08b5019b4da685 + .quad 0x3501d6f8153e47b8 + .quad 0xb7a9675414a2f60c + .quad 0x112ee8b6455d9523 + .quad 0x4e62a3c18112ea8a + .quad 0xc8d4ac04516ab786 + .quad 0x595af3215295b23d + .quad 0xd6edd234db0230c1 + .quad 0x0929efe8825b41cc + + // 2^52 * 4 * G + + .quad 0x5f0601d1cbd0f2d3 + .quad 0x736e412f6132bb7f + .quad 0x83604432238dde87 + .quad 0x1e3a5272f5c0753c + .quad 0x8b3172b7ad56651d + .quad 0x01581b7a3fabd717 + .quad 0x2dc94df6424df6e4 + .quad 0x30376e5d2c29284f + .quad 0xd2918da78159a59c + .quad 0x6bdc1cd93f0713f3 + .quad 0x565f7a934acd6590 + .quad 0x53daacec4cb4c128 + + // 2^52 * 5 * G + 
+ .quad 0x4ca73bd79cc8a7d6 + .quad 0x4d4a738f47e9a9b2 + .quad 0xf4cbf12942f5fe00 + .quad 0x01a13ff9bdbf0752 + .quad 0x99852bc3852cfdb0 + .quad 0x2cc12e9559d6ed0b + .quad 0x70f9e2bf9b5ac27b + .quad 0x4f3b8c117959ae99 + .quad 0x55b6c9c82ff26412 + .quad 0x1ac4a8c91fb667a8 + .quad 0xd527bfcfeb778bf2 + .quad 0x303337da7012a3be + + // 2^52 * 6 * G + + .quad 0x955422228c1c9d7c + .quad 0x01fac1371a9b340f + .quad 0x7e8d9177925b48d7 + .quad 0x53f8ad5661b3e31b + .quad 0x976d3ccbfad2fdd1 + .quad 0xcb88839737a640a8 + .quad 0x2ff00c1d6734cb25 + .quad 0x269ff4dc789c2d2b + .quad 0x0c003fbdc08d678d + .quad 0x4d982fa37ead2b17 + .quad 0xc07e6bcdb2e582f1 + .quad 0x296c7291df412a44 + + // 2^52 * 7 * G + + .quad 0x7903de2b33daf397 + .quad 0xd0ff0619c9a624b3 + .quad 0x8a1d252b555b3e18 + .quad 0x2b6d581c52e0b7c0 + .quad 0xdfb23205dab8b59e + .quad 0x465aeaa0c8092250 + .quad 0xd133c1189a725d18 + .quad 0x2327370261f117d1 + .quad 0x3d0543d3623e7986 + .quad 0x679414c2c278a354 + .quad 0xae43f0cc726196f6 + .quad 0x7836c41f8245eaba + + // 2^52 * 8 * G + + .quad 0xe7a254db49e95a81 + .quad 0x5192d5d008b0ad73 + .quad 0x4d20e5b1d00afc07 + .quad 0x5d55f8012cf25f38 + .quad 0xca651e848011937c + .quad 0xc6b0c46e6ef41a28 + .quad 0xb7021ba75f3f8d52 + .quad 0x119dff99ead7b9fd + .quad 0x43eadfcbf4b31d4d + .quad 0xc6503f7411148892 + .quad 0xfeee68c5060d3b17 + .quad 0x329293b3dd4a0ac8 + + // 2^56 * 1 * G + + .quad 0x4e59214fe194961a + .quad 0x49be7dc70d71cd4f + .quad 0x9300cfd23b50f22d + .quad 0x4789d446fc917232 + .quad 0x2879852d5d7cb208 + .quad 0xb8dedd70687df2e7 + .quad 0xdc0bffab21687891 + .quad 0x2b44c043677daa35 + .quad 0x1a1c87ab074eb78e + .quad 0xfac6d18e99daf467 + .quad 0x3eacbbcd484f9067 + .quad 0x60c52eef2bb9a4e4 + + // 2^56 * 2 * G + + .quad 0x0b5d89bc3bfd8bf1 + .quad 0xb06b9237c9f3551a + .quad 0x0e4c16b0d53028f5 + .quad 0x10bc9c312ccfcaab + .quad 0x702bc5c27cae6d11 + .quad 0x44c7699b54a48cab + .quad 0xefbc4056ba492eb2 + .quad 0x70d77248d9b6676d + .quad 0xaa8ae84b3ec2a05b + .quad 0x98699ef4ed1781e0 + .quad 0x794513e4708e85d1 + .quad 0x63755bd3a976f413 + + // 2^56 * 3 * G + + .quad 0xb55fa03e2ad10853 + .quad 0x356f75909ee63569 + .quad 0x9ff9f1fdbe69b890 + .quad 0x0d8cc1c48bc16f84 + .quad 0x3dc7101897f1acb7 + .quad 0x5dda7d5ec165bbd8 + .quad 0x508e5b9c0fa1020f + .quad 0x2763751737c52a56 + .quad 0x029402d36eb419a9 + .quad 0xf0b44e7e77b460a5 + .quad 0xcfa86230d43c4956 + .quad 0x70c2dd8a7ad166e7 + + // 2^56 * 4 * G + + .quad 0x656194509f6fec0e + .quad 0xee2e7ea946c6518d + .quad 0x9733c1f367e09b5c + .quad 0x2e0fac6363948495 + .quad 0x91d4967db8ed7e13 + .quad 0x74252f0ad776817a + .quad 0xe40982e00d852564 + .quad 0x32b8613816a53ce5 + .quad 0x79e7f7bee448cd64 + .quad 0x6ac83a67087886d0 + .quad 0xf89fd4d9a0e4db2e + .quad 0x4179215c735a4f41 + + // 2^56 * 5 * G + + .quad 0x8c7094e7d7dced2a + .quad 0x97fb8ac347d39c70 + .quad 0xe13be033a906d902 + .quad 0x700344a30cd99d76 + .quad 0xe4ae33b9286bcd34 + .quad 0xb7ef7eb6559dd6dc + .quad 0x278b141fb3d38e1f + .quad 0x31fa85662241c286 + .quad 0xaf826c422e3622f4 + .quad 0xc12029879833502d + .quad 0x9bc1b7e12b389123 + .quad 0x24bb2312a9952489 + + // 2^56 * 6 * G + + .quad 0xb1a8ed1732de67c3 + .quad 0x3cb49418461b4948 + .quad 0x8ebd434376cfbcd2 + .quad 0x0fee3e871e188008 + .quad 0x41f80c2af5f85c6b + .quad 0x687284c304fa6794 + .quad 0x8945df99a3ba1bad + .quad 0x0d1d2af9ffeb5d16 + .quad 0xa9da8aa132621edf + .quad 0x30b822a159226579 + .quad 0x4004197ba79ac193 + .quad 0x16acd79718531d76 + + // 2^56 * 7 * G + + .quad 0x72df72af2d9b1d3d + .quad 0x63462a36a432245a + .quad 0x3ecea07916b39637 + .quad 
0x123e0ef6b9302309 + .quad 0xc959c6c57887b6ad + .quad 0x94e19ead5f90feba + .quad 0x16e24e62a342f504 + .quad 0x164ed34b18161700 + .quad 0x487ed94c192fe69a + .quad 0x61ae2cea3a911513 + .quad 0x877bf6d3b9a4de27 + .quad 0x78da0fc61073f3eb + + // 2^56 * 8 * G + + .quad 0x5bf15d28e52bc66a + .quad 0x2c47e31870f01a8e + .quad 0x2419afbc06c28bdd + .quad 0x2d25deeb256b173a + .quad 0xa29f80f1680c3a94 + .quad 0x71f77e151ae9e7e6 + .quad 0x1100f15848017973 + .quad 0x054aa4b316b38ddd + .quad 0xdfc8468d19267cb8 + .quad 0x0b28789c66e54daf + .quad 0x2aeb1d2a666eec17 + .quad 0x134610a6ab7da760 + + // 2^60 * 1 * G + + .quad 0xcaf55ec27c59b23f + .quad 0x99aeed3e154d04f2 + .quad 0x68441d72e14141f4 + .quad 0x140345133932a0a2 + .quad 0xd91430e0dc028c3c + .quad 0x0eb955a85217c771 + .quad 0x4b09e1ed2c99a1fa + .quad 0x42881af2bd6a743c + .quad 0x7bfec69aab5cad3d + .quad 0xc23e8cd34cb2cfad + .quad 0x685dd14bfb37d6a2 + .quad 0x0ad6d64415677a18 + + // 2^60 * 2 * G + + .quad 0x781a439e417becb5 + .quad 0x4ac5938cd10e0266 + .quad 0x5da385110692ac24 + .quad 0x11b065a2ade31233 + .quad 0x7914892847927e9f + .quad 0x33dad6ef370aa877 + .quad 0x1f8f24fa11122703 + .quad 0x5265ac2f2adf9592 + .quad 0x405fdd309afcb346 + .quad 0xd9723d4428e63f54 + .quad 0x94c01df05f65aaae + .quad 0x43e4dc3ae14c0809 + + // 2^60 * 3 * G + + .quad 0xbc12c7f1a938a517 + .quad 0x473028ab3180b2e1 + .quad 0x3f78571efbcd254a + .quad 0x74e534426ff6f90f + .quad 0xea6f7ac3adc2c6a3 + .quad 0xd0e928f6e9717c94 + .quad 0xe2d379ead645eaf5 + .quad 0x46dd8785c51ffbbe + .quad 0x709801be375c8898 + .quad 0x4b06dab5e3fd8348 + .quad 0x75880ced27230714 + .quad 0x2b09468fdd2f4c42 + + // 2^60 * 4 * G + + .quad 0x97c749eeb701cb96 + .quad 0x83f438d4b6a369c3 + .quad 0x62962b8b9a402cd9 + .quad 0x6976c7509888df7b + .quad 0x5b97946582ffa02a + .quad 0xda096a51fea8f549 + .quad 0xa06351375f77af9b + .quad 0x1bcfde61201d1e76 + .quad 0x4a4a5490246a59a2 + .quad 0xd63ebddee87fdd90 + .quad 0xd9437c670d2371fa + .quad 0x69e87308d30f8ed6 + + // 2^60 * 5 * G + + .quad 0x435a8bb15656beb0 + .quad 0xf8fac9ba4f4d5bca + .quad 0xb9b278c41548c075 + .quad 0x3eb0ef76e892b622 + .quad 0x0f80bf028bc80303 + .quad 0x6aae16b37a18cefb + .quad 0xdd47ea47d72cd6a3 + .quad 0x61943588f4ed39aa + .quad 0xd26e5c3e91039f85 + .quad 0xc0e9e77df6f33aa9 + .quad 0xe8968c5570066a93 + .quad 0x3c34d1881faaaddd + + // 2^60 * 6 * G + + .quad 0x3f9d2b5ea09f9ec0 + .quad 0x1dab3b6fb623a890 + .quad 0xa09ba3ea72d926c4 + .quad 0x374193513fd8b36d + .quad 0xbd5b0b8f2fffe0d9 + .quad 0x6aa254103ed24fb9 + .quad 0x2ac7d7bcb26821c4 + .quad 0x605b394b60dca36a + .quad 0xb4e856e45a9d1ed2 + .quad 0xefe848766c97a9a2 + .quad 0xb104cf641e5eee7d + .quad 0x2f50b81c88a71c8f + + // 2^60 * 7 * G + + .quad 0x31723c61fc6811bb + .quad 0x9cb450486211800f + .quad 0x768933d347995753 + .quad 0x3491a53502752fcd + .quad 0x2b552ca0a7da522a + .quad 0x3230b336449b0250 + .quad 0xf2c4c5bca4b99fb9 + .quad 0x7b2c674958074a22 + .quad 0xd55165883ed28cdf + .quad 0x12d84fd2d362de39 + .quad 0x0a874ad3e3378e4f + .quad 0x000d2b1f7c763e74 + + // 2^60 * 8 * G + + .quad 0x3d420811d06d4a67 + .quad 0xbefc048590e0ffe3 + .quad 0xf870c6b7bd487bde + .quad 0x6e2a7316319afa28 + .quad 0x9624778c3e94a8ab + .quad 0x0ad6f3cee9a78bec + .quad 0x948ac7810d743c4f + .quad 0x76627935aaecfccc + .quad 0x56a8ac24d6d59a9f + .quad 0xc8db753e3096f006 + .quad 0x477f41e68f4c5299 + .quad 0x588d851cf6c86114 + + // 2^64 * 1 * G + + .quad 0x51138ec78df6b0fe + .quad 0x5397da89e575f51b + .quad 0x09207a1d717af1b9 + .quad 0x2102fdba2b20d650 + .quad 0xcd2a65e777d1f515 + .quad 0x548991878faa60f1 + .quad 
0xb1b73bbcdabc06e5 + .quad 0x654878cba97cc9fb + .quad 0x969ee405055ce6a1 + .quad 0x36bca7681251ad29 + .quad 0x3a1af517aa7da415 + .quad 0x0ad725db29ecb2ba + + // 2^64 * 2 * G + + .quad 0xdc4267b1834e2457 + .quad 0xb67544b570ce1bc5 + .quad 0x1af07a0bf7d15ed7 + .quad 0x4aefcffb71a03650 + .quad 0xfec7bc0c9b056f85 + .quad 0x537d5268e7f5ffd7 + .quad 0x77afc6624312aefa + .quad 0x4f675f5302399fd9 + .quad 0xc32d36360415171e + .quad 0xcd2bef118998483b + .quad 0x870a6eadd0945110 + .quad 0x0bccbb72a2a86561 + + // 2^64 * 3 * G + + .quad 0x185e962feab1a9c8 + .quad 0x86e7e63565147dcd + .quad 0xb092e031bb5b6df2 + .quad 0x4024f0ab59d6b73e + .quad 0x186d5e4c50fe1296 + .quad 0xe0397b82fee89f7e + .quad 0x3bc7f6c5507031b0 + .quad 0x6678fd69108f37c2 + .quad 0x1586fa31636863c2 + .quad 0x07f68c48572d33f2 + .quad 0x4f73cc9f789eaefc + .quad 0x2d42e2108ead4701 + + // 2^64 * 4 * G + + .quad 0x97f5131594dfd29b + .quad 0x6155985d313f4c6a + .quad 0xeba13f0708455010 + .quad 0x676b2608b8d2d322 + .quad 0x21717b0d0f537593 + .quad 0x914e690b131e064c + .quad 0x1bb687ae752ae09f + .quad 0x420bf3a79b423c6e + .quad 0x8138ba651c5b2b47 + .quad 0x8671b6ec311b1b80 + .quad 0x7bff0cb1bc3135b0 + .quad 0x745d2ffa9c0cf1e0 + + // 2^64 * 5 * G + + .quad 0xbf525a1e2bc9c8bd + .quad 0xea5b260826479d81 + .quad 0xd511c70edf0155db + .quad 0x1ae23ceb960cf5d0 + .quad 0x6036df5721d34e6a + .quad 0xb1db8827997bb3d0 + .quad 0xd3c209c3c8756afa + .quad 0x06e15be54c1dc839 + .quad 0x5b725d871932994a + .quad 0x32351cb5ceb1dab0 + .quad 0x7dc41549dab7ca05 + .quad 0x58ded861278ec1f7 + + // 2^64 * 6 * G + + .quad 0xd8173793f266c55c + .quad 0xc8c976c5cc454e49 + .quad 0x5ce382f8bc26c3a8 + .quad 0x2ff39de85485f6f9 + .quad 0x2dfb5ba8b6c2c9a8 + .quad 0x48eeef8ef52c598c + .quad 0x33809107f12d1573 + .quad 0x08ba696b531d5bd8 + .quad 0x77ed3eeec3efc57a + .quad 0x04e05517d4ff4811 + .quad 0xea3d7a3ff1a671cb + .quad 0x120633b4947cfe54 + + // 2^64 * 7 * G + + .quad 0x0b94987891610042 + .quad 0x4ee7b13cecebfae8 + .quad 0x70be739594f0a4c0 + .quad 0x35d30a99b4d59185 + .quad 0x82bd31474912100a + .quad 0xde237b6d7e6fbe06 + .quad 0xe11e761911ea79c6 + .quad 0x07433be3cb393bde + .quad 0xff7944c05ce997f4 + .quad 0x575d3de4b05c51a3 + .quad 0x583381fd5a76847c + .quad 0x2d873ede7af6da9f + + // 2^64 * 8 * G + + .quad 0x157a316443373409 + .quad 0xfab8b7eef4aa81d9 + .quad 0xb093fee6f5a64806 + .quad 0x2e773654707fa7b6 + .quad 0xaa6202e14e5df981 + .quad 0xa20d59175015e1f5 + .quad 0x18a275d3bae21d6c + .quad 0x0543618a01600253 + .quad 0x0deabdf4974c23c1 + .quad 0xaa6f0a259dce4693 + .quad 0x04202cb8a29aba2c + .quad 0x4b1443362d07960d + + // 2^68 * 1 * G + + .quad 0x47b837f753242cec + .quad 0x256dc48cc04212f2 + .quad 0xe222fbfbe1d928c5 + .quad 0x48ea295bad8a2c07 + .quad 0x299b1c3f57c5715e + .quad 0x96cb929e6b686d90 + .quad 0x3004806447235ab3 + .quad 0x2c435c24a44d9fe1 + .quad 0x0607c97c80f8833f + .quad 0x0e851578ca25ec5b + .quad 0x54f7450b161ebb6f + .quad 0x7bcb4792a0def80e + + // 2^68 * 2 * G + + .quad 0x8487e3d02bc73659 + .quad 0x4baf8445059979df + .quad 0xd17c975adcad6fbf + .quad 0x57369f0bdefc96b6 + .quad 0x1cecd0a0045224c2 + .quad 0x757f1b1b69e53952 + .quad 0x775b7a925289f681 + .quad 0x1b6cc62016736148 + .quad 0xf1a9990175638698 + .quad 0x353dd1beeeaa60d3 + .quad 0x849471334c9ba488 + .quad 0x63fa6e6843ade311 + + // 2^68 * 3 * G + + .quad 0xd15c20536597c168 + .quad 0x9f73740098d28789 + .quad 0x18aee7f13257ba1f + .quad 0x3418bfda07346f14 + .quad 0x2195becdd24b5eb7 + .quad 0x5e41f18cc0cd44f9 + .quad 0xdf28074441ca9ede + .quad 0x07073b98f35b7d67 + .quad 0xd03c676c4ce530d4 + .quad 
0x0b64c0473b5df9f4 + .quad 0x065cef8b19b3a31e + .quad 0x3084d661533102c9 + + // 2^68 * 4 * G + + .quad 0xe1f6b79ebf8469ad + .quad 0x15801004e2663135 + .quad 0x9a498330af74181b + .quad 0x3ba2504f049b673c + .quad 0x9a6ce876760321fd + .quad 0x7fe2b5109eb63ad8 + .quad 0x00e7d4ae8ac80592 + .quad 0x73d86b7abb6f723a + .quad 0x0b52b5606dba5ab6 + .quad 0xa9134f0fbbb1edab + .quad 0x30a9520d9b04a635 + .quad 0x6813b8f37973e5db + + // 2^68 * 5 * G + + .quad 0x9854b054334127c1 + .quad 0x105d047882fbff25 + .quad 0xdb49f7f944186f4f + .quad 0x1768e838bed0b900 + .quad 0xf194ca56f3157e29 + .quad 0x136d35705ef528a5 + .quad 0xdd4cef778b0599bc + .quad 0x7d5472af24f833ed + .quad 0xd0ef874daf33da47 + .quad 0x00d3be5db6e339f9 + .quad 0x3f2a8a2f9c9ceece + .quad 0x5d1aeb792352435a + + // 2^68 * 6 * G + + .quad 0xf59e6bb319cd63ca + .quad 0x670c159221d06839 + .quad 0xb06d565b2150cab6 + .quad 0x20fb199d104f12a3 + .quad 0x12c7bfaeb61ba775 + .quad 0xb84e621fe263bffd + .quad 0x0b47a5c35c840dcf + .quad 0x7e83be0bccaf8634 + .quad 0x61943dee6d99c120 + .quad 0x86101f2e460b9fe0 + .quad 0x6bb2f1518ee8598d + .quad 0x76b76289fcc475cc + + // 2^68 * 7 * G + + .quad 0x791b4cc1756286fa + .quad 0xdbced317d74a157c + .quad 0x7e732421ea72bde6 + .quad 0x01fe18491131c8e9 + .quad 0x4245f1a1522ec0b3 + .quad 0x558785b22a75656d + .quad 0x1d485a2548a1b3c0 + .quad 0x60959eccd58fe09f + .quad 0x3ebfeb7ba8ed7a09 + .quad 0x49fdc2bbe502789c + .quad 0x44ebce5d3c119428 + .quad 0x35e1eb55be947f4a + + // 2^68 * 8 * G + + .quad 0xdbdae701c5738dd3 + .quad 0xf9c6f635b26f1bee + .quad 0x61e96a8042f15ef4 + .quad 0x3aa1d11faf60a4d8 + .quad 0x14fd6dfa726ccc74 + .quad 0x3b084cfe2f53b965 + .quad 0xf33ae4f552a2c8b4 + .quad 0x59aab07a0d40166a + .quad 0x77bcec4c925eac25 + .quad 0x1848718460137738 + .quad 0x5b374337fea9f451 + .quad 0x1865e78ec8e6aa46 + + // 2^72 * 1 * G + + .quad 0xccc4b7c7b66e1f7a + .quad 0x44157e25f50c2f7e + .quad 0x3ef06dfc713eaf1c + .quad 0x582f446752da63f7 + .quad 0x967c54e91c529ccb + .quad 0x30f6269264c635fb + .quad 0x2747aff478121965 + .quad 0x17038418eaf66f5c + .quad 0xc6317bd320324ce4 + .quad 0xa81042e8a4488bc4 + .quad 0xb21ef18b4e5a1364 + .quad 0x0c2a1c4bcda28dc9 + + // 2^72 * 2 * G + + .quad 0xd24dc7d06f1f0447 + .quad 0xb2269e3edb87c059 + .quad 0xd15b0272fbb2d28f + .quad 0x7c558bd1c6f64877 + .quad 0xedc4814869bd6945 + .quad 0x0d6d907dbe1c8d22 + .quad 0xc63bd212d55cc5ab + .quad 0x5a6a9b30a314dc83 + .quad 0xd0ec1524d396463d + .quad 0x12bb628ac35a24f0 + .quad 0xa50c3a791cbc5fa4 + .quad 0x0404a5ca0afbafc3 + + // 2^72 * 3 * G + + .quad 0x8c1f40070aa743d6 + .quad 0xccbad0cb5b265ee8 + .quad 0x574b046b668fd2de + .quad 0x46395bfdcadd9633 + .quad 0x62bc9e1b2a416fd1 + .quad 0xb5c6f728e350598b + .quad 0x04343fd83d5d6967 + .quad 0x39527516e7f8ee98 + .quad 0x117fdb2d1a5d9a9c + .quad 0x9c7745bcd1005c2a + .quad 0xefd4bef154d56fea + .quad 0x76579a29e822d016 + + // 2^72 * 4 * G + + .quad 0x45b68e7e49c02a17 + .quad 0x23cd51a2bca9a37f + .quad 0x3ed65f11ec224c1b + .quad 0x43a384dc9e05bdb1 + .quad 0x333cb51352b434f2 + .quad 0xd832284993de80e1 + .quad 0xb5512887750d35ce + .quad 0x02c514bb2a2777c1 + .quad 0x684bd5da8bf1b645 + .quad 0xfb8bd37ef6b54b53 + .quad 0x313916d7a9b0d253 + .quad 0x1160920961548059 + + // 2^72 * 5 * G + + .quad 0xb44d166929dacfaa + .quad 0xda529f4c8413598f + .quad 0xe9ef63ca453d5559 + .quad 0x351e125bc5698e0b + .quad 0x7a385616369b4dcd + .quad 0x75c02ca7655c3563 + .quad 0x7dc21bf9d4f18021 + .quad 0x2f637d7491e6e042 + .quad 0xd4b49b461af67bbe + .quad 0xd603037ac8ab8961 + .quad 0x71dee19ff9a699fb + .quad 0x7f182d06e7ce2a9a + + // 2^72 * 6 * G + 
+ .quad 0x7a7c8e64ab0168ec + .quad 0xcb5a4a5515edc543 + .quad 0x095519d347cd0eda + .quad 0x67d4ac8c343e93b0 + .quad 0x09454b728e217522 + .quad 0xaa58e8f4d484b8d8 + .quad 0xd358254d7f46903c + .quad 0x44acc043241c5217 + .quad 0x1c7d6bbb4f7a5777 + .quad 0x8b35fed4918313e1 + .quad 0x4adca1c6c96b4684 + .quad 0x556d1c8312ad71bd + + // 2^72 * 7 * G + + .quad 0x17ef40e30c8d3982 + .quad 0x31f7073e15a3fa34 + .quad 0x4f21f3cb0773646e + .quad 0x746c6c6d1d824eff + .quad 0x81f06756b11be821 + .quad 0x0faff82310a3f3dd + .quad 0xf8b2d0556a99465d + .quad 0x097abe38cc8c7f05 + .quad 0x0c49c9877ea52da4 + .quad 0x4c4369559bdc1d43 + .quad 0x022c3809f7ccebd2 + .quad 0x577e14a34bee84bd + + // 2^72 * 8 * G + + .quad 0xf0e268ac61a73b0a + .quad 0xf2fafa103791a5f5 + .quad 0xc1e13e826b6d00e9 + .quad 0x60fa7ee96fd78f42 + .quad 0x94fecebebd4dd72b + .quad 0xf46a4fda060f2211 + .quad 0x124a5977c0c8d1ff + .quad 0x705304b8fb009295 + .quad 0xb63d1d354d296ec6 + .quad 0xf3c3053e5fad31d8 + .quad 0x670b958cb4bd42ec + .quad 0x21398e0ca16353fd + + // 2^76 * 1 * G + + .quad 0x216ab2ca8da7d2ef + .quad 0x366ad9dd99f42827 + .quad 0xae64b9004fdd3c75 + .quad 0x403a395b53909e62 + .quad 0x86c5fc16861b7e9a + .quad 0xf6a330476a27c451 + .quad 0x01667267a1e93597 + .quad 0x05ffb9cd6082dfeb + .quad 0xa617fa9ff53f6139 + .quad 0x60f2b5e513e66cb6 + .quad 0xd7a8beefb3448aa4 + .quad 0x7a2932856f5ea192 + + // 2^76 * 2 * G + + .quad 0x0b39d761b02de888 + .quad 0x5f550e7ed2414e1f + .quad 0xa6bfa45822e1a940 + .quad 0x050a2f7dfd447b99 + .quad 0xb89c444879639302 + .quad 0x4ae4f19350c67f2c + .quad 0xf0b35da8c81af9c6 + .quad 0x39d0003546871017 + .quad 0x437c3b33a650db77 + .quad 0x6bafe81dbac52bb2 + .quad 0xfe99402d2db7d318 + .quad 0x2b5b7eec372ba6ce + + // 2^76 * 3 * G + + .quad 0xb3bc4bbd83f50eef + .quad 0x508f0c998c927866 + .quad 0x43e76587c8b7e66e + .quad 0x0f7655a3a47f98d9 + .quad 0xa694404d613ac8f4 + .quad 0x500c3c2bfa97e72c + .quad 0x874104d21fcec210 + .quad 0x1b205fb38604a8ee + .quad 0x55ecad37d24b133c + .quad 0x441e147d6038c90b + .quad 0x656683a1d62c6fee + .quad 0x0157d5dc87e0ecae + + // 2^76 * 4 * G + + .quad 0xf2a7af510354c13d + .quad 0xd7a0b145aa372b60 + .quad 0x2869b96a05a3d470 + .quad 0x6528e42d82460173 + .quad 0x95265514d71eb524 + .quad 0xe603d8815df14593 + .quad 0x147cdf410d4de6b7 + .quad 0x5293b1730437c850 + .quad 0x23d0e0814bccf226 + .quad 0x92c745cd8196fb93 + .quad 0x8b61796c59541e5b + .quad 0x40a44df0c021f978 + + // 2^76 * 5 * G + + .quad 0xdaa869894f20ea6a + .quad 0xea14a3d14c620618 + .quad 0x6001fccb090bf8be + .quad 0x35f4e822947e9cf0 + .quad 0x86c96e514bc5d095 + .quad 0xf20d4098fca6804a + .quad 0x27363d89c826ea5d + .quad 0x39ca36565719cacf + .quad 0x97506f2f6f87b75c + .quad 0xc624aea0034ae070 + .quad 0x1ec856e3aad34dd6 + .quad 0x055b0be0e440e58f + + // 2^76 * 6 * G + + .quad 0x6469a17d89735d12 + .quad 0xdb6f27d5e662b9f1 + .quad 0x9fcba3286a395681 + .quad 0x363b8004d269af25 + .quad 0x4d12a04b6ea33da2 + .quad 0x57cf4c15e36126dd + .quad 0x90ec9675ee44d967 + .quad 0x64ca348d2a985aac + .quad 0x99588e19e4c4912d + .quad 0xefcc3b4e1ca5ce6b + .quad 0x4522ea60fa5b98d5 + .quad 0x7064bbab1de4a819 + + // 2^76 * 7 * G + + .quad 0xb919e1515a770641 + .quad 0xa9a2e2c74e7f8039 + .quad 0x7527250b3df23109 + .quad 0x756a7330ac27b78b + .quad 0xa290c06142542129 + .quad 0xf2e2c2aebe8d5b90 + .quad 0xcf2458db76abfe1b + .quad 0x02157ade83d626bf + .quad 0x3e46972a1b9a038b + .quad 0x2e4ee66a7ee03fb4 + .quad 0x81a248776edbb4ca + .quad 0x1a944ee88ecd0563 + + // 2^76 * 8 * G + + .quad 0xd5a91d1151039372 + .quad 0x2ed377b799ca26de + .quad 0xa17202acfd366b6b + .quad 
0x0730291bd6901995 + .quad 0xbb40a859182362d6 + .quad 0xb99f55778a4d1abb + .quad 0x8d18b427758559f6 + .quad 0x26c20fe74d26235a + .quad 0x648d1d9fe9cc22f5 + .quad 0x66bc561928dd577c + .quad 0x47d3ed21652439d1 + .quad 0x49d271acedaf8b49 + + // 2^80 * 1 * G + + .quad 0x89f5058a382b33f3 + .quad 0x5ae2ba0bad48c0b4 + .quad 0x8f93b503a53db36e + .quad 0x5aa3ed9d95a232e6 + .quad 0x2798aaf9b4b75601 + .quad 0x5eac72135c8dad72 + .quad 0xd2ceaa6161b7a023 + .quad 0x1bbfb284e98f7d4e + .quad 0x656777e9c7d96561 + .quad 0xcb2b125472c78036 + .quad 0x65053299d9506eee + .quad 0x4a07e14e5e8957cc + + // 2^80 * 2 * G + + .quad 0x4ee412cb980df999 + .quad 0xa315d76f3c6ec771 + .quad 0xbba5edde925c77fd + .quad 0x3f0bac391d313402 + .quad 0x240b58cdc477a49b + .quad 0xfd38dade6447f017 + .quad 0x19928d32a7c86aad + .quad 0x50af7aed84afa081 + .quad 0x6e4fde0115f65be5 + .quad 0x29982621216109b2 + .quad 0x780205810badd6d9 + .quad 0x1921a316baebd006 + + // 2^80 * 3 * G + + .quad 0x89422f7edfb870fc + .quad 0x2c296beb4f76b3bd + .quad 0x0738f1d436c24df7 + .quad 0x6458df41e273aeb0 + .quad 0xd75aad9ad9f3c18b + .quad 0x566a0eef60b1c19c + .quad 0x3e9a0bac255c0ed9 + .quad 0x7b049deca062c7f5 + .quad 0xdccbe37a35444483 + .quad 0x758879330fedbe93 + .quad 0x786004c312c5dd87 + .quad 0x6093dccbc2950e64 + + // 2^80 * 4 * G + + .quad 0x1ff39a8585e0706d + .quad 0x36d0a5d8b3e73933 + .quad 0x43b9f2e1718f453b + .quad 0x57d1ea084827a97c + .quad 0x6bdeeebe6084034b + .quad 0x3199c2b6780fb854 + .quad 0x973376abb62d0695 + .quad 0x6e3180c98b647d90 + .quad 0xee7ab6e7a128b071 + .quad 0xa4c1596d93a88baa + .quad 0xf7b4de82b2216130 + .quad 0x363e999ddd97bd18 + + // 2^80 * 5 * G + + .quad 0x96a843c135ee1fc4 + .quad 0x976eb35508e4c8cf + .quad 0xb42f6801b58cd330 + .quad 0x48ee9b78693a052b + .quad 0x2f1848dce24baec6 + .quad 0x769b7255babcaf60 + .quad 0x90cb3c6e3cefe931 + .quad 0x231f979bc6f9b355 + .quad 0x5c31de4bcc2af3c6 + .quad 0xb04bb030fe208d1f + .quad 0xb78d7009c14fb466 + .quad 0x079bfa9b08792413 + + // 2^80 * 6 * G + + .quad 0xe3903a51da300df4 + .quad 0x843964233da95ab0 + .quad 0xed3cf12d0b356480 + .quad 0x038c77f684817194 + .quad 0xf3c9ed80a2d54245 + .quad 0x0aa08b7877f63952 + .quad 0xd76dac63d1085475 + .quad 0x1ef4fb159470636b + .quad 0x854e5ee65b167bec + .quad 0x59590a4296d0cdc2 + .quad 0x72b2df3498102199 + .quad 0x575ee92a4a0bff56 + + // 2^80 * 7 * G + + .quad 0xd4c080908a182fcf + .quad 0x30e170c299489dbd + .quad 0x05babd5752f733de + .quad 0x43d4e7112cd3fd00 + .quad 0x5d46bc450aa4d801 + .quad 0xc3af1227a533b9d8 + .quad 0x389e3b262b8906c2 + .quad 0x200a1e7e382f581b + .quad 0x518db967eaf93ac5 + .quad 0x71bc989b056652c0 + .quad 0xfe2b85d9567197f5 + .quad 0x050eca52651e4e38 + + // 2^80 * 8 * G + + .quad 0xc3431ade453f0c9c + .quad 0xe9f5045eff703b9b + .quad 0xfcd97ac9ed847b3d + .quad 0x4b0ee6c21c58f4c6 + .quad 0x97ac397660e668ea + .quad 0x9b19bbfe153ab497 + .quad 0x4cb179b534eca79f + .quad 0x6151c09fa131ae57 + .quad 0x3af55c0dfdf05d96 + .quad 0xdd262ee02ab4ee7a + .quad 0x11b2bb8712171709 + .quad 0x1fef24fa800f030b + + // 2^84 * 1 * G + + .quad 0xb496123a6b6c6609 + .quad 0xa750fe8580ab5938 + .quad 0xf471bf39b7c27a5f + .quad 0x507903ce77ac193c + .quad 0xff91a66a90166220 + .quad 0xf22552ae5bf1e009 + .quad 0x7dff85d87f90df7c + .quad 0x4f620ffe0c736fb9 + .quad 0x62f90d65dfde3e34 + .quad 0xcf28c592b9fa5fad + .quad 0x99c86ef9c6164510 + .quad 0x25d448044a256c84 + + // 2^84 * 2 * G + + .quad 0xbd68230ec7e9b16f + .quad 0x0eb1b9c1c1c5795d + .quad 0x7943c8c495b6b1ff + .quad 0x2f9faf620bbacf5e + .quad 0x2c7c4415c9022b55 + .quad 0x56a0d241812eb1fe + .quad 
0xf02ea1c9d7b65e0d + .quad 0x4180512fd5323b26 + .quad 0xa4ff3e698a48a5db + .quad 0xba6a3806bd95403b + .quad 0x9f7ce1af47d5b65d + .quad 0x15e087e55939d2fb + + // 2^84 * 3 * G + + .quad 0x12207543745c1496 + .quad 0xdaff3cfdda38610c + .quad 0xe4e797272c71c34f + .quad 0x39c07b1934bdede9 + .quad 0x8894186efb963f38 + .quad 0x48a00e80dc639bd5 + .quad 0xa4e8092be96c1c99 + .quad 0x5a097d54ca573661 + .quad 0x2d45892b17c9e755 + .quad 0xd033fd7289308df8 + .quad 0x6c2fe9d9525b8bd9 + .quad 0x2edbecf1c11cc079 + + // 2^84 * 4 * G + + .quad 0x1616a4e3c715a0d2 + .quad 0x53623cb0f8341d4d + .quad 0x96ef5329c7e899cb + .quad 0x3d4e8dbba668baa6 + .quad 0xee0f0fddd087a25f + .quad 0x9c7531555c3e34ee + .quad 0x660c572e8fab3ab5 + .quad 0x0854fc44544cd3b2 + .quad 0x61eba0c555edad19 + .quad 0x24b533fef0a83de6 + .quad 0x3b77042883baa5f8 + .quad 0x678f82b898a47e8d + + // 2^84 * 5 * G + + .quad 0xb1491d0bd6900c54 + .quad 0x3539722c9d132636 + .quad 0x4db928920b362bc9 + .quad 0x4d7cd1fea68b69df + .quad 0x1e09d94057775696 + .quad 0xeed1265c3cd951db + .quad 0xfa9dac2b20bce16f + .quad 0x0f7f76e0e8d089f4 + .quad 0x36d9ebc5d485b00c + .quad 0xa2596492e4adb365 + .quad 0xc1659480c2119ccd + .quad 0x45306349186e0d5f + + // 2^84 * 6 * G + + .quad 0x94ddd0c1a6cdff1d + .quad 0x55f6f115e84213ae + .quad 0x6c935f85992fcf6a + .quad 0x067ee0f54a37f16f + .quad 0x96a414ec2b072491 + .quad 0x1bb2218127a7b65b + .quad 0x6d2849596e8a4af0 + .quad 0x65f3b08ccd27765f + .quad 0xecb29fff199801f7 + .quad 0x9d361d1fa2a0f72f + .quad 0x25f11d2375fd2f49 + .quad 0x124cefe80fe10fe2 + + // 2^84 * 7 * G + + .quad 0x4c126cf9d18df255 + .quad 0xc1d471e9147a63b6 + .quad 0x2c6d3c73f3c93b5f + .quad 0x6be3a6a2e3ff86a2 + .quad 0x1518e85b31b16489 + .quad 0x8faadcb7db710bfb + .quad 0x39b0bdf4a14ae239 + .quad 0x05f4cbea503d20c1 + .quad 0xce040e9ec04145bc + .quad 0xc71ff4e208f6834c + .quad 0xbd546e8dab8847a3 + .quad 0x64666aa0a4d2aba5 + + // 2^84 * 8 * G + + .quad 0x6841435a7c06d912 + .quad 0xca123c21bb3f830b + .quad 0xd4b37b27b1cbe278 + .quad 0x1d753b84c76f5046 + .quad 0xb0c53bf73337e94c + .quad 0x7cb5697e11e14f15 + .quad 0x4b84abac1930c750 + .quad 0x28dd4abfe0640468 + .quad 0x7dc0b64c44cb9f44 + .quad 0x18a3e1ace3925dbf + .quad 0x7a3034862d0457c4 + .quad 0x4c498bf78a0c892e + + // 2^88 * 1 * G + + .quad 0x37d653fb1aa73196 + .quad 0x0f9495303fd76418 + .quad 0xad200b09fb3a17b2 + .quad 0x544d49292fc8613e + .quad 0x22d2aff530976b86 + .quad 0x8d90b806c2d24604 + .quad 0xdca1896c4de5bae5 + .quad 0x28005fe6c8340c17 + .quad 0x6aefba9f34528688 + .quad 0x5c1bff9425107da1 + .quad 0xf75bbbcd66d94b36 + .quad 0x72e472930f316dfa + + // 2^88 * 2 * G + + .quad 0x2695208c9781084f + .quad 0xb1502a0b23450ee1 + .quad 0xfd9daea603efde02 + .quad 0x5a9d2e8c2733a34c + .quad 0x07f3f635d32a7627 + .quad 0x7aaa4d865f6566f0 + .quad 0x3c85e79728d04450 + .quad 0x1fee7f000fe06438 + .quad 0x765305da03dbf7e5 + .quad 0xa4daf2491434cdbd + .quad 0x7b4ad5cdd24a88ec + .quad 0x00f94051ee040543 + + // 2^88 * 3 * G + + .quad 0x8d356b23c3d330b2 + .quad 0xf21c8b9bb0471b06 + .quad 0xb36c316c6e42b83c + .quad 0x07d79c7e8beab10d + .quad 0xd7ef93bb07af9753 + .quad 0x583ed0cf3db766a7 + .quad 0xce6998bf6e0b1ec5 + .quad 0x47b7ffd25dd40452 + .quad 0x87fbfb9cbc08dd12 + .quad 0x8a066b3ae1eec29b + .quad 0x0d57242bdb1fc1bf + .quad 0x1c3520a35ea64bb6 + + // 2^88 * 4 * G + + .quad 0x80d253a6bccba34a + .quad 0x3e61c3a13838219b + .quad 0x90c3b6019882e396 + .quad 0x1c3d05775d0ee66f + .quad 0xcda86f40216bc059 + .quad 0x1fbb231d12bcd87e + .quad 0xb4956a9e17c70990 + .quad 0x38750c3b66d12e55 + .quad 0x692ef1409422e51a + .quad 
0xcbc0c73c2b5df671 + .quad 0x21014fe7744ce029 + .quad 0x0621e2c7d330487c + + // 2^88 * 5 * G + + .quad 0xaf9860cc8259838d + .quad 0x90ea48c1c69f9adc + .quad 0x6526483765581e30 + .quad 0x0007d6097bd3a5bc + .quad 0xb7ae1796b0dbf0f3 + .quad 0x54dfafb9e17ce196 + .quad 0x25923071e9aaa3b4 + .quad 0x5d8e589ca1002e9d + .quad 0xc0bf1d950842a94b + .quad 0xb2d3c363588f2e3e + .quad 0x0a961438bb51e2ef + .quad 0x1583d7783c1cbf86 + + // 2^88 * 6 * G + + .quad 0xeceea2ef5da27ae1 + .quad 0x597c3a1455670174 + .quad 0xc9a62a126609167a + .quad 0x252a5f2e81ed8f70 + .quad 0x90034704cc9d28c7 + .quad 0x1d1b679ef72cc58f + .quad 0x16e12b5fbe5b8726 + .quad 0x4958064e83c5580a + .quad 0x0d2894265066e80d + .quad 0xfcc3f785307c8c6b + .quad 0x1b53da780c1112fd + .quad 0x079c170bd843b388 + + // 2^88 * 7 * G + + .quad 0x0506ece464fa6fff + .quad 0xbee3431e6205e523 + .quad 0x3579422451b8ea42 + .quad 0x6dec05e34ac9fb00 + .quad 0xcdd6cd50c0d5d056 + .quad 0x9af7686dbb03573b + .quad 0x3ca6723ff3c3ef48 + .quad 0x6768c0d7317b8acc + .quad 0x94b625e5f155c1b3 + .quad 0x417bf3a7997b7b91 + .quad 0xc22cbddc6d6b2600 + .quad 0x51445e14ddcd52f4 + + // 2^88 * 8 * G + + .quad 0x57502b4b3b144951 + .quad 0x8e67ff6b444bbcb3 + .quad 0xb8bd6927166385db + .quad 0x13186f31e39295c8 + .quad 0x893147ab2bbea455 + .quad 0x8c53a24f92079129 + .quad 0x4b49f948be30f7a7 + .quad 0x12e990086e4fd43d + .quad 0xf10c96b37fdfbb2e + .quad 0x9f9a935e121ceaf9 + .quad 0xdf1136c43a5b983f + .quad 0x77b2e3f05d3e99af + + // 2^92 * 1 * G + + .quad 0xfd0d75879cf12657 + .quad 0xe82fef94e53a0e29 + .quad 0xcc34a7f05bbb4be7 + .quad 0x0b251172a50c38a2 + .quad 0x9532f48fcc5cd29b + .quad 0x2ba851bea3ce3671 + .quad 0x32dacaa051122941 + .quad 0x478d99d9350004f2 + .quad 0x1d5ad94890bb02c0 + .quad 0x50e208b10ec25115 + .quad 0xa26a22894ef21702 + .quad 0x4dc923343b524805 + + // 2^92 * 2 * G + + .quad 0xe3828c400f8086b6 + .quad 0x3f77e6f7979f0dc8 + .quad 0x7ef6de304df42cb4 + .quad 0x5265797cb6abd784 + .quad 0x3ad3e3ebf36c4975 + .quad 0xd75d25a537862125 + .quad 0xe873943da025a516 + .quad 0x6bbc7cb4c411c847 + .quad 0x3c6f9cd1d4a50d56 + .quad 0xb6244077c6feab7e + .quad 0x6ff9bf483580972e + .quad 0x00375883b332acfb + + // 2^92 * 3 * G + + .quad 0x0001b2cd28cb0940 + .quad 0x63fb51a06f1c24c9 + .quad 0xb5ad8691dcd5ca31 + .quad 0x67238dbd8c450660 + .quad 0xc98bec856c75c99c + .quad 0xe44184c000e33cf4 + .quad 0x0a676b9bba907634 + .quad 0x669e2cb571f379d7 + .quad 0xcb116b73a49bd308 + .quad 0x025aad6b2392729e + .quad 0xb4793efa3f55d9b1 + .quad 0x72a1056140678bb9 + + // 2^92 * 4 * G + + .quad 0xa2b6812b1cc9249d + .quad 0x62866eee21211f58 + .quad 0x2cb5c5b85df10ece + .quad 0x03a6b259e263ae00 + .quad 0x0d8d2909e2e505b6 + .quad 0x98ca78abc0291230 + .quad 0x77ef5569a9b12327 + .quad 0x7c77897b81439b47 + .quad 0xf1c1b5e2de331cb5 + .quad 0x5a9f5d8e15fca420 + .quad 0x9fa438f17bd932b1 + .quad 0x2a381bf01c6146e7 + + // 2^92 * 5 * G + + .quad 0xac9b9879cfc811c1 + .quad 0x8b7d29813756e567 + .quad 0x50da4e607c70edfc + .quad 0x5dbca62f884400b6 + .quad 0xf7c0be32b534166f + .quad 0x27e6ca6419cf70d4 + .quad 0x934df7d7a957a759 + .quad 0x5701461dabdec2aa + .quad 0x2c6747402c915c25 + .quad 0x1bdcd1a80b0d340a + .quad 0x5e5601bd07b43f5f + .quad 0x2555b4e05539a242 + + // 2^92 * 6 * G + + .quad 0x6fc09f5266ddd216 + .quad 0xdce560a7c8e37048 + .quad 0xec65939da2df62fd + .quad 0x7a869ae7e52ed192 + .quad 0x78409b1d87e463d4 + .quad 0xad4da95acdfb639d + .quad 0xec28773755259b9c + .quad 0x69c806e9c31230ab + .quad 0x7b48f57414bb3f22 + .quad 0x68c7cee4aedccc88 + .quad 0xed2f936179ed80be + .quad 0x25d70b885f77bc4b + + // 2^92 * 7 * G + 
+ .quad 0x4151c3d9762bf4de + .quad 0x083f435f2745d82b + .quad 0x29775a2e0d23ddd5 + .quad 0x138e3a6269a5db24 + .quad 0x98459d29bb1ae4d4 + .quad 0x56b9c4c739f954ec + .quad 0x832743f6c29b4b3e + .quad 0x21ea8e2798b6878a + .quad 0x87bef4b46a5a7b9c + .quad 0xd2299d1b5fc1d062 + .quad 0x82409818dd321648 + .quad 0x5c5abeb1e5a2e03d + + // 2^92 * 8 * G + + .quad 0x14722af4b73c2ddb + .quad 0xbc470c5f5a05060d + .quad 0x00943eac2581b02e + .quad 0x0e434b3b1f499c8f + .quad 0x02cde6de1306a233 + .quad 0x7b5a52a2116f8ec7 + .quad 0xe1c681f4c1163b5b + .quad 0x241d350660d32643 + .quad 0x6be4404d0ebc52c7 + .quad 0xae46233bb1a791f5 + .quad 0x2aec170ed25db42b + .quad 0x1d8dfd966645d694 + + // 2^96 * 1 * G + + .quad 0x296fa9c59c2ec4de + .quad 0xbc8b61bf4f84f3cb + .quad 0x1c7706d917a8f908 + .quad 0x63b795fc7ad3255d + .quad 0xd598639c12ddb0a4 + .quad 0xa5d19f30c024866b + .quad 0xd17c2f0358fce460 + .quad 0x07a195152e095e8a + .quad 0xa8368f02389e5fc8 + .quad 0x90433b02cf8de43b + .quad 0xafa1fd5dc5412643 + .quad 0x3e8fe83d032f0137 + + // 2^96 * 2 * G + + .quad 0x2f8b15b90570a294 + .quad 0x94f2427067084549 + .quad 0xde1c5ae161bbfd84 + .quad 0x75ba3b797fac4007 + .quad 0x08704c8de8efd13c + .quad 0xdfc51a8e33e03731 + .quad 0xa59d5da51260cde3 + .quad 0x22d60899a6258c86 + .quad 0x6239dbc070cdd196 + .quad 0x60fe8a8b6c7d8a9a + .quad 0xb38847bceb401260 + .quad 0x0904d07b87779e5e + + // 2^96 * 3 * G + + .quad 0xb4ce1fd4ddba919c + .quad 0xcf31db3ec74c8daa + .quad 0x2c63cc63ad86cc51 + .quad 0x43e2143fbc1dde07 + .quad 0xf4322d6648f940b9 + .quad 0x06952f0cbd2d0c39 + .quad 0x167697ada081f931 + .quad 0x6240aacebaf72a6c + .quad 0xf834749c5ba295a0 + .quad 0xd6947c5bca37d25a + .quad 0x66f13ba7e7c9316a + .quad 0x56bdaf238db40cac + + // 2^96 * 4 * G + + .quad 0x362ab9e3f53533eb + .quad 0x338568d56eb93d40 + .quad 0x9e0e14521d5a5572 + .quad 0x1d24a86d83741318 + .quad 0x1310d36cc19d3bb2 + .quad 0x062a6bb7622386b9 + .quad 0x7c9b8591d7a14f5c + .quad 0x03aa31507e1e5754 + .quad 0xf4ec7648ffd4ce1f + .quad 0xe045eaf054ac8c1c + .quad 0x88d225821d09357c + .quad 0x43b261dc9aeb4859 + + // 2^96 * 5 * G + + .quad 0xe55b1e1988bb79bb + .quad 0xa09ed07dc17a359d + .quad 0xb02c2ee2603dea33 + .quad 0x326055cf5b276bc2 + .quad 0x19513d8b6c951364 + .quad 0x94fe7126000bf47b + .quad 0x028d10ddd54f9567 + .quad 0x02b4d5e242940964 + .quad 0xb4a155cb28d18df2 + .quad 0xeacc4646186ce508 + .quad 0xc49cf4936c824389 + .quad 0x27a6c809ae5d3410 + + // 2^96 * 6 * G + + .quad 0x8ba6ebcd1f0db188 + .quad 0x37d3d73a675a5be8 + .quad 0xf22edfa315f5585a + .quad 0x2cb67174ff60a17e + .quad 0xcd2c270ac43d6954 + .quad 0xdd4a3e576a66cab2 + .quad 0x79fa592469d7036c + .quad 0x221503603d8c2599 + .quad 0x59eecdf9390be1d0 + .quad 0xa9422044728ce3f1 + .quad 0x82891c667a94f0f4 + .quad 0x7b1df4b73890f436 + + // 2^96 * 7 * G + + .quad 0xe492f2e0b3b2a224 + .quad 0x7c6c9e062b551160 + .quad 0x15eb8fe20d7f7b0e + .quad 0x61fcef2658fc5992 + .quad 0x5f2e221807f8f58c + .quad 0xe3555c9fd49409d4 + .quad 0xb2aaa88d1fb6a630 + .quad 0x68698245d352e03d + .quad 0xdbb15d852a18187a + .quad 0xf3e4aad386ddacd7 + .quad 0x44bae2810ff6c482 + .quad 0x46cf4c473daf01cf + + // 2^96 * 8 * G + + .quad 0x426525ed9ec4e5f9 + .quad 0x0e5eda0116903303 + .quad 0x72b1a7f2cbe5cadc + .quad 0x29387bcd14eb5f40 + .quad 0x213c6ea7f1498140 + .quad 0x7c1e7ef8392b4854 + .quad 0x2488c38c5629ceba + .quad 0x1065aae50d8cc5bb + .quad 0x1c2c4525df200d57 + .quad 0x5c3b2dd6bfca674a + .quad 0x0a07e7b1e1834030 + .quad 0x69a198e64f1ce716 + + // 2^100 * 1 * G + + .quad 0x7afcd613efa9d697 + .quad 0x0cc45aa41c067959 + .quad 0xa56fe104c1fada96 + .quad 
0x3a73b70472e40365 + .quad 0x7b26e56b9e2d4734 + .quad 0xc4c7132b81c61675 + .quad 0xef5c9525ec9cde7f + .quad 0x39c80b16e71743ad + .quad 0x0f196e0d1b826c68 + .quad 0xf71ff0e24960e3db + .quad 0x6113167023b7436c + .quad 0x0cf0ea5877da7282 + + // 2^100 * 2 * G + + .quad 0x196c80a4ddd4ccbd + .quad 0x22e6f55d95f2dd9d + .quad 0xc75e33c740d6c71b + .quad 0x7bb51279cb3c042f + .quad 0xe332ced43ba6945a + .quad 0xde0b1361e881c05d + .quad 0x1ad40f095e67ed3b + .quad 0x5da8acdab8c63d5d + .quad 0xc4b6664a3a70159f + .quad 0x76194f0f0a904e14 + .quad 0xa5614c39a4096c13 + .quad 0x6cd0ff50979feced + + // 2^100 * 3 * G + + .quad 0xc0e067e78f4428ac + .quad 0x14835ab0a61135e3 + .quad 0xf21d14f338062935 + .quad 0x6390a4c8df04849c + .quad 0x7fecfabdb04ba18e + .quad 0xd0fc7bfc3bddbcf7 + .quad 0xa41d486e057a131c + .quad 0x641a4391f2223a61 + .quad 0xc5c6b95aa606a8db + .quad 0x914b7f9eb06825f1 + .quad 0x2a731f6b44fc9eff + .quad 0x30ddf38562705cfc + + // 2^100 * 4 * G + + .quad 0x4e3dcbdad1bff7f9 + .quad 0xc9118e8220645717 + .quad 0xbacccebc0f189d56 + .quad 0x1b4822e9d4467668 + .quad 0x33bef2bd68bcd52c + .quad 0xc649dbb069482ef2 + .quad 0xb5b6ee0c41cb1aee + .quad 0x5c294d270212a7e5 + .quad 0xab360a7f25563781 + .quad 0x2512228a480f7958 + .quad 0xc75d05276114b4e3 + .quad 0x222d9625d976fe2a + + // 2^100 * 5 * G + + .quad 0x1c717f85b372ace1 + .quad 0x81930e694638bf18 + .quad 0x239cad056bc08b58 + .quad 0x0b34271c87f8fff4 + .quad 0x0f94be7e0a344f85 + .quad 0xeb2faa8c87f22c38 + .quad 0x9ce1e75e4ee16f0f + .quad 0x43e64e5418a08dea + .quad 0x8155e2521a35ce63 + .quad 0xbe100d4df912028e + .quad 0xbff80bf8a57ddcec + .quad 0x57342dc96d6bc6e4 + + // 2^100 * 6 * G + + .quad 0xefeef065c8ce5998 + .quad 0xbf029510b5cbeaa2 + .quad 0x8c64a10620b7c458 + .quad 0x35134fb231c24855 + .quad 0xf3c3bcb71e707bf6 + .quad 0x351d9b8c7291a762 + .quad 0x00502e6edad69a33 + .quad 0x522f521f1ec8807f + .quad 0x272c1f46f9a3902b + .quad 0xc91ba3b799657bcc + .quad 0xae614b304f8a1c0e + .quad 0x7afcaad70b99017b + + // 2^100 * 7 * G + + .quad 0xc25ded54a4b8be41 + .quad 0x902d13e11bb0e2dd + .quad 0x41f43233cde82ab2 + .quad 0x1085faa5c3aae7cb + .quad 0xa88141ecef842b6b + .quad 0x55e7b14797abe6c5 + .quad 0x8c748f9703784ffe + .quad 0x5b50a1f7afcd00b7 + .quad 0x9b840f66f1361315 + .quad 0x18462242701003e9 + .quad 0x65ed45fae4a25080 + .quad 0x0a2862393fda7320 + + // 2^100 * 8 * G + + .quad 0x46ab13c8347cbc9d + .quad 0x3849e8d499c12383 + .quad 0x4cea314087d64ac9 + .quad 0x1f354134b1a29ee7 + .quad 0x960e737b6ecb9d17 + .quad 0xfaf24948d67ceae1 + .quad 0x37e7a9b4d55e1b89 + .quad 0x5cb7173cb46c59eb + .quad 0x4a89e68b82b7abf0 + .quad 0xf41cd9279ba6b7b9 + .quad 0x16e6c210e18d876f + .quad 0x7cacdb0f7f1b09c6 + + // 2^104 * 1 * G + + .quad 0x9062b2e0d91a78bc + .quad 0x47c9889cc8509667 + .quad 0x9df54a66405070b8 + .quad 0x7369e6a92493a1bf + .quad 0xe1014434dcc5caed + .quad 0x47ed5d963c84fb33 + .quad 0x70019576ed86a0e7 + .quad 0x25b2697bd267f9e4 + .quad 0x9d673ffb13986864 + .quad 0x3ca5fbd9415dc7b8 + .quad 0xe04ecc3bdf273b5e + .quad 0x1420683db54e4cd2 + + // 2^104 * 2 * G + + .quad 0xb478bd1e249dd197 + .quad 0x620c35005e58c102 + .quad 0xfb02d32fccbaac5c + .quad 0x60b63bebf508a72d + .quad 0x34eebb6fc1cc5ad0 + .quad 0x6a1b0ce99646ac8b + .quad 0xd3b0da49a66bde53 + .quad 0x31e83b4161d081c1 + .quad 0x97e8c7129e062b4f + .quad 0x49e48f4f29320ad8 + .quad 0x5bece14b6f18683f + .quad 0x55cf1eb62d550317 + + // 2^104 * 3 * G + + .quad 0x5879101065c23d58 + .quad 0x8b9d086d5094819c + .quad 0xe2402fa912c55fa7 + .quad 0x669a6564570891d4 + .quad 0x3076b5e37df58c52 + .quad 0xd73ab9dde799cc36 + .quad 
0xbd831ce34913ee20 + .quad 0x1a56fbaa62ba0133 + .quad 0x943e6b505c9dc9ec + .quad 0x302557bba77c371a + .quad 0x9873ae5641347651 + .quad 0x13c4836799c58a5c + + // 2^104 * 4 * G + + .quad 0x423a5d465ab3e1b9 + .quad 0xfc13c187c7f13f61 + .quad 0x19f83664ecb5b9b6 + .quad 0x66f80c93a637b607 + .quad 0xc4dcfb6a5d8bd080 + .quad 0xdeebc4ec571a4842 + .quad 0xd4b2e883b8e55365 + .quad 0x50bdc87dc8e5b827 + .quad 0x606d37836edfe111 + .quad 0x32353e15f011abd9 + .quad 0x64b03ac325b73b96 + .quad 0x1dd56444725fd5ae + + // 2^104 * 5 * G + + .quad 0x8fa47ff83362127d + .quad 0xbc9f6ac471cd7c15 + .quad 0x6e71454349220c8b + .quad 0x0e645912219f732e + .quad 0xc297e60008bac89a + .quad 0x7d4cea11eae1c3e0 + .quad 0xf3e38be19fe7977c + .quad 0x3a3a450f63a305cd + .quad 0x078f2f31d8394627 + .quad 0x389d3183de94a510 + .quad 0xd1e36c6d17996f80 + .quad 0x318c8d9393a9a87b + + // 2^104 * 6 * G + + .quad 0xf2745d032afffe19 + .quad 0x0c9f3c497f24db66 + .quad 0xbc98d3e3ba8598ef + .quad 0x224c7c679a1d5314 + .quad 0x5d669e29ab1dd398 + .quad 0xfc921658342d9e3b + .quad 0x55851dfdf35973cd + .quad 0x509a41c325950af6 + .quad 0xbdc06edca6f925e9 + .quad 0x793ef3f4641b1f33 + .quad 0x82ec12809d833e89 + .quad 0x05bff02328a11389 + + // 2^104 * 7 * G + + .quad 0x3632137023cae00b + .quad 0x544acf0ad1accf59 + .quad 0x96741049d21a1c88 + .quad 0x780b8cc3fa2a44a7 + .quad 0x6881a0dd0dc512e4 + .quad 0x4fe70dc844a5fafe + .quad 0x1f748e6b8f4a5240 + .quad 0x576277cdee01a3ea + .quad 0x1ef38abc234f305f + .quad 0x9a577fbd1405de08 + .quad 0x5e82a51434e62a0d + .quad 0x5ff418726271b7a1 + + // 2^104 * 8 * G + + .quad 0x398e080c1789db9d + .quad 0xa7602025f3e778f5 + .quad 0xfa98894c06bd035d + .quad 0x106a03dc25a966be + .quad 0xe5db47e813b69540 + .quad 0xf35d2a3b432610e1 + .quad 0xac1f26e938781276 + .quad 0x29d4db8ca0a0cb69 + .quad 0xd9ad0aaf333353d0 + .quad 0x38669da5acd309e5 + .quad 0x3c57658ac888f7f0 + .quad 0x4ab38a51052cbefa + + // 2^108 * 1 * G + + .quad 0xdfdacbee4324c0e9 + .quad 0x054442883f955bb7 + .quad 0xdef7aaa8ea31609f + .quad 0x68aee70642287cff + .quad 0xf68fe2e8809de054 + .quad 0xe3bc096a9c82bad1 + .quad 0x076353d40aadbf45 + .quad 0x7b9b1fb5dea1959e + .quad 0xf01cc8f17471cc0c + .quad 0x95242e37579082bb + .quad 0x27776093d3e46b5f + .quad 0x2d13d55a28bd85fb + + // 2^108 * 2 * G + + .quad 0xfac5d2065b35b8da + .quad 0xa8da8a9a85624bb7 + .quad 0xccd2ca913d21cd0f + .quad 0x6b8341ee8bf90d58 + .quad 0xbf019cce7aee7a52 + .quad 0xa8ded2b6e454ead3 + .quad 0x3c619f0b87a8bb19 + .quad 0x3619b5d7560916d8 + .quad 0x3579f26b0282c4b2 + .quad 0x64d592f24fafefae + .quad 0xb7cded7b28c8c7c0 + .quad 0x6a927b6b7173a8d7 + + // 2^108 * 3 * G + + .quad 0x1f6db24f986e4656 + .quad 0x1021c02ed1e9105b + .quad 0xf8ff3fff2cc0a375 + .quad 0x1d2a6bf8c6c82592 + .quad 0x8d7040863ece88eb + .quad 0xf0e307a980eec08c + .quad 0xac2250610d788fda + .quad 0x056d92a43a0d478d + .quad 0x1b05a196fc3da5a1 + .quad 0x77d7a8c243b59ed0 + .quad 0x06da3d6297d17918 + .quad 0x66fbb494f12353f7 + + // 2^108 * 4 * G + + .quad 0x751a50b9d85c0fb8 + .quad 0xd1afdc258bcf097b + .quad 0x2f16a6a38309a969 + .quad 0x14ddff9ee5b00659 + .quad 0xd6d70996f12309d6 + .quad 0xdbfb2385e9c3d539 + .quad 0x46d602b0f7552411 + .quad 0x270a0b0557843e0c + .quad 0x61ff0640a7862bcc + .quad 0x81cac09a5f11abfe + .quad 0x9047830455d12abb + .quad 0x19a4bde1945ae873 + + // 2^108 * 5 * G + + .quad 0x9b9f26f520a6200a + .quad 0x64804443cf13eaf8 + .quad 0x8a63673f8631edd3 + .quad 0x72bbbce11ed39dc1 + .quad 0x40c709dec076c49f + .quad 0x657bfaf27f3e53f6 + .quad 0x40662331eca042c4 + .quad 0x14b375487eb4df04 + .quad 0xae853c94ab66dc47 + .quad 
0xeb62343edf762d6e + .quad 0xf08e0e186fb2f7d1 + .quad 0x4f0b1c02700ab37a + + // 2^108 * 6 * G + + .quad 0xe1706787d81951fa + .quad 0xa10a2c8eb290c77b + .quad 0xe7382fa03ed66773 + .quad 0x0a4d84710bcc4b54 + .quad 0x79fd21ccc1b2e23f + .quad 0x4ae7c281453df52a + .quad 0xc8172ec9d151486b + .quad 0x68abe9443e0a7534 + .quad 0xda12c6c407831dcb + .quad 0x0da230d74d5c510d + .quad 0x4ab1531e6bd404e1 + .quad 0x4106b166bcf440ef + + // 2^108 * 7 * G + + .quad 0x02e57a421cd23668 + .quad 0x4ad9fb5d0eaef6fd + .quad 0x954e6727b1244480 + .quad 0x7f792f9d2699f331 + .quad 0xa485ccd539e4ecf2 + .quad 0x5aa3f3ad0555bab5 + .quad 0x145e3439937df82d + .quad 0x1238b51e1214283f + .quad 0x0b886b925fd4d924 + .quad 0x60906f7a3626a80d + .quad 0xecd367b4b98abd12 + .quad 0x2876beb1def344cf + + // 2^108 * 8 * G + + .quad 0xdc84e93563144691 + .quad 0x632fe8a0d61f23f4 + .quad 0x4caa800612a9a8d5 + .quad 0x48f9dbfa0e9918d3 + .quad 0xd594b3333a8a85f8 + .quad 0x4ea37689e78d7d58 + .quad 0x73bf9f455e8e351f + .quad 0x5507d7d2bc41ebb4 + .quad 0x1ceb2903299572fc + .quad 0x7c8ccaa29502d0ee + .quad 0x91bfa43411cce67b + .quad 0x5784481964a831e7 + + // 2^112 * 1 * G + + .quad 0xda7c2b256768d593 + .quad 0x98c1c0574422ca13 + .quad 0xf1a80bd5ca0ace1d + .quad 0x29cdd1adc088a690 + .quad 0xd6cfd1ef5fddc09c + .quad 0xe82b3efdf7575dce + .quad 0x25d56b5d201634c2 + .quad 0x3041c6bb04ed2b9b + .quad 0x0ff2f2f9d956e148 + .quad 0xade797759f356b2e + .quad 0x1a4698bb5f6c025c + .quad 0x104bbd6814049a7b + + // 2^112 * 2 * G + + .quad 0x51f0fd3168f1ed67 + .quad 0x2c811dcdd86f3bc2 + .quad 0x44dc5c4304d2f2de + .quad 0x5be8cc57092a7149 + .quad 0xa95d9a5fd67ff163 + .quad 0xe92be69d4cc75681 + .quad 0xb7f8024cde20f257 + .quad 0x204f2a20fb072df5 + .quad 0xc8143b3d30ebb079 + .quad 0x7589155abd652e30 + .quad 0x653c3c318f6d5c31 + .quad 0x2570fb17c279161f + + // 2^112 * 3 * G + + .quad 0x3efa367f2cb61575 + .quad 0xf5f96f761cd6026c + .quad 0xe8c7142a65b52562 + .quad 0x3dcb65ea53030acd + .quad 0x192ea9550bb8245a + .quad 0xc8e6fba88f9050d1 + .quad 0x7986ea2d88a4c935 + .quad 0x241c5f91de018668 + .quad 0x28d8172940de6caa + .quad 0x8fbf2cf022d9733a + .quad 0x16d7fcdd235b01d1 + .quad 0x08420edd5fcdf0e5 + + // 2^112 * 4 * G + + .quad 0xcdff20ab8362fa4a + .quad 0x57e118d4e21a3e6e + .quad 0xe3179617fc39e62b + .quad 0x0d9a53efbc1769fd + .quad 0x0358c34e04f410ce + .quad 0xb6135b5a276e0685 + .quad 0x5d9670c7ebb91521 + .quad 0x04d654f321db889c + .quad 0x5e7dc116ddbdb5d5 + .quad 0x2954deb68da5dd2d + .quad 0x1cb608173334a292 + .quad 0x4a7a4f2618991ad7 + + // 2^112 * 5 * G + + .quad 0xf4a718025fb15f95 + .quad 0x3df65f346b5c1b8f + .quad 0xcdfcf08500e01112 + .quad 0x11b50c4cddd31848 + .quad 0x24c3b291af372a4b + .quad 0x93da8270718147f2 + .quad 0xdd84856486899ef2 + .quad 0x4a96314223e0ee33 + .quad 0xa6e8274408a4ffd6 + .quad 0x738e177e9c1576d9 + .quad 0x773348b63d02b3f2 + .quad 0x4f4bce4dce6bcc51 + + // 2^112 * 6 * G + + .quad 0xa71fce5ae2242584 + .quad 0x26ea725692f58a9e + .quad 0xd21a09d71cea3cf4 + .quad 0x73fcdd14b71c01e6 + .quad 0x30e2616ec49d0b6f + .quad 0xe456718fcaec2317 + .quad 0x48eb409bf26b4fa6 + .quad 0x3042cee561595f37 + .quad 0x427e7079449bac41 + .quad 0x855ae36dbce2310a + .quad 0x4cae76215f841a7c + .quad 0x389e740c9a9ce1d6 + + // 2^112 * 7 * G + + .quad 0x64fcb3ae34dcb9ce + .quad 0x97500323e348d0ad + .quad 0x45b3f07d62c6381b + .quad 0x61545379465a6788 + .quad 0xc9bd78f6570eac28 + .quad 0xe55b0b3227919ce1 + .quad 0x65fc3eaba19b91ed + .quad 0x25c425e5d6263690 + .quad 0x3f3e06a6f1d7de6e + .quad 0x3ef976278e062308 + .quad 0x8c14f6264e8a6c77 + .quad 0x6539a08915484759 + + // 
2^112 * 8 * G + + .quad 0xe9d21f74c3d2f773 + .quad 0xc150544125c46845 + .quad 0x624e5ce8f9b99e33 + .quad 0x11c5e4aac5cd186c + .quad 0xddc4dbd414bb4a19 + .quad 0x19b2bc3c98424f8e + .quad 0x48a89fd736ca7169 + .quad 0x0f65320ef019bd90 + .quad 0xd486d1b1cafde0c6 + .quad 0x4f3fe6e3163b5181 + .quad 0x59a8af0dfaf2939a + .quad 0x4cabc7bdec33072a + + // 2^116 * 1 * G + + .quad 0x16faa8fb532f7428 + .quad 0xdbd42ea046a4e272 + .quad 0x5337653b8b9ea480 + .quad 0x4065947223973f03 + .quad 0xf7c0a19c1a54a044 + .quad 0x4a1c5e2477bd9fbb + .quad 0xa6e3ca115af22972 + .quad 0x1819bb953f2e9e0d + .quad 0x498fbb795e042e84 + .quad 0x7d0dd89a7698b714 + .quad 0x8bfb0ba427fe6295 + .quad 0x36ba82e721200524 + + // 2^116 * 2 * G + + .quad 0xd60ecbb74245ec41 + .quad 0xfd9be89e34348716 + .quad 0xc9240afee42284de + .quad 0x4472f648d0531db4 + .quad 0xc8d69d0a57274ed5 + .quad 0x45ba803260804b17 + .quad 0xdf3cda102255dfac + .quad 0x77d221232709b339 + .quad 0x498a6d7064ad94d8 + .quad 0xa5b5c8fd9af62263 + .quad 0x8ca8ed0545c141f4 + .quad 0x2c63bec3662d358c + + // 2^116 * 3 * G + + .quad 0x7fe60d8bea787955 + .quad 0xb9dc117eb5f401b7 + .quad 0x91c7c09a19355cce + .quad 0x22692ef59442bedf + .quad 0x9a518b3a8586f8bf + .quad 0x9ee71af6cbb196f0 + .quad 0xaa0625e6a2385cf2 + .quad 0x1deb2176ddd7c8d1 + .quad 0x8563d19a2066cf6c + .quad 0x401bfd8c4dcc7cd7 + .quad 0xd976a6becd0d8f62 + .quad 0x67cfd773a278b05e + + // 2^116 * 4 * G + + .quad 0x8dec31faef3ee475 + .quad 0x99dbff8a9e22fd92 + .quad 0x512d11594e26cab1 + .quad 0x0cde561eec4310b9 + .quad 0x2d5fa9855a4e586a + .quad 0x65f8f7a449beab7e + .quad 0xaa074dddf21d33d3 + .quad 0x185cba721bcb9dee + .quad 0x93869da3f4e3cb41 + .quad 0xbf0392f540f7977e + .quad 0x026204fcd0463b83 + .quad 0x3ec91a769eec6eed + + // 2^116 * 5 * G + + .quad 0x1e9df75bf78166ad + .quad 0x4dfda838eb0cd7af + .quad 0xba002ed8c1eaf988 + .quad 0x13fedb3e11f33cfc + .quad 0x0fad2fb7b0a3402f + .quad 0x46615ecbfb69f4a8 + .quad 0xf745bcc8c5f8eaa6 + .quad 0x7a5fa8794a94e896 + .quad 0x52958faa13cd67a1 + .quad 0x965ee0818bdbb517 + .quad 0x16e58daa2e8845b3 + .quad 0x357d397d5499da8f + + // 2^116 * 6 * G + + .quad 0x1ebfa05fb0bace6c + .quad 0xc934620c1caf9a1e + .quad 0xcc771cc41d82b61a + .quad 0x2d94a16aa5f74fec + .quad 0x481dacb4194bfbf8 + .quad 0x4d77e3f1bae58299 + .quad 0x1ef4612e7d1372a0 + .quad 0x3a8d867e70ff69e1 + .quad 0x6f58cd5d55aff958 + .quad 0xba3eaa5c75567721 + .quad 0x75c123999165227d + .quad 0x69be1343c2f2b35e + + // 2^116 * 7 * G + + .quad 0x0e091d5ee197c92a + .quad 0x4f51019f2945119f + .quad 0x143679b9f034e99c + .quad 0x7d88112e4d24c696 + .quad 0x82bbbdac684b8de3 + .quad 0xa2f4c7d03fca0718 + .quad 0x337f92fbe096aaa8 + .quad 0x200d4d8c63587376 + .quad 0x208aed4b4893b32b + .quad 0x3efbf23ebe59b964 + .quad 0xd762deb0dba5e507 + .quad 0x69607bd681bd9d94 + + // 2^116 * 8 * G + + .quad 0xf6be021068de1ce1 + .quad 0xe8d518e70edcbc1f + .quad 0xe3effdd01b5505a5 + .quad 0x35f63353d3ec3fd0 + .quad 0x3b7f3bd49323a902 + .quad 0x7c21b5566b2c6e53 + .quad 0xe5ba8ff53a7852a7 + .quad 0x28bc77a5838ece00 + .quad 0x63ba78a8e25d8036 + .quad 0x63651e0094333490 + .quad 0x48d82f20288ce532 + .quad 0x3a31abfa36b57524 + + // 2^120 * 1 * G + + .quad 0x239e9624089c0a2e + .quad 0xc748c4c03afe4738 + .quad 0x17dbed2a764fa12a + .quad 0x639b93f0321c8582 + .quad 0xc08f788f3f78d289 + .quad 0xfe30a72ca1404d9f + .quad 0xf2778bfccf65cc9d + .quad 0x7ee498165acb2021 + .quad 0x7bd508e39111a1c3 + .quad 0x2b2b90d480907489 + .quad 0xe7d2aec2ae72fd19 + .quad 0x0edf493c85b602a6 + + // 2^120 * 2 * G + + .quad 0xaecc8158599b5a68 + .quad 0xea574f0febade20e + .quad 
0x4fe41d7422b67f07 + .quad 0x403b92e3019d4fb4 + .quad 0x6767c4d284764113 + .quad 0xa090403ff7f5f835 + .quad 0x1c8fcffacae6bede + .quad 0x04c00c54d1dfa369 + .quad 0x4dc22f818b465cf8 + .quad 0x71a0f35a1480eff8 + .quad 0xaee8bfad04c7d657 + .quad 0x355bb12ab26176f4 + + // 2^120 * 3 * G + + .quad 0xa71e64cc7493bbf4 + .quad 0xe5bd84d9eca3b0c3 + .quad 0x0a6bc50cfa05e785 + .quad 0x0f9b8132182ec312 + .quad 0xa301dac75a8c7318 + .quad 0xed90039db3ceaa11 + .quad 0x6f077cbf3bae3f2d + .quad 0x7518eaf8e052ad8e + .quad 0xa48859c41b7f6c32 + .quad 0x0f2d60bcf4383298 + .quad 0x1815a929c9b1d1d9 + .quad 0x47c3871bbb1755c4 + + // 2^120 * 4 * G + + .quad 0x5144539771ec4f48 + .quad 0xf805b17dc98c5d6e + .quad 0xf762c11a47c3c66b + .quad 0x00b89b85764699dc + .quad 0xfbe65d50c85066b0 + .quad 0x62ecc4b0b3a299b0 + .quad 0xe53754ea441ae8e0 + .quad 0x08fea02ce8d48d5f + .quad 0x824ddd7668deead0 + .quad 0xc86445204b685d23 + .quad 0xb514cfcd5d89d665 + .quad 0x473829a74f75d537 + + // 2^120 * 5 * G + + .quad 0x82d2da754679c418 + .quad 0xe63bd7d8b2618df0 + .quad 0x355eef24ac47eb0a + .quad 0x2078684c4833c6b4 + .quad 0x23d9533aad3902c9 + .quad 0x64c2ddceef03588f + .quad 0x15257390cfe12fb4 + .quad 0x6c668b4d44e4d390 + .quad 0x3b48cf217a78820c + .quad 0xf76a0ab281273e97 + .quad 0xa96c65a78c8eed7b + .quad 0x7411a6054f8a433f + + // 2^120 * 6 * G + + .quad 0x4d659d32b99dc86d + .quad 0x044cdc75603af115 + .quad 0xb34c712cdcc2e488 + .quad 0x7c136574fb8134ff + .quad 0x579ae53d18b175b4 + .quad 0x68713159f392a102 + .quad 0x8455ecba1eef35f5 + .quad 0x1ec9a872458c398f + .quad 0xb8e6a4d400a2509b + .quad 0x9b81d7020bc882b4 + .quad 0x57e7cc9bf1957561 + .quad 0x3add88a5c7cd6460 + + // 2^120 * 7 * G + + .quad 0xab895770b635dcf2 + .quad 0x02dfef6cf66c1fbc + .quad 0x85530268beb6d187 + .quad 0x249929fccc879e74 + .quad 0x85c298d459393046 + .quad 0x8f7e35985ff659ec + .quad 0x1d2ca22af2f66e3a + .quad 0x61ba1131a406a720 + .quad 0xa3d0a0f116959029 + .quad 0x023b6b6cba7ebd89 + .quad 0x7bf15a3e26783307 + .quad 0x5620310cbbd8ece7 + + // 2^120 * 8 * G + + .quad 0x528993434934d643 + .quad 0xb9dbf806a51222f5 + .quad 0x8f6d878fc3f41c22 + .quad 0x37676a2a4d9d9730 + .quad 0x6646b5f477e285d6 + .quad 0x40e8ff676c8f6193 + .quad 0xa6ec7311abb594dd + .quad 0x7ec846f3658cec4d + .quad 0x9b5e8f3f1da22ec7 + .quad 0x130f1d776c01cd13 + .quad 0x214c8fcfa2989fb8 + .quad 0x6daaf723399b9dd5 + + // 2^124 * 1 * G + + .quad 0x591e4a5610628564 + .quad 0x2a4bb87ca8b4df34 + .quad 0xde2a2572e7a38e43 + .quad 0x3cbdabd9fee5046e + .quad 0x81aebbdd2cd13070 + .quad 0x962e4325f85a0e9e + .quad 0xde9391aacadffecb + .quad 0x53177fda52c230e6 + .quad 0xa7bc970650b9de79 + .quad 0x3d12a7fbc301b59b + .quad 0x02652e68d36ae38c + .quad 0x79d739835a6199dc + + // 2^124 * 2 * G + + .quad 0xd9354df64131c1bd + .quad 0x758094a186ec5822 + .quad 0x4464ee12e459f3c2 + .quad 0x6c11fce4cb133282 + .quad 0x21c9d9920d591737 + .quad 0x9bea41d2e9b46cd6 + .quad 0xe20e84200d89bfca + .quad 0x79d99f946eae5ff8 + .quad 0xf17b483568673205 + .quad 0x387deae83caad96c + .quad 0x61b471fd56ffe386 + .quad 0x31741195b745a599 + + // 2^124 * 3 * G + + .quad 0xe8d10190b77a360b + .quad 0x99b983209995e702 + .quad 0xbd4fdff8fa0247aa + .quad 0x2772e344e0d36a87 + .quad 0x17f8ba683b02a047 + .quad 0x50212096feefb6c8 + .quad 0x70139be21556cbe2 + .quad 0x203e44a11d98915b + .quad 0xd6863eba37b9e39f + .quad 0x105bc169723b5a23 + .quad 0x104f6459a65c0762 + .quad 0x567951295b4d38d4 + + // 2^124 * 4 * G + + .quad 0x535fd60613037524 + .quad 0xe210adf6b0fbc26a + .quad 0xac8d0a9b23e990ae + .quad 0x47204d08d72fdbf9 + .quad 0x07242eb30d4b497f + .quad 
0x1ef96306b9bccc87 + .quad 0x37950934d8116f45 + .quad 0x05468d6201405b04 + .quad 0x00f565a9f93267de + .quad 0xcecfd78dc0d58e8a + .quad 0xa215e2dcf318e28e + .quad 0x4599ee919b633352 + + // 2^124 * 5 * G + + .quad 0xd3c220ca70e0e76b + .quad 0xb12bea58ea9f3094 + .quad 0x294ddec8c3271282 + .quad 0x0c3539e1a1d1d028 + .quad 0xac746d6b861ae579 + .quad 0x31ab0650f6aea9dc + .quad 0x241d661140256d4c + .quad 0x2f485e853d21a5de + .quad 0x329744839c0833f3 + .quad 0x6fe6257fd2abc484 + .quad 0x5327d1814b358817 + .quad 0x65712585893fe9bc + + // 2^124 * 6 * G + + .quad 0x9c102fb732a61161 + .quad 0xe48e10dd34d520a8 + .quad 0x365c63546f9a9176 + .quad 0x32f6fe4c046f6006 + .quad 0x81c29f1bd708ee3f + .quad 0xddcb5a05ae6407d0 + .quad 0x97aec1d7d2a3eba7 + .quad 0x1590521a91d50831 + .quad 0x40a3a11ec7910acc + .quad 0x9013dff8f16d27ae + .quad 0x1a9720d8abb195d4 + .quad 0x1bb9fe452ea98463 + + // 2^124 * 7 * G + + .quad 0xe9d1d950b3d54f9e + .quad 0x2d5f9cbee00d33c1 + .quad 0x51c2c656a04fc6ac + .quad 0x65c091ee3c1cbcc9 + .quad 0xcf5e6c95cc36747c + .quad 0x294201536b0bc30d + .quad 0x453ac67cee797af0 + .quad 0x5eae6ab32a8bb3c9 + .quad 0x7083661114f118ea + .quad 0x2b37b87b94349cad + .quad 0x7273f51cb4e99f40 + .quad 0x78a2a95823d75698 + + // 2^124 * 8 * G + + .quad 0xa2b072e95c8c2ace + .quad 0x69cffc96651e9c4b + .quad 0x44328ef842e7b42b + .quad 0x5dd996c122aadeb3 + .quad 0xb4f23c425ef83207 + .quad 0xabf894d3c9a934b5 + .quad 0xd0708c1339fd87f7 + .quad 0x1876789117166130 + .quad 0x925b5ef0670c507c + .quad 0x819bc842b93c33bf + .quad 0x10792e9a70dd003f + .quad 0x59ad4b7a6e28dc74 + + // 2^128 * 1 * G + + .quad 0x5f3a7562eb3dbe47 + .quad 0xf7ea38548ebda0b8 + .quad 0x00c3e53145747299 + .quad 0x1304e9e71627d551 + .quad 0x583b04bfacad8ea2 + .quad 0x29b743e8148be884 + .quad 0x2b1e583b0810c5db + .quad 0x2b5449e58eb3bbaa + .quad 0x789814d26adc9cfe + .quad 0x3c1bab3f8b48dd0b + .quad 0xda0fe1fff979c60a + .quad 0x4468de2d7c2dd693 + + // 2^128 * 2 * G + + .quad 0x51bb355e9419469e + .quad 0x33e6dc4c23ddc754 + .quad 0x93a5b6d6447f9962 + .quad 0x6cce7c6ffb44bd63 + .quad 0x4b9ad8c6f86307ce + .quad 0x21113531435d0c28 + .quad 0xd4a866c5657a772c + .quad 0x5da6427e63247352 + .quad 0x1a94c688deac22ca + .quad 0xb9066ef7bbae1ff8 + .quad 0x88ad8c388d59580f + .quad 0x58f29abfe79f2ca8 + + // 2^128 * 3 * G + + .quad 0xe90ecfab8de73e68 + .quad 0x54036f9f377e76a5 + .quad 0xf0495b0bbe015982 + .quad 0x577629c4a7f41e36 + .quad 0x4b5a64bf710ecdf6 + .quad 0xb14ce538462c293c + .quad 0x3643d056d50b3ab9 + .quad 0x6af93724185b4870 + .quad 0x3220024509c6a888 + .quad 0xd2e036134b558973 + .quad 0x83e236233c33289f + .quad 0x701f25bb0caec18f + + // 2^128 * 4 * G + + .quad 0xc3a8b0f8e4616ced + .quad 0xf700660e9e25a87d + .quad 0x61e3061ff4bca59c + .quad 0x2e0c92bfbdc40be9 + .quad 0x9d18f6d97cbec113 + .quad 0x844a06e674bfdbe4 + .quad 0x20f5b522ac4e60d6 + .quad 0x720a5bc050955e51 + .quad 0x0c3f09439b805a35 + .quad 0xe84e8b376242abfc + .quad 0x691417f35c229346 + .quad 0x0e9b9cbb144ef0ec + + // 2^128 * 5 * G + + .quad 0xfbbad48ffb5720ad + .quad 0xee81916bdbf90d0e + .quad 0xd4813152635543bf + .quad 0x221104eb3f337bd8 + .quad 0x8dee9bd55db1beee + .quad 0xc9c3ab370a723fb9 + .quad 0x44a8f1bf1c68d791 + .quad 0x366d44191cfd3cde + .quad 0x9e3c1743f2bc8c14 + .quad 0x2eda26fcb5856c3b + .quad 0xccb82f0e68a7fb97 + .quad 0x4167a4e6bc593244 + + // 2^128 * 6 * G + + .quad 0x643b9d2876f62700 + .quad 0x5d1d9d400e7668eb + .quad 0x1b4b430321fc0684 + .quad 0x7938bb7e2255246a + .quad 0xc2be2665f8ce8fee + .quad 0xe967ff14e880d62c + .quad 0xf12e6e7e2f364eee + .quad 0x34b33370cb7ed2f6 + .quad 
0xcdc591ee8681d6cc + .quad 0xce02109ced85a753 + .quad 0xed7485c158808883 + .quad 0x1176fc6e2dfe65e4 + + // 2^128 * 7 * G + + .quad 0xb4af6cd05b9c619b + .quad 0x2ddfc9f4b2a58480 + .quad 0x3d4fa502ebe94dc4 + .quad 0x08fc3a4c677d5f34 + .quad 0xdb90e28949770eb8 + .quad 0x98fbcc2aacf440a3 + .quad 0x21354ffeded7879b + .quad 0x1f6a3e54f26906b6 + .quad 0x60a4c199d30734ea + .quad 0x40c085b631165cd6 + .quad 0xe2333e23f7598295 + .quad 0x4f2fad0116b900d1 + + // 2^128 * 8 * G + + .quad 0x44beb24194ae4e54 + .quad 0x5f541c511857ef6c + .quad 0xa61e6b2d368d0498 + .quad 0x445484a4972ef7ab + .quad 0x962cd91db73bb638 + .quad 0xe60577aafc129c08 + .quad 0x6f619b39f3b61689 + .quad 0x3451995f2944ee81 + .quad 0x9152fcd09fea7d7c + .quad 0x4a816c94b0935cf6 + .quad 0x258e9aaa47285c40 + .quad 0x10b89ca6042893b7 + + // 2^132 * 1 * G + + .quad 0x9b2a426e3b646025 + .quad 0x32127190385ce4cf + .quad 0xa25cffc2dd6dea45 + .quad 0x06409010bea8de75 + .quad 0xd67cded679d34aa0 + .quad 0xcc0b9ec0cc4db39f + .quad 0xa535a456e35d190f + .quad 0x2e05d9eaf61f6fef + .quad 0xc447901ad61beb59 + .quad 0x661f19bce5dc880a + .quad 0x24685482b7ca6827 + .quad 0x293c778cefe07f26 + + // 2^132 * 2 * G + + .quad 0x86809e7007069096 + .quad 0xaad75b15e4e50189 + .quad 0x07f35715a21a0147 + .quad 0x0487f3f112815d5e + .quad 0x16c795d6a11ff200 + .quad 0xcb70d0e2b15815c9 + .quad 0x89f293209b5395b5 + .quad 0x50b8c2d031e47b4f + .quad 0x48350c08068a4962 + .quad 0x6ffdd05351092c9a + .quad 0x17af4f4aaf6fc8dd + .quad 0x4b0553b53cdba58b + + // 2^132 * 3 * G + + .quad 0x9c65fcbe1b32ff79 + .quad 0xeb75ea9f03b50f9b + .quad 0xfced2a6c6c07e606 + .quad 0x35106cd551717908 + .quad 0xbf05211b27c152d4 + .quad 0x5ec26849bd1af639 + .quad 0x5e0b2caa8e6fab98 + .quad 0x054c8bdd50bd0840 + .quad 0x38a0b12f1dcf073d + .quad 0x4b60a8a3b7f6a276 + .quad 0xfed5ac25d3404f9a + .quad 0x72e82d5e5505c229 + + // 2^132 * 4 * G + + .quad 0x6b0b697ff0d844c8 + .quad 0xbb12f85cd979cb49 + .quad 0xd2a541c6c1da0f1f + .quad 0x7b7c242958ce7211 + .quad 0x00d9cdfd69771d02 + .quad 0x410276cd6cfbf17e + .quad 0x4c45306c1cb12ec7 + .quad 0x2857bf1627500861 + .quad 0x9f21903f0101689e + .quad 0xd779dfd3bf861005 + .quad 0xa122ee5f3deb0f1b + .quad 0x510df84b485a00d4 + + // 2^132 * 5 * G + + .quad 0xa54133bb9277a1fa + .quad 0x74ec3b6263991237 + .quad 0x1a3c54dc35d2f15a + .quad 0x2d347144e482ba3a + .quad 0x24b3c887c70ac15e + .quad 0xb0f3a557fb81b732 + .quad 0x9b2cde2fe578cc1b + .quad 0x4cf7ed0703b54f8e + .quad 0x6bd47c6598fbee0f + .quad 0x9e4733e2ab55be2d + .quad 0x1093f624127610c5 + .quad 0x4e05e26ad0a1eaa4 + + // 2^132 * 6 * G + + .quad 0xda9b6b624b531f20 + .quad 0x429a760e77509abb + .quad 0xdbe9f522e823cb80 + .quad 0x618f1856880c8f82 + .quad 0x1833c773e18fe6c0 + .quad 0xe3c4711ad3c87265 + .quad 0x3bfd3c4f0116b283 + .quad 0x1955875eb4cd4db8 + .quad 0x6da6de8f0e399799 + .quad 0x7ad61aa440fda178 + .quad 0xb32cd8105e3563dd + .quad 0x15f6beae2ae340ae + + // 2^132 * 7 * G + + .quad 0x862bcb0c31ec3a62 + .quad 0x810e2b451138f3c2 + .quad 0x788ec4b839dac2a4 + .quad 0x28f76867ae2a9281 + .quad 0xba9a0f7b9245e215 + .quad 0xf368612dd98c0dbb + .quad 0x2e84e4cbf220b020 + .quad 0x6ba92fe962d90eda + .quad 0x3e4df9655884e2aa + .quad 0xbd62fbdbdbd465a5 + .quad 0xd7596caa0de9e524 + .quad 0x6e8042ccb2b1b3d7 + + // 2^132 * 8 * G + + .quad 0xf10d3c29ce28ca6e + .quad 0xbad34540fcb6093d + .quad 0xe7426ed7a2ea2d3f + .quad 0x08af9d4e4ff298b9 + .quad 0x1530653616521f7e + .quad 0x660d06b896203dba + .quad 0x2d3989bc545f0879 + .quad 0x4b5303af78ebd7b0 + .quad 0x72f8a6c3bebcbde8 + .quad 0x4f0fca4adc3a8e89 + .quad 0x6fa9d4e8c7bfdf7a + .quad 
0x0dcf2d679b624eb7 + + // 2^136 * 1 * G + + .quad 0x3d5947499718289c + .quad 0x12ebf8c524533f26 + .quad 0x0262bfcb14c3ef15 + .quad 0x20b878d577b7518e + .quad 0x753941be5a45f06e + .quad 0xd07caeed6d9c5f65 + .quad 0x11776b9c72ff51b6 + .quad 0x17d2d1d9ef0d4da9 + .quad 0x27f2af18073f3e6a + .quad 0xfd3fe519d7521069 + .quad 0x22e3b72c3ca60022 + .quad 0x72214f63cc65c6a7 + + // 2^136 * 2 * G + + .quad 0xb4e37f405307a693 + .quad 0xaba714d72f336795 + .quad 0xd6fbd0a773761099 + .quad 0x5fdf48c58171cbc9 + .quad 0x1d9db7b9f43b29c9 + .quad 0xd605824a4f518f75 + .quad 0xf2c072bd312f9dc4 + .quad 0x1f24ac855a1545b0 + .quad 0x24d608328e9505aa + .quad 0x4748c1d10c1420ee + .quad 0xc7ffe45c06fb25a2 + .quad 0x00ba739e2ae395e6 + + // 2^136 * 3 * G + + .quad 0x592e98de5c8790d6 + .quad 0xe5bfb7d345c2a2df + .quad 0x115a3b60f9b49922 + .quad 0x03283a3e67ad78f3 + .quad 0xae4426f5ea88bb26 + .quad 0x360679d984973bfb + .quad 0x5c9f030c26694e50 + .quad 0x72297de7d518d226 + .quad 0x48241dc7be0cb939 + .quad 0x32f19b4d8b633080 + .quad 0xd3dfc90d02289308 + .quad 0x05e1296846271945 + + // 2^136 * 4 * G + + .quad 0xba82eeb32d9c495a + .quad 0xceefc8fcf12bb97c + .quad 0xb02dabae93b5d1e0 + .quad 0x39c00c9c13698d9b + .quad 0xadbfbbc8242c4550 + .quad 0xbcc80cecd03081d9 + .quad 0x843566a6f5c8df92 + .quad 0x78cf25d38258ce4c + .quad 0x15ae6b8e31489d68 + .quad 0xaa851cab9c2bf087 + .quad 0xc9a75a97f04efa05 + .quad 0x006b52076b3ff832 + + // 2^136 * 5 * G + + .quad 0x29e0cfe19d95781c + .quad 0xb681df18966310e2 + .quad 0x57df39d370516b39 + .quad 0x4d57e3443bc76122 + .quad 0xf5cb7e16b9ce082d + .quad 0x3407f14c417abc29 + .quad 0xd4b36bce2bf4a7ab + .quad 0x7de2e9561a9f75ce + .quad 0xde70d4f4b6a55ecb + .quad 0x4801527f5d85db99 + .quad 0xdbc9c440d3ee9a81 + .quad 0x6b2a90af1a6029ed + + // 2^136 * 6 * G + + .quad 0x6923f4fc9ae61e97 + .quad 0x5735281de03f5fd1 + .quad 0xa764ae43e6edd12d + .quad 0x5fd8f4e9d12d3e4a + .quad 0x77ebf3245bb2d80a + .quad 0xd8301b472fb9079b + .quad 0xc647e6f24cee7333 + .quad 0x465812c8276c2109 + .quad 0x4d43beb22a1062d9 + .quad 0x7065fb753831dc16 + .quad 0x180d4a7bde2968d7 + .quad 0x05b32c2b1cb16790 + + // 2^136 * 7 * G + + .quad 0xc8c05eccd24da8fd + .quad 0xa1cf1aac05dfef83 + .quad 0xdbbeeff27df9cd61 + .quad 0x3b5556a37b471e99 + .quad 0xf7fca42c7ad58195 + .quad 0x3214286e4333f3cc + .quad 0xb6c29d0d340b979d + .quad 0x31771a48567307e1 + .quad 0x32b0c524e14dd482 + .quad 0xedb351541a2ba4b6 + .quad 0xa3d16048282b5af3 + .quad 0x4fc079d27a7336eb + + // 2^136 * 8 * G + + .quad 0x51c938b089bf2f7f + .quad 0x2497bd6502dfe9a7 + .quad 0xffffc09c7880e453 + .quad 0x124567cecaf98e92 + .quad 0xdc348b440c86c50d + .quad 0x1337cbc9cc94e651 + .quad 0x6422f74d643e3cb9 + .quad 0x241170c2bae3cd08 + .quad 0x3ff9ab860ac473b4 + .quad 0xf0911dee0113e435 + .quad 0x4ae75060ebc6c4af + .quad 0x3f8612966c87000d + + // 2^140 * 1 * G + + .quad 0x0c9c5303f7957be4 + .quad 0xa3c31a20e085c145 + .quad 0xb0721d71d0850050 + .quad 0x0aba390eab0bf2da + .quad 0x529fdffe638c7bf3 + .quad 0xdf2b9e60388b4995 + .quad 0xe027b34f1bad0249 + .quad 0x7bc92fc9b9fa74ed + .quad 0x9f97ef2e801ad9f9 + .quad 0x83697d5479afda3a + .quad 0xe906b3ffbd596b50 + .quad 0x02672b37dd3fb8e0 + + // 2^140 * 2 * G + + .quad 0x48b2ca8b260885e4 + .quad 0xa4286bec82b34c1c + .quad 0x937e1a2617f58f74 + .quad 0x741d1fcbab2ca2a5 + .quad 0xee9ba729398ca7f5 + .quad 0xeb9ca6257a4849db + .quad 0x29eb29ce7ec544e1 + .quad 0x232ca21ef736e2c8 + .quad 0xbf61423d253fcb17 + .quad 0x08803ceafa39eb14 + .quad 0xf18602df9851c7af + .quad 0x0400f3a049e3414b + + // 2^140 * 3 * G + + .quad 0xabce0476ba61c55b + .quad 
0x36a3d6d7c4d39716 + .quad 0x6eb259d5e8d82d09 + .quad 0x0c9176e984d756fb + .quad 0x2efba412a06e7b06 + .quad 0x146785452c8d2560 + .quad 0xdf9713ebd67a91c7 + .quad 0x32830ac7157eadf3 + .quad 0x0e782a7ab73769e8 + .quad 0x04a05d7875b18e2c + .quad 0x29525226ebcceae1 + .quad 0x0d794f8383eba820 + + // 2^140 * 4 * G + + .quad 0xff35f5cb9e1516f4 + .quad 0xee805bcf648aae45 + .quad 0xf0d73c2bb93a9ef3 + .quad 0x097b0bf22092a6c2 + .quad 0x7be44ce7a7a2e1ac + .quad 0x411fd93efad1b8b7 + .quad 0x1734a1d70d5f7c9b + .quad 0x0d6592233127db16 + .quad 0xc48bab1521a9d733 + .quad 0xa6c2eaead61abb25 + .quad 0x625c6c1cc6cb4305 + .quad 0x7fc90fea93eb3a67 + + // 2^140 * 5 * G + + .quad 0x0408f1fe1f5c5926 + .quad 0x1a8f2f5e3b258bf4 + .quad 0x40a951a2fdc71669 + .quad 0x6598ee93c98b577e + .quad 0xc527deb59c7cb23d + .quad 0x955391695328404e + .quad 0xd64392817ccf2c7a + .quad 0x6ce97dabf7d8fa11 + .quad 0x25b5a8e50ef7c48f + .quad 0xeb6034116f2ce532 + .quad 0xc5e75173e53de537 + .quad 0x73119fa08c12bb03 + + // 2^140 * 6 * G + + .quad 0xed30129453f1a4cb + .quad 0xbce621c9c8f53787 + .quad 0xfacb2b1338bee7b9 + .quad 0x3025798a9ea8428c + .quad 0x7845b94d21f4774d + .quad 0xbf62f16c7897b727 + .quad 0x671857c03c56522b + .quad 0x3cd6a85295621212 + .quad 0x3fecde923aeca999 + .quad 0xbdaa5b0062e8c12f + .quad 0x67b99dfc96988ade + .quad 0x3f52c02852661036 + + // 2^140 * 7 * G + + .quad 0xffeaa48e2a1351c6 + .quad 0x28624754fa7f53d7 + .quad 0x0b5ba9e57582ddf1 + .quad 0x60c0104ba696ac59 + .quad 0x9258bf99eec416c6 + .quad 0xac8a5017a9d2f671 + .quad 0x629549ab16dea4ab + .quad 0x05d0e85c99091569 + .quad 0x051de020de9cbe97 + .quad 0xfa07fc56b50bcf74 + .quad 0x378cec9f0f11df65 + .quad 0x36853c69ab96de4d + + // 2^140 * 8 * G + + .quad 0x36d9b8de78f39b2d + .quad 0x7f42ed71a847b9ec + .quad 0x241cd1d679bd3fde + .quad 0x6a704fec92fbce6b + .quad 0x4433c0b0fac5e7be + .quad 0x724bae854c08dcbe + .quad 0xf1f24cc446978f9b + .quad 0x4a0aff6d62825fc8 + .quad 0xe917fb9e61095301 + .quad 0xc102df9402a092f8 + .quad 0xbf09e2f5fa66190b + .quad 0x681109bee0dcfe37 + + // 2^144 * 1 * G + + .quad 0x559a0cc9782a0dde + .quad 0x551dcdb2ea718385 + .quad 0x7f62865b31ef238c + .quad 0x504aa7767973613d + .quad 0x9c18fcfa36048d13 + .quad 0x29159db373899ddd + .quad 0xdc9f350b9f92d0aa + .quad 0x26f57eee878a19d4 + .quad 0x0cab2cd55687efb1 + .quad 0x5180d162247af17b + .quad 0x85c15a344f5a2467 + .quad 0x4041943d9dba3069 + + // 2^144 * 2 * G + + .quad 0xc3c0eeba43ebcc96 + .quad 0x8d749c9c26ea9caf + .quad 0xd9fa95ee1c77ccc6 + .quad 0x1420a1d97684340f + .quad 0x4b217743a26caadd + .quad 0x47a6b424648ab7ce + .quad 0xcb1d4f7a03fbc9e3 + .quad 0x12d931429800d019 + .quad 0x00c67799d337594f + .quad 0x5e3c5140b23aa47b + .quad 0x44182854e35ff395 + .quad 0x1b4f92314359a012 + + // 2^144 * 3 * G + + .quad 0x3e5c109d89150951 + .quad 0x39cefa912de9696a + .quad 0x20eae43f975f3020 + .quad 0x239b572a7f132dae + .quad 0x33cf3030a49866b1 + .quad 0x251f73d2215f4859 + .quad 0xab82aa4051def4f6 + .quad 0x5ff191d56f9a23f6 + .quad 0x819ed433ac2d9068 + .quad 0x2883ab795fc98523 + .quad 0xef4572805593eb3d + .quad 0x020c526a758f36cb + + // 2^144 * 4 * G + + .quad 0x779834f89ed8dbbc + .quad 0xc8f2aaf9dc7ca46c + .quad 0xa9524cdca3e1b074 + .quad 0x02aacc4615313877 + .quad 0xe931ef59f042cc89 + .quad 0x2c589c9d8e124bb6 + .quad 0xadc8e18aaec75997 + .quad 0x452cfe0a5602c50c + .quad 0x86a0f7a0647877df + .quad 0xbbc464270e607c9f + .quad 0xab17ea25f1fb11c9 + .quad 0x4cfb7d7b304b877b + + // 2^144 * 5 * G + + .quad 0x72b43d6cb89b75fe + .quad 0x54c694d99c6adc80 + .quad 0xb8c3aa373ee34c9f + .quad 0x14b4622b39075364 + .quad 
0xe28699c29789ef12 + .quad 0x2b6ecd71df57190d + .quad 0xc343c857ecc970d0 + .quad 0x5b1d4cbc434d3ac5 + .quad 0xb6fb2615cc0a9f26 + .quad 0x3a4f0e2bb88dcce5 + .quad 0x1301498b3369a705 + .quad 0x2f98f71258592dd1 + + // 2^144 * 6 * G + + .quad 0x0c94a74cb50f9e56 + .quad 0x5b1ff4a98e8e1320 + .quad 0x9a2acc2182300f67 + .quad 0x3a6ae249d806aaf9 + .quad 0x2e12ae444f54a701 + .quad 0xfcfe3ef0a9cbd7de + .quad 0xcebf890d75835de0 + .quad 0x1d8062e9e7614554 + .quad 0x657ada85a9907c5a + .quad 0x1a0ea8b591b90f62 + .quad 0x8d0e1dfbdf34b4e9 + .quad 0x298b8ce8aef25ff3 + + // 2^144 * 7 * G + + .quad 0x2a927953eff70cb2 + .quad 0x4b89c92a79157076 + .quad 0x9418457a30a7cf6a + .quad 0x34b8a8404d5ce485 + .quad 0x837a72ea0a2165de + .quad 0x3fab07b40bcf79f6 + .quad 0x521636c77738ae70 + .quad 0x6ba6271803a7d7dc + .quad 0xc26eecb583693335 + .quad 0xd5a813df63b5fefd + .quad 0xa293aa9aa4b22573 + .quad 0x71d62bdd465e1c6a + + // 2^144 * 8 * G + + .quad 0x6533cc28d378df80 + .quad 0xf6db43790a0fa4b4 + .quad 0xe3645ff9f701da5a + .quad 0x74d5f317f3172ba4 + .quad 0xcd2db5dab1f75ef5 + .quad 0xd77f95cf16b065f5 + .quad 0x14571fea3f49f085 + .quad 0x1c333621262b2b3d + .quad 0xa86fe55467d9ca81 + .quad 0x398b7c752b298c37 + .quad 0xda6d0892e3ac623b + .quad 0x4aebcc4547e9d98c + + // 2^148 * 1 * G + + .quad 0x53175a7205d21a77 + .quad 0xb0c04422d3b934d4 + .quad 0xadd9f24bdd5deadc + .quad 0x074f46e69f10ff8c + .quad 0x0de9b204a059a445 + .quad 0xe15cb4aa4b17ad0f + .quad 0xe1bbec521f79c557 + .quad 0x2633f1b9d071081b + .quad 0xc1fb4177018b9910 + .quad 0xa6ea20dc6c0fe140 + .quad 0xd661f3e74354c6ff + .quad 0x5ecb72e6f1a3407a + + // 2^148 * 2 * G + + .quad 0xa515a31b2259fb4e + .quad 0x0960f3972bcac52f + .quad 0xedb52fec8d3454cb + .quad 0x382e2720c476c019 + .quad 0xfeeae106e8e86997 + .quad 0x9863337f98d09383 + .quad 0x9470480eaa06ebef + .quad 0x038b6898d4c5c2d0 + .quad 0xf391c51d8ace50a6 + .quad 0x3142d0b9ae2d2948 + .quad 0xdb4d5a1a7f24ca80 + .quad 0x21aeba8b59250ea8 + + // 2^148 * 3 * G + + .quad 0x24f13b34cf405530 + .quad 0x3c44ea4a43088af7 + .quad 0x5dd5c5170006a482 + .quad 0x118eb8f8890b086d + .quad 0x53853600f0087f23 + .quad 0x4c461879da7d5784 + .quad 0x6af303deb41f6860 + .quad 0x0a3c16c5c27c18ed + .quad 0x17e49c17cc947f3d + .quad 0xccc6eda6aac1d27b + .quad 0xdf6092ceb0f08e56 + .quad 0x4909b3e22c67c36b + + // 2^148 * 4 * G + + .quad 0x9c9c85ea63fe2e89 + .quad 0xbe1baf910e9412ec + .quad 0x8f7baa8a86fbfe7b + .quad 0x0fb17f9fef968b6c + .quad 0x59a16676706ff64e + .quad 0x10b953dd0d86a53d + .quad 0x5848e1e6ce5c0b96 + .quad 0x2d8b78e712780c68 + .quad 0x79d5c62eafc3902b + .quad 0x773a215289e80728 + .quad 0xc38ae640e10120b9 + .quad 0x09ae23717b2b1a6d + + // 2^148 * 5 * G + + .quad 0xbb6a192a4e4d083c + .quad 0x34ace0630029e192 + .quad 0x98245a59aafabaeb + .quad 0x6d9c8a9ada97faac + .quad 0x10ab8fa1ad32b1d0 + .quad 0xe9aced1be2778b24 + .quad 0xa8856bc0373de90f + .quad 0x66f35ddddda53996 + .quad 0xd27d9afb24997323 + .quad 0x1bb7e07ef6f01d2e + .quad 0x2ba7472df52ecc7f + .quad 0x03019b4f646f9dc8 + + // 2^148 * 6 * G + + .quad 0x04a186b5565345cd + .quad 0xeee76610bcc4116a + .quad 0x689c73b478fb2a45 + .quad 0x387dcbff65697512 + .quad 0xaf09b214e6b3dc6b + .quad 0x3f7573b5ad7d2f65 + .quad 0xd019d988100a23b0 + .quad 0x392b63a58b5c35f7 + .quad 0x4093addc9c07c205 + .quad 0xc565be15f532c37e + .quad 0x63dbecfd1583402a + .quad 0x61722b4aef2e032e + + // 2^148 * 7 * G + + .quad 0x0012aafeecbd47af + .quad 0x55a266fb1cd46309 + .quad 0xf203eb680967c72c + .quad 0x39633944ca3c1429 + .quad 0xd6b07a5581cb0e3c + .quad 0x290ff006d9444969 + .quad 0x08680b6a16dcda1f + .quad 
0x5568d2b75a06de59 + .quad 0x8d0cb88c1b37cfe1 + .quad 0x05b6a5a3053818f3 + .quad 0xf2e9bc04b787d959 + .quad 0x6beba1249add7f64 + + // 2^148 * 8 * G + + .quad 0x1d06005ca5b1b143 + .quad 0x6d4c6bb87fd1cda2 + .quad 0x6ef5967653fcffe7 + .quad 0x097c29e8c1ce1ea5 + .quad 0x5c3cecb943f5a53b + .quad 0x9cc9a61d06c08df2 + .quad 0xcfba639a85895447 + .quad 0x5a845ae80df09fd5 + .quad 0x4ce97dbe5deb94ca + .quad 0x38d0a4388c709c48 + .quad 0xc43eced4a169d097 + .quad 0x0a1249fff7e587c3 + + // 2^152 * 1 * G + + .quad 0x12f0071b276d01c9 + .quad 0xe7b8bac586c48c70 + .quad 0x5308129b71d6fba9 + .quad 0x5d88fbf95a3db792 + .quad 0x0b408d9e7354b610 + .quad 0x806b32535ba85b6e + .quad 0xdbe63a034a58a207 + .quad 0x173bd9ddc9a1df2c + .quad 0x2b500f1efe5872df + .quad 0x58d6582ed43918c1 + .quad 0xe6ed278ec9673ae0 + .quad 0x06e1cd13b19ea319 + + // 2^152 * 2 * G + + .quad 0x40d0ad516f166f23 + .quad 0x118e32931fab6abe + .quad 0x3fe35e14a04d088e + .quad 0x3080603526e16266 + .quad 0x472baf629e5b0353 + .quad 0x3baa0b90278d0447 + .quad 0x0c785f469643bf27 + .quad 0x7f3a6a1a8d837b13 + .quad 0xf7e644395d3d800b + .quad 0x95a8d555c901edf6 + .quad 0x68cd7830592c6339 + .quad 0x30d0fded2e51307e + + // 2^152 * 3 * G + + .quad 0xe0594d1af21233b3 + .quad 0x1bdbe78ef0cc4d9c + .quad 0x6965187f8f499a77 + .quad 0x0a9214202c099868 + .quad 0x9cb4971e68b84750 + .quad 0xa09572296664bbcf + .quad 0x5c8de72672fa412b + .quad 0x4615084351c589d9 + .quad 0xbc9019c0aeb9a02e + .quad 0x55c7110d16034cae + .quad 0x0e6df501659932ec + .quad 0x3bca0d2895ca5dfe + + // 2^152 * 4 * G + + .quad 0x40f031bc3c5d62a4 + .quad 0x19fc8b3ecff07a60 + .quad 0x98183da2130fb545 + .quad 0x5631deddae8f13cd + .quad 0x9c688eb69ecc01bf + .quad 0xf0bc83ada644896f + .quad 0xca2d955f5f7a9fe2 + .quad 0x4ea8b4038df28241 + .quad 0x2aed460af1cad202 + .quad 0x46305305a48cee83 + .quad 0x9121774549f11a5f + .quad 0x24ce0930542ca463 + + // 2^152 * 5 * G + + .quad 0x1fe890f5fd06c106 + .quad 0xb5c468355d8810f2 + .quad 0x827808fe6e8caf3e + .quad 0x41d4e3c28a06d74b + .quad 0x3fcfa155fdf30b85 + .quad 0xd2f7168e36372ea4 + .quad 0xb2e064de6492f844 + .quad 0x549928a7324f4280 + .quad 0xf26e32a763ee1a2e + .quad 0xae91e4b7d25ffdea + .quad 0xbc3bd33bd17f4d69 + .quad 0x491b66dec0dcff6a + + // 2^152 * 6 * G + + .quad 0x98f5b13dc7ea32a7 + .quad 0xe3d5f8cc7e16db98 + .quad 0xac0abf52cbf8d947 + .quad 0x08f338d0c85ee4ac + .quad 0x75f04a8ed0da64a1 + .quad 0xed222caf67e2284b + .quad 0x8234a3791f7b7ba4 + .quad 0x4cf6b8b0b7018b67 + .quad 0xc383a821991a73bd + .quad 0xab27bc01df320c7a + .quad 0xc13d331b84777063 + .quad 0x530d4a82eb078a99 + + // 2^152 * 7 * G + + .quad 0x004c3630e1f94825 + .quad 0x7e2d78268cab535a + .quad 0xc7482323cc84ff8b + .quad 0x65ea753f101770b9 + .quad 0x6d6973456c9abf9e + .quad 0x257fb2fc4900a880 + .quad 0x2bacf412c8cfb850 + .quad 0x0db3e7e00cbfbd5b + .quad 0x3d66fc3ee2096363 + .quad 0x81d62c7f61b5cb6b + .quad 0x0fbe044213443b1a + .quad 0x02a4ec1921e1a1db + + // 2^152 * 8 * G + + .quad 0x5ce6259a3b24b8a2 + .quad 0xb8577acc45afa0b8 + .quad 0xcccbe6e88ba07037 + .quad 0x3d143c51127809bf + .quad 0xf5c86162f1cf795f + .quad 0x118c861926ee57f2 + .quad 0x172124851c063578 + .quad 0x36d12b5dec067fcf + .quad 0x126d279179154557 + .quad 0xd5e48f5cfc783a0a + .quad 0x36bdb6e8df179bac + .quad 0x2ef517885ba82859 + + // 2^156 * 1 * G + + .quad 0x88bd438cd11e0d4a + .quad 0x30cb610d43ccf308 + .quad 0xe09a0e3791937bcc + .quad 0x4559135b25b1720c + .quad 0x1ea436837c6da1e9 + .quad 0xf9c189af1fb9bdbe + .quad 0x303001fcce5dd155 + .quad 0x28a7c99ebc57be52 + .quad 0xb8fd9399e8d19e9d + .quad 0x908191cb962423ff + .quad 
0xb2b948d747c742a3 + .quad 0x37f33226d7fb44c4 + + // 2^156 * 2 * G + + .quad 0x0dae8767b55f6e08 + .quad 0x4a43b3b35b203a02 + .quad 0xe3725a6e80af8c79 + .quad 0x0f7a7fd1705fa7a3 + .quad 0x33912553c821b11d + .quad 0x66ed42c241e301df + .quad 0x066fcc11104222fd + .quad 0x307a3b41c192168f + .quad 0x8eeb5d076eb55ce0 + .quad 0x2fc536bfaa0d925a + .quad 0xbe81830fdcb6c6e8 + .quad 0x556c7045827baf52 + + // 2^156 * 3 * G + + .quad 0x8e2b517302e9d8b7 + .quad 0xe3e52269248714e8 + .quad 0xbd4fbd774ca960b5 + .quad 0x6f4b4199c5ecada9 + .quad 0xb94b90022bf44406 + .quad 0xabd4237eff90b534 + .quad 0x7600a960faf86d3a + .quad 0x2f45abdac2322ee3 + .quad 0x61af4912c8ef8a6a + .quad 0xe58fa4fe43fb6e5e + .quad 0xb5afcc5d6fd427cf + .quad 0x6a5393281e1e11eb + + // 2^156 * 4 * G + + .quad 0xf3da5139a5d1ee89 + .quad 0x8145457cff936988 + .quad 0x3f622fed00e188c4 + .quad 0x0f513815db8b5a3d + .quad 0x0fff04fe149443cf + .quad 0x53cac6d9865cddd7 + .quad 0x31385b03531ed1b7 + .quad 0x5846a27cacd1039d + .quad 0x4ff5cdac1eb08717 + .quad 0x67e8b29590f2e9bc + .quad 0x44093b5e237afa99 + .quad 0x0d414bed8708b8b2 + + // 2^156 * 5 * G + + .quad 0xcfb68265fd0e75f6 + .quad 0xe45b3e28bb90e707 + .quad 0x7242a8de9ff92c7a + .quad 0x685b3201933202dd + .quad 0x81886a92294ac9e8 + .quad 0x23162b45d55547be + .quad 0x94cfbc4403715983 + .quad 0x50eb8fdb134bc401 + .quad 0xc0b73ec6d6b330cd + .quad 0x84e44807132faff1 + .quad 0x732b7352c4a5dee1 + .quad 0x5d7c7cf1aa7cd2d2 + + // 2^156 * 6 * G + + .quad 0xaf3b46bf7a4aafa2 + .quad 0xb78705ec4d40d411 + .quad 0x114f0c6aca7c15e3 + .quad 0x3f364faaa9489d4d + .quad 0x33d1013e9b73a562 + .quad 0x925cef5748ec26e1 + .quad 0xa7fce614dd468058 + .quad 0x78b0fad41e9aa438 + .quad 0xbf56a431ed05b488 + .quad 0xa533e66c9c495c7e + .quad 0xe8652baf87f3651a + .quad 0x0241800059d66c33 + + // 2^156 * 7 * G + + .quad 0xceb077fea37a5be4 + .quad 0xdb642f02e5a5eeb7 + .quad 0xc2e6d0c5471270b8 + .quad 0x4771b65538e4529c + .quad 0x28350c7dcf38ea01 + .quad 0x7c6cdbc0b2917ab6 + .quad 0xace7cfbe857082f7 + .quad 0x4d2845aba2d9a1e0 + .quad 0xbb537fe0447070de + .quad 0xcba744436dd557df + .quad 0xd3b5a3473600dbcb + .quad 0x4aeabbe6f9ffd7f8 + + // 2^156 * 8 * G + + .quad 0x4630119e40d8f78c + .quad 0xa01a9bc53c710e11 + .quad 0x486d2b258910dd79 + .quad 0x1e6c47b3db0324e5 + .quad 0x6a2134bcc4a9c8f2 + .quad 0xfbf8fd1c8ace2e37 + .quad 0x000ae3049911a0ba + .quad 0x046e3a616bc89b9e + .quad 0x14e65442f03906be + .quad 0x4a019d54e362be2a + .quad 0x68ccdfec8dc230c7 + .quad 0x7cfb7e3faf6b861c + + // 2^160 * 1 * G + + .quad 0x4637974e8c58aedc + .quad 0xb9ef22fbabf041a4 + .quad 0xe185d956e980718a + .quad 0x2f1b78fab143a8a6 + .quad 0x96eebffb305b2f51 + .quad 0xd3f938ad889596b8 + .quad 0xf0f52dc746d5dd25 + .quad 0x57968290bb3a0095 + .quad 0xf71ab8430a20e101 + .quad 0xf393658d24f0ec47 + .quad 0xcf7509a86ee2eed1 + .quad 0x7dc43e35dc2aa3e1 + + // 2^160 * 2 * G + + .quad 0x85966665887dd9c3 + .quad 0xc90f9b314bb05355 + .quad 0xc6e08df8ef2079b1 + .quad 0x7ef72016758cc12f + .quad 0x5a782a5c273e9718 + .quad 0x3576c6995e4efd94 + .quad 0x0f2ed8051f237d3e + .quad 0x044fb81d82d50a99 + .quad 0xc1df18c5a907e3d9 + .quad 0x57b3371dce4c6359 + .quad 0xca704534b201bb49 + .quad 0x7f79823f9c30dd2e + + // 2^160 * 3 * G + + .quad 0x8334d239a3b513e8 + .quad 0xc13670d4b91fa8d8 + .quad 0x12b54136f590bd33 + .quad 0x0a4e0373d784d9b4 + .quad 0x6a9c1ff068f587ba + .quad 0x0827894e0050c8de + .quad 0x3cbf99557ded5be7 + .quad 0x64a9b0431c06d6f0 + .quad 0x2eb3d6a15b7d2919 + .quad 0xb0b4f6a0d53a8235 + .quad 0x7156ce4389a45d47 + .quad 0x071a7d0ace18346c + + // 2^160 * 4 * G + + .quad 
0xd3072daac887ba0b + .quad 0x01262905bfa562ee + .quad 0xcf543002c0ef768b + .quad 0x2c3bcc7146ea7e9c + .quad 0xcc0c355220e14431 + .quad 0x0d65950709b15141 + .quad 0x9af5621b209d5f36 + .quad 0x7c69bcf7617755d3 + .quad 0x07f0d7eb04e8295f + .quad 0x10db18252f50f37d + .quad 0xe951a9a3171798d7 + .quad 0x6f5a9a7322aca51d + + // 2^160 * 5 * G + + .quad 0x8ba1000c2f41c6c5 + .quad 0xc49f79c10cfefb9b + .quad 0x4efa47703cc51c9f + .quad 0x494e21a2e147afca + .quad 0xe729d4eba3d944be + .quad 0x8d9e09408078af9e + .quad 0x4525567a47869c03 + .quad 0x02ab9680ee8d3b24 + .quad 0xefa48a85dde50d9a + .quad 0x219a224e0fb9a249 + .quad 0xfa091f1dd91ef6d9 + .quad 0x6b5d76cbea46bb34 + + // 2^160 * 6 * G + + .quad 0x8857556cec0cd994 + .quad 0x6472dc6f5cd01dba + .quad 0xaf0169148f42b477 + .quad 0x0ae333f685277354 + .quad 0xe0f941171e782522 + .quad 0xf1e6ae74036936d3 + .quad 0x408b3ea2d0fcc746 + .quad 0x16fb869c03dd313e + .quad 0x288e199733b60962 + .quad 0x24fc72b4d8abe133 + .quad 0x4811f7ed0991d03e + .quad 0x3f81e38b8f70d075 + + // 2^160 * 7 * G + + .quad 0x7f910fcc7ed9affe + .quad 0x545cb8a12465874b + .quad 0xa8397ed24b0c4704 + .quad 0x50510fc104f50993 + .quad 0x0adb7f355f17c824 + .quad 0x74b923c3d74299a4 + .quad 0xd57c3e8bcbf8eaf7 + .quad 0x0ad3e2d34cdedc3d + .quad 0x6f0c0fc5336e249d + .quad 0x745ede19c331cfd9 + .quad 0xf2d6fd0009eefe1c + .quad 0x127c158bf0fa1ebe + + // 2^160 * 8 * G + + .quad 0xf6197c422e9879a2 + .quad 0xa44addd452ca3647 + .quad 0x9b413fc14b4eaccb + .quad 0x354ef87d07ef4f68 + .quad 0xdea28fc4ae51b974 + .quad 0x1d9973d3744dfe96 + .quad 0x6240680b873848a8 + .quad 0x4ed82479d167df95 + .quad 0xfee3b52260c5d975 + .quad 0x50352efceb41b0b8 + .quad 0x8808ac30a9f6653c + .quad 0x302d92d20539236d + + // 2^164 * 1 * G + + .quad 0x4c59023fcb3efb7c + .quad 0x6c2fcb99c63c2a94 + .quad 0xba4190e2c3c7e084 + .quad 0x0e545daea51874d9 + .quad 0x957b8b8b0df53c30 + .quad 0x2a1c770a8e60f098 + .quad 0xbbc7a670345796de + .quad 0x22a48f9a90c99bc9 + .quad 0x6b7dc0dc8d3fac58 + .quad 0x5497cd6ce6e42bfd + .quad 0x542f7d1bf400d305 + .quad 0x4159f47f048d9136 + + // 2^164 * 2 * G + + .quad 0x20ad660839e31e32 + .quad 0xf81e1bd58405be50 + .quad 0xf8064056f4dabc69 + .quad 0x14d23dd4ce71b975 + .quad 0x748515a8bbd24839 + .quad 0x77128347afb02b55 + .quad 0x50ba2ac649a2a17f + .quad 0x060525513ad730f1 + .quad 0xf2398e098aa27f82 + .quad 0x6d7982bb89a1b024 + .quad 0xfa694084214dd24c + .quad 0x71ab966fa32301c3 + + // 2^164 * 3 * G + + .quad 0x2dcbd8e34ded02fc + .quad 0x1151f3ec596f22aa + .quad 0xbca255434e0328da + .quad 0x35768fbe92411b22 + .quad 0xb1088a0702809955 + .quad 0x43b273ea0b43c391 + .quad 0xca9b67aefe0686ed + .quad 0x605eecbf8335f4ed + .quad 0x83200a656c340431 + .quad 0x9fcd71678ee59c2f + .quad 0x75d4613f71300f8a + .quad 0x7a912faf60f542f9 + + // 2^164 * 4 * G + + .quad 0xb204585e5edc1a43 + .quad 0x9f0e16ee5897c73c + .quad 0x5b82c0ae4e70483c + .quad 0x624a170e2bddf9be + .quad 0x253f4f8dfa2d5597 + .quad 0x25e49c405477130c + .quad 0x00c052e5996b1102 + .quad 0x33cb966e33bb6c4a + .quad 0x597028047f116909 + .quad 0x828ac41c1e564467 + .quad 0x70417dbde6217387 + .quad 0x721627aefbac4384 + + // 2^164 * 5 * G + + .quad 0x97d03bc38736add5 + .quad 0x2f1422afc532b130 + .quad 0x3aa68a057101bbc4 + .quad 0x4c946cf7e74f9fa7 + .quad 0xfd3097bc410b2f22 + .quad 0xf1a05da7b5cfa844 + .quad 0x61289a1def57ca74 + .quad 0x245ea199bb821902 + .quad 0xaedca66978d477f8 + .quad 0x1898ba3c29117fe1 + .quad 0xcf73f983720cbd58 + .quad 0x67da12e6b8b56351 + + // 2^164 * 6 * G + + .quad 0x7067e187b4bd6e07 + .quad 0x6e8f0203c7d1fe74 + .quad 0x93c6aa2f38c85a30 + .quad 
0x76297d1f3d75a78a + .quad 0x2b7ef3d38ec8308c + .quad 0x828fd7ec71eb94ab + .quad 0x807c3b36c5062abd + .quad 0x0cb64cb831a94141 + .quad 0x3030fc33534c6378 + .quad 0xb9635c5ce541e861 + .quad 0x15d9a9bed9b2c728 + .quad 0x49233ea3f3775dcb + + // 2^164 * 7 * G + + .quad 0x629398fa8dbffc3a + .quad 0xe12fe52dd54db455 + .quad 0xf3be11dfdaf25295 + .quad 0x628b140dce5e7b51 + .quad 0x7b3985fe1c9f249b + .quad 0x4fd6b2d5a1233293 + .quad 0xceb345941adf4d62 + .quad 0x6987ff6f542de50c + .quad 0x47e241428f83753c + .quad 0x6317bebc866af997 + .quad 0xdabb5b433d1a9829 + .quad 0x074d8d245287fb2d + + // 2^164 * 8 * G + + .quad 0x8337d9cd440bfc31 + .quad 0x729d2ca1af318fd7 + .quad 0xa040a4a4772c2070 + .quad 0x46002ef03a7349be + .quad 0x481875c6c0e31488 + .quad 0x219429b2e22034b4 + .quad 0x7223c98a31283b65 + .quad 0x3420d60b342277f9 + .quad 0xfaa23adeaffe65f7 + .quad 0x78261ed45be0764c + .quad 0x441c0a1e2f164403 + .quad 0x5aea8e567a87d395 + + // 2^168 * 1 * G + + .quad 0x7813c1a2bca4283d + .quad 0xed62f091a1863dd9 + .quad 0xaec7bcb8c268fa86 + .quad 0x10e5d3b76f1cae4c + .quad 0x2dbc6fb6e4e0f177 + .quad 0x04e1bf29a4bd6a93 + .quad 0x5e1966d4787af6e8 + .quad 0x0edc5f5eb426d060 + .quad 0x5453bfd653da8e67 + .quad 0xe9dc1eec24a9f641 + .quad 0xbf87263b03578a23 + .quad 0x45b46c51361cba72 + + // 2^168 * 2 * G + + .quad 0xa9402abf314f7fa1 + .quad 0xe257f1dc8e8cf450 + .quad 0x1dbbd54b23a8be84 + .quad 0x2177bfa36dcb713b + .quad 0xce9d4ddd8a7fe3e4 + .quad 0xab13645676620e30 + .quad 0x4b594f7bb30e9958 + .quad 0x5c1c0aef321229df + .quad 0x37081bbcfa79db8f + .quad 0x6048811ec25f59b3 + .quad 0x087a76659c832487 + .quad 0x4ae619387d8ab5bb + + // 2^168 * 3 * G + + .quad 0x8ddbf6aa5344a32e + .quad 0x7d88eab4b41b4078 + .quad 0x5eb0eb974a130d60 + .quad 0x1a00d91b17bf3e03 + .quad 0x61117e44985bfb83 + .quad 0xfce0462a71963136 + .quad 0x83ac3448d425904b + .quad 0x75685abe5ba43d64 + .quad 0x6e960933eb61f2b2 + .quad 0x543d0fa8c9ff4952 + .quad 0xdf7275107af66569 + .quad 0x135529b623b0e6aa + + // 2^168 * 4 * G + + .quad 0x18f0dbd7add1d518 + .quad 0x979f7888cfc11f11 + .quad 0x8732e1f07114759b + .quad 0x79b5b81a65ca3a01 + .quad 0xf5c716bce22e83fe + .quad 0xb42beb19e80985c1 + .quad 0xec9da63714254aae + .quad 0x5972ea051590a613 + .quad 0x0fd4ac20dc8f7811 + .quad 0x9a9ad294ac4d4fa8 + .quad 0xc01b2d64b3360434 + .quad 0x4f7e9c95905f3bdb + + // 2^168 * 5 * G + + .quad 0x62674bbc5781302e + .quad 0xd8520f3989addc0f + .quad 0x8c2999ae53fbd9c6 + .quad 0x31993ad92e638e4c + .quad 0x71c8443d355299fe + .quad 0x8bcd3b1cdbebead7 + .quad 0x8092499ef1a49466 + .quad 0x1942eec4a144adc8 + .quad 0x7dac5319ae234992 + .quad 0x2c1b3d910cea3e92 + .quad 0x553ce494253c1122 + .quad 0x2a0a65314ef9ca75 + + // 2^168 * 6 * G + + .quad 0x2db7937ff7f927c2 + .quad 0xdb741f0617d0a635 + .quad 0x5982f3a21155af76 + .quad 0x4cf6e218647c2ded + .quad 0xcf361acd3c1c793a + .quad 0x2f9ebcac5a35bc3b + .quad 0x60e860e9a8cda6ab + .quad 0x055dc39b6dea1a13 + .quad 0xb119227cc28d5bb6 + .quad 0x07e24ebc774dffab + .quad 0xa83c78cee4a32c89 + .quad 0x121a307710aa24b6 + + // 2^168 * 7 * G + + .quad 0xe4db5d5e9f034a97 + .quad 0xe153fc093034bc2d + .quad 0x460546919551d3b1 + .quad 0x333fc76c7a40e52d + .quad 0xd659713ec77483c9 + .quad 0x88bfe077b82b96af + .quad 0x289e28231097bcd3 + .quad 0x527bb94a6ced3a9b + .quad 0x563d992a995b482e + .quad 0x3405d07c6e383801 + .quad 0x485035de2f64d8e5 + .quad 0x6b89069b20a7a9f7 + + // 2^168 * 8 * G + + .quad 0x812aa0416270220d + .quad 0x995a89faf9245b4e + .quad 0xffadc4ce5072ef05 + .quad 0x23bc2103aa73eb73 + .quad 0x4082fa8cb5c7db77 + .quad 0x068686f8c734c155 + .quad 
0x29e6c8d9f6e7a57e + .quad 0x0473d308a7639bcf + .quad 0xcaee792603589e05 + .quad 0x2b4b421246dcc492 + .quad 0x02a1ef74e601a94f + .quad 0x102f73bfde04341a + + // 2^172 * 1 * G + + .quad 0xb5a2d50c7ec20d3e + .quad 0xc64bdd6ea0c97263 + .quad 0x56e89052c1ff734d + .quad 0x4929c6f72b2ffaba + .quad 0x358ecba293a36247 + .quad 0xaf8f9862b268fd65 + .quad 0x412f7e9968a01c89 + .quad 0x5786f312cd754524 + .quad 0x337788ffca14032c + .quad 0xf3921028447f1ee3 + .quad 0x8b14071f231bccad + .quad 0x4c817b4bf2344783 + + // 2^172 * 2 * G + + .quad 0x0ff853852871b96e + .quad 0xe13e9fab60c3f1bb + .quad 0xeefd595325344402 + .quad 0x0a37c37075b7744b + .quad 0x413ba057a40b4484 + .quad 0xba4c2e1a4f5f6a43 + .quad 0x614ba0a5aee1d61c + .quad 0x78a1531a8b05dc53 + .quad 0x6cbdf1703ad0562b + .quad 0x8ecf4830c92521a3 + .quad 0xdaebd303fd8424e7 + .quad 0x72ad82a42e5ec56f + + // 2^172 * 3 * G + + .quad 0x3f9e8e35bafb65f6 + .quad 0x39d69ec8f27293a1 + .quad 0x6cb8cd958cf6a3d0 + .quad 0x1734778173adae6d + .quad 0xc368939167024bc3 + .quad 0x8e69d16d49502fda + .quad 0xfcf2ec3ce45f4b29 + .quad 0x065f669ea3b4cbc4 + .quad 0x8a00aec75532db4d + .quad 0xb869a4e443e31bb1 + .quad 0x4a0f8552d3a7f515 + .quad 0x19adeb7c303d7c08 + + // 2^172 * 4 * G + + .quad 0xc720cb6153ead9a3 + .quad 0x55b2c97f512b636e + .quad 0xb1e35b5fd40290b1 + .quad 0x2fd9ccf13b530ee2 + .quad 0x9d05ba7d43c31794 + .quad 0x2470c8ff93322526 + .quad 0x8323dec816197438 + .quad 0x2852709881569b53 + .quad 0x07bd475b47f796b8 + .quad 0xd2c7b013542c8f54 + .quad 0x2dbd23f43b24f87e + .quad 0x6551afd77b0901d6 + + // 2^172 * 5 * G + + .quad 0x4546baaf54aac27f + .quad 0xf6f66fecb2a45a28 + .quad 0x582d1b5b562bcfe8 + .quad 0x44b123f3920f785f + .quad 0x68a24ce3a1d5c9ac + .quad 0xbb77a33d10ff6461 + .quad 0x0f86ce4425d3166e + .quad 0x56507c0950b9623b + .quad 0x1206f0b7d1713e63 + .quad 0x353fe3d915bafc74 + .quad 0x194ceb970ad9d94d + .quad 0x62fadd7cf9d03ad3 + + // 2^172 * 6 * G + + .quad 0xc6b5967b5598a074 + .quad 0x5efe91ce8e493e25 + .quad 0xd4b72c4549280888 + .quad 0x20ef1149a26740c2 + .quad 0x3cd7bc61e7ce4594 + .quad 0xcd6b35a9b7dd267e + .quad 0xa080abc84366ef27 + .quad 0x6ec7c46f59c79711 + .quad 0x2f07ad636f09a8a2 + .quad 0x8697e6ce24205e7d + .quad 0xc0aefc05ee35a139 + .quad 0x15e80958b5f9d897 + + // 2^172 * 7 * G + + .quad 0x25a5ef7d0c3e235b + .quad 0x6c39c17fbe134ee7 + .quad 0xc774e1342dc5c327 + .quad 0x021354b892021f39 + .quad 0x4dd1ed355bb061c4 + .quad 0x42dc0cef941c0700 + .quad 0x61305dc1fd86340e + .quad 0x56b2cc930e55a443 + .quad 0x1df79da6a6bfc5a2 + .quad 0x02f3a2749fde4369 + .quad 0xb323d9f2cda390a7 + .quad 0x7be0847b8774d363 + + // 2^172 * 8 * G + + .quad 0x8c99cc5a8b3f55c3 + .quad 0x0611d7253fded2a0 + .quad 0xed2995ff36b70a36 + .quad 0x1f699a54d78a2619 + .quad 0x1466f5af5307fa11 + .quad 0x817fcc7ded6c0af2 + .quad 0x0a6de44ec3a4a3fb + .quad 0x74071475bc927d0b + .quad 0xe77292f373e7ea8a + .quad 0x296537d2cb045a31 + .quad 0x1bd0653ed3274fde + .quad 0x2f9a2c4476bd2966 + + // 2^176 * 1 * G + + .quad 0xeb18b9ab7f5745c6 + .quad 0x023a8aee5787c690 + .quad 0xb72712da2df7afa9 + .quad 0x36597d25ea5c013d + .quad 0xa2b4dae0b5511c9a + .quad 0x7ac860292bffff06 + .quad 0x981f375df5504234 + .quad 0x3f6bd725da4ea12d + .quad 0x734d8d7b106058ac + .quad 0xd940579e6fc6905f + .quad 0x6466f8f99202932d + .quad 0x7b7ecc19da60d6d0 + + // 2^176 * 2 * G + + .quad 0x78c2373c695c690d + .quad 0xdd252e660642906e + .quad 0x951d44444ae12bd2 + .quad 0x4235ad7601743956 + .quad 0x6dae4a51a77cfa9b + .quad 0x82263654e7a38650 + .quad 0x09bbffcd8f2d82db + .quad 0x03bedc661bf5caba + .quad 0x6258cb0d078975f5 + .quad 
0x492942549189f298 + .quad 0xa0cab423e2e36ee4 + .quad 0x0e7ce2b0cdf066a1 + + // 2^176 * 3 * G + + .quad 0xc494643ac48c85a3 + .quad 0xfd361df43c6139ad + .quad 0x09db17dd3ae94d48 + .quad 0x666e0a5d8fb4674a + .quad 0xfea6fedfd94b70f9 + .quad 0xf130c051c1fcba2d + .quad 0x4882d47e7f2fab89 + .quad 0x615256138aeceeb5 + .quad 0x2abbf64e4870cb0d + .quad 0xcd65bcf0aa458b6b + .quad 0x9abe4eba75e8985d + .quad 0x7f0bc810d514dee4 + + // 2^176 * 4 * G + + .quad 0xb9006ba426f4136f + .quad 0x8d67369e57e03035 + .quad 0xcbc8dfd94f463c28 + .quad 0x0d1f8dbcf8eedbf5 + .quad 0x83ac9dad737213a0 + .quad 0x9ff6f8ba2ef72e98 + .quad 0x311e2edd43ec6957 + .quad 0x1d3a907ddec5ab75 + .quad 0xba1693313ed081dc + .quad 0x29329fad851b3480 + .quad 0x0128013c030321cb + .quad 0x00011b44a31bfde3 + + // 2^176 * 5 * G + + .quad 0x3fdfa06c3fc66c0c + .quad 0x5d40e38e4dd60dd2 + .quad 0x7ae38b38268e4d71 + .quad 0x3ac48d916e8357e1 + .quad 0x16561f696a0aa75c + .quad 0xc1bf725c5852bd6a + .quad 0x11a8dd7f9a7966ad + .quad 0x63d988a2d2851026 + .quad 0x00120753afbd232e + .quad 0xe92bceb8fdd8f683 + .quad 0xf81669b384e72b91 + .quad 0x33fad52b2368a066 + + // 2^176 * 6 * G + + .quad 0x540649c6c5e41e16 + .quad 0x0af86430333f7735 + .quad 0xb2acfcd2f305e746 + .quad 0x16c0f429a256dca7 + .quad 0x8d2cc8d0c422cfe8 + .quad 0x072b4f7b05a13acb + .quad 0xa3feb6e6ecf6a56f + .quad 0x3cc355ccb90a71e2 + .quad 0xe9b69443903e9131 + .quad 0xb8a494cb7a5637ce + .quad 0xc87cd1a4baba9244 + .quad 0x631eaf426bae7568 + + // 2^176 * 7 * G + + .quad 0xb3e90410da66fe9f + .quad 0x85dd4b526c16e5a6 + .quad 0xbc3d97611ef9bf83 + .quad 0x5599648b1ea919b5 + .quad 0x47d975b9a3700de8 + .quad 0x7280c5fbe2f80552 + .quad 0x53658f2732e45de1 + .quad 0x431f2c7f665f80b5 + .quad 0xd6026344858f7b19 + .quad 0x14ab352fa1ea514a + .quad 0x8900441a2090a9d7 + .quad 0x7b04715f91253b26 + + // 2^176 * 8 * G + + .quad 0x83edbd28acf6ae43 + .quad 0x86357c8b7d5c7ab4 + .quad 0xc0404769b7eb2c44 + .quad 0x59b37bf5c2f6583f + .quad 0xb376c280c4e6bac6 + .quad 0x970ed3dd6d1d9b0b + .quad 0xb09a9558450bf944 + .quad 0x48d0acfa57cde223 + .quad 0xb60f26e47dabe671 + .quad 0xf1d1a197622f3a37 + .quad 0x4208ce7ee9960394 + .quad 0x16234191336d3bdb + + // 2^180 * 1 * G + + .quad 0xf19aeac733a63aef + .quad 0x2c7fba5d4442454e + .quad 0x5da87aa04795e441 + .quad 0x413051e1a4e0b0f5 + .quad 0x852dd1fd3d578bbe + .quad 0x2b65ce72c3286108 + .quad 0x658c07f4eace2273 + .quad 0x0933f804ec38ab40 + .quad 0xa7ab69798d496476 + .quad 0x8121aadefcb5abc8 + .quad 0xa5dc12ef7b539472 + .quad 0x07fd47065e45351a + + // 2^180 * 2 * G + + .quad 0xc8583c3d258d2bcd + .quad 0x17029a4daf60b73f + .quad 0xfa0fc9d6416a3781 + .quad 0x1c1e5fba38b3fb23 + .quad 0x304211559ae8e7c3 + .quad 0xf281b229944882a5 + .quad 0x8a13ac2e378250e4 + .quad 0x014afa0954ba48f4 + .quad 0xcb3197001bb3666c + .quad 0x330060524bffecb9 + .quad 0x293711991a88233c + .quad 0x291884363d4ed364 + + // 2^180 * 3 * G + + .quad 0x033c6805dc4babfa + .quad 0x2c15bf5e5596ecc1 + .quad 0x1bc70624b59b1d3b + .quad 0x3ede9850a19f0ec5 + .quad 0xfb9d37c3bc1ab6eb + .quad 0x02be14534d57a240 + .quad 0xf4d73415f8a5e1f6 + .quad 0x5964f4300ccc8188 + .quad 0xe44a23152d096800 + .quad 0x5c08c55970866996 + .quad 0xdf2db60a46affb6e + .quad 0x579155c1f856fd89 + + // 2^180 * 4 * G + + .quad 0x96324edd12e0c9ef + .quad 0x468b878df2420297 + .quad 0x199a3776a4f573be + .quad 0x1e7fbcf18e91e92a + .quad 0xb5f16b630817e7a6 + .quad 0x808c69233c351026 + .quad 0x324a983b54cef201 + .quad 0x53c092084a485345 + .quad 0xd2d41481f1cbafbf + .quad 0x231d2db6716174e5 + .quad 0x0b7d7656e2a55c98 + .quad 0x3e955cd82aa495f6 + + // 
2^180 * 5 * G + + .quad 0xe48f535e3ed15433 + .quad 0xd075692a0d7270a3 + .quad 0x40fbd21daade6387 + .quad 0x14264887cf4495f5 + .quad 0xab39f3ef61bb3a3f + .quad 0x8eb400652eb9193e + .quad 0xb5de6ecc38c11f74 + .quad 0x654d7e9626f3c49f + .quad 0xe564cfdd5c7d2ceb + .quad 0x82eeafded737ccb9 + .quad 0x6107db62d1f9b0ab + .quad 0x0b6baac3b4358dbb + + // 2^180 * 6 * G + + .quad 0x7ae62bcb8622fe98 + .quad 0x47762256ceb891af + .quad 0x1a5a92bcf2e406b4 + .quad 0x7d29401784e41501 + .quad 0x204abad63700a93b + .quad 0xbe0023d3da779373 + .quad 0xd85f0346633ab709 + .quad 0x00496dc490820412 + .quad 0x1c74b88dc27e6360 + .quad 0x074854268d14850c + .quad 0xa145fb7b3e0dcb30 + .quad 0x10843f1b43803b23 + + // 2^180 * 7 * G + + .quad 0xc5f90455376276dd + .quad 0xce59158dd7645cd9 + .quad 0x92f65d511d366b39 + .quad 0x11574b6e526996c4 + .quad 0xd56f672de324689b + .quad 0xd1da8aedb394a981 + .quad 0xdd7b58fe9168cfed + .quad 0x7ce246cd4d56c1e8 + .quad 0xb8f4308e7f80be53 + .quad 0x5f3cb8cb34a9d397 + .quad 0x18a961bd33cc2b2c + .quad 0x710045fb3a9af671 + + // 2^180 * 8 * G + + .quad 0x73f93d36101b95eb + .quad 0xfaef33794f6f4486 + .quad 0x5651735f8f15e562 + .quad 0x7fa3f19058b40da1 + .quad 0xa03fc862059d699e + .quad 0x2370cfa19a619e69 + .quad 0xc4fe3b122f823deb + .quad 0x1d1b056fa7f0844e + .quad 0x1bc64631e56bf61f + .quad 0xd379ab106e5382a3 + .quad 0x4d58c57e0540168d + .quad 0x566256628442d8e4 + + // 2^184 * 1 * G + + .quad 0xb9e499def6267ff6 + .quad 0x7772ca7b742c0843 + .quad 0x23a0153fe9a4f2b1 + .quad 0x2cdfdfecd5d05006 + .quad 0xdd499cd61ff38640 + .quad 0x29cd9bc3063625a0 + .quad 0x51e2d8023dd73dc3 + .quad 0x4a25707a203b9231 + .quad 0x2ab7668a53f6ed6a + .quad 0x304242581dd170a1 + .quad 0x4000144c3ae20161 + .quad 0x5721896d248e49fc + + // 2^184 * 2 * G + + .quad 0x0b6e5517fd181bae + .quad 0x9022629f2bb963b4 + .quad 0x5509bce932064625 + .quad 0x578edd74f63c13da + .quad 0x285d5091a1d0da4e + .quad 0x4baa6fa7b5fe3e08 + .quad 0x63e5177ce19393b3 + .quad 0x03c935afc4b030fd + .quad 0x997276c6492b0c3d + .quad 0x47ccc2c4dfe205fc + .quad 0xdcd29b84dd623a3c + .quad 0x3ec2ab590288c7a2 + + // 2^184 * 3 * G + + .quad 0xa1a0d27be4d87bb9 + .quad 0xa98b4deb61391aed + .quad 0x99a0ddd073cb9b83 + .quad 0x2dd5c25a200fcace + .quad 0xa7213a09ae32d1cb + .quad 0x0f2b87df40f5c2d5 + .quad 0x0baea4c6e81eab29 + .quad 0x0e1bf66c6adbac5e + .quad 0xe2abd5e9792c887e + .quad 0x1a020018cb926d5d + .quad 0xbfba69cdbaae5f1e + .quad 0x730548b35ae88f5f + + // 2^184 * 4 * G + + .quad 0xc43551a3cba8b8ee + .quad 0x65a26f1db2115f16 + .quad 0x760f4f52ab8c3850 + .quad 0x3043443b411db8ca + .quad 0x805b094ba1d6e334 + .quad 0xbf3ef17709353f19 + .quad 0x423f06cb0622702b + .quad 0x585a2277d87845dd + .quad 0xa18a5f8233d48962 + .quad 0x6698c4b5ec78257f + .quad 0xa78e6fa5373e41ff + .quad 0x7656278950ef981f + + // 2^184 * 5 * G + + .quad 0x38c3cf59d51fc8c0 + .quad 0x9bedd2fd0506b6f2 + .quad 0x26bf109fab570e8f + .quad 0x3f4160a8c1b846a6 + .quad 0xe17073a3ea86cf9d + .quad 0x3a8cfbb707155fdc + .quad 0x4853e7fc31838a8e + .quad 0x28bbf484b613f616 + .quad 0xf2612f5c6f136c7c + .quad 0xafead107f6dd11be + .quad 0x527e9ad213de6f33 + .quad 0x1e79cb358188f75d + + // 2^184 * 6 * G + + .quad 0x013436c3eef7e3f1 + .quad 0x828b6a7ffe9e10f8 + .quad 0x7ff908e5bcf9defc + .quad 0x65d7951b3a3b3831 + .quad 0x77e953d8f5e08181 + .quad 0x84a50c44299dded9 + .quad 0xdc6c2d0c864525e5 + .quad 0x478ab52d39d1f2f4 + .quad 0x66a6a4d39252d159 + .quad 0xe5dde1bc871ac807 + .quad 0xb82c6b40a6c1c96f + .quad 0x16d87a411a212214 + + // 2^184 * 7 * G + + .quad 0xb3bd7e5a42066215 + .quad 0x879be3cd0c5a24c1 + .quad 
0x57c05db1d6f994b7 + .quad 0x28f87c8165f38ca6 + .quad 0xfba4d5e2d54e0583 + .quad 0xe21fafd72ebd99fa + .quad 0x497ac2736ee9778f + .quad 0x1f990b577a5a6dde + .quad 0xa3344ead1be8f7d6 + .quad 0x7d1e50ebacea798f + .quad 0x77c6569e520de052 + .quad 0x45882fe1534d6d3e + + // 2^184 * 8 * G + + .quad 0x6669345d757983d6 + .quad 0x62b6ed1117aa11a6 + .quad 0x7ddd1857985e128f + .quad 0x688fe5b8f626f6dd + .quad 0xd8ac9929943c6fe4 + .quad 0xb5f9f161a38392a2 + .quad 0x2699db13bec89af3 + .quad 0x7dcf843ce405f074 + .quad 0x6c90d6484a4732c0 + .quad 0xd52143fdca563299 + .quad 0xb3be28c3915dc6e1 + .quad 0x6739687e7327191b + + // 2^188 * 1 * G + + .quad 0x9f65c5ea200814cf + .quad 0x840536e169a31740 + .quad 0x8b0ed13925c8b4ad + .quad 0x0080dbafe936361d + .quad 0x8ce5aad0c9cb971f + .quad 0x1156aaa99fd54a29 + .quad 0x41f7247015af9b78 + .quad 0x1fe8cca8420f49aa + .quad 0x72a1848f3c0cc82a + .quad 0x38c560c2877c9e54 + .quad 0x5004e228ce554140 + .quad 0x042418a103429d71 + + // 2^188 * 2 * G + + .quad 0x899dea51abf3ff5f + .quad 0x9b93a8672fc2d8ba + .quad 0x2c38cb97be6ebd5c + .quad 0x114d578497263b5d + .quad 0x58e84c6f20816247 + .quad 0x8db2b2b6e36fd793 + .quad 0x977182561d484d85 + .quad 0x0822024f8632abd7 + .quad 0xb301bb7c6b1beca3 + .quad 0x55393f6dc6eb1375 + .quad 0x910d281097b6e4eb + .quad 0x1ad4548d9d479ea3 + + // 2^188 * 3 * G + + .quad 0xcd5a7da0389a48fd + .quad 0xb38fa4aa9a78371e + .quad 0xc6d9761b2cdb8e6c + .quad 0x35cf51dbc97e1443 + .quad 0xa06fe66d0fe9fed3 + .quad 0xa8733a401c587909 + .quad 0x30d14d800df98953 + .quad 0x41ce5876c7b30258 + .quad 0x59ac3bc5d670c022 + .quad 0xeae67c109b119406 + .quad 0x9798bdf0b3782fda + .quad 0x651e3201fd074092 + + // 2^188 * 4 * G + + .quad 0xd63d8483ef30c5cf + .quad 0x4cd4b4962361cc0c + .quad 0xee90e500a48426ac + .quad 0x0af51d7d18c14eeb + .quad 0xa57ba4a01efcae9e + .quad 0x769f4beedc308a94 + .quad 0xd1f10eeb3603cb2e + .quad 0x4099ce5e7e441278 + .quad 0x1ac98e4f8a5121e9 + .quad 0x7dae9544dbfa2fe0 + .quad 0x8320aa0dd6430df9 + .quad 0x667282652c4a2fb5 + + // 2^188 * 5 * G + + .quad 0x874621f4d86bc9ab + .quad 0xb54c7bbe56fe6fea + .quad 0x077a24257fadc22c + .quad 0x1ab53be419b90d39 + .quad 0xada8b6e02946db23 + .quad 0x1c0ce51a7b253ab7 + .quad 0x8448c85a66dd485b + .quad 0x7f1fc025d0675adf + .quad 0xd8ee1b18319ea6aa + .quad 0x004d88083a21f0da + .quad 0x3bd6aa1d883a4f4b + .quad 0x4db9a3a6dfd9fd14 + + // 2^188 * 6 * G + + .quad 0x8ce7b23bb99c0755 + .quad 0x35c5d6edc4f50f7a + .quad 0x7e1e2ed2ed9b50c3 + .quad 0x36305f16e8934da1 + .quad 0xd95b00bbcbb77c68 + .quad 0xddbc846a91f17849 + .quad 0x7cf700aebe28d9b3 + .quad 0x5ce1285c85d31f3e + .quad 0x31b6972d98b0bde8 + .quad 0x7d920706aca6de5b + .quad 0xe67310f8908a659f + .quad 0x50fac2a6efdf0235 + + // 2^188 * 7 * G + + .quad 0xf3d3a9f35b880f5a + .quad 0xedec050cdb03e7c2 + .quad 0xa896981ff9f0b1a2 + .quad 0x49a4ae2bac5e34a4 + .quad 0x295b1c86f6f449bc + .quad 0x51b2e84a1f0ab4dd + .quad 0xc001cb30aa8e551d + .quad 0x6a28d35944f43662 + .quad 0x28bb12ee04a740e0 + .quad 0x14313bbd9bce8174 + .quad 0x72f5b5e4e8c10c40 + .quad 0x7cbfb19936adcd5b + + // 2^188 * 8 * G + + .quad 0xa311ddc26b89792d + .quad 0x1b30b4c6da512664 + .quad 0x0ca77b4ccf150859 + .quad 0x1de443df1b009408 + .quad 0x8e793a7acc36e6e0 + .quad 0xf9fab7a37d586eed + .quad 0x3a4f9692bae1f4e4 + .quad 0x1c14b03eff5f447e + .quad 0x19647bd114a85291 + .quad 0x57b76cb21034d3af + .quad 0x6329db440f9d6dfa + .quad 0x5ef43e586a571493 + + // 2^192 * 1 * G + + .quad 0xef782014385675a6 + .quad 0xa2649f30aafda9e8 + .quad 0x4cd1eb505cdfa8cb + .quad 0x46115aba1d4dc0b3 + .quad 0xa66dcc9dc80c1ac0 + .quad 
0x97a05cf41b38a436 + .quad 0xa7ebf3be95dbd7c6 + .quad 0x7da0b8f68d7e7dab + .quad 0xd40f1953c3b5da76 + .quad 0x1dac6f7321119e9b + .quad 0x03cc6021feb25960 + .quad 0x5a5f887e83674b4b + + // 2^192 * 2 * G + + .quad 0x8f6301cf70a13d11 + .quad 0xcfceb815350dd0c4 + .quad 0xf70297d4a4bca47e + .quad 0x3669b656e44d1434 + .quad 0x9e9628d3a0a643b9 + .quad 0xb5c3cb00e6c32064 + .quad 0x9b5302897c2dec32 + .quad 0x43e37ae2d5d1c70c + .quad 0x387e3f06eda6e133 + .quad 0x67301d5199a13ac0 + .quad 0xbd5ad8f836263811 + .quad 0x6a21e6cd4fd5e9be + + // 2^192 * 3 * G + + .quad 0xf1c6170a3046e65f + .quad 0x58712a2a00d23524 + .quad 0x69dbbd3c8c82b755 + .quad 0x586bf9f1a195ff57 + .quad 0xef4129126699b2e3 + .quad 0x71d30847708d1301 + .quad 0x325432d01182b0bd + .quad 0x45371b07001e8b36 + .quad 0xa6db088d5ef8790b + .quad 0x5278f0dc610937e5 + .quad 0xac0349d261a16eb8 + .quad 0x0eafb03790e52179 + + // 2^192 * 4 * G + + .quad 0x960555c13748042f + .quad 0x219a41e6820baa11 + .quad 0x1c81f73873486d0c + .quad 0x309acc675a02c661 + .quad 0x5140805e0f75ae1d + .quad 0xec02fbe32662cc30 + .quad 0x2cebdf1eea92396d + .quad 0x44ae3344c5435bb3 + .quad 0x9cf289b9bba543ee + .quad 0xf3760e9d5ac97142 + .quad 0x1d82e5c64f9360aa + .quad 0x62d5221b7f94678f + + // 2^192 * 5 * G + + .quad 0x524c299c18d0936d + .quad 0xc86bb56c8a0c1a0c + .quad 0xa375052edb4a8631 + .quad 0x5c0efde4bc754562 + .quad 0x7585d4263af77a3c + .quad 0xdfae7b11fee9144d + .quad 0xa506708059f7193d + .quad 0x14f29a5383922037 + .quad 0xdf717edc25b2d7f5 + .quad 0x21f970db99b53040 + .quad 0xda9234b7c3ed4c62 + .quad 0x5e72365c7bee093e + + // 2^192 * 6 * G + + .quad 0x575bfc074571217f + .quad 0x3779675d0694d95b + .quad 0x9a0a37bbf4191e33 + .quad 0x77f1104c47b4eabc + .quad 0x7d9339062f08b33e + .quad 0x5b9659e5df9f32be + .quad 0xacff3dad1f9ebdfd + .quad 0x70b20555cb7349b7 + .quad 0xbe5113c555112c4c + .quad 0x6688423a9a881fcd + .quad 0x446677855e503b47 + .quad 0x0e34398f4a06404a + + // 2^192 * 7 * G + + .quad 0xb67d22d93ecebde8 + .quad 0x09b3e84127822f07 + .quad 0x743fa61fb05b6d8d + .quad 0x5e5405368a362372 + .quad 0x18930b093e4b1928 + .quad 0x7de3e10e73f3f640 + .quad 0xf43217da73395d6f + .quad 0x6f8aded6ca379c3e + .quad 0xe340123dfdb7b29a + .quad 0x487b97e1a21ab291 + .quad 0xf9967d02fde6949e + .quad 0x780de72ec8d3de97 + + // 2^192 * 8 * G + + .quad 0x0ae28545089ae7bc + .quad 0x388ddecf1c7f4d06 + .quad 0x38ac15510a4811b8 + .quad 0x0eb28bf671928ce4 + .quad 0x671feaf300f42772 + .quad 0x8f72eb2a2a8c41aa + .quad 0x29a17fd797373292 + .quad 0x1defc6ad32b587a6 + .quad 0xaf5bbe1aef5195a7 + .quad 0x148c1277917b15ed + .quad 0x2991f7fb7ae5da2e + .quad 0x467d201bf8dd2867 + + // 2^196 * 1 * G + + .quad 0x7906ee72f7bd2e6b + .quad 0x05d270d6109abf4e + .quad 0x8d5cfe45b941a8a4 + .quad 0x44c218671c974287 + .quad 0x745f9d56296bc318 + .quad 0x993580d4d8152e65 + .quad 0xb0e5b13f5839e9ce + .quad 0x51fc2b28d43921c0 + .quad 0x1b8fd11795e2a98c + .quad 0x1c4e5ee12b6b6291 + .quad 0x5b30e7107424b572 + .quad 0x6e6b9de84c4f4ac6 + + // 2^196 * 2 * G + + .quad 0xdff25fce4b1de151 + .quad 0xd841c0c7e11c4025 + .quad 0x2554b3c854749c87 + .quad 0x2d292459908e0df9 + .quad 0x6b7c5f10f80cb088 + .quad 0x736b54dc56e42151 + .quad 0xc2b620a5c6ef99c4 + .quad 0x5f4c802cc3a06f42 + .quad 0x9b65c8f17d0752da + .quad 0x881ce338c77ee800 + .quad 0xc3b514f05b62f9e3 + .quad 0x66ed5dd5bec10d48 + + // 2^196 * 3 * G + + .quad 0x7d38a1c20bb2089d + .quad 0x808334e196ccd412 + .quad 0xc4a70b8c6c97d313 + .quad 0x2eacf8bc03007f20 + .quad 0xf0adf3c9cbca047d + .quad 0x81c3b2cbf4552f6b + .quad 0xcfda112d44735f93 + .quad 0x1f23a0c77e20048c + .quad 
0xf235467be5bc1570 + .quad 0x03d2d9020dbab38c + .quad 0x27529aa2fcf9e09e + .quad 0x0840bef29d34bc50 + + // 2^196 * 4 * G + + .quad 0x796dfb35dc10b287 + .quad 0x27176bcd5c7ff29d + .quad 0x7f3d43e8c7b24905 + .quad 0x0304f5a191c54276 + .quad 0xcd54e06b7f37e4eb + .quad 0x8cc15f87f5e96cca + .quad 0xb8248bb0d3597dce + .quad 0x246affa06074400c + .quad 0x37d88e68fbe45321 + .quad 0x86097548c0d75032 + .quad 0x4e9b13ef894a0d35 + .quad 0x25a83cac5753d325 + + // 2^196 * 5 * G + + .quad 0x10222f48eed8165e + .quad 0x623fc1234b8bcf3a + .quad 0x1e145c09c221e8f0 + .quad 0x7ccfa59fca782630 + .quad 0x9f0f66293952b6e2 + .quad 0x33db5e0e0934267b + .quad 0xff45252bd609fedc + .quad 0x06be10f5c506e0c9 + .quad 0x1a9615a9b62a345f + .quad 0x22050c564a52fecc + .quad 0xa7a2788528bc0dfe + .quad 0x5e82770a1a1ee71d + + // 2^196 * 6 * G + + .quad 0x35425183ad896a5c + .quad 0xe8673afbe78d52f6 + .quad 0x2c66f25f92a35f64 + .quad 0x09d04f3b3b86b102 + .quad 0xe802e80a42339c74 + .quad 0x34175166a7fffae5 + .quad 0x34865d1f1c408cae + .quad 0x2cca982c605bc5ee + .quad 0xfd2d5d35197dbe6e + .quad 0x207c2eea8be4ffa3 + .quad 0x2613d8db325ae918 + .quad 0x7a325d1727741d3e + + // 2^196 * 7 * G + + .quad 0xd036b9bbd16dfde2 + .quad 0xa2055757c497a829 + .quad 0x8e6cc966a7f12667 + .quad 0x4d3b1a791239c180 + .quad 0xecd27d017e2a076a + .quad 0xd788689f1636495e + .quad 0x52a61af0919233e5 + .quad 0x2a479df17bb1ae64 + .quad 0x9e5eee8e33db2710 + .quad 0x189854ded6c43ca5 + .quad 0xa41c22c592718138 + .quad 0x27ad5538a43a5e9b + + // 2^196 * 8 * G + + .quad 0x2746dd4b15350d61 + .quad 0xd03fcbc8ee9521b7 + .quad 0xe86e365a138672ca + .quad 0x510e987f7e7d89e2 + .quad 0xcb5a7d638e47077c + .quad 0x8db7536120a1c059 + .quad 0x549e1e4d8bedfdcc + .quad 0x080153b7503b179d + .quad 0xdda69d930a3ed3e3 + .quad 0x3d386ef1cd60a722 + .quad 0xc817ad58bdaa4ee6 + .quad 0x23be8d554fe7372a + + // 2^200 * 1 * G + + .quad 0x95fe919a74ef4fad + .quad 0x3a827becf6a308a2 + .quad 0x964e01d309a47b01 + .quad 0x71c43c4f5ba3c797 + .quad 0xbc1ef4bd567ae7a9 + .quad 0x3f624cb2d64498bd + .quad 0xe41064d22c1f4ec8 + .quad 0x2ef9c5a5ba384001 + .quad 0xb6fd6df6fa9e74cd + .quad 0xf18278bce4af267a + .quad 0x8255b3d0f1ef990e + .quad 0x5a758ca390c5f293 + + // 2^200 * 2 * G + + .quad 0xa2b72710d9462495 + .quad 0x3aa8c6d2d57d5003 + .quad 0xe3d400bfa0b487ca + .quad 0x2dbae244b3eb72ec + .quad 0x8ce0918b1d61dc94 + .quad 0x8ded36469a813066 + .quad 0xd4e6a829afe8aad3 + .quad 0x0a738027f639d43f + .quad 0x980f4a2f57ffe1cc + .quad 0x00670d0de1839843 + .quad 0x105c3f4a49fb15fd + .quad 0x2698ca635126a69c + + // 2^200 * 3 * G + + .quad 0xe765318832b0ba78 + .quad 0x381831f7925cff8b + .quad 0x08a81b91a0291fcc + .quad 0x1fb43dcc49caeb07 + .quad 0x2e3d702f5e3dd90e + .quad 0x9e3f0918e4d25386 + .quad 0x5e773ef6024da96a + .quad 0x3c004b0c4afa3332 + .quad 0x9aa946ac06f4b82b + .quad 0x1ca284a5a806c4f3 + .quad 0x3ed3265fc6cd4787 + .quad 0x6b43fd01cd1fd217 + + // 2^200 * 4 * G + + .quad 0xc7a75d4b4697c544 + .quad 0x15fdf848df0fffbf + .quad 0x2868b9ebaa46785a + .quad 0x5a68d7105b52f714 + .quad 0xb5c742583e760ef3 + .quad 0x75dc52b9ee0ab990 + .quad 0xbf1427c2072b923f + .quad 0x73420b2d6ff0d9f0 + .quad 0xaf2cf6cb9e851e06 + .quad 0x8f593913c62238c4 + .quad 0xda8ab89699fbf373 + .quad 0x3db5632fea34bc9e + + // 2^200 * 5 * G + + .quad 0xf46eee2bf75dd9d8 + .quad 0x0d17b1f6396759a5 + .quad 0x1bf2d131499e7273 + .quad 0x04321adf49d75f13 + .quad 0x2e4990b1829825d5 + .quad 0xedeaeb873e9a8991 + .quad 0xeef03d394c704af8 + .quad 0x59197ea495df2b0e + .quad 0x04e16019e4e55aae + .quad 0xe77b437a7e2f92e9 + .quad 0xc7ce2dc16f159aa4 + .quad 
0x45eafdc1f4d70cc0 + + // 2^200 * 6 * G + + .quad 0x698401858045d72b + .quad 0x4c22faa2cf2f0651 + .quad 0x941a36656b222dc6 + .quad 0x5a5eebc80362dade + .quad 0xb60e4624cfccb1ed + .quad 0x59dbc292bd5c0395 + .quad 0x31a09d1ddc0481c9 + .quad 0x3f73ceea5d56d940 + .quad 0xb7a7bfd10a4e8dc6 + .quad 0xbe57007e44c9b339 + .quad 0x60c1207f1557aefa + .quad 0x26058891266218db + + // 2^200 * 7 * G + + .quad 0x59f704a68360ff04 + .quad 0xc3d93fde7661e6f4 + .quad 0x831b2a7312873551 + .quad 0x54ad0c2e4e615d57 + .quad 0x4c818e3cc676e542 + .quad 0x5e422c9303ceccad + .quad 0xec07cccab4129f08 + .quad 0x0dedfa10b24443b8 + .quad 0xee3b67d5b82b522a + .quad 0x36f163469fa5c1eb + .quad 0xa5b4d2f26ec19fd3 + .quad 0x62ecb2baa77a9408 + + // 2^200 * 8 * G + + .quad 0xe5ed795261152b3d + .quad 0x4962357d0eddd7d1 + .quad 0x7482c8d0b96b4c71 + .quad 0x2e59f919a966d8be + .quad 0x92072836afb62874 + .quad 0x5fcd5e8579e104a5 + .quad 0x5aad01adc630a14a + .quad 0x61913d5075663f98 + .quad 0x0dc62d361a3231da + .quad 0xfa47583294200270 + .quad 0x02d801513f9594ce + .quad 0x3ddbc2a131c05d5c + + // 2^204 * 1 * G + + .quad 0x3f50a50a4ffb81ef + .quad 0xb1e035093bf420bf + .quad 0x9baa8e1cc6aa2cd0 + .quad 0x32239861fa237a40 + .quad 0xfb735ac2004a35d1 + .quad 0x31de0f433a6607c3 + .quad 0x7b8591bfc528d599 + .quad 0x55be9a25f5bb050c + .quad 0x0d005acd33db3dbf + .quad 0x0111b37c80ac35e2 + .quad 0x4892d66c6f88ebeb + .quad 0x770eadb16508fbcd + + // 2^204 * 2 * G + + .quad 0x8451f9e05e4e89dd + .quad 0xc06302ffbc793937 + .quad 0x5d22749556a6495c + .quad 0x09a6755ca05603fb + .quad 0xf1d3b681a05071b9 + .quad 0x2207659a3592ff3a + .quad 0x5f0169297881e40e + .quad 0x16bedd0e86ba374e + .quad 0x5ecccc4f2c2737b5 + .quad 0x43b79e0c2dccb703 + .quad 0x33e008bc4ec43df3 + .quad 0x06c1b840f07566c0 + + // 2^204 * 3 * G + + .quad 0x7688a5c6a388f877 + .quad 0x02a96c14deb2b6ac + .quad 0x64c9f3431b8c2af8 + .quad 0x3628435554a1eed6 + .quad 0x69ee9e7f9b02805c + .quad 0xcbff828a547d1640 + .quad 0x3d93a869b2430968 + .quad 0x46b7b8cd3fe26972 + .quad 0xe9812086fe7eebe0 + .quad 0x4cba6be72f515437 + .quad 0x1d04168b516efae9 + .quad 0x5ea1391043982cb9 + + // 2^204 * 4 * G + + .quad 0x49125c9cf4702ee1 + .quad 0x4520b71f8b25b32d + .quad 0x33193026501fef7e + .quad 0x656d8997c8d2eb2b + .quad 0x6f2b3be4d5d3b002 + .quad 0xafec33d96a09c880 + .quad 0x035f73a4a8bcc4cc + .quad 0x22c5b9284662198b + .quad 0xcb58c8fe433d8939 + .quad 0x89a0cb2e6a8d7e50 + .quad 0x79ca955309fbbe5a + .quad 0x0c626616cd7fc106 + + // 2^204 * 5 * G + + .quad 0x1ffeb80a4879b61f + .quad 0x6396726e4ada21ed + .quad 0x33c7b093368025ba + .quad 0x471aa0c6f3c31788 + .quad 0x8fdfc379fbf454b1 + .quad 0x45a5a970f1a4b771 + .quad 0xac921ef7bad35915 + .quad 0x42d088dca81c2192 + .quad 0x8fda0f37a0165199 + .quad 0x0adadb77c8a0e343 + .quad 0x20fbfdfcc875e820 + .quad 0x1cf2bea80c2206e7 + + // 2^204 * 6 * G + + .quad 0xc2ddf1deb36202ac + .quad 0x92a5fe09d2e27aa5 + .quad 0x7d1648f6fc09f1d3 + .quad 0x74c2cc0513bc4959 + .quad 0x982d6e1a02c0412f + .quad 0x90fa4c83db58e8fe + .quad 0x01c2f5bcdcb18bc0 + .quad 0x686e0c90216abc66 + .quad 0x1fadbadba54395a7 + .quad 0xb41a02a0ae0da66a + .quad 0xbf19f598bba37c07 + .quad 0x6a12b8acde48430d + + // 2^204 * 7 * G + + .quad 0xf8daea1f39d495d9 + .quad 0x592c190e525f1dfc + .quad 0xdb8cbd04c9991d1b + .quad 0x11f7fda3d88f0cb7 + .quad 0x793bdd801aaeeb5f + .quad 0x00a2a0aac1518871 + .quad 0xe8a373a31f2136b4 + .quad 0x48aab888fc91ef19 + .quad 0x041f7e925830f40e + .quad 0x002d6ca979661c06 + .quad 0x86dc9ff92b046a2e + .quad 0x760360928b0493d1 + + // 2^204 * 8 * G + + .quad 0x21bb41c6120cf9c6 + .quad 
0xeab2aa12decda59b + .quad 0xc1a72d020aa48b34 + .quad 0x215d4d27e87d3b68 + .quad 0xb43108e5695a0b05 + .quad 0x6cb00ee8ad37a38b + .quad 0x5edad6eea3537381 + .quad 0x3f2602d4b6dc3224 + .quad 0xc8b247b65bcaf19c + .quad 0x49779dc3b1b2c652 + .quad 0x89a180bbd5ece2e2 + .quad 0x13f098a3cec8e039 + + // 2^208 * 1 * G + + .quad 0x9adc0ff9ce5ec54b + .quad 0x039c2a6b8c2f130d + .quad 0x028007c7f0f89515 + .quad 0x78968314ac04b36b + .quad 0xf3aa57a22796bb14 + .quad 0x883abab79b07da21 + .quad 0xe54be21831a0391c + .quad 0x5ee7fb38d83205f9 + .quad 0x538dfdcb41446a8e + .quad 0xa5acfda9434937f9 + .quad 0x46af908d263c8c78 + .quad 0x61d0633c9bca0d09 + + // 2^208 * 2 * G + + .quad 0x63744935ffdb2566 + .quad 0xc5bd6b89780b68bb + .quad 0x6f1b3280553eec03 + .quad 0x6e965fd847aed7f5 + .quad 0xada328bcf8fc73df + .quad 0xee84695da6f037fc + .quad 0x637fb4db38c2a909 + .quad 0x5b23ac2df8067bdc + .quad 0x9ad2b953ee80527b + .quad 0xe88f19aafade6d8d + .quad 0x0e711704150e82cf + .quad 0x79b9bbb9dd95dedc + + // 2^208 * 3 * G + + .quad 0xebb355406a3126c2 + .quad 0xd26383a868c8c393 + .quad 0x6c0c6429e5b97a82 + .quad 0x5065f158c9fd2147 + .quad 0xd1997dae8e9f7374 + .quad 0xa032a2f8cfbb0816 + .quad 0xcd6cba126d445f0a + .quad 0x1ba811460accb834 + .quad 0x708169fb0c429954 + .quad 0xe14600acd76ecf67 + .quad 0x2eaab98a70e645ba + .quad 0x3981f39e58a4faf2 + + // 2^208 * 4 * G + + .quad 0x18fb8a7559230a93 + .quad 0x1d168f6960e6f45d + .quad 0x3a85a94514a93cb5 + .quad 0x38dc083705acd0fd + .quad 0xc845dfa56de66fde + .quad 0xe152a5002c40483a + .quad 0xe9d2e163c7b4f632 + .quad 0x30f4452edcbc1b65 + .quad 0x856d2782c5759740 + .quad 0xfa134569f99cbecc + .quad 0x8844fc73c0ea4e71 + .quad 0x632d9a1a593f2469 + + // 2^208 * 5 * G + + .quad 0xf6bb6b15b807cba6 + .quad 0x1823c7dfbc54f0d7 + .quad 0xbb1d97036e29670b + .quad 0x0b24f48847ed4a57 + .quad 0xbf09fd11ed0c84a7 + .quad 0x63f071810d9f693a + .quad 0x21908c2d57cf8779 + .quad 0x3a5a7df28af64ba2 + .quad 0xdcdad4be511beac7 + .quad 0xa4538075ed26ccf2 + .quad 0xe19cff9f005f9a65 + .quad 0x34fcf74475481f63 + + // 2^208 * 6 * G + + .quad 0xc197e04c789767ca + .quad 0xb8714dcb38d9467d + .quad 0x55de888283f95fa8 + .quad 0x3d3bdc164dfa63f7 + .quad 0xa5bb1dab78cfaa98 + .quad 0x5ceda267190b72f2 + .quad 0x9309c9110a92608e + .quad 0x0119a3042fb374b0 + .quad 0x67a2d89ce8c2177d + .quad 0x669da5f66895d0c1 + .quad 0xf56598e5b282a2b0 + .quad 0x56c088f1ede20a73 + + // 2^208 * 7 * G + + .quad 0x336d3d1110a86e17 + .quad 0xd7f388320b75b2fa + .quad 0xf915337625072988 + .quad 0x09674c6b99108b87 + .quad 0x581b5fac24f38f02 + .quad 0xa90be9febae30cbd + .quad 0x9a2169028acf92f0 + .quad 0x038b7ea48359038f + .quad 0x9f4ef82199316ff8 + .quad 0x2f49d282eaa78d4f + .quad 0x0971a5ab5aef3174 + .quad 0x6e5e31025969eb65 + + // 2^208 * 8 * G + + .quad 0xb16c62f587e593fb + .quad 0x4999eddeca5d3e71 + .quad 0xb491c1e014cc3e6d + .quad 0x08f5114789a8dba8 + .quad 0x3304fb0e63066222 + .quad 0xfb35068987acba3f + .quad 0xbd1924778c1061a3 + .quad 0x3058ad43d1838620 + .quad 0x323c0ffde57663d0 + .quad 0x05c3df38a22ea610 + .quad 0xbdc78abdac994f9a + .quad 0x26549fa4efe3dc99 + + // 2^212 * 1 * G + + .quad 0x738b38d787ce8f89 + .quad 0xb62658e24179a88d + .quad 0x30738c9cf151316d + .quad 0x49128c7f727275c9 + .quad 0x04dbbc17f75396b9 + .quad 0x69e6a2d7d2f86746 + .quad 0xc6409d99f53eabc6 + .quad 0x606175f6332e25d2 + .quad 0x4021370ef540e7dd + .quad 0x0910d6f5a1f1d0a5 + .quad 0x4634aacd5b06b807 + .quad 0x6a39e6356944f235 + + // 2^212 * 2 * G + + .quad 0x96cd5640df90f3e7 + .quad 0x6c3a760edbfa25ea + .quad 0x24f3ef0959e33cc4 + .quad 0x42889e7e530d2e58 + .quad 
0x1da1965774049e9d + .quad 0xfbcd6ea198fe352b + .quad 0xb1cbcd50cc5236a6 + .quad 0x1f5ec83d3f9846e2 + .quad 0x8efb23c3328ccb75 + .quad 0xaf42a207dd876ee9 + .quad 0x20fbdadc5dfae796 + .quad 0x241e246b06bf9f51 + + // 2^212 * 3 * G + + .quad 0x29e68e57ad6e98f6 + .quad 0x4c9260c80b462065 + .quad 0x3f00862ea51ebb4b + .quad 0x5bc2c77fb38d9097 + .quad 0x7eaafc9a6280bbb8 + .quad 0x22a70f12f403d809 + .quad 0x31ce40bb1bfc8d20 + .quad 0x2bc65635e8bd53ee + .quad 0xe8d5dc9fa96bad93 + .quad 0xe58fb17dde1947dc + .quad 0x681532ea65185fa3 + .quad 0x1fdd6c3b034a7830 + + // 2^212 * 4 * G + + .quad 0x0a64e28c55dc18fe + .quad 0xe3df9e993399ebdd + .quad 0x79ac432370e2e652 + .quad 0x35ff7fc33ae4cc0e + .quad 0x9c13a6a52dd8f7a9 + .quad 0x2dbb1f8c3efdcabf + .quad 0x961e32405e08f7b5 + .quad 0x48c8a121bbe6c9e5 + .quad 0xfc415a7c59646445 + .quad 0xd224b2d7c128b615 + .quad 0x6035c9c905fbb912 + .quad 0x42d7a91274429fab + + // 2^212 * 5 * G + + .quad 0x4e6213e3eaf72ed3 + .quad 0x6794981a43acd4e7 + .quad 0xff547cde6eb508cb + .quad 0x6fed19dd10fcb532 + .quad 0xa9a48947933da5bc + .quad 0x4a58920ec2e979ec + .quad 0x96d8800013e5ac4c + .quad 0x453692d74b48b147 + .quad 0xdd775d99a8559c6f + .quad 0xf42a2140df003e24 + .quad 0x5223e229da928a66 + .quad 0x063f46ba6d38f22c + + // 2^212 * 6 * G + + .quad 0xd2d242895f536694 + .quad 0xca33a2c542939b2c + .quad 0x986fada6c7ddb95c + .quad 0x5a152c042f712d5d + .quad 0x39843cb737346921 + .quad 0xa747fb0738c89447 + .quad 0xcb8d8031a245307e + .quad 0x67810f8e6d82f068 + .quad 0x3eeb8fbcd2287db4 + .quad 0x72c7d3a301a03e93 + .quad 0x5473e88cbd98265a + .quad 0x7324aa515921b403 + + // 2^212 * 7 * G + + .quad 0x857942f46c3cbe8e + .quad 0xa1d364b14730c046 + .quad 0x1c8ed914d23c41bf + .quad 0x0838e161eef6d5d2 + .quad 0xad23f6dae82354cb + .quad 0x6962502ab6571a6d + .quad 0x9b651636e38e37d1 + .quad 0x5cac5005d1a3312f + .quad 0x8cc154cce9e39904 + .quad 0x5b3a040b84de6846 + .quad 0xc4d8a61cb1be5d6e + .quad 0x40fb897bd8861f02 + + // 2^212 * 8 * G + + .quad 0x84c5aa9062de37a1 + .quad 0x421da5000d1d96e1 + .quad 0x788286306a9242d9 + .quad 0x3c5e464a690d10da + .quad 0xe57ed8475ab10761 + .quad 0x71435e206fd13746 + .quad 0x342f824ecd025632 + .quad 0x4b16281ea8791e7b + .quad 0xd1c101d50b813381 + .quad 0xdee60f1176ee6828 + .quad 0x0cb68893383f6409 + .quad 0x6183c565f6ff484a + + // 2^216 * 1 * G + + .quad 0x741d5a461e6bf9d6 + .quad 0x2305b3fc7777a581 + .quad 0xd45574a26474d3d9 + .quad 0x1926e1dc6401e0ff + .quad 0xdb468549af3f666e + .quad 0xd77fcf04f14a0ea5 + .quad 0x3df23ff7a4ba0c47 + .quad 0x3a10dfe132ce3c85 + .quad 0xe07f4e8aea17cea0 + .quad 0x2fd515463a1fc1fd + .quad 0x175322fd31f2c0f1 + .quad 0x1fa1d01d861e5d15 + + // 2^216 * 2 * G + + .quad 0xcc8055947d599832 + .quad 0x1e4656da37f15520 + .quad 0x99f6f7744e059320 + .quad 0x773563bc6a75cf33 + .quad 0x38dcac00d1df94ab + .quad 0x2e712bddd1080de9 + .quad 0x7f13e93efdd5e262 + .quad 0x73fced18ee9a01e5 + .quad 0x06b1e90863139cb3 + .quad 0xa493da67c5a03ecd + .quad 0x8d77cec8ad638932 + .quad 0x1f426b701b864f44 + + // 2^216 * 3 * G + + .quad 0xefc9264c41911c01 + .quad 0xf1a3b7b817a22c25 + .quad 0x5875da6bf30f1447 + .quad 0x4e1af5271d31b090 + .quad 0xf17e35c891a12552 + .quad 0xb76b8153575e9c76 + .quad 0xfa83406f0d9b723e + .quad 0x0b76bb1b3fa7e438 + .quad 0x08b8c1f97f92939b + .quad 0xbe6771cbd444ab6e + .quad 0x22e5646399bb8017 + .quad 0x7b6dd61eb772a955 + + // 2^216 * 4 * G + + .quad 0xb7adc1e850f33d92 + .quad 0x7998fa4f608cd5cf + .quad 0xad962dbd8dfc5bdb + .quad 0x703e9bceaf1d2f4f + .quad 0x5730abf9ab01d2c7 + .quad 0x16fb76dc40143b18 + .quad 0x866cbe65a0cbb281 + .quad 
0x53fa9b659bff6afe + .quad 0x6c14c8e994885455 + .quad 0x843a5d6665aed4e5 + .quad 0x181bb73ebcd65af1 + .quad 0x398d93e5c4c61f50 + + // 2^216 * 5 * G + + .quad 0x1c4bd16733e248f3 + .quad 0xbd9e128715bf0a5f + .quad 0xd43f8cf0a10b0376 + .quad 0x53b09b5ddf191b13 + .quad 0xc3877c60d2e7e3f2 + .quad 0x3b34aaa030828bb1 + .quad 0x283e26e7739ef138 + .quad 0x699c9c9002c30577 + .quad 0xf306a7235946f1cc + .quad 0x921718b5cce5d97d + .quad 0x28cdd24781b4e975 + .quad 0x51caf30c6fcdd907 + + // 2^216 * 6 * G + + .quad 0xa60ba7427674e00a + .quad 0x630e8570a17a7bf3 + .quad 0x3758563dcf3324cc + .quad 0x5504aa292383fdaa + .quad 0x737af99a18ac54c7 + .quad 0x903378dcc51cb30f + .quad 0x2b89bc334ce10cc7 + .quad 0x12ae29c189f8e99a + .quad 0xa99ec0cb1f0d01cf + .quad 0x0dd1efcc3a34f7ae + .quad 0x55ca7521d09c4e22 + .quad 0x5fd14fe958eba5ea + + // 2^216 * 7 * G + + .quad 0xb5dc2ddf2845ab2c + .quad 0x069491b10a7fe993 + .quad 0x4daaf3d64002e346 + .quad 0x093ff26e586474d1 + .quad 0x3c42fe5ebf93cb8e + .quad 0xbedfa85136d4565f + .quad 0xe0f0859e884220e8 + .quad 0x7dd73f960725d128 + .quad 0xb10d24fe68059829 + .quad 0x75730672dbaf23e5 + .quad 0x1367253ab457ac29 + .quad 0x2f59bcbc86b470a4 + + // 2^216 * 8 * G + + .quad 0x83847d429917135f + .quad 0xad1b911f567d03d7 + .quad 0x7e7748d9be77aad1 + .quad 0x5458b42e2e51af4a + .quad 0x7041d560b691c301 + .quad 0x85201b3fadd7e71e + .quad 0x16c2e16311335585 + .quad 0x2aa55e3d010828b1 + .quad 0xed5192e60c07444f + .quad 0x42c54e2d74421d10 + .quad 0x352b4c82fdb5c864 + .quad 0x13e9004a8a768664 + + // 2^220 * 1 * G + + .quad 0xcbb5b5556c032bff + .quad 0xdf7191b729297a3a + .quad 0xc1ff7326aded81bb + .quad 0x71ade8bb68be03f5 + .quad 0x1e6284c5806b467c + .quad 0xc5f6997be75d607b + .quad 0x8b67d958b378d262 + .quad 0x3d88d66a81cd8b70 + .quad 0x8b767a93204ed789 + .quad 0x762fcacb9fa0ae2a + .quad 0x771febcc6dce4887 + .quad 0x343062158ff05fb3 + + // 2^220 * 2 * G + + .quad 0xe05da1a7e1f5bf49 + .quad 0x26457d6dd4736092 + .quad 0x77dcb07773cc32f6 + .quad 0x0a5d94969cdd5fcd + .quad 0xfce219072a7b31b4 + .quad 0x4d7adc75aa578016 + .quad 0x0ec276a687479324 + .quad 0x6d6d9d5d1fda4beb + .quad 0x22b1a58ae9b08183 + .quad 0xfd95d071c15c388b + .quad 0xa9812376850a0517 + .quad 0x33384cbabb7f335e + + // 2^220 * 3 * G + + .quad 0x3c6fa2680ca2c7b5 + .quad 0x1b5082046fb64fda + .quad 0xeb53349c5431d6de + .quad 0x5278b38f6b879c89 + .quad 0x33bc627a26218b8d + .quad 0xea80b21fc7a80c61 + .quad 0x9458b12b173e9ee6 + .quad 0x076247be0e2f3059 + .quad 0x52e105f61416375a + .quad 0xec97af3685abeba4 + .quad 0x26e6b50623a67c36 + .quad 0x5cf0e856f3d4fb01 + + // 2^220 * 4 * G + + .quad 0xf6c968731ae8cab4 + .quad 0x5e20741ecb4f92c5 + .quad 0x2da53be58ccdbc3e + .quad 0x2dddfea269970df7 + .quad 0xbeaece313db342a8 + .quad 0xcba3635b842db7ee + .quad 0xe88c6620817f13ef + .quad 0x1b9438aa4e76d5c6 + .quad 0x8a50777e166f031a + .quad 0x067b39f10fb7a328 + .quad 0x1925c9a6010fbd76 + .quad 0x6df9b575cc740905 + + // 2^220 * 5 * G + + .quad 0x42c1192927f6bdcf + .quad 0x8f91917a403d61ca + .quad 0xdc1c5a668b9e1f61 + .quad 0x1596047804ec0f8d + .quad 0xecdfc35b48cade41 + .quad 0x6a88471fb2328270 + .quad 0x740a4a2440a01b6a + .quad 0x471e5796003b5f29 + .quad 0xda96bbb3aced37ac + .quad 0x7a2423b5e9208cea + .quad 0x24cc5c3038aebae2 + .quad 0x50c356afdc5dae2f + + // 2^220 * 6 * G + + .quad 0x09dcbf4341c30318 + .quad 0xeeba061183181dce + .quad 0xc179c0cedc1e29a1 + .quad 0x1dbf7b89073f35b0 + .quad 0xcfed9cdf1b31b964 + .quad 0xf486a9858ca51af3 + .quad 0x14897265ea8c1f84 + .quad 0x784a53dd932acc00 + .quad 0x2d99f9df14fc4920 + .quad 0x76ccb60cc4499fe5 + .quad 
0xa4132cbbe5cf0003 + .quad 0x3f93d82354f000ea + + // 2^220 * 7 * G + + .quad 0x8183e7689e04ce85 + .quad 0x678fb71e04465341 + .quad 0xad92058f6688edac + .quad 0x5da350d3532b099a + .quad 0xeaac12d179e14978 + .quad 0xff923ff3bbebff5e + .quad 0x4af663e40663ce27 + .quad 0x0fd381a811a5f5ff + .quad 0xf256aceca436df54 + .quad 0x108b6168ae69d6e8 + .quad 0x20d986cb6b5d036c + .quad 0x655957b9fee2af50 + + // 2^220 * 8 * G + + .quad 0xaea8b07fa902030f + .quad 0xf88c766af463d143 + .quad 0x15b083663c787a60 + .quad 0x08eab1148267a4a8 + .quad 0xbdc1409bd002d0ac + .quad 0x66660245b5ccd9a6 + .quad 0x82317dc4fade85ec + .quad 0x02fe934b6ad7df0d + .quad 0xef5cf100cfb7ea74 + .quad 0x22897633a1cb42ac + .quad 0xd4ce0c54cef285e2 + .quad 0x30408c048a146a55 + + // 2^224 * 1 * G + + .quad 0x739d8845832fcedb + .quad 0xfa38d6c9ae6bf863 + .quad 0x32bc0dcab74ffef7 + .quad 0x73937e8814bce45e + .quad 0xbb2e00c9193b877f + .quad 0xece3a890e0dc506b + .quad 0xecf3b7c036de649f + .quad 0x5f46040898de9e1a + .quad 0xb9037116297bf48d + .quad 0xa9d13b22d4f06834 + .quad 0xe19715574696bdc6 + .quad 0x2cf8a4e891d5e835 + + // 2^224 * 2 * G + + .quad 0x6d93fd8707110f67 + .quad 0xdd4c09d37c38b549 + .quad 0x7cb16a4cc2736a86 + .quad 0x2049bd6e58252a09 + .quad 0x2cb5487e17d06ba2 + .quad 0x24d2381c3950196b + .quad 0xd7659c8185978a30 + .quad 0x7a6f7f2891d6a4f6 + .quad 0x7d09fd8d6a9aef49 + .quad 0xf0ee60be5b3db90b + .quad 0x4c21b52c519ebfd4 + .quad 0x6011aadfc545941d + + // 2^224 * 3 * G + + .quad 0x5f67926dcf95f83c + .quad 0x7c7e856171289071 + .quad 0xd6a1e7f3998f7a5b + .quad 0x6fc5cc1b0b62f9e0 + .quad 0x63ded0c802cbf890 + .quad 0xfbd098ca0dff6aaa + .quad 0x624d0afdb9b6ed99 + .quad 0x69ce18b779340b1e + .quad 0xd1ef5528b29879cb + .quad 0xdd1aae3cd47e9092 + .quad 0x127e0442189f2352 + .quad 0x15596b3ae57101f1 + + // 2^224 * 4 * G + + .quad 0x462739d23f9179a2 + .quad 0xff83123197d6ddcf + .quad 0x1307deb553f2148a + .quad 0x0d2237687b5f4dda + .quad 0x09ff31167e5124ca + .quad 0x0be4158bd9c745df + .quad 0x292b7d227ef556e5 + .quad 0x3aa4e241afb6d138 + .quad 0x2cc138bf2a3305f5 + .quad 0x48583f8fa2e926c3 + .quad 0x083ab1a25549d2eb + .quad 0x32fcaa6e4687a36c + + // 2^224 * 5 * G + + .quad 0x7bc56e8dc57d9af5 + .quad 0x3e0bd2ed9df0bdf2 + .quad 0xaac014de22efe4a3 + .quad 0x4627e9cefebd6a5c + .quad 0x3207a4732787ccdf + .quad 0x17e31908f213e3f8 + .quad 0xd5b2ecd7f60d964e + .quad 0x746f6336c2600be9 + .quad 0x3f4af345ab6c971c + .quad 0xe288eb729943731f + .quad 0x33596a8a0344186d + .quad 0x7b4917007ed66293 + + // 2^224 * 6 * G + + .quad 0x2d85fb5cab84b064 + .quad 0x497810d289f3bc14 + .quad 0x476adc447b15ce0c + .quad 0x122ba376f844fd7b + .quad 0x54341b28dd53a2dd + .quad 0xaa17905bdf42fc3f + .quad 0x0ff592d94dd2f8f4 + .quad 0x1d03620fe08cd37d + .quad 0xc20232cda2b4e554 + .quad 0x9ed0fd42115d187f + .quad 0x2eabb4be7dd479d9 + .quad 0x02c70bf52b68ec4c + + // 2^224 * 7 * G + + .quad 0xa287ec4b5d0b2fbb + .quad 0x415c5790074882ca + .quad 0xe044a61ec1d0815c + .quad 0x26334f0a409ef5e0 + .quad 0xace532bf458d72e1 + .quad 0x5be768e07cb73cb5 + .quad 0x56cf7d94ee8bbde7 + .quad 0x6b0697e3feb43a03 + .quad 0xb6c8f04adf62a3c0 + .quad 0x3ef000ef076da45d + .quad 0x9c9cb95849f0d2a9 + .quad 0x1cc37f43441b2fae + + // 2^224 * 8 * G + + .quad 0x508f565a5cc7324f + .quad 0xd061c4c0e506a922 + .quad 0xfb18abdb5c45ac19 + .quad 0x6c6809c10380314a + .quad 0xd76656f1c9ceaeb9 + .quad 0x1c5b15f818e5656a + .quad 0x26e72832844c2334 + .quad 0x3a346f772f196838 + .quad 0xd2d55112e2da6ac8 + .quad 0xe9bd0331b1e851ed + .quad 0x960746dd8ec67262 + .quad 0x05911b9f6ef7c5d0 + + // 2^228 * 1 * G + + .quad 
0xe9dcd756b637ff2d + .quad 0xec4c348fc987f0c4 + .quad 0xced59285f3fbc7b7 + .quad 0x3305354793e1ea87 + .quad 0x01c18980c5fe9f94 + .quad 0xcd656769716fd5c8 + .quad 0x816045c3d195a086 + .quad 0x6e2b7f3266cc7982 + .quad 0xcc802468f7c3568f + .quad 0x9de9ba8219974cb3 + .quad 0xabb7229cb5b81360 + .quad 0x44e2017a6fbeba62 + + // 2^228 * 2 * G + + .quad 0xc4c2a74354dab774 + .quad 0x8e5d4c3c4eaf031a + .quad 0xb76c23d242838f17 + .quad 0x749a098f68dce4ea + .quad 0x87f82cf3b6ca6ecd + .quad 0x580f893e18f4a0c2 + .quad 0x058930072604e557 + .quad 0x6cab6ac256d19c1d + .quad 0xdcdfe0a02cc1de60 + .quad 0x032665ff51c5575b + .quad 0x2c0c32f1073abeeb + .quad 0x6a882014cd7b8606 + + // 2^228 * 3 * G + + .quad 0xa52a92fea4747fb5 + .quad 0xdc12a4491fa5ab89 + .quad 0xd82da94bb847a4ce + .quad 0x4d77edce9512cc4e + .quad 0xd111d17caf4feb6e + .quad 0x050bba42b33aa4a3 + .quad 0x17514c3ceeb46c30 + .quad 0x54bedb8b1bc27d75 + .quad 0x77c8e14577e2189c + .quad 0xa3e46f6aff99c445 + .quad 0x3144dfc86d335343 + .quad 0x3a96559e7c4216a9 + + // 2^228 * 4 * G + + .quad 0x12550d37f42ad2ee + .quad 0x8b78e00498a1fbf5 + .quad 0x5d53078233894cb2 + .quad 0x02c84e4e3e498d0c + .quad 0x4493896880baaa52 + .quad 0x4c98afc4f285940e + .quad 0xef4aa79ba45448b6 + .quad 0x5278c510a57aae7f + .quad 0xa54dd074294c0b94 + .quad 0xf55d46b8df18ffb6 + .quad 0xf06fecc58dae8366 + .quad 0x588657668190d165 + + // 2^228 * 5 * G + + .quad 0xd47712311aef7117 + .quad 0x50343101229e92c7 + .quad 0x7a95e1849d159b97 + .quad 0x2449959b8b5d29c9 + .quad 0xbf5834f03de25cc3 + .quad 0xb887c8aed6815496 + .quad 0x5105221a9481e892 + .quad 0x6760ed19f7723f93 + .quad 0x669ba3b7ac35e160 + .quad 0x2eccf73fba842056 + .quad 0x1aec1f17c0804f07 + .quad 0x0d96bc031856f4e7 + + // 2^228 * 6 * G + + .quad 0x3318be7775c52d82 + .quad 0x4cb764b554d0aab9 + .quad 0xabcf3d27cc773d91 + .quad 0x3bf4d1848123288a + .quad 0xb1d534b0cc7505e1 + .quad 0x32cd003416c35288 + .quad 0xcb36a5800762c29d + .quad 0x5bfe69b9237a0bf8 + .quad 0x183eab7e78a151ab + .quad 0xbbe990c999093763 + .quad 0xff717d6e4ac7e335 + .quad 0x4c5cddb325f39f88 + + // 2^228 * 7 * G + + .quad 0xc0f6b74d6190a6eb + .quad 0x20ea81a42db8f4e4 + .quad 0xa8bd6f7d97315760 + .quad 0x33b1d60262ac7c21 + .quad 0x57750967e7a9f902 + .quad 0x2c37fdfc4f5b467e + .quad 0xb261663a3177ba46 + .quad 0x3a375e78dc2d532b + .quad 0x8141e72f2d4dddea + .quad 0xe6eafe9862c607c8 + .quad 0x23c28458573cafd0 + .quad 0x46b9476f4ff97346 + + // 2^228 * 8 * G + + .quad 0x0c1ffea44f901e5c + .quad 0x2b0b6fb72184b782 + .quad 0xe587ff910114db88 + .quad 0x37130f364785a142 + .quad 0x1215505c0d58359f + .quad 0x2a2013c7fc28c46b + .quad 0x24a0a1af89ea664e + .quad 0x4400b638a1130e1f + .quad 0x3a01b76496ed19c3 + .quad 0x31e00ab0ed327230 + .quad 0x520a885783ca15b1 + .quad 0x06aab9875accbec7 + + // 2^232 * 1 * G + + .quad 0xc1339983f5df0ebb + .quad 0xc0f3758f512c4cac + .quad 0x2cf1130a0bb398e1 + .quad 0x6b3cecf9aa270c62 + .quad 0x5349acf3512eeaef + .quad 0x20c141d31cc1cb49 + .quad 0x24180c07a99a688d + .quad 0x555ef9d1c64b2d17 + .quad 0x36a770ba3b73bd08 + .quad 0x624aef08a3afbf0c + .quad 0x5737ff98b40946f2 + .quad 0x675f4de13381749d + + // 2^232 * 2 * G + + .quad 0x0e2c52036b1782fc + .quad 0x64816c816cad83b4 + .quad 0xd0dcbdd96964073e + .quad 0x13d99df70164c520 + .quad 0xa12ff6d93bdab31d + .quad 0x0725d80f9d652dfe + .quad 0x019c4ff39abe9487 + .quad 0x60f450b882cd3c43 + .quad 0x014b5ec321e5c0ca + .quad 0x4fcb69c9d719bfa2 + .quad 0x4e5f1c18750023a0 + .quad 0x1c06de9e55edac80 + + // 2^232 * 3 * G + + .quad 0x990f7ad6a33ec4e2 + .quad 0x6608f938be2ee08e + .quad 0x9ca143c563284515 + .quad 
0x4cf38a1fec2db60d + .quad 0xffd52b40ff6d69aa + .quad 0x34530b18dc4049bb + .quad 0x5e4a5c2fa34d9897 + .quad 0x78096f8e7d32ba2d + .quad 0xa0aaaa650dfa5ce7 + .quad 0xf9c49e2a48b5478c + .quad 0x4f09cc7d7003725b + .quad 0x373cad3a26091abe + + // 2^232 * 4 * G + + .quad 0xb294634d82c9f57c + .quad 0x1fcbfde124934536 + .quad 0x9e9c4db3418cdb5a + .quad 0x0040f3d9454419fc + .quad 0xf1bea8fb89ddbbad + .quad 0x3bcb2cbc61aeaecb + .quad 0x8f58a7bb1f9b8d9d + .quad 0x21547eda5112a686 + .quad 0xdefde939fd5986d3 + .quad 0xf4272c89510a380c + .quad 0xb72ba407bb3119b9 + .quad 0x63550a334a254df4 + + // 2^232 * 5 * G + + .quad 0x6507d6edb569cf37 + .quad 0x178429b00ca52ee1 + .quad 0xea7c0090eb6bd65d + .quad 0x3eea62c7daf78f51 + .quad 0x9bba584572547b49 + .quad 0xf305c6fae2c408e0 + .quad 0x60e8fa69c734f18d + .quad 0x39a92bafaa7d767a + .quad 0x9d24c713e693274e + .quad 0x5f63857768dbd375 + .quad 0x70525560eb8ab39a + .quad 0x68436a0665c9c4cd + + // 2^232 * 6 * G + + .quad 0xbc0235e8202f3f27 + .quad 0xc75c00e264f975b0 + .quad 0x91a4e9d5a38c2416 + .quad 0x17b6e7f68ab789f9 + .quad 0x1e56d317e820107c + .quad 0xc5266844840ae965 + .quad 0xc1e0a1c6320ffc7a + .quad 0x5373669c91611472 + .quad 0x5d2814ab9a0e5257 + .quad 0x908f2084c9cab3fc + .quad 0xafcaf5885b2d1eca + .quad 0x1cb4b5a678f87d11 + + // 2^232 * 7 * G + + .quad 0xb664c06b394afc6c + .quad 0x0c88de2498da5fb1 + .quad 0x4f8d03164bcad834 + .quad 0x330bca78de7434a2 + .quad 0x6b74aa62a2a007e7 + .quad 0xf311e0b0f071c7b1 + .quad 0x5707e438000be223 + .quad 0x2dc0fd2d82ef6eac + .quad 0x982eff841119744e + .quad 0xf9695e962b074724 + .quad 0xc58ac14fbfc953fb + .quad 0x3c31be1b369f1cf5 + + // 2^232 * 8 * G + + .quad 0xb0f4864d08948aee + .quad 0x07dc19ee91ba1c6f + .quad 0x7975cdaea6aca158 + .quad 0x330b61134262d4bb + .quad 0xc168bc93f9cb4272 + .quad 0xaeb8711fc7cedb98 + .quad 0x7f0e52aa34ac8d7a + .quad 0x41cec1097e7d55bb + .quad 0xf79619d7a26d808a + .quad 0xbb1fd49e1d9e156d + .quad 0x73d7c36cdba1df27 + .quad 0x26b44cd91f28777d + + // 2^236 * 1 * G + + .quad 0x300a9035393aa6d8 + .quad 0x2b501131a12bb1cd + .quad 0x7b1ff677f093c222 + .quad 0x4309c1f8cab82bad + .quad 0xaf44842db0285f37 + .quad 0x8753189047efc8df + .quad 0x9574e091f820979a + .quad 0x0e378d6069615579 + .quad 0xd9fa917183075a55 + .quad 0x4bdb5ad26b009fdc + .quad 0x7829ad2cd63def0e + .quad 0x078fc54975fd3877 + + // 2^236 * 2 * G + + .quad 0x87dfbd1428878f2d + .quad 0x134636dd1e9421a1 + .quad 0x4f17c951257341a3 + .quad 0x5df98d4bad296cb8 + .quad 0xe2004b5bb833a98a + .quad 0x44775dec2d4c3330 + .quad 0x3aa244067eace913 + .quad 0x272630e3d58e00a9 + .quad 0xf3678fd0ecc90b54 + .quad 0xf001459b12043599 + .quad 0x26725fbc3758b89b + .quad 0x4325e4aa73a719ae + + // 2^236 * 3 * G + + .quad 0x657dc6ef433c3493 + .quad 0x65375e9f80dbf8c3 + .quad 0x47fd2d465b372dae + .quad 0x4966ab79796e7947 + .quad 0xed24629acf69f59d + .quad 0x2a4a1ccedd5abbf4 + .quad 0x3535ca1f56b2d67b + .quad 0x5d8c68d043b1b42d + .quad 0xee332d4de3b42b0a + .quad 0xd84e5a2b16a4601c + .quad 0x78243877078ba3e4 + .quad 0x77ed1eb4184ee437 + + // 2^236 * 4 * G + + .quad 0xbfd4e13f201839a0 + .quad 0xaeefffe23e3df161 + .quad 0xb65b04f06b5d1fe3 + .quad 0x52e085fb2b62fbc0 + .quad 0x185d43f89e92ed1a + .quad 0xb04a1eeafe4719c6 + .quad 0x499fbe88a6f03f4f + .quad 0x5d8b0d2f3c859bdd + .quad 0x124079eaa54cf2ba + .quad 0xd72465eb001b26e7 + .quad 0x6843bcfdc97af7fd + .quad 0x0524b42b55eacd02 + + // 2^236 * 5 * G + + .quad 0xfd0d5dbee45447b0 + .quad 0x6cec351a092005ee + .quad 0x99a47844567579cb + .quad 0x59d242a216e7fa45 + .quad 0xbc18dcad9b829eac + .quad 0x23ae7d28b5f579d0 + .quad 
0xc346122a69384233 + .quad 0x1a6110b2e7d4ac89 + .quad 0x4f833f6ae66997ac + .quad 0x6849762a361839a4 + .quad 0x6985dec1970ab525 + .quad 0x53045e89dcb1f546 + + // 2^236 * 6 * G + + .quad 0xcb8bb346d75353db + .quad 0xfcfcb24bae511e22 + .quad 0xcba48d40d50ae6ef + .quad 0x26e3bae5f4f7cb5d + .quad 0x84da3cde8d45fe12 + .quad 0xbd42c218e444e2d2 + .quad 0xa85196781f7e3598 + .quad 0x7642c93f5616e2b2 + .quad 0x2323daa74595f8e4 + .quad 0xde688c8b857abeb4 + .quad 0x3fc48e961c59326e + .quad 0x0b2e73ca15c9b8ba + + // 2^236 * 7 * G + + .quad 0xd6bb4428c17f5026 + .quad 0x9eb27223fb5a9ca7 + .quad 0xe37ba5031919c644 + .quad 0x21ce380db59a6602 + .quad 0x0e3fbfaf79c03a55 + .quad 0x3077af054cbb5acf + .quad 0xd5c55245db3de39f + .quad 0x015e68c1476a4af7 + .quad 0xc1d5285220066a38 + .quad 0x95603e523570aef3 + .quad 0x832659a7226b8a4d + .quad 0x5dd689091f8eedc9 + + // 2^236 * 8 * G + + .quad 0xcbac84debfd3c856 + .quad 0x1624c348b35ff244 + .quad 0xb7f88dca5d9cad07 + .quad 0x3b0e574da2c2ebe8 + .quad 0x1d022591a5313084 + .quad 0xca2d4aaed6270872 + .quad 0x86a12b852f0bfd20 + .quad 0x56e6c439ad7da748 + .quad 0xc704ff4942bdbae6 + .quad 0x5e21ade2b2de1f79 + .quad 0xe95db3f35652fad8 + .quad 0x0822b5378f08ebc1 + + // 2^240 * 1 * G + + .quad 0x51f048478f387475 + .quad 0xb25dbcf49cbecb3c + .quad 0x9aab1244d99f2055 + .quad 0x2c709e6c1c10a5d6 + .quad 0xe1b7f29362730383 + .quad 0x4b5279ffebca8a2c + .quad 0xdafc778abfd41314 + .quad 0x7deb10149c72610f + .quad 0xcb62af6a8766ee7a + .quad 0x66cbec045553cd0e + .quad 0x588001380f0be4b5 + .quad 0x08e68e9ff62ce2ea + + // 2^240 * 2 * G + + .quad 0x34ad500a4bc130ad + .quad 0x8d38db493d0bd49c + .quad 0xa25c3d98500a89be + .quad 0x2f1f3f87eeba3b09 + .quad 0x2f2d09d50ab8f2f9 + .quad 0xacb9218dc55923df + .quad 0x4a8f342673766cb9 + .quad 0x4cb13bd738f719f5 + .quad 0xf7848c75e515b64a + .quad 0xa59501badb4a9038 + .quad 0xc20d313f3f751b50 + .quad 0x19a1e353c0ae2ee8 + + // 2^240 * 3 * G + + .quad 0x7d1c7560bafa05c3 + .quad 0xb3e1a0a0c6e55e61 + .quad 0xe3529718c0d66473 + .quad 0x41546b11c20c3486 + .quad 0xb42172cdd596bdbd + .quad 0x93e0454398eefc40 + .quad 0x9fb15347b44109b5 + .quad 0x736bd3990266ae34 + .quad 0x85532d509334b3b4 + .quad 0x46fd114b60816573 + .quad 0xcc5f5f30425c8375 + .quad 0x412295a2b87fab5c + + // 2^240 * 4 * G + + .quad 0x19c99b88f57ed6e9 + .quad 0x5393cb266df8c825 + .quad 0x5cee3213b30ad273 + .quad 0x14e153ebb52d2e34 + .quad 0x2e655261e293eac6 + .quad 0x845a92032133acdb + .quad 0x460975cb7900996b + .quad 0x0760bb8d195add80 + .quad 0x413e1a17cde6818a + .quad 0x57156da9ed69a084 + .quad 0x2cbf268f46caccb1 + .quad 0x6b34be9bc33ac5f2 + + // 2^240 * 5 * G + + .quad 0xf3df2f643a78c0b2 + .quad 0x4c3e971ef22e027c + .quad 0xec7d1c5e49c1b5a3 + .quad 0x2012c18f0922dd2d + .quad 0x11fc69656571f2d3 + .quad 0xc6c9e845530e737a + .quad 0xe33ae7a2d4fe5035 + .quad 0x01b9c7b62e6dd30b + .quad 0x880b55e55ac89d29 + .quad 0x1483241f45a0a763 + .quad 0x3d36efdfc2e76c1f + .quad 0x08af5b784e4bade8 + + // 2^240 * 6 * G + + .quad 0x283499dc881f2533 + .quad 0x9d0525da779323b6 + .quad 0x897addfb673441f4 + .quad 0x32b79d71163a168d + .quad 0xe27314d289cc2c4b + .quad 0x4be4bd11a287178d + .quad 0x18d528d6fa3364ce + .quad 0x6423c1d5afd9826e + .quad 0xcc85f8d9edfcb36a + .quad 0x22bcc28f3746e5f9 + .quad 0xe49de338f9e5d3cd + .quad 0x480a5efbc13e2dcc + + // 2^240 * 7 * G + + .quad 0x0b51e70b01622071 + .quad 0x06b505cf8b1dafc5 + .quad 0x2c6bb061ef5aabcd + .quad 0x47aa27600cb7bf31 + .quad 0xb6614ce442ce221f + .quad 0x6e199dcc4c053928 + .quad 0x663fb4a4dc1cbe03 + .quad 0x24b31d47691c8e06 + .quad 0x2a541eedc015f8c3 + .quad 
0x11a4fe7e7c693f7c + .quad 0xf0af66134ea278d6 + .quad 0x545b585d14dda094 + + // 2^240 * 8 * G + + .quad 0x67bf275ea0d43a0f + .quad 0xade68e34089beebe + .quad 0x4289134cd479e72e + .quad 0x0f62f9c332ba5454 + .quad 0x6204e4d0e3b321e1 + .quad 0x3baa637a28ff1e95 + .quad 0x0b0ccffd5b99bd9e + .quad 0x4d22dc3e64c8d071 + .quad 0xfcb46589d63b5f39 + .quad 0x5cae6a3f57cbcf61 + .quad 0xfebac2d2953afa05 + .quad 0x1c0fa01a36371436 + + // 2^244 * 1 * G + + .quad 0xe7547449bc7cd692 + .quad 0x0f9abeaae6f73ddf + .quad 0x4af01ca700837e29 + .quad 0x63ab1b5d3f1bc183 + .quad 0xc11ee5e854c53fae + .quad 0x6a0b06c12b4f3ff4 + .quad 0x33540f80e0b67a72 + .quad 0x15f18fc3cd07e3ef + .quad 0x32750763b028f48c + .quad 0x06020740556a065f + .quad 0xd53bd812c3495b58 + .quad 0x08706c9b865f508d + + // 2^244 * 2 * G + + .quad 0xf37ca2ab3d343dff + .quad 0x1a8c6a2d80abc617 + .quad 0x8e49e035d4ccffca + .quad 0x48b46beebaa1d1b9 + .quad 0xcc991b4138b41246 + .quad 0x243b9c526f9ac26b + .quad 0xb9ef494db7cbabbd + .quad 0x5fba433dd082ed00 + .quad 0x9c49e355c9941ad0 + .quad 0xb9734ade74498f84 + .quad 0x41c3fed066663e5c + .quad 0x0ecfedf8e8e710b3 + + // 2^244 * 3 * G + + .quad 0x76430f9f9cd470d9 + .quad 0xb62acc9ba42f6008 + .quad 0x1898297c59adad5e + .quad 0x7789dd2db78c5080 + .quad 0x744f7463e9403762 + .quad 0xf79a8dee8dfcc9c9 + .quad 0x163a649655e4cde3 + .quad 0x3b61788db284f435 + .quad 0xb22228190d6ef6b2 + .quad 0xa94a66b246ce4bfa + .quad 0x46c1a77a4f0b6cc7 + .quad 0x4236ccffeb7338cf + + // 2^244 * 4 * G + + .quad 0x8497404d0d55e274 + .quad 0x6c6663d9c4ad2b53 + .quad 0xec2fb0d9ada95734 + .quad 0x2617e120cdb8f73c + .quad 0x3bd82dbfda777df6 + .quad 0x71b177cc0b98369e + .quad 0x1d0e8463850c3699 + .quad 0x5a71945b48e2d1f1 + .quad 0x6f203dd5405b4b42 + .quad 0x327ec60410b24509 + .quad 0x9c347230ac2a8846 + .quad 0x77de29fc11ffeb6a + + // 2^244 * 5 * G + + .quad 0xb0ac57c983b778a8 + .quad 0x53cdcca9d7fe912c + .quad 0x61c2b854ff1f59dc + .quad 0x3a1a2cf0f0de7dac + .quad 0x835e138fecced2ca + .quad 0x8c9eaf13ea963b9a + .quad 0xc95fbfc0b2160ea6 + .quad 0x575e66f3ad877892 + .quad 0x99803a27c88fcb3a + .quad 0x345a6789275ec0b0 + .quad 0x459789d0ff6c2be5 + .quad 0x62f882651e70a8b2 + + // 2^244 * 6 * G + + .quad 0x085ae2c759ff1be4 + .quad 0x149145c93b0e40b7 + .quad 0xc467e7fa7ff27379 + .quad 0x4eeecf0ad5c73a95 + .quad 0x6d822986698a19e0 + .quad 0xdc9821e174d78a71 + .quad 0x41a85f31f6cb1f47 + .quad 0x352721c2bcda9c51 + .quad 0x48329952213fc985 + .quad 0x1087cf0d368a1746 + .quad 0x8e5261b166c15aa5 + .quad 0x2d5b2d842ed24c21 + + // 2^244 * 7 * G + + .quad 0x02cfebd9ebd3ded1 + .quad 0xd45b217739021974 + .quad 0x7576f813fe30a1b7 + .quad 0x5691b6f9a34ef6c2 + .quad 0x5eb7d13d196ac533 + .quad 0x377234ecdb80be2b + .quad 0xe144cffc7cf5ae24 + .quad 0x5226bcf9c441acec + .quad 0x79ee6c7223e5b547 + .quad 0x6f5f50768330d679 + .quad 0xed73e1e96d8adce9 + .quad 0x27c3da1e1d8ccc03 + + // 2^244 * 8 * G + + .quad 0x7eb9efb23fe24c74 + .quad 0x3e50f49f1651be01 + .quad 0x3ea732dc21858dea + .quad 0x17377bd75bb810f9 + .quad 0x28302e71630ef9f6 + .quad 0xc2d4a2032b64cee0 + .quad 0x090820304b6292be + .quad 0x5fca747aa82adf18 + .quad 0x232a03c35c258ea5 + .quad 0x86f23a2c6bcb0cf1 + .quad 0x3dad8d0d2e442166 + .quad 0x04a8933cab76862b + + // 2^248 * 1 * G + + .quad 0xd2c604b622943dff + .quad 0xbc8cbece44cfb3a0 + .quad 0x5d254ff397808678 + .quad 0x0fa3614f3b1ca6bf + .quad 0x69082b0e8c936a50 + .quad 0xf9c9a035c1dac5b6 + .quad 0x6fb73e54c4dfb634 + .quad 0x4005419b1d2bc140 + .quad 0xa003febdb9be82f0 + .quad 0x2089c1af3a44ac90 + .quad 0xf8499f911954fa8e + .quad 0x1fba218aef40ab42 + + // 
2^248 * 2 * G + + .quad 0xab549448fac8f53e + .quad 0x81f6e89a7ba63741 + .quad 0x74fd6c7d6c2b5e01 + .quad 0x392e3acaa8c86e42 + .quad 0x4f3e57043e7b0194 + .quad 0xa81d3eee08daaf7f + .quad 0xc839c6ab99dcdef1 + .quad 0x6c535d13ff7761d5 + .quad 0x4cbd34e93e8a35af + .quad 0x2e0781445887e816 + .quad 0x19319c76f29ab0ab + .quad 0x25e17fe4d50ac13b + + // 2^248 * 3 * G + + .quad 0x0a289bd71e04f676 + .quad 0x208e1c52d6420f95 + .quad 0x5186d8b034691fab + .quad 0x255751442a9fb351 + .quad 0x915f7ff576f121a7 + .quad 0xc34a32272fcd87e3 + .quad 0xccba2fde4d1be526 + .quad 0x6bba828f8969899b + .quad 0xe2d1bc6690fe3901 + .quad 0x4cb54a18a0997ad5 + .quad 0x971d6914af8460d4 + .quad 0x559d504f7f6b7be4 + + // 2^248 * 4 * G + + .quad 0xa7738378b3eb54d5 + .quad 0x1d69d366a5553c7c + .quad 0x0a26cf62f92800ba + .quad 0x01ab12d5807e3217 + .quad 0x9c4891e7f6d266fd + .quad 0x0744a19b0307781b + .quad 0x88388f1d6061e23b + .quad 0x123ea6a3354bd50e + .quad 0x118d189041e32d96 + .quad 0xb9ede3c2d8315848 + .quad 0x1eab4271d83245d9 + .quad 0x4a3961e2c918a154 + + // 2^248 * 5 * G + + .quad 0x71dc3be0f8e6bba0 + .quad 0xd6cef8347effe30a + .quad 0xa992425fe13a476a + .quad 0x2cd6bce3fb1db763 + .quad 0x0327d644f3233f1e + .quad 0x499a260e34fcf016 + .quad 0x83b5a716f2dab979 + .quad 0x68aceead9bd4111f + .quad 0x38b4c90ef3d7c210 + .quad 0x308e6e24b7ad040c + .quad 0x3860d9f1b7e73e23 + .quad 0x595760d5b508f597 + + // 2^248 * 6 * G + + .quad 0x6129bfe104aa6397 + .quad 0x8f960008a4a7fccb + .quad 0x3f8bc0897d909458 + .quad 0x709fa43edcb291a9 + .quad 0x882acbebfd022790 + .quad 0x89af3305c4115760 + .quad 0x65f492e37d3473f4 + .quad 0x2cb2c5df54515a2b + .quad 0xeb0a5d8c63fd2aca + .quad 0xd22bc1662e694eff + .quad 0x2723f36ef8cbb03a + .quad 0x70f029ecf0c8131f + + // 2^248 * 7 * G + + .quad 0x461307b32eed3e33 + .quad 0xae042f33a45581e7 + .quad 0xc94449d3195f0366 + .quad 0x0b7d5d8a6c314858 + .quad 0x2a6aafaa5e10b0b9 + .quad 0x78f0a370ef041aa9 + .quad 0x773efb77aa3ad61f + .quad 0x44eca5a2a74bd9e1 + .quad 0x25d448327b95d543 + .quad 0x70d38300a3340f1d + .quad 0xde1c531c60e1c52b + .quad 0x272224512c7de9e4 + + // 2^248 * 8 * G + + .quad 0x1abc92af49c5342e + .quad 0xffeed811b2e6fad0 + .quad 0xefa28c8dfcc84e29 + .quad 0x11b5df18a44cc543 + .quad 0xbf7bbb8a42a975fc + .quad 0x8c5c397796ada358 + .quad 0xe27fc76fcdedaa48 + .quad 0x19735fd7f6bc20a6 + .quad 0xe3ab90d042c84266 + .quad 0xeb848e0f7f19547e + .quad 0x2503a1d065a497b9 + .quad 0x0fef911191df895f + + // 2^252 * 1 * G + + .quad 0xb1507ca1ab1c6eb9 + .quad 0xbd448f3e16b687b3 + .quad 0x3455fb7f2c7a91ab + .quad 0x7579229e2f2adec1 + .quad 0x6ab5dcb85b1c16b7 + .quad 0x94c0fce83c7b27a5 + .quad 0xa4b11c1a735517be + .quad 0x499238d0ba0eafaa + .quad 0xecf46e527aba8b57 + .quad 0x15a08c478bd1647b + .quad 0x7af1c6a65f706fef + .quad 0x6345fa78f03a30d5 + + // 2^252 * 2 * G + + .quad 0xdf02f95f1015e7a1 + .quad 0x790ec41da9b40263 + .quad 0x4d3a0ea133ea1107 + .quad 0x54f70be7e33af8c9 + .quad 0x93d3cbe9bdd8f0a4 + .quad 0xdb152c1bfd177302 + .quad 0x7dbddc6d7f17a875 + .quad 0x3e1a71cc8f426efe + .quad 0xc83ca3e390babd62 + .quad 0x80ede3670291c833 + .quad 0xc88038ccd37900c4 + .quad 0x2c5fc0231ec31fa1 + + // 2^252 * 3 * G + + .quad 0xfeba911717038b4f + .quad 0xe5123721c9deef81 + .quad 0x1c97e4e75d0d8834 + .quad 0x68afae7a23dc3bc6 + .quad 0xc422e4d102456e65 + .quad 0x87414ac1cad47b91 + .quad 0x1592e2bba2b6ffdd + .quad 0x75d9d2bff5c2100f + .quad 0x5bd9b4763626e81c + .quad 0x89966936bca02edd + .quad 0x0a41193d61f077b3 + .quad 0x3097a24200ce5471 + + // 2^252 * 4 * G + + .quad 0x57427734c7f8b84c + .quad 0xf141a13e01b270e9 + .quad 
0x02d1adfeb4e564a6 + .quad 0x4bb23d92ce83bd48 + .quad 0xa162e7246695c486 + .quad 0x131d633435a89607 + .quad 0x30521561a0d12a37 + .quad 0x56704bada6afb363 + .quad 0xaf6c4aa752f912b9 + .quad 0x5e665f6cd86770c8 + .quad 0x4c35ac83a3c8cd58 + .quad 0x2b7a29c010a58a7e + + // 2^252 * 5 * G + + .quad 0xc4007f77d0c1cec3 + .quad 0x8d1020b6bac492f8 + .quad 0x32ec29d57e69daaf + .quad 0x599408759d95fce0 + .quad 0x33810a23bf00086e + .quad 0xafce925ee736ff7c + .quad 0x3d60e670e24922d4 + .quad 0x11ce9e714f96061b + .quad 0x219ef713d815bac1 + .quad 0xf141465d485be25c + .quad 0x6d5447cc4e513c51 + .quad 0x174926be5ef44393 + + // 2^252 * 6 * G + + .quad 0xb5deb2f9fc5bd5bb + .quad 0x92daa72ae1d810e1 + .quad 0xafc4cfdcb72a1c59 + .quad 0x497d78813fc22a24 + .quad 0x3ef5d41593ea022e + .quad 0x5cbcc1a20ed0eed6 + .quad 0x8fd24ecf07382c8c + .quad 0x6fa42ead06d8e1ad + .quad 0xe276824a1f73371f + .quad 0x7f7cf01c4f5b6736 + .quad 0x7e201fe304fa46e7 + .quad 0x785a36a357808c96 + + // 2^252 * 7 * G + + .quad 0x825fbdfd63014d2b + .quad 0xc852369c6ca7578b + .quad 0x5b2fcd285c0b5df0 + .quad 0x12ab214c58048c8f + .quad 0x070442985d517bc3 + .quad 0x6acd56c7ae653678 + .quad 0x00a27983985a7763 + .quad 0x5167effae512662b + .quad 0xbd4ea9e10f53c4b6 + .quad 0x1673dc5f8ac91a14 + .quad 0xa8f81a4e2acc1aba + .quad 0x33a92a7924332a25 + + // 2^252 * 8 * G + + .quad 0x9dd1f49927996c02 + .quad 0x0cb3b058e04d1752 + .quad 0x1f7e88967fd02c3e + .quad 0x2f964268cb8b3eb1 + .quad 0x7ba95ba0218f2ada + .quad 0xcff42287330fb9ca + .quad 0xdada496d56c6d907 + .quad 0x5380c296f4beee54 + .quad 0x9d4f270466898d0a + .quad 0x3d0987990aff3f7a + .quad 0xd09ef36267daba45 + .quad 0x7761455e7b1c669c + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/curve25519/curve25519_x25519base_alt.S b/x86_att/curve25519/curve25519_x25519base_alt.S new file mode 100644 index 0000000000..67b98ff992 --- /dev/null +++ b/x86_att/curve25519/curve25519_x25519base_alt.S @@ -0,0 +1,9022 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// The x25519 function for curve25519 on base element 9 +// Input scalar[4]; output res[4] +// +// extern void curve25519_x25519base_alt +// (uint64_t res[static 4],uint64_t scalar[static 4]); +// +// Given a scalar n, returns the X coordinate of n * G where G = (9,...) is +// the standard generator. The scalar is first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748). +// +// Standard x86-64 ABI: RDI = res, RSI = scalar +// Microsoft x64 ABI: RCX = res, RDX = scalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519base_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519base_alt) + .text + +// Size of individual field elements + +#define NUMSIZE 32 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. +// The result "resx" assumes the "res" pointer has been preloaded into %rbp. 
+ +#define resx (0*NUMSIZE)(%rbp) + +#define scalar (0*NUMSIZE)(%rsp) + +#define tabent (1*NUMSIZE)(%rsp) +#define ymx_2 (1*NUMSIZE)(%rsp) +#define xpy_2 (2*NUMSIZE)(%rsp) +#define kxy_2 (3*NUMSIZE)(%rsp) + +#define acc (4*NUMSIZE)(%rsp) +#define x_1 (4*NUMSIZE)(%rsp) +#define y_1 (5*NUMSIZE)(%rsp) +#define z_1 (6*NUMSIZE)(%rsp) +#define w_1 (7*NUMSIZE)(%rsp) +#define x_3 (4*NUMSIZE)(%rsp) +#define y_3 (5*NUMSIZE)(%rsp) +#define z_3 (6*NUMSIZE)(%rsp) +#define w_3 (7*NUMSIZE)(%rsp) + +#define tmpspace (8*NUMSIZE)(%rsp) +#define t0 (8*NUMSIZE)(%rsp) +#define t1 (9*NUMSIZE)(%rsp) +#define t2 (10*NUMSIZE)(%rsp) +#define t3 (11*NUMSIZE)(%rsp) +#define t4 (12*NUMSIZE)(%rsp) +#define t5 (13*NUMSIZE)(%rsp) + +// Stable homes for the input result pointer, and other variables + +#define res 14*NUMSIZE(%rsp) + +#define i 14*NUMSIZE+8(%rsp) + +#define bias 14*NUMSIZE+16(%rsp) + +#define bf 14*NUMSIZE+24(%rsp) +#define ix 14*NUMSIZE+24(%rsp) + +#define tab 15*NUMSIZE(%rsp) + +// Total size to reserve on the stack + +#define NSPACE (15*NUMSIZE+8) + +// Macro wrapping up the basic field multiplication, only trivially +// different from a pure function call to bignum_mul_p25519_alt. + +#define mul_p25519(P0,P1,P2) \ + movq P1, %rax ; \ + mulq P2; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + xorq %r10, %r10 ; \ + xorq %r11, %r11 ; \ + movq P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + movq 0x8+P1, %rax ; \ + mulq P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + xorq %r12, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq %r12, %r12 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + movq 0x10+P1, %rax ; \ + mulq P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorq %r13, %r13 ; \ + movq P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq %r13, %r13 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x18+P1, %rax ; \ + mulq P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + xorq %r14, %r14 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq %r14, %r14 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + xorq %r15, %r15 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq %r15, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r14 ; \ + adcq %rdx, %r15 ; \ + movl $0x26, %esi ; \ + movq %r12, %rax ; \ + mulq %rsi; \ + addq %rax, %r8 ; \ + adcq %rdx, %r9 ; \ + sbbq %rcx, %rcx ; \ + movq %r13, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + sbbq %rcx, %rcx ; \ + movq %r14, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + sbbq %rcx, %rcx ; \ + movq %r15, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + addq %rax, %r11 ; \ + movq %rdx, %r12 ; \ + adcq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + leaq 0x1(%r12), %rax ; \ 
+ movl $0x13, %esi ; \ + bts $63, %r11 ; \ + imulq %rsi, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + sbbq %rax, %rax ; \ + notq %rax; \ + andq %rsi, %rax ; \ + subq %rax, %r8 ; \ + sbbq %rcx, %r9 ; \ + sbbq %rcx, %r10 ; \ + sbbq %rcx, %r11 ; \ + btr $63, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +#define mul_4(P0,P1,P2) \ + movq P1, %rax ; \ + mulq P2; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + xorq %r10, %r10 ; \ + xorq %r11, %r11 ; \ + movq P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + movq 0x8+P1, %rax ; \ + mulq P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + xorq %r12, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq %r12, %r12 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + movq 0x10+P1, %rax ; \ + mulq P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorq %r13, %r13 ; \ + movq P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq %r13, %r13 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x18+P1, %rax ; \ + mulq P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + xorq %r14, %r14 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq %r14, %r14 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + xorq %r15, %r15 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq %r15, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r14 ; \ + adcq %rdx, %r15 ; \ + movl $0x26, %ebx ; \ + movq %r12, %rax ; \ + mulq %rbx; \ + addq %rax, %r8 ; \ + adcq %rdx, %r9 ; \ + sbbq %rcx, %rcx ; \ + movq %r13, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + sbbq %rcx, %rcx ; \ + movq %r14, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + sbbq %rcx, %rcx ; \ + movq %r15, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + addq %rax, %r11 ; \ + movq %rdx, %r12 ; \ + adcq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Plain 4-digit add and doubling without any normalization +// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result, +// indeed one < 2 * p_25519 for normalized inputs. 
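+//
+// (To spell out the bound: two summands < 2^255 give a sum < 2^256,
+// so the top adcq cannot produce a carry-out and 4 digits suffice;
+// while two summands < p_25519 give a sum < 2 * p_25519, the bound
+// under which the sub_twice4 and add_twice4 macros below operate.)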
+
+#define add_4(P0,P1,P2) \
+ movq P1, %rax ; \
+ addq P2, %rax ; \
+ movq %rax, P0 ; \
+ movq 8+P1, %rax ; \
+ adcq 8+P2, %rax ; \
+ movq %rax, 8+P0 ; \
+ movq 16+P1, %rax ; \
+ adcq 16+P2, %rax ; \
+ movq %rax, 16+P0 ; \
+ movq 24+P1, %rax ; \
+ adcq 24+P2, %rax ; \
+ movq %rax, 24+P0
+
+#define double_4(P0,P1) \
+ movq P1, %rax ; \
+ addq %rax, %rax ; \
+ movq %rax, P0 ; \
+ movq 8+P1, %rax ; \
+ adcq %rax, %rax ; \
+ movq %rax, 8+P0 ; \
+ movq 16+P1, %rax ; \
+ adcq %rax, %rax ; \
+ movq %rax, 16+P0 ; \
+ movq 24+P1, %rax ; \
+ adcq %rax, %rax ; \
+ movq %rax, 24+P0
+
+// Subtraction of a pair of numbers < p_25519 just sufficient
+// to give a 4-digit result. It actually always does (x - z) + (2^255-19),
+// which in turn is done by (x - z) - (2^255+19), discarding the 2^256
+// implicitly.
+
+#define sub_4(P0,P1,P2) \
+ movq P1, %r8 ; \
+ subq P2, %r8 ; \
+ movq 8+P1, %r9 ; \
+ sbbq 8+P2, %r9 ; \
+ movq 16+P1, %r10 ; \
+ sbbq 16+P2, %r10 ; \
+ movq 24+P1, %rax ; \
+ sbbq 24+P2, %rax ; \
+ subq $19, %r8 ; \
+ movq %r8, P0 ; \
+ sbbq $0, %r9 ; \
+ movq %r9, 8+P0 ; \
+ sbbq $0, %r10 ; \
+ movq %r10, 16+P0 ; \
+ sbbq $0, %rax ; \
+ btc $63, %rax ; \
+ movq %rax, 24+P0
+
+// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38
+
+#define sub_twice4(P0,P1,P2) \
+ movq P1, %r8 ; \
+ xorl %ebx, %ebx ; \
+ subq P2, %r8 ; \
+ movq 8+P1, %r9 ; \
+ sbbq 8+P2, %r9 ; \
+ movl $38, %ecx ; \
+ movq 16+P1, %r10 ; \
+ sbbq 16+P2, %r10 ; \
+ movq 24+P1, %rax ; \
+ sbbq 24+P2, %rax ; \
+ cmovncq %rbx, %rcx ; \
+ subq %rcx, %r8 ; \
+ sbbq %rbx, %r9 ; \
+ sbbq %rbx, %r10 ; \
+ sbbq %rbx, %rax ; \
+ movq %r8, P0 ; \
+ movq %r9, 8+P0 ; \
+ movq %r10, 16+P0 ; \
+ movq %rax, 24+P0
+
+// Modular addition with inputs up to the double modulus 2 * p_25519 = 2^256 - 38,
+// in general only guaranteeing a 4-digit result, not even one < 2 * p_25519.
+
+#define add_twice4(P0,P1,P2) \
+ movq P1, %r8 ; \
+ xorl %ecx, %ecx ; \
+ addq P2, %r8 ; \
+ movq 0x8+P1, %r9 ; \
+ adcq 0x8+P2, %r9 ; \
+ movq 0x10+P1, %r10 ; \
+ adcq 0x10+P2, %r10 ; \
+ movq 0x18+P1, %r11 ; \
+ adcq 0x18+P2, %r11 ; \
+ movl $38, %eax ; \
+ cmovncq %rcx, %rax ; \
+ addq %rax, %r8 ; \
+ adcq %rcx, %r9 ; \
+ adcq %rcx, %r10 ; \
+ adcq %rcx, %r11 ; \
+ movq %r8, P0 ; \
+ movq %r9, 0x8+P0 ; \
+ movq %r10, 0x10+P0 ; \
+ movq %r11, 0x18+P0
+
+S2N_BN_SYMBOL(curve25519_x25519base_alt):
+
+// In this case the Windows form literally makes a subroutine call.
+// This avoids hassle arising from keeping code and data together.
+
+#if WINDOWS_ABI
+ pushq %rdi
+ pushq %rsi
+ movq %rcx, %rdi
+ movq %rdx, %rsi
+ callq curve25519_x25519base_alt_standard
+ popq %rsi
+ popq %rdi
+ ret
+
+curve25519_x25519base_alt_standard:
+#endif
+
+// Save registers, make room for temps, preserve input arguments.
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $NSPACE, %rsp
+
+// Move the output pointer to a stable place
+
+ movq %rdi, res
+
+// Copy the input scalar to its local variable while mangling it.
+// In principle the mangling is into 01xxx...xxx000, but actually
+// we only clear the top two bits so 00xxx...xxxxxx. The additional
+// 2^254 * G is taken care of by the starting value for the addition
+// chain below, while we never look at the three low bits at all.
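+//
+// Concretely, the full mangling would be
+// n := (n & ~7 & ~(1 << 255)) | (1 << 254), while here we compute
+// only n & ~(3 << 254); the two differ by exactly 2^254 - (n mod 8).
+// A sanity check of that relationship (an illustrative Python
+// sketch, not part of this source file):
+//
+//     clamp = lambda n: (n & ~7 & ~(1 << 255)) | (1 << 254)
+//     here = lambda n: n & ~(3 << 254)
+//     assert all(clamp(n) == here(n) + (1 << 254) - (n % 8)
+//                for n in (0, 7, 2**254, 2**255 - 1, 2**256 - 1))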
+
+ movq (%rsi), %rax
+ movq %rax, (%rsp)
+ movq 8(%rsi), %rax
+ movq %rax, 8(%rsp)
+ movq 16(%rsi), %rax
+ movq %rax, 16(%rsp)
+ movq $0x3fffffffffffffff, %rax
+ andq 24(%rsi), %rax
+ movq %rax, 24(%rsp)
+
+// The main part of the computation is on the edwards25519 curve in
+// extended-projective coordinates (X,Y,Z,T), representing a point
+// (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z).
+// Only at the very end do we translate back to curve25519. So G
+// below means the generator within edwards25519 corresponding to
+// (9,...) for curve25519, via the standard isomorphism.
+//
+// Initialize accumulator "acc" to either (2^254 + 8) * G or just 2^254 * G
+// depending on bit 3 of the scalar, the only nonzero bit of the bottom 4.
+// Thus, we have effectively dealt with bits 0, 1, 2, 3, 254 and 255.
+
+ movq (%rsp), %rax
+ andq $8, %rax
+
+ leaq edwards25519_0g(%rip), %r10
+ leaq edwards25519_8g(%rip), %r11
+
+ movq (%r10), %rax
+ movq (%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*16(%rsp)
+
+ movq 8*1(%r10), %rax
+ movq 8*1(%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*17(%rsp)
+
+ movq 8*2(%r10), %rax
+ movq 8*2(%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*18(%rsp)
+
+ movq 8*3(%r10), %rax
+ movq 8*3(%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*19(%rsp)
+
+ movq 8*4(%r10), %rax
+ movq 8*4(%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*20(%rsp)
+
+ movq 8*5(%r10), %rax
+ movq 8*5(%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*21(%rsp)
+
+ movq 8*6(%r10), %rax
+ movq 8*6(%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*22(%rsp)
+
+ movq 8*7(%r10), %rax
+ movq 8*7(%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*23(%rsp)
+
+ movl $1, %eax
+ movq %rax, 8*24(%rsp)
+ movl $0, %eax
+ movq %rax, 8*25(%rsp)
+ movq %rax, 8*26(%rsp)
+ movq %rax, 8*27(%rsp)
+
+ movq 8*8(%r10), %rax
+ movq 8*8(%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*28(%rsp)
+
+ movq 8*9(%r10), %rax
+ movq 8*9(%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*29(%rsp)
+
+ movq 8*10(%r10), %rax
+ movq 8*10(%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*30(%rsp)
+
+ movq 8*11(%r10), %rax
+ movq 8*11(%r11), %rcx
+ cmovnzq %rcx, %rax
+ movq %rax, 8*31(%rsp)
+
+// The counter "i" tracks the bit position up to which the scalar has
+// already been absorbed, starting at 4 and going up in chunks of 4.
+//
+// The pointer "tab" points at the current block of the table for
+// multiples (2^i * j) * G at the current bit position i; 1 <= j <= 8.
+//
+// The bias is always either 0 or 1 and needs to be added to the
+// partially processed scalar implicitly. This is used to absorb 4 bits
+// of scalar per iteration from 3-bit table indexing by exploiting
+// negation: (16 * h + l) * G = (16 * (h + 1) - (16 - l)) * G is used
+// when l >= 9. Note that we can't have any bias left over at the
+// end because of the clearing of bit 255 of the scalar, meaning the
+// l >= 9 case cannot arise on the last iteration.
+
+ movq $4, i
+ leaq edwards25519_gtable(%rip), %rax
+ movq %rax, tab
+ movq $0, bias
+
+// Start of the main loop, repeated 63 times for i = 4, 8, ..., 252
+
+scalarloop:
+
+// Look at the next 4-bit field "bf", adding the previous bias as well.
+// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9,
+// setting the bias to 1 for the next iteration in the latter case.
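+//
+// For example, bf = 13 gives ix = 16 - 13 = 3 with bias = 1: the
+// negated table entry contributes -3 * 2^i * G now, and the carried
+// bias adds 16 * 2^i * G = 2^(i+4) * G at the next bit position, so
+// 13 = 16 - 3 is absorbed exactly. The recoding step as an
+// illustrative Python sketch (not part of this source file):
+//
+//     bf = ((scalar >> i) & 15) + bias
+//     bias = 1 if bf >= 9 else 0
+//     ix = 16 - bf if bias else bf      # always 0 <= ix <= 8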
+ + movq i, %rax + movq %rax, %rcx + shrq $6, %rax + movq (%rsp,%rax,8), %rax // Exploiting scalar = sp exactly + shrq %cl, %rax + andq $15, %rax + addq bias, %rax + movq %rax, bf + + cmpq $9, bf + sbbq %rax, %rax + incq %rax + movq %rax, bias + + movq $16, %rdi + subq bf, %rdi + cmpq $0, bias + cmovzq bf, %rdi + movq %rdi, ix + +// Perform constant-time lookup in the table to get element number "ix". +// The table entry for the affine point (x,y) is actually a triple +// (y - x,x + y,2 * d * x * y) to precompute parts of the addition. +// Note that "ix" can be 0, so we set up the appropriate identity first. + + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + movq tab, %rbp + + cmpq $1, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $2, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $3, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $4, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $5, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $6, ix + movq (%rbp), 
%rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $7, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $8, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + + addq $96, %rbp + movq %rbp, tab + +// We now have the triple from the table in registers as follows +// +// [%rdx;%rcx;%rbx;%rax] = y - x +// [%r11;%r10;%r9;%r8] = x + y +// [%r15;%r14;%r13;%r12] = 2 * d * x * y +// +// In case bias = 1 we need to negate this. For Edwards curves +// -(x,y) = (-x,y), i.e. we need to negate the x coordinate. +// In this processed encoding, that amounts to swapping the +// first two fields and negating the third. +// +// The optional negation here also pretends bias = 0 whenever +// ix = 0 so that it doesn't need to handle the case of zero +// inputs, since no non-trivial table entries are zero. Note +// that in the zero case the whole negation is trivial, and +// so indeed is the swapping. + + cmpq $0, bias + + movq %rax, %rsi + cmovnzq %r8, %rsi + cmovnzq %rax, %r8 + movq %rsi, 32(%rsp) + movq %r8, 64(%rsp) + + movq %rbx, %rsi + cmovnzq %r9, %rsi + cmovnzq %rbx, %r9 + movq %rsi, 40(%rsp) + movq %r9, 72(%rsp) + + movq %rcx, %rsi + cmovnzq %r10, %rsi + cmovnzq %rcx, %r10 + movq %rsi, 48(%rsp) + movq %r10, 80(%rsp) + + movq %rdx, %rsi + cmovnzq %r11, %rsi + cmovnzq %rdx, %r11 + movq %rsi, 56(%rsp) + movq %r11, 88(%rsp) + + movq $-19, %rax + movq $-1, %rbx + movq $-1, %rcx + movq $0x7fffffffffffffff, %rdx + subq %r12, %rax + sbbq %r13, %rbx + sbbq %r14, %rcx + sbbq %r15, %rdx + + movq ix, %r8 + movq bias, %r9 + testq %r8, %r8 + cmovzq %r8, %r9 + testq %r9, %r9 + + cmovzq %r12, %rax + cmovzq %r13, %rbx + cmovzq %r14, %rcx + cmovzq %r15, %rdx + movq %rax, 96(%rsp) + movq %rbx, 104(%rsp) + movq %rcx, 112(%rsp) + movq %rdx, 120(%rsp) + +// Extended-projective and precomputed mixed addition. 
+// This is effectively the same as calling the standalone
+// function edwards25519_pepadd_alt(acc,acc,tabent)
+
+ double_4(t0,z_1)
+ sub_4(t1,y_1,x_1)
+ add_4(t2,y_1,x_1)
+ mul_4(t3,w_1,kxy_2)
+ mul_4(t1,t1,ymx_2)
+ mul_4(t2,t2,xpy_2)
+ sub_twice4(t4,t0,t3)
+ add_twice4(t0,t0,t3)
+ sub_twice4(t5,t2,t1)
+ add_twice4(t1,t2,t1)
+ mul_p25519(z_3,t4,t0)
+ mul_p25519(x_3,t5,t4)
+ mul_p25519(y_3,t0,t1)
+ mul_p25519(w_3,t5,t1)
+
+// End of the main loop; move on by 4 bits.
+
+ addq $4, i
+ cmpq $256, i
+ jc scalarloop
+
+// Now we need to translate from Edwards curve edwards25519 back
+// to the Montgomery form curve25519. The mapping in the affine
+// representations is
+//
+// (x,y) |-> ((1 + y) / (1 - y), c * (1 + y) / ((1 - y) * x))
+//
+// For x25519, we only need the x coordinate, and we compute this as
+//
+// (1 + y) / (1 - y) = (x + x * y) / (x - x * y)
+// = (X/Z + T/Z) / (X/Z - T/Z)
+// = (X + T) / (X - T)
+// = (X + T) * inverse(X - T)
+//
+// We could equally well use (Z + Y) / (Z - Y), but the above has the
+// same cost, and it more explicitly forces zero output whenever X = 0,
+// regardless of how the modular inverse behaves on zero inputs. In
+// the present setting (base point 9, mangled scalar) that doesn't
+// really matter anyway since X = 0 never arises, but it seems a
+// little bit tidier. Note that the Edwards point (0,1), which maps to
+// the Montgomery point at infinity, and the Edwards point (0,-1), which
+// maps to Montgomery (0,0) [this is the 2-torsion point], are both by
+// definition mapped to 0 by the X coordinate mapping used to define
+// curve25519.
+//
+// First the addition and subtraction:
+
+ add_4(y_3,x_3,w_3)
+ sub_4(z_3,x_3,w_3)
+
+// Prepare to call the modular inverse function to get x_3 = 1/z_3
+
+ movq $4, %rdi
+ leaq 128(%rsp), %rsi
+ leaq 192(%rsp), %rdx
+ leaq p_25519(%rip), %rcx
+ leaq 256(%rsp), %r8
+
+// Inline copy of bignum_modinv, identical except for stripping out the
+// prologue and epilogue that save and restore registers, and the initial
+// test for k = 0 (which is trivially false here since k = 4). For more
+// details and explanations see "x86/generic/bignum_modinv.S". Note
+// that the stack it uses for its own temporaries is 80 bytes, so it
+// only overwrites local variables that are no longer needed.
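+//
+// Given the argument registers set up above (size k = 4 digits,
+// output x_3, input z_3, modulus p_25519, temporary buffer at
+// 256(%rsp)), the effect of the inlined block is
+// x_3 = z_3^-1 mod p_25519, i.e. in Python terms
+// x_3 = pow(z_3, -1, p_25519).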
+ + movq %rsi, 0x40(%rsp) + movq %r8, 0x38(%rsp) + movq %rcx, 0x48(%rsp) + leaq (%r8,%rdi,8), %r10 + movq %r10, 0x30(%rsp) + leaq (%r10,%rdi,8), %r15 + xorq %r11, %r11 + xorq %r9, %r9 +copyloop: + movq (%rdx,%r9,8), %rax + movq (%rcx,%r9,8), %rbx + movq %rax, (%r10,%r9,8) + movq %rbx, (%r15,%r9,8) + movq %rbx, (%r8,%r9,8) + movq %r11, (%rsi,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb copyloop + movq (%r8), %rax + movq %rax, %rbx + decq %rbx + movq %rbx, (%r8) + movq %rax, %rbp + movq %rax, %r12 + shlq $0x2, %rbp + subq %rbp, %r12 + xorq $0x2, %r12 + movq %r12, %rbp + imulq %rax, %rbp + movl $0x2, %eax + addq %rbp, %rax + addq $0x1, %rbp + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + movq %r12, 0x28(%rsp) + movq %rdi, %rax + shlq $0x7, %rax + movq %rax, 0x20(%rsp) +outerloop: + movq 0x20(%rsp), %r13 + addq $0x3f, %r13 + shrq $0x6, %r13 + cmpq %rdi, %r13 + cmovaeq %rdi, %r13 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi + xorq %r11, %r11 + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 +toploop: + movq (%r8,%r9,8), %rbx + movq (%r15,%r9,8), %rcx + movq %r11, %r10 + andq %r12, %r10 + andq %rbp, %r11 + movq %rbx, %rax + orq %rcx, %rax + negq %rax + cmovbq %r10, %r14 + cmovbq %r11, %rsi + cmovbq %rbx, %r12 + cmovbq %rcx, %rbp + sbbq %r11, %r11 + incq %r9 + cmpq %r13, %r9 + jb toploop + movq %r12, %rax + orq %rbp, %rax + bsrq %rax, %rcx + xorq $0x3f, %rcx + shldq %cl, %r14, %r12 + shldq %cl, %rsi, %rbp + movq (%r8), %rax + movq %rax, %r14 + movq (%r15), %rax + movq %rax, %rsi + movl $0x1, %r10d + movl $0x0, %r11d + movl $0x0, %ecx + movl $0x1, %edx + movl $0x3a, %r9d + movq %rdi, 0x8(%rsp) + movq %r13, 0x10(%rsp) + movq %r8, (%rsp) + movq %r15, 0x18(%rsp) +innerloop: + movq %rbp, %rax + movq %rsi, %rdi + movq %rcx, %r13 + movq %rdx, %r15 + movq $0x1, %rbx + negq %rdi + andq %r14, %rbx + cmoveq %rbx, %rax + cmoveq %rbx, %rdi + cmoveq %rbx, %r13 + cmoveq %rbx, %r15 + movq %r12, %rbx + addq %r14, %rdi + movq %rdi, %r8 + negq %rdi + subq %rax, %rbx + cmovbq %r12, %rbp + cmovbq %r14, %rsi + cmovbq %r10, %rcx + cmovbq %r11, %rdx + cmovaeq %r8, %rdi + movq %rbx, %r12 + notq %rbx + incq %rbx + cmovbq %rbx, %r12 + movq %rdi, %r14 + addq %r13, %r10 + addq %r15, %r11 + shrq $1, %r12 + shrq $1, %r14 + leaq (%rcx,%rcx), %rcx + leaq (%rdx,%rdx), %rdx + decq %r9 + jne innerloop + movq 0x8(%rsp), %rdi + movq 0x10(%rsp), %r13 + movq (%rsp), %r8 + movq 0x18(%rsp), %r15 + movq %r10, (%rsp) + movq %r11, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rdx, 0x18(%rsp) + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + xorq %r14, %r14 + xorq %rsi, %rsi + xorq %r10, %r10 + xorq %r11, %r11 + xorq %r9, %r9 +congloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r12 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %rbp + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq %rdx, %r12 + shrdq $0x3a, %r14, %r10 + movq %r10, (%r8,%r9,8) + movq %r14, %r10 + movq %r12, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3a, %rsi, %r11 + movq %r11, (%r15,%r9,8) + movq %rsi, %r11 + movq %rbp, %rsi + incq %r9 + cmpq %rdi, %r9 + jb congloop + shldq $0x6, %r10, %r14 + shldq $0x6, %r11, %rsi + movq 0x48(%rsp), %r15 + movq (%r8), %rbx + movq 
0x28(%rsp), %r12 + imulq %rbx, %r12 + movq (%r15), %rax + mulq %r12 + addq %rbx, %rax + movq %rdx, %r10 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je wmontend +wmontloop: + adcq (%r8,%r9,8), %r10 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %r12 + subq %rbx, %rdx + addq %r10, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r10 + incq %r9 + decq %rcx + jne wmontloop +wmontend: + adcq %r14, %r10 + movq %r10, -0x8(%r8,%rdi,8) + sbbq %r10, %r10 + negq %r10 + movq %rdi, %rcx + xorq %r9, %r9 +wcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne wcmploop + sbbq $0x0, %r10 + sbbq %r10, %r10 + notq %r10 + xorq %rcx, %rcx + xorq %r9, %r9 +wcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r10, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb wcorrloop + movq 0x40(%rsp), %r8 + movq (%r8), %rbx + movq 0x28(%rsp), %rbp + imulq %rbx, %rbp + movq (%r15), %rax + mulq %rbp + addq %rbx, %rax + movq %rdx, %r11 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je zmontend +zmontloop: + adcq (%r8,%r9,8), %r11 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %rbp + subq %rbx, %rdx + addq %r11, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r11 + incq %r9 + decq %rcx + jne zmontloop +zmontend: + adcq %rsi, %r11 + movq %r11, -0x8(%r8,%rdi,8) + sbbq %r11, %r11 + negq %r11 + movq %rdi, %rcx + xorq %r9, %r9 +zcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne zcmploop + sbbq $0x0, %r11 + sbbq %r11, %r11 + notq %r11 + xorq %rcx, %rcx + xorq %r9, %r9 +zcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r11, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb zcorrloop + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi +crossloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %r11 + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + subq %r12, %rdx + subq %rax, %r14 + sbbq %rdx, %r10 + sbbq %r12, %r12 + movq %r14, (%r8,%r9,8) + movq %r10, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + subq %rbp, %rdx + subq %rax, %rsi + sbbq %rdx, %r11 + sbbq %rbp, %rbp + movq %rsi, (%r15,%r9,8) + movq %r11, %rsi + incq %r9 + cmpq %r13, %r9 + jb crossloop + xorq %r9, %r9 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r12, %r14 + xorq %rbp, %rsi +optnegloop: + movq (%r8,%r9,8), %rax + xorq %r12, %rax + negq %r10 + adcq $0x0, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rax + xorq %rbp, %rax + negq %r11 + adcq $0x0, %rax + sbbq %r11, %r11 + movq %rax, (%r15,%r9,8) + incq %r9 + cmpq %r13, %r9 + jb optnegloop + subq %r10, %r14 + subq %r11, %rsi + movq %r13, %r9 +shiftloop: + movq -0x8(%r8,%r9,8), %rax + movq %rax, %r10 + shrdq $0x3a, %r14, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %r10, %r14 + movq -0x8(%r15,%r9,8), %rax + movq %rax, %r11 + shrdq $0x3a, %rsi, %rax + movq %rax, -0x8(%r15,%r9,8) + movq %r11, %rsi + decq %r9 + jne shiftloop + notq %rbp + movq 0x48(%rsp), %rcx + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r9, %r9 +fliploop: + movq %rbp, %rdx + movq (%rcx,%r9,8), %rax + andq %rax, %rdx + andq %r12, %rax + movq (%r8,%r9,8), %rbx + xorq %r12, %rbx + negq %r10 + adcq %rbx, %rax + sbbq 
%r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rbx + xorq %rbp, %rbx + negq %r11 + adcq %rbx, %rdx + sbbq %r11, %r11 + movq %rdx, (%r15,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb fliploop + subq $0x3a, 0x20(%rsp) + ja outerloop + +// The final result is (X + T) / (X - T) + + movq res, %rbp + mul_p25519(resx,y_3,x_3) + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// The modulus, for the modular inverse + +p_25519: + .quad 0xffffffffffffffed + .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x7fffffffffffffff + +// 2^254 * G and (2^254 + 8) * G in extended-projective coordinates +// but with z = 1 assumed and hence left out, so they are (X,Y,T) only. + +edwards25519_0g: + + .quad 0x251037f7cf4e861d + .quad 0x10ede0fb19fb128f + .quad 0x96c033b175f5e2c8 + .quad 0x055f070d6c15fb0d + + .quad 0x7c52af2c97473e69 + .quad 0x022f82391bad8378 + .quad 0x9991e1b02adb476f + .quad 0x511144a03a99b855 + + .quad 0x5fafc3b88ff2e4ae + .quad 0x855e4ff0de1230ff + .quad 0x72e302a348492870 + .quad 0x1253c19e53dbe1bc + +edwards25519_8g: + + .quad 0x331d086e0d9abcaa + .quad 0x1e23c96d311a10c9 + .quad 0x96d0f95e58c13478 + .quad 0x2f72f7384fcfcc59 + + .quad 0x39a6cd1cfd7d87c9 + .quad 0x9867a0abd8ae153a + .quad 0xa49d2a5f35986745 + .quad 0x57012940cdfe82e1 + + .quad 0x5046a6532ec5544a + .quad 0x6d674004739ff6c9 + .quad 0x9bbaa44b234a70e3 + .quad 0x5e6d8901138cf386 + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
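+//
+// Storing each entry as (y - x, x + y, 2 * d * x * y) rather than
+// plain (x, y) lets the mixed addition in the main loop consume it
+// directly: Y1 - X1 is multiplied by the first field, Y1 + X1 by the
+// second and T1 by the third, avoiding any per-iteration
+// recomputation of these combinations (d being the usual edwards25519
+// constant -121665/121666 mod p_25519).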
+ +edwards25519_gtable: + + // 2^4 * 1 * G + + .quad 0x7ec851ca553e2df3 + .quad 0xa71284cba64878b3 + .quad 0xe6b5e4193288d1e7 + .quad 0x4cf210ec5a9a8883 + .quad 0x322d04a52d9021f6 + .quad 0xb9c19f3375c6bf9c + .quad 0x587a3a4342d20b09 + .quad 0x143b1cf8aa64fe61 + .quad 0x9f867c7d968acaab + .quad 0x5f54258e27092729 + .quad 0xd0a7d34bea180975 + .quad 0x21b546a3374126e1 + + // 2^4 * 2 * G + + .quad 0xa94ff858a2888343 + .quad 0xce0ed4565313ed3c + .quad 0xf55c3dcfb5bf34fa + .quad 0x0a653ca5c9eab371 + .quad 0x490a7a45d185218f + .quad 0x9a15377846049335 + .quad 0x0060ea09cc31e1f6 + .quad 0x7e041577f86ee965 + .quad 0x66b2a496ce5b67f3 + .quad 0xff5492d8bd569796 + .quad 0x503cec294a592cd0 + .quad 0x566943650813acb2 + + // 2^4 * 3 * G + + .quad 0xb818db0c26620798 + .quad 0x5d5c31d9606e354a + .quad 0x0982fa4f00a8cdc7 + .quad 0x17e12bcd4653e2d4 + .quad 0x5672f9eb1dabb69d + .quad 0xba70b535afe853fc + .quad 0x47ac0f752796d66d + .quad 0x32a5351794117275 + .quad 0xd3a644a6df648437 + .quad 0x703b6559880fbfdd + .quad 0xcb852540ad3a1aa5 + .quad 0x0900b3f78e4c6468 + + // 2^4 * 4 * G + + .quad 0x0a851b9f679d651b + .quad 0xe108cb61033342f2 + .quad 0xd601f57fe88b30a3 + .quad 0x371f3acaed2dd714 + .quad 0xed280fbec816ad31 + .quad 0x52d9595bd8e6efe3 + .quad 0x0fe71772f6c623f5 + .quad 0x4314030b051e293c + .quad 0xd560005efbf0bcad + .quad 0x8eb70f2ed1870c5e + .quad 0x201f9033d084e6a0 + .quad 0x4c3a5ae1ce7b6670 + + // 2^4 * 5 * G + + .quad 0x4138a434dcb8fa95 + .quad 0x870cf67d6c96840b + .quad 0xde388574297be82c + .quad 0x7c814db27262a55a + .quad 0xbaf875e4c93da0dd + .quad 0xb93282a771b9294d + .quad 0x80d63fb7f4c6c460 + .quad 0x6de9c73dea66c181 + .quad 0x478904d5a04df8f2 + .quad 0xfafbae4ab10142d3 + .quad 0xf6c8ac63555d0998 + .quad 0x5aac4a412f90b104 + + // 2^4 * 6 * G + + .quad 0xc64f326b3ac92908 + .quad 0x5551b282e663e1e0 + .quad 0x476b35f54a1a4b83 + .quad 0x1b9da3fe189f68c2 + .quad 0x603a0d0abd7f5134 + .quad 0x8089c932e1d3ae46 + .quad 0xdf2591398798bd63 + .quad 0x1c145cd274ba0235 + .quad 0x32e8386475f3d743 + .quad 0x365b8baf6ae5d9ef + .quad 0x825238b6385b681e + .quad 0x234929c1167d65e1 + + // 2^4 * 7 * G + + .quad 0x984decaba077ade8 + .quad 0x383f77ad19eb389d + .quad 0xc7ec6b7e2954d794 + .quad 0x59c77b3aeb7c3a7a + .quad 0x48145cc21d099fcf + .quad 0x4535c192cc28d7e5 + .quad 0x80e7c1e548247e01 + .quad 0x4a5f28743b2973ee + .quad 0xd3add725225ccf62 + .quad 0x911a3381b2152c5d + .quad 0xd8b39fad5b08f87d + .quad 0x6f05606b4799fe3b + + // 2^4 * 8 * G + + .quad 0x9ffe9e92177ba962 + .quad 0x98aee71d0de5cae1 + .quad 0x3ff4ae942d831044 + .quad 0x714de12e58533ac8 + .quad 0x5b433149f91b6483 + .quad 0xadb5dc655a2cbf62 + .quad 0x87fa8412632827b3 + .quad 0x60895e91ab49f8d8 + .quad 0xe9ecf2ed0cf86c18 + .quad 0xb46d06120735dfd4 + .quad 0xbc9da09804b96be7 + .quad 0x73e2e62fd96dc26b + + // 2^8 * 1 * G + + .quad 0xed5b635449aa515e + .quad 0xa865c49f0bc6823a + .quad 0x850c1fe95b42d1c4 + .quad 0x30d76d6f03d315b9 + .quad 0x2eccdd0e632f9c1d + .quad 0x51d0b69676893115 + .quad 0x52dfb76ba8637a58 + .quad 0x6dd37d49a00eef39 + .quad 0x6c4444172106e4c7 + .quad 0xfb53d680928d7f69 + .quad 0xb4739ea4694d3f26 + .quad 0x10c697112e864bb0 + + // 2^8 * 2 * G + + .quad 0x6493c4277dbe5fde + .quad 0x265d4fad19ad7ea2 + .quad 0x0e00dfc846304590 + .quad 0x25e61cabed66fe09 + .quad 0x0ca62aa08358c805 + .quad 0x6a3d4ae37a204247 + .quad 0x7464d3a63b11eddc + .quad 0x03bf9baf550806ef + .quad 0x3f13e128cc586604 + .quad 0x6f5873ecb459747e + .quad 0xa0b63dedcc1268f5 + .quad 0x566d78634586e22c + + // 2^8 * 3 * G + + .quad 0x1637a49f9cc10834 + .quad 0xbc8e56d5a89bc451 + 
.quad 0x1cb5ec0f7f7fd2db + .quad 0x33975bca5ecc35d9 + .quad 0xa1054285c65a2fd0 + .quad 0x6c64112af31667c3 + .quad 0x680ae240731aee58 + .quad 0x14fba5f34793b22a + .quad 0x3cd746166985f7d4 + .quad 0x593e5e84c9c80057 + .quad 0x2fc3f2b67b61131e + .quad 0x14829cea83fc526c + + // 2^8 * 4 * G + + .quad 0xff437b8497dd95c2 + .quad 0x6c744e30aa4eb5a7 + .quad 0x9e0c5d613c85e88b + .quad 0x2fd9c71e5f758173 + .quad 0x21e70b2f4e71ecb8 + .quad 0xe656ddb940a477e3 + .quad 0xbf6556cece1d4f80 + .quad 0x05fc3bc4535d7b7e + .quad 0x24b8b3ae52afdedd + .quad 0x3495638ced3b30cf + .quad 0x33a4bc83a9be8195 + .quad 0x373767475c651f04 + + // 2^8 * 5 * G + + .quad 0x2fba99fd40d1add9 + .quad 0xb307166f96f4d027 + .quad 0x4363f05215f03bae + .quad 0x1fbea56c3b18f999 + .quad 0x634095cb14246590 + .quad 0xef12144016c15535 + .quad 0x9e38140c8910bc60 + .quad 0x6bf5905730907c8c + .quad 0x0fa778f1e1415b8a + .quad 0x06409ff7bac3a77e + .quad 0x6f52d7b89aa29a50 + .quad 0x02521cf67a635a56 + + // 2^8 * 6 * G + + .quad 0x513fee0b0a9d5294 + .quad 0x8f98e75c0fdf5a66 + .quad 0xd4618688bfe107ce + .quad 0x3fa00a7e71382ced + .quad 0xb1146720772f5ee4 + .quad 0xe8f894b196079ace + .quad 0x4af8224d00ac824a + .quad 0x001753d9f7cd6cc4 + .quad 0x3c69232d963ddb34 + .quad 0x1dde87dab4973858 + .quad 0xaad7d1f9a091f285 + .quad 0x12b5fe2fa048edb6 + + // 2^8 * 7 * G + + .quad 0x71f0fbc496fce34d + .quad 0x73b9826badf35bed + .quad 0xd2047261ff28c561 + .quad 0x749b76f96fb1206f + .quad 0xdf2b7c26ad6f1e92 + .quad 0x4b66d323504b8913 + .quad 0x8c409dc0751c8bc3 + .quad 0x6f7e93c20796c7b8 + .quad 0x1f5af604aea6ae05 + .quad 0xc12351f1bee49c99 + .quad 0x61a808b5eeff6b66 + .quad 0x0fcec10f01e02151 + + // 2^8 * 8 * G + + .quad 0x644d58a649fe1e44 + .quad 0x21fcaea231ad777e + .quad 0x02441c5a887fd0d2 + .quad 0x4901aa7183c511f3 + .quad 0x3df2d29dc4244e45 + .quad 0x2b020e7493d8de0a + .quad 0x6cc8067e820c214d + .quad 0x413779166feab90a + .quad 0x08b1b7548c1af8f0 + .quad 0xce0f7a7c246299b4 + .quad 0xf760b0f91e06d939 + .quad 0x41bb887b726d1213 + + // 2^12 * 1 * G + + .quad 0x9267806c567c49d8 + .quad 0x066d04ccca791e6a + .quad 0xa69f5645e3cc394b + .quad 0x5c95b686a0788cd2 + .quad 0x97d980e0aa39f7d2 + .quad 0x35d0384252c6b51c + .quad 0x7d43f49307cd55aa + .quad 0x56bd36cfb78ac362 + .quad 0x2ac519c10d14a954 + .quad 0xeaf474b494b5fa90 + .quad 0xe6af8382a9f87a5a + .quad 0x0dea6db1879be094 + + // 2^12 * 2 * G + + .quad 0xaa66bf547344e5ab + .quad 0xda1258888f1b4309 + .quad 0x5e87d2b3fd564b2f + .quad 0x5b2c78885483b1dd + .quad 0x15baeb74d6a8797a + .quad 0x7ef55cf1fac41732 + .quad 0x29001f5a3c8b05c5 + .quad 0x0ad7cc8752eaccfb + .quad 0x52151362793408cf + .quad 0xeb0f170319963d94 + .quad 0xa833b2fa883d9466 + .quad 0x093a7fa775003c78 + + // 2^12 * 3 * G + + .quad 0xe5107de63a16d7be + .quad 0xa377ffdc9af332cf + .quad 0x70d5bf18440b677f + .quad 0x6a252b19a4a31403 + .quad 0xb8e9604460a91286 + .quad 0x7f3fd8047778d3de + .quad 0x67d01e31bf8a5e2d + .quad 0x7b038a06c27b653e + .quad 0x9ed919d5d36990f3 + .quad 0x5213aebbdb4eb9f2 + .quad 0xc708ea054cb99135 + .quad 0x58ded57f72260e56 + + // 2^12 * 4 * G + + .quad 0x78e79dade9413d77 + .quad 0xf257f9d59729e67d + .quad 0x59db910ee37aa7e6 + .quad 0x6aa11b5bbb9e039c + .quad 0xda6d53265b0fd48b + .quad 0x8960823193bfa988 + .quad 0xd78ac93261d57e28 + .quad 0x79f2942d3a5c8143 + .quad 0x97da2f25b6c88de9 + .quad 0x251ba7eaacf20169 + .quad 0x09b44f87ef4eb4e4 + .quad 0x7d90ab1bbc6a7da5 + + // 2^12 * 5 * G + + .quad 0x9acca683a7016bfe + .quad 0x90505f4df2c50b6d + .quad 0x6b610d5fcce435aa + .quad 0x19a10d446198ff96 + .quad 0x1a07a3f496b3c397 + .quad 
0x11ceaa188f4e2532 + .quad 0x7d9498d5a7751bf0 + .quad 0x19ed161f508dd8a0 + .quad 0x560a2cd687dce6ca + .quad 0x7f3568c48664cf4d + .quad 0x8741e95222803a38 + .quad 0x483bdab1595653fc + + // 2^12 * 6 * G + + .quad 0xfa780f148734fa49 + .quad 0x106f0b70360534e0 + .quad 0x2210776fe3e307bd + .quad 0x3286c109dde6a0fe + .quad 0xd6cf4d0ab4da80f6 + .quad 0x82483e45f8307fe0 + .quad 0x05005269ae6f9da4 + .quad 0x1c7052909cf7877a + .quad 0x32ee7de2874e98d4 + .quad 0x14c362e9b97e0c60 + .quad 0x5781dcde6a60a38a + .quad 0x217dd5eaaa7aa840 + + // 2^12 * 7 * G + + .quad 0x9db7c4d0248e1eb0 + .quad 0xe07697e14d74bf52 + .quad 0x1e6a9b173c562354 + .quad 0x7fa7c21f795a4965 + .quad 0x8bdf1fb9be8c0ec8 + .quad 0x00bae7f8e30a0282 + .quad 0x4963991dad6c4f6c + .quad 0x07058a6e5df6f60a + .quad 0xe9eb02c4db31f67f + .quad 0xed25fd8910bcfb2b + .quad 0x46c8131f5c5cddb4 + .quad 0x33b21c13a0cb9bce + + // 2^12 * 8 * G + + .quad 0x360692f8087d8e31 + .quad 0xf4dcc637d27163f7 + .quad 0x25a4e62065ea5963 + .quad 0x659bf72e5ac160d9 + .quad 0x9aafb9b05ee38c5b + .quad 0xbf9d2d4e071a13c7 + .quad 0x8eee6e6de933290a + .quad 0x1c3bab17ae109717 + .quad 0x1c9ab216c7cab7b0 + .quad 0x7d65d37407bbc3cc + .quad 0x52744750504a58d5 + .quad 0x09f2606b131a2990 + + // 2^16 * 1 * G + + .quad 0x40e87d44744346be + .quad 0x1d48dad415b52b25 + .quad 0x7c3a8a18a13b603e + .quad 0x4eb728c12fcdbdf7 + .quad 0x7e234c597c6691ae + .quad 0x64889d3d0a85b4c8 + .quad 0xdae2c90c354afae7 + .quad 0x0a871e070c6a9e1d + .quad 0x3301b5994bbc8989 + .quad 0x736bae3a5bdd4260 + .quad 0x0d61ade219d59e3c + .quad 0x3ee7300f2685d464 + + // 2^16 * 2 * G + + .quad 0xf5d255e49e7dd6b7 + .quad 0x8016115c610b1eac + .quad 0x3c99975d92e187ca + .quad 0x13815762979125c2 + .quad 0x43fa7947841e7518 + .quad 0xe5c6fa59639c46d7 + .quad 0xa1065e1de3052b74 + .quad 0x7d47c6a2cfb89030 + .quad 0x3fdad0148ef0d6e0 + .quad 0x9d3e749a91546f3c + .quad 0x71ec621026bb8157 + .quad 0x148cf58d34c9ec80 + + // 2^16 * 3 * G + + .quad 0x46a492f67934f027 + .quad 0x469984bef6840aa9 + .quad 0x5ca1bc2a89611854 + .quad 0x3ff2fa1ebd5dbbd4 + .quad 0xe2572f7d9ae4756d + .quad 0x56c345bb88f3487f + .quad 0x9fd10b6d6960a88d + .quad 0x278febad4eaea1b9 + .quad 0xb1aa681f8c933966 + .quad 0x8c21949c20290c98 + .quad 0x39115291219d3c52 + .quad 0x4104dd02fe9c677b + + // 2^16 * 4 * G + + .quad 0x72b2bf5e1124422a + .quad 0xa1fa0c3398a33ab5 + .quad 0x94cb6101fa52b666 + .quad 0x2c863b00afaf53d5 + .quad 0x81214e06db096ab8 + .quad 0x21a8b6c90ce44f35 + .quad 0x6524c12a409e2af5 + .quad 0x0165b5a48efca481 + .quad 0xf190a474a0846a76 + .quad 0x12eff984cd2f7cc0 + .quad 0x695e290658aa2b8f + .quad 0x591b67d9bffec8b8 + + // 2^16 * 5 * G + + .quad 0x312f0d1c80b49bfa + .quad 0x5979515eabf3ec8a + .quad 0x727033c09ef01c88 + .quad 0x3de02ec7ca8f7bcb + .quad 0x99b9b3719f18b55d + .quad 0xe465e5faa18c641e + .quad 0x61081136c29f05ed + .quad 0x489b4f867030128b + .quad 0xd232102d3aeb92ef + .quad 0xe16253b46116a861 + .quad 0x3d7eabe7190baa24 + .quad 0x49f5fbba496cbebf + + // 2^16 * 6 * G + + .quad 0x30949a108a5bcfd4 + .quad 0xdc40dd70bc6473eb + .quad 0x92c294c1307c0d1c + .quad 0x5604a86dcbfa6e74 + .quad 0x155d628c1e9c572e + .quad 0x8a4d86acc5884741 + .quad 0x91a352f6515763eb + .quad 0x06a1a6c28867515b + .quad 0x7288d1d47c1764b6 + .quad 0x72541140e0418b51 + .quad 0x9f031a6018acf6d1 + .quad 0x20989e89fe2742c6 + + // 2^16 * 7 * G + + .quad 0x499777fd3a2dcc7f + .quad 0x32857c2ca54fd892 + .quad 0xa279d864d207e3a0 + .quad 0x0403ed1d0ca67e29 + .quad 0x1674278b85eaec2e + .quad 0x5621dc077acb2bdf + .quad 0x640a4c1661cbf45a + .quad 0x730b9950f70595d3 + .quad 
0xc94b2d35874ec552 + .quad 0xc5e6c8cf98246f8d + .quad 0xf7cb46fa16c035ce + .quad 0x5bd7454308303dcc + + // 2^16 * 8 * G + + .quad 0x7f9ad19528b24cc2 + .quad 0x7f6b54656335c181 + .quad 0x66b8b66e4fc07236 + .quad 0x133a78007380ad83 + .quad 0x85c4932115e7792a + .quad 0xc64c89a2bdcdddc9 + .quad 0x9d1e3da8ada3d762 + .quad 0x5bb7db123067f82c + .quad 0x0961f467c6ca62be + .quad 0x04ec21d6211952ee + .quad 0x182360779bd54770 + .quad 0x740dca6d58f0e0d2 + + // 2^20 * 1 * G + + .quad 0x50b70bf5d3f0af0b + .quad 0x4feaf48ae32e71f7 + .quad 0x60e84ed3a55bbd34 + .quad 0x00ed489b3f50d1ed + .quad 0x3906c72aed261ae5 + .quad 0x9ab68fd988e100f7 + .quad 0xf5e9059af3360197 + .quad 0x0e53dc78bf2b6d47 + .quad 0xb90829bf7971877a + .quad 0x5e4444636d17e631 + .quad 0x4d05c52e18276893 + .quad 0x27632d9a5a4a4af5 + + // 2^20 * 2 * G + + .quad 0xd11ff05154b260ce + .quad 0xd86dc38e72f95270 + .quad 0x601fcd0d267cc138 + .quad 0x2b67916429e90ccd + .quad 0xa98285d187eaffdb + .quad 0xa5b4fbbbd8d0a864 + .quad 0xb658f27f022663f7 + .quad 0x3bbc2b22d99ce282 + .quad 0xb917c952583c0a58 + .quad 0x653ff9b80fe4c6f3 + .quad 0x9b0da7d7bcdf3c0c + .quad 0x43a0eeb6ab54d60e + + // 2^20 * 3 * G + + .quad 0x396966a46d4a5487 + .quad 0xf811a18aac2bb3ba + .quad 0x66e4685b5628b26b + .quad 0x70a477029d929b92 + .quad 0x3ac6322357875fe8 + .quad 0xd9d4f4ecf5fbcb8f + .quad 0x8dee8493382bb620 + .quad 0x50c5eaa14c799fdc + .quad 0xdd0edc8bd6f2fb3c + .quad 0x54c63aa79cc7b7a0 + .quad 0xae0b032b2c8d9f1a + .quad 0x6f9ce107602967fb + + // 2^20 * 4 * G + + .quad 0xad1054b1cde1c22a + .quad 0xc4a8e90248eb32df + .quad 0x5f3e7b33accdc0ea + .quad 0x72364713fc79963e + .quad 0x139693063520e0b5 + .quad 0x437fcf7c88ea03fe + .quad 0xf7d4c40bd3c959bc + .quad 0x699154d1f893ded9 + .quad 0x315d5c75b4b27526 + .quad 0xcccb842d0236daa5 + .quad 0x22f0c8a3345fee8e + .quad 0x73975a617d39dbed + + // 2^20 * 5 * G + + .quad 0xe4024df96375da10 + .quad 0x78d3251a1830c870 + .quad 0x902b1948658cd91c + .quad 0x7e18b10b29b7438a + .quad 0x6f37f392f4433e46 + .quad 0x0e19b9a11f566b18 + .quad 0x220fb78a1fd1d662 + .quad 0x362a4258a381c94d + .quad 0x9071d9132b6beb2f + .quad 0x0f26e9ad28418247 + .quad 0xeab91ec9bdec925d + .quad 0x4be65bc8f48af2de + + // 2^20 * 6 * G + + .quad 0x78487feba36e7028 + .quad 0x5f3f13001dd8ce34 + .quad 0x934fb12d4b30c489 + .quad 0x056c244d397f0a2b + .quad 0x1d50fba257c26234 + .quad 0x7bd4823adeb0678b + .quad 0xc2b0dc6ea6538af5 + .quad 0x5665eec6351da73e + .quad 0xdb3ee00943bfb210 + .quad 0x4972018720800ac2 + .quad 0x26ab5d6173bd8667 + .quad 0x20b209c2ab204938 + + // 2^20 * 7 * G + + .quad 0x549e342ac07fb34b + .quad 0x02d8220821373d93 + .quad 0xbc262d70acd1f567 + .quad 0x7a92c9fdfbcac784 + .quad 0x1fcca94516bd3289 + .quad 0x448d65aa41420428 + .quad 0x59c3b7b216a55d62 + .quad 0x49992cc64e612cd8 + .quad 0x65bd1bea70f801de + .quad 0x1befb7c0fe49e28a + .quad 0xa86306cdb1b2ae4a + .quad 0x3b7ac0cd265c2a09 + + // 2^20 * 8 * G + + .quad 0x822bee438c01bcec + .quad 0x530cb525c0fbc73b + .quad 0x48519034c1953fe9 + .quad 0x265cc261e09a0f5b + .quad 0xf0d54e4f22ed39a7 + .quad 0xa2aae91e5608150a + .quad 0xf421b2e9eddae875 + .quad 0x31bc531d6b7de992 + .quad 0xdf3d134da980f971 + .quad 0x7a4fb8d1221a22a7 + .quad 0x3df7d42035aad6d8 + .quad 0x2a14edcc6a1a125e + + // 2^24 * 1 * G + + .quad 0xdf48ee0752cfce4e + .quad 0xc3fffaf306ec08b7 + .quad 0x05710b2ab95459c4 + .quad 0x161d25fa963ea38d + .quad 0x231a8c570478433c + .quad 0xb7b5270ec281439d + .quad 0xdbaa99eae3d9079f + .quad 0x2c03f5256c2b03d9 + .quad 0x790f18757b53a47d + .quad 0x307b0130cf0c5879 + .quad 0x31903d77257ef7f9 + .quad 
0x699468bdbd96bbaf + + // 2^24 * 2 * G + + .quad 0xbd1f2f46f4dafecf + .quad 0x7cef0114a47fd6f7 + .quad 0xd31ffdda4a47b37f + .quad 0x525219a473905785 + .quad 0xd8dd3de66aa91948 + .quad 0x485064c22fc0d2cc + .quad 0x9b48246634fdea2f + .quad 0x293e1c4e6c4a2e3a + .quad 0x376e134b925112e1 + .quad 0x703778b5dca15da0 + .quad 0xb04589af461c3111 + .quad 0x5b605c447f032823 + + // 2^24 * 3 * G + + .quad 0xb965805920c47c89 + .quad 0xe7f0100c923b8fcc + .quad 0x0001256502e2ef77 + .quad 0x24a76dcea8aeb3ee + .quad 0x3be9fec6f0e7f04c + .quad 0x866a579e75e34962 + .quad 0x5542ef161e1de61a + .quad 0x2f12fef4cc5abdd5 + .quad 0x0a4522b2dfc0c740 + .quad 0x10d06e7f40c9a407 + .quad 0xc6cf144178cff668 + .quad 0x5e607b2518a43790 + + // 2^24 * 4 * G + + .quad 0x58b31d8f6cdf1818 + .quad 0x35cfa74fc36258a2 + .quad 0xe1b3ff4f66e61d6e + .quad 0x5067acab6ccdd5f7 + .quad 0xa02c431ca596cf14 + .quad 0xe3c42d40aed3e400 + .quad 0xd24526802e0f26db + .quad 0x201f33139e457068 + .quad 0xfd527f6b08039d51 + .quad 0x18b14964017c0006 + .quad 0xd5220eb02e25a4a8 + .quad 0x397cba8862460375 + + // 2^24 * 5 * G + + .quad 0x30c13093f05959b2 + .quad 0xe23aa18de9a97976 + .quad 0x222fd491721d5e26 + .quad 0x2339d320766e6c3a + .quad 0x7815c3fbc81379e7 + .quad 0xa6619420dde12af1 + .quad 0xffa9c0f885a8fdd5 + .quad 0x771b4022c1e1c252 + .quad 0xd87dd986513a2fa7 + .quad 0xf5ac9b71f9d4cf08 + .quad 0xd06bc31b1ea283b3 + .quad 0x331a189219971a76 + + // 2^24 * 6 * G + + .quad 0xf5166f45fb4f80c6 + .quad 0x9c36c7de61c775cf + .quad 0xe3d4e81b9041d91c + .quad 0x31167c6b83bdfe21 + .quad 0x26512f3a9d7572af + .quad 0x5bcbe28868074a9e + .quad 0x84edc1c11180f7c4 + .quad 0x1ac9619ff649a67b + .quad 0xf22b3842524b1068 + .quad 0x5068343bee9ce987 + .quad 0xfc9d71844a6250c8 + .quad 0x612436341f08b111 + + // 2^24 * 7 * G + + .quad 0xd99d41db874e898d + .quad 0x09fea5f16c07dc20 + .quad 0x793d2c67d00f9bbc + .quad 0x46ebe2309e5eff40 + .quad 0x8b6349e31a2d2638 + .quad 0x9ddfb7009bd3fd35 + .quad 0x7f8bf1b8a3a06ba4 + .quad 0x1522aa3178d90445 + .quad 0x2c382f5369614938 + .quad 0xdafe409ab72d6d10 + .quad 0xe8c83391b646f227 + .quad 0x45fe70f50524306c + + // 2^24 * 8 * G + + .quad 0xda4875a6960c0b8c + .quad 0x5b68d076ef0e2f20 + .quad 0x07fb51cf3d0b8fd4 + .quad 0x428d1623a0e392d4 + .quad 0x62f24920c8951491 + .quad 0x05f007c83f630ca2 + .quad 0x6fbb45d2f5c9d4b8 + .quad 0x16619f6db57a2245 + .quad 0x084f4a4401a308fd + .quad 0xa82219c376a5caac + .quad 0xdeb8de4643d1bc7d + .quad 0x1d81592d60bd38c6 + + // 2^28 * 1 * G + + .quad 0xd833d7beec2a4c38 + .quad 0x2c9162830acc20ed + .quad 0xe93a47aa92df7581 + .quad 0x702d67a3333c4a81 + .quad 0x3a4a369a2f89c8a1 + .quad 0x63137a1d7c8de80d + .quad 0xbcac008a78eda015 + .quad 0x2cb8b3a5b483b03f + .quad 0x36e417cbcb1b90a1 + .quad 0x33b3ddaa7f11794e + .quad 0x3f510808885bc607 + .quad 0x24141dc0e6a8020d + + // 2^28 * 2 * G + + .quad 0x59f73c773fefee9d + .quad 0xb3f1ef89c1cf989d + .quad 0xe35dfb42e02e545f + .quad 0x5766120b47a1b47c + .quad 0x91925dccbd83157d + .quad 0x3ca1205322cc8094 + .quad 0x28e57f183f90d6e4 + .quad 0x1a4714cede2e767b + .quad 0xdb20ba0fb8b6b7ff + .quad 0xb732c3b677511fa1 + .quad 0xa92b51c099f02d89 + .quad 0x4f3875ad489ca5f1 + + // 2^28 * 3 * G + + .quad 0xc7fc762f4932ab22 + .quad 0x7ac0edf72f4c3c1b + .quad 0x5f6b55aa9aa895e8 + .quad 0x3680274dad0a0081 + .quad 0x79ed13f6ee73eec0 + .quad 0xa5c6526d69110bb1 + .quad 0xe48928c38603860c + .quad 0x722a1446fd7059f5 + .quad 0xd0959fe9a8cf8819 + .quad 0xd0a995508475a99c + .quad 0x6eac173320b09cc5 + .quad 0x628ecf04331b1095 + + // 2^28 * 4 * G + + .quad 0x98bcb118a9d0ddbc + .quad 0xee449e3408b4802b 
+ .quad 0x87089226b8a6b104 + .quad 0x685f349a45c7915d + .quad 0x9b41acf85c74ccf1 + .quad 0xb673318108265251 + .quad 0x99c92aed11adb147 + .quad 0x7a47d70d34ecb40f + .quad 0x60a0c4cbcc43a4f5 + .quad 0x775c66ca3677bea9 + .quad 0xa17aa1752ff8f5ed + .quad 0x11ded9020e01fdc0 + + // 2^28 * 5 * G + + .quad 0x890e7809caefe704 + .quad 0x8728296de30e8c6c + .quad 0x4c5cd2a392aeb1c9 + .quad 0x194263d15771531f + .quad 0x471f95b03bea93b7 + .quad 0x0552d7d43313abd3 + .quad 0xbd9370e2e17e3f7b + .quad 0x7b120f1db20e5bec + .quad 0x17d2fb3d86502d7a + .quad 0xb564d84450a69352 + .quad 0x7da962c8a60ed75d + .quad 0x00d0f85b318736aa + + // 2^28 * 6 * G + + .quad 0x978b142e777c84fd + .quad 0xf402644705a8c062 + .quad 0xa67ad51be7e612c7 + .quad 0x2f7b459698dd6a33 + .quad 0xa6753c1efd7621c1 + .quad 0x69c0b4a7445671f5 + .quad 0x971f527405b23c11 + .quad 0x387bc74851a8c7cd + .quad 0x81894b4d4a52a9a8 + .quad 0xadd93e12f6b8832f + .quad 0x184d8548b61bd638 + .quad 0x3f1c62dbd6c9f6cd + + // 2^28 * 7 * G + + .quad 0x2e8f1f0091910c1f + .quad 0xa4df4fe0bff2e12c + .quad 0x60c6560aee927438 + .quad 0x6338283facefc8fa + .quad 0x3fad3e40148f693d + .quad 0x052656e194eb9a72 + .quad 0x2f4dcbfd184f4e2f + .quad 0x406f8db1c482e18b + .quad 0x9e630d2c7f191ee4 + .quad 0x4fbf8301bc3ff670 + .quad 0x787d8e4e7afb73c4 + .quad 0x50d83d5be8f58fa5 + + // 2^28 * 8 * G + + .quad 0x85683916c11a1897 + .quad 0x2d69a4efe506d008 + .quad 0x39af1378f664bd01 + .quad 0x65942131361517c6 + .quad 0xc0accf90b4d3b66d + .quad 0xa7059de561732e60 + .quad 0x033d1f7870c6b0ba + .quad 0x584161cd26d946e4 + .quad 0xbbf2b1a072d27ca2 + .quad 0xbf393c59fbdec704 + .quad 0xe98dbbcee262b81e + .quad 0x02eebd0b3029b589 + + // 2^32 * 1 * G + + .quad 0x61368756a60dac5f + .quad 0x17e02f6aebabdc57 + .quad 0x7f193f2d4cce0f7d + .quad 0x20234a7789ecdcf0 + .quad 0x8765b69f7b85c5e8 + .quad 0x6ff0678bd168bab2 + .quad 0x3a70e77c1d330f9b + .quad 0x3a5f6d51b0af8e7c + .quad 0x76d20db67178b252 + .quad 0x071c34f9d51ed160 + .quad 0xf62a4a20b3e41170 + .quad 0x7cd682353cffe366 + + // 2^32 * 2 * G + + .quad 0x0be1a45bd887fab6 + .quad 0x2a846a32ba403b6e + .quad 0xd9921012e96e6000 + .quad 0x2838c8863bdc0943 + .quad 0xa665cd6068acf4f3 + .quad 0x42d92d183cd7e3d3 + .quad 0x5759389d336025d9 + .quad 0x3ef0253b2b2cd8ff + .quad 0xd16bb0cf4a465030 + .quad 0xfa496b4115c577ab + .quad 0x82cfae8af4ab419d + .quad 0x21dcb8a606a82812 + + // 2^32 * 3 * G + + .quad 0x5c6004468c9d9fc8 + .quad 0x2540096ed42aa3cb + .quad 0x125b4d4c12ee2f9c + .quad 0x0bc3d08194a31dab + .quad 0x9a8d00fabe7731ba + .quad 0x8203607e629e1889 + .quad 0xb2cc023743f3d97f + .quad 0x5d840dbf6c6f678b + .quad 0x706e380d309fe18b + .quad 0x6eb02da6b9e165c7 + .quad 0x57bbba997dae20ab + .quad 0x3a4276232ac196dd + + // 2^32 * 4 * G + + .quad 0x4b42432c8a7084fa + .quad 0x898a19e3dfb9e545 + .quad 0xbe9f00219c58e45d + .quad 0x1ff177cea16debd1 + .quad 0x3bf8c172db447ecb + .quad 0x5fcfc41fc6282dbd + .quad 0x80acffc075aa15fe + .quad 0x0770c9e824e1a9f9 + .quad 0xcf61d99a45b5b5fd + .quad 0x860984e91b3a7924 + .quad 0xe7300919303e3e89 + .quad 0x39f264fd41500b1e + + // 2^32 * 5 * G + + .quad 0xa7ad3417dbe7e29c + .quad 0xbd94376a2b9c139c + .quad 0xa0e91b8e93597ba9 + .quad 0x1712d73468889840 + .quad 0xd19b4aabfe097be1 + .quad 0xa46dfce1dfe01929 + .quad 0xc3c908942ca6f1ff + .quad 0x65c621272c35f14e + .quad 0xe72b89f8ce3193dd + .quad 0x4d103356a125c0bb + .quad 0x0419a93d2e1cfe83 + .quad 0x22f9800ab19ce272 + + // 2^32 * 6 * G + + .quad 0x605a368a3e9ef8cb + .quad 0xe3e9c022a5504715 + .quad 0x553d48b05f24248f + .quad 0x13f416cd647626e5 + .quad 0x42029fdd9a6efdac + .quad 
0xb912cebe34a54941 + .quad 0x640f64b987bdf37b + .quad 0x4171a4d38598cab4 + .quad 0xfa2758aa99c94c8c + .quad 0x23006f6fb000b807 + .quad 0xfbd291ddadda5392 + .quad 0x508214fa574bd1ab + + // 2^32 * 7 * G + + .quad 0xc20269153ed6fe4b + .quad 0xa65a6739511d77c4 + .quad 0xcbde26462c14af94 + .quad 0x22f960ec6faba74b + .quad 0x461a15bb53d003d6 + .quad 0xb2102888bcf3c965 + .quad 0x27c576756c683a5a + .quad 0x3a7758a4c86cb447 + .quad 0x548111f693ae5076 + .quad 0x1dae21df1dfd54a6 + .quad 0x12248c90f3115e65 + .quad 0x5d9fd15f8de7f494 + + // 2^32 * 8 * G + + .quad 0x031408d36d63727f + .quad 0x6a379aefd7c7b533 + .quad 0xa9e18fc5ccaee24b + .quad 0x332f35914f8fbed3 + .quad 0x3f244d2aeed7521e + .quad 0x8e3a9028432e9615 + .quad 0xe164ba772e9c16d4 + .quad 0x3bc187fa47eb98d8 + .quad 0x6d470115ea86c20c + .quad 0x998ab7cb6c46d125 + .quad 0xd77832b53a660188 + .quad 0x450d81ce906fba03 + + // 2^36 * 1 * G + + .quad 0xf8ae4d2ad8453902 + .quad 0x7018058ee8db2d1d + .quad 0xaab3995fc7d2c11e + .quad 0x53b16d2324ccca79 + .quad 0x23264d66b2cae0b5 + .quad 0x7dbaed33ebca6576 + .quad 0x030ebed6f0d24ac8 + .quad 0x2a887f78f7635510 + .quad 0x2a23b9e75c012d4f + .quad 0x0c974651cae1f2ea + .quad 0x2fb63273675d70ca + .quad 0x0ba7250b864403f5 + + // 2^36 * 2 * G + + .quad 0xbb0d18fd029c6421 + .quad 0xbc2d142189298f02 + .quad 0x8347f8e68b250e96 + .quad 0x7b9f2fe8032d71c9 + .quad 0xdd63589386f86d9c + .quad 0x61699176e13a85a4 + .quad 0x2e5111954eaa7d57 + .quad 0x32c21b57fb60bdfb + .quad 0xd87823cd319e0780 + .quad 0xefc4cfc1897775c5 + .quad 0x4854fb129a0ab3f7 + .quad 0x12c49d417238c371 + + // 2^36 * 3 * G + + .quad 0x0950b533ffe83769 + .quad 0x21861c1d8e1d6bd1 + .quad 0xf022d8381302e510 + .quad 0x2509200c6391cab4 + .quad 0x09b3a01783799542 + .quad 0x626dd08faad5ee3f + .quad 0xba00bceeeb70149f + .quad 0x1421b246a0a444c9 + .quad 0x4aa43a8e8c24a7c7 + .quad 0x04c1f540d8f05ef5 + .quad 0xadba5e0c0b3eb9dc + .quad 0x2ab5504448a49ce3 + + // 2^36 * 4 * G + + .quad 0x2ed227266f0f5dec + .quad 0x9824ee415ed50824 + .quad 0x807bec7c9468d415 + .quad 0x7093bae1b521e23f + .quad 0xdc07ac631c5d3afa + .quad 0x58615171f9df8c6c + .quad 0x72a079d89d73e2b0 + .quad 0x7301f4ceb4eae15d + .quad 0x6409e759d6722c41 + .quad 0xa674e1cf72bf729b + .quad 0xbc0a24eb3c21e569 + .quad 0x390167d24ebacb23 + + // 2^36 * 5 * G + + .quad 0x27f58e3bba353f1c + .quad 0x4c47764dbf6a4361 + .quad 0xafbbc4e56e562650 + .quad 0x07db2ee6aae1a45d + .quad 0xd7bb054ba2f2120b + .quad 0xe2b9ceaeb10589b7 + .quad 0x3fe8bac8f3c0edbe + .quad 0x4cbd40767112cb69 + .quad 0x0b603cc029c58176 + .quad 0x5988e3825cb15d61 + .quad 0x2bb61413dcf0ad8d + .quad 0x7b8eec6c74183287 + + // 2^36 * 6 * G + + .quad 0xe4ca40782cd27cb0 + .quad 0xdaf9c323fbe967bd + .quad 0xb29bd34a8ad41e9e + .quad 0x72810497626ede4d + .quad 0x32fee570fc386b73 + .quad 0xda8b0141da3a8cc7 + .quad 0x975ffd0ac8968359 + .quad 0x6ee809a1b132a855 + .quad 0x9444bb31fcfd863a + .quad 0x2fe3690a3e4e48c5 + .quad 0xdc29c867d088fa25 + .quad 0x13bd1e38d173292e + + // 2^36 * 7 * G + + .quad 0xd32b4cd8696149b5 + .quad 0xe55937d781d8aab7 + .quad 0x0bcb2127ae122b94 + .quad 0x41e86fcfb14099b0 + .quad 0x223fb5cf1dfac521 + .quad 0x325c25316f554450 + .quad 0x030b98d7659177ac + .quad 0x1ed018b64f88a4bd + .quad 0x3630dfa1b802a6b0 + .quad 0x880f874742ad3bd5 + .quad 0x0af90d6ceec5a4d4 + .quad 0x746a247a37cdc5d9 + + // 2^36 * 8 * G + + .quad 0xd531b8bd2b7b9af6 + .quad 0x5005093537fc5b51 + .quad 0x232fcf25c593546d + .quad 0x20a365142bb40f49 + .quad 0x6eccd85278d941ed + .quad 0x2254ae83d22f7843 + .quad 0xc522d02e7bbfcdb7 + .quad 0x681e3351bff0e4e2 + .quad 
0x8b64b59d83034f45 + .quad 0x2f8b71f21fa20efb + .quad 0x69249495ba6550e4 + .quad 0x539ef98e45d5472b + + // 2^40 * 1 * G + + .quad 0x6e7bb6a1a6205275 + .quad 0xaa4f21d7413c8e83 + .quad 0x6f56d155e88f5cb2 + .quad 0x2de25d4ba6345be1 + .quad 0xd074d8961cae743f + .quad 0xf86d18f5ee1c63ed + .quad 0x97bdc55be7f4ed29 + .quad 0x4cbad279663ab108 + .quad 0x80d19024a0d71fcd + .quad 0xc525c20afb288af8 + .quad 0xb1a3974b5f3a6419 + .quad 0x7d7fbcefe2007233 + + // 2^40 * 2 * G + + .quad 0xfaef1e6a266b2801 + .quad 0x866c68c4d5739f16 + .quad 0xf68a2fbc1b03762c + .quad 0x5975435e87b75a8d + .quad 0xcd7c5dc5f3c29094 + .quad 0xc781a29a2a9105ab + .quad 0x80c61d36421c3058 + .quad 0x4f9cd196dcd8d4d7 + .quad 0x199297d86a7b3768 + .quad 0xd0d058241ad17a63 + .quad 0xba029cad5c1c0c17 + .quad 0x7ccdd084387a0307 + + // 2^40 * 3 * G + + .quad 0xdca6422c6d260417 + .quad 0xae153d50948240bd + .quad 0xa9c0c1b4fb68c677 + .quad 0x428bd0ed61d0cf53 + .quad 0x9b0c84186760cc93 + .quad 0xcdae007a1ab32a99 + .quad 0xa88dec86620bda18 + .quad 0x3593ca848190ca44 + .quad 0x9213189a5e849aa7 + .quad 0xd4d8c33565d8facd + .quad 0x8c52545b53fdbbd1 + .quad 0x27398308da2d63e6 + + // 2^40 * 4 * G + + .quad 0x42c38d28435ed413 + .quad 0xbd50f3603278ccc9 + .quad 0xbb07ab1a79da03ef + .quad 0x269597aebe8c3355 + .quad 0xb9a10e4c0a702453 + .quad 0x0fa25866d57d1bde + .quad 0xffb9d9b5cd27daf7 + .quad 0x572c2945492c33fd + .quad 0xc77fc745d6cd30be + .quad 0xe4dfe8d3e3baaefb + .quad 0xa22c8830aa5dda0c + .quad 0x7f985498c05bca80 + + // 2^40 * 5 * G + + .quad 0x3849ce889f0be117 + .quad 0x8005ad1b7b54a288 + .quad 0x3da3c39f23fc921c + .quad 0x76c2ec470a31f304 + .quad 0xd35615520fbf6363 + .quad 0x08045a45cf4dfba6 + .quad 0xeec24fbc873fa0c2 + .quad 0x30f2653cd69b12e7 + .quad 0x8a08c938aac10c85 + .quad 0x46179b60db276bcb + .quad 0xa920c01e0e6fac70 + .quad 0x2f1273f1596473da + + // 2^40 * 6 * G + + .quad 0x4739fc7c8ae01e11 + .quad 0xfd5274904a6aab9f + .quad 0x41d98a8287728f2e + .quad 0x5d9e572ad85b69f2 + .quad 0x30488bd755a70bc0 + .quad 0x06d6b5a4f1d442e7 + .quad 0xead1a69ebc596162 + .quad 0x38ac1997edc5f784 + .quad 0x0666b517a751b13b + .quad 0x747d06867e9b858c + .quad 0xacacc011454dde49 + .quad 0x22dfcd9cbfe9e69c + + // 2^40 * 7 * G + + .quad 0x8ddbd2e0c30d0cd9 + .quad 0xad8e665facbb4333 + .quad 0x8f6b258c322a961f + .quad 0x6b2916c05448c1c7 + .quad 0x56ec59b4103be0a1 + .quad 0x2ee3baecd259f969 + .quad 0x797cb29413f5cd32 + .quad 0x0fe9877824cde472 + .quad 0x7edb34d10aba913b + .quad 0x4ea3cd822e6dac0e + .quad 0x66083dff6578f815 + .quad 0x4c303f307ff00a17 + + // 2^40 * 8 * G + + .quad 0xd30a3bd617b28c85 + .quad 0xc5d377b739773bea + .quad 0xc6c6e78c1e6a5cbf + .quad 0x0d61b8f78b2ab7c4 + .quad 0x29fc03580dd94500 + .quad 0xecd27aa46fbbec93 + .quad 0x130a155fc2e2a7f8 + .quad 0x416b151ab706a1d5 + .quad 0x56a8d7efe9c136b0 + .quad 0xbd07e5cd58e44b20 + .quad 0xafe62fda1b57e0ab + .quad 0x191a2af74277e8d2 + + // 2^44 * 1 * G + + .quad 0xd550095bab6f4985 + .quad 0x04f4cd5b4fbfaf1a + .quad 0x9d8e2ed12a0c7540 + .quad 0x2bc24e04b2212286 + .quad 0x09d4b60b2fe09a14 + .quad 0xc384f0afdbb1747e + .quad 0x58e2ea8978b5fd6e + .quad 0x519ef577b5e09b0a + .quad 0x1863d7d91124cca9 + .quad 0x7ac08145b88a708e + .quad 0x2bcd7309857031f5 + .quad 0x62337a6e8ab8fae5 + + // 2^44 * 2 * G + + .quad 0x4bcef17f06ffca16 + .quad 0xde06e1db692ae16a + .quad 0x0753702d614f42b0 + .quad 0x5f6041b45b9212d0 + .quad 0xd1ab324e1b3a1273 + .quad 0x18947cf181055340 + .quad 0x3b5d9567a98c196e + .quad 0x7fa00425802e1e68 + .quad 0x7d531574028c2705 + .quad 0x80317d69db0d75fe + .quad 0x30fface8ef8c8ddd + .quad 
0x7e9de97bb6c3e998
+
+// 2^44 * 3 * G
+
+        .quad   0x1558967b9e6585a3
+        .quad   0x97c99ce098e98b92
+        .quad   0x10af149b6eb3adad
+        .quad   0x42181fe8f4d38cfa
+        .quad   0xf004be62a24d40dd
+        .quad   0xba0659910452d41f
+        .quad   0x81c45ee162a44234
+        .quad   0x4cb829d8a22266ef
+        .quad   0x1dbcaa8407b86681
+        .quad   0x081f001e8b26753b
+        .quad   0x3cd7ce6a84048e81
+        .quad   0x78af11633f25f22c
+
+// 2^44 * 4 * G
+
+        .quad   0x8416ebd40b50babc
+        .quad   0x1508722628208bee
+        .quad   0xa3148fafb9c1c36d
+        .quad   0x0d07daacd32d7d5d
+        .quad   0x3241c00e7d65318c
+        .quad   0xe6bee5dcd0e86de7
+        .quad   0x118b2dc2fbc08c26
+        .quad   0x680d04a7fc603dc3
+        .quad   0xf9c2414a695aa3eb
+        .quad   0xdaa42c4c05a68f21
+        .quad   0x7c6c23987f93963e
+        .quad   0x210e8cd30c3954e3
+
+// 2^44 * 5 * G
+
+        .quad   0xac4201f210a71c06
+        .quad   0x6a65e0aef3bfb021
+        .quad   0xbc42c35c393632f7
+        .quad   0x56ea8db1865f0742
+        .quad   0x2b50f16137fe6c26
+        .quad   0xe102bcd856e404d8
+        .quad   0x12b0f1414c561f6b
+        .quad   0x51b17bc8d028ec91
+        .quad   0xfff5fb4bcf535119
+        .quad   0xf4989d79df1108a0
+        .quad   0xbdfcea659a3ba325
+        .quad   0x18a11f1174d1a6f2
+
+// 2^44 * 6 * G
+
+        .quad   0x407375ab3f6bba29
+        .quad   0x9ec3b6d8991e482e
+        .quad   0x99c80e82e55f92e9
+        .quad   0x307c13b6fb0c0ae1
+        .quad   0xfbd63cdad27a5f2c
+        .quad   0xf00fc4bc8aa106d7
+        .quad   0x53fb5c1a8e64a430
+        .quad   0x04eaabe50c1a2e85
+        .quad   0x24751021cb8ab5e7
+        .quad   0xfc2344495c5010eb
+        .quad   0x5f1e717b4e5610a1
+        .quad   0x44da5f18c2710cd5
+
+// 2^44 * 7 * G
+
+        .quad   0x033cc55ff1b82eb5
+        .quad   0xb15ae36d411cae52
+        .quad   0xba40b6198ffbacd3
+        .quad   0x768edce1532e861f
+        .quad   0x9156fe6b89d8eacc
+        .quad   0xe6b79451e23126a1
+        .quad   0xbd7463d93944eb4e
+        .quad   0x726373f6767203ae
+        .quad   0xe305ca72eb7ef68a
+        .quad   0x662cf31f70eadb23
+        .quad   0x18f026fdb4c45b68
+        .quad   0x513b5384b5d2ecbd
+
+// 2^44 * 8 * G
+
+        .quad   0x46d46280c729989e
+        .quad   0x4b93fbd05368a5dd
+        .quad   0x63df3f81d1765a89
+        .quad   0x34cebd64b9a0a223
+        .quad   0x5e2702878af34ceb
+        .quad   0x900b0409b946d6ae
+        .quad   0x6512ebf7dabd8512
+        .quad   0x61d9b76988258f81
+        .quad   0xa6c5a71349b7d94b
+        .quad   0xa3f3d15823eb9446
+        .quad   0x0416fbd277484834
+        .quad   0x69d45e6f2c70812f
+
+// 2^48 * 1 * G
+
+        .quad   0xce16f74bc53c1431
+        .quad   0x2b9725ce2072edde
+        .quad   0xb8b9c36fb5b23ee7
+        .quad   0x7e2e0e450b5cc908
+        .quad   0x9fe62b434f460efb
+        .quad   0xded303d4a63607d6
+        .quad   0xf052210eb7a0da24
+        .quad   0x237e7dbe00545b93
+        .quad   0x013575ed6701b430
+        .quad   0x231094e69f0bfd10
+        .quad   0x75320f1583e47f22
+        .quad   0x71afa699b11155e3
+
+// 2^48 * 2 * G
+
+        .quad   0x65ce6f9b3953b61d
+        .quad   0xc65839eaafa141e6
+        .quad   0x0f435ffda9f759fe
+        .quad   0x021142e9c2b1c28e
+        .quad   0xea423c1c473b50d6
+        .quad   0x51e87a1f3b38ef10
+        .quad   0x9b84bf5fb2c9be95
+        .quad   0x00731fbc78f89a1c
+        .quad   0xe430c71848f81880
+        .quad   0xbf960c225ecec119
+        .quad   0xb6dae0836bba15e3
+        .quad   0x4c4d6f3347e15808
+
+// 2^48 * 3 * G
+
+        .quad   0x18f7eccfc17d1fc9
+        .quad   0x6c75f5a651403c14
+        .quad   0xdbde712bf7ee0cdf
+        .quad   0x193fddaaa7e47a22
+        .quad   0x2f0cddfc988f1970
+        .quad   0x6b916227b0b9f51b
+        .quad   0x6ec7b6c4779176be
+        .quad   0x38bf9500a88f9fa8
+        .quad   0x1fd2c93c37e8876f
+        .quad   0xa2f61e5a18d1462c
+        .quad   0x5080f58239241276
+        .quad   0x6a6fb99ebf0d4969
+
+// 2^48 * 4 * G
+
+        .quad   0x6a46c1bb560855eb
+        .quad   0x2416bb38f893f09d
+        .quad   0xd71d11378f71acc1
+        .quad   0x75f76914a31896ea
+        .quad   0xeeb122b5b6e423c6
+        .quad   0x939d7010f286ff8e
+        .quad   0x90a92a831dcf5d8c
+        .quad   0x136fda9f42c5eb10
+        .quad   0xf94cdfb1a305bdd1
+        .quad   0x0f364b9d9ff82c08
+        .quad   0x2a87d8a5c3bb588a
+        .quad   0x022183510be8dcba
+
+// 2^48 * 5 * G
+
+        .quad   0x4af766385ead2d14
+        .quad   0xa08ed880ca7c5830
+        .quad   0x0d13a6e610211e3d
+        .quad   0x6a071ce17b806c03
+        .quad   0x9d5a710143307a7f
+        .quad   0xb063de9ec47da45f
+        .quad   0x22bbfe52be927ad3
+        .quad   0x1387c441fd40426c
+        .quad   0xb5d3c3d187978af8
+        .quad   0x722b5a3d7f0e4413
+        .quad   0x0d7b4848bb477ca0
+        .quad   0x3171b26aaf1edc92
+
+// 2^48 * 6 * G
+
+        .quad   0xa92f319097564ca8
+        .quad   0xff7bb84c2275e119
+        .quad   0x4f55fe37a4875150
+        .quad   0x221fd4873cf0835a
+        .quad   0xa60db7d8b28a47d1
+        .quad   0xa6bf14d61770a4f1
+        .quad   0xd4a1f89353ddbd58
+        .quad   0x6c514a63344243e9
+        .quad   0x2322204f3a156341
+        .quad   0xfb73e0e9ba0a032d
+        .quad   0xfce0dd4c410f030e
+        .quad   0x48daa596fb924aaa
+
+// 2^48 * 7 * G
+
+        .quad   0x6eca8e665ca59cc7
+        .quad   0xa847254b2e38aca0
+        .quad   0x31afc708d21e17ce
+        .quad   0x676dd6fccad84af7
+        .quad   0x14f61d5dc84c9793
+        .quad   0x9941f9e3ef418206
+        .quad   0xcdf5b88f346277ac
+        .quad   0x58c837fa0e8a79a9
+        .quad   0x0cf9688596fc9058
+        .quad   0x1ddcbbf37b56a01b
+        .quad   0xdcc2e77d4935d66a
+        .quad   0x1c4f73f2c6a57f0a
+
+// 2^48 * 8 * G
+
+        .quad   0x0e7a4fbd305fa0bb
+        .quad   0x829d4ce054c663ad
+        .quad   0xf421c3832fe33848
+        .quad   0x795ac80d1bf64c42
+        .quad   0xb36e706efc7c3484
+        .quad   0x73dfc9b4c3c1cf61
+        .quad   0xeb1d79c9781cc7e5
+        .quad   0x70459adb7daf675c
+        .quad   0x1b91db4991b42bb3
+        .quad   0x572696234b02dcca
+        .quad   0x9fdf9ee51f8c78dc
+        .quad   0x5fe162848ce21fd3
+
+// 2^52 * 1 * G
+
+        .quad   0xe2790aae4d077c41
+        .quad   0x8b938270db7469a3
+        .quad   0x6eb632dc8abd16a2
+        .quad   0x720814ecaa064b72
+        .quad   0x315c29c795115389
+        .quad   0xd7e0e507862f74ce
+        .quad   0x0c4a762185927432
+        .quad   0x72de6c984a25a1e4
+        .quad   0xae9ab553bf6aa310
+        .quad   0x050a50a9806d6e1b
+        .quad   0x92bb7403adff5139
+        .quad   0x0394d27645be618b
+
+// 2^52 * 2 * G
+
+        .quad   0x4d572251857eedf4
+        .quad   0xe3724edde19e93c5
+        .quad   0x8a71420e0b797035
+        .quad   0x3b3c833687abe743
+        .quad   0xf5396425b23545a4
+        .quad   0x15a7a27e98fbb296
+        .quad   0xab6c52bc636fdd86
+        .quad   0x79d995a8419334ee
+        .quad   0xcd8a8ea61195dd75
+        .quad   0xa504d8a81dd9a82f
+        .quad   0x540dca81a35879b6
+        .quad   0x60dd16a379c86a8a
+
+// 2^52 * 3 * G
+
+        .quad   0x35a2c8487381e559
+        .quad   0x596ffea6d78082cb
+        .quad   0xcb9771ebdba7b653
+        .quad   0x5a08b5019b4da685
+        .quad   0x3501d6f8153e47b8
+        .quad   0xb7a9675414a2f60c
+        .quad   0x112ee8b6455d9523
+        .quad   0x4e62a3c18112ea8a
+        .quad   0xc8d4ac04516ab786
+        .quad   0x595af3215295b23d
+        .quad   0xd6edd234db0230c1
+        .quad   0x0929efe8825b41cc
+
+// 2^52 * 4 * G
+
+        .quad   0x5f0601d1cbd0f2d3
+        .quad   0x736e412f6132bb7f
+        .quad   0x83604432238dde87
+        .quad   0x1e3a5272f5c0753c
+        .quad   0x8b3172b7ad56651d
+        .quad   0x01581b7a3fabd717
+        .quad   0x2dc94df6424df6e4
+        .quad   0x30376e5d2c29284f
+        .quad   0xd2918da78159a59c
+        .quad   0x6bdc1cd93f0713f3
+        .quad   0x565f7a934acd6590
+        .quad   0x53daacec4cb4c128
+
+// 2^52 * 5 * G
+
+        .quad   0x4ca73bd79cc8a7d6
+        .quad   0x4d4a738f47e9a9b2
+        .quad   0xf4cbf12942f5fe00
+        .quad   0x01a13ff9bdbf0752
+        .quad   0x99852bc3852cfdb0
+        .quad   0x2cc12e9559d6ed0b
+        .quad   0x70f9e2bf9b5ac27b
+        .quad   0x4f3b8c117959ae99
+        .quad   0x55b6c9c82ff26412
+        .quad   0x1ac4a8c91fb667a8
+        .quad   0xd527bfcfeb778bf2
+        .quad   0x303337da7012a3be
+
+// 2^52 * 6 * G
+
+        .quad   0x955422228c1c9d7c
+        .quad   0x01fac1371a9b340f
+        .quad   0x7e8d9177925b48d7
+        .quad   0x53f8ad5661b3e31b
+        .quad   0x976d3ccbfad2fdd1
+        .quad   0xcb88839737a640a8
+        .quad   0x2ff00c1d6734cb25
+        .quad   0x269ff4dc789c2d2b
+        .quad   0x0c003fbdc08d678d
+        .quad   0x4d982fa37ead2b17
+        .quad   0xc07e6bcdb2e582f1
+        .quad   0x296c7291df412a44
+
+// 2^52 * 7 * G
+
+        .quad   0x7903de2b33daf397
+        .quad   0xd0ff0619c9a624b3
+        .quad   0x8a1d252b555b3e18
+        .quad   0x2b6d581c52e0b7c0
+        .quad   0xdfb23205dab8b59e
+        .quad   0x465aeaa0c8092250
+        .quad   0xd133c1189a725d18
+        .quad   0x2327370261f117d1
+        .quad   0x3d0543d3623e7986
+        .quad   0x679414c2c278a354
+        .quad   0xae43f0cc726196f6
+        .quad   0x7836c41f8245eaba
+
+// 2^52 * 8 * G
+
+        .quad   0xe7a254db49e95a81
+        .quad   0x5192d5d008b0ad73
+        .quad   0x4d20e5b1d00afc07
+        .quad   0x5d55f8012cf25f38
+        .quad   0xca651e848011937c
+        .quad   0xc6b0c46e6ef41a28
+        .quad   0xb7021ba75f3f8d52
+        .quad   0x119dff99ead7b9fd
+        .quad   0x43eadfcbf4b31d4d
+        .quad   0xc6503f7411148892
+        .quad   0xfeee68c5060d3b17
+        .quad   0x329293b3dd4a0ac8
+
+// 2^56 * 1 * G
+
+        .quad   0x4e59214fe194961a
+        .quad   0x49be7dc70d71cd4f
+        .quad   0x9300cfd23b50f22d
+        .quad   0x4789d446fc917232
+        .quad   0x2879852d5d7cb208
+        .quad   0xb8dedd70687df2e7
+        .quad   0xdc0bffab21687891
+        .quad   0x2b44c043677daa35
+        .quad   0x1a1c87ab074eb78e
+        .quad   0xfac6d18e99daf467
+        .quad   0x3eacbbcd484f9067
+        .quad   0x60c52eef2bb9a4e4
+
+// 2^56 * 2 * G
+
+        .quad   0x0b5d89bc3bfd8bf1
+        .quad   0xb06b9237c9f3551a
+        .quad   0x0e4c16b0d53028f5
+        .quad   0x10bc9c312ccfcaab
+        .quad   0x702bc5c27cae6d11
+        .quad   0x44c7699b54a48cab
+        .quad   0xefbc4056ba492eb2
+        .quad   0x70d77248d9b6676d
+        .quad   0xaa8ae84b3ec2a05b
+        .quad   0x98699ef4ed1781e0
+        .quad   0x794513e4708e85d1
+        .quad   0x63755bd3a976f413
+
+// 2^56 * 3 * G
+
+        .quad   0xb55fa03e2ad10853
+        .quad   0x356f75909ee63569
+        .quad   0x9ff9f1fdbe69b890
+        .quad   0x0d8cc1c48bc16f84
+        .quad   0x3dc7101897f1acb7
+        .quad   0x5dda7d5ec165bbd8
+        .quad   0x508e5b9c0fa1020f
+        .quad   0x2763751737c52a56
+        .quad   0x029402d36eb419a9
+        .quad   0xf0b44e7e77b460a5
+        .quad   0xcfa86230d43c4956
+        .quad   0x70c2dd8a7ad166e7
+
+// 2^56 * 4 * G
+
+        .quad   0x656194509f6fec0e
+        .quad   0xee2e7ea946c6518d
+        .quad   0x9733c1f367e09b5c
+        .quad   0x2e0fac6363948495
+        .quad   0x91d4967db8ed7e13
+        .quad   0x74252f0ad776817a
+        .quad   0xe40982e00d852564
+        .quad   0x32b8613816a53ce5
+        .quad   0x79e7f7bee448cd64
+        .quad   0x6ac83a67087886d0
+        .quad   0xf89fd4d9a0e4db2e
+        .quad   0x4179215c735a4f41
+
+// 2^56 * 5 * G
+
+        .quad   0x8c7094e7d7dced2a
+        .quad   0x97fb8ac347d39c70
+        .quad   0xe13be033a906d902
+        .quad   0x700344a30cd99d76
+        .quad   0xe4ae33b9286bcd34
+        .quad   0xb7ef7eb6559dd6dc
+        .quad   0x278b141fb3d38e1f
+        .quad   0x31fa85662241c286
+        .quad   0xaf826c422e3622f4
+        .quad   0xc12029879833502d
+        .quad   0x9bc1b7e12b389123
+        .quad   0x24bb2312a9952489
+
+// 2^56 * 6 * G
+
+        .quad   0xb1a8ed1732de67c3
+        .quad   0x3cb49418461b4948
+        .quad   0x8ebd434376cfbcd2
+        .quad   0x0fee3e871e188008
+        .quad   0x41f80c2af5f85c6b
+        .quad   0x687284c304fa6794
+        .quad   0x8945df99a3ba1bad
+        .quad   0x0d1d2af9ffeb5d16
+        .quad   0xa9da8aa132621edf
+        .quad   0x30b822a159226579
+        .quad   0x4004197ba79ac193
+        .quad   0x16acd79718531d76
+
+// 2^56 * 7 * G
+
+        .quad   0x72df72af2d9b1d3d
+        .quad   0x63462a36a432245a
+        .quad   0x3ecea07916b39637
+        .quad   0x123e0ef6b9302309
+        .quad   0xc959c6c57887b6ad
+        .quad   0x94e19ead5f90feba
+        .quad   0x16e24e62a342f504
+        .quad   0x164ed34b18161700
+        .quad   0x487ed94c192fe69a
+        .quad   0x61ae2cea3a911513
+        .quad   0x877bf6d3b9a4de27
+        .quad   0x78da0fc61073f3eb
+
+// 2^56 * 8 * G
+
+        .quad   0x5bf15d28e52bc66a
+        .quad   0x2c47e31870f01a8e
+        .quad   0x2419afbc06c28bdd
+        .quad   0x2d25deeb256b173a
+        .quad   0xa29f80f1680c3a94
+        .quad   0x71f77e151ae9e7e6
+        .quad   0x1100f15848017973
+        .quad   0x054aa4b316b38ddd
+        .quad   0xdfc8468d19267cb8
+        .quad   0x0b28789c66e54daf
+        .quad   0x2aeb1d2a666eec17
+        .quad   0x134610a6ab7da760
+
+// 2^60 * 1 * G
+
+        .quad   0xcaf55ec27c59b23f
+        .quad   0x99aeed3e154d04f2
+        .quad   0x68441d72e14141f4
+        .quad   0x140345133932a0a2
+        .quad   0xd91430e0dc028c3c
+        .quad   0x0eb955a85217c771
+        .quad   0x4b09e1ed2c99a1fa
+        .quad   0x42881af2bd6a743c
+        .quad   0x7bfec69aab5cad3d
+        .quad   0xc23e8cd34cb2cfad
+        .quad   0x685dd14bfb37d6a2
+        .quad   0x0ad6d64415677a18
+
+// 2^60 * 2 * G
+
+        .quad   0x781a439e417becb5
+        .quad   0x4ac5938cd10e0266
+        .quad   0x5da385110692ac24
+        .quad   0x11b065a2ade31233
+        .quad   0x7914892847927e9f
+        .quad   0x33dad6ef370aa877
+        .quad   0x1f8f24fa11122703
+        .quad   0x5265ac2f2adf9592
+        .quad   0x405fdd309afcb346
+        .quad   0xd9723d4428e63f54
+        .quad   0x94c01df05f65aaae
+        .quad   0x43e4dc3ae14c0809
+
+// 2^60 * 3 * G
+
+        .quad   0xbc12c7f1a938a517
+        .quad   0x473028ab3180b2e1
+        .quad   0x3f78571efbcd254a
+        .quad   0x74e534426ff6f90f
+        .quad   0xea6f7ac3adc2c6a3
+        .quad   0xd0e928f6e9717c94
+        .quad   0xe2d379ead645eaf5
+        .quad   0x46dd8785c51ffbbe
+        .quad   0x709801be375c8898
+        .quad   0x4b06dab5e3fd8348
+        .quad   0x75880ced27230714
+        .quad   0x2b09468fdd2f4c42
+
+// 2^60 * 4 * G
+
+        .quad   0x97c749eeb701cb96
+        .quad   0x83f438d4b6a369c3
+        .quad   0x62962b8b9a402cd9
+        .quad   0x6976c7509888df7b
+        .quad   0x5b97946582ffa02a
+        .quad   0xda096a51fea8f549
+        .quad   0xa06351375f77af9b
+        .quad   0x1bcfde61201d1e76
+        .quad   0x4a4a5490246a59a2
+        .quad   0xd63ebddee87fdd90
+        .quad   0xd9437c670d2371fa
+        .quad   0x69e87308d30f8ed6
+
+// 2^60 * 5 * G
+
+        .quad   0x435a8bb15656beb0
+        .quad   0xf8fac9ba4f4d5bca
+        .quad   0xb9b278c41548c075
+        .quad   0x3eb0ef76e892b622
+        .quad   0x0f80bf028bc80303
+        .quad   0x6aae16b37a18cefb
+        .quad   0xdd47ea47d72cd6a3
+        .quad   0x61943588f4ed39aa
+        .quad   0xd26e5c3e91039f85
+        .quad   0xc0e9e77df6f33aa9
+        .quad   0xe8968c5570066a93
+        .quad   0x3c34d1881faaaddd
+
+// 2^60 * 6 * G
+
+        .quad   0x3f9d2b5ea09f9ec0
+        .quad   0x1dab3b6fb623a890
+        .quad   0xa09ba3ea72d926c4
+        .quad   0x374193513fd8b36d
+        .quad   0xbd5b0b8f2fffe0d9
+        .quad   0x6aa254103ed24fb9
+        .quad   0x2ac7d7bcb26821c4
+        .quad   0x605b394b60dca36a
+        .quad   0xb4e856e45a9d1ed2
+        .quad   0xefe848766c97a9a2
+        .quad   0xb104cf641e5eee7d
+        .quad   0x2f50b81c88a71c8f
+
+// 2^60 * 7 * G
+
+        .quad   0x31723c61fc6811bb
+        .quad   0x9cb450486211800f
+        .quad   0x768933d347995753
+        .quad   0x3491a53502752fcd
+        .quad   0x2b552ca0a7da522a
+        .quad   0x3230b336449b0250
+        .quad   0xf2c4c5bca4b99fb9
+        .quad   0x7b2c674958074a22
+        .quad   0xd55165883ed28cdf
+        .quad   0x12d84fd2d362de39
+        .quad   0x0a874ad3e3378e4f
+        .quad   0x000d2b1f7c763e74
+
+// 2^60 * 8 * G
+
+        .quad   0x3d420811d06d4a67
+        .quad   0xbefc048590e0ffe3
+        .quad   0xf870c6b7bd487bde
+        .quad   0x6e2a7316319afa28
+        .quad   0x9624778c3e94a8ab
+        .quad   0x0ad6f3cee9a78bec
+        .quad   0x948ac7810d743c4f
+        .quad   0x76627935aaecfccc
+        .quad   0x56a8ac24d6d59a9f
+        .quad   0xc8db753e3096f006
+        .quad   0x477f41e68f4c5299
+        .quad   0x588d851cf6c86114
+
+// 2^64 * 1 * G
+
+        .quad   0x51138ec78df6b0fe
+        .quad   0x5397da89e575f51b
+        .quad   0x09207a1d717af1b9
+        .quad   0x2102fdba2b20d650
+        .quad   0xcd2a65e777d1f515
+        .quad   0x548991878faa60f1
+        .quad   0xb1b73bbcdabc06e5
+        .quad   0x654878cba97cc9fb
+        .quad   0x969ee405055ce6a1
+        .quad   0x36bca7681251ad29
+        .quad   0x3a1af517aa7da415
+        .quad   0x0ad725db29ecb2ba
+
+// 2^64 * 2 * G
+
+        .quad   0xdc4267b1834e2457
+        .quad   0xb67544b570ce1bc5
+        .quad   0x1af07a0bf7d15ed7
+        .quad   0x4aefcffb71a03650
+        .quad   0xfec7bc0c9b056f85
+        .quad   0x537d5268e7f5ffd7
+        .quad   0x77afc6624312aefa
+        .quad   0x4f675f5302399fd9
+        .quad   0xc32d36360415171e
+        .quad   0xcd2bef118998483b
+        .quad   0x870a6eadd0945110
+        .quad   0x0bccbb72a2a86561
+
+// 2^64 * 3 * G
+
+        .quad   0x185e962feab1a9c8
+        .quad   0x86e7e63565147dcd
+        .quad   0xb092e031bb5b6df2
+        .quad   0x4024f0ab59d6b73e
+        .quad   0x186d5e4c50fe1296
+        .quad   0xe0397b82fee89f7e
+        .quad   0x3bc7f6c5507031b0
+        .quad   0x6678fd69108f37c2
+        .quad   0x1586fa31636863c2
+        .quad   0x07f68c48572d33f2
+        .quad   0x4f73cc9f789eaefc
+        .quad   0x2d42e2108ead4701
+
+// 2^64 * 4 * G
+
+        .quad   0x97f5131594dfd29b
+        .quad   0x6155985d313f4c6a
+        .quad   0xeba13f0708455010
+        .quad   0x676b2608b8d2d322
+        .quad   0x21717b0d0f537593
+        .quad   0x914e690b131e064c
+        .quad   0x1bb687ae752ae09f
+        .quad   0x420bf3a79b423c6e
+        .quad   0x8138ba651c5b2b47
+        .quad   0x8671b6ec311b1b80
+        .quad   0x7bff0cb1bc3135b0
+        .quad   0x745d2ffa9c0cf1e0
+
+// 2^64 * 5 * G
+
+        .quad   0xbf525a1e2bc9c8bd
+        .quad   0xea5b260826479d81
+        .quad   0xd511c70edf0155db
+        .quad   0x1ae23ceb960cf5d0
+        .quad   0x6036df5721d34e6a
+        .quad   0xb1db8827997bb3d0
+        .quad   0xd3c209c3c8756afa
+        .quad   0x06e15be54c1dc839
+        .quad   0x5b725d871932994a
+        .quad   0x32351cb5ceb1dab0
+        .quad   0x7dc41549dab7ca05
+        .quad   0x58ded861278ec1f7
+
+// 2^64 * 6 * G
+
+        .quad   0xd8173793f266c55c
+        .quad   0xc8c976c5cc454e49
+        .quad   0x5ce382f8bc26c3a8
+        .quad   0x2ff39de85485f6f9
+        .quad   0x2dfb5ba8b6c2c9a8
+        .quad   0x48eeef8ef52c598c
+        .quad   0x33809107f12d1573
+        .quad   0x08ba696b531d5bd8
+        .quad   0x77ed3eeec3efc57a
+        .quad   0x04e05517d4ff4811
+        .quad   0xea3d7a3ff1a671cb
+        .quad   0x120633b4947cfe54
+
+// 2^64 * 7 * G
+
+        .quad   0x0b94987891610042
+        .quad   0x4ee7b13cecebfae8
+        .quad   0x70be739594f0a4c0
+        .quad   0x35d30a99b4d59185
+        .quad   0x82bd31474912100a
+        .quad   0xde237b6d7e6fbe06
+        .quad   0xe11e761911ea79c6
+        .quad   0x07433be3cb393bde
+        .quad   0xff7944c05ce997f4
+        .quad   0x575d3de4b05c51a3
+        .quad   0x583381fd5a76847c
+        .quad   0x2d873ede7af6da9f
+
+// 2^64 * 8 * G
+
+        .quad   0x157a316443373409
+        .quad   0xfab8b7eef4aa81d9
+        .quad   0xb093fee6f5a64806
+        .quad   0x2e773654707fa7b6
+        .quad   0xaa6202e14e5df981
+        .quad   0xa20d59175015e1f5
+        .quad   0x18a275d3bae21d6c
+        .quad   0x0543618a01600253
+        .quad   0x0deabdf4974c23c1
+        .quad   0xaa6f0a259dce4693
+        .quad   0x04202cb8a29aba2c
+        .quad   0x4b1443362d07960d
+
+// 2^68 * 1 * G
+
+        .quad   0x47b837f753242cec
+        .quad   0x256dc48cc04212f2
+        .quad   0xe222fbfbe1d928c5
+        .quad   0x48ea295bad8a2c07
+        .quad   0x299b1c3f57c5715e
+        .quad   0x96cb929e6b686d90
+        .quad   0x3004806447235ab3
+        .quad   0x2c435c24a44d9fe1
+        .quad   0x0607c97c80f8833f
+        .quad   0x0e851578ca25ec5b
+        .quad   0x54f7450b161ebb6f
+        .quad   0x7bcb4792a0def80e
+
+// 2^68 * 2 * G
+
+        .quad   0x8487e3d02bc73659
+        .quad   0x4baf8445059979df
+        .quad   0xd17c975adcad6fbf
+        .quad   0x57369f0bdefc96b6
+        .quad   0x1cecd0a0045224c2
+        .quad   0x757f1b1b69e53952
+        .quad   0x775b7a925289f681
+        .quad   0x1b6cc62016736148
+        .quad   0xf1a9990175638698
+        .quad   0x353dd1beeeaa60d3
+        .quad   0x849471334c9ba488
+        .quad   0x63fa6e6843ade311
+
+// 2^68 * 3 * G
+
+        .quad   0xd15c20536597c168
+        .quad   0x9f73740098d28789
+        .quad   0x18aee7f13257ba1f
+        .quad   0x3418bfda07346f14
+        .quad   0x2195becdd24b5eb7
+        .quad   0x5e41f18cc0cd44f9
+        .quad   0xdf28074441ca9ede
+        .quad   0x07073b98f35b7d67
+        .quad   0xd03c676c4ce530d4
+        .quad   0x0b64c0473b5df9f4
+        .quad   0x065cef8b19b3a31e
+        .quad   0x3084d661533102c9
+
+// 2^68 * 4 * G
+
+        .quad   0xe1f6b79ebf8469ad
+        .quad   0x15801004e2663135
+        .quad   0x9a498330af74181b
+        .quad   0x3ba2504f049b673c
+        .quad   0x9a6ce876760321fd
+        .quad   0x7fe2b5109eb63ad8
+        .quad   0x00e7d4ae8ac80592
+        .quad   0x73d86b7abb6f723a
+        .quad   0x0b52b5606dba5ab6
+        .quad   0xa9134f0fbbb1edab
+        .quad   0x30a9520d9b04a635
+        .quad   0x6813b8f37973e5db
+
+// 2^68 * 5 * G
+
+        .quad   0x9854b054334127c1
+        .quad   0x105d047882fbff25
+        .quad   0xdb49f7f944186f4f
+        .quad   0x1768e838bed0b900
+        .quad   0xf194ca56f3157e29
+        .quad   0x136d35705ef528a5
+        .quad   0xdd4cef778b0599bc
+        .quad   0x7d5472af24f833ed
+        .quad   0xd0ef874daf33da47
+        .quad   0x00d3be5db6e339f9
+        .quad   0x3f2a8a2f9c9ceece
+        .quad   0x5d1aeb792352435a
+
+// 2^68 * 6 * G
+
+        .quad   0xf59e6bb319cd63ca
+        .quad   0x670c159221d06839
+        .quad   0xb06d565b2150cab6
+        .quad   0x20fb199d104f12a3
+        .quad   0x12c7bfaeb61ba775
+        .quad   0xb84e621fe263bffd
+        .quad   0x0b47a5c35c840dcf
+        .quad   0x7e83be0bccaf8634
+        .quad   0x61943dee6d99c120
+        .quad   0x86101f2e460b9fe0
+        .quad   0x6bb2f1518ee8598d
+        .quad   0x76b76289fcc475cc
+
+// 2^68 * 7 * G
+
+        .quad   0x791b4cc1756286fa
+        .quad   0xdbced317d74a157c
+        .quad   0x7e732421ea72bde6
+        .quad   0x01fe18491131c8e9
+        .quad   0x4245f1a1522ec0b3
+        .quad   0x558785b22a75656d
+        .quad   0x1d485a2548a1b3c0
+        .quad   0x60959eccd58fe09f
+        .quad   0x3ebfeb7ba8ed7a09
+        .quad   0x49fdc2bbe502789c
+        .quad   0x44ebce5d3c119428
+        .quad   0x35e1eb55be947f4a
+
+// 2^68 * 8 * G
+
+        .quad   0xdbdae701c5738dd3
+        .quad   0xf9c6f635b26f1bee
+        .quad   0x61e96a8042f15ef4
+        .quad   0x3aa1d11faf60a4d8
+        .quad   0x14fd6dfa726ccc74
+        .quad   0x3b084cfe2f53b965
+        .quad   0xf33ae4f552a2c8b4
+        .quad   0x59aab07a0d40166a
+        .quad   0x77bcec4c925eac25
+        .quad   0x1848718460137738
+        .quad   0x5b374337fea9f451
+        .quad   0x1865e78ec8e6aa46
+
+// 2^72 * 1 * G
+
+        .quad   0xccc4b7c7b66e1f7a
+        .quad   0x44157e25f50c2f7e
+        .quad   0x3ef06dfc713eaf1c
+        .quad   0x582f446752da63f7
+        .quad   0x967c54e91c529ccb
+        .quad   0x30f6269264c635fb
+        .quad   0x2747aff478121965
+        .quad   0x17038418eaf66f5c
+        .quad   0xc6317bd320324ce4
+        .quad   0xa81042e8a4488bc4
+        .quad   0xb21ef18b4e5a1364
+        .quad   0x0c2a1c4bcda28dc9
+
+// 2^72 * 2 * G
+
+        .quad   0xd24dc7d06f1f0447
+        .quad   0xb2269e3edb87c059
+        .quad   0xd15b0272fbb2d28f
+        .quad   0x7c558bd1c6f64877
+        .quad   0xedc4814869bd6945
+        .quad   0x0d6d907dbe1c8d22
+        .quad   0xc63bd212d55cc5ab
+        .quad   0x5a6a9b30a314dc83
+        .quad   0xd0ec1524d396463d
+        .quad   0x12bb628ac35a24f0
+        .quad   0xa50c3a791cbc5fa4
+        .quad   0x0404a5ca0afbafc3
+
+// 2^72 * 3 * G
+
+        .quad   0x8c1f40070aa743d6
+        .quad   0xccbad0cb5b265ee8
+        .quad   0x574b046b668fd2de
+        .quad   0x46395bfdcadd9633
+        .quad   0x62bc9e1b2a416fd1
+        .quad   0xb5c6f728e350598b
+        .quad   0x04343fd83d5d6967
+        .quad   0x39527516e7f8ee98
+        .quad   0x117fdb2d1a5d9a9c
+        .quad   0x9c7745bcd1005c2a
+        .quad   0xefd4bef154d56fea
+        .quad   0x76579a29e822d016
+
+// 2^72 * 4 * G
+
+        .quad   0x45b68e7e49c02a17
+        .quad   0x23cd51a2bca9a37f
+        .quad   0x3ed65f11ec224c1b
+        .quad   0x43a384dc9e05bdb1
+        .quad   0x333cb51352b434f2
+        .quad   0xd832284993de80e1
+        .quad   0xb5512887750d35ce
+        .quad   0x02c514bb2a2777c1
+        .quad   0x684bd5da8bf1b645
+        .quad   0xfb8bd37ef6b54b53
+        .quad   0x313916d7a9b0d253
+        .quad   0x1160920961548059
+
+// 2^72 * 5 * G
+
+        .quad   0xb44d166929dacfaa
+        .quad   0xda529f4c8413598f
+        .quad   0xe9ef63ca453d5559
+        .quad   0x351e125bc5698e0b
+        .quad   0x7a385616369b4dcd
+        .quad   0x75c02ca7655c3563
+        .quad   0x7dc21bf9d4f18021
+        .quad   0x2f637d7491e6e042
+        .quad   0xd4b49b461af67bbe
+        .quad   0xd603037ac8ab8961
+        .quad   0x71dee19ff9a699fb
+        .quad   0x7f182d06e7ce2a9a
+
+// 2^72 * 6 * G
+
+        .quad   0x7a7c8e64ab0168ec
+        .quad   0xcb5a4a5515edc543
+        .quad   0x095519d347cd0eda
+        .quad   0x67d4ac8c343e93b0
+        .quad   0x09454b728e217522
+        .quad   0xaa58e8f4d484b8d8
+        .quad   0xd358254d7f46903c
+        .quad   0x44acc043241c5217
+        .quad   0x1c7d6bbb4f7a5777
+        .quad   0x8b35fed4918313e1
+        .quad   0x4adca1c6c96b4684
+        .quad   0x556d1c8312ad71bd
+
+// 2^72 * 7 * G
+
+        .quad   0x17ef40e30c8d3982
+        .quad   0x31f7073e15a3fa34
+        .quad   0x4f21f3cb0773646e
+        .quad   0x746c6c6d1d824eff
+        .quad   0x81f06756b11be821
+        .quad   0x0faff82310a3f3dd
+        .quad   0xf8b2d0556a99465d
+        .quad   0x097abe38cc8c7f05
+        .quad   0x0c49c9877ea52da4
+        .quad   0x4c4369559bdc1d43
+        .quad   0x022c3809f7ccebd2
+        .quad   0x577e14a34bee84bd
+
+// 2^72 * 8 * G
+
+        .quad   0xf0e268ac61a73b0a
+        .quad   0xf2fafa103791a5f5
+        .quad   0xc1e13e826b6d00e9
+        .quad   0x60fa7ee96fd78f42
+        .quad   0x94fecebebd4dd72b
+        .quad   0xf46a4fda060f2211
+        .quad   0x124a5977c0c8d1ff
+        .quad   0x705304b8fb009295
+        .quad   0xb63d1d354d296ec6
+        .quad   0xf3c3053e5fad31d8
+        .quad   0x670b958cb4bd42ec
+        .quad   0x21398e0ca16353fd
+
+// 2^76 * 1 * G
+
+        .quad   0x216ab2ca8da7d2ef
+        .quad   0x366ad9dd99f42827
+        .quad   0xae64b9004fdd3c75
+        .quad   0x403a395b53909e62
+        .quad   0x86c5fc16861b7e9a
+        .quad   0xf6a330476a27c451
+        .quad   0x01667267a1e93597
+        .quad   0x05ffb9cd6082dfeb
+        .quad   0xa617fa9ff53f6139
+        .quad   0x60f2b5e513e66cb6
+        .quad   0xd7a8beefb3448aa4
+        .quad   0x7a2932856f5ea192
+
+// 2^76 * 2 * G
+
+        .quad   0x0b39d761b02de888
+        .quad   0x5f550e7ed2414e1f
+        .quad   0xa6bfa45822e1a940
+        .quad   0x050a2f7dfd447b99
+        .quad   0xb89c444879639302
+        .quad   0x4ae4f19350c67f2c
+        .quad   0xf0b35da8c81af9c6
+        .quad   0x39d0003546871017
+        .quad   0x437c3b33a650db77
+        .quad   0x6bafe81dbac52bb2
+        .quad   0xfe99402d2db7d318
+        .quad   0x2b5b7eec372ba6ce
+
+// 2^76 * 3 * G
+
+        .quad   0xb3bc4bbd83f50eef
+        .quad   0x508f0c998c927866
+        .quad   0x43e76587c8b7e66e
+        .quad   0x0f7655a3a47f98d9
+        .quad   0xa694404d613ac8f4
+        .quad   0x500c3c2bfa97e72c
+        .quad   0x874104d21fcec210
+        .quad   0x1b205fb38604a8ee
+        .quad   0x55ecad37d24b133c
+        .quad   0x441e147d6038c90b
+        .quad   0x656683a1d62c6fee
+        .quad   0x0157d5dc87e0ecae
+
+// 2^76 * 4 * G
+
+        .quad   0xf2a7af510354c13d
+        .quad   0xd7a0b145aa372b60
+        .quad   0x2869b96a05a3d470
+        .quad   0x6528e42d82460173
+        .quad   0x95265514d71eb524
+        .quad   0xe603d8815df14593
+        .quad   0x147cdf410d4de6b7
+        .quad   0x5293b1730437c850
+        .quad   0x23d0e0814bccf226
+        .quad   0x92c745cd8196fb93
+        .quad   0x8b61796c59541e5b
+        .quad   0x40a44df0c021f978
+
+// 2^76 * 5 * G
+
+        .quad   0xdaa869894f20ea6a
+        .quad   0xea14a3d14c620618
+        .quad   0x6001fccb090bf8be
+        .quad   0x35f4e822947e9cf0
+        .quad   0x86c96e514bc5d095
+        .quad   0xf20d4098fca6804a
+        .quad   0x27363d89c826ea5d
+        .quad   0x39ca36565719cacf
+        .quad   0x97506f2f6f87b75c
+        .quad   0xc624aea0034ae070
+        .quad   0x1ec856e3aad34dd6
+        .quad   0x055b0be0e440e58f
+
+// 2^76 * 6 * G
+
+        .quad   0x6469a17d89735d12
+        .quad   0xdb6f27d5e662b9f1
+        .quad   0x9fcba3286a395681
+        .quad   0x363b8004d269af25
+        .quad   0x4d12a04b6ea33da2
+        .quad   0x57cf4c15e36126dd
+        .quad   0x90ec9675ee44d967
+        .quad   0x64ca348d2a985aac
+        .quad   0x99588e19e4c4912d
+        .quad   0xefcc3b4e1ca5ce6b
+        .quad   0x4522ea60fa5b98d5
+        .quad   0x7064bbab1de4a819
+
+// 2^76 * 7 * G
+
+        .quad   0xb919e1515a770641
+        .quad   0xa9a2e2c74e7f8039
+        .quad   0x7527250b3df23109
+        .quad   0x756a7330ac27b78b
+        .quad   0xa290c06142542129
+        .quad   0xf2e2c2aebe8d5b90
+        .quad   0xcf2458db76abfe1b
+        .quad   0x02157ade83d626bf
+        .quad   0x3e46972a1b9a038b
+        .quad   0x2e4ee66a7ee03fb4
+        .quad   0x81a248776edbb4ca
+        .quad   0x1a944ee88ecd0563
+
+// 2^76 * 8 * G
+
+        .quad   0xd5a91d1151039372
+        .quad   0x2ed377b799ca26de
+        .quad   0xa17202acfd366b6b
+        .quad   0x0730291bd6901995
+        .quad   0xbb40a859182362d6
+        .quad   0xb99f55778a4d1abb
+        .quad   0x8d18b427758559f6
+        .quad   0x26c20fe74d26235a
+        .quad   0x648d1d9fe9cc22f5
+        .quad   0x66bc561928dd577c
+        .quad   0x47d3ed21652439d1
+        .quad   0x49d271acedaf8b49
+
+// 2^80 * 1 * G
+
+        .quad   0x89f5058a382b33f3
+        .quad   0x5ae2ba0bad48c0b4
+        .quad   0x8f93b503a53db36e
+        .quad   0x5aa3ed9d95a232e6
+        .quad   0x2798aaf9b4b75601
+        .quad   0x5eac72135c8dad72
+        .quad   0xd2ceaa6161b7a023
+        .quad   0x1bbfb284e98f7d4e
+        .quad   0x656777e9c7d96561
+        .quad   0xcb2b125472c78036
+        .quad   0x65053299d9506eee
+        .quad   0x4a07e14e5e8957cc
+
+// 2^80 * 2 * G
+
+        .quad   0x4ee412cb980df999
+        .quad   0xa315d76f3c6ec771
+        .quad   0xbba5edde925c77fd
+        .quad   0x3f0bac391d313402
+        .quad   0x240b58cdc477a49b
+        .quad   0xfd38dade6447f017
+        .quad   0x19928d32a7c86aad
+        .quad   0x50af7aed84afa081
+        .quad   0x6e4fde0115f65be5
+        .quad   0x29982621216109b2
+        .quad   0x780205810badd6d9
+        .quad   0x1921a316baebd006
+
+// 2^80 * 3 * G
+
+        .quad   0x89422f7edfb870fc
+        .quad   0x2c296beb4f76b3bd
+        .quad   0x0738f1d436c24df7
+        .quad   0x6458df41e273aeb0
+        .quad   0xd75aad9ad9f3c18b
+        .quad   0x566a0eef60b1c19c
+        .quad   0x3e9a0bac255c0ed9
+        .quad   0x7b049deca062c7f5
+        .quad   0xdccbe37a35444483
+        .quad   0x758879330fedbe93
+        .quad   0x786004c312c5dd87
+        .quad   0x6093dccbc2950e64
+
+// 2^80 * 4 * G
+
+        .quad   0x1ff39a8585e0706d
+        .quad   0x36d0a5d8b3e73933
+        .quad   0x43b9f2e1718f453b
+        .quad   0x57d1ea084827a97c
+        .quad   0x6bdeeebe6084034b
+        .quad   0x3199c2b6780fb854
+        .quad   0x973376abb62d0695
+        .quad   0x6e3180c98b647d90
+        .quad   0xee7ab6e7a128b071
+        .quad   0xa4c1596d93a88baa
+        .quad   0xf7b4de82b2216130
+        .quad   0x363e999ddd97bd18
+
+// 2^80 * 5 * G
+
+        .quad   0x96a843c135ee1fc4
+        .quad   0x976eb35508e4c8cf
+        .quad   0xb42f6801b58cd330
+        .quad   0x48ee9b78693a052b
+        .quad   0x2f1848dce24baec6
+        .quad   0x769b7255babcaf60
+        .quad   0x90cb3c6e3cefe931
+        .quad   0x231f979bc6f9b355
+        .quad   0x5c31de4bcc2af3c6
+        .quad   0xb04bb030fe208d1f
+        .quad   0xb78d7009c14fb466
+        .quad   0x079bfa9b08792413
+
+// 2^80 * 6 * G
+
+        .quad   0xe3903a51da300df4
+        .quad   0x843964233da95ab0
+        .quad   0xed3cf12d0b356480
+        .quad   0x038c77f684817194
+        .quad   0xf3c9ed80a2d54245
+        .quad   0x0aa08b7877f63952
+        .quad   0xd76dac63d1085475
+        .quad   0x1ef4fb159470636b
+        .quad   0x854e5ee65b167bec
+        .quad   0x59590a4296d0cdc2
+        .quad   0x72b2df3498102199
+        .quad   0x575ee92a4a0bff56
+
+// 2^80 * 7 * G
+
+        .quad   0xd4c080908a182fcf
+        .quad   0x30e170c299489dbd
+        .quad   0x05babd5752f733de
+        .quad   0x43d4e7112cd3fd00
+        .quad   0x5d46bc450aa4d801
+        .quad   0xc3af1227a533b9d8
+        .quad   0x389e3b262b8906c2
+        .quad   0x200a1e7e382f581b
+        .quad   0x518db967eaf93ac5
+        .quad   0x71bc989b056652c0
+        .quad   0xfe2b85d9567197f5
+        .quad   0x050eca52651e4e38
+
+// 2^80 * 8 * G
+
+        .quad   0xc3431ade453f0c9c
+        .quad   0xe9f5045eff703b9b
+        .quad   0xfcd97ac9ed847b3d
+        .quad   0x4b0ee6c21c58f4c6
+        .quad   0x97ac397660e668ea
+        .quad   0x9b19bbfe153ab497
+        .quad   0x4cb179b534eca79f
+        .quad   0x6151c09fa131ae57
+        .quad   0x3af55c0dfdf05d96
+        .quad   0xdd262ee02ab4ee7a
+        .quad   0x11b2bb8712171709
+        .quad   0x1fef24fa800f030b
+
+// 2^84 * 1 * G
+
+        .quad   0xb496123a6b6c6609
+        .quad   0xa750fe8580ab5938
+        .quad   0xf471bf39b7c27a5f
+        .quad   0x507903ce77ac193c
+        .quad   0xff91a66a90166220
+        .quad   0xf22552ae5bf1e009
+        .quad   0x7dff85d87f90df7c
+        .quad   0x4f620ffe0c736fb9
+        .quad   0x62f90d65dfde3e34
+        .quad   0xcf28c592b9fa5fad
+        .quad   0x99c86ef9c6164510
+        .quad   0x25d448044a256c84
+
+// 2^84 * 2 * G
+
+        .quad   0xbd68230ec7e9b16f
+        .quad   0x0eb1b9c1c1c5795d
+        .quad   0x7943c8c495b6b1ff
+        .quad   0x2f9faf620bbacf5e
+        .quad   0x2c7c4415c9022b55
+        .quad   0x56a0d241812eb1fe
+        .quad   0xf02ea1c9d7b65e0d
+        .quad   0x4180512fd5323b26
+        .quad   0xa4ff3e698a48a5db
+        .quad   0xba6a3806bd95403b
+        .quad   0x9f7ce1af47d5b65d
+        .quad   0x15e087e55939d2fb
+
+// 2^84 * 3 * G
+
+        .quad   0x12207543745c1496
+        .quad   0xdaff3cfdda38610c
+        .quad   0xe4e797272c71c34f
+        .quad   0x39c07b1934bdede9
+        .quad   0x8894186efb963f38
+        .quad   0x48a00e80dc639bd5
+        .quad   0xa4e8092be96c1c99
+        .quad   0x5a097d54ca573661
+        .quad   0x2d45892b17c9e755
+        .quad   0xd033fd7289308df8
+        .quad   0x6c2fe9d9525b8bd9
+        .quad   0x2edbecf1c11cc079
+
+// 2^84 * 4 * G
+
+        .quad   0x1616a4e3c715a0d2
+        .quad   0x53623cb0f8341d4d
+        .quad   0x96ef5329c7e899cb
+        .quad   0x3d4e8dbba668baa6
+        .quad   0xee0f0fddd087a25f
+        .quad   0x9c7531555c3e34ee
+        .quad   0x660c572e8fab3ab5
+        .quad   0x0854fc44544cd3b2
+        .quad   0x61eba0c555edad19
+        .quad   0x24b533fef0a83de6
+        .quad   0x3b77042883baa5f8
+        .quad   0x678f82b898a47e8d
+
+// 2^84 * 5 * G
+
+        .quad   0xb1491d0bd6900c54
+        .quad   0x3539722c9d132636
+        .quad   0x4db928920b362bc9
+        .quad   0x4d7cd1fea68b69df
+        .quad   0x1e09d94057775696
+        .quad   0xeed1265c3cd951db
+        .quad   0xfa9dac2b20bce16f
+        .quad   0x0f7f76e0e8d089f4
+        .quad   0x36d9ebc5d485b00c
+        .quad   0xa2596492e4adb365
+        .quad   0xc1659480c2119ccd
+        .quad   0x45306349186e0d5f
+
+// 2^84 * 6 * G
+
+        .quad   0x94ddd0c1a6cdff1d
+        .quad   0x55f6f115e84213ae
+        .quad   0x6c935f85992fcf6a
+        .quad   0x067ee0f54a37f16f
+        .quad   0x96a414ec2b072491
+        .quad   0x1bb2218127a7b65b
+        .quad   0x6d2849596e8a4af0
+        .quad   0x65f3b08ccd27765f
+        .quad   0xecb29fff199801f7
+        .quad   0x9d361d1fa2a0f72f
+        .quad   0x25f11d2375fd2f49
+        .quad   0x124cefe80fe10fe2
+
+// 2^84 * 7 * G
+
+        .quad   0x4c126cf9d18df255
+        .quad   0xc1d471e9147a63b6
+        .quad   0x2c6d3c73f3c93b5f
+        .quad   0x6be3a6a2e3ff86a2
+        .quad   0x1518e85b31b16489
+        .quad   0x8faadcb7db710bfb
+        .quad   0x39b0bdf4a14ae239
+        .quad   0x05f4cbea503d20c1
+        .quad   0xce040e9ec04145bc
+        .quad   0xc71ff4e208f6834c
+        .quad   0xbd546e8dab8847a3
+        .quad   0x64666aa0a4d2aba5
+
+// 2^84 * 8 * G
+
+        .quad   0x6841435a7c06d912
+        .quad   0xca123c21bb3f830b
+        .quad   0xd4b37b27b1cbe278
+        .quad   0x1d753b84c76f5046
+        .quad   0xb0c53bf73337e94c
+        .quad   0x7cb5697e11e14f15
+        .quad   0x4b84abac1930c750
+        .quad   0x28dd4abfe0640468
+        .quad   0x7dc0b64c44cb9f44
+        .quad   0x18a3e1ace3925dbf
+        .quad   0x7a3034862d0457c4
+        .quad   0x4c498bf78a0c892e
+
+// 2^88 * 1 * G
+
+        .quad   0x37d653fb1aa73196
+        .quad   0x0f9495303fd76418
+        .quad   0xad200b09fb3a17b2
+        .quad   0x544d49292fc8613e
+        .quad   0x22d2aff530976b86
+        .quad   0x8d90b806c2d24604
+        .quad   0xdca1896c4de5bae5
+        .quad   0x28005fe6c8340c17
+        .quad   0x6aefba9f34528688
+        .quad   0x5c1bff9425107da1
+        .quad   0xf75bbbcd66d94b36
+        .quad   0x72e472930f316dfa
+
+// 2^88 * 2 * G
+
+        .quad   0x2695208c9781084f
+        .quad   0xb1502a0b23450ee1
+        .quad   0xfd9daea603efde02
+        .quad   0x5a9d2e8c2733a34c
+        .quad   0x07f3f635d32a7627
+        .quad   0x7aaa4d865f6566f0
+        .quad   0x3c85e79728d04450
+        .quad   0x1fee7f000fe06438
+        .quad   0x765305da03dbf7e5
+        .quad   0xa4daf2491434cdbd
+        .quad   0x7b4ad5cdd24a88ec
+        .quad   0x00f94051ee040543
+
+// 2^88 * 3 * G
+
+        .quad   0x8d356b23c3d330b2
+        .quad   0xf21c8b9bb0471b06
+        .quad   0xb36c316c6e42b83c
+        .quad   0x07d79c7e8beab10d
+        .quad   0xd7ef93bb07af9753
+        .quad   0x583ed0cf3db766a7
+        .quad   0xce6998bf6e0b1ec5
+        .quad   0x47b7ffd25dd40452
+        .quad   0x87fbfb9cbc08dd12
+        .quad   0x8a066b3ae1eec29b
+        .quad   0x0d57242bdb1fc1bf
+        .quad   0x1c3520a35ea64bb6
+
+// 2^88 * 4 * G
+
+        .quad   0x80d253a6bccba34a
+        .quad   0x3e61c3a13838219b
+        .quad   0x90c3b6019882e396
+        .quad   0x1c3d05775d0ee66f
+        .quad   0xcda86f40216bc059
+        .quad   0x1fbb231d12bcd87e
+        .quad   0xb4956a9e17c70990
+        .quad   0x38750c3b66d12e55
+        .quad   0x692ef1409422e51a
+        .quad   0xcbc0c73c2b5df671
+        .quad   0x21014fe7744ce029
+        .quad   0x0621e2c7d330487c
+
+// 2^88 * 5 * G
+
+        .quad   0xaf9860cc8259838d
+        .quad   0x90ea48c1c69f9adc
+        .quad   0x6526483765581e30
+        .quad   0x0007d6097bd3a5bc
+        .quad   0xb7ae1796b0dbf0f3
+        .quad   0x54dfafb9e17ce196
+        .quad   0x25923071e9aaa3b4
+        .quad   0x5d8e589ca1002e9d
+        .quad   0xc0bf1d950842a94b
+        .quad   0xb2d3c363588f2e3e
+        .quad   0x0a961438bb51e2ef
+        .quad   0x1583d7783c1cbf86
+
+// 2^88 * 6 * G
+
+        .quad   0xeceea2ef5da27ae1
+        .quad   0x597c3a1455670174
+        .quad   0xc9a62a126609167a
+        .quad   0x252a5f2e81ed8f70
+        .quad   0x90034704cc9d28c7
+        .quad   0x1d1b679ef72cc58f
+        .quad   0x16e12b5fbe5b8726
+        .quad   0x4958064e83c5580a
+        .quad   0x0d2894265066e80d
+        .quad   0xfcc3f785307c8c6b
+        .quad   0x1b53da780c1112fd
+        .quad   0x079c170bd843b388
+
+// 2^88 * 7 * G
+
+        .quad   0x0506ece464fa6fff
+        .quad   0xbee3431e6205e523
+        .quad   0x3579422451b8ea42
+        .quad   0x6dec05e34ac9fb00
+        .quad   0xcdd6cd50c0d5d056
+        .quad   0x9af7686dbb03573b
+        .quad   0x3ca6723ff3c3ef48
+        .quad   0x6768c0d7317b8acc
+        .quad   0x94b625e5f155c1b3
+        .quad   0x417bf3a7997b7b91
+        .quad   0xc22cbddc6d6b2600
+        .quad   0x51445e14ddcd52f4
+
+// 2^88 * 8 * G
+
+        .quad   0x57502b4b3b144951
+        .quad   0x8e67ff6b444bbcb3
+        .quad   0xb8bd6927166385db
+        .quad   0x13186f31e39295c8
+        .quad   0x893147ab2bbea455
+        .quad   0x8c53a24f92079129
+        .quad   0x4b49f948be30f7a7
+        .quad   0x12e990086e4fd43d
+        .quad   0xf10c96b37fdfbb2e
+        .quad   0x9f9a935e121ceaf9
+        .quad   0xdf1136c43a5b983f
+        .quad   0x77b2e3f05d3e99af
+
+// 2^92 * 1 * G
+
+        .quad   0xfd0d75879cf12657
+        .quad   0xe82fef94e53a0e29
+        .quad   0xcc34a7f05bbb4be7
+        .quad   0x0b251172a50c38a2
+        .quad   0x9532f48fcc5cd29b
+        .quad   0x2ba851bea3ce3671
+        .quad   0x32dacaa051122941
+        .quad   0x478d99d9350004f2
+        .quad   0x1d5ad94890bb02c0
+        .quad   0x50e208b10ec25115
+        .quad   0xa26a22894ef21702
+        .quad   0x4dc923343b524805
+
+// 2^92 * 2 * G
+
+        .quad   0xe3828c400f8086b6
+        .quad   0x3f77e6f7979f0dc8
+        .quad   0x7ef6de304df42cb4
+        .quad   0x5265797cb6abd784
+        .quad   0x3ad3e3ebf36c4975
+        .quad   0xd75d25a537862125
+        .quad   0xe873943da025a516
+        .quad   0x6bbc7cb4c411c847
+        .quad   0x3c6f9cd1d4a50d56
+        .quad   0xb6244077c6feab7e
+        .quad   0x6ff9bf483580972e
+        .quad   0x00375883b332acfb
+
+// 2^92 * 3 * G
+
+        .quad   0x0001b2cd28cb0940
+        .quad   0x63fb51a06f1c24c9
+        .quad   0xb5ad8691dcd5ca31
+        .quad   0x67238dbd8c450660
+        .quad   0xc98bec856c75c99c
+        .quad   0xe44184c000e33cf4
+        .quad   0x0a676b9bba907634
+        .quad   0x669e2cb571f379d7
+        .quad   0xcb116b73a49bd308
+        .quad   0x025aad6b2392729e
+        .quad   0xb4793efa3f55d9b1
+        .quad   0x72a1056140678bb9
+
+// 2^92 * 4 * G
+
+        .quad   0xa2b6812b1cc9249d
+        .quad   0x62866eee21211f58
+        .quad   0x2cb5c5b85df10ece
+        .quad   0x03a6b259e263ae00
+        .quad   0x0d8d2909e2e505b6
+        .quad   0x98ca78abc0291230
+        .quad   0x77ef5569a9b12327
+        .quad   0x7c77897b81439b47
+        .quad   0xf1c1b5e2de331cb5
+        .quad   0x5a9f5d8e15fca420
+        .quad   0x9fa438f17bd932b1
+        .quad   0x2a381bf01c6146e7
+
+// 2^92 * 5 * G
+
+        .quad   0xac9b9879cfc811c1
+        .quad   0x8b7d29813756e567
+        .quad   0x50da4e607c70edfc
+        .quad   0x5dbca62f884400b6
+        .quad   0xf7c0be32b534166f
+        .quad   0x27e6ca6419cf70d4
+        .quad   0x934df7d7a957a759
+        .quad   0x5701461dabdec2aa
+        .quad   0x2c6747402c915c25
+        .quad   0x1bdcd1a80b0d340a
+        .quad   0x5e5601bd07b43f5f
+        .quad   0x2555b4e05539a242
+
+// 2^92 * 6 * G
+
+        .quad   0x6fc09f5266ddd216
+        .quad   0xdce560a7c8e37048
+        .quad   0xec65939da2df62fd
+        .quad   0x7a869ae7e52ed192
+        .quad   0x78409b1d87e463d4
+        .quad   0xad4da95acdfb639d
+        .quad   0xec28773755259b9c
+        .quad   0x69c806e9c31230ab
+        .quad   0x7b48f57414bb3f22
+        .quad   0x68c7cee4aedccc88
+        .quad   0xed2f936179ed80be
+        .quad   0x25d70b885f77bc4b
+
+// 2^92 * 7 * G
+
+        .quad   0x4151c3d9762bf4de
+        .quad   0x083f435f2745d82b
+        .quad   0x29775a2e0d23ddd5
+        .quad   0x138e3a6269a5db24
+        .quad   0x98459d29bb1ae4d4
+        .quad   0x56b9c4c739f954ec
+        .quad   0x832743f6c29b4b3e
+        .quad   0x21ea8e2798b6878a
+        .quad   0x87bef4b46a5a7b9c
+        .quad   0xd2299d1b5fc1d062
+        .quad   0x82409818dd321648
+        .quad   0x5c5abeb1e5a2e03d
+
+// 2^92 * 8 * G
+
+        .quad   0x14722af4b73c2ddb
+        .quad   0xbc470c5f5a05060d
+        .quad   0x00943eac2581b02e
+        .quad   0x0e434b3b1f499c8f
+        .quad   0x02cde6de1306a233
+        .quad   0x7b5a52a2116f8ec7
+        .quad   0xe1c681f4c1163b5b
+        .quad   0x241d350660d32643
+        .quad   0x6be4404d0ebc52c7
+        .quad   0xae46233bb1a791f5
+        .quad   0x2aec170ed25db42b
+        .quad   0x1d8dfd966645d694
+
+// 2^96 * 1 * G
+
+        .quad   0x296fa9c59c2ec4de
+        .quad   0xbc8b61bf4f84f3cb
+        .quad   0x1c7706d917a8f908
+        .quad   0x63b795fc7ad3255d
+        .quad   0xd598639c12ddb0a4
+        .quad   0xa5d19f30c024866b
+        .quad   0xd17c2f0358fce460
+        .quad   0x07a195152e095e8a
+        .quad   0xa8368f02389e5fc8
+        .quad   0x90433b02cf8de43b
+        .quad   0xafa1fd5dc5412643
+        .quad   0x3e8fe83d032f0137
+
+// 2^96 * 2 * G
+
+        .quad   0x2f8b15b90570a294
+        .quad   0x94f2427067084549
+        .quad   0xde1c5ae161bbfd84
+        .quad   0x75ba3b797fac4007
+        .quad   0x08704c8de8efd13c
+        .quad   0xdfc51a8e33e03731
+        .quad   0xa59d5da51260cde3
+        .quad   0x22d60899a6258c86
+        .quad   0x6239dbc070cdd196
+        .quad   0x60fe8a8b6c7d8a9a
+        .quad   0xb38847bceb401260
+        .quad   0x0904d07b87779e5e
+
+// 2^96 * 3 * G
+
+        .quad   0xb4ce1fd4ddba919c
+        .quad   0xcf31db3ec74c8daa
+        .quad   0x2c63cc63ad86cc51
+        .quad   0x43e2143fbc1dde07
+        .quad   0xf4322d6648f940b9
+        .quad   0x06952f0cbd2d0c39
+        .quad   0x167697ada081f931
+        .quad   0x6240aacebaf72a6c
+        .quad   0xf834749c5ba295a0
+        .quad   0xd6947c5bca37d25a
+        .quad   0x66f13ba7e7c9316a
+        .quad   0x56bdaf238db40cac
+
+// 2^96 * 4 * G
+
+        .quad   0x362ab9e3f53533eb
+        .quad   0x338568d56eb93d40
+        .quad   0x9e0e14521d5a5572
+        .quad   0x1d24a86d83741318
+        .quad   0x1310d36cc19d3bb2
+        .quad   0x062a6bb7622386b9
+        .quad   0x7c9b8591d7a14f5c
+        .quad   0x03aa31507e1e5754
+        .quad   0xf4ec7648ffd4ce1f
+        .quad   0xe045eaf054ac8c1c
+        .quad   0x88d225821d09357c
+        .quad   0x43b261dc9aeb4859
+
+// 2^96 * 5 * G
+
+        .quad   0xe55b1e1988bb79bb
+        .quad   0xa09ed07dc17a359d
+        .quad   0xb02c2ee2603dea33
+        .quad   0x326055cf5b276bc2
+        .quad   0x19513d8b6c951364
+        .quad   0x94fe7126000bf47b
+        .quad   0x028d10ddd54f9567
+        .quad   0x02b4d5e242940964
+        .quad   0xb4a155cb28d18df2
+        .quad   0xeacc4646186ce508
+        .quad   0xc49cf4936c824389
+        .quad   0x27a6c809ae5d3410
+
+// 2^96 * 6 * G
+
+        .quad   0x8ba6ebcd1f0db188
+        .quad   0x37d3d73a675a5be8
+        .quad   0xf22edfa315f5585a
+        .quad   0x2cb67174ff60a17e
+        .quad   0xcd2c270ac43d6954
+        .quad   0xdd4a3e576a66cab2
+        .quad   0x79fa592469d7036c
+        .quad   0x221503603d8c2599
+        .quad   0x59eecdf9390be1d0
+        .quad   0xa9422044728ce3f1
+        .quad   0x82891c667a94f0f4
+        .quad   0x7b1df4b73890f436
+
+// 2^96 * 7 * G
+
+        .quad   0xe492f2e0b3b2a224
+        .quad   0x7c6c9e062b551160
+        .quad   0x15eb8fe20d7f7b0e
+        .quad   0x61fcef2658fc5992
+        .quad   0x5f2e221807f8f58c
+        .quad   0xe3555c9fd49409d4
+        .quad   0xb2aaa88d1fb6a630
+        .quad   0x68698245d352e03d
+        .quad   0xdbb15d852a18187a
+        .quad   0xf3e4aad386ddacd7
+        .quad   0x44bae2810ff6c482
+        .quad   0x46cf4c473daf01cf
+
+// 2^96 * 8 * G
+
+        .quad   0x426525ed9ec4e5f9
+        .quad   0x0e5eda0116903303
+        .quad   0x72b1a7f2cbe5cadc
+        .quad   0x29387bcd14eb5f40
+        .quad   0x213c6ea7f1498140
+        .quad   0x7c1e7ef8392b4854
+        .quad   0x2488c38c5629ceba
+        .quad   0x1065aae50d8cc5bb
+        .quad   0x1c2c4525df200d57
+        .quad   0x5c3b2dd6bfca674a
+        .quad   0x0a07e7b1e1834030
+        .quad   0x69a198e64f1ce716
+
+// 2^100 * 1 * G
+
+        .quad   0x7afcd613efa9d697
+        .quad   0x0cc45aa41c067959
+        .quad   0xa56fe104c1fada96
+        .quad   0x3a73b70472e40365
+        .quad   0x7b26e56b9e2d4734
+        .quad   0xc4c7132b81c61675
+        .quad   0xef5c9525ec9cde7f
+        .quad   0x39c80b16e71743ad
+        .quad   0x0f196e0d1b826c68
+        .quad   0xf71ff0e24960e3db
+        .quad   0x6113167023b7436c
+        .quad   0x0cf0ea5877da7282
+
+// 2^100 * 2 * G
+
+        .quad   0x196c80a4ddd4ccbd
+        .quad   0x22e6f55d95f2dd9d
+        .quad   0xc75e33c740d6c71b
+        .quad   0x7bb51279cb3c042f
+        .quad   0xe332ced43ba6945a
+        .quad   0xde0b1361e881c05d
+        .quad   0x1ad40f095e67ed3b
+        .quad   0x5da8acdab8c63d5d
+        .quad   0xc4b6664a3a70159f
+        .quad   0x76194f0f0a904e14
+        .quad   0xa5614c39a4096c13
+        .quad   0x6cd0ff50979feced
+
+// 2^100 * 3 * G
+
+        .quad   0xc0e067e78f4428ac
+        .quad   0x14835ab0a61135e3
+        .quad   0xf21d14f338062935
+        .quad   0x6390a4c8df04849c
+        .quad   0x7fecfabdb04ba18e
+        .quad   0xd0fc7bfc3bddbcf7
+        .quad   0xa41d486e057a131c
+        .quad   0x641a4391f2223a61
+        .quad   0xc5c6b95aa606a8db
+        .quad   0x914b7f9eb06825f1
+        .quad   0x2a731f6b44fc9eff
+        .quad   0x30ddf38562705cfc
+
+// 2^100 * 4 * G
+
+        .quad   0x4e3dcbdad1bff7f9
+        .quad   0xc9118e8220645717
+        .quad   0xbacccebc0f189d56
+        .quad   0x1b4822e9d4467668
+        .quad   0x33bef2bd68bcd52c
+        .quad   0xc649dbb069482ef2
+        .quad   0xb5b6ee0c41cb1aee
+        .quad   0x5c294d270212a7e5
+        .quad   0xab360a7f25563781
+        .quad   0x2512228a480f7958
+        .quad   0xc75d05276114b4e3
+        .quad   0x222d9625d976fe2a
+
+// 2^100 * 5 * G
+
+        .quad   0x1c717f85b372ace1
+        .quad   0x81930e694638bf18
+        .quad   0x239cad056bc08b58
+        .quad   0x0b34271c87f8fff4
+        .quad   0x0f94be7e0a344f85
+        .quad   0xeb2faa8c87f22c38
+        .quad   0x9ce1e75e4ee16f0f
+        .quad   0x43e64e5418a08dea
+        .quad   0x8155e2521a35ce63
+        .quad   0xbe100d4df912028e
+        .quad   0xbff80bf8a57ddcec
+        .quad   0x57342dc96d6bc6e4
+
+// 2^100 * 6 * G
+
+        .quad   0xefeef065c8ce5998
+        .quad   0xbf029510b5cbeaa2
+        .quad   0x8c64a10620b7c458
+        .quad   0x35134fb231c24855
+        .quad   0xf3c3bcb71e707bf6
+        .quad   0x351d9b8c7291a762
+        .quad   0x00502e6edad69a33
+        .quad   0x522f521f1ec8807f
+        .quad   0x272c1f46f9a3902b
+        .quad   0xc91ba3b799657bcc
+        .quad   0xae614b304f8a1c0e
+        .quad   0x7afcaad70b99017b
+
+// 2^100 * 7 * G
+
+        .quad   0xc25ded54a4b8be41
+        .quad   0x902d13e11bb0e2dd
+        .quad   0x41f43233cde82ab2
+        .quad   0x1085faa5c3aae7cb
+        .quad   0xa88141ecef842b6b
+        .quad   0x55e7b14797abe6c5
+        .quad   0x8c748f9703784ffe
+        .quad   0x5b50a1f7afcd00b7
+        .quad   0x9b840f66f1361315
+        .quad   0x18462242701003e9
+        .quad   0x65ed45fae4a25080
+        .quad   0x0a2862393fda7320
+
+// 2^100 * 8 * G
+
+        .quad   0x46ab13c8347cbc9d
+        .quad   0x3849e8d499c12383
+        .quad   0x4cea314087d64ac9
+        .quad   0x1f354134b1a29ee7
+        .quad   0x960e737b6ecb9d17
+        .quad   0xfaf24948d67ceae1
+        .quad   0x37e7a9b4d55e1b89
+        .quad   0x5cb7173cb46c59eb
+        .quad   0x4a89e68b82b7abf0
+        .quad   0xf41cd9279ba6b7b9
+        .quad   0x16e6c210e18d876f
+        .quad   0x7cacdb0f7f1b09c6
+
+// 2^104 * 1 * G
+
+        .quad   0x9062b2e0d91a78bc
+        .quad   0x47c9889cc8509667
+        .quad   0x9df54a66405070b8
+        .quad   0x7369e6a92493a1bf
+        .quad   0xe1014434dcc5caed
+        .quad   0x47ed5d963c84fb33
+        .quad   0x70019576ed86a0e7
+        .quad   0x25b2697bd267f9e4
+        .quad   0x9d673ffb13986864
+        .quad   0x3ca5fbd9415dc7b8
+        .quad   0xe04ecc3bdf273b5e
+        .quad   0x1420683db54e4cd2
+
+// 2^104 * 2 * G
+
+        .quad   0xb478bd1e249dd197
+        .quad   0x620c35005e58c102
+        .quad   0xfb02d32fccbaac5c
+        .quad   0x60b63bebf508a72d
+        .quad   0x34eebb6fc1cc5ad0
+        .quad   0x6a1b0ce99646ac8b
+        .quad   0xd3b0da49a66bde53
+        .quad   0x31e83b4161d081c1
+        .quad   0x97e8c7129e062b4f
+        .quad   0x49e48f4f29320ad8
+        .quad   0x5bece14b6f18683f
+        .quad   0x55cf1eb62d550317
+
+// 2^104 * 3 * G
+
+        .quad   0x5879101065c23d58
+        .quad   0x8b9d086d5094819c
+        .quad   0xe2402fa912c55fa7
+        .quad   0x669a6564570891d4
+        .quad   0x3076b5e37df58c52
+        .quad   0xd73ab9dde799cc36
+        .quad   0xbd831ce34913ee20
+        .quad   0x1a56fbaa62ba0133
+        .quad   0x943e6b505c9dc9ec
+        .quad   0x302557bba77c371a
+        .quad   0x9873ae5641347651
+        .quad   0x13c4836799c58a5c
+
+// 2^104 * 4 * G
+
+        .quad   0x423a5d465ab3e1b9
+        .quad   0xfc13c187c7f13f61
+        .quad   0x19f83664ecb5b9b6
+        .quad   0x66f80c93a637b607
+        .quad   0xc4dcfb6a5d8bd080
+        .quad   0xdeebc4ec571a4842
+        .quad   0xd4b2e883b8e55365
+        .quad   0x50bdc87dc8e5b827
+        .quad   0x606d37836edfe111
+        .quad   0x32353e15f011abd9
+        .quad   0x64b03ac325b73b96
+        .quad   0x1dd56444725fd5ae
+
+// 2^104 * 5 * G
+
+        .quad   0x8fa47ff83362127d
+        .quad   0xbc9f6ac471cd7c15
+        .quad   0x6e71454349220c8b
+        .quad   0x0e645912219f732e
+        .quad   0xc297e60008bac89a
+        .quad   0x7d4cea11eae1c3e0
+        .quad   0xf3e38be19fe7977c
+        .quad   0x3a3a450f63a305cd
+        .quad   0x078f2f31d8394627
+        .quad   0x389d3183de94a510
+        .quad   0xd1e36c6d17996f80
+        .quad   0x318c8d9393a9a87b
+
+// 2^104 * 6 * G
+
+        .quad   0xf2745d032afffe19
+        .quad   0x0c9f3c497f24db66
+        .quad   0xbc98d3e3ba8598ef
+        .quad   0x224c7c679a1d5314
+        .quad   0x5d669e29ab1dd398
+        .quad   0xfc921658342d9e3b
+        .quad   0x55851dfdf35973cd
+        .quad   0x509a41c325950af6
+        .quad   0xbdc06edca6f925e9
+        .quad   0x793ef3f4641b1f33
+        .quad   0x82ec12809d833e89
+        .quad   0x05bff02328a11389
+
+// 2^104 * 7 * G
+
+        .quad   0x3632137023cae00b
+        .quad   0x544acf0ad1accf59
+        .quad   0x96741049d21a1c88
+        .quad   0x780b8cc3fa2a44a7
+        .quad   0x6881a0dd0dc512e4
+        .quad   0x4fe70dc844a5fafe
+        .quad   0x1f748e6b8f4a5240
+        .quad   0x576277cdee01a3ea
+        .quad   0x1ef38abc234f305f
+        .quad   0x9a577fbd1405de08
+        .quad   0x5e82a51434e62a0d
+        .quad   0x5ff418726271b7a1
+
+// 2^104 * 8 * G
+
+        .quad   0x398e080c1789db9d
+        .quad   0xa7602025f3e778f5
+        .quad   0xfa98894c06bd035d
+        .quad   0x106a03dc25a966be
+        .quad   0xe5db47e813b69540
+        .quad   0xf35d2a3b432610e1
+        .quad   0xac1f26e938781276
+        .quad   0x29d4db8ca0a0cb69
+        .quad   0xd9ad0aaf333353d0
+        .quad   0x38669da5acd309e5
+        .quad   0x3c57658ac888f7f0
+        .quad   0x4ab38a51052cbefa
+
+// 2^108 * 1 * G
+
+        .quad   0xdfdacbee4324c0e9
+        .quad   0x054442883f955bb7
+        .quad   0xdef7aaa8ea31609f
+        .quad   0x68aee70642287cff
+        .quad   0xf68fe2e8809de054
+        .quad   0xe3bc096a9c82bad1
+        .quad   0x076353d40aadbf45
+        .quad   0x7b9b1fb5dea1959e
+        .quad   0xf01cc8f17471cc0c
+        .quad   0x95242e37579082bb
+        .quad   0x27776093d3e46b5f
+        .quad   0x2d13d55a28bd85fb
+
+// 2^108 * 2 * G
+
+        .quad   0xfac5d2065b35b8da
+        .quad   0xa8da8a9a85624bb7
+        .quad   0xccd2ca913d21cd0f
+        .quad   0x6b8341ee8bf90d58
+        .quad   0xbf019cce7aee7a52
+        .quad   0xa8ded2b6e454ead3
+        .quad   0x3c619f0b87a8bb19
+        .quad   0x3619b5d7560916d8
+        .quad   0x3579f26b0282c4b2
+        .quad   0x64d592f24fafefae
+        .quad   0xb7cded7b28c8c7c0
+        .quad   0x6a927b6b7173a8d7
+
+// 2^108 * 3 * G
+
+        .quad   0x1f6db24f986e4656
+        .quad   0x1021c02ed1e9105b
+        .quad   0xf8ff3fff2cc0a375
+        .quad   0x1d2a6bf8c6c82592
+        .quad   0x8d7040863ece88eb
+        .quad   0xf0e307a980eec08c
+        .quad   0xac2250610d788fda
+        .quad   0x056d92a43a0d478d
+        .quad   0x1b05a196fc3da5a1
+        .quad   0x77d7a8c243b59ed0
+        .quad   0x06da3d6297d17918
+        .quad   0x66fbb494f12353f7
+
+// 2^108 * 4 * G
+
+        .quad   0x751a50b9d85c0fb8
+        .quad   0xd1afdc258bcf097b
+        .quad   0x2f16a6a38309a969
+        .quad   0x14ddff9ee5b00659
+        .quad   0xd6d70996f12309d6
+        .quad   0xdbfb2385e9c3d539
+        .quad   0x46d602b0f7552411
+        .quad   0x270a0b0557843e0c
+        .quad   0x61ff0640a7862bcc
+        .quad   0x81cac09a5f11abfe
+        .quad   0x9047830455d12abb
+        .quad   0x19a4bde1945ae873
+
+// 2^108 * 5 * G
+
+        .quad   0x9b9f26f520a6200a
+        .quad   0x64804443cf13eaf8
+        .quad   0x8a63673f8631edd3
+        .quad   0x72bbbce11ed39dc1
+        .quad   0x40c709dec076c49f
+        .quad   0x657bfaf27f3e53f6
+        .quad   0x40662331eca042c4
+        .quad   0x14b375487eb4df04
+        .quad   0xae853c94ab66dc47
+        .quad   0xeb62343edf762d6e
+        .quad   0xf08e0e186fb2f7d1
+        .quad   0x4f0b1c02700ab37a
+
+// 2^108 * 6 * G
+
+        .quad   0xe1706787d81951fa
+        .quad   0xa10a2c8eb290c77b
+        .quad   0xe7382fa03ed66773
+        .quad   0x0a4d84710bcc4b54
+        .quad   0x79fd21ccc1b2e23f
+        .quad   0x4ae7c281453df52a
+        .quad   0xc8172ec9d151486b
+        .quad   0x68abe9443e0a7534
+        .quad   0xda12c6c407831dcb
+        .quad   0x0da230d74d5c510d
+        .quad   0x4ab1531e6bd404e1
+        .quad   0x4106b166bcf440ef
+
+// 2^108 * 7 * G
+
+        .quad   0x02e57a421cd23668
+        .quad   0x4ad9fb5d0eaef6fd
+        .quad   0x954e6727b1244480
+        .quad   0x7f792f9d2699f331
+        .quad   0xa485ccd539e4ecf2
+        .quad   0x5aa3f3ad0555bab5
+        .quad   0x145e3439937df82d
+        .quad   0x1238b51e1214283f
+        .quad   0x0b886b925fd4d924
+        .quad   0x60906f7a3626a80d
+        .quad   0xecd367b4b98abd12
+        .quad   0x2876beb1def344cf
+
+// 2^108 * 8 * G
+
+        .quad   0xdc84e93563144691
+        .quad   0x632fe8a0d61f23f4
+        .quad   0x4caa800612a9a8d5
+        .quad   0x48f9dbfa0e9918d3
+        .quad   0xd594b3333a8a85f8
+        .quad   0x4ea37689e78d7d58
+        .quad   0x73bf9f455e8e351f
+        .quad   0x5507d7d2bc41ebb4
+        .quad   0x1ceb2903299572fc
+        .quad   0x7c8ccaa29502d0ee
+        .quad   0x91bfa43411cce67b
+        .quad   0x5784481964a831e7
+
+// 2^112 * 1 * G
+
+        .quad   0xda7c2b256768d593
+        .quad   0x98c1c0574422ca13
+        .quad   0xf1a80bd5ca0ace1d
+        .quad   0x29cdd1adc088a690
+        .quad   0xd6cfd1ef5fddc09c
+        .quad   0xe82b3efdf7575dce
+        .quad   0x25d56b5d201634c2
+        .quad   0x3041c6bb04ed2b9b
+        .quad   0x0ff2f2f9d956e148
+        .quad   0xade797759f356b2e
+        .quad   0x1a4698bb5f6c025c
+        .quad   0x104bbd6814049a7b
+
+// 2^112 * 2 * G
+
+        .quad   0x51f0fd3168f1ed67
+        .quad   0x2c811dcdd86f3bc2
+        .quad   0x44dc5c4304d2f2de
+        .quad   0x5be8cc57092a7149
+        .quad   0xa95d9a5fd67ff163
+        .quad   0xe92be69d4cc75681
+        .quad   0xb7f8024cde20f257
+        .quad   0x204f2a20fb072df5
+        .quad   0xc8143b3d30ebb079
+        .quad   0x7589155abd652e30
+        .quad   0x653c3c318f6d5c31
+        .quad   0x2570fb17c279161f
+
+// 2^112 * 3 * G
+
+        .quad   0x3efa367f2cb61575
+        .quad   0xf5f96f761cd6026c
+        .quad   0xe8c7142a65b52562
+        .quad   0x3dcb65ea53030acd
+        .quad   0x192ea9550bb8245a
+        .quad   0xc8e6fba88f9050d1
+        .quad   0x7986ea2d88a4c935
+        .quad   0x241c5f91de018668
+        .quad   0x28d8172940de6caa
+        .quad   0x8fbf2cf022d9733a
+        .quad   0x16d7fcdd235b01d1
+        .quad   0x08420edd5fcdf0e5
+
+// 2^112 * 4 * G
+
+        .quad   0xcdff20ab8362fa4a
+        .quad   0x57e118d4e21a3e6e
+        .quad   0xe3179617fc39e62b
+        .quad   0x0d9a53efbc1769fd
+        .quad   0x0358c34e04f410ce
+        .quad   0xb6135b5a276e0685
+        .quad   0x5d9670c7ebb91521
+        .quad   0x04d654f321db889c
+        .quad   0x5e7dc116ddbdb5d5
+        .quad   0x2954deb68da5dd2d
+        .quad   0x1cb608173334a292
+        .quad   0x4a7a4f2618991ad7
+
+// 2^112 * 5 * G
+
+        .quad   0xf4a718025fb15f95
+        .quad   0x3df65f346b5c1b8f
+        .quad   0xcdfcf08500e01112
+        .quad   0x11b50c4cddd31848
+        .quad   0x24c3b291af372a4b
+        .quad   0x93da8270718147f2
+        .quad   0xdd84856486899ef2
+        .quad   0x4a96314223e0ee33
+        .quad   0xa6e8274408a4ffd6
+        .quad   0x738e177e9c1576d9
+        .quad   0x773348b63d02b3f2
+        .quad   0x4f4bce4dce6bcc51
+
+// 2^112 * 6 * G
+
+        .quad   0xa71fce5ae2242584
+        .quad   0x26ea725692f58a9e
+        .quad   0xd21a09d71cea3cf4
+        .quad   0x73fcdd14b71c01e6
+        .quad   0x30e2616ec49d0b6f
+        .quad   0xe456718fcaec2317
+        .quad   0x48eb409bf26b4fa6
+        .quad   0x3042cee561595f37
+        .quad   0x427e7079449bac41
+        .quad   0x855ae36dbce2310a
+        .quad   0x4cae76215f841a7c
+        .quad   0x389e740c9a9ce1d6
+
+// 2^112 * 7 * G
+
+        .quad   0x64fcb3ae34dcb9ce
+        .quad   0x97500323e348d0ad
+        .quad   0x45b3f07d62c6381b
+        .quad   0x61545379465a6788
+        .quad   0xc9bd78f6570eac28
+        .quad   0xe55b0b3227919ce1
+        .quad   0x65fc3eaba19b91ed
+        .quad   0x25c425e5d6263690
+        .quad   0x3f3e06a6f1d7de6e
+        .quad   0x3ef976278e062308
+        .quad   0x8c14f6264e8a6c77
+        .quad   0x6539a08915484759
+
+// 2^112 * 8 * G
+
+        .quad   0xe9d21f74c3d2f773
+        .quad   0xc150544125c46845
+        .quad   0x624e5ce8f9b99e33
+        .quad   0x11c5e4aac5cd186c
+        .quad   0xddc4dbd414bb4a19
+        .quad   0x19b2bc3c98424f8e
+        .quad   0x48a89fd736ca7169
+        .quad   0x0f65320ef019bd90
+        .quad   0xd486d1b1cafde0c6
+        .quad   0x4f3fe6e3163b5181
+        .quad   0x59a8af0dfaf2939a
+        .quad   0x4cabc7bdec33072a
+
+// 2^116 * 1 * G
+
+        .quad   0x16faa8fb532f7428
+        .quad   0xdbd42ea046a4e272
+        .quad   0x5337653b8b9ea480
+        .quad   0x4065947223973f03
+        .quad   0xf7c0a19c1a54a044
+        .quad   0x4a1c5e2477bd9fbb
+        .quad   0xa6e3ca115af22972
+        .quad   0x1819bb953f2e9e0d
+        .quad   0x498fbb795e042e84
+        .quad   0x7d0dd89a7698b714
+        .quad   0x8bfb0ba427fe6295
+        .quad   0x36ba82e721200524
+
+// 2^116 * 2 * G
+
+        .quad   0xd60ecbb74245ec41
+        .quad   0xfd9be89e34348716
+        .quad   0xc9240afee42284de
+        .quad   0x4472f648d0531db4
+        .quad   0xc8d69d0a57274ed5
+        .quad   0x45ba803260804b17
+        .quad   0xdf3cda102255dfac
+        .quad   0x77d221232709b339
+        .quad   0x498a6d7064ad94d8
+        .quad   0xa5b5c8fd9af62263
+        .quad   0x8ca8ed0545c141f4
+        .quad   0x2c63bec3662d358c
+
+// 2^116 * 3 * G
+
+        .quad   0x7fe60d8bea787955
+        .quad   0xb9dc117eb5f401b7
+        .quad   0x91c7c09a19355cce
+        .quad   0x22692ef59442bedf
+        .quad   0x9a518b3a8586f8bf
+        .quad   0x9ee71af6cbb196f0
+        .quad   0xaa0625e6a2385cf2
+        .quad   0x1deb2176ddd7c8d1
+        .quad   0x8563d19a2066cf6c
+        .quad   0x401bfd8c4dcc7cd7
+        .quad   0xd976a6becd0d8f62
+        .quad   0x67cfd773a278b05e
+
+// 2^116 * 4 * G
+
+        .quad   0x8dec31faef3ee475
+        .quad   0x99dbff8a9e22fd92
+        .quad   0x512d11594e26cab1
+        .quad   0x0cde561eec4310b9
+        .quad   0x2d5fa9855a4e586a
+        .quad   0x65f8f7a449beab7e
+        .quad   0xaa074dddf21d33d3
+        .quad   0x185cba721bcb9dee
+        .quad   0x93869da3f4e3cb41
+        .quad   0xbf0392f540f7977e
+        .quad   0x026204fcd0463b83
+        .quad   0x3ec91a769eec6eed
+
+// 2^116 * 5 * G
+
+        .quad   0x1e9df75bf78166ad
+        .quad   0x4dfda838eb0cd7af
+        .quad   0xba002ed8c1eaf988
+        .quad   0x13fedb3e11f33cfc
+        .quad   0x0fad2fb7b0a3402f
+        .quad   0x46615ecbfb69f4a8
+        .quad   0xf745bcc8c5f8eaa6
+        .quad   0x7a5fa8794a94e896
+        .quad   0x52958faa13cd67a1
+        .quad   0x965ee0818bdbb517
+        .quad   0x16e58daa2e8845b3
+        .quad   0x357d397d5499da8f
+
+// 2^116 * 6 * G
+
+        .quad   0x1ebfa05fb0bace6c
+        .quad   0xc934620c1caf9a1e
+        .quad   0xcc771cc41d82b61a
+        .quad   0x2d94a16aa5f74fec
+        .quad   0x481dacb4194bfbf8
+        .quad   0x4d77e3f1bae58299
+        .quad   0x1ef4612e7d1372a0
+        .quad   0x3a8d867e70ff69e1
+        .quad   0x6f58cd5d55aff958
+        .quad   0xba3eaa5c75567721
+        .quad   0x75c123999165227d
+        .quad   0x69be1343c2f2b35e
+
+// 2^116 * 7 * G
+
+        .quad   0x0e091d5ee197c92a
+        .quad   0x4f51019f2945119f
+        .quad   0x143679b9f034e99c
+        .quad   0x7d88112e4d24c696
+        .quad   0x82bbbdac684b8de3
+        .quad   0xa2f4c7d03fca0718
+        .quad   0x337f92fbe096aaa8
+        .quad   0x200d4d8c63587376
+        .quad   0x208aed4b4893b32b
+        .quad   0x3efbf23ebe59b964
+        .quad   0xd762deb0dba5e507
+        .quad   0x69607bd681bd9d94
+
+// 2^116 * 8 * G
+
+        .quad   0xf6be021068de1ce1
+        .quad   0xe8d518e70edcbc1f
+        .quad   0xe3effdd01b5505a5
+        .quad   0x35f63353d3ec3fd0
+        .quad   0x3b7f3bd49323a902
+        .quad   0x7c21b5566b2c6e53
+        .quad   0xe5ba8ff53a7852a7
+        .quad   0x28bc77a5838ece00
+        .quad   0x63ba78a8e25d8036
+        .quad   0x63651e0094333490
+        .quad   0x48d82f20288ce532
+        .quad   0x3a31abfa36b57524
+
+// 2^120 * 1 * G
+
+        .quad   0x239e9624089c0a2e
+        .quad   0xc748c4c03afe4738
+        .quad   0x17dbed2a764fa12a
+        .quad   0x639b93f0321c8582
+        .quad   0xc08f788f3f78d289
+        .quad   0xfe30a72ca1404d9f
+        .quad   0xf2778bfccf65cc9d
+        .quad   0x7ee498165acb2021
+        .quad   0x7bd508e39111a1c3
+        .quad   0x2b2b90d480907489
+        .quad   0xe7d2aec2ae72fd19
+        .quad   0x0edf493c85b602a6
+
+// 2^120 * 2 * G
+
+        .quad   0xaecc8158599b5a68
+        .quad   0xea574f0febade20e
+        .quad   0x4fe41d7422b67f07
+        .quad   0x403b92e3019d4fb4
+        .quad   0x6767c4d284764113
+        .quad   0xa090403ff7f5f835
+        .quad   0x1c8fcffacae6bede
+        .quad   0x04c00c54d1dfa369
+        .quad   0x4dc22f818b465cf8
+        .quad   0x71a0f35a1480eff8
+        .quad   0xaee8bfad04c7d657
+        .quad   0x355bb12ab26176f4
+
+// 2^120 * 3 * G
+
+        .quad   0xa71e64cc7493bbf4
+        .quad   0xe5bd84d9eca3b0c3
+        .quad   0x0a6bc50cfa05e785
+        .quad   0x0f9b8132182ec312
+        .quad   0xa301dac75a8c7318
+        .quad   0xed90039db3ceaa11
+        .quad   0x6f077cbf3bae3f2d
+        .quad   0x7518eaf8e052ad8e
+        .quad   0xa48859c41b7f6c32
+        .quad   0x0f2d60bcf4383298
+        .quad   0x1815a929c9b1d1d9
+        .quad   0x47c3871bbb1755c4
+
+// 2^120 * 4 * G
+
+        .quad   0x5144539771ec4f48
+        .quad   0xf805b17dc98c5d6e
+        .quad   0xf762c11a47c3c66b
+        .quad   0x00b89b85764699dc
+        .quad   0xfbe65d50c85066b0
+        .quad   0x62ecc4b0b3a299b0
+        .quad   0xe53754ea441ae8e0
+        .quad   0x08fea02ce8d48d5f
+        .quad   0x824ddd7668deead0
+        .quad   0xc86445204b685d23
+        .quad   0xb514cfcd5d89d665
+        .quad   0x473829a74f75d537
+
+// 2^120 * 5 * G
+
+        .quad   0x82d2da754679c418
+        .quad   0xe63bd7d8b2618df0
+        .quad   0x355eef24ac47eb0a
+        .quad   0x2078684c4833c6b4
+        .quad   0x23d9533aad3902c9
+        .quad   0x64c2ddceef03588f
+        .quad   0x15257390cfe12fb4
+        .quad   0x6c668b4d44e4d390
+        .quad   0x3b48cf217a78820c
+        .quad   0xf76a0ab281273e97
+        .quad   0xa96c65a78c8eed7b
+        .quad   0x7411a6054f8a433f
+
+// 2^120 * 6 * G
+
+        .quad   0x4d659d32b99dc86d
+        .quad   0x044cdc75603af115
+        .quad   0xb34c712cdcc2e488
+        .quad   0x7c136574fb8134ff
+        .quad   0x579ae53d18b175b4
+        .quad   0x68713159f392a102
+        .quad   0x8455ecba1eef35f5
+        .quad   0x1ec9a872458c398f
+        .quad   0xb8e6a4d400a2509b
+        .quad   0x9b81d7020bc882b4
+        .quad   0x57e7cc9bf1957561
+        .quad   0x3add88a5c7cd6460
+
+// 2^120 * 7 * G
+
+        .quad   0xab895770b635dcf2
+        .quad   0x02dfef6cf66c1fbc
+        .quad   0x85530268beb6d187
+        .quad   0x249929fccc879e74
+        .quad   0x85c298d459393046
+        .quad   0x8f7e35985ff659ec
+        .quad   0x1d2ca22af2f66e3a
+        .quad   0x61ba1131a406a720
+        .quad   0xa3d0a0f116959029
+        .quad   0x023b6b6cba7ebd89
+        .quad   0x7bf15a3e26783307
+        .quad   0x5620310cbbd8ece7
+
+// 2^120 * 8 * G
+
+        .quad   0x528993434934d643
+        .quad   0xb9dbf806a51222f5
+        .quad   0x8f6d878fc3f41c22
+        .quad   0x37676a2a4d9d9730
+        .quad   0x6646b5f477e285d6
+        .quad   0x40e8ff676c8f6193
+        .quad   0xa6ec7311abb594dd
+        .quad   0x7ec846f3658cec4d
+        .quad   0x9b5e8f3f1da22ec7
+        .quad   0x130f1d776c01cd13
+        .quad   0x214c8fcfa2989fb8
+        .quad   0x6daaf723399b9dd5
+
+// 2^124 * 1 * G
+
+        .quad   0x591e4a5610628564
+        .quad   0x2a4bb87ca8b4df34
+        .quad   0xde2a2572e7a38e43
+        .quad   0x3cbdabd9fee5046e
+        .quad   0x81aebbdd2cd13070
+        .quad   0x962e4325f85a0e9e
+        .quad   0xde9391aacadffecb
+        .quad   0x53177fda52c230e6
+        .quad   0xa7bc970650b9de79
+        .quad   0x3d12a7fbc301b59b
+        .quad   0x02652e68d36ae38c
+        .quad   0x79d739835a6199dc
+
+// 2^124 * 2 * G
+
+        .quad   0xd9354df64131c1bd
+        .quad   0x758094a186ec5822
+        .quad   0x4464ee12e459f3c2
+        .quad   0x6c11fce4cb133282
+        .quad   0x21c9d9920d591737
+        .quad   0x9bea41d2e9b46cd6
+        .quad   0xe20e84200d89bfca
+        .quad   0x79d99f946eae5ff8
+        .quad   0xf17b483568673205
+        .quad   0x387deae83caad96c
+        .quad   0x61b471fd56ffe386
+        .quad   0x31741195b745a599
+
+// 2^124 * 3 * G
+
+        .quad   0xe8d10190b77a360b
+        .quad   0x99b983209995e702
+        .quad   0xbd4fdff8fa0247aa
+        .quad   0x2772e344e0d36a87
+        .quad   0x17f8ba683b02a047
+        .quad   0x50212096feefb6c8
+        .quad   0x70139be21556cbe2
+        .quad   0x203e44a11d98915b
+        .quad   0xd6863eba37b9e39f
+        .quad   0x105bc169723b5a23
+        .quad   0x104f6459a65c0762
+        .quad   0x567951295b4d38d4
+
+// 2^124 * 4 * G
+
+        .quad   0x535fd60613037524
+        .quad   0xe210adf6b0fbc26a
+        .quad   0xac8d0a9b23e990ae
+        .quad   0x47204d08d72fdbf9
+        .quad   0x07242eb30d4b497f
+        .quad   0x1ef96306b9bccc87
+        .quad   0x37950934d8116f45
+        .quad   0x05468d6201405b04
+        .quad   0x00f565a9f93267de
+        .quad   0xcecfd78dc0d58e8a
+        .quad   0xa215e2dcf318e28e
+        .quad   0x4599ee919b633352
+
+// 2^124 * 5 * G
+
+        .quad   0xd3c220ca70e0e76b
+        .quad   0xb12bea58ea9f3094
+        .quad   0x294ddec8c3271282
+        .quad   0x0c3539e1a1d1d028
+        .quad   0xac746d6b861ae579
+        .quad   0x31ab0650f6aea9dc
+        .quad   0x241d661140256d4c
+        .quad   0x2f485e853d21a5de
+        .quad   0x329744839c0833f3
+        .quad   0x6fe6257fd2abc484
+        .quad   0x5327d1814b358817
+        .quad   0x65712585893fe9bc
+
+// 2^124 * 6 * G
+
+        .quad   0x9c102fb732a61161
+        .quad   0xe48e10dd34d520a8
+        .quad   0x365c63546f9a9176
+        .quad   0x32f6fe4c046f6006
+        .quad   0x81c29f1bd708ee3f
+        .quad   0xddcb5a05ae6407d0
+        .quad   0x97aec1d7d2a3eba7
+        .quad   0x1590521a91d50831
+        .quad   0x40a3a11ec7910acc
+        .quad   0x9013dff8f16d27ae
+        .quad   0x1a9720d8abb195d4
+        .quad   0x1bb9fe452ea98463
+
+// 2^124 * 7 * G
+
+        .quad   0xe9d1d950b3d54f9e
+        .quad   0x2d5f9cbee00d33c1
+        .quad   0x51c2c656a04fc6ac
+        .quad   0x65c091ee3c1cbcc9
+        .quad   0xcf5e6c95cc36747c
+        .quad   0x294201536b0bc30d
+        .quad   0x453ac67cee797af0
+        .quad   0x5eae6ab32a8bb3c9
+        .quad   0x7083661114f118ea
+        .quad   0x2b37b87b94349cad
+        .quad   0x7273f51cb4e99f40
+        .quad   0x78a2a95823d75698
+
+// 2^124 * 8 * G
+
+        .quad   0xa2b072e95c8c2ace
+        .quad   0x69cffc96651e9c4b
+        .quad   0x44328ef842e7b42b
+        .quad   0x5dd996c122aadeb3
+        .quad   0xb4f23c425ef83207
+        .quad   0xabf894d3c9a934b5
+        .quad   0xd0708c1339fd87f7
+        .quad   0x1876789117166130
+        .quad   0x925b5ef0670c507c
+        .quad   0x819bc842b93c33bf
+        .quad   0x10792e9a70dd003f
+        .quad   0x59ad4b7a6e28dc74
+
+// 2^128 * 1 * G
+
+        .quad   0x5f3a7562eb3dbe47
+        .quad   0xf7ea38548ebda0b8
+        .quad   0x00c3e53145747299
+        .quad   0x1304e9e71627d551
+        .quad   0x583b04bfacad8ea2
+        .quad   0x29b743e8148be884
+        .quad   0x2b1e583b0810c5db
+        .quad   0x2b5449e58eb3bbaa
+        .quad   0x789814d26adc9cfe
+        .quad   0x3c1bab3f8b48dd0b
+        .quad   0xda0fe1fff979c60a
+        .quad   0x4468de2d7c2dd693
+
+// 2^128 * 2 * G
+
+        .quad   0x51bb355e9419469e
+        .quad   0x33e6dc4c23ddc754
+        .quad   0x93a5b6d6447f9962
+        .quad   0x6cce7c6ffb44bd63
+        .quad   0x4b9ad8c6f86307ce
+        .quad   0x21113531435d0c28
+        .quad   0xd4a866c5657a772c
+        .quad   0x5da6427e63247352
+        .quad   0x1a94c688deac22ca
+        .quad   0xb9066ef7bbae1ff8
+        .quad   0x88ad8c388d59580f
+        .quad   0x58f29abfe79f2ca8
+
+// 2^128 * 3 * G
+
+        .quad   0xe90ecfab8de73e68
+        .quad   0x54036f9f377e76a5
+        .quad   0xf0495b0bbe015982
+        .quad   0x577629c4a7f41e36
+        .quad   0x4b5a64bf710ecdf6
+        .quad   0xb14ce538462c293c
+        .quad   0x3643d056d50b3ab9
+        .quad   0x6af93724185b4870
+        .quad   0x3220024509c6a888
+        .quad   0xd2e036134b558973
+        .quad   0x83e236233c33289f
+        .quad   0x701f25bb0caec18f
+
+// 2^128 * 4 * G
+
+        .quad   0xc3a8b0f8e4616ced
+        .quad   0xf700660e9e25a87d
+        .quad   0x61e3061ff4bca59c
+        .quad   0x2e0c92bfbdc40be9
+        .quad   0x9d18f6d97cbec113
+        .quad   0x844a06e674bfdbe4
+        .quad   0x20f5b522ac4e60d6
+        .quad   0x720a5bc050955e51
+        .quad   0x0c3f09439b805a35
+        .quad   0xe84e8b376242abfc
+        .quad   0x691417f35c229346
+        .quad   0x0e9b9cbb144ef0ec
+
+// 2^128 * 5 * G
+
+        .quad   0xfbbad48ffb5720ad
+        .quad   0xee81916bdbf90d0e
+        .quad   0xd4813152635543bf
+        .quad   0x221104eb3f337bd8
+        .quad   0x8dee9bd55db1beee
+        .quad   0xc9c3ab370a723fb9
+        .quad   0x44a8f1bf1c68d791
+        .quad   0x366d44191cfd3cde
+        .quad   0x9e3c1743f2bc8c14
+        .quad   0x2eda26fcb5856c3b
+        .quad   0xccb82f0e68a7fb97
+        .quad   0x4167a4e6bc593244
+
+// 2^128 * 6 * G
+
+        .quad   0x643b9d2876f62700
+        .quad   0x5d1d9d400e7668eb
+        .quad   0x1b4b430321fc0684
+        .quad   0x7938bb7e2255246a
+        .quad   0xc2be2665f8ce8fee
+        .quad   0xe967ff14e880d62c
+        .quad   0xf12e6e7e2f364eee
+        .quad   0x34b33370cb7ed2f6
+        .quad   0xcdc591ee8681d6cc
+        .quad   0xce02109ced85a753
+        .quad   0xed7485c158808883
+        .quad   0x1176fc6e2dfe65e4
+
+// 2^128 * 7 * G
+
+        .quad   0xb4af6cd05b9c619b
+        .quad   0x2ddfc9f4b2a58480
+        .quad   0x3d4fa502ebe94dc4
+        .quad   0x08fc3a4c677d5f34
+        .quad   0xdb90e28949770eb8
+        .quad   0x98fbcc2aacf440a3
+        .quad   0x21354ffeded7879b
+        .quad   0x1f6a3e54f26906b6
+        .quad   0x60a4c199d30734ea
+        .quad   0x40c085b631165cd6
+        .quad   0xe2333e23f7598295
+        .quad   0x4f2fad0116b900d1
+
+// 2^128 * 8 * G
+
+        .quad   0x44beb24194ae4e54
+        .quad   0x5f541c511857ef6c
+        .quad   0xa61e6b2d368d0498
+        .quad   0x445484a4972ef7ab
+        .quad   0x962cd91db73bb638
+        .quad   0xe60577aafc129c08
+        .quad   0x6f619b39f3b61689
+        .quad   0x3451995f2944ee81
+        .quad   0x9152fcd09fea7d7c
+        .quad   0x4a816c94b0935cf6
+        .quad   0x258e9aaa47285c40
+        .quad   0x10b89ca6042893b7
+
+// 2^132 * 1 * G
+
+        .quad   0x9b2a426e3b646025
+        .quad   0x32127190385ce4cf
+        .quad   0xa25cffc2dd6dea45
+        .quad   0x06409010bea8de75
+        .quad   0xd67cded679d34aa0
+        .quad   0xcc0b9ec0cc4db39f
+        .quad   0xa535a456e35d190f
+        .quad   0x2e05d9eaf61f6fef
+        .quad   0xc447901ad61beb59
+        .quad   0x661f19bce5dc880a
+        .quad   0x24685482b7ca6827
+        .quad   0x293c778cefe07f26
+
+// 2^132 * 2 * G
+
+        .quad   0x86809e7007069096
+        .quad   0xaad75b15e4e50189
+        .quad   0x07f35715a21a0147
+        .quad   0x0487f3f112815d5e
+        .quad   0x16c795d6a11ff200
+        .quad   0xcb70d0e2b15815c9
+        .quad   0x89f293209b5395b5
+        .quad   0x50b8c2d031e47b4f
+        .quad   0x48350c08068a4962
+        .quad   0x6ffdd05351092c9a
+        .quad   0x17af4f4aaf6fc8dd
+        .quad   0x4b0553b53cdba58b
+
+// 2^132 * 3 * G
+
+        .quad   0x9c65fcbe1b32ff79
+        .quad   0xeb75ea9f03b50f9b
+        .quad   0xfced2a6c6c07e606
+        .quad   0x35106cd551717908
+        .quad   0xbf05211b27c152d4
+        .quad   0x5ec26849bd1af639
+        .quad   0x5e0b2caa8e6fab98
+        .quad   0x054c8bdd50bd0840
+        .quad   0x38a0b12f1dcf073d
+        .quad   0x4b60a8a3b7f6a276
+        .quad   0xfed5ac25d3404f9a
+        .quad   0x72e82d5e5505c229
+
+// 2^132 * 4 * G
+
+        .quad   0x6b0b697ff0d844c8
+        .quad   0xbb12f85cd979cb49
+        .quad   0xd2a541c6c1da0f1f
+        .quad   0x7b7c242958ce7211
+        .quad   0x00d9cdfd69771d02
+        .quad   0x410276cd6cfbf17e
+        .quad   0x4c45306c1cb12ec7
+        .quad   0x2857bf1627500861
+        .quad   0x9f21903f0101689e
+        .quad   0xd779dfd3bf861005
+        .quad   0xa122ee5f3deb0f1b
+        .quad   0x510df84b485a00d4
+
+// 2^132 * 5 * G
+
+        .quad   0xa54133bb9277a1fa
+        .quad   0x74ec3b6263991237
+        .quad   0x1a3c54dc35d2f15a
+        .quad   0x2d347144e482ba3a
+        .quad   0x24b3c887c70ac15e
+        .quad   0xb0f3a557fb81b732
+        .quad   0x9b2cde2fe578cc1b
+        .quad   0x4cf7ed0703b54f8e
+        .quad   0x6bd47c6598fbee0f
+        .quad   0x9e4733e2ab55be2d
+        .quad   0x1093f624127610c5
+        .quad   0x4e05e26ad0a1eaa4
+
+// 2^132 * 6 * G
+
+        .quad   0xda9b6b624b531f20
+        .quad   0x429a760e77509abb
+        .quad   0xdbe9f522e823cb80
+        .quad   0x618f1856880c8f82
+        .quad   0x1833c773e18fe6c0
+        .quad   0xe3c4711ad3c87265
+        .quad   0x3bfd3c4f0116b283
+        .quad   0x1955875eb4cd4db8
+        .quad   0x6da6de8f0e399799
+        .quad   0x7ad61aa440fda178
+        .quad   0xb32cd8105e3563dd
+        .quad   0x15f6beae2ae340ae
+
+// 2^132 * 7 * G
+
+        .quad   0x862bcb0c31ec3a62
+        .quad   0x810e2b451138f3c2
+        .quad   0x788ec4b839dac2a4
+        .quad   0x28f76867ae2a9281
+        .quad   0xba9a0f7b9245e215
+        .quad   0xf368612dd98c0dbb
+        .quad   0x2e84e4cbf220b020
+        .quad   0x6ba92fe962d90eda
+        .quad   0x3e4df9655884e2aa
+        .quad   0xbd62fbdbdbd465a5
+        .quad   0xd7596caa0de9e524
+        .quad   0x6e8042ccb2b1b3d7
+
+// 2^132 * 8 * G
+
+        .quad   0xf10d3c29ce28ca6e
+        .quad   0xbad34540fcb6093d
+        .quad   0xe7426ed7a2ea2d3f
+        .quad   0x08af9d4e4ff298b9
+        .quad   0x1530653616521f7e
+        .quad   0x660d06b896203dba
+        .quad   0x2d3989bc545f0879
+        .quad   0x4b5303af78ebd7b0
+        .quad   0x72f8a6c3bebcbde8
+        .quad   0x4f0fca4adc3a8e89
+        .quad   0x6fa9d4e8c7bfdf7a
+        .quad   0x0dcf2d679b624eb7
+
+// 2^136 * 1 * G
+
+        .quad   0x3d5947499718289c
+        .quad   0x12ebf8c524533f26
+        .quad   0x0262bfcb14c3ef15
+        .quad   0x20b878d577b7518e
+        .quad   0x753941be5a45f06e
+        .quad   0xd07caeed6d9c5f65
+        .quad   0x11776b9c72ff51b6
+        .quad   0x17d2d1d9ef0d4da9
+        .quad   0x27f2af18073f3e6a
+        .quad   0xfd3fe519d7521069
+        .quad   0x22e3b72c3ca60022
+        .quad   0x72214f63cc65c6a7
+
+// 2^136 * 2 * G
+
+        .quad   0xb4e37f405307a693
+        .quad   0xaba714d72f336795
+        .quad   0xd6fbd0a773761099
+        .quad   0x5fdf48c58171cbc9
+        .quad   0x1d9db7b9f43b29c9
+        .quad   0xd605824a4f518f75
+        .quad   0xf2c072bd312f9dc4
+        .quad   0x1f24ac855a1545b0
+        .quad   0x24d608328e9505aa
+        .quad   0x4748c1d10c1420ee
+        .quad   0xc7ffe45c06fb25a2
+        .quad   0x00ba739e2ae395e6
+
+// 2^136 * 3 * G
+
+        .quad   0x592e98de5c8790d6
+        .quad   0xe5bfb7d345c2a2df
+        .quad   0x115a3b60f9b49922
+        .quad   0x03283a3e67ad78f3
+        .quad   0xae4426f5ea88bb26
+        .quad   0x360679d984973bfb
+        .quad   0x5c9f030c26694e50
+        .quad   0x72297de7d518d226
+        .quad   0x48241dc7be0cb939
+        .quad   0x32f19b4d8b633080
+        .quad   0xd3dfc90d02289308
+        .quad   0x05e1296846271945
+
+// 2^136 * 4 * G
+
+        .quad   0xba82eeb32d9c495a
+        .quad   0xceefc8fcf12bb97c
+        .quad   0xb02dabae93b5d1e0
+        .quad   0x39c00c9c13698d9b
+        .quad   0xadbfbbc8242c4550
+        .quad   0xbcc80cecd03081d9
+        .quad   0x843566a6f5c8df92
+        .quad   0x78cf25d38258ce4c
+        .quad   0x15ae6b8e31489d68
+        .quad   0xaa851cab9c2bf087
+        .quad   0xc9a75a97f04efa05
+        .quad   0x006b52076b3ff832
+
+// 2^136 * 5 * G
+
+        .quad   0x29e0cfe19d95781c
+        .quad   0xb681df18966310e2
+        .quad   0x57df39d370516b39
+        .quad   0x4d57e3443bc76122
+        .quad   0xf5cb7e16b9ce082d
+        .quad   0x3407f14c417abc29
+        .quad   0xd4b36bce2bf4a7ab
+        .quad   0x7de2e9561a9f75ce
+        .quad   0xde70d4f4b6a55ecb
+        .quad   0x4801527f5d85db99
+        .quad   0xdbc9c440d3ee9a81
+        .quad   0x6b2a90af1a6029ed
+
+// 2^136 * 6 * G
+
+        .quad   0x6923f4fc9ae61e97
+        .quad   0x5735281de03f5fd1
+        .quad   0xa764ae43e6edd12d
+        .quad   0x5fd8f4e9d12d3e4a
+        .quad   0x77ebf3245bb2d80a
+        .quad   0xd8301b472fb9079b
+        .quad   0xc647e6f24cee7333
+        .quad   0x465812c8276c2109
+        .quad   0x4d43beb22a1062d9
+        .quad   0x7065fb753831dc16
+        .quad   0x180d4a7bde2968d7
+        .quad   0x05b32c2b1cb16790
+
+// 2^136 * 7 * G
+
+        .quad   0xc8c05eccd24da8fd
+        .quad   0xa1cf1aac05dfef83
+        .quad   0xdbbeeff27df9cd61
+        .quad   0x3b5556a37b471e99
+        .quad   0xf7fca42c7ad58195
+        .quad   0x3214286e4333f3cc
+        .quad   0xb6c29d0d340b979d
+        .quad   0x31771a48567307e1
+        .quad   0x32b0c524e14dd482
+        .quad   0xedb351541a2ba4b6
+        .quad   0xa3d16048282b5af3
+        .quad   0x4fc079d27a7336eb
+
+// 2^136 * 8 * G
+
+        .quad   0x51c938b089bf2f7f
+        .quad   0x2497bd6502dfe9a7
+        .quad   0xffffc09c7880e453
+        .quad   0x124567cecaf98e92
+        .quad   0xdc348b440c86c50d
+        .quad   0x1337cbc9cc94e651
+        .quad   0x6422f74d643e3cb9
+        .quad   0x241170c2bae3cd08
+        .quad   0x3ff9ab860ac473b4
+        .quad   0xf0911dee0113e435
+        .quad   0x4ae75060ebc6c4af
+        .quad   0x3f8612966c87000d
+
+// 2^140 * 1 * G
+
+        .quad   0x0c9c5303f7957be4
+        .quad   0xa3c31a20e085c145
+        .quad   0xb0721d71d0850050
+        .quad   0x0aba390eab0bf2da
+        .quad   0x529fdffe638c7bf3
+        .quad   0xdf2b9e60388b4995
+        .quad   0xe027b34f1bad0249
+        .quad   0x7bc92fc9b9fa74ed
+        .quad   0x9f97ef2e801ad9f9
+        .quad   0x83697d5479afda3a
+        .quad   0xe906b3ffbd596b50
+        .quad   0x02672b37dd3fb8e0
+
+// 2^140 * 2 * G
+
+        .quad   0x48b2ca8b260885e4
+        .quad   0xa4286bec82b34c1c
+        .quad   0x937e1a2617f58f74
+        .quad   0x741d1fcbab2ca2a5
+        .quad   0xee9ba729398ca7f5
+        .quad   0xeb9ca6257a4849db
+        .quad   0x29eb29ce7ec544e1
+        .quad   0x232ca21ef736e2c8
+        .quad   0xbf61423d253fcb17
+        .quad   0x08803ceafa39eb14
+        .quad   0xf18602df9851c7af
+        .quad   0x0400f3a049e3414b
+
+// 2^140 * 3 * G
+
+        .quad   0xabce0476ba61c55b
+        .quad   0x36a3d6d7c4d39716
+        .quad   0x6eb259d5e8d82d09
+        .quad   0x0c9176e984d756fb
+        .quad   0x2efba412a06e7b06
+        .quad   0x146785452c8d2560
+        .quad   0xdf9713ebd67a91c7
+        .quad   0x32830ac7157eadf3
+        .quad   0x0e782a7ab73769e8
+        .quad   0x04a05d7875b18e2c
+        .quad   0x29525226ebcceae1
+        .quad   0x0d794f8383eba820
+
+// 2^140 * 4 * G
+
+        .quad   0xff35f5cb9e1516f4
+        .quad   0xee805bcf648aae45
+        .quad   0xf0d73c2bb93a9ef3
+        .quad   0x097b0bf22092a6c2
+        .quad   0x7be44ce7a7a2e1ac
+        .quad   0x411fd93efad1b8b7
+        .quad   0x1734a1d70d5f7c9b
+        .quad   0x0d6592233127db16
+        .quad   0xc48bab1521a9d733
+        .quad   0xa6c2eaead61abb25
+        .quad   0x625c6c1cc6cb4305
+        .quad   0x7fc90fea93eb3a67
+
+// 2^140 * 5 * G
+
+        .quad   0x0408f1fe1f5c5926
+        .quad   0x1a8f2f5e3b258bf4
+        .quad   0x40a951a2fdc71669
+        .quad   0x6598ee93c98b577e
+        .quad   0xc527deb59c7cb23d
+        .quad   0x955391695328404e
+        .quad   0xd64392817ccf2c7a
+        .quad   0x6ce97dabf7d8fa11
+        .quad   0x25b5a8e50ef7c48f
+        .quad   0xeb6034116f2ce532
+        .quad   0xc5e75173e53de537
+        .quad   0x73119fa08c12bb03
+
+// 2^140 * 6 * G
+
+        .quad   0xed30129453f1a4cb
+        .quad   0xbce621c9c8f53787
+        .quad   0xfacb2b1338bee7b9
+        .quad   0x3025798a9ea8428c
+        .quad   0x7845b94d21f4774d
+        .quad   0xbf62f16c7897b727
+        .quad   0x671857c03c56522b
+        .quad   0x3cd6a85295621212
+        .quad   0x3fecde923aeca999
+        .quad   0xbdaa5b0062e8c12f
+        .quad   0x67b99dfc96988ade
+        .quad   0x3f52c02852661036
+
+// 2^140 * 7 * G
+
+        .quad   0xffeaa48e2a1351c6
+        .quad   0x28624754fa7f53d7
+        .quad   0x0b5ba9e57582ddf1
+        .quad   0x60c0104ba696ac59
+        .quad   0x9258bf99eec416c6
+        .quad   0xac8a5017a9d2f671
+        .quad   0x629549ab16dea4ab
+        .quad   0x05d0e85c99091569
+        .quad   0x051de020de9cbe97
+        .quad   0xfa07fc56b50bcf74
+        .quad   0x378cec9f0f11df65
+        .quad   0x36853c69ab96de4d
+
+// 2^140 * 8 * G
+
+        .quad   0x36d9b8de78f39b2d
+        .quad   0x7f42ed71a847b9ec
+        .quad   0x241cd1d679bd3fde
+        .quad   0x6a704fec92fbce6b
+        .quad   0x4433c0b0fac5e7be
+        .quad   0x724bae854c08dcbe
+        .quad   0xf1f24cc446978f9b
+        .quad   0x4a0aff6d62825fc8
+        .quad   0xe917fb9e61095301
+        .quad   0xc102df9402a092f8
+        .quad   0xbf09e2f5fa66190b
+        .quad   0x681109bee0dcfe37
+
+// 2^144 * 1 * G
+
+        .quad   0x559a0cc9782a0dde
+        .quad   0x551dcdb2ea718385
+        .quad   0x7f62865b31ef238c
+        .quad   0x504aa7767973613d
+        .quad   0x9c18fcfa36048d13
+        .quad   0x29159db373899ddd
+        .quad   0xdc9f350b9f92d0aa
+        .quad   0x26f57eee878a19d4
+        .quad   0x0cab2cd55687efb1
+        .quad   0x5180d162247af17b
+        .quad   0x85c15a344f5a2467
+        .quad   0x4041943d9dba3069
+
+// 2^144 * 2 * G
+
+        .quad   0xc3c0eeba43ebcc96
+        .quad   0x8d749c9c26ea9caf
+        .quad   0xd9fa95ee1c77ccc6
+        .quad   0x1420a1d97684340f
+        .quad   0x4b217743a26caadd
+        .quad   0x47a6b424648ab7ce
+        .quad   0xcb1d4f7a03fbc9e3
+        .quad   0x12d931429800d019
+        .quad   0x00c67799d337594f
+        .quad   0x5e3c5140b23aa47b
+        .quad   0x44182854e35ff395
+        .quad   0x1b4f92314359a012
+
+// 2^144 * 3 * G
+
+        .quad   0x3e5c109d89150951
+        .quad   0x39cefa912de9696a
+        .quad   0x20eae43f975f3020
+        .quad   0x239b572a7f132dae
+        .quad   0x33cf3030a49866b1
+        .quad   0x251f73d2215f4859
+        .quad   0xab82aa4051def4f6
+        .quad   0x5ff191d56f9a23f6
+        .quad   0x819ed433ac2d9068
+        .quad   0x2883ab795fc98523
+        .quad   0xef4572805593eb3d
+        .quad   0x020c526a758f36cb
+
+// 2^144 * 4 * G
+
+        .quad   0x779834f89ed8dbbc
+        .quad   0xc8f2aaf9dc7ca46c
+        .quad   0xa9524cdca3e1b074
+        .quad   0x02aacc4615313877
+        .quad   0xe931ef59f042cc89
+        .quad   0x2c589c9d8e124bb6
+        .quad   0xadc8e18aaec75997
+        .quad   0x452cfe0a5602c50c
+        .quad   0x86a0f7a0647877df
+        .quad   0xbbc464270e607c9f
+        .quad   0xab17ea25f1fb11c9
+        .quad   0x4cfb7d7b304b877b
+
+// 2^144 * 5 * G
+
+        .quad   0x72b43d6cb89b75fe
+        .quad   0x54c694d99c6adc80
+        .quad   0xb8c3aa373ee34c9f
+        .quad   0x14b4622b39075364
+        .quad   0xe28699c29789ef12
+        .quad   0x2b6ecd71df57190d
+        .quad   0xc343c857ecc970d0
+        .quad   0x5b1d4cbc434d3ac5
+        .quad   0xb6fb2615cc0a9f26
+        .quad   0x3a4f0e2bb88dcce5
+        .quad   0x1301498b3369a705
+        .quad   0x2f98f71258592dd1
+
+// 2^144 * 6 * G
+
+        .quad   0x0c94a74cb50f9e56
+        .quad   0x5b1ff4a98e8e1320
+        .quad   0x9a2acc2182300f67
+        .quad   0x3a6ae249d806aaf9
+        .quad   0x2e12ae444f54a701
+        .quad   0xfcfe3ef0a9cbd7de
+        .quad   0xcebf890d75835de0
+        .quad   0x1d8062e9e7614554
+        .quad   0x657ada85a9907c5a
+        .quad   0x1a0ea8b591b90f62
+        .quad   0x8d0e1dfbdf34b4e9
+        .quad   0x298b8ce8aef25ff3
+
+// 2^144 * 7 * G
+
+        .quad   0x2a927953eff70cb2
+        .quad   0x4b89c92a79157076
+        .quad   0x9418457a30a7cf6a
+        .quad   0x34b8a8404d5ce485
+        .quad   0x837a72ea0a2165de
+        .quad   0x3fab07b40bcf79f6
+        .quad   0x521636c77738ae70
+        .quad   0x6ba6271803a7d7dc
+        .quad   0xc26eecb583693335
+        .quad
0xd5a813df63b5fefd + .quad 0xa293aa9aa4b22573 + .quad 0x71d62bdd465e1c6a + + // 2^144 * 8 * G + + .quad 0x6533cc28d378df80 + .quad 0xf6db43790a0fa4b4 + .quad 0xe3645ff9f701da5a + .quad 0x74d5f317f3172ba4 + .quad 0xcd2db5dab1f75ef5 + .quad 0xd77f95cf16b065f5 + .quad 0x14571fea3f49f085 + .quad 0x1c333621262b2b3d + .quad 0xa86fe55467d9ca81 + .quad 0x398b7c752b298c37 + .quad 0xda6d0892e3ac623b + .quad 0x4aebcc4547e9d98c + + // 2^148 * 1 * G + + .quad 0x53175a7205d21a77 + .quad 0xb0c04422d3b934d4 + .quad 0xadd9f24bdd5deadc + .quad 0x074f46e69f10ff8c + .quad 0x0de9b204a059a445 + .quad 0xe15cb4aa4b17ad0f + .quad 0xe1bbec521f79c557 + .quad 0x2633f1b9d071081b + .quad 0xc1fb4177018b9910 + .quad 0xa6ea20dc6c0fe140 + .quad 0xd661f3e74354c6ff + .quad 0x5ecb72e6f1a3407a + + // 2^148 * 2 * G + + .quad 0xa515a31b2259fb4e + .quad 0x0960f3972bcac52f + .quad 0xedb52fec8d3454cb + .quad 0x382e2720c476c019 + .quad 0xfeeae106e8e86997 + .quad 0x9863337f98d09383 + .quad 0x9470480eaa06ebef + .quad 0x038b6898d4c5c2d0 + .quad 0xf391c51d8ace50a6 + .quad 0x3142d0b9ae2d2948 + .quad 0xdb4d5a1a7f24ca80 + .quad 0x21aeba8b59250ea8 + + // 2^148 * 3 * G + + .quad 0x24f13b34cf405530 + .quad 0x3c44ea4a43088af7 + .quad 0x5dd5c5170006a482 + .quad 0x118eb8f8890b086d + .quad 0x53853600f0087f23 + .quad 0x4c461879da7d5784 + .quad 0x6af303deb41f6860 + .quad 0x0a3c16c5c27c18ed + .quad 0x17e49c17cc947f3d + .quad 0xccc6eda6aac1d27b + .quad 0xdf6092ceb0f08e56 + .quad 0x4909b3e22c67c36b + + // 2^148 * 4 * G + + .quad 0x9c9c85ea63fe2e89 + .quad 0xbe1baf910e9412ec + .quad 0x8f7baa8a86fbfe7b + .quad 0x0fb17f9fef968b6c + .quad 0x59a16676706ff64e + .quad 0x10b953dd0d86a53d + .quad 0x5848e1e6ce5c0b96 + .quad 0x2d8b78e712780c68 + .quad 0x79d5c62eafc3902b + .quad 0x773a215289e80728 + .quad 0xc38ae640e10120b9 + .quad 0x09ae23717b2b1a6d + + // 2^148 * 5 * G + + .quad 0xbb6a192a4e4d083c + .quad 0x34ace0630029e192 + .quad 0x98245a59aafabaeb + .quad 0x6d9c8a9ada97faac + .quad 0x10ab8fa1ad32b1d0 + .quad 0xe9aced1be2778b24 + .quad 0xa8856bc0373de90f + .quad 0x66f35ddddda53996 + .quad 0xd27d9afb24997323 + .quad 0x1bb7e07ef6f01d2e + .quad 0x2ba7472df52ecc7f + .quad 0x03019b4f646f9dc8 + + // 2^148 * 6 * G + + .quad 0x04a186b5565345cd + .quad 0xeee76610bcc4116a + .quad 0x689c73b478fb2a45 + .quad 0x387dcbff65697512 + .quad 0xaf09b214e6b3dc6b + .quad 0x3f7573b5ad7d2f65 + .quad 0xd019d988100a23b0 + .quad 0x392b63a58b5c35f7 + .quad 0x4093addc9c07c205 + .quad 0xc565be15f532c37e + .quad 0x63dbecfd1583402a + .quad 0x61722b4aef2e032e + + // 2^148 * 7 * G + + .quad 0x0012aafeecbd47af + .quad 0x55a266fb1cd46309 + .quad 0xf203eb680967c72c + .quad 0x39633944ca3c1429 + .quad 0xd6b07a5581cb0e3c + .quad 0x290ff006d9444969 + .quad 0x08680b6a16dcda1f + .quad 0x5568d2b75a06de59 + .quad 0x8d0cb88c1b37cfe1 + .quad 0x05b6a5a3053818f3 + .quad 0xf2e9bc04b787d959 + .quad 0x6beba1249add7f64 + + // 2^148 * 8 * G + + .quad 0x1d06005ca5b1b143 + .quad 0x6d4c6bb87fd1cda2 + .quad 0x6ef5967653fcffe7 + .quad 0x097c29e8c1ce1ea5 + .quad 0x5c3cecb943f5a53b + .quad 0x9cc9a61d06c08df2 + .quad 0xcfba639a85895447 + .quad 0x5a845ae80df09fd5 + .quad 0x4ce97dbe5deb94ca + .quad 0x38d0a4388c709c48 + .quad 0xc43eced4a169d097 + .quad 0x0a1249fff7e587c3 + + // 2^152 * 1 * G + + .quad 0x12f0071b276d01c9 + .quad 0xe7b8bac586c48c70 + .quad 0x5308129b71d6fba9 + .quad 0x5d88fbf95a3db792 + .quad 0x0b408d9e7354b610 + .quad 0x806b32535ba85b6e + .quad 0xdbe63a034a58a207 + .quad 0x173bd9ddc9a1df2c + .quad 0x2b500f1efe5872df + .quad 0x58d6582ed43918c1 + .quad 0xe6ed278ec9673ae0 + .quad 0x06e1cd13b19ea319 + + // 
2^152 * 2 * G + + .quad 0x40d0ad516f166f23 + .quad 0x118e32931fab6abe + .quad 0x3fe35e14a04d088e + .quad 0x3080603526e16266 + .quad 0x472baf629e5b0353 + .quad 0x3baa0b90278d0447 + .quad 0x0c785f469643bf27 + .quad 0x7f3a6a1a8d837b13 + .quad 0xf7e644395d3d800b + .quad 0x95a8d555c901edf6 + .quad 0x68cd7830592c6339 + .quad 0x30d0fded2e51307e + + // 2^152 * 3 * G + + .quad 0xe0594d1af21233b3 + .quad 0x1bdbe78ef0cc4d9c + .quad 0x6965187f8f499a77 + .quad 0x0a9214202c099868 + .quad 0x9cb4971e68b84750 + .quad 0xa09572296664bbcf + .quad 0x5c8de72672fa412b + .quad 0x4615084351c589d9 + .quad 0xbc9019c0aeb9a02e + .quad 0x55c7110d16034cae + .quad 0x0e6df501659932ec + .quad 0x3bca0d2895ca5dfe + + // 2^152 * 4 * G + + .quad 0x40f031bc3c5d62a4 + .quad 0x19fc8b3ecff07a60 + .quad 0x98183da2130fb545 + .quad 0x5631deddae8f13cd + .quad 0x9c688eb69ecc01bf + .quad 0xf0bc83ada644896f + .quad 0xca2d955f5f7a9fe2 + .quad 0x4ea8b4038df28241 + .quad 0x2aed460af1cad202 + .quad 0x46305305a48cee83 + .quad 0x9121774549f11a5f + .quad 0x24ce0930542ca463 + + // 2^152 * 5 * G + + .quad 0x1fe890f5fd06c106 + .quad 0xb5c468355d8810f2 + .quad 0x827808fe6e8caf3e + .quad 0x41d4e3c28a06d74b + .quad 0x3fcfa155fdf30b85 + .quad 0xd2f7168e36372ea4 + .quad 0xb2e064de6492f844 + .quad 0x549928a7324f4280 + .quad 0xf26e32a763ee1a2e + .quad 0xae91e4b7d25ffdea + .quad 0xbc3bd33bd17f4d69 + .quad 0x491b66dec0dcff6a + + // 2^152 * 6 * G + + .quad 0x98f5b13dc7ea32a7 + .quad 0xe3d5f8cc7e16db98 + .quad 0xac0abf52cbf8d947 + .quad 0x08f338d0c85ee4ac + .quad 0x75f04a8ed0da64a1 + .quad 0xed222caf67e2284b + .quad 0x8234a3791f7b7ba4 + .quad 0x4cf6b8b0b7018b67 + .quad 0xc383a821991a73bd + .quad 0xab27bc01df320c7a + .quad 0xc13d331b84777063 + .quad 0x530d4a82eb078a99 + + // 2^152 * 7 * G + + .quad 0x004c3630e1f94825 + .quad 0x7e2d78268cab535a + .quad 0xc7482323cc84ff8b + .quad 0x65ea753f101770b9 + .quad 0x6d6973456c9abf9e + .quad 0x257fb2fc4900a880 + .quad 0x2bacf412c8cfb850 + .quad 0x0db3e7e00cbfbd5b + .quad 0x3d66fc3ee2096363 + .quad 0x81d62c7f61b5cb6b + .quad 0x0fbe044213443b1a + .quad 0x02a4ec1921e1a1db + + // 2^152 * 8 * G + + .quad 0x5ce6259a3b24b8a2 + .quad 0xb8577acc45afa0b8 + .quad 0xcccbe6e88ba07037 + .quad 0x3d143c51127809bf + .quad 0xf5c86162f1cf795f + .quad 0x118c861926ee57f2 + .quad 0x172124851c063578 + .quad 0x36d12b5dec067fcf + .quad 0x126d279179154557 + .quad 0xd5e48f5cfc783a0a + .quad 0x36bdb6e8df179bac + .quad 0x2ef517885ba82859 + + // 2^156 * 1 * G + + .quad 0x88bd438cd11e0d4a + .quad 0x30cb610d43ccf308 + .quad 0xe09a0e3791937bcc + .quad 0x4559135b25b1720c + .quad 0x1ea436837c6da1e9 + .quad 0xf9c189af1fb9bdbe + .quad 0x303001fcce5dd155 + .quad 0x28a7c99ebc57be52 + .quad 0xb8fd9399e8d19e9d + .quad 0x908191cb962423ff + .quad 0xb2b948d747c742a3 + .quad 0x37f33226d7fb44c4 + + // 2^156 * 2 * G + + .quad 0x0dae8767b55f6e08 + .quad 0x4a43b3b35b203a02 + .quad 0xe3725a6e80af8c79 + .quad 0x0f7a7fd1705fa7a3 + .quad 0x33912553c821b11d + .quad 0x66ed42c241e301df + .quad 0x066fcc11104222fd + .quad 0x307a3b41c192168f + .quad 0x8eeb5d076eb55ce0 + .quad 0x2fc536bfaa0d925a + .quad 0xbe81830fdcb6c6e8 + .quad 0x556c7045827baf52 + + // 2^156 * 3 * G + + .quad 0x8e2b517302e9d8b7 + .quad 0xe3e52269248714e8 + .quad 0xbd4fbd774ca960b5 + .quad 0x6f4b4199c5ecada9 + .quad 0xb94b90022bf44406 + .quad 0xabd4237eff90b534 + .quad 0x7600a960faf86d3a + .quad 0x2f45abdac2322ee3 + .quad 0x61af4912c8ef8a6a + .quad 0xe58fa4fe43fb6e5e + .quad 0xb5afcc5d6fd427cf + .quad 0x6a5393281e1e11eb + + // 2^156 * 4 * G + + .quad 0xf3da5139a5d1ee89 + .quad 0x8145457cff936988 + .quad 
0x3f622fed00e188c4 + .quad 0x0f513815db8b5a3d + .quad 0x0fff04fe149443cf + .quad 0x53cac6d9865cddd7 + .quad 0x31385b03531ed1b7 + .quad 0x5846a27cacd1039d + .quad 0x4ff5cdac1eb08717 + .quad 0x67e8b29590f2e9bc + .quad 0x44093b5e237afa99 + .quad 0x0d414bed8708b8b2 + + // 2^156 * 5 * G + + .quad 0xcfb68265fd0e75f6 + .quad 0xe45b3e28bb90e707 + .quad 0x7242a8de9ff92c7a + .quad 0x685b3201933202dd + .quad 0x81886a92294ac9e8 + .quad 0x23162b45d55547be + .quad 0x94cfbc4403715983 + .quad 0x50eb8fdb134bc401 + .quad 0xc0b73ec6d6b330cd + .quad 0x84e44807132faff1 + .quad 0x732b7352c4a5dee1 + .quad 0x5d7c7cf1aa7cd2d2 + + // 2^156 * 6 * G + + .quad 0xaf3b46bf7a4aafa2 + .quad 0xb78705ec4d40d411 + .quad 0x114f0c6aca7c15e3 + .quad 0x3f364faaa9489d4d + .quad 0x33d1013e9b73a562 + .quad 0x925cef5748ec26e1 + .quad 0xa7fce614dd468058 + .quad 0x78b0fad41e9aa438 + .quad 0xbf56a431ed05b488 + .quad 0xa533e66c9c495c7e + .quad 0xe8652baf87f3651a + .quad 0x0241800059d66c33 + + // 2^156 * 7 * G + + .quad 0xceb077fea37a5be4 + .quad 0xdb642f02e5a5eeb7 + .quad 0xc2e6d0c5471270b8 + .quad 0x4771b65538e4529c + .quad 0x28350c7dcf38ea01 + .quad 0x7c6cdbc0b2917ab6 + .quad 0xace7cfbe857082f7 + .quad 0x4d2845aba2d9a1e0 + .quad 0xbb537fe0447070de + .quad 0xcba744436dd557df + .quad 0xd3b5a3473600dbcb + .quad 0x4aeabbe6f9ffd7f8 + + // 2^156 * 8 * G + + .quad 0x4630119e40d8f78c + .quad 0xa01a9bc53c710e11 + .quad 0x486d2b258910dd79 + .quad 0x1e6c47b3db0324e5 + .quad 0x6a2134bcc4a9c8f2 + .quad 0xfbf8fd1c8ace2e37 + .quad 0x000ae3049911a0ba + .quad 0x046e3a616bc89b9e + .quad 0x14e65442f03906be + .quad 0x4a019d54e362be2a + .quad 0x68ccdfec8dc230c7 + .quad 0x7cfb7e3faf6b861c + + // 2^160 * 1 * G + + .quad 0x4637974e8c58aedc + .quad 0xb9ef22fbabf041a4 + .quad 0xe185d956e980718a + .quad 0x2f1b78fab143a8a6 + .quad 0x96eebffb305b2f51 + .quad 0xd3f938ad889596b8 + .quad 0xf0f52dc746d5dd25 + .quad 0x57968290bb3a0095 + .quad 0xf71ab8430a20e101 + .quad 0xf393658d24f0ec47 + .quad 0xcf7509a86ee2eed1 + .quad 0x7dc43e35dc2aa3e1 + + // 2^160 * 2 * G + + .quad 0x85966665887dd9c3 + .quad 0xc90f9b314bb05355 + .quad 0xc6e08df8ef2079b1 + .quad 0x7ef72016758cc12f + .quad 0x5a782a5c273e9718 + .quad 0x3576c6995e4efd94 + .quad 0x0f2ed8051f237d3e + .quad 0x044fb81d82d50a99 + .quad 0xc1df18c5a907e3d9 + .quad 0x57b3371dce4c6359 + .quad 0xca704534b201bb49 + .quad 0x7f79823f9c30dd2e + + // 2^160 * 3 * G + + .quad 0x8334d239a3b513e8 + .quad 0xc13670d4b91fa8d8 + .quad 0x12b54136f590bd33 + .quad 0x0a4e0373d784d9b4 + .quad 0x6a9c1ff068f587ba + .quad 0x0827894e0050c8de + .quad 0x3cbf99557ded5be7 + .quad 0x64a9b0431c06d6f0 + .quad 0x2eb3d6a15b7d2919 + .quad 0xb0b4f6a0d53a8235 + .quad 0x7156ce4389a45d47 + .quad 0x071a7d0ace18346c + + // 2^160 * 4 * G + + .quad 0xd3072daac887ba0b + .quad 0x01262905bfa562ee + .quad 0xcf543002c0ef768b + .quad 0x2c3bcc7146ea7e9c + .quad 0xcc0c355220e14431 + .quad 0x0d65950709b15141 + .quad 0x9af5621b209d5f36 + .quad 0x7c69bcf7617755d3 + .quad 0x07f0d7eb04e8295f + .quad 0x10db18252f50f37d + .quad 0xe951a9a3171798d7 + .quad 0x6f5a9a7322aca51d + + // 2^160 * 5 * G + + .quad 0x8ba1000c2f41c6c5 + .quad 0xc49f79c10cfefb9b + .quad 0x4efa47703cc51c9f + .quad 0x494e21a2e147afca + .quad 0xe729d4eba3d944be + .quad 0x8d9e09408078af9e + .quad 0x4525567a47869c03 + .quad 0x02ab9680ee8d3b24 + .quad 0xefa48a85dde50d9a + .quad 0x219a224e0fb9a249 + .quad 0xfa091f1dd91ef6d9 + .quad 0x6b5d76cbea46bb34 + + // 2^160 * 6 * G + + .quad 0x8857556cec0cd994 + .quad 0x6472dc6f5cd01dba + .quad 0xaf0169148f42b477 + .quad 0x0ae333f685277354 + .quad 0xe0f941171e782522 + .quad 
0xf1e6ae74036936d3 + .quad 0x408b3ea2d0fcc746 + .quad 0x16fb869c03dd313e + .quad 0x288e199733b60962 + .quad 0x24fc72b4d8abe133 + .quad 0x4811f7ed0991d03e + .quad 0x3f81e38b8f70d075 + + // 2^160 * 7 * G + + .quad 0x7f910fcc7ed9affe + .quad 0x545cb8a12465874b + .quad 0xa8397ed24b0c4704 + .quad 0x50510fc104f50993 + .quad 0x0adb7f355f17c824 + .quad 0x74b923c3d74299a4 + .quad 0xd57c3e8bcbf8eaf7 + .quad 0x0ad3e2d34cdedc3d + .quad 0x6f0c0fc5336e249d + .quad 0x745ede19c331cfd9 + .quad 0xf2d6fd0009eefe1c + .quad 0x127c158bf0fa1ebe + + // 2^160 * 8 * G + + .quad 0xf6197c422e9879a2 + .quad 0xa44addd452ca3647 + .quad 0x9b413fc14b4eaccb + .quad 0x354ef87d07ef4f68 + .quad 0xdea28fc4ae51b974 + .quad 0x1d9973d3744dfe96 + .quad 0x6240680b873848a8 + .quad 0x4ed82479d167df95 + .quad 0xfee3b52260c5d975 + .quad 0x50352efceb41b0b8 + .quad 0x8808ac30a9f6653c + .quad 0x302d92d20539236d + + // 2^164 * 1 * G + + .quad 0x4c59023fcb3efb7c + .quad 0x6c2fcb99c63c2a94 + .quad 0xba4190e2c3c7e084 + .quad 0x0e545daea51874d9 + .quad 0x957b8b8b0df53c30 + .quad 0x2a1c770a8e60f098 + .quad 0xbbc7a670345796de + .quad 0x22a48f9a90c99bc9 + .quad 0x6b7dc0dc8d3fac58 + .quad 0x5497cd6ce6e42bfd + .quad 0x542f7d1bf400d305 + .quad 0x4159f47f048d9136 + + // 2^164 * 2 * G + + .quad 0x20ad660839e31e32 + .quad 0xf81e1bd58405be50 + .quad 0xf8064056f4dabc69 + .quad 0x14d23dd4ce71b975 + .quad 0x748515a8bbd24839 + .quad 0x77128347afb02b55 + .quad 0x50ba2ac649a2a17f + .quad 0x060525513ad730f1 + .quad 0xf2398e098aa27f82 + .quad 0x6d7982bb89a1b024 + .quad 0xfa694084214dd24c + .quad 0x71ab966fa32301c3 + + // 2^164 * 3 * G + + .quad 0x2dcbd8e34ded02fc + .quad 0x1151f3ec596f22aa + .quad 0xbca255434e0328da + .quad 0x35768fbe92411b22 + .quad 0xb1088a0702809955 + .quad 0x43b273ea0b43c391 + .quad 0xca9b67aefe0686ed + .quad 0x605eecbf8335f4ed + .quad 0x83200a656c340431 + .quad 0x9fcd71678ee59c2f + .quad 0x75d4613f71300f8a + .quad 0x7a912faf60f542f9 + + // 2^164 * 4 * G + + .quad 0xb204585e5edc1a43 + .quad 0x9f0e16ee5897c73c + .quad 0x5b82c0ae4e70483c + .quad 0x624a170e2bddf9be + .quad 0x253f4f8dfa2d5597 + .quad 0x25e49c405477130c + .quad 0x00c052e5996b1102 + .quad 0x33cb966e33bb6c4a + .quad 0x597028047f116909 + .quad 0x828ac41c1e564467 + .quad 0x70417dbde6217387 + .quad 0x721627aefbac4384 + + // 2^164 * 5 * G + + .quad 0x97d03bc38736add5 + .quad 0x2f1422afc532b130 + .quad 0x3aa68a057101bbc4 + .quad 0x4c946cf7e74f9fa7 + .quad 0xfd3097bc410b2f22 + .quad 0xf1a05da7b5cfa844 + .quad 0x61289a1def57ca74 + .quad 0x245ea199bb821902 + .quad 0xaedca66978d477f8 + .quad 0x1898ba3c29117fe1 + .quad 0xcf73f983720cbd58 + .quad 0x67da12e6b8b56351 + + // 2^164 * 6 * G + + .quad 0x7067e187b4bd6e07 + .quad 0x6e8f0203c7d1fe74 + .quad 0x93c6aa2f38c85a30 + .quad 0x76297d1f3d75a78a + .quad 0x2b7ef3d38ec8308c + .quad 0x828fd7ec71eb94ab + .quad 0x807c3b36c5062abd + .quad 0x0cb64cb831a94141 + .quad 0x3030fc33534c6378 + .quad 0xb9635c5ce541e861 + .quad 0x15d9a9bed9b2c728 + .quad 0x49233ea3f3775dcb + + // 2^164 * 7 * G + + .quad 0x629398fa8dbffc3a + .quad 0xe12fe52dd54db455 + .quad 0xf3be11dfdaf25295 + .quad 0x628b140dce5e7b51 + .quad 0x7b3985fe1c9f249b + .quad 0x4fd6b2d5a1233293 + .quad 0xceb345941adf4d62 + .quad 0x6987ff6f542de50c + .quad 0x47e241428f83753c + .quad 0x6317bebc866af997 + .quad 0xdabb5b433d1a9829 + .quad 0x074d8d245287fb2d + + // 2^164 * 8 * G + + .quad 0x8337d9cd440bfc31 + .quad 0x729d2ca1af318fd7 + .quad 0xa040a4a4772c2070 + .quad 0x46002ef03a7349be + .quad 0x481875c6c0e31488 + .quad 0x219429b2e22034b4 + .quad 0x7223c98a31283b65 + .quad 0x3420d60b342277f9 + .quad 
0xfaa23adeaffe65f7 + .quad 0x78261ed45be0764c + .quad 0x441c0a1e2f164403 + .quad 0x5aea8e567a87d395 + + // 2^168 * 1 * G + + .quad 0x7813c1a2bca4283d + .quad 0xed62f091a1863dd9 + .quad 0xaec7bcb8c268fa86 + .quad 0x10e5d3b76f1cae4c + .quad 0x2dbc6fb6e4e0f177 + .quad 0x04e1bf29a4bd6a93 + .quad 0x5e1966d4787af6e8 + .quad 0x0edc5f5eb426d060 + .quad 0x5453bfd653da8e67 + .quad 0xe9dc1eec24a9f641 + .quad 0xbf87263b03578a23 + .quad 0x45b46c51361cba72 + + // 2^168 * 2 * G + + .quad 0xa9402abf314f7fa1 + .quad 0xe257f1dc8e8cf450 + .quad 0x1dbbd54b23a8be84 + .quad 0x2177bfa36dcb713b + .quad 0xce9d4ddd8a7fe3e4 + .quad 0xab13645676620e30 + .quad 0x4b594f7bb30e9958 + .quad 0x5c1c0aef321229df + .quad 0x37081bbcfa79db8f + .quad 0x6048811ec25f59b3 + .quad 0x087a76659c832487 + .quad 0x4ae619387d8ab5bb + + // 2^168 * 3 * G + + .quad 0x8ddbf6aa5344a32e + .quad 0x7d88eab4b41b4078 + .quad 0x5eb0eb974a130d60 + .quad 0x1a00d91b17bf3e03 + .quad 0x61117e44985bfb83 + .quad 0xfce0462a71963136 + .quad 0x83ac3448d425904b + .quad 0x75685abe5ba43d64 + .quad 0x6e960933eb61f2b2 + .quad 0x543d0fa8c9ff4952 + .quad 0xdf7275107af66569 + .quad 0x135529b623b0e6aa + + // 2^168 * 4 * G + + .quad 0x18f0dbd7add1d518 + .quad 0x979f7888cfc11f11 + .quad 0x8732e1f07114759b + .quad 0x79b5b81a65ca3a01 + .quad 0xf5c716bce22e83fe + .quad 0xb42beb19e80985c1 + .quad 0xec9da63714254aae + .quad 0x5972ea051590a613 + .quad 0x0fd4ac20dc8f7811 + .quad 0x9a9ad294ac4d4fa8 + .quad 0xc01b2d64b3360434 + .quad 0x4f7e9c95905f3bdb + + // 2^168 * 5 * G + + .quad 0x62674bbc5781302e + .quad 0xd8520f3989addc0f + .quad 0x8c2999ae53fbd9c6 + .quad 0x31993ad92e638e4c + .quad 0x71c8443d355299fe + .quad 0x8bcd3b1cdbebead7 + .quad 0x8092499ef1a49466 + .quad 0x1942eec4a144adc8 + .quad 0x7dac5319ae234992 + .quad 0x2c1b3d910cea3e92 + .quad 0x553ce494253c1122 + .quad 0x2a0a65314ef9ca75 + + // 2^168 * 6 * G + + .quad 0x2db7937ff7f927c2 + .quad 0xdb741f0617d0a635 + .quad 0x5982f3a21155af76 + .quad 0x4cf6e218647c2ded + .quad 0xcf361acd3c1c793a + .quad 0x2f9ebcac5a35bc3b + .quad 0x60e860e9a8cda6ab + .quad 0x055dc39b6dea1a13 + .quad 0xb119227cc28d5bb6 + .quad 0x07e24ebc774dffab + .quad 0xa83c78cee4a32c89 + .quad 0x121a307710aa24b6 + + // 2^168 * 7 * G + + .quad 0xe4db5d5e9f034a97 + .quad 0xe153fc093034bc2d + .quad 0x460546919551d3b1 + .quad 0x333fc76c7a40e52d + .quad 0xd659713ec77483c9 + .quad 0x88bfe077b82b96af + .quad 0x289e28231097bcd3 + .quad 0x527bb94a6ced3a9b + .quad 0x563d992a995b482e + .quad 0x3405d07c6e383801 + .quad 0x485035de2f64d8e5 + .quad 0x6b89069b20a7a9f7 + + // 2^168 * 8 * G + + .quad 0x812aa0416270220d + .quad 0x995a89faf9245b4e + .quad 0xffadc4ce5072ef05 + .quad 0x23bc2103aa73eb73 + .quad 0x4082fa8cb5c7db77 + .quad 0x068686f8c734c155 + .quad 0x29e6c8d9f6e7a57e + .quad 0x0473d308a7639bcf + .quad 0xcaee792603589e05 + .quad 0x2b4b421246dcc492 + .quad 0x02a1ef74e601a94f + .quad 0x102f73bfde04341a + + // 2^172 * 1 * G + + .quad 0xb5a2d50c7ec20d3e + .quad 0xc64bdd6ea0c97263 + .quad 0x56e89052c1ff734d + .quad 0x4929c6f72b2ffaba + .quad 0x358ecba293a36247 + .quad 0xaf8f9862b268fd65 + .quad 0x412f7e9968a01c89 + .quad 0x5786f312cd754524 + .quad 0x337788ffca14032c + .quad 0xf3921028447f1ee3 + .quad 0x8b14071f231bccad + .quad 0x4c817b4bf2344783 + + // 2^172 * 2 * G + + .quad 0x0ff853852871b96e + .quad 0xe13e9fab60c3f1bb + .quad 0xeefd595325344402 + .quad 0x0a37c37075b7744b + .quad 0x413ba057a40b4484 + .quad 0xba4c2e1a4f5f6a43 + .quad 0x614ba0a5aee1d61c + .quad 0x78a1531a8b05dc53 + .quad 0x6cbdf1703ad0562b + .quad 0x8ecf4830c92521a3 + .quad 0xdaebd303fd8424e7 + .quad 
0x72ad82a42e5ec56f + + // 2^172 * 3 * G + + .quad 0x3f9e8e35bafb65f6 + .quad 0x39d69ec8f27293a1 + .quad 0x6cb8cd958cf6a3d0 + .quad 0x1734778173adae6d + .quad 0xc368939167024bc3 + .quad 0x8e69d16d49502fda + .quad 0xfcf2ec3ce45f4b29 + .quad 0x065f669ea3b4cbc4 + .quad 0x8a00aec75532db4d + .quad 0xb869a4e443e31bb1 + .quad 0x4a0f8552d3a7f515 + .quad 0x19adeb7c303d7c08 + + // 2^172 * 4 * G + + .quad 0xc720cb6153ead9a3 + .quad 0x55b2c97f512b636e + .quad 0xb1e35b5fd40290b1 + .quad 0x2fd9ccf13b530ee2 + .quad 0x9d05ba7d43c31794 + .quad 0x2470c8ff93322526 + .quad 0x8323dec816197438 + .quad 0x2852709881569b53 + .quad 0x07bd475b47f796b8 + .quad 0xd2c7b013542c8f54 + .quad 0x2dbd23f43b24f87e + .quad 0x6551afd77b0901d6 + + // 2^172 * 5 * G + + .quad 0x4546baaf54aac27f + .quad 0xf6f66fecb2a45a28 + .quad 0x582d1b5b562bcfe8 + .quad 0x44b123f3920f785f + .quad 0x68a24ce3a1d5c9ac + .quad 0xbb77a33d10ff6461 + .quad 0x0f86ce4425d3166e + .quad 0x56507c0950b9623b + .quad 0x1206f0b7d1713e63 + .quad 0x353fe3d915bafc74 + .quad 0x194ceb970ad9d94d + .quad 0x62fadd7cf9d03ad3 + + // 2^172 * 6 * G + + .quad 0xc6b5967b5598a074 + .quad 0x5efe91ce8e493e25 + .quad 0xd4b72c4549280888 + .quad 0x20ef1149a26740c2 + .quad 0x3cd7bc61e7ce4594 + .quad 0xcd6b35a9b7dd267e + .quad 0xa080abc84366ef27 + .quad 0x6ec7c46f59c79711 + .quad 0x2f07ad636f09a8a2 + .quad 0x8697e6ce24205e7d + .quad 0xc0aefc05ee35a139 + .quad 0x15e80958b5f9d897 + + // 2^172 * 7 * G + + .quad 0x25a5ef7d0c3e235b + .quad 0x6c39c17fbe134ee7 + .quad 0xc774e1342dc5c327 + .quad 0x021354b892021f39 + .quad 0x4dd1ed355bb061c4 + .quad 0x42dc0cef941c0700 + .quad 0x61305dc1fd86340e + .quad 0x56b2cc930e55a443 + .quad 0x1df79da6a6bfc5a2 + .quad 0x02f3a2749fde4369 + .quad 0xb323d9f2cda390a7 + .quad 0x7be0847b8774d363 + + // 2^172 * 8 * G + + .quad 0x8c99cc5a8b3f55c3 + .quad 0x0611d7253fded2a0 + .quad 0xed2995ff36b70a36 + .quad 0x1f699a54d78a2619 + .quad 0x1466f5af5307fa11 + .quad 0x817fcc7ded6c0af2 + .quad 0x0a6de44ec3a4a3fb + .quad 0x74071475bc927d0b + .quad 0xe77292f373e7ea8a + .quad 0x296537d2cb045a31 + .quad 0x1bd0653ed3274fde + .quad 0x2f9a2c4476bd2966 + + // 2^176 * 1 * G + + .quad 0xeb18b9ab7f5745c6 + .quad 0x023a8aee5787c690 + .quad 0xb72712da2df7afa9 + .quad 0x36597d25ea5c013d + .quad 0xa2b4dae0b5511c9a + .quad 0x7ac860292bffff06 + .quad 0x981f375df5504234 + .quad 0x3f6bd725da4ea12d + .quad 0x734d8d7b106058ac + .quad 0xd940579e6fc6905f + .quad 0x6466f8f99202932d + .quad 0x7b7ecc19da60d6d0 + + // 2^176 * 2 * G + + .quad 0x78c2373c695c690d + .quad 0xdd252e660642906e + .quad 0x951d44444ae12bd2 + .quad 0x4235ad7601743956 + .quad 0x6dae4a51a77cfa9b + .quad 0x82263654e7a38650 + .quad 0x09bbffcd8f2d82db + .quad 0x03bedc661bf5caba + .quad 0x6258cb0d078975f5 + .quad 0x492942549189f298 + .quad 0xa0cab423e2e36ee4 + .quad 0x0e7ce2b0cdf066a1 + + // 2^176 * 3 * G + + .quad 0xc494643ac48c85a3 + .quad 0xfd361df43c6139ad + .quad 0x09db17dd3ae94d48 + .quad 0x666e0a5d8fb4674a + .quad 0xfea6fedfd94b70f9 + .quad 0xf130c051c1fcba2d + .quad 0x4882d47e7f2fab89 + .quad 0x615256138aeceeb5 + .quad 0x2abbf64e4870cb0d + .quad 0xcd65bcf0aa458b6b + .quad 0x9abe4eba75e8985d + .quad 0x7f0bc810d514dee4 + + // 2^176 * 4 * G + + .quad 0xb9006ba426f4136f + .quad 0x8d67369e57e03035 + .quad 0xcbc8dfd94f463c28 + .quad 0x0d1f8dbcf8eedbf5 + .quad 0x83ac9dad737213a0 + .quad 0x9ff6f8ba2ef72e98 + .quad 0x311e2edd43ec6957 + .quad 0x1d3a907ddec5ab75 + .quad 0xba1693313ed081dc + .quad 0x29329fad851b3480 + .quad 0x0128013c030321cb + .quad 0x00011b44a31bfde3 + + // 2^176 * 5 * G + + .quad 0x3fdfa06c3fc66c0c + .quad 
0x5d40e38e4dd60dd2 + .quad 0x7ae38b38268e4d71 + .quad 0x3ac48d916e8357e1 + .quad 0x16561f696a0aa75c + .quad 0xc1bf725c5852bd6a + .quad 0x11a8dd7f9a7966ad + .quad 0x63d988a2d2851026 + .quad 0x00120753afbd232e + .quad 0xe92bceb8fdd8f683 + .quad 0xf81669b384e72b91 + .quad 0x33fad52b2368a066 + + // 2^176 * 6 * G + + .quad 0x540649c6c5e41e16 + .quad 0x0af86430333f7735 + .quad 0xb2acfcd2f305e746 + .quad 0x16c0f429a256dca7 + .quad 0x8d2cc8d0c422cfe8 + .quad 0x072b4f7b05a13acb + .quad 0xa3feb6e6ecf6a56f + .quad 0x3cc355ccb90a71e2 + .quad 0xe9b69443903e9131 + .quad 0xb8a494cb7a5637ce + .quad 0xc87cd1a4baba9244 + .quad 0x631eaf426bae7568 + + // 2^176 * 7 * G + + .quad 0xb3e90410da66fe9f + .quad 0x85dd4b526c16e5a6 + .quad 0xbc3d97611ef9bf83 + .quad 0x5599648b1ea919b5 + .quad 0x47d975b9a3700de8 + .quad 0x7280c5fbe2f80552 + .quad 0x53658f2732e45de1 + .quad 0x431f2c7f665f80b5 + .quad 0xd6026344858f7b19 + .quad 0x14ab352fa1ea514a + .quad 0x8900441a2090a9d7 + .quad 0x7b04715f91253b26 + + // 2^176 * 8 * G + + .quad 0x83edbd28acf6ae43 + .quad 0x86357c8b7d5c7ab4 + .quad 0xc0404769b7eb2c44 + .quad 0x59b37bf5c2f6583f + .quad 0xb376c280c4e6bac6 + .quad 0x970ed3dd6d1d9b0b + .quad 0xb09a9558450bf944 + .quad 0x48d0acfa57cde223 + .quad 0xb60f26e47dabe671 + .quad 0xf1d1a197622f3a37 + .quad 0x4208ce7ee9960394 + .quad 0x16234191336d3bdb + + // 2^180 * 1 * G + + .quad 0xf19aeac733a63aef + .quad 0x2c7fba5d4442454e + .quad 0x5da87aa04795e441 + .quad 0x413051e1a4e0b0f5 + .quad 0x852dd1fd3d578bbe + .quad 0x2b65ce72c3286108 + .quad 0x658c07f4eace2273 + .quad 0x0933f804ec38ab40 + .quad 0xa7ab69798d496476 + .quad 0x8121aadefcb5abc8 + .quad 0xa5dc12ef7b539472 + .quad 0x07fd47065e45351a + + // 2^180 * 2 * G + + .quad 0xc8583c3d258d2bcd + .quad 0x17029a4daf60b73f + .quad 0xfa0fc9d6416a3781 + .quad 0x1c1e5fba38b3fb23 + .quad 0x304211559ae8e7c3 + .quad 0xf281b229944882a5 + .quad 0x8a13ac2e378250e4 + .quad 0x014afa0954ba48f4 + .quad 0xcb3197001bb3666c + .quad 0x330060524bffecb9 + .quad 0x293711991a88233c + .quad 0x291884363d4ed364 + + // 2^180 * 3 * G + + .quad 0x033c6805dc4babfa + .quad 0x2c15bf5e5596ecc1 + .quad 0x1bc70624b59b1d3b + .quad 0x3ede9850a19f0ec5 + .quad 0xfb9d37c3bc1ab6eb + .quad 0x02be14534d57a240 + .quad 0xf4d73415f8a5e1f6 + .quad 0x5964f4300ccc8188 + .quad 0xe44a23152d096800 + .quad 0x5c08c55970866996 + .quad 0xdf2db60a46affb6e + .quad 0x579155c1f856fd89 + + // 2^180 * 4 * G + + .quad 0x96324edd12e0c9ef + .quad 0x468b878df2420297 + .quad 0x199a3776a4f573be + .quad 0x1e7fbcf18e91e92a + .quad 0xb5f16b630817e7a6 + .quad 0x808c69233c351026 + .quad 0x324a983b54cef201 + .quad 0x53c092084a485345 + .quad 0xd2d41481f1cbafbf + .quad 0x231d2db6716174e5 + .quad 0x0b7d7656e2a55c98 + .quad 0x3e955cd82aa495f6 + + // 2^180 * 5 * G + + .quad 0xe48f535e3ed15433 + .quad 0xd075692a0d7270a3 + .quad 0x40fbd21daade6387 + .quad 0x14264887cf4495f5 + .quad 0xab39f3ef61bb3a3f + .quad 0x8eb400652eb9193e + .quad 0xb5de6ecc38c11f74 + .quad 0x654d7e9626f3c49f + .quad 0xe564cfdd5c7d2ceb + .quad 0x82eeafded737ccb9 + .quad 0x6107db62d1f9b0ab + .quad 0x0b6baac3b4358dbb + + // 2^180 * 6 * G + + .quad 0x7ae62bcb8622fe98 + .quad 0x47762256ceb891af + .quad 0x1a5a92bcf2e406b4 + .quad 0x7d29401784e41501 + .quad 0x204abad63700a93b + .quad 0xbe0023d3da779373 + .quad 0xd85f0346633ab709 + .quad 0x00496dc490820412 + .quad 0x1c74b88dc27e6360 + .quad 0x074854268d14850c + .quad 0xa145fb7b3e0dcb30 + .quad 0x10843f1b43803b23 + + // 2^180 * 7 * G + + .quad 0xc5f90455376276dd + .quad 0xce59158dd7645cd9 + .quad 0x92f65d511d366b39 + .quad 0x11574b6e526996c4 + .quad 
0xd56f672de324689b + .quad 0xd1da8aedb394a981 + .quad 0xdd7b58fe9168cfed + .quad 0x7ce246cd4d56c1e8 + .quad 0xb8f4308e7f80be53 + .quad 0x5f3cb8cb34a9d397 + .quad 0x18a961bd33cc2b2c + .quad 0x710045fb3a9af671 + + // 2^180 * 8 * G + + .quad 0x73f93d36101b95eb + .quad 0xfaef33794f6f4486 + .quad 0x5651735f8f15e562 + .quad 0x7fa3f19058b40da1 + .quad 0xa03fc862059d699e + .quad 0x2370cfa19a619e69 + .quad 0xc4fe3b122f823deb + .quad 0x1d1b056fa7f0844e + .quad 0x1bc64631e56bf61f + .quad 0xd379ab106e5382a3 + .quad 0x4d58c57e0540168d + .quad 0x566256628442d8e4 + + // 2^184 * 1 * G + + .quad 0xb9e499def6267ff6 + .quad 0x7772ca7b742c0843 + .quad 0x23a0153fe9a4f2b1 + .quad 0x2cdfdfecd5d05006 + .quad 0xdd499cd61ff38640 + .quad 0x29cd9bc3063625a0 + .quad 0x51e2d8023dd73dc3 + .quad 0x4a25707a203b9231 + .quad 0x2ab7668a53f6ed6a + .quad 0x304242581dd170a1 + .quad 0x4000144c3ae20161 + .quad 0x5721896d248e49fc + + // 2^184 * 2 * G + + .quad 0x0b6e5517fd181bae + .quad 0x9022629f2bb963b4 + .quad 0x5509bce932064625 + .quad 0x578edd74f63c13da + .quad 0x285d5091a1d0da4e + .quad 0x4baa6fa7b5fe3e08 + .quad 0x63e5177ce19393b3 + .quad 0x03c935afc4b030fd + .quad 0x997276c6492b0c3d + .quad 0x47ccc2c4dfe205fc + .quad 0xdcd29b84dd623a3c + .quad 0x3ec2ab590288c7a2 + + // 2^184 * 3 * G + + .quad 0xa1a0d27be4d87bb9 + .quad 0xa98b4deb61391aed + .quad 0x99a0ddd073cb9b83 + .quad 0x2dd5c25a200fcace + .quad 0xa7213a09ae32d1cb + .quad 0x0f2b87df40f5c2d5 + .quad 0x0baea4c6e81eab29 + .quad 0x0e1bf66c6adbac5e + .quad 0xe2abd5e9792c887e + .quad 0x1a020018cb926d5d + .quad 0xbfba69cdbaae5f1e + .quad 0x730548b35ae88f5f + + // 2^184 * 4 * G + + .quad 0xc43551a3cba8b8ee + .quad 0x65a26f1db2115f16 + .quad 0x760f4f52ab8c3850 + .quad 0x3043443b411db8ca + .quad 0x805b094ba1d6e334 + .quad 0xbf3ef17709353f19 + .quad 0x423f06cb0622702b + .quad 0x585a2277d87845dd + .quad 0xa18a5f8233d48962 + .quad 0x6698c4b5ec78257f + .quad 0xa78e6fa5373e41ff + .quad 0x7656278950ef981f + + // 2^184 * 5 * G + + .quad 0x38c3cf59d51fc8c0 + .quad 0x9bedd2fd0506b6f2 + .quad 0x26bf109fab570e8f + .quad 0x3f4160a8c1b846a6 + .quad 0xe17073a3ea86cf9d + .quad 0x3a8cfbb707155fdc + .quad 0x4853e7fc31838a8e + .quad 0x28bbf484b613f616 + .quad 0xf2612f5c6f136c7c + .quad 0xafead107f6dd11be + .quad 0x527e9ad213de6f33 + .quad 0x1e79cb358188f75d + + // 2^184 * 6 * G + + .quad 0x013436c3eef7e3f1 + .quad 0x828b6a7ffe9e10f8 + .quad 0x7ff908e5bcf9defc + .quad 0x65d7951b3a3b3831 + .quad 0x77e953d8f5e08181 + .quad 0x84a50c44299dded9 + .quad 0xdc6c2d0c864525e5 + .quad 0x478ab52d39d1f2f4 + .quad 0x66a6a4d39252d159 + .quad 0xe5dde1bc871ac807 + .quad 0xb82c6b40a6c1c96f + .quad 0x16d87a411a212214 + + // 2^184 * 7 * G + + .quad 0xb3bd7e5a42066215 + .quad 0x879be3cd0c5a24c1 + .quad 0x57c05db1d6f994b7 + .quad 0x28f87c8165f38ca6 + .quad 0xfba4d5e2d54e0583 + .quad 0xe21fafd72ebd99fa + .quad 0x497ac2736ee9778f + .quad 0x1f990b577a5a6dde + .quad 0xa3344ead1be8f7d6 + .quad 0x7d1e50ebacea798f + .quad 0x77c6569e520de052 + .quad 0x45882fe1534d6d3e + + // 2^184 * 8 * G + + .quad 0x6669345d757983d6 + .quad 0x62b6ed1117aa11a6 + .quad 0x7ddd1857985e128f + .quad 0x688fe5b8f626f6dd + .quad 0xd8ac9929943c6fe4 + .quad 0xb5f9f161a38392a2 + .quad 0x2699db13bec89af3 + .quad 0x7dcf843ce405f074 + .quad 0x6c90d6484a4732c0 + .quad 0xd52143fdca563299 + .quad 0xb3be28c3915dc6e1 + .quad 0x6739687e7327191b + + // 2^188 * 1 * G + + .quad 0x9f65c5ea200814cf + .quad 0x840536e169a31740 + .quad 0x8b0ed13925c8b4ad + .quad 0x0080dbafe936361d + .quad 0x8ce5aad0c9cb971f + .quad 0x1156aaa99fd54a29 + .quad 0x41f7247015af9b78 + .quad 
0x1fe8cca8420f49aa + .quad 0x72a1848f3c0cc82a + .quad 0x38c560c2877c9e54 + .quad 0x5004e228ce554140 + .quad 0x042418a103429d71 + + // 2^188 * 2 * G + + .quad 0x899dea51abf3ff5f + .quad 0x9b93a8672fc2d8ba + .quad 0x2c38cb97be6ebd5c + .quad 0x114d578497263b5d + .quad 0x58e84c6f20816247 + .quad 0x8db2b2b6e36fd793 + .quad 0x977182561d484d85 + .quad 0x0822024f8632abd7 + .quad 0xb301bb7c6b1beca3 + .quad 0x55393f6dc6eb1375 + .quad 0x910d281097b6e4eb + .quad 0x1ad4548d9d479ea3 + + // 2^188 * 3 * G + + .quad 0xcd5a7da0389a48fd + .quad 0xb38fa4aa9a78371e + .quad 0xc6d9761b2cdb8e6c + .quad 0x35cf51dbc97e1443 + .quad 0xa06fe66d0fe9fed3 + .quad 0xa8733a401c587909 + .quad 0x30d14d800df98953 + .quad 0x41ce5876c7b30258 + .quad 0x59ac3bc5d670c022 + .quad 0xeae67c109b119406 + .quad 0x9798bdf0b3782fda + .quad 0x651e3201fd074092 + + // 2^188 * 4 * G + + .quad 0xd63d8483ef30c5cf + .quad 0x4cd4b4962361cc0c + .quad 0xee90e500a48426ac + .quad 0x0af51d7d18c14eeb + .quad 0xa57ba4a01efcae9e + .quad 0x769f4beedc308a94 + .quad 0xd1f10eeb3603cb2e + .quad 0x4099ce5e7e441278 + .quad 0x1ac98e4f8a5121e9 + .quad 0x7dae9544dbfa2fe0 + .quad 0x8320aa0dd6430df9 + .quad 0x667282652c4a2fb5 + + // 2^188 * 5 * G + + .quad 0x874621f4d86bc9ab + .quad 0xb54c7bbe56fe6fea + .quad 0x077a24257fadc22c + .quad 0x1ab53be419b90d39 + .quad 0xada8b6e02946db23 + .quad 0x1c0ce51a7b253ab7 + .quad 0x8448c85a66dd485b + .quad 0x7f1fc025d0675adf + .quad 0xd8ee1b18319ea6aa + .quad 0x004d88083a21f0da + .quad 0x3bd6aa1d883a4f4b + .quad 0x4db9a3a6dfd9fd14 + + // 2^188 * 6 * G + + .quad 0x8ce7b23bb99c0755 + .quad 0x35c5d6edc4f50f7a + .quad 0x7e1e2ed2ed9b50c3 + .quad 0x36305f16e8934da1 + .quad 0xd95b00bbcbb77c68 + .quad 0xddbc846a91f17849 + .quad 0x7cf700aebe28d9b3 + .quad 0x5ce1285c85d31f3e + .quad 0x31b6972d98b0bde8 + .quad 0x7d920706aca6de5b + .quad 0xe67310f8908a659f + .quad 0x50fac2a6efdf0235 + + // 2^188 * 7 * G + + .quad 0xf3d3a9f35b880f5a + .quad 0xedec050cdb03e7c2 + .quad 0xa896981ff9f0b1a2 + .quad 0x49a4ae2bac5e34a4 + .quad 0x295b1c86f6f449bc + .quad 0x51b2e84a1f0ab4dd + .quad 0xc001cb30aa8e551d + .quad 0x6a28d35944f43662 + .quad 0x28bb12ee04a740e0 + .quad 0x14313bbd9bce8174 + .quad 0x72f5b5e4e8c10c40 + .quad 0x7cbfb19936adcd5b + + // 2^188 * 8 * G + + .quad 0xa311ddc26b89792d + .quad 0x1b30b4c6da512664 + .quad 0x0ca77b4ccf150859 + .quad 0x1de443df1b009408 + .quad 0x8e793a7acc36e6e0 + .quad 0xf9fab7a37d586eed + .quad 0x3a4f9692bae1f4e4 + .quad 0x1c14b03eff5f447e + .quad 0x19647bd114a85291 + .quad 0x57b76cb21034d3af + .quad 0x6329db440f9d6dfa + .quad 0x5ef43e586a571493 + + // 2^192 * 1 * G + + .quad 0xef782014385675a6 + .quad 0xa2649f30aafda9e8 + .quad 0x4cd1eb505cdfa8cb + .quad 0x46115aba1d4dc0b3 + .quad 0xa66dcc9dc80c1ac0 + .quad 0x97a05cf41b38a436 + .quad 0xa7ebf3be95dbd7c6 + .quad 0x7da0b8f68d7e7dab + .quad 0xd40f1953c3b5da76 + .quad 0x1dac6f7321119e9b + .quad 0x03cc6021feb25960 + .quad 0x5a5f887e83674b4b + + // 2^192 * 2 * G + + .quad 0x8f6301cf70a13d11 + .quad 0xcfceb815350dd0c4 + .quad 0xf70297d4a4bca47e + .quad 0x3669b656e44d1434 + .quad 0x9e9628d3a0a643b9 + .quad 0xb5c3cb00e6c32064 + .quad 0x9b5302897c2dec32 + .quad 0x43e37ae2d5d1c70c + .quad 0x387e3f06eda6e133 + .quad 0x67301d5199a13ac0 + .quad 0xbd5ad8f836263811 + .quad 0x6a21e6cd4fd5e9be + + // 2^192 * 3 * G + + .quad 0xf1c6170a3046e65f + .quad 0x58712a2a00d23524 + .quad 0x69dbbd3c8c82b755 + .quad 0x586bf9f1a195ff57 + .quad 0xef4129126699b2e3 + .quad 0x71d30847708d1301 + .quad 0x325432d01182b0bd + .quad 0x45371b07001e8b36 + .quad 0xa6db088d5ef8790b + .quad 0x5278f0dc610937e5 + .quad 
0xac0349d261a16eb8 + .quad 0x0eafb03790e52179 + + // 2^192 * 4 * G + + .quad 0x960555c13748042f + .quad 0x219a41e6820baa11 + .quad 0x1c81f73873486d0c + .quad 0x309acc675a02c661 + .quad 0x5140805e0f75ae1d + .quad 0xec02fbe32662cc30 + .quad 0x2cebdf1eea92396d + .quad 0x44ae3344c5435bb3 + .quad 0x9cf289b9bba543ee + .quad 0xf3760e9d5ac97142 + .quad 0x1d82e5c64f9360aa + .quad 0x62d5221b7f94678f + + // 2^192 * 5 * G + + .quad 0x524c299c18d0936d + .quad 0xc86bb56c8a0c1a0c + .quad 0xa375052edb4a8631 + .quad 0x5c0efde4bc754562 + .quad 0x7585d4263af77a3c + .quad 0xdfae7b11fee9144d + .quad 0xa506708059f7193d + .quad 0x14f29a5383922037 + .quad 0xdf717edc25b2d7f5 + .quad 0x21f970db99b53040 + .quad 0xda9234b7c3ed4c62 + .quad 0x5e72365c7bee093e + + // 2^192 * 6 * G + + .quad 0x575bfc074571217f + .quad 0x3779675d0694d95b + .quad 0x9a0a37bbf4191e33 + .quad 0x77f1104c47b4eabc + .quad 0x7d9339062f08b33e + .quad 0x5b9659e5df9f32be + .quad 0xacff3dad1f9ebdfd + .quad 0x70b20555cb7349b7 + .quad 0xbe5113c555112c4c + .quad 0x6688423a9a881fcd + .quad 0x446677855e503b47 + .quad 0x0e34398f4a06404a + + // 2^192 * 7 * G + + .quad 0xb67d22d93ecebde8 + .quad 0x09b3e84127822f07 + .quad 0x743fa61fb05b6d8d + .quad 0x5e5405368a362372 + .quad 0x18930b093e4b1928 + .quad 0x7de3e10e73f3f640 + .quad 0xf43217da73395d6f + .quad 0x6f8aded6ca379c3e + .quad 0xe340123dfdb7b29a + .quad 0x487b97e1a21ab291 + .quad 0xf9967d02fde6949e + .quad 0x780de72ec8d3de97 + + // 2^192 * 8 * G + + .quad 0x0ae28545089ae7bc + .quad 0x388ddecf1c7f4d06 + .quad 0x38ac15510a4811b8 + .quad 0x0eb28bf671928ce4 + .quad 0x671feaf300f42772 + .quad 0x8f72eb2a2a8c41aa + .quad 0x29a17fd797373292 + .quad 0x1defc6ad32b587a6 + .quad 0xaf5bbe1aef5195a7 + .quad 0x148c1277917b15ed + .quad 0x2991f7fb7ae5da2e + .quad 0x467d201bf8dd2867 + + // 2^196 * 1 * G + + .quad 0x7906ee72f7bd2e6b + .quad 0x05d270d6109abf4e + .quad 0x8d5cfe45b941a8a4 + .quad 0x44c218671c974287 + .quad 0x745f9d56296bc318 + .quad 0x993580d4d8152e65 + .quad 0xb0e5b13f5839e9ce + .quad 0x51fc2b28d43921c0 + .quad 0x1b8fd11795e2a98c + .quad 0x1c4e5ee12b6b6291 + .quad 0x5b30e7107424b572 + .quad 0x6e6b9de84c4f4ac6 + + // 2^196 * 2 * G + + .quad 0xdff25fce4b1de151 + .quad 0xd841c0c7e11c4025 + .quad 0x2554b3c854749c87 + .quad 0x2d292459908e0df9 + .quad 0x6b7c5f10f80cb088 + .quad 0x736b54dc56e42151 + .quad 0xc2b620a5c6ef99c4 + .quad 0x5f4c802cc3a06f42 + .quad 0x9b65c8f17d0752da + .quad 0x881ce338c77ee800 + .quad 0xc3b514f05b62f9e3 + .quad 0x66ed5dd5bec10d48 + + // 2^196 * 3 * G + + .quad 0x7d38a1c20bb2089d + .quad 0x808334e196ccd412 + .quad 0xc4a70b8c6c97d313 + .quad 0x2eacf8bc03007f20 + .quad 0xf0adf3c9cbca047d + .quad 0x81c3b2cbf4552f6b + .quad 0xcfda112d44735f93 + .quad 0x1f23a0c77e20048c + .quad 0xf235467be5bc1570 + .quad 0x03d2d9020dbab38c + .quad 0x27529aa2fcf9e09e + .quad 0x0840bef29d34bc50 + + // 2^196 * 4 * G + + .quad 0x796dfb35dc10b287 + .quad 0x27176bcd5c7ff29d + .quad 0x7f3d43e8c7b24905 + .quad 0x0304f5a191c54276 + .quad 0xcd54e06b7f37e4eb + .quad 0x8cc15f87f5e96cca + .quad 0xb8248bb0d3597dce + .quad 0x246affa06074400c + .quad 0x37d88e68fbe45321 + .quad 0x86097548c0d75032 + .quad 0x4e9b13ef894a0d35 + .quad 0x25a83cac5753d325 + + // 2^196 * 5 * G + + .quad 0x10222f48eed8165e + .quad 0x623fc1234b8bcf3a + .quad 0x1e145c09c221e8f0 + .quad 0x7ccfa59fca782630 + .quad 0x9f0f66293952b6e2 + .quad 0x33db5e0e0934267b + .quad 0xff45252bd609fedc + .quad 0x06be10f5c506e0c9 + .quad 0x1a9615a9b62a345f + .quad 0x22050c564a52fecc + .quad 0xa7a2788528bc0dfe + .quad 0x5e82770a1a1ee71d + + // 2^196 * 6 * G + + .quad 
0x35425183ad896a5c + .quad 0xe8673afbe78d52f6 + .quad 0x2c66f25f92a35f64 + .quad 0x09d04f3b3b86b102 + .quad 0xe802e80a42339c74 + .quad 0x34175166a7fffae5 + .quad 0x34865d1f1c408cae + .quad 0x2cca982c605bc5ee + .quad 0xfd2d5d35197dbe6e + .quad 0x207c2eea8be4ffa3 + .quad 0x2613d8db325ae918 + .quad 0x7a325d1727741d3e + + // 2^196 * 7 * G + + .quad 0xd036b9bbd16dfde2 + .quad 0xa2055757c497a829 + .quad 0x8e6cc966a7f12667 + .quad 0x4d3b1a791239c180 + .quad 0xecd27d017e2a076a + .quad 0xd788689f1636495e + .quad 0x52a61af0919233e5 + .quad 0x2a479df17bb1ae64 + .quad 0x9e5eee8e33db2710 + .quad 0x189854ded6c43ca5 + .quad 0xa41c22c592718138 + .quad 0x27ad5538a43a5e9b + + // 2^196 * 8 * G + + .quad 0x2746dd4b15350d61 + .quad 0xd03fcbc8ee9521b7 + .quad 0xe86e365a138672ca + .quad 0x510e987f7e7d89e2 + .quad 0xcb5a7d638e47077c + .quad 0x8db7536120a1c059 + .quad 0x549e1e4d8bedfdcc + .quad 0x080153b7503b179d + .quad 0xdda69d930a3ed3e3 + .quad 0x3d386ef1cd60a722 + .quad 0xc817ad58bdaa4ee6 + .quad 0x23be8d554fe7372a + + // 2^200 * 1 * G + + .quad 0x95fe919a74ef4fad + .quad 0x3a827becf6a308a2 + .quad 0x964e01d309a47b01 + .quad 0x71c43c4f5ba3c797 + .quad 0xbc1ef4bd567ae7a9 + .quad 0x3f624cb2d64498bd + .quad 0xe41064d22c1f4ec8 + .quad 0x2ef9c5a5ba384001 + .quad 0xb6fd6df6fa9e74cd + .quad 0xf18278bce4af267a + .quad 0x8255b3d0f1ef990e + .quad 0x5a758ca390c5f293 + + // 2^200 * 2 * G + + .quad 0xa2b72710d9462495 + .quad 0x3aa8c6d2d57d5003 + .quad 0xe3d400bfa0b487ca + .quad 0x2dbae244b3eb72ec + .quad 0x8ce0918b1d61dc94 + .quad 0x8ded36469a813066 + .quad 0xd4e6a829afe8aad3 + .quad 0x0a738027f639d43f + .quad 0x980f4a2f57ffe1cc + .quad 0x00670d0de1839843 + .quad 0x105c3f4a49fb15fd + .quad 0x2698ca635126a69c + + // 2^200 * 3 * G + + .quad 0xe765318832b0ba78 + .quad 0x381831f7925cff8b + .quad 0x08a81b91a0291fcc + .quad 0x1fb43dcc49caeb07 + .quad 0x2e3d702f5e3dd90e + .quad 0x9e3f0918e4d25386 + .quad 0x5e773ef6024da96a + .quad 0x3c004b0c4afa3332 + .quad 0x9aa946ac06f4b82b + .quad 0x1ca284a5a806c4f3 + .quad 0x3ed3265fc6cd4787 + .quad 0x6b43fd01cd1fd217 + + // 2^200 * 4 * G + + .quad 0xc7a75d4b4697c544 + .quad 0x15fdf848df0fffbf + .quad 0x2868b9ebaa46785a + .quad 0x5a68d7105b52f714 + .quad 0xb5c742583e760ef3 + .quad 0x75dc52b9ee0ab990 + .quad 0xbf1427c2072b923f + .quad 0x73420b2d6ff0d9f0 + .quad 0xaf2cf6cb9e851e06 + .quad 0x8f593913c62238c4 + .quad 0xda8ab89699fbf373 + .quad 0x3db5632fea34bc9e + + // 2^200 * 5 * G + + .quad 0xf46eee2bf75dd9d8 + .quad 0x0d17b1f6396759a5 + .quad 0x1bf2d131499e7273 + .quad 0x04321adf49d75f13 + .quad 0x2e4990b1829825d5 + .quad 0xedeaeb873e9a8991 + .quad 0xeef03d394c704af8 + .quad 0x59197ea495df2b0e + .quad 0x04e16019e4e55aae + .quad 0xe77b437a7e2f92e9 + .quad 0xc7ce2dc16f159aa4 + .quad 0x45eafdc1f4d70cc0 + + // 2^200 * 6 * G + + .quad 0x698401858045d72b + .quad 0x4c22faa2cf2f0651 + .quad 0x941a36656b222dc6 + .quad 0x5a5eebc80362dade + .quad 0xb60e4624cfccb1ed + .quad 0x59dbc292bd5c0395 + .quad 0x31a09d1ddc0481c9 + .quad 0x3f73ceea5d56d940 + .quad 0xb7a7bfd10a4e8dc6 + .quad 0xbe57007e44c9b339 + .quad 0x60c1207f1557aefa + .quad 0x26058891266218db + + // 2^200 * 7 * G + + .quad 0x59f704a68360ff04 + .quad 0xc3d93fde7661e6f4 + .quad 0x831b2a7312873551 + .quad 0x54ad0c2e4e615d57 + .quad 0x4c818e3cc676e542 + .quad 0x5e422c9303ceccad + .quad 0xec07cccab4129f08 + .quad 0x0dedfa10b24443b8 + .quad 0xee3b67d5b82b522a + .quad 0x36f163469fa5c1eb + .quad 0xa5b4d2f26ec19fd3 + .quad 0x62ecb2baa77a9408 + + // 2^200 * 8 * G + + .quad 0xe5ed795261152b3d + .quad 0x4962357d0eddd7d1 + .quad 0x7482c8d0b96b4c71 + .quad 
0x2e59f919a966d8be + .quad 0x92072836afb62874 + .quad 0x5fcd5e8579e104a5 + .quad 0x5aad01adc630a14a + .quad 0x61913d5075663f98 + .quad 0x0dc62d361a3231da + .quad 0xfa47583294200270 + .quad 0x02d801513f9594ce + .quad 0x3ddbc2a131c05d5c + + // 2^204 * 1 * G + + .quad 0x3f50a50a4ffb81ef + .quad 0xb1e035093bf420bf + .quad 0x9baa8e1cc6aa2cd0 + .quad 0x32239861fa237a40 + .quad 0xfb735ac2004a35d1 + .quad 0x31de0f433a6607c3 + .quad 0x7b8591bfc528d599 + .quad 0x55be9a25f5bb050c + .quad 0x0d005acd33db3dbf + .quad 0x0111b37c80ac35e2 + .quad 0x4892d66c6f88ebeb + .quad 0x770eadb16508fbcd + + // 2^204 * 2 * G + + .quad 0x8451f9e05e4e89dd + .quad 0xc06302ffbc793937 + .quad 0x5d22749556a6495c + .quad 0x09a6755ca05603fb + .quad 0xf1d3b681a05071b9 + .quad 0x2207659a3592ff3a + .quad 0x5f0169297881e40e + .quad 0x16bedd0e86ba374e + .quad 0x5ecccc4f2c2737b5 + .quad 0x43b79e0c2dccb703 + .quad 0x33e008bc4ec43df3 + .quad 0x06c1b840f07566c0 + + // 2^204 * 3 * G + + .quad 0x7688a5c6a388f877 + .quad 0x02a96c14deb2b6ac + .quad 0x64c9f3431b8c2af8 + .quad 0x3628435554a1eed6 + .quad 0x69ee9e7f9b02805c + .quad 0xcbff828a547d1640 + .quad 0x3d93a869b2430968 + .quad 0x46b7b8cd3fe26972 + .quad 0xe9812086fe7eebe0 + .quad 0x4cba6be72f515437 + .quad 0x1d04168b516efae9 + .quad 0x5ea1391043982cb9 + + // 2^204 * 4 * G + + .quad 0x49125c9cf4702ee1 + .quad 0x4520b71f8b25b32d + .quad 0x33193026501fef7e + .quad 0x656d8997c8d2eb2b + .quad 0x6f2b3be4d5d3b002 + .quad 0xafec33d96a09c880 + .quad 0x035f73a4a8bcc4cc + .quad 0x22c5b9284662198b + .quad 0xcb58c8fe433d8939 + .quad 0x89a0cb2e6a8d7e50 + .quad 0x79ca955309fbbe5a + .quad 0x0c626616cd7fc106 + + // 2^204 * 5 * G + + .quad 0x1ffeb80a4879b61f + .quad 0x6396726e4ada21ed + .quad 0x33c7b093368025ba + .quad 0x471aa0c6f3c31788 + .quad 0x8fdfc379fbf454b1 + .quad 0x45a5a970f1a4b771 + .quad 0xac921ef7bad35915 + .quad 0x42d088dca81c2192 + .quad 0x8fda0f37a0165199 + .quad 0x0adadb77c8a0e343 + .quad 0x20fbfdfcc875e820 + .quad 0x1cf2bea80c2206e7 + + // 2^204 * 6 * G + + .quad 0xc2ddf1deb36202ac + .quad 0x92a5fe09d2e27aa5 + .quad 0x7d1648f6fc09f1d3 + .quad 0x74c2cc0513bc4959 + .quad 0x982d6e1a02c0412f + .quad 0x90fa4c83db58e8fe + .quad 0x01c2f5bcdcb18bc0 + .quad 0x686e0c90216abc66 + .quad 0x1fadbadba54395a7 + .quad 0xb41a02a0ae0da66a + .quad 0xbf19f598bba37c07 + .quad 0x6a12b8acde48430d + + // 2^204 * 7 * G + + .quad 0xf8daea1f39d495d9 + .quad 0x592c190e525f1dfc + .quad 0xdb8cbd04c9991d1b + .quad 0x11f7fda3d88f0cb7 + .quad 0x793bdd801aaeeb5f + .quad 0x00a2a0aac1518871 + .quad 0xe8a373a31f2136b4 + .quad 0x48aab888fc91ef19 + .quad 0x041f7e925830f40e + .quad 0x002d6ca979661c06 + .quad 0x86dc9ff92b046a2e + .quad 0x760360928b0493d1 + + // 2^204 * 8 * G + + .quad 0x21bb41c6120cf9c6 + .quad 0xeab2aa12decda59b + .quad 0xc1a72d020aa48b34 + .quad 0x215d4d27e87d3b68 + .quad 0xb43108e5695a0b05 + .quad 0x6cb00ee8ad37a38b + .quad 0x5edad6eea3537381 + .quad 0x3f2602d4b6dc3224 + .quad 0xc8b247b65bcaf19c + .quad 0x49779dc3b1b2c652 + .quad 0x89a180bbd5ece2e2 + .quad 0x13f098a3cec8e039 + + // 2^208 * 1 * G + + .quad 0x9adc0ff9ce5ec54b + .quad 0x039c2a6b8c2f130d + .quad 0x028007c7f0f89515 + .quad 0x78968314ac04b36b + .quad 0xf3aa57a22796bb14 + .quad 0x883abab79b07da21 + .quad 0xe54be21831a0391c + .quad 0x5ee7fb38d83205f9 + .quad 0x538dfdcb41446a8e + .quad 0xa5acfda9434937f9 + .quad 0x46af908d263c8c78 + .quad 0x61d0633c9bca0d09 + + // 2^208 * 2 * G + + .quad 0x63744935ffdb2566 + .quad 0xc5bd6b89780b68bb + .quad 0x6f1b3280553eec03 + .quad 0x6e965fd847aed7f5 + .quad 0xada328bcf8fc73df + .quad 0xee84695da6f037fc + .quad 
0x637fb4db38c2a909 + .quad 0x5b23ac2df8067bdc + .quad 0x9ad2b953ee80527b + .quad 0xe88f19aafade6d8d + .quad 0x0e711704150e82cf + .quad 0x79b9bbb9dd95dedc + + // 2^208 * 3 * G + + .quad 0xebb355406a3126c2 + .quad 0xd26383a868c8c393 + .quad 0x6c0c6429e5b97a82 + .quad 0x5065f158c9fd2147 + .quad 0xd1997dae8e9f7374 + .quad 0xa032a2f8cfbb0816 + .quad 0xcd6cba126d445f0a + .quad 0x1ba811460accb834 + .quad 0x708169fb0c429954 + .quad 0xe14600acd76ecf67 + .quad 0x2eaab98a70e645ba + .quad 0x3981f39e58a4faf2 + + // 2^208 * 4 * G + + .quad 0x18fb8a7559230a93 + .quad 0x1d168f6960e6f45d + .quad 0x3a85a94514a93cb5 + .quad 0x38dc083705acd0fd + .quad 0xc845dfa56de66fde + .quad 0xe152a5002c40483a + .quad 0xe9d2e163c7b4f632 + .quad 0x30f4452edcbc1b65 + .quad 0x856d2782c5759740 + .quad 0xfa134569f99cbecc + .quad 0x8844fc73c0ea4e71 + .quad 0x632d9a1a593f2469 + + // 2^208 * 5 * G + + .quad 0xf6bb6b15b807cba6 + .quad 0x1823c7dfbc54f0d7 + .quad 0xbb1d97036e29670b + .quad 0x0b24f48847ed4a57 + .quad 0xbf09fd11ed0c84a7 + .quad 0x63f071810d9f693a + .quad 0x21908c2d57cf8779 + .quad 0x3a5a7df28af64ba2 + .quad 0xdcdad4be511beac7 + .quad 0xa4538075ed26ccf2 + .quad 0xe19cff9f005f9a65 + .quad 0x34fcf74475481f63 + + // 2^208 * 6 * G + + .quad 0xc197e04c789767ca + .quad 0xb8714dcb38d9467d + .quad 0x55de888283f95fa8 + .quad 0x3d3bdc164dfa63f7 + .quad 0xa5bb1dab78cfaa98 + .quad 0x5ceda267190b72f2 + .quad 0x9309c9110a92608e + .quad 0x0119a3042fb374b0 + .quad 0x67a2d89ce8c2177d + .quad 0x669da5f66895d0c1 + .quad 0xf56598e5b282a2b0 + .quad 0x56c088f1ede20a73 + + // 2^208 * 7 * G + + .quad 0x336d3d1110a86e17 + .quad 0xd7f388320b75b2fa + .quad 0xf915337625072988 + .quad 0x09674c6b99108b87 + .quad 0x581b5fac24f38f02 + .quad 0xa90be9febae30cbd + .quad 0x9a2169028acf92f0 + .quad 0x038b7ea48359038f + .quad 0x9f4ef82199316ff8 + .quad 0x2f49d282eaa78d4f + .quad 0x0971a5ab5aef3174 + .quad 0x6e5e31025969eb65 + + // 2^208 * 8 * G + + .quad 0xb16c62f587e593fb + .quad 0x4999eddeca5d3e71 + .quad 0xb491c1e014cc3e6d + .quad 0x08f5114789a8dba8 + .quad 0x3304fb0e63066222 + .quad 0xfb35068987acba3f + .quad 0xbd1924778c1061a3 + .quad 0x3058ad43d1838620 + .quad 0x323c0ffde57663d0 + .quad 0x05c3df38a22ea610 + .quad 0xbdc78abdac994f9a + .quad 0x26549fa4efe3dc99 + + // 2^212 * 1 * G + + .quad 0x738b38d787ce8f89 + .quad 0xb62658e24179a88d + .quad 0x30738c9cf151316d + .quad 0x49128c7f727275c9 + .quad 0x04dbbc17f75396b9 + .quad 0x69e6a2d7d2f86746 + .quad 0xc6409d99f53eabc6 + .quad 0x606175f6332e25d2 + .quad 0x4021370ef540e7dd + .quad 0x0910d6f5a1f1d0a5 + .quad 0x4634aacd5b06b807 + .quad 0x6a39e6356944f235 + + // 2^212 * 2 * G + + .quad 0x96cd5640df90f3e7 + .quad 0x6c3a760edbfa25ea + .quad 0x24f3ef0959e33cc4 + .quad 0x42889e7e530d2e58 + .quad 0x1da1965774049e9d + .quad 0xfbcd6ea198fe352b + .quad 0xb1cbcd50cc5236a6 + .quad 0x1f5ec83d3f9846e2 + .quad 0x8efb23c3328ccb75 + .quad 0xaf42a207dd876ee9 + .quad 0x20fbdadc5dfae796 + .quad 0x241e246b06bf9f51 + + // 2^212 * 3 * G + + .quad 0x29e68e57ad6e98f6 + .quad 0x4c9260c80b462065 + .quad 0x3f00862ea51ebb4b + .quad 0x5bc2c77fb38d9097 + .quad 0x7eaafc9a6280bbb8 + .quad 0x22a70f12f403d809 + .quad 0x31ce40bb1bfc8d20 + .quad 0x2bc65635e8bd53ee + .quad 0xe8d5dc9fa96bad93 + .quad 0xe58fb17dde1947dc + .quad 0x681532ea65185fa3 + .quad 0x1fdd6c3b034a7830 + + // 2^212 * 4 * G + + .quad 0x0a64e28c55dc18fe + .quad 0xe3df9e993399ebdd + .quad 0x79ac432370e2e652 + .quad 0x35ff7fc33ae4cc0e + .quad 0x9c13a6a52dd8f7a9 + .quad 0x2dbb1f8c3efdcabf + .quad 0x961e32405e08f7b5 + .quad 0x48c8a121bbe6c9e5 + .quad 0xfc415a7c59646445 + .quad 
0xd224b2d7c128b615 + .quad 0x6035c9c905fbb912 + .quad 0x42d7a91274429fab + + // 2^212 * 5 * G + + .quad 0x4e6213e3eaf72ed3 + .quad 0x6794981a43acd4e7 + .quad 0xff547cde6eb508cb + .quad 0x6fed19dd10fcb532 + .quad 0xa9a48947933da5bc + .quad 0x4a58920ec2e979ec + .quad 0x96d8800013e5ac4c + .quad 0x453692d74b48b147 + .quad 0xdd775d99a8559c6f + .quad 0xf42a2140df003e24 + .quad 0x5223e229da928a66 + .quad 0x063f46ba6d38f22c + + // 2^212 * 6 * G + + .quad 0xd2d242895f536694 + .quad 0xca33a2c542939b2c + .quad 0x986fada6c7ddb95c + .quad 0x5a152c042f712d5d + .quad 0x39843cb737346921 + .quad 0xa747fb0738c89447 + .quad 0xcb8d8031a245307e + .quad 0x67810f8e6d82f068 + .quad 0x3eeb8fbcd2287db4 + .quad 0x72c7d3a301a03e93 + .quad 0x5473e88cbd98265a + .quad 0x7324aa515921b403 + + // 2^212 * 7 * G + + .quad 0x857942f46c3cbe8e + .quad 0xa1d364b14730c046 + .quad 0x1c8ed914d23c41bf + .quad 0x0838e161eef6d5d2 + .quad 0xad23f6dae82354cb + .quad 0x6962502ab6571a6d + .quad 0x9b651636e38e37d1 + .quad 0x5cac5005d1a3312f + .quad 0x8cc154cce9e39904 + .quad 0x5b3a040b84de6846 + .quad 0xc4d8a61cb1be5d6e + .quad 0x40fb897bd8861f02 + + // 2^212 * 8 * G + + .quad 0x84c5aa9062de37a1 + .quad 0x421da5000d1d96e1 + .quad 0x788286306a9242d9 + .quad 0x3c5e464a690d10da + .quad 0xe57ed8475ab10761 + .quad 0x71435e206fd13746 + .quad 0x342f824ecd025632 + .quad 0x4b16281ea8791e7b + .quad 0xd1c101d50b813381 + .quad 0xdee60f1176ee6828 + .quad 0x0cb68893383f6409 + .quad 0x6183c565f6ff484a + + // 2^216 * 1 * G + + .quad 0x741d5a461e6bf9d6 + .quad 0x2305b3fc7777a581 + .quad 0xd45574a26474d3d9 + .quad 0x1926e1dc6401e0ff + .quad 0xdb468549af3f666e + .quad 0xd77fcf04f14a0ea5 + .quad 0x3df23ff7a4ba0c47 + .quad 0x3a10dfe132ce3c85 + .quad 0xe07f4e8aea17cea0 + .quad 0x2fd515463a1fc1fd + .quad 0x175322fd31f2c0f1 + .quad 0x1fa1d01d861e5d15 + + // 2^216 * 2 * G + + .quad 0xcc8055947d599832 + .quad 0x1e4656da37f15520 + .quad 0x99f6f7744e059320 + .quad 0x773563bc6a75cf33 + .quad 0x38dcac00d1df94ab + .quad 0x2e712bddd1080de9 + .quad 0x7f13e93efdd5e262 + .quad 0x73fced18ee9a01e5 + .quad 0x06b1e90863139cb3 + .quad 0xa493da67c5a03ecd + .quad 0x8d77cec8ad638932 + .quad 0x1f426b701b864f44 + + // 2^216 * 3 * G + + .quad 0xefc9264c41911c01 + .quad 0xf1a3b7b817a22c25 + .quad 0x5875da6bf30f1447 + .quad 0x4e1af5271d31b090 + .quad 0xf17e35c891a12552 + .quad 0xb76b8153575e9c76 + .quad 0xfa83406f0d9b723e + .quad 0x0b76bb1b3fa7e438 + .quad 0x08b8c1f97f92939b + .quad 0xbe6771cbd444ab6e + .quad 0x22e5646399bb8017 + .quad 0x7b6dd61eb772a955 + + // 2^216 * 4 * G + + .quad 0xb7adc1e850f33d92 + .quad 0x7998fa4f608cd5cf + .quad 0xad962dbd8dfc5bdb + .quad 0x703e9bceaf1d2f4f + .quad 0x5730abf9ab01d2c7 + .quad 0x16fb76dc40143b18 + .quad 0x866cbe65a0cbb281 + .quad 0x53fa9b659bff6afe + .quad 0x6c14c8e994885455 + .quad 0x843a5d6665aed4e5 + .quad 0x181bb73ebcd65af1 + .quad 0x398d93e5c4c61f50 + + // 2^216 * 5 * G + + .quad 0x1c4bd16733e248f3 + .quad 0xbd9e128715bf0a5f + .quad 0xd43f8cf0a10b0376 + .quad 0x53b09b5ddf191b13 + .quad 0xc3877c60d2e7e3f2 + .quad 0x3b34aaa030828bb1 + .quad 0x283e26e7739ef138 + .quad 0x699c9c9002c30577 + .quad 0xf306a7235946f1cc + .quad 0x921718b5cce5d97d + .quad 0x28cdd24781b4e975 + .quad 0x51caf30c6fcdd907 + + // 2^216 * 6 * G + + .quad 0xa60ba7427674e00a + .quad 0x630e8570a17a7bf3 + .quad 0x3758563dcf3324cc + .quad 0x5504aa292383fdaa + .quad 0x737af99a18ac54c7 + .quad 0x903378dcc51cb30f + .quad 0x2b89bc334ce10cc7 + .quad 0x12ae29c189f8e99a + .quad 0xa99ec0cb1f0d01cf + .quad 0x0dd1efcc3a34f7ae + .quad 0x55ca7521d09c4e22 + .quad 0x5fd14fe958eba5ea + + // 
2^216 * 7 * G + + .quad 0xb5dc2ddf2845ab2c + .quad 0x069491b10a7fe993 + .quad 0x4daaf3d64002e346 + .quad 0x093ff26e586474d1 + .quad 0x3c42fe5ebf93cb8e + .quad 0xbedfa85136d4565f + .quad 0xe0f0859e884220e8 + .quad 0x7dd73f960725d128 + .quad 0xb10d24fe68059829 + .quad 0x75730672dbaf23e5 + .quad 0x1367253ab457ac29 + .quad 0x2f59bcbc86b470a4 + + // 2^216 * 8 * G + + .quad 0x83847d429917135f + .quad 0xad1b911f567d03d7 + .quad 0x7e7748d9be77aad1 + .quad 0x5458b42e2e51af4a + .quad 0x7041d560b691c301 + .quad 0x85201b3fadd7e71e + .quad 0x16c2e16311335585 + .quad 0x2aa55e3d010828b1 + .quad 0xed5192e60c07444f + .quad 0x42c54e2d74421d10 + .quad 0x352b4c82fdb5c864 + .quad 0x13e9004a8a768664 + + // 2^220 * 1 * G + + .quad 0xcbb5b5556c032bff + .quad 0xdf7191b729297a3a + .quad 0xc1ff7326aded81bb + .quad 0x71ade8bb68be03f5 + .quad 0x1e6284c5806b467c + .quad 0xc5f6997be75d607b + .quad 0x8b67d958b378d262 + .quad 0x3d88d66a81cd8b70 + .quad 0x8b767a93204ed789 + .quad 0x762fcacb9fa0ae2a + .quad 0x771febcc6dce4887 + .quad 0x343062158ff05fb3 + + // 2^220 * 2 * G + + .quad 0xe05da1a7e1f5bf49 + .quad 0x26457d6dd4736092 + .quad 0x77dcb07773cc32f6 + .quad 0x0a5d94969cdd5fcd + .quad 0xfce219072a7b31b4 + .quad 0x4d7adc75aa578016 + .quad 0x0ec276a687479324 + .quad 0x6d6d9d5d1fda4beb + .quad 0x22b1a58ae9b08183 + .quad 0xfd95d071c15c388b + .quad 0xa9812376850a0517 + .quad 0x33384cbabb7f335e + + // 2^220 * 3 * G + + .quad 0x3c6fa2680ca2c7b5 + .quad 0x1b5082046fb64fda + .quad 0xeb53349c5431d6de + .quad 0x5278b38f6b879c89 + .quad 0x33bc627a26218b8d + .quad 0xea80b21fc7a80c61 + .quad 0x9458b12b173e9ee6 + .quad 0x076247be0e2f3059 + .quad 0x52e105f61416375a + .quad 0xec97af3685abeba4 + .quad 0x26e6b50623a67c36 + .quad 0x5cf0e856f3d4fb01 + + // 2^220 * 4 * G + + .quad 0xf6c968731ae8cab4 + .quad 0x5e20741ecb4f92c5 + .quad 0x2da53be58ccdbc3e + .quad 0x2dddfea269970df7 + .quad 0xbeaece313db342a8 + .quad 0xcba3635b842db7ee + .quad 0xe88c6620817f13ef + .quad 0x1b9438aa4e76d5c6 + .quad 0x8a50777e166f031a + .quad 0x067b39f10fb7a328 + .quad 0x1925c9a6010fbd76 + .quad 0x6df9b575cc740905 + + // 2^220 * 5 * G + + .quad 0x42c1192927f6bdcf + .quad 0x8f91917a403d61ca + .quad 0xdc1c5a668b9e1f61 + .quad 0x1596047804ec0f8d + .quad 0xecdfc35b48cade41 + .quad 0x6a88471fb2328270 + .quad 0x740a4a2440a01b6a + .quad 0x471e5796003b5f29 + .quad 0xda96bbb3aced37ac + .quad 0x7a2423b5e9208cea + .quad 0x24cc5c3038aebae2 + .quad 0x50c356afdc5dae2f + + // 2^220 * 6 * G + + .quad 0x09dcbf4341c30318 + .quad 0xeeba061183181dce + .quad 0xc179c0cedc1e29a1 + .quad 0x1dbf7b89073f35b0 + .quad 0xcfed9cdf1b31b964 + .quad 0xf486a9858ca51af3 + .quad 0x14897265ea8c1f84 + .quad 0x784a53dd932acc00 + .quad 0x2d99f9df14fc4920 + .quad 0x76ccb60cc4499fe5 + .quad 0xa4132cbbe5cf0003 + .quad 0x3f93d82354f000ea + + // 2^220 * 7 * G + + .quad 0x8183e7689e04ce85 + .quad 0x678fb71e04465341 + .quad 0xad92058f6688edac + .quad 0x5da350d3532b099a + .quad 0xeaac12d179e14978 + .quad 0xff923ff3bbebff5e + .quad 0x4af663e40663ce27 + .quad 0x0fd381a811a5f5ff + .quad 0xf256aceca436df54 + .quad 0x108b6168ae69d6e8 + .quad 0x20d986cb6b5d036c + .quad 0x655957b9fee2af50 + + // 2^220 * 8 * G + + .quad 0xaea8b07fa902030f + .quad 0xf88c766af463d143 + .quad 0x15b083663c787a60 + .quad 0x08eab1148267a4a8 + .quad 0xbdc1409bd002d0ac + .quad 0x66660245b5ccd9a6 + .quad 0x82317dc4fade85ec + .quad 0x02fe934b6ad7df0d + .quad 0xef5cf100cfb7ea74 + .quad 0x22897633a1cb42ac + .quad 0xd4ce0c54cef285e2 + .quad 0x30408c048a146a55 + + // 2^224 * 1 * G + + .quad 0x739d8845832fcedb + .quad 0xfa38d6c9ae6bf863 + .quad 
0x32bc0dcab74ffef7 + .quad 0x73937e8814bce45e + .quad 0xbb2e00c9193b877f + .quad 0xece3a890e0dc506b + .quad 0xecf3b7c036de649f + .quad 0x5f46040898de9e1a + .quad 0xb9037116297bf48d + .quad 0xa9d13b22d4f06834 + .quad 0xe19715574696bdc6 + .quad 0x2cf8a4e891d5e835 + + // 2^224 * 2 * G + + .quad 0x6d93fd8707110f67 + .quad 0xdd4c09d37c38b549 + .quad 0x7cb16a4cc2736a86 + .quad 0x2049bd6e58252a09 + .quad 0x2cb5487e17d06ba2 + .quad 0x24d2381c3950196b + .quad 0xd7659c8185978a30 + .quad 0x7a6f7f2891d6a4f6 + .quad 0x7d09fd8d6a9aef49 + .quad 0xf0ee60be5b3db90b + .quad 0x4c21b52c519ebfd4 + .quad 0x6011aadfc545941d + + // 2^224 * 3 * G + + .quad 0x5f67926dcf95f83c + .quad 0x7c7e856171289071 + .quad 0xd6a1e7f3998f7a5b + .quad 0x6fc5cc1b0b62f9e0 + .quad 0x63ded0c802cbf890 + .quad 0xfbd098ca0dff6aaa + .quad 0x624d0afdb9b6ed99 + .quad 0x69ce18b779340b1e + .quad 0xd1ef5528b29879cb + .quad 0xdd1aae3cd47e9092 + .quad 0x127e0442189f2352 + .quad 0x15596b3ae57101f1 + + // 2^224 * 4 * G + + .quad 0x462739d23f9179a2 + .quad 0xff83123197d6ddcf + .quad 0x1307deb553f2148a + .quad 0x0d2237687b5f4dda + .quad 0x09ff31167e5124ca + .quad 0x0be4158bd9c745df + .quad 0x292b7d227ef556e5 + .quad 0x3aa4e241afb6d138 + .quad 0x2cc138bf2a3305f5 + .quad 0x48583f8fa2e926c3 + .quad 0x083ab1a25549d2eb + .quad 0x32fcaa6e4687a36c + + // 2^224 * 5 * G + + .quad 0x7bc56e8dc57d9af5 + .quad 0x3e0bd2ed9df0bdf2 + .quad 0xaac014de22efe4a3 + .quad 0x4627e9cefebd6a5c + .quad 0x3207a4732787ccdf + .quad 0x17e31908f213e3f8 + .quad 0xd5b2ecd7f60d964e + .quad 0x746f6336c2600be9 + .quad 0x3f4af345ab6c971c + .quad 0xe288eb729943731f + .quad 0x33596a8a0344186d + .quad 0x7b4917007ed66293 + + // 2^224 * 6 * G + + .quad 0x2d85fb5cab84b064 + .quad 0x497810d289f3bc14 + .quad 0x476adc447b15ce0c + .quad 0x122ba376f844fd7b + .quad 0x54341b28dd53a2dd + .quad 0xaa17905bdf42fc3f + .quad 0x0ff592d94dd2f8f4 + .quad 0x1d03620fe08cd37d + .quad 0xc20232cda2b4e554 + .quad 0x9ed0fd42115d187f + .quad 0x2eabb4be7dd479d9 + .quad 0x02c70bf52b68ec4c + + // 2^224 * 7 * G + + .quad 0xa287ec4b5d0b2fbb + .quad 0x415c5790074882ca + .quad 0xe044a61ec1d0815c + .quad 0x26334f0a409ef5e0 + .quad 0xace532bf458d72e1 + .quad 0x5be768e07cb73cb5 + .quad 0x56cf7d94ee8bbde7 + .quad 0x6b0697e3feb43a03 + .quad 0xb6c8f04adf62a3c0 + .quad 0x3ef000ef076da45d + .quad 0x9c9cb95849f0d2a9 + .quad 0x1cc37f43441b2fae + + // 2^224 * 8 * G + + .quad 0x508f565a5cc7324f + .quad 0xd061c4c0e506a922 + .quad 0xfb18abdb5c45ac19 + .quad 0x6c6809c10380314a + .quad 0xd76656f1c9ceaeb9 + .quad 0x1c5b15f818e5656a + .quad 0x26e72832844c2334 + .quad 0x3a346f772f196838 + .quad 0xd2d55112e2da6ac8 + .quad 0xe9bd0331b1e851ed + .quad 0x960746dd8ec67262 + .quad 0x05911b9f6ef7c5d0 + + // 2^228 * 1 * G + + .quad 0xe9dcd756b637ff2d + .quad 0xec4c348fc987f0c4 + .quad 0xced59285f3fbc7b7 + .quad 0x3305354793e1ea87 + .quad 0x01c18980c5fe9f94 + .quad 0xcd656769716fd5c8 + .quad 0x816045c3d195a086 + .quad 0x6e2b7f3266cc7982 + .quad 0xcc802468f7c3568f + .quad 0x9de9ba8219974cb3 + .quad 0xabb7229cb5b81360 + .quad 0x44e2017a6fbeba62 + + // 2^228 * 2 * G + + .quad 0xc4c2a74354dab774 + .quad 0x8e5d4c3c4eaf031a + .quad 0xb76c23d242838f17 + .quad 0x749a098f68dce4ea + .quad 0x87f82cf3b6ca6ecd + .quad 0x580f893e18f4a0c2 + .quad 0x058930072604e557 + .quad 0x6cab6ac256d19c1d + .quad 0xdcdfe0a02cc1de60 + .quad 0x032665ff51c5575b + .quad 0x2c0c32f1073abeeb + .quad 0x6a882014cd7b8606 + + // 2^228 * 3 * G + + .quad 0xa52a92fea4747fb5 + .quad 0xdc12a4491fa5ab89 + .quad 0xd82da94bb847a4ce + .quad 0x4d77edce9512cc4e + .quad 0xd111d17caf4feb6e + .quad 
0x050bba42b33aa4a3 + .quad 0x17514c3ceeb46c30 + .quad 0x54bedb8b1bc27d75 + .quad 0x77c8e14577e2189c + .quad 0xa3e46f6aff99c445 + .quad 0x3144dfc86d335343 + .quad 0x3a96559e7c4216a9 + + // 2^228 * 4 * G + + .quad 0x12550d37f42ad2ee + .quad 0x8b78e00498a1fbf5 + .quad 0x5d53078233894cb2 + .quad 0x02c84e4e3e498d0c + .quad 0x4493896880baaa52 + .quad 0x4c98afc4f285940e + .quad 0xef4aa79ba45448b6 + .quad 0x5278c510a57aae7f + .quad 0xa54dd074294c0b94 + .quad 0xf55d46b8df18ffb6 + .quad 0xf06fecc58dae8366 + .quad 0x588657668190d165 + + // 2^228 * 5 * G + + .quad 0xd47712311aef7117 + .quad 0x50343101229e92c7 + .quad 0x7a95e1849d159b97 + .quad 0x2449959b8b5d29c9 + .quad 0xbf5834f03de25cc3 + .quad 0xb887c8aed6815496 + .quad 0x5105221a9481e892 + .quad 0x6760ed19f7723f93 + .quad 0x669ba3b7ac35e160 + .quad 0x2eccf73fba842056 + .quad 0x1aec1f17c0804f07 + .quad 0x0d96bc031856f4e7 + + // 2^228 * 6 * G + + .quad 0x3318be7775c52d82 + .quad 0x4cb764b554d0aab9 + .quad 0xabcf3d27cc773d91 + .quad 0x3bf4d1848123288a + .quad 0xb1d534b0cc7505e1 + .quad 0x32cd003416c35288 + .quad 0xcb36a5800762c29d + .quad 0x5bfe69b9237a0bf8 + .quad 0x183eab7e78a151ab + .quad 0xbbe990c999093763 + .quad 0xff717d6e4ac7e335 + .quad 0x4c5cddb325f39f88 + + // 2^228 * 7 * G + + .quad 0xc0f6b74d6190a6eb + .quad 0x20ea81a42db8f4e4 + .quad 0xa8bd6f7d97315760 + .quad 0x33b1d60262ac7c21 + .quad 0x57750967e7a9f902 + .quad 0x2c37fdfc4f5b467e + .quad 0xb261663a3177ba46 + .quad 0x3a375e78dc2d532b + .quad 0x8141e72f2d4dddea + .quad 0xe6eafe9862c607c8 + .quad 0x23c28458573cafd0 + .quad 0x46b9476f4ff97346 + + // 2^228 * 8 * G + + .quad 0x0c1ffea44f901e5c + .quad 0x2b0b6fb72184b782 + .quad 0xe587ff910114db88 + .quad 0x37130f364785a142 + .quad 0x1215505c0d58359f + .quad 0x2a2013c7fc28c46b + .quad 0x24a0a1af89ea664e + .quad 0x4400b638a1130e1f + .quad 0x3a01b76496ed19c3 + .quad 0x31e00ab0ed327230 + .quad 0x520a885783ca15b1 + .quad 0x06aab9875accbec7 + + // 2^232 * 1 * G + + .quad 0xc1339983f5df0ebb + .quad 0xc0f3758f512c4cac + .quad 0x2cf1130a0bb398e1 + .quad 0x6b3cecf9aa270c62 + .quad 0x5349acf3512eeaef + .quad 0x20c141d31cc1cb49 + .quad 0x24180c07a99a688d + .quad 0x555ef9d1c64b2d17 + .quad 0x36a770ba3b73bd08 + .quad 0x624aef08a3afbf0c + .quad 0x5737ff98b40946f2 + .quad 0x675f4de13381749d + + // 2^232 * 2 * G + + .quad 0x0e2c52036b1782fc + .quad 0x64816c816cad83b4 + .quad 0xd0dcbdd96964073e + .quad 0x13d99df70164c520 + .quad 0xa12ff6d93bdab31d + .quad 0x0725d80f9d652dfe + .quad 0x019c4ff39abe9487 + .quad 0x60f450b882cd3c43 + .quad 0x014b5ec321e5c0ca + .quad 0x4fcb69c9d719bfa2 + .quad 0x4e5f1c18750023a0 + .quad 0x1c06de9e55edac80 + + // 2^232 * 3 * G + + .quad 0x990f7ad6a33ec4e2 + .quad 0x6608f938be2ee08e + .quad 0x9ca143c563284515 + .quad 0x4cf38a1fec2db60d + .quad 0xffd52b40ff6d69aa + .quad 0x34530b18dc4049bb + .quad 0x5e4a5c2fa34d9897 + .quad 0x78096f8e7d32ba2d + .quad 0xa0aaaa650dfa5ce7 + .quad 0xf9c49e2a48b5478c + .quad 0x4f09cc7d7003725b + .quad 0x373cad3a26091abe + + // 2^232 * 4 * G + + .quad 0xb294634d82c9f57c + .quad 0x1fcbfde124934536 + .quad 0x9e9c4db3418cdb5a + .quad 0x0040f3d9454419fc + .quad 0xf1bea8fb89ddbbad + .quad 0x3bcb2cbc61aeaecb + .quad 0x8f58a7bb1f9b8d9d + .quad 0x21547eda5112a686 + .quad 0xdefde939fd5986d3 + .quad 0xf4272c89510a380c + .quad 0xb72ba407bb3119b9 + .quad 0x63550a334a254df4 + + // 2^232 * 5 * G + + .quad 0x6507d6edb569cf37 + .quad 0x178429b00ca52ee1 + .quad 0xea7c0090eb6bd65d + .quad 0x3eea62c7daf78f51 + .quad 0x9bba584572547b49 + .quad 0xf305c6fae2c408e0 + .quad 0x60e8fa69c734f18d + .quad 0x39a92bafaa7d767a + .quad 
0x9d24c713e693274e + .quad 0x5f63857768dbd375 + .quad 0x70525560eb8ab39a + .quad 0x68436a0665c9c4cd + + // 2^232 * 6 * G + + .quad 0xbc0235e8202f3f27 + .quad 0xc75c00e264f975b0 + .quad 0x91a4e9d5a38c2416 + .quad 0x17b6e7f68ab789f9 + .quad 0x1e56d317e820107c + .quad 0xc5266844840ae965 + .quad 0xc1e0a1c6320ffc7a + .quad 0x5373669c91611472 + .quad 0x5d2814ab9a0e5257 + .quad 0x908f2084c9cab3fc + .quad 0xafcaf5885b2d1eca + .quad 0x1cb4b5a678f87d11 + + // 2^232 * 7 * G + + .quad 0xb664c06b394afc6c + .quad 0x0c88de2498da5fb1 + .quad 0x4f8d03164bcad834 + .quad 0x330bca78de7434a2 + .quad 0x6b74aa62a2a007e7 + .quad 0xf311e0b0f071c7b1 + .quad 0x5707e438000be223 + .quad 0x2dc0fd2d82ef6eac + .quad 0x982eff841119744e + .quad 0xf9695e962b074724 + .quad 0xc58ac14fbfc953fb + .quad 0x3c31be1b369f1cf5 + + // 2^232 * 8 * G + + .quad 0xb0f4864d08948aee + .quad 0x07dc19ee91ba1c6f + .quad 0x7975cdaea6aca158 + .quad 0x330b61134262d4bb + .quad 0xc168bc93f9cb4272 + .quad 0xaeb8711fc7cedb98 + .quad 0x7f0e52aa34ac8d7a + .quad 0x41cec1097e7d55bb + .quad 0xf79619d7a26d808a + .quad 0xbb1fd49e1d9e156d + .quad 0x73d7c36cdba1df27 + .quad 0x26b44cd91f28777d + + // 2^236 * 1 * G + + .quad 0x300a9035393aa6d8 + .quad 0x2b501131a12bb1cd + .quad 0x7b1ff677f093c222 + .quad 0x4309c1f8cab82bad + .quad 0xaf44842db0285f37 + .quad 0x8753189047efc8df + .quad 0x9574e091f820979a + .quad 0x0e378d6069615579 + .quad 0xd9fa917183075a55 + .quad 0x4bdb5ad26b009fdc + .quad 0x7829ad2cd63def0e + .quad 0x078fc54975fd3877 + + // 2^236 * 2 * G + + .quad 0x87dfbd1428878f2d + .quad 0x134636dd1e9421a1 + .quad 0x4f17c951257341a3 + .quad 0x5df98d4bad296cb8 + .quad 0xe2004b5bb833a98a + .quad 0x44775dec2d4c3330 + .quad 0x3aa244067eace913 + .quad 0x272630e3d58e00a9 + .quad 0xf3678fd0ecc90b54 + .quad 0xf001459b12043599 + .quad 0x26725fbc3758b89b + .quad 0x4325e4aa73a719ae + + // 2^236 * 3 * G + + .quad 0x657dc6ef433c3493 + .quad 0x65375e9f80dbf8c3 + .quad 0x47fd2d465b372dae + .quad 0x4966ab79796e7947 + .quad 0xed24629acf69f59d + .quad 0x2a4a1ccedd5abbf4 + .quad 0x3535ca1f56b2d67b + .quad 0x5d8c68d043b1b42d + .quad 0xee332d4de3b42b0a + .quad 0xd84e5a2b16a4601c + .quad 0x78243877078ba3e4 + .quad 0x77ed1eb4184ee437 + + // 2^236 * 4 * G + + .quad 0xbfd4e13f201839a0 + .quad 0xaeefffe23e3df161 + .quad 0xb65b04f06b5d1fe3 + .quad 0x52e085fb2b62fbc0 + .quad 0x185d43f89e92ed1a + .quad 0xb04a1eeafe4719c6 + .quad 0x499fbe88a6f03f4f + .quad 0x5d8b0d2f3c859bdd + .quad 0x124079eaa54cf2ba + .quad 0xd72465eb001b26e7 + .quad 0x6843bcfdc97af7fd + .quad 0x0524b42b55eacd02 + + // 2^236 * 5 * G + + .quad 0xfd0d5dbee45447b0 + .quad 0x6cec351a092005ee + .quad 0x99a47844567579cb + .quad 0x59d242a216e7fa45 + .quad 0xbc18dcad9b829eac + .quad 0x23ae7d28b5f579d0 + .quad 0xc346122a69384233 + .quad 0x1a6110b2e7d4ac89 + .quad 0x4f833f6ae66997ac + .quad 0x6849762a361839a4 + .quad 0x6985dec1970ab525 + .quad 0x53045e89dcb1f546 + + // 2^236 * 6 * G + + .quad 0xcb8bb346d75353db + .quad 0xfcfcb24bae511e22 + .quad 0xcba48d40d50ae6ef + .quad 0x26e3bae5f4f7cb5d + .quad 0x84da3cde8d45fe12 + .quad 0xbd42c218e444e2d2 + .quad 0xa85196781f7e3598 + .quad 0x7642c93f5616e2b2 + .quad 0x2323daa74595f8e4 + .quad 0xde688c8b857abeb4 + .quad 0x3fc48e961c59326e + .quad 0x0b2e73ca15c9b8ba + + // 2^236 * 7 * G + + .quad 0xd6bb4428c17f5026 + .quad 0x9eb27223fb5a9ca7 + .quad 0xe37ba5031919c644 + .quad 0x21ce380db59a6602 + .quad 0x0e3fbfaf79c03a55 + .quad 0x3077af054cbb5acf + .quad 0xd5c55245db3de39f + .quad 0x015e68c1476a4af7 + .quad 0xc1d5285220066a38 + .quad 0x95603e523570aef3 + .quad 0x832659a7226b8a4d + .quad 
0x5dd689091f8eedc9 + + // 2^236 * 8 * G + + .quad 0xcbac84debfd3c856 + .quad 0x1624c348b35ff244 + .quad 0xb7f88dca5d9cad07 + .quad 0x3b0e574da2c2ebe8 + .quad 0x1d022591a5313084 + .quad 0xca2d4aaed6270872 + .quad 0x86a12b852f0bfd20 + .quad 0x56e6c439ad7da748 + .quad 0xc704ff4942bdbae6 + .quad 0x5e21ade2b2de1f79 + .quad 0xe95db3f35652fad8 + .quad 0x0822b5378f08ebc1 + + // 2^240 * 1 * G + + .quad 0x51f048478f387475 + .quad 0xb25dbcf49cbecb3c + .quad 0x9aab1244d99f2055 + .quad 0x2c709e6c1c10a5d6 + .quad 0xe1b7f29362730383 + .quad 0x4b5279ffebca8a2c + .quad 0xdafc778abfd41314 + .quad 0x7deb10149c72610f + .quad 0xcb62af6a8766ee7a + .quad 0x66cbec045553cd0e + .quad 0x588001380f0be4b5 + .quad 0x08e68e9ff62ce2ea + + // 2^240 * 2 * G + + .quad 0x34ad500a4bc130ad + .quad 0x8d38db493d0bd49c + .quad 0xa25c3d98500a89be + .quad 0x2f1f3f87eeba3b09 + .quad 0x2f2d09d50ab8f2f9 + .quad 0xacb9218dc55923df + .quad 0x4a8f342673766cb9 + .quad 0x4cb13bd738f719f5 + .quad 0xf7848c75e515b64a + .quad 0xa59501badb4a9038 + .quad 0xc20d313f3f751b50 + .quad 0x19a1e353c0ae2ee8 + + // 2^240 * 3 * G + + .quad 0x7d1c7560bafa05c3 + .quad 0xb3e1a0a0c6e55e61 + .quad 0xe3529718c0d66473 + .quad 0x41546b11c20c3486 + .quad 0xb42172cdd596bdbd + .quad 0x93e0454398eefc40 + .quad 0x9fb15347b44109b5 + .quad 0x736bd3990266ae34 + .quad 0x85532d509334b3b4 + .quad 0x46fd114b60816573 + .quad 0xcc5f5f30425c8375 + .quad 0x412295a2b87fab5c + + // 2^240 * 4 * G + + .quad 0x19c99b88f57ed6e9 + .quad 0x5393cb266df8c825 + .quad 0x5cee3213b30ad273 + .quad 0x14e153ebb52d2e34 + .quad 0x2e655261e293eac6 + .quad 0x845a92032133acdb + .quad 0x460975cb7900996b + .quad 0x0760bb8d195add80 + .quad 0x413e1a17cde6818a + .quad 0x57156da9ed69a084 + .quad 0x2cbf268f46caccb1 + .quad 0x6b34be9bc33ac5f2 + + // 2^240 * 5 * G + + .quad 0xf3df2f643a78c0b2 + .quad 0x4c3e971ef22e027c + .quad 0xec7d1c5e49c1b5a3 + .quad 0x2012c18f0922dd2d + .quad 0x11fc69656571f2d3 + .quad 0xc6c9e845530e737a + .quad 0xe33ae7a2d4fe5035 + .quad 0x01b9c7b62e6dd30b + .quad 0x880b55e55ac89d29 + .quad 0x1483241f45a0a763 + .quad 0x3d36efdfc2e76c1f + .quad 0x08af5b784e4bade8 + + // 2^240 * 6 * G + + .quad 0x283499dc881f2533 + .quad 0x9d0525da779323b6 + .quad 0x897addfb673441f4 + .quad 0x32b79d71163a168d + .quad 0xe27314d289cc2c4b + .quad 0x4be4bd11a287178d + .quad 0x18d528d6fa3364ce + .quad 0x6423c1d5afd9826e + .quad 0xcc85f8d9edfcb36a + .quad 0x22bcc28f3746e5f9 + .quad 0xe49de338f9e5d3cd + .quad 0x480a5efbc13e2dcc + + // 2^240 * 7 * G + + .quad 0x0b51e70b01622071 + .quad 0x06b505cf8b1dafc5 + .quad 0x2c6bb061ef5aabcd + .quad 0x47aa27600cb7bf31 + .quad 0xb6614ce442ce221f + .quad 0x6e199dcc4c053928 + .quad 0x663fb4a4dc1cbe03 + .quad 0x24b31d47691c8e06 + .quad 0x2a541eedc015f8c3 + .quad 0x11a4fe7e7c693f7c + .quad 0xf0af66134ea278d6 + .quad 0x545b585d14dda094 + + // 2^240 * 8 * G + + .quad 0x67bf275ea0d43a0f + .quad 0xade68e34089beebe + .quad 0x4289134cd479e72e + .quad 0x0f62f9c332ba5454 + .quad 0x6204e4d0e3b321e1 + .quad 0x3baa637a28ff1e95 + .quad 0x0b0ccffd5b99bd9e + .quad 0x4d22dc3e64c8d071 + .quad 0xfcb46589d63b5f39 + .quad 0x5cae6a3f57cbcf61 + .quad 0xfebac2d2953afa05 + .quad 0x1c0fa01a36371436 + + // 2^244 * 1 * G + + .quad 0xe7547449bc7cd692 + .quad 0x0f9abeaae6f73ddf + .quad 0x4af01ca700837e29 + .quad 0x63ab1b5d3f1bc183 + .quad 0xc11ee5e854c53fae + .quad 0x6a0b06c12b4f3ff4 + .quad 0x33540f80e0b67a72 + .quad 0x15f18fc3cd07e3ef + .quad 0x32750763b028f48c + .quad 0x06020740556a065f + .quad 0xd53bd812c3495b58 + .quad 0x08706c9b865f508d + + // 2^244 * 2 * G + + .quad 0xf37ca2ab3d343dff + .quad 
0x1a8c6a2d80abc617 + .quad 0x8e49e035d4ccffca + .quad 0x48b46beebaa1d1b9 + .quad 0xcc991b4138b41246 + .quad 0x243b9c526f9ac26b + .quad 0xb9ef494db7cbabbd + .quad 0x5fba433dd082ed00 + .quad 0x9c49e355c9941ad0 + .quad 0xb9734ade74498f84 + .quad 0x41c3fed066663e5c + .quad 0x0ecfedf8e8e710b3 + + // 2^244 * 3 * G + + .quad 0x76430f9f9cd470d9 + .quad 0xb62acc9ba42f6008 + .quad 0x1898297c59adad5e + .quad 0x7789dd2db78c5080 + .quad 0x744f7463e9403762 + .quad 0xf79a8dee8dfcc9c9 + .quad 0x163a649655e4cde3 + .quad 0x3b61788db284f435 + .quad 0xb22228190d6ef6b2 + .quad 0xa94a66b246ce4bfa + .quad 0x46c1a77a4f0b6cc7 + .quad 0x4236ccffeb7338cf + + // 2^244 * 4 * G + + .quad 0x8497404d0d55e274 + .quad 0x6c6663d9c4ad2b53 + .quad 0xec2fb0d9ada95734 + .quad 0x2617e120cdb8f73c + .quad 0x3bd82dbfda777df6 + .quad 0x71b177cc0b98369e + .quad 0x1d0e8463850c3699 + .quad 0x5a71945b48e2d1f1 + .quad 0x6f203dd5405b4b42 + .quad 0x327ec60410b24509 + .quad 0x9c347230ac2a8846 + .quad 0x77de29fc11ffeb6a + + // 2^244 * 5 * G + + .quad 0xb0ac57c983b778a8 + .quad 0x53cdcca9d7fe912c + .quad 0x61c2b854ff1f59dc + .quad 0x3a1a2cf0f0de7dac + .quad 0x835e138fecced2ca + .quad 0x8c9eaf13ea963b9a + .quad 0xc95fbfc0b2160ea6 + .quad 0x575e66f3ad877892 + .quad 0x99803a27c88fcb3a + .quad 0x345a6789275ec0b0 + .quad 0x459789d0ff6c2be5 + .quad 0x62f882651e70a8b2 + + // 2^244 * 6 * G + + .quad 0x085ae2c759ff1be4 + .quad 0x149145c93b0e40b7 + .quad 0xc467e7fa7ff27379 + .quad 0x4eeecf0ad5c73a95 + .quad 0x6d822986698a19e0 + .quad 0xdc9821e174d78a71 + .quad 0x41a85f31f6cb1f47 + .quad 0x352721c2bcda9c51 + .quad 0x48329952213fc985 + .quad 0x1087cf0d368a1746 + .quad 0x8e5261b166c15aa5 + .quad 0x2d5b2d842ed24c21 + + // 2^244 * 7 * G + + .quad 0x02cfebd9ebd3ded1 + .quad 0xd45b217739021974 + .quad 0x7576f813fe30a1b7 + .quad 0x5691b6f9a34ef6c2 + .quad 0x5eb7d13d196ac533 + .quad 0x377234ecdb80be2b + .quad 0xe144cffc7cf5ae24 + .quad 0x5226bcf9c441acec + .quad 0x79ee6c7223e5b547 + .quad 0x6f5f50768330d679 + .quad 0xed73e1e96d8adce9 + .quad 0x27c3da1e1d8ccc03 + + // 2^244 * 8 * G + + .quad 0x7eb9efb23fe24c74 + .quad 0x3e50f49f1651be01 + .quad 0x3ea732dc21858dea + .quad 0x17377bd75bb810f9 + .quad 0x28302e71630ef9f6 + .quad 0xc2d4a2032b64cee0 + .quad 0x090820304b6292be + .quad 0x5fca747aa82adf18 + .quad 0x232a03c35c258ea5 + .quad 0x86f23a2c6bcb0cf1 + .quad 0x3dad8d0d2e442166 + .quad 0x04a8933cab76862b + + // 2^248 * 1 * G + + .quad 0xd2c604b622943dff + .quad 0xbc8cbece44cfb3a0 + .quad 0x5d254ff397808678 + .quad 0x0fa3614f3b1ca6bf + .quad 0x69082b0e8c936a50 + .quad 0xf9c9a035c1dac5b6 + .quad 0x6fb73e54c4dfb634 + .quad 0x4005419b1d2bc140 + .quad 0xa003febdb9be82f0 + .quad 0x2089c1af3a44ac90 + .quad 0xf8499f911954fa8e + .quad 0x1fba218aef40ab42 + + // 2^248 * 2 * G + + .quad 0xab549448fac8f53e + .quad 0x81f6e89a7ba63741 + .quad 0x74fd6c7d6c2b5e01 + .quad 0x392e3acaa8c86e42 + .quad 0x4f3e57043e7b0194 + .quad 0xa81d3eee08daaf7f + .quad 0xc839c6ab99dcdef1 + .quad 0x6c535d13ff7761d5 + .quad 0x4cbd34e93e8a35af + .quad 0x2e0781445887e816 + .quad 0x19319c76f29ab0ab + .quad 0x25e17fe4d50ac13b + + // 2^248 * 3 * G + + .quad 0x0a289bd71e04f676 + .quad 0x208e1c52d6420f95 + .quad 0x5186d8b034691fab + .quad 0x255751442a9fb351 + .quad 0x915f7ff576f121a7 + .quad 0xc34a32272fcd87e3 + .quad 0xccba2fde4d1be526 + .quad 0x6bba828f8969899b + .quad 0xe2d1bc6690fe3901 + .quad 0x4cb54a18a0997ad5 + .quad 0x971d6914af8460d4 + .quad 0x559d504f7f6b7be4 + + // 2^248 * 4 * G + + .quad 0xa7738378b3eb54d5 + .quad 0x1d69d366a5553c7c + .quad 0x0a26cf62f92800ba + .quad 0x01ab12d5807e3217 + .quad 
0x9c4891e7f6d266fd + .quad 0x0744a19b0307781b + .quad 0x88388f1d6061e23b + .quad 0x123ea6a3354bd50e + .quad 0x118d189041e32d96 + .quad 0xb9ede3c2d8315848 + .quad 0x1eab4271d83245d9 + .quad 0x4a3961e2c918a154 + + // 2^248 * 5 * G + + .quad 0x71dc3be0f8e6bba0 + .quad 0xd6cef8347effe30a + .quad 0xa992425fe13a476a + .quad 0x2cd6bce3fb1db763 + .quad 0x0327d644f3233f1e + .quad 0x499a260e34fcf016 + .quad 0x83b5a716f2dab979 + .quad 0x68aceead9bd4111f + .quad 0x38b4c90ef3d7c210 + .quad 0x308e6e24b7ad040c + .quad 0x3860d9f1b7e73e23 + .quad 0x595760d5b508f597 + + // 2^248 * 6 * G + + .quad 0x6129bfe104aa6397 + .quad 0x8f960008a4a7fccb + .quad 0x3f8bc0897d909458 + .quad 0x709fa43edcb291a9 + .quad 0x882acbebfd022790 + .quad 0x89af3305c4115760 + .quad 0x65f492e37d3473f4 + .quad 0x2cb2c5df54515a2b + .quad 0xeb0a5d8c63fd2aca + .quad 0xd22bc1662e694eff + .quad 0x2723f36ef8cbb03a + .quad 0x70f029ecf0c8131f + + // 2^248 * 7 * G + + .quad 0x461307b32eed3e33 + .quad 0xae042f33a45581e7 + .quad 0xc94449d3195f0366 + .quad 0x0b7d5d8a6c314858 + .quad 0x2a6aafaa5e10b0b9 + .quad 0x78f0a370ef041aa9 + .quad 0x773efb77aa3ad61f + .quad 0x44eca5a2a74bd9e1 + .quad 0x25d448327b95d543 + .quad 0x70d38300a3340f1d + .quad 0xde1c531c60e1c52b + .quad 0x272224512c7de9e4 + + // 2^248 * 8 * G + + .quad 0x1abc92af49c5342e + .quad 0xffeed811b2e6fad0 + .quad 0xefa28c8dfcc84e29 + .quad 0x11b5df18a44cc543 + .quad 0xbf7bbb8a42a975fc + .quad 0x8c5c397796ada358 + .quad 0xe27fc76fcdedaa48 + .quad 0x19735fd7f6bc20a6 + .quad 0xe3ab90d042c84266 + .quad 0xeb848e0f7f19547e + .quad 0x2503a1d065a497b9 + .quad 0x0fef911191df895f + + // 2^252 * 1 * G + + .quad 0xb1507ca1ab1c6eb9 + .quad 0xbd448f3e16b687b3 + .quad 0x3455fb7f2c7a91ab + .quad 0x7579229e2f2adec1 + .quad 0x6ab5dcb85b1c16b7 + .quad 0x94c0fce83c7b27a5 + .quad 0xa4b11c1a735517be + .quad 0x499238d0ba0eafaa + .quad 0xecf46e527aba8b57 + .quad 0x15a08c478bd1647b + .quad 0x7af1c6a65f706fef + .quad 0x6345fa78f03a30d5 + + // 2^252 * 2 * G + + .quad 0xdf02f95f1015e7a1 + .quad 0x790ec41da9b40263 + .quad 0x4d3a0ea133ea1107 + .quad 0x54f70be7e33af8c9 + .quad 0x93d3cbe9bdd8f0a4 + .quad 0xdb152c1bfd177302 + .quad 0x7dbddc6d7f17a875 + .quad 0x3e1a71cc8f426efe + .quad 0xc83ca3e390babd62 + .quad 0x80ede3670291c833 + .quad 0xc88038ccd37900c4 + .quad 0x2c5fc0231ec31fa1 + + // 2^252 * 3 * G + + .quad 0xfeba911717038b4f + .quad 0xe5123721c9deef81 + .quad 0x1c97e4e75d0d8834 + .quad 0x68afae7a23dc3bc6 + .quad 0xc422e4d102456e65 + .quad 0x87414ac1cad47b91 + .quad 0x1592e2bba2b6ffdd + .quad 0x75d9d2bff5c2100f + .quad 0x5bd9b4763626e81c + .quad 0x89966936bca02edd + .quad 0x0a41193d61f077b3 + .quad 0x3097a24200ce5471 + + // 2^252 * 4 * G + + .quad 0x57427734c7f8b84c + .quad 0xf141a13e01b270e9 + .quad 0x02d1adfeb4e564a6 + .quad 0x4bb23d92ce83bd48 + .quad 0xa162e7246695c486 + .quad 0x131d633435a89607 + .quad 0x30521561a0d12a37 + .quad 0x56704bada6afb363 + .quad 0xaf6c4aa752f912b9 + .quad 0x5e665f6cd86770c8 + .quad 0x4c35ac83a3c8cd58 + .quad 0x2b7a29c010a58a7e + + // 2^252 * 5 * G + + .quad 0xc4007f77d0c1cec3 + .quad 0x8d1020b6bac492f8 + .quad 0x32ec29d57e69daaf + .quad 0x599408759d95fce0 + .quad 0x33810a23bf00086e + .quad 0xafce925ee736ff7c + .quad 0x3d60e670e24922d4 + .quad 0x11ce9e714f96061b + .quad 0x219ef713d815bac1 + .quad 0xf141465d485be25c + .quad 0x6d5447cc4e513c51 + .quad 0x174926be5ef44393 + + // 2^252 * 6 * G + + .quad 0xb5deb2f9fc5bd5bb + .quad 0x92daa72ae1d810e1 + .quad 0xafc4cfdcb72a1c59 + .quad 0x497d78813fc22a24 + .quad 0x3ef5d41593ea022e + .quad 0x5cbcc1a20ed0eed6 + .quad 0x8fd24ecf07382c8c + .quad 
0x6fa42ead06d8e1ad + .quad 0xe276824a1f73371f + .quad 0x7f7cf01c4f5b6736 + .quad 0x7e201fe304fa46e7 + .quad 0x785a36a357808c96 + + // 2^252 * 7 * G + + .quad 0x825fbdfd63014d2b + .quad 0xc852369c6ca7578b + .quad 0x5b2fcd285c0b5df0 + .quad 0x12ab214c58048c8f + .quad 0x070442985d517bc3 + .quad 0x6acd56c7ae653678 + .quad 0x00a27983985a7763 + .quad 0x5167effae512662b + .quad 0xbd4ea9e10f53c4b6 + .quad 0x1673dc5f8ac91a14 + .quad 0xa8f81a4e2acc1aba + .quad 0x33a92a7924332a25 + + // 2^252 * 8 * G + + .quad 0x9dd1f49927996c02 + .quad 0x0cb3b058e04d1752 + .quad 0x1f7e88967fd02c3e + .quad 0x2f964268cb8b3eb1 + .quad 0x7ba95ba0218f2ada + .quad 0xcff42287330fb9ca + .quad 0xdada496d56c6d907 + .quad 0x5380c296f4beee54 + .quad 0x9d4f270466898d0a + .quad 0x3d0987990aff3f7a + .quad 0xd09ef36267daba45 + .quad 0x7761455e7b1c669c + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif From feff47eb483ab325aad14a08af5f851633cdc1fc Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 9 Jan 2023 17:51:29 -0800 Subject: [PATCH 21/42] Add SM2 mapping to Montgomery representation s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/0579f2c5d9c0fe75951d3c0f4e45d3c0b8709bbb --- arm/p384/bignum_tomont_p384.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arm/p384/bignum_tomont_p384.S b/arm/p384/bignum_tomont_p384.S index c5bea5e0d5..efed55f8c0 100644 --- a/arm/p384/bignum_tomont_p384.S +++ b/arm/p384/bignum_tomont_p384.S @@ -91,7 +91,7 @@ S2N_BN_SYMBOL(bignum_tomont_p384_alt): ldp d4, d5, [x1, #32] // Do an initial reduction to make sure this is < p_384, using just -// a copy of the bignum_mod_p384 code. This is needed to set up the +// a copy of the bignum_mod_p384_6 code. This is needed to set up the // invariant "input < p_384" for the main modular reduction steps. mov n0, #0x00000000ffffffff From fb5363a591d1fc4a76f11e4fae461eb9dad8ec24 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 10 Jan 2023 16:31:41 -0800 Subject: [PATCH 22/42] Add SM2 field negation And tweak the ARM implementations of analogous functions for P-256 and P-384 to avoid a couple of instructions by using immediates directly instead of loading constants. 
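As a side illustration of the pattern this patch tightens (an editor's C sketch under invented names, not code from the patch): the three "complicated" low digits of p_384 are ANDed with an all-ones/all-zeros mask derived from whether x is zero, so the subtraction yields p_384 - x for nonzero x and 0 for x = 0, without branching.

    #include <stdint.h>

    /* Sketch of the masked negation idea, assuming x fully reduced
       (x < p_384). Illustrative only; not audited constant-time C. */
    static void neg_p384_sketch(uint64_t z[6], const uint64_t x[6])
    {
        uint64_t w = x[0] | x[1] | x[2] | x[3] | x[4] | x[5];
        /* p = all-ones if x != 0, else 0, computed without a branch */
        uint64_t p = (uint64_t)0 - ((w | (0 - w)) >> 63);
        /* digits of p_384 masked by p; the low three are the irregular ones */
        uint64_t d[6] = { p & 0x00000000ffffffffULL,
                          p & 0xffffffff00000000ULL,
                          p & 0xfffffffffffffffeULL,
                          p, p, p };
        uint64_t borrow = 0;
        for (int i = 0; i < 6; i++) {
            unsigned __int128 t = (unsigned __int128)d[i] - x[i] - borrow;
            z[i] = (uint64_t)t;
            borrow = (uint64_t)(t >> 64) & 1;
        }
    }

The ARM tweak below amounts to fusing the constant load into the mask: `and t, p, #imm` produces each masked digit in one instruction where a `mov` plus `and` pair was used before.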
s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/ed5fdd3c8822cd593248d38cea038f71c89fd5b6 --- arm/p384/bignum_neg_p384.S | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/arm/p384/bignum_neg_p384.S b/arm/p384/bignum_neg_p384.S index 48efcb51bd..24bdbb1b23 100644 --- a/arm/p384/bignum_neg_p384.S +++ b/arm/p384/bignum_neg_p384.S @@ -29,7 +29,6 @@ #define d4 x8 #define d5 x9 - S2N_BN_SYMBOL(bignum_neg_p384): // Load the 6 digits of x @@ -49,21 +48,15 @@ S2N_BN_SYMBOL(bignum_neg_p384): cmp p, #0 csetm p, ne -// Load and mask the complicated lower three words of -// p_384 = [-1;-1;-1;n2;n1;n0] and subtract, using mask itself for upper digits +// Mask the complicated lower three words of p_384 = [-1;-1;-1;n2;n1;n0] +// and subtract, using mask itself for upper digits - mov t, #0x00000000ffffffff - and t, t, p + and t, p, #0x00000000ffffffff subs d0, t, d0 - - mov t, #0xffffffff00000000 - and t, t, p + and t, p, #0xffffffff00000000 sbcs d1, t, d1 - - mov t, #0xfffffffffffffffe - and t, t, p + and t, p, #0xfffffffffffffffe sbcs d2, t, d2 - sbcs d3, p, d3 sbcs d4, p, d4 sbc d5, p, d5 From d852f9c341c324c3b46a46363c0cd2e20e9343d1 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 11 Jan 2023 15:51:16 -0800 Subject: [PATCH 23/42] Add SM2 field doubling and halving And again, make minor tweaks to the ARM implementations of some analogous functions for P-256 and P-384. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/0ac9eea30a37c3fef7505647105428cc3bff1185 --- arm/p384/bignum_half_p384.S | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arm/p384/bignum_half_p384.S b/arm/p384/bignum_half_p384.S index e242829c6b..e3a7ff0e77 100644 --- a/arm/p384/bignum_half_p384.S +++ b/arm/p384/bignum_half_p384.S @@ -47,14 +47,11 @@ S2N_BN_SYMBOL(bignum_half_p384): // Do a masked addition of p_384, catching carry in a 7th word - mov n, #0x00000000ffffffff - and n, n, m + and n, m, #0x00000000ffffffff adds d0, d0, n - mov n, #0xffffffff00000000 - and n, n, m + and n, m, #0xffffffff00000000 adcs d1, d1, n - mov n, #0xfffffffffffffffe - and n, n, m + and n, m, #0xfffffffffffffffe adcs d2, d2, n adcs d3, d3, m adcs d4, d4, m From dc9e7a8e615f8f80e33e6fe10f6fc3bf3deb9ac3 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 9 Feb 2023 19:07:49 -0800 Subject: [PATCH 24/42] Slightly optimize x86 modular inverse inner loop The new one is somewhat shorter (29 instructions versus 33) and seems to be appreciably faster on several microarchitectures. Also made analogous changes to the coprimality test and the embedded instances of modular inverse within scalar multiplications. 
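For orientation (an editor's conceptual model, not a transcription of the register-level code below): the inner loop executes a batch of constant-time division steps on a pair (f, g), in the safegcd/divstep family, and the rewrite realizes each step's transition with xor/bt/cmov rather than the previous mov/neg/cmove chain. A branching C model of one such step:

    #include <stdint.h>

    /* One classic divstep, written with branches for readability. Assumes
       the usual invariant that f is odd, and an arithmetic right shift on
       negative values (true on mainstream compilers). The assembly performs
       an equivalent transition branch-free, which is where the 29-versus-33
       instruction saving comes from. */
    static void divstep(int64_t *delta, int64_t *f, int64_t *g)
    {
        if (*delta > 0 && (*g & 1)) {          /* swap-and-subtract case */
            int64_t t = *f;
            *delta = 1 - *delta;
            *f = *g;
            *g = (*g - t) >> 1;                /* g - f is even here */
        } else {
            *delta = 1 + *delta;
            *g = ((*g & 1) ? *g + *f : *g) >> 1;   /* g + (g&1)*f is even */
        }
    }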
s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/d1ef8cfa08845e4150f5b0a707a3b49f03f13055 --- x86_att/curve25519/curve25519_x25519.S | 50 +++++++++---------- x86_att/curve25519/curve25519_x25519_alt.S | 50 +++++++++---------- x86_att/curve25519/curve25519_x25519base.S | 50 +++++++++---------- .../curve25519/curve25519_x25519base_alt.S | 50 +++++++++---------- 4 files changed, 92 insertions(+), 108 deletions(-) diff --git a/x86_att/curve25519/curve25519_x25519.S b/x86_att/curve25519/curve25519_x25519.S index 2887c510fb..689429c4b8 100644 --- a/x86_att/curve25519/curve25519_x25519.S +++ b/x86_att/curve25519/curve25519_x25519.S @@ -888,38 +888,34 @@ toploop: movq %r8, (%rsp) movq %r15, 0x18(%rsp) innerloop: - movq %rbp, %rax - movq %rsi, %rdi - movq %rcx, %r13 - movq %rdx, %r15 - movq $0x1, %rbx - negq %rdi - andq %r14, %rbx - cmoveq %rbx, %rax - cmoveq %rbx, %rdi - cmoveq %rbx, %r13 - cmoveq %rbx, %r15 - movq %r12, %rbx - addq %r14, %rdi - movq %rdi, %r8 - negq %rdi - subq %rax, %rbx + xorl %eax, %eax + xorl %ebx, %ebx + xorq %r8, %r8 + xorq %r15, %r15 + btq $0x0, %r14 + cmovbq %rbp, %rax + cmovbq %rsi, %rbx + cmovbq %rcx, %r8 + cmovbq %rdx, %r15 + movq %r14, %r13 + subq %rbx, %r14 + subq %r13, %rbx + movq %r12, %rdi + subq %rax, %rdi cmovbq %r12, %rbp - cmovbq %r14, %rsi + leaq -0x1(%rdi), %r12 + cmovbq %rbx, %r14 + cmovbq %r13, %rsi + notq %r12 cmovbq %r10, %rcx cmovbq %r11, %rdx - cmovaeq %r8, %rdi - movq %rbx, %r12 - notq %rbx - incq %rbx - cmovbq %rbx, %r12 - movq %rdi, %r14 - addq %r13, %r10 + cmovaeq %rdi, %r12 + shrq $1, %r14 + addq %r8, %r10 addq %r15, %r11 shrq $1, %r12 - shrq $1, %r14 - leaq (%rcx,%rcx), %rcx - leaq (%rdx,%rdx), %rdx + addq %rcx, %rcx + addq %rdx, %rdx decq %r9 jne innerloop movq 0x8(%rsp), %rdi diff --git a/x86_att/curve25519/curve25519_x25519_alt.S b/x86_att/curve25519/curve25519_x25519_alt.S index fa34c2e88a..1d132ab051 100644 --- a/x86_att/curve25519/curve25519_x25519_alt.S +++ b/x86_att/curve25519/curve25519_x25519_alt.S @@ -1054,38 +1054,34 @@ toploop: movq %r8, (%rsp) movq %r15, 0x18(%rsp) innerloop: - movq %rbp, %rax - movq %rsi, %rdi - movq %rcx, %r13 - movq %rdx, %r15 - movq $0x1, %rbx - negq %rdi - andq %r14, %rbx - cmoveq %rbx, %rax - cmoveq %rbx, %rdi - cmoveq %rbx, %r13 - cmoveq %rbx, %r15 - movq %r12, %rbx - addq %r14, %rdi - movq %rdi, %r8 - negq %rdi - subq %rax, %rbx + xorl %eax, %eax + xorl %ebx, %ebx + xorq %r8, %r8 + xorq %r15, %r15 + btq $0x0, %r14 + cmovbq %rbp, %rax + cmovbq %rsi, %rbx + cmovbq %rcx, %r8 + cmovbq %rdx, %r15 + movq %r14, %r13 + subq %rbx, %r14 + subq %r13, %rbx + movq %r12, %rdi + subq %rax, %rdi cmovbq %r12, %rbp - cmovbq %r14, %rsi + leaq -0x1(%rdi), %r12 + cmovbq %rbx, %r14 + cmovbq %r13, %rsi + notq %r12 cmovbq %r10, %rcx cmovbq %r11, %rdx - cmovaeq %r8, %rdi - movq %rbx, %r12 - notq %rbx - incq %rbx - cmovbq %rbx, %r12 - movq %rdi, %r14 - addq %r13, %r10 + cmovaeq %rdi, %r12 + shrq $1, %r14 + addq %r8, %r10 addq %r15, %r11 shrq $1, %r12 - shrq $1, %r14 - leaq (%rcx,%rcx), %rcx - leaq (%rdx,%rdx), %rdx + addq %rcx, %rcx + addq %rdx, %rdx decq %r9 jne innerloop movq 0x8(%rsp), %rdi diff --git a/x86_att/curve25519/curve25519_x25519base.S b/x86_att/curve25519/curve25519_x25519base.S index 177ad685de..673287830d 100644 --- a/x86_att/curve25519/curve25519_x25519base.S +++ b/x86_att/curve25519/curve25519_x25519base.S @@ -1011,38 +1011,34 @@ toploop: movq %r8, (%rsp) movq %r15, 0x18(%rsp) innerloop: - movq %rbp, %rax - movq %rsi, %rdi - movq %rcx, %r13 - movq %rdx, %r15 - movq $0x1, %rbx - negq %rdi - 
andq %r14, %rbx - cmoveq %rbx, %rax - cmoveq %rbx, %rdi - cmoveq %rbx, %r13 - cmoveq %rbx, %r15 - movq %r12, %rbx - addq %r14, %rdi - movq %rdi, %r8 - negq %rdi - subq %rax, %rbx + xorl %eax, %eax + xorl %ebx, %ebx + xorq %r8, %r8 + xorq %r15, %r15 + btq $0x0, %r14 + cmovbq %rbp, %rax + cmovbq %rsi, %rbx + cmovbq %rcx, %r8 + cmovbq %rdx, %r15 + movq %r14, %r13 + subq %rbx, %r14 + subq %r13, %rbx + movq %r12, %rdi + subq %rax, %rdi cmovbq %r12, %rbp - cmovbq %r14, %rsi + leaq -0x1(%rdi), %r12 + cmovbq %rbx, %r14 + cmovbq %r13, %rsi + notq %r12 cmovbq %r10, %rcx cmovbq %r11, %rdx - cmovaeq %r8, %rdi - movq %rbx, %r12 - notq %rbx - incq %rbx - cmovbq %rbx, %r12 - movq %rdi, %r14 - addq %r13, %r10 + cmovaeq %rdi, %r12 + shrq $1, %r14 + addq %r8, %r10 addq %r15, %r11 shrq $1, %r12 - shrq $1, %r14 - leaq (%rcx,%rcx), %rcx - leaq (%rdx,%rdx), %rdx + addq %rcx, %rcx + addq %rdx, %rdx decq %r9 jne innerloop movq 0x8(%rsp), %rdi diff --git a/x86_att/curve25519/curve25519_x25519base_alt.S b/x86_att/curve25519/curve25519_x25519base_alt.S index 67b98ff992..4e0285088f 100644 --- a/x86_att/curve25519/curve25519_x25519base_alt.S +++ b/x86_att/curve25519/curve25519_x25519base_alt.S @@ -1087,38 +1087,34 @@ toploop: movq %r8, (%rsp) movq %r15, 0x18(%rsp) innerloop: - movq %rbp, %rax - movq %rsi, %rdi - movq %rcx, %r13 - movq %rdx, %r15 - movq $0x1, %rbx - negq %rdi - andq %r14, %rbx - cmoveq %rbx, %rax - cmoveq %rbx, %rdi - cmoveq %rbx, %r13 - cmoveq %rbx, %r15 - movq %r12, %rbx - addq %r14, %rdi - movq %rdi, %r8 - negq %rdi - subq %rax, %rbx + xorl %eax, %eax + xorl %ebx, %ebx + xorq %r8, %r8 + xorq %r15, %r15 + btq $0x0, %r14 + cmovbq %rbp, %rax + cmovbq %rsi, %rbx + cmovbq %rcx, %r8 + cmovbq %rdx, %r15 + movq %r14, %r13 + subq %rbx, %r14 + subq %r13, %rbx + movq %r12, %rdi + subq %rax, %rdi cmovbq %r12, %rbp - cmovbq %r14, %rsi + leaq -0x1(%rdi), %r12 + cmovbq %rbx, %r14 + cmovbq %r13, %rsi + notq %r12 cmovbq %r10, %rcx cmovbq %r11, %rdx - cmovaeq %r8, %rdi - movq %rbx, %r12 - notq %rbx - incq %rbx - cmovbq %rbx, %r12 - movq %rdi, %r14 - addq %r13, %r10 + cmovaeq %rdi, %r12 + shrq $1, %r14 + addq %r8, %r10 addq %r15, %r11 shrq $1, %r12 - shrq $1, %r14 - leaq (%rcx,%rcx), %rcx - leaq (%rdx,%rdx), %rdx + addq %rcx, %rcx + addq %rdx, %rdx decq %r9 jne innerloop movq 0x8(%rsp), %rdi From 8124ddf2fcadc44dcb3b2d63b17b9fbb1787b764 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 1 Mar 2023 08:47:45 -0800 Subject: [PATCH 25/42] Loosen modular reductions in X25519 basepoint functions The main loop now just maintains coordinates modulo 2^256-38, only fully reducing modulo 2^255-19 right at the end, which improves performance slightly. 
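The arithmetic fact being exploited (editor's note): 2^255 is congruent to 19, hence 2^256 to 38, modulo p_25519, so an overflow word h weighs only 38*h when folded back in. Keeping values reduced merely below 2^256 therefore costs one small multiply per operation, and the expensive canonical reduction modulo 2^255 - 19 can wait until the single mul_p25519 at the end. A minimal C sketch of the fold, under my own naming:

    #include <stdint.h>

    /* Fold a one-word overflow h back into a 4-digit value z, using
       h * 2^256 == 38 * h (mod 2^256 - 38). The assembly keeps its
       operands small enough that the final carry out is zero, so this
       sketch simply drops it. */
    static void fold_top_word(uint64_t z[4], uint64_t h)
    {
        unsigned __int128 acc = (unsigned __int128)38 * h + z[0];
        z[0] = (uint64_t)acc;
        for (int i = 1; i < 4; i++) {
            acc = (acc >> 64) + z[i];
            z[i] = (uint64_t)acc;
        }
    }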
s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/c8c00c8d71a999938be78cc3e8b99980b983f54a --- arm/curve25519/curve25519_x25519base.S | 102 ++++++---------- arm/curve25519/curve25519_x25519base_alt.S | 102 ++++++---------- x86_att/curve25519/curve25519_x25519base.S | 114 +++++++----------- .../curve25519/curve25519_x25519base_alt.S | 112 +++++++---------- 4 files changed, 168 insertions(+), 262 deletions(-) diff --git a/arm/curve25519/curve25519_x25519base.S b/arm/curve25519/curve25519_x25519base.S index 314169cf3f..7076630698 100644 --- a/arm/curve25519/curve25519_x25519base.S +++ b/arm/curve25519/curve25519_x25519base.S @@ -400,55 +400,6 @@ stp x7, x8, [P0]; \ stp x9, x10, [P0+16] -// Plain 4-digit add and doubling without any normalization -// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result, -// indeed one < 2 * p_25519 for normalized inputs. - -#define add_4(P0,P1,P2) \ - ldp x0, x1, [P1]; \ - ldp x4, x5, [P2]; \ - adds x0, x0, x4; \ - adcs x1, x1, x5; \ - ldp x2, x3, [P1+16]; \ - ldp x6, x7, [P2+16]; \ - adcs x2, x2, x6; \ - adc x3, x3, x7; \ - stp x0, x1, [P0]; \ - stp x2, x3, [P0+16] - -#define double_4(P0,P1) \ - ldp x0, x1, [P1]; \ - adds x0, x0, x0; \ - adcs x1, x1, x1; \ - ldp x2, x3, [P1+16]; \ - adcs x2, x2, x2; \ - adc x3, x3, x3; \ - stp x0, x1, [P0]; \ - stp x2, x3, [P0+16] - -// Subtraction of a pair of numbers < p_25519 just sufficient -// to give a 4-digit result. It actually always does (x - z) + (2^255-19) -// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 -// implicitly - -#define sub_4(P0,P1,P2) \ - ldp x5, x6, [P1]; \ - ldp x4, x3, [P2]; \ - subs x5, x5, x4; \ - sbcs x6, x6, x3; \ - ldp x7, x8, [P1+16]; \ - ldp x4, x3, [P2+16]; \ - sbcs x7, x7, x4; \ - sbcs x8, x8, x3; \ - mov x3, #19; \ - subs x5, x5, x3; \ - sbcs x6, x6, xzr; \ - sbcs x7, x7, xzr; \ - mov x4, #0x8000000000000000; \ - sbc x8, x8, x4; \ - stp x5, x6, [P0]; \ - stp x7, x8, [P0+16] - // Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 #define sub_twice4(P0,P1,P2) \ @@ -469,8 +420,11 @@ stp x5, x6, [P0]; \ stp x7, x8, [P0+16] -// Modular addition with inputs double modulus 2 * p_25519 = 2^256 - 38 -// and in general only guaranteeing a 4-digit result, not even < 2 * p_25519. +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. #define add_twice4(P0,P1,P2) \ ldp x3, x4, [P1]; \ @@ -490,6 +444,22 @@ stp x3, x4, [P0]; \ stp x5, x6, [P0+16] +#define double_twice4(P0,P1) \ + ldp x3, x4, [P1]; \ + adds x3, x3, x3; \ + adcs x4, x4, x4; \ + ldp x5, x6, [P1+16]; \ + adcs x5, x5, x5; \ + adcs x6, x6, x6; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + S2N_BN_SYMBOL(curve25519_x25519base): // Save regs and make room for temporaries @@ -850,11 +820,14 @@ scalarloop: // Extended-projective and precomputed mixed addition. 
// This is effectively the same as calling the standalone -// function edwards25519_pepadd(acc,acc,tabent) - - double_4(t0,z_1) - sub_4(t1,y_1,x_1) - add_4(t2,y_1,x_1) +// function edwards25519_pepadd(acc,acc,tabent), but we +// only retain slightly weaker normalization < 2 * p_25519 +// throughout the inner loop, so the computation is +// slightly different, and faster overall. + + double_twice4(t0,z_1) + sub_twice4(t1,y_1,x_1) + add_twice4(t2,y_1,x_1) mul_4(t3,w_1,kxy_2) mul_4(t1,t1,ymx_2) mul_4(t2,t2,xpy_2) @@ -862,10 +835,10 @@ scalarloop: add_twice4(t0,t0,t3) sub_twice4(t5,t2,t1) add_twice4(t1,t2,t1) - mul_p25519(z_3,t4,t0) - mul_p25519(x_3,t5,t4) - mul_p25519(y_3,t0,t1) - mul_p25519(w_3,t5,t1) + mul_4(z_3,t4,t0) + mul_4(x_3,t5,t4) + mul_4(y_3,t0,t1) + mul_4(w_3,t5,t1) // End of the main loop; move on by 4 bits. @@ -898,10 +871,12 @@ scalarloop: // // First the addition and subtraction: - add_4(y_3,x_3,w_3) - sub_4(z_3,x_3,w_3) + add_twice4(y_3,x_3,w_3) + sub_twice4(z_3,x_3,w_3) // Prepare to call the modular inverse function to get x_3 = 1/z_3 +// Note that this works for the weakly normalized z_3 equally well. +// The non-coprime case z_3 == 0 (mod p_25519) cannot arise anyway. mov x0, 4 add x1, x_3 @@ -1245,6 +1220,9 @@ zfliploop: b.hi outerloop // The final result is (X + T) / (X - T) +// This is the only operation in the whole computation that +// fully reduces modulo p_25519 since now we want the canonical +// answer as output. mul_p25519(resx,y_3,x_3) diff --git a/arm/curve25519/curve25519_x25519base_alt.S b/arm/curve25519/curve25519_x25519base_alt.S index 1ab9551f53..0631ac0279 100644 --- a/arm/curve25519/curve25519_x25519base_alt.S +++ b/arm/curve25519/curve25519_x25519base_alt.S @@ -282,55 +282,6 @@ stp x12, x13, [P0]; \ stp x14, x15, [P0+16] -// Plain 4-digit add and doubling without any normalization -// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result, -// indeed one < 2 * p_25519 for normalized inputs. - -#define add_4(P0,P1,P2) \ - ldp x0, x1, [P1]; \ - ldp x4, x5, [P2]; \ - adds x0, x0, x4; \ - adcs x1, x1, x5; \ - ldp x2, x3, [P1+16]; \ - ldp x6, x7, [P2+16]; \ - adcs x2, x2, x6; \ - adc x3, x3, x7; \ - stp x0, x1, [P0]; \ - stp x2, x3, [P0+16] - -#define double_4(P0,P1) \ - ldp x0, x1, [P1]; \ - adds x0, x0, x0; \ - adcs x1, x1, x1; \ - ldp x2, x3, [P1+16]; \ - adcs x2, x2, x2; \ - adc x3, x3, x3; \ - stp x0, x1, [P0]; \ - stp x2, x3, [P0+16] - -// Subtraction of a pair of numbers < p_25519 just sufficient -// to give a 4-digit result. It actually always does (x - z) + (2^255-19) -// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 -// implicitly - -#define sub_4(P0,P1,P2) \ - ldp x5, x6, [P1]; \ - ldp x4, x3, [P2]; \ - subs x5, x5, x4; \ - sbcs x6, x6, x3; \ - ldp x7, x8, [P1+16]; \ - ldp x4, x3, [P2+16]; \ - sbcs x7, x7, x4; \ - sbcs x8, x8, x3; \ - mov x3, #19; \ - subs x5, x5, x3; \ - sbcs x6, x6, xzr; \ - sbcs x7, x7, xzr; \ - mov x4, #0x8000000000000000; \ - sbc x8, x8, x4; \ - stp x5, x6, [P0]; \ - stp x7, x8, [P0+16] - // Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 #define sub_twice4(P0,P1,P2) \ @@ -351,8 +302,11 @@ stp x5, x6, [P0]; \ stp x7, x8, [P0+16] -// Modular addition with inputs double modulus 2 * p_25519 = 2^256 - 38 -// and in general only guaranteeing a 4-digit result, not even < 2 * p_25519. +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. 
The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. #define add_twice4(P0,P1,P2) \ ldp x3, x4, [P1]; \ @@ -372,6 +326,22 @@ stp x3, x4, [P0]; \ stp x5, x6, [P0+16] +#define double_twice4(P0,P1) \ + ldp x3, x4, [P1]; \ + adds x3, x3, x3; \ + adcs x4, x4, x4; \ + ldp x5, x6, [P1+16]; \ + adcs x5, x5, x5; \ + adcs x6, x6, x6; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + S2N_BN_SYMBOL(curve25519_x25519base_alt): // Save regs and make room for temporaries @@ -732,11 +702,14 @@ scalarloop: // Extended-projective and precomputed mixed addition. // This is effectively the same as calling the standalone -// function edwards25519_pepadd_alt(acc,acc,tabent) - - double_4(t0,z_1) - sub_4(t1,y_1,x_1) - add_4(t2,y_1,x_1) +// function edwards25519_pepadd_alt(acc,acc,tabent), but we +// only retain slightly weaker normalization < 2 * p_25519 +// throughout the inner loop, so the computation is +// slightly different, and faster overall. + + double_twice4(t0,z_1) + sub_twice4(t1,y_1,x_1) + add_twice4(t2,y_1,x_1) mul_4(t3,w_1,kxy_2) mul_4(t1,t1,ymx_2) mul_4(t2,t2,xpy_2) @@ -744,10 +717,10 @@ scalarloop: add_twice4(t0,t0,t3) sub_twice4(t5,t2,t1) add_twice4(t1,t2,t1) - mul_p25519(z_3,t4,t0) - mul_p25519(x_3,t5,t4) - mul_p25519(y_3,t0,t1) - mul_p25519(w_3,t5,t1) + mul_4(z_3,t4,t0) + mul_4(x_3,t5,t4) + mul_4(y_3,t0,t1) + mul_4(w_3,t5,t1) // End of the main loop; move on by 4 bits. @@ -780,10 +753,12 @@ scalarloop: // // First the addition and subtraction: - add_4(y_3,x_3,w_3) - sub_4(z_3,x_3,w_3) + add_twice4(y_3,x_3,w_3) + sub_twice4(z_3,x_3,w_3) // Prepare to call the modular inverse function to get x_3 = 1/z_3 +// Note that this works for the weakly normalized z_3 equally well. +// The non-coprime case z_3 == 0 (mod p_25519) cannot arise anyway. mov x0, 4 add x1, x_3 @@ -1127,6 +1102,9 @@ zfliploop: b.hi outerloop // The final result is (X + T) / (X - T) +// This is the only operation in the whole computation that +// fully reduces modulo p_25519 since now we want the canonical +// answer as output. mul_p25519(resx,y_3,x_3) diff --git a/x86_att/curve25519/curve25519_x25519base.S b/x86_att/curve25519/curve25519_x25519base.S index 673287830d..1f9ee2377c 100644 --- a/x86_att/curve25519/curve25519_x25519base.S +++ b/x86_att/curve25519/curve25519_x25519base.S @@ -256,62 +256,6 @@ movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 -// Plain 4-digit add and doubling without any normalization -// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result, -// indeed one < 2 * p_25519 for normalized inputs. - -#define add_4(P0,P1,P2) \ - movq P1, %rax ; \ - addq P2, %rax ; \ - movq %rax, P0 ; \ - movq 8+P1, %rax ; \ - adcq 8+P2, %rax ; \ - movq %rax, 8+P0 ; \ - movq 16+P1, %rax ; \ - adcq 16+P2, %rax ; \ - movq %rax, 16+P0 ; \ - movq 24+P1, %rax ; \ - adcq 24+P2, %rax ; \ - movq %rax, 24+P0 - -#define double_4(P0,P1) \ - movq P1, %rax ; \ - addq %rax, %rax ; \ - movq %rax, P0 ; \ - movq 8+P1, %rax ; \ - adcq %rax, %rax ; \ - movq %rax, 8+P0 ; \ - movq 16+P1, %rax ; \ - adcq %rax, %rax ; \ - movq %rax, 16+P0 ; \ - movq 24+P1, %rax ; \ - adcq %rax, %rax ; \ - movq %rax, 24+P0 - -// Subtraction of a pair of numbers < p_25519 just sufficient -// to give a 4-digit result. 
It actually always does (x - z) + (2^255-19) -// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 -// implicitly - -#define sub_4(P0,P1,P2) \ - movq P1, %r8 ; \ - subq P2, %r8 ; \ - movq 8+P1, %r9 ; \ - sbbq 8+P2, %r9 ; \ - movq 16+P1, %r10 ; \ - sbbq 16+P2, %r10 ; \ - movq 24+P1, %rax ; \ - sbbq 24+P2, %rax ; \ - subq $19, %r8 ; \ - movq %r8, P0 ; \ - sbbq $0, %r9 ; \ - movq %r9, 8+P0 ; \ - sbbq $0, %r10 ; \ - movq %r10, 16+P0 ; \ - sbbq $0, %rax ; \ - btc $63, %rax ; \ - movq %rax, 24+P0 - // Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 #define sub_twice4(P0,P1,P2) \ @@ -335,8 +279,11 @@ movq %r10, 16+P0 ; \ movq %rax, 24+P0 -// Modular addition with inputs double modulus 2 * p_25519 = 2^256 - 38 -// and in general only guaranteeing a 4-digit result, not even < 2 * p_25519. +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. #define add_twice4(P0,P1,P2) \ movq P1, %r8 ; \ @@ -359,6 +306,27 @@ movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 +#define double_twice4(P0,P1) \ + movq P1, %r8 ; \ + xorl %ecx, %ecx ; \ + addq %r8, %r8 ; \ + movq 0x8+P1, %r9 ; \ + adcq %r9, %r9 ; \ + movq 0x10+P1, %r10 ; \ + adcq %r10, %r10 ; \ + movq 0x18+P1, %r11 ; \ + adcq %r11, %r11 ; \ + movl $38, %eax ; \ + cmovncq %rcx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + S2N_BN_SYMBOL(curve25519_x25519base): // In this case the Windows form literally makes a subroutine call. @@ -845,11 +813,14 @@ scalarloop: // Extended-projective and precomputed mixed addition. // This is effectively the same as calling the standalone -// function edwards25519_pepadd(acc,acc,tabent) - - double_4(t0,z_1) - sub_4(t1,y_1,x_1) - add_4(t2,y_1,x_1) +// function edwards25519_pepadd(acc,acc,tabent), but we +// only retain slightly weaker normalization < 2 * p_25519 +// throughout the inner loop, so the computation is +// slightly different, and faster overall. + + double_twice4(t0,z_1) + sub_twice4(t1,y_1,x_1) + add_twice4(t2,y_1,x_1) mul_4(t3,w_1,kxy_2) mul_4(t1,t1,ymx_2) mul_4(t2,t2,xpy_2) @@ -857,10 +828,10 @@ scalarloop: add_twice4(t0,t0,t3) sub_twice4(t5,t2,t1) add_twice4(t1,t2,t1) - mul_p25519(z_3,t4,t0) - mul_p25519(x_3,t5,t4) - mul_p25519(y_3,t0,t1) - mul_p25519(w_3,t5,t1) + mul_4(z_3,t4,t0) + mul_4(x_3,t5,t4) + mul_4(y_3,t0,t1) + mul_4(w_3,t5,t1) // End of the main loop; move on by 4 bits. @@ -893,10 +864,12 @@ scalarloop: // // First the addition and subtraction: - add_4(y_3,x_3,w_3) - sub_4(z_3,x_3,w_3) + add_twice4(y_3,x_3,w_3) + sub_twice4(z_3,x_3,w_3) // Prepare to call the modular inverse function to get x_3 = 1/z_3 +// Note that this works for the weakly normalized z_3 equally well. +// The non-coprime case z_3 == 0 (mod p_25519) cannot arise anyway. movq $4, %rdi leaq 128(%rsp), %rsi @@ -1303,6 +1276,9 @@ fliploop: ja outerloop // The final result is (X + T) / (X - T) +// This is the only operation in the whole computation that +// fully reduces modulo p_25519 since now we want the canonical +// answer as output. 
movq res, %rbp mul_p25519(resx,y_3,x_3) diff --git a/x86_att/curve25519/curve25519_x25519base_alt.S b/x86_att/curve25519/curve25519_x25519base_alt.S index 4e0285088f..0027f47f90 100644 --- a/x86_att/curve25519/curve25519_x25519base_alt.S +++ b/x86_att/curve25519/curve25519_x25519base_alt.S @@ -332,62 +332,6 @@ movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 -// Plain 4-digit add and doubling without any normalization -// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result, -// indeed one < 2 * p_25519 for normalized inputs. - -#define add_4(P0,P1,P2) \ - movq P1, %rax ; \ - addq P2, %rax ; \ - movq %rax, P0 ; \ - movq 8+P1, %rax ; \ - adcq 8+P2, %rax ; \ - movq %rax, 8+P0 ; \ - movq 16+P1, %rax ; \ - adcq 16+P2, %rax ; \ - movq %rax, 16+P0 ; \ - movq 24+P1, %rax ; \ - adcq 24+P2, %rax ; \ - movq %rax, 24+P0 - -#define double_4(P0,P1) \ - movq P1, %rax ; \ - addq %rax, %rax ; \ - movq %rax, P0 ; \ - movq 8+P1, %rax ; \ - adcq %rax, %rax ; \ - movq %rax, 8+P0 ; \ - movq 16+P1, %rax ; \ - adcq %rax, %rax ; \ - movq %rax, 16+P0 ; \ - movq 24+P1, %rax ; \ - adcq %rax, %rax ; \ - movq %rax, 24+P0 - -// Subtraction of a pair of numbers < p_25519 just sufficient -// to give a 4-digit result. It actually always does (x - z) + (2^255-19) -// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 -// implicitly - -#define sub_4(P0,P1,P2) \ - movq P1, %r8 ; \ - subq P2, %r8 ; \ - movq 8+P1, %r9 ; \ - sbbq 8+P2, %r9 ; \ - movq 16+P1, %r10 ; \ - sbbq 16+P2, %r10 ; \ - movq 24+P1, %rax ; \ - sbbq 24+P2, %rax ; \ - subq $19, %r8 ; \ - movq %r8, P0 ; \ - sbbq $0, %r9 ; \ - movq %r9, 8+P0 ; \ - sbbq $0, %r10 ; \ - movq %r10, 16+P0 ; \ - sbbq $0, %rax ; \ - btc $63, %rax ; \ - movq %rax, 24+P0 - // Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 #define sub_twice4(P0,P1,P2) \ @@ -411,8 +355,11 @@ movq %r10, 16+P0 ; \ movq %rax, 24+P0 -// Modular addition with inputs double modulus 2 * p_25519 = 2^256 - 38 -// and in general only guaranteeing a 4-digit result, not even < 2 * p_25519. +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. #define add_twice4(P0,P1,P2) \ movq P1, %r8 ; \ @@ -435,6 +382,27 @@ movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 +#define double_twice4(P0,P1) \ + movq P1, %r8 ; \ + xorl %ecx, %ecx ; \ + addq %r8, %r8 ; \ + movq 0x8+P1, %r9 ; \ + adcq %r9, %r9 ; \ + movq 0x10+P1, %r10 ; \ + adcq %r10, %r10 ; \ + movq 0x18+P1, %r11 ; \ + adcq %r11, %r11 ; \ + movl $38, %eax ; \ + cmovncq %rcx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + S2N_BN_SYMBOL(curve25519_x25519base_alt): // In this case the Windows form literally makes a subroutine call. @@ -921,11 +889,14 @@ scalarloop: // Extended-projective and precomputed mixed addition. 
// This is effectively the same as calling the standalone -// function edwards25519_pepadd_alt(acc,acc,tabent) - - double_4(t0,z_1) - sub_4(t1,y_1,x_1) - add_4(t2,y_1,x_1) +// function edwards25519_pepadd_alt(acc,acc,tabent), but we +// only retain slightly weaker normalization < 2 * p_25519 +// throughout the inner loop, so the computation is +// slightly different, and faster overall. + + double_twice4(t0,z_1) + sub_twice4(t1,y_1,x_1) + add_twice4(t2,y_1,x_1) mul_4(t3,w_1,kxy_2) mul_4(t1,t1,ymx_2) mul_4(t2,t2,xpy_2) @@ -933,10 +904,10 @@ scalarloop: add_twice4(t0,t0,t3) sub_twice4(t5,t2,t1) add_twice4(t1,t2,t1) - mul_p25519(z_3,t4,t0) - mul_p25519(x_3,t5,t4) - mul_p25519(y_3,t0,t1) - mul_p25519(w_3,t5,t1) + mul_4(z_3,t4,t0) + mul_4(x_3,t5,t4) + mul_4(y_3,t0,t1) + mul_4(w_3,t5,t1) // End of the main loop; move on by 4 bits. @@ -969,8 +940,8 @@ scalarloop: // // First the addition and subtraction: - add_4(y_3,x_3,w_3) - sub_4(z_3,x_3,w_3) + add_twice4(y_3,x_3,w_3) + sub_twice4(z_3,x_3,w_3) // Prepare to call the modular inverse function to get x_3 = 1/z_3 @@ -1379,6 +1350,9 @@ fliploop: ja outerloop // The final result is (X + T) / (X - T) +// This is the only operation in the whole computation that +// fully reduces modulo p_25519 since now we want the canonical +// answer as output. movq res, %rbp mul_p25519(resx,y_3,x_3) From 05108efacbb463efd7c67ec652c3fc09f2845967 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 6 Mar 2023 19:40:53 -0800 Subject: [PATCH 26/42] Loosen modular reductions in X25519 fresh-point functions The main loop now just maintains coordinates modulo 2^256-38, only fully reducing modulo 2^255-19 right at the end, and also special-cases four of the iterations to pure doublings because of the initial mangling of the scalar. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/be3a935063b45cf0450bf00bc4bf18a78c3dc728 --- arm/curve25519/curve25519_x25519.S | 487 ++++++++++++--------- arm/curve25519/curve25519_x25519_alt.S | 398 +++++++++-------- x86_att/curve25519/curve25519_x25519.S | 351 ++++++++------- x86_att/curve25519/curve25519_x25519_alt.S | 327 +++++++------- 4 files changed, 852 insertions(+), 711 deletions(-) diff --git a/arm/curve25519/curve25519_x25519.S b/arm/curve25519/curve25519_x25519.S index 02b5b51289..27e41644db 100644 --- a/arm/curve25519/curve25519_x25519.S +++ b/arm/curve25519/curve25519_x25519.S @@ -75,10 +75,8 @@ #define NSPACE (12*NUMSIZE) -// Macros wrapping up the basic field operation calls -// bignum_mul_p25519 and bignum_sqr_p25519. -// These two are only trivially different from pure -// function calls to those subroutines. +// Macro wrapping up the basic field operation bignum_mul_p25519, only +// trivially different from a pure function call to that subroutine. 
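The "initial mangling" referred to is the standard X25519 scalar clamping from RFC 7748, shown below as a reminder (editor's sketch, not code from the patch). Clearing bits 0-2 makes the last three ladder iterations pure doublings, and setting bit 254 with bit 255 clear lets the first iteration be handled as an explicit doubling, which together account for the four special-cased iterations.

    #include <stdint.h>

    /* RFC 7748 scalar clamping for X25519 (little-endian bytes) */
    static void clamp(uint8_t k[32])
    {
        k[0]  &= 0xf8;   /* clear bits 0..2  -> three trailing doublings   */
        k[31] &= 0x7f;   /* clear bit 255    -> never walked by the ladder */
        k[31] |= 0x40;   /* set bit 254      -> known leading doubling     */
    }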
#define mul_p25519(p0,p1,p2) \ ldp x3, x4, [p1]; \ @@ -243,121 +241,165 @@ stp x7, x8, [p0]; \ stp x9, x10, [p0+16] -#define sqr_p25519(p0,p1) \ - ldp x6, x7, [p1]; \ - ldp x10, x11, [p1+16]; \ - mul x4, x6, x10; \ - mul x9, x7, x11; \ - umulh x12, x6, x10; \ - subs x13, x6, x7; \ - cneg x13, x13, cc; \ - csetm x3, cc; \ - subs x2, x11, x10; \ - cneg x2, x2, cc; \ - mul x8, x13, x2; \ - umulh x2, x13, x2; \ - cinv x3, x3, cc; \ - eor x8, x8, x3; \ - eor x2, x2, x3; \ - adds x5, x4, x12; \ - adc x12, x12, xzr; \ - umulh x13, x7, x11; \ - adds x5, x5, x9; \ - adcs x12, x12, x13; \ - adc x13, x13, xzr; \ - adds x12, x12, x9; \ - adc x13, x13, xzr; \ - cmn x3, #0x1; \ - adcs x5, x5, x8; \ - adcs x12, x12, x2; \ - adc x13, x13, x3; \ - adds x4, x4, x4; \ - adcs x5, x5, x5; \ - adcs x12, x12, x12; \ - adcs x13, x13, x13; \ - adc x14, xzr, xzr; \ - mul x2, x6, x6; \ - mul x8, x7, x7; \ - mul x15, x6, x7; \ - umulh x3, x6, x6; \ - umulh x9, x7, x7; \ - umulh x16, x6, x7; \ - adds x3, x3, x15; \ - adcs x8, x8, x16; \ - adc x9, x9, xzr; \ - adds x3, x3, x15; \ - adcs x8, x8, x16; \ - adc x9, x9, xzr; \ - adds x4, x4, x8; \ - adcs x5, x5, x9; \ - adcs x12, x12, xzr; \ +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + mul x7, x3, x5; \ + umulh x8, x3, x5; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + mul x11, x3, x5; \ + umulh x12, x3, x5; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [P2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ adcs x13, x13, xzr; \ adc x14, x14, xzr; \ - mul x6, x10, x10; \ - mul x8, x11, x11; \ - mul x15, x10, x11; \ - umulh x7, x10, x10; \ - umulh x9, x11, x11; \ - umulh x16, x10, x11; \ - adds x7, x7, x15; \ - adcs x8, x8, x16; \ - adc x9, x9, xzr; \ - adds x7, x7, x15; \ - adcs x8, x8, x16; \ - adc x9, x9, xzr; \ - adds x6, x6, x12; \ - adcs x7, x7, x13; \ - adcs x8, x8, x14; \ - adc x9, x9, xzr; \ - mov x10, #0x26; \ - and x11, x6, #0xffffffff; \ - lsr x12, x6, #32; \ - mul x11, x10, x11; \ - mul x12, x10, x12; \ - adds x2, x2, x11; \ - and x11, x7, #0xffffffff; \ - lsr x7, x7, #32; \ - mul x11, x10, x11; \ - mul x7, x10, x7; \ - adcs x3, x3, x11; \ - and x11, x8, 
#0xffffffff; \ - lsr x8, x8, #32; \ - mul x11, x10, x11; \ - mul x8, x10, x8; \ - adcs x4, x4, x11; \ - and x11, x9, #0xffffffff; \ - lsr x9, x9, #32; \ - mul x11, x10, x11; \ - mul x9, x10, x9; \ - adcs x5, x5, x11; \ - cset x6, cs; \ - lsl x11, x12, #32; \ - adds x2, x2, x11; \ - extr x11, x7, x12, #32; \ - adcs x3, x3, x11; \ - extr x11, x8, x7, #32; \ - adcs x4, x4, x11; \ - extr x11, x9, x8, #32; \ - adcs x5, x5, x11; \ - lsr x11, x9, #32; \ - adc x6, x6, x11; \ - cmn x5, x5; \ - orr x5, x5, #0x8000000000000000; \ - adc x13, x6, x6; \ - mov x10, #0x13; \ - madd x11, x10, x13, x10; \ - adds x2, x2, x11; \ - adcs x3, x3, xzr; \ - adcs x4, x4, xzr; \ - adcs x5, x5, xzr; \ - csel x10, x10, xzr, cc; \ - subs x2, x2, x10; \ - sbcs x3, x3, xzr; \ - sbcs x4, x4, xzr; \ - sbc x5, x5, xzr; \ - and x5, x5, #0x7fffffffffffffff; \ - stp x2, x3, [p0]; \ - stp x4, x5, [p0+16] + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + and x5, x11, #0xffffffff; \ + lsr x4, x11, #32; \ + mul x5, x3, x5; \ + mul x4, x3, x4; \ + adds x7, x7, x5; \ + and x5, x12, #0xffffffff; \ + lsr x12, x12, #32; \ + mul x5, x3, x5; \ + mul x12, x3, x12; \ + adcs x8, x8, x5; \ + and x5, x13, #0xffffffff; \ + lsr x13, x13, #32; \ + mul x5, x3, x5; \ + mul x13, x3, x13; \ + adcs x9, x9, x5; \ + and x5, x14, #0xffffffff; \ + lsr x14, x14, #32; \ + mul x5, x3, x5; \ + mul x14, x3, x14; \ + adcs x10, x10, x5; \ + cset x11, cs; \ + lsl x5, x4, #32; \ + adds x7, x7, x5; \ + extr x5, x12, x4, #32; \ + adcs x8, x8, x5; \ + extr x5, x13, x12, #32; \ + adcs x9, x9, x5; \ + extr x5, x14, x13, #32; \ + adcs x10, x10, x5; \ + lsr x5, x14, #32; \ + adc x11, x11, x5; \ + cmn x10, x10; \ + bic x10, x10, #0x8000000000000000; \ + adc x0, x11, x11; \ + mov x3, #19; \ + mul x5, x3, x0; \ + adds x7, x7, x5; \ + adcs x8, x8, xzr; \ + adcs x9, x9, xzr; \ + adc x10, x10, xzr; \ + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] // Multiplication just giving a 5-digit result (actually < 39 * 2^256) // by not doing anything beyond the first stage of reduction @@ -625,21 +667,6 @@ stp x2, x3, [p0]; \ stp x4, x5, [p0+16] -// Plain 4-digit add without any normalization -// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result - -#define add_4(p0,p1,p2) \ - ldp x0, x1, [p1]; \ - ldp x4, x5, [p2]; \ - adds x0, x0, x4; \ - adcs x1, x1, x5; \ - ldp x2, x3, [p1+16]; \ - ldp x6, x7, [p2+16]; \ - adcs x2, x2, x6; \ - adc x3, x3, x7; \ - stp x0, x1, [p0]; \ - stp x2, x3, [p0+16] - // Add 5-digit inputs and normalize to 4 digits #define add5_4(p0,p1,p2) \ @@ -666,28 +693,29 @@ stp x0, x1, [p0]; \ stp x2, x3, [p0+16] -// Subtraction of a pair of numbers < p_25519 just sufficient -// to give a 4-digit result. 
It actually always does (x - z) + (2^255-19) -// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 -// implicitly +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. -#define sub_4(p0,p1,p2) \ - ldp x5, x6, [p1]; \ - ldp x4, x3, [p2]; \ - subs x5, x5, x4; \ - sbcs x6, x6, x3; \ - ldp x7, x8, [p1+16]; \ - ldp x4, x3, [p2+16]; \ - sbcs x7, x7, x4; \ - sbcs x8, x8, x3; \ - mov x3, #19; \ - subs x5, x5, x3; \ - sbcs x6, x6, xzr; \ - sbcs x7, x7, xzr; \ - mov x4, #0x8000000000000000; \ - sbc x8, x8, x4; \ - stp x5, x6, [p0]; \ - stp x7, x8, [p0+16] +#define add_twice4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x5, x6, [P1+16]; \ + ldp x7, x8, [P2+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] // Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 @@ -809,73 +837,68 @@ S2N_BN_SYMBOL(curve25519_x25519): mov res, x0 -// Copy the inputs to the local variables while mangling them: +// Copy the inputs to the local variables with minimal mangling: // -// - The scalar gets turned into 01xxx...xxx000 by tweaking bits. -// Actually the top zero doesn't matter since the loop below -// never looks at it, so we don't literally modify that. +// - The scalar is in principle turned into 01xxx...xxx000 but +// in the structure below the special handling of these bits is +// explicit in the main computation; the scalar is just copied. // -// - The point x coord is reduced mod 2^255 *then* mod 2^255-19 +// - The point x coord is reduced mod 2^255 by masking off the +// top bit. In the main loop we only need reduction < 2 * p_25519. ldp x10, x11, [x1] - bic x10, x10, #7 stp x10, x11, [scalar] ldp x12, x13, [x1, #16] - orr x13, x13, #0x4000000000000000 stp x12, x13, [scalar+16] ldp x10, x11, [x2] - subs x6, x10, #-19 - adcs x7, x11, xzr + stp x10, x11, [pointx] ldp x12, x13, [x2, #16] and x13, x13, #0x7fffffffffffffff - adcs x8, x12, xzr - mov x9, #0x7fffffffffffffff - sbcs x9, x13, x9 - - csel x10, x6, x10, cs - csel x11, x7, x11, cs - csel x12, x8, x12, cs - csel x13, x9, x13, cs - - stp x10, x11, [pointx] stp x12, x13, [pointx+16] -// Initialize (xn,zn) = (1,0) and (xm,zm) = (x,1) with swap = 0 -// We use the fact that the point x coordinate is still in registers +// Initialize with explicit doubling in order to handle set bit 254. +// Set swap = 1 and (xm,zm) = (x,1) then double as (xn,zn) = 2 * (x,1). +// We use the fact that the point x coordinate is still in registers. +// Since zm = 1 we could do the doubling with an operation count of +// 2 * S + M instead of 2 * S + 2 * M, but it doesn't seem worth +// the slight complication arising from a different linear combination. 
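
(For illustration only, not part of the patch: a minimal Python model of
the explicit initial doubling described above. The immediate assembled by
the mov/orr pair just below is 0x1db42 = 121666 = (486662 + 2) / 4, and
the d/s/p/e names mirror the temporaries used in the macros.)

    P25519 = 2**255 - 19
    assert 0xdb42 | 0x10000 == 121666          # the mov/orr immediate

    def ladder_double(x, z):
        # Projective Montgomery doubling as the code sequences it:
        # d = (x-z)^2, s = (x+z)^2, p = s - d = 4xz, e = 121666*p + d,
        # giving (s * d, p * e).
        d = (x - z) ** 2 % P25519
        s = (x + z) ** 2 % P25519
        p = (s - d) % P25519
        e = (121666 * p + d) % P25519
        return s * d % P25519, p * e % P25519

    # swap = 1, (xm, zm) = (x, 1), then (xn, zn) = 2 * (x, 1):
    xm, zm = 9, 1                              # e.g. the base point x = 9
    xn, zn = ladder_double(xm, zm)
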
- mov x2, #1 - stp x2, xzr, [xn] - stp xzr, xzr, [xn+16] - stp xzr, xzr, [zn] - stp xzr, xzr, [zn+16] + mov swap, #1 stp x10, x11, [xm] stp x12, x13, [xm+16] - stp x2, xzr, [zm] + stp swap, xzr, [zm] stp xzr, xzr, [zm+16] - mov swap, xzr -// The outer loop over scalar bits from i = 254, ..., i = 0 (inclusive) -// This starts at 254, and so implicitly masks bit 255 of the scalar. + sub_twice4(d,xm,zm) + add_twice4(s,xm,zm) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + +// The main loop over unmodified bits from i = 253, ..., i = 3 (inclusive). +// This is a classic Montgomery ladder, with the main coordinates only +// reduced mod 2 * p_25519, some intermediate results even more loosely. - mov i, #254 + mov i, #253 scalarloop: + // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn -// The adds don't need any normalization as they're fed to muls -// Just make sure the subs fit in 4 digits - sub_4(dm, xm, zm) - add_4(sn, xn, zn) - sub_4(dn, xn, zn) - add_4(sm, xm, zm) + sub_twice4(dm,xm,zm) + add_twice4(sn,xn,zn) + sub_twice4(dn,xn,zn) + add_twice4(sm,xm,zm) -// ADDING: dmsn = dm * sn; dnsm = sm * dn // DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) - mul_5(dmsn,sn,dm) - lsr x0, i, #6 ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly lsr x2, x2, i @@ -887,18 +910,22 @@ scalarloop: mux_4(d,dm,dn) mux_4(s,sm,sn) +// ADDING: dmsn = dm * sn; dnsm = sm * dn + + mul_5(dmsn,sn,dm) + mul_5(dnsm,sm,dn) -// DOUBLING: d = (xt - zt)^2 normalized only to 4 digits +// DOUBLING: d = (xt - zt)^2 sqr_4(d,d) // ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 -// DOUBLING: s = (xt + zt)^2, normalized only to 4 digits +// DOUBLING: s = (xt + zt)^2 sub5_4(dpro,dmsn,dnsm) - sqr_4(s,s) add5_4(spro,dmsn,dnsm) + sqr_4(s,s) sqr_4(dpro,dpro) // DOUBLING: p = 4 * xt * zt = s - d @@ -907,7 +934,7 @@ scalarloop: // ADDING: xm' = (dmsn + dnsm)^2 - sqr_p25519(xm,spro) + sqr_4(xm,spro) // DOUBLING: e = 121666 * p + d @@ -917,26 +944,66 @@ scalarloop: // DOUBLING: xn' = (xt + zt)^2 * (xt - zt)^2 = s * d - mul_p25519(xn,s,d) - -// ADDING: zm' = x * (dmsn - dnsm)^2 - - mul_p25519(zm,dpro,pointx) + mul_4(xn,s,d) // DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) // = p * (d + 121666 * p) - mul_p25519(zn,p,e) + mul_4(zn,p,e) + +// ADDING: zm' = x * (dmsn - dnsm)^2 -// Loop down as far as 0 (inclusive) + mul_4(zm,dpro,pointx) - subs i, i, #1 +// Loop down as far as 3 (inclusive) + + sub i, i, #1 + cmp i, #3 bcs scalarloop -// Since the scalar was forced to be a multiple of 8, we know it's even. -// Hence there is no need to multiplex: the projective answer is (xn,zn) -// and we can ignore (xm,zm); indeed we could have avoided the last three -// differential additions and just done the doublings. +// Multiplex directly into (xn,zn) then do three pure doubling steps; +// this accounts for the implicit zeroing of the three lowest bits +// of the scalar. On the very last doubling we *fully* reduce zn mod +// p_25519 to ease checking for degeneracy below. 
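
(Again for illustration only: the overall schedule is one explicit
doubling for the forced-to-1 bit 254, a ladder step per bit 253..3, and
three pure doublings for the forced-to-0 bits 2..0. The Python model
below assumes the ladder_double helper from the sketch above and a scalar
k already clamped to that shape; it branches on bits where the assembly
uses constant-time multiplexing.)

    def ladder_step(xq, r0, r1, bit):
        # (r0, r1) = x-only ([m]Q, [m+1]Q) with difference Q, x(Q) = xq.
        (x0, z0), (x1, z1) = r0, r1
        da = (x0 - z0) * (x1 + z1) % P25519    # dm * sn in the code
        cb = (x0 + z0) * (x1 - z1) % P25519    # sm * dn in the code
        add = ((da + cb) ** 2 % P25519, xq * (da - cb) ** 2 % P25519)
        if bit:
            return add, ladder_double(x1, z1)  # ([2m+1]Q, [2m+2]Q)
        return ladder_double(x0, z0), add      # ([2m]Q, [2m+1]Q)

    def x25519_model(k, xq):
        r0, r1 = (xq, 1), ladder_double(xq, 1) # bit 254 handled up front
        for i in range(253, 2, -1):            # main loop, bits 253..3
            r0, r1 = ladder_step(xq, r0, r1, (k >> i) & 1)
        for _ in range(3):                     # bits 2..0 are zero
            r0 = ladder_double(*r0)
        x, z = r0
        return x * pow(z, P25519 - 2, P25519) % P25519
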
+ + cmp swap, xzr + mux_4(xn,xm,xn) + mux_4(zn,zm,zn) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_p25519(zn,p,e) + +// The projective result of the scalar multiplication is now (xn,zn). // First set up the constant sn = 2^255 - 19 for the modular inverse. mov x0, #-19 @@ -1307,7 +1374,7 @@ zfliploop: csel x3, x3, xzr, ne stp x2, x3, [xn+16] -// Now the result is xn * (1/zn). +// Now the result is xn * (1/zn), fully reduced modulo p. mul_p25519(resx,xn,zm) diff --git a/arm/curve25519/curve25519_x25519_alt.S b/arm/curve25519/curve25519_x25519_alt.S index 19b23f96c5..5ae6c9fa19 100644 --- a/arm/curve25519/curve25519_x25519_alt.S +++ b/arm/curve25519/curve25519_x25519_alt.S @@ -75,10 +75,8 @@ #define NSPACE (12*NUMSIZE) -// Macros wrapping up the basic field operation calls -// bignum_mul_p25519_alt and bignum_sqr_p25519_alt. -// These two are only trivially different from pure -// function calls to those subroutines. +// Macro wrapping up the basic field operation bignum_mul_p25519_alt, only +// trivially different from a pure function call to that subroutine. #define mul_p25519(p0,p1,p2) \ ldp x3, x4, [p1]; \ @@ -184,87 +182,106 @@ stp x12, x13, [p0]; \ stp x14, x15, [p0+16] -#define sqr_p25519(p0,p1) \ - ldp x2, x3, [p1]; \ - mul x9, x2, x3; \ - umulh x10, x2, x3; \ - ldp x4, x5, [p1+16]; \ - mul x11, x2, x5; \ - umulh x12, x2, x5; \ - mul x7, x2, x4; \ - umulh x6, x2, x4; \ - adds x10, x10, x7; \ - adcs x11, x11, x6; \ - mul x7, x3, x4; \ - umulh x6, x3, x4; \ - adc x6, x6, xzr; \ - adds x11, x11, x7; \ - mul x13, x4, x5; \ - umulh x14, x4, x5; \ - adcs x12, x12, x6; \ - mul x7, x3, x5; \ - umulh x6, x3, x5; \ - adc x6, x6, xzr; \ - adds x12, x12, x7; \ - adcs x13, x13, x6; \ - adc x14, x14, xzr; \ - adds x9, x9, x9; \ - adcs x10, x10, x10; \ - adcs x11, x11, x11; \ - adcs x12, x12, x12; \ - adcs x13, x13, x13; \ - adcs x14, x14, x14; \ - cset x6, hs; \ - umulh x7, x2, x2; \ - mul x8, x2, x2; \ - adds x9, x9, x7; \ - mul x7, x3, x3; \ - adcs x10, x10, x7; \ - umulh x7, x3, x3; \ - adcs x11, x11, x7; \ - mul x7, x4, x4; \ - adcs x12, x12, x7; \ - umulh x7, x4, x4; \ - adcs x13, x13, x7; \ - mul x7, x5, x5; \ - adcs x14, x14, x7; \ - umulh x7, x5, x5; \ - adc x6, x6, x7; \ - mov x3, #38; \ - mul x7, x3, x12; \ - umulh x4, x3, x12; \ - adds x8, x8, x7; \ - mul x7, x3, x13; \ - umulh x13, x3, x13; \ - adcs x9, x9, x7; \ - mul x7, x3, x14; \ - umulh x14, x3, x14; \ - adcs x10, x10, x7; \ - mul x7, x3, x6; \ - umulh x6, x3, x6; \ - adcs x11, x11, x7; \ - cset x12, hs; \ - adds x9, x9, x4; \ - adcs x10, x10, x13; \ - adcs x11, x11, x14; \ - adc x12, x12, x6; \ - cmn x11, x11; \ - orr x11, x11, #0x8000000000000000; \ - adc x2, x12, x12; \ - mov x3, #19; \ - madd x7, x3, x2, x3; \ - adds x8, x8, x7; \ - adcs x9, x9, xzr; \ - adcs x10, x10, xzr; \ - adcs x11, x11, xzr; \ - csel x3, x3, xzr, lo; \ - subs x8, x8, x3; \ - sbcs x9, x9, xzr; \ - sbcs x10, x10, xzr; \ - sbc x11, x11, xzr; \ - and x11, x11, #0x7fffffffffffffff; \ - stp x8, x9, [p0]; \ - stp x10, x11, [p0+16] +// A version of multiplication that only guarantees 
output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [P2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [P1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #38; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, hs; \ + adds x13, x13, x9; \ + adcs x14, x14, x3; \ + adcs x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + bic x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #19; \ + mul x11, x7, x8; \ + adds x12, x12, x11; \ + adcs x13, x13, xzr; \ + adcs x14, x14, xzr; \ + adc x15, x15, xzr; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] // Multiplication just giving a 5-digit result (actually < 39 * 2^256) // by not doing anything beyond the first stage of reduction @@ -439,21 +456,6 @@ stp x8, x9, [p0]; \ stp x10, x11, [p0+16] -// Plain 4-digit add without any normalization -// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result - -#define add_4(p0,p1,p2) \ - ldp x0, x1, [p1]; \ - ldp x4, x5, [p2]; \ - adds x0, x0, x4; \ - adcs x1, x1, x5; \ - ldp x2, x3, [p1+16]; \ - ldp x6, x7, [p2+16]; \ - adcs x2, x2, x6; \ - adc x3, x3, x7; \ - stp x0, x1, [p0]; \ - stp x2, x3, [p0+16] - // Add 5-digit inputs and normalize to 4 digits #define add5_4(p0,p1,p2) \ @@ -480,28 +482,29 @@ stp x0, x1, [p0]; \ stp x2, x3, [p0+16] -// Subtraction of a pair of numbers < p_25519 just sufficient -// to give a 4-digit result. It actually always does (x - z) + (2^255-19) -// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 -// implicitly +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. 
The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. -#define sub_4(p0,p1,p2) \ - ldp x5, x6, [p1]; \ - ldp x4, x3, [p2]; \ - subs x5, x5, x4; \ - sbcs x6, x6, x3; \ - ldp x7, x8, [p1+16]; \ - ldp x4, x3, [p2+16]; \ - sbcs x7, x7, x4; \ - sbcs x8, x8, x3; \ - mov x3, #19; \ - subs x5, x5, x3; \ - sbcs x6, x6, xzr; \ - sbcs x7, x7, xzr; \ - mov x4, #0x8000000000000000; \ - sbc x8, x8, x4; \ - stp x5, x6, [p0]; \ - stp x7, x8, [p0+16] +#define add_twice4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x5, x6, [P1+16]; \ + ldp x7, x8, [P2+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] // Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 @@ -623,73 +626,68 @@ S2N_BN_SYMBOL(curve25519_x25519_alt): mov res, x0 -// Copy the inputs to the local variables while mangling them: +// Copy the inputs to the local variables with minimal mangling: // -// - The scalar gets turned into 01xxx...xxx000 by tweaking bits. -// Actually the top zero doesn't matter since the loop below -// never looks at it, so we don't literally modify that. +// - The scalar is in principle turned into 01xxx...xxx000 but +// in the structure below the special handling of these bits is +// explicit in the main computation; the scalar is just copied. // -// - The point x coord is reduced mod 2^255 *then* mod 2^255-19 +// - The point x coord is reduced mod 2^255 by masking off the +// top bit. In the main loop we only need reduction < 2 * p_25519. ldp x10, x11, [x1] - bic x10, x10, #7 stp x10, x11, [scalar] ldp x12, x13, [x1, #16] - orr x13, x13, #0x4000000000000000 stp x12, x13, [scalar+16] ldp x10, x11, [x2] - subs x6, x10, #-19 - adcs x7, x11, xzr + stp x10, x11, [pointx] ldp x12, x13, [x2, #16] and x13, x13, #0x7fffffffffffffff - adcs x8, x12, xzr - mov x9, #0x7fffffffffffffff - sbcs x9, x13, x9 - - csel x10, x6, x10, cs - csel x11, x7, x11, cs - csel x12, x8, x12, cs - csel x13, x9, x13, cs - - stp x10, x11, [pointx] stp x12, x13, [pointx+16] -// Initialize (xn,zn) = (1,0) and (xm,zm) = (x,1) with swap = 0 -// We use the fact that the point x coordinate is still in registers +// Initialize with explicit doubling in order to handle set bit 254. +// Set swap = 1 and (xm,zm) = (x,1) then double as (xn,zn) = 2 * (x,1). +// We use the fact that the point x coordinate is still in registers. +// Since zm = 1 we could do the doubling with an operation count of +// 2 * S + M instead of 2 * S + 2 * M, but it doesn't seem worth +// the slight complication arising from a different linear combination. - mov x2, #1 - stp x2, xzr, [xn] - stp xzr, xzr, [xn+16] - stp xzr, xzr, [zn] - stp xzr, xzr, [zn+16] + mov swap, #1 stp x10, x11, [xm] stp x12, x13, [xm+16] - stp x2, xzr, [zm] + stp swap, xzr, [zm] stp xzr, xzr, [zm+16] - mov swap, xzr -// The outer loop over scalar bits from i = 254, ..., i = 0 (inclusive) -// This starts at 254, and so implicitly masks bit 255 of the scalar. + sub_twice4(d,xm,zm) + add_twice4(s,xm,zm) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + +// The main loop over unmodified bits from i = 253, ..., i = 3 (inclusive). 
+// This is a classic Montgomery ladder, with the main coordinates only +// reduced mod 2 * p_25519, some intermediate results even more loosely. - mov i, #254 + mov i, #253 scalarloop: + // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn -// The adds don't need any normalization as they're fed to muls -// Just make sure the subs fit in 4 digits - sub_4(dm, xm, zm) - add_4(sn, xn, zn) - sub_4(dn, xn, zn) - add_4(sm, xm, zm) + sub_twice4(dm,xm,zm) + add_twice4(sn,xn,zn) + sub_twice4(dn,xn,zn) + add_twice4(sm,xm,zm) -// ADDING: dmsn = dm * sn; dnsm = sm * dn // DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) - mul_5(dmsn,sn,dm) - lsr x0, i, #6 ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly lsr x2, x2, i @@ -701,18 +699,22 @@ scalarloop: mux_4(d,dm,dn) mux_4(s,sm,sn) +// ADDING: dmsn = dm * sn; dnsm = sm * dn + + mul_5(dmsn,sn,dm) + mul_5(dnsm,sm,dn) -// DOUBLING: d = (xt - zt)^2 normalized only to 4 digits +// DOUBLING: d = (xt - zt)^2 sqr_4(d,d) // ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 -// DOUBLING: s = (xt + zt)^2, normalized only to 4 digits +// DOUBLING: s = (xt + zt)^2 sub5_4(dpro,dmsn,dnsm) - sqr_4(s,s) add5_4(spro,dmsn,dnsm) + sqr_4(s,s) sqr_4(dpro,dpro) // DOUBLING: p = 4 * xt * zt = s - d @@ -721,7 +723,7 @@ scalarloop: // ADDING: xm' = (dmsn + dnsm)^2 - sqr_p25519(xm,spro) + sqr_4(xm,spro) // DOUBLING: e = 121666 * p + d @@ -731,26 +733,66 @@ scalarloop: // DOUBLING: xn' = (xt + zt)^2 * (xt - zt)^2 = s * d - mul_p25519(xn,s,d) - -// ADDING: zm' = x * (dmsn - dnsm)^2 - - mul_p25519(zm,dpro,pointx) + mul_4(xn,s,d) // DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) // = p * (d + 121666 * p) - mul_p25519(zn,p,e) + mul_4(zn,p,e) + +// ADDING: zm' = x * (dmsn - dnsm)^2 + + mul_4(zm,dpro,pointx) -// Loop down as far as 0 (inclusive) +// Loop down as far as 3 (inclusive) - subs i, i, #1 + sub i, i, #1 + cmp i, #3 bcs scalarloop -// Since the scalar was forced to be a multiple of 8, we know it's even. -// Hence there is no need to multiplex: the projective answer is (xn,zn) -// and we can ignore (xm,zm); indeed we could have avoided the last three -// differential additions and just done the doublings. +// Multiplex directly into (xn,zn) then do three pure doubling steps; +// this accounts for the implicit zeroing of the three lowest bits +// of the scalar. On the very last doubling we *fully* reduce zn mod +// p_25519 to ease checking for degeneracy below. + + cmp swap, xzr + mux_4(xn,xm,xn) + mux_4(zn,zm,zn) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_p25519(zn,p,e) + +// The projective result of the scalar multiplication is now (xn,zn). // First set up the constant sn = 2^255 - 19 for the modular inverse. mov x0, #-19 @@ -1121,7 +1163,7 @@ zfliploop: csel x3, x3, xzr, ne stp x2, x3, [xn+16] -// Now the result is xn * (1/zn). +// Now the result is xn * (1/zn), fully reduced modulo p. 
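
(For reference, the Fermat form of this last step as a Python one-liner,
illustration only: the assembly obtains 1/zn from its dedicated modular
inverse loop, whose output it then picks up from the zm slot in the
mul_p25519 call below, rather than by exponentiation.)

    def affine_x(xn, zn, P25519=2**255 - 19):
        return xn * pow(zn, P25519 - 2, P25519) % P25519   # xn / zn mod p
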
mul_p25519(resx,xn,zm) diff --git a/x86_att/curve25519/curve25519_x25519.S b/x86_att/curve25519/curve25519_x25519.S index 689429c4b8..d83479a80a 100644 --- a/x86_att/curve25519/curve25519_x25519.S +++ b/x86_att/curve25519/curve25519_x25519.S @@ -78,10 +78,8 @@ #define NSPACE (13*NUMSIZE) -// Macros wrapping up the basic field operation calls -// bignum_mul_p25519 and bignum_sqr_p25519. -// These two are only trivially different from pure -// function calls to those subroutines. +// Macro wrapping up the basic field operation bignum_mul_p25519, only +// trivially different from a pure function call to that subroutine. #define mul_p25519(P0,P1,P2) \ xorl %edi, %edi ; \ @@ -176,78 +174,89 @@ movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 -#define sqr_p25519(P0,P1) \ - movq P1, %rdx ; \ - mulxq %rdx, %r8, %r15 ; \ - mulxq 0x8+P1, %r9, %r10 ; \ - mulxq 0x18+P1, %r11, %r12 ; \ - movq 0x10+P1, %rdx ; \ - mulxq 0x18+P1, %r13, %r14 ; \ - xorl %ebx, %ebx ; \ - mulxq P1, %rax, %rcx ; \ + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +#define mul_4(P0,P1,P2) \ + xorl %ecx, %ecx ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rax, %r10 ; \ + addq %rax, %r9 ; \ + mulxq 0x10+P1, %rax, %r11 ; \ + adcq %rax, %r10 ; \ + mulxq 0x18+P1, %rax, %r12 ; \ + adcq %rax, %r11 ; \ + adcq %rcx, %r12 ; \ + xorl %ecx, %ecx ; \ + movq 0x8+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ adcxq %rax, %r10 ; \ - adoxq %rcx, %r11 ; \ - mulxq 0x8+P1, %rax, %rcx ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ adcxq %rax, %r11 ; \ - adoxq %rcx, %r12 ; \ - movq 0x18+P1, %rdx ; \ - mulxq 0x8+P1, %rax, %rcx ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %r13 ; \ adcxq %rax, %r12 ; \ adoxq %rcx, %r13 ; \ - adcxq %rbx, %r13 ; \ + adcxq %rcx, %r13 ; \ + xorl %ecx, %ecx ; \ + movq 0x10+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rcx, %r14 ; \ + adcxq %rcx, %r14 ; \ + xorl %ecx, %ecx ; \ + movq 0x18+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ adoxq %rbx, %r14 ; \ - adcq %rbx, %r14 ; \ - xorl %ebx, %ebx ; \ - adcxq %r9, %r9 ; \ - adoxq %r15, %r9 ; \ - movq 0x8+P1, %rdx ; \ - mulxq %rdx, %rax, %rdx ; \ - adcxq %r10, %r10 ; \ - adoxq %rax, %r10 ; \ - adcxq %r11, %r11 ; \ - adoxq %rdx, %r11 ; \ - movq 0x10+P1, %rdx ; \ - mulxq %rdx, %rax, %rdx ; \ - adcxq %r12, %r12 ; \ - adoxq %rax, %r12 ; \ - adcxq %r13, %r13 ; \ - adoxq %rdx, %r13 ; \ - movq 0x18+P1, %rdx ; \ - mulxq %rdx, %rax, %r15 ; \ - adcxq %r14, %r14 ; \ - adoxq %rax, %r14 ; \ - adcxq %rbx, %r15 ; \ - adoxq %rbx, %r15 ; \ + mulxq 0x18+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rcx, %r15 ; \ + adcxq %rcx, %r15 ; \ movl $0x26, %edx ; \ - xorl %ebx, %ebx ; \ - mulxq %r12, %rax, %rcx ; \ + xorl %ecx, %ecx ; \ + mulxq %r12, %rax, %rbx ; \ adcxq %rax, %r8 ; \ - adoxq %rcx, %r9 ; \ - mulxq %r13, %rax, %rcx ; \ + adoxq %rbx, %r9 ; \ + mulxq %r13, %rax, %rbx ; \ adcxq %rax, %r9 ; \ - adoxq %rcx, %r10 ; \ - mulxq %r14, %rax, %rcx ; \ + adoxq %rbx, %r10 ; \ + 
mulxq %r14, %rax, %rbx ; \ adcxq %rax, %r10 ; \ - adoxq %rcx, %r11 ; \ + adoxq %rbx, %r11 ; \ mulxq %r15, %rax, %r12 ; \ adcxq %rax, %r11 ; \ - adoxq %rbx, %r12 ; \ - adcxq %rbx, %r12 ; \ + adoxq %rcx, %r12 ; \ + adcxq %rcx, %r12 ; \ shldq $0x1, %r11, %r12 ; \ - movl $0x13, %edx ; \ - leaq 0x1(%r12), %rax ; \ - bts $0x3f, %r11 ; \ - imulq %rdx, %rax ; \ - addq %rax, %r8 ; \ - adcq %rbx, %r9 ; \ - adcq %rbx, %r10 ; \ - adcq %rbx, %r11 ; \ - cmovbq %rbx, %rdx ; \ - subq %rdx, %r8 ; \ - sbbq %rbx, %r9 ; \ - sbbq %rbx, %r10 ; \ - sbbq %rbx, %r11 ; \ btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ movq %r8, P0 ; \ movq %r9, 0x8+P0 ; \ movq %r10, 0x10+P0 ; \ @@ -407,23 +416,6 @@ movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 -// Plain 4-digit add without any normalization -// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result - -#define add_4(P0,P1,P2) \ - movq P1, %rax ; \ - addq P2, %rax ; \ - movq %rax, P0 ; \ - movq 8+P1, %rax ; \ - adcq 8+P2, %rax ; \ - movq %rax, 8+P0 ; \ - movq 16+P1, %rax ; \ - adcq 16+P2, %rax ; \ - movq %rax, 16+P0 ; \ - movq 24+P1, %rax ; \ - adcq 24+P2, %rax ; \ - movq %rax, 24+P0 - // Add 5-digit inputs and normalize to 4 digits #define add5_4(P0,P1,P2) \ @@ -451,29 +443,32 @@ movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 -// Subtraction of a pair of numbers < p_25519 just sufficient -// to give a 4-digit result. It actually always does (x - z) + (2^255-19) -// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 -// implicitly +// Modular addition with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. -#define sub_4(P0,P1,P2) \ +#define add_twice4(P0,P1,P2) \ movq P1, %r8 ; \ - subq P2, %r8 ; \ - movq 8+P1, %r9 ; \ - sbbq 8+P2, %r9 ; \ - movq 16+P1, %r10 ; \ - sbbq 16+P2, %r10 ; \ - movq 24+P1, %rax ; \ - sbbq 24+P2, %rax ; \ - subq $19, %r8 ; \ + xorl %ecx, %ecx ; \ + addq P2, %r8 ; \ + movq 0x8+P1, %r9 ; \ + adcq 0x8+P2, %r9 ; \ + movq 0x10+P1, %r10 ; \ + adcq 0x10+P2, %r10 ; \ + movq 0x18+P1, %r11 ; \ + adcq 0x18+P2, %r11 ; \ + movl $38, %eax ; \ + cmovncq %rcx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ movq %r8, P0 ; \ - sbbq $0, %r9 ; \ - movq %r9, 8+P0 ; \ - sbbq $0, %r10 ; \ - movq %r10, 16+P0 ; \ - sbbq $0, %rax ; \ - btc $63, %rax ; \ - movq %rax, 24+P0 + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 // Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 @@ -613,23 +608,22 @@ S2N_BN_SYMBOL(curve25519_x25519): movq %rdi, res -// Copy the inputs to the local variables while mangling them: +// Copy the inputs to the local variables with minimal mangling: // -// - The scalar gets turned into 01xxx...xxx000 by tweaking bits. -// Actually the top zero doesn't matter since the loop below -// never looks at it, so we don't literally modify that. +// - The scalar is in principle turned into 01xxx...xxx000 but +// in the structure below the special handling of these bits is +// explicit in the main computation; the scalar is just copied. // -// - The point x coord is reduced mod 2^255 *then* mod 2^255-19 +// - The point x coord is reduced mod 2^255 by masking off the +// top bit. 
In the main loop we only need reduction < 2 * p_25519. movq (%rsi), %rax - andq $~7, %rax movq %rax, (%rsp) movq 8(%rsi), %rax movq %rax, 8(%rsp) movq 16(%rsi), %rax movq %rax, 16(%rsp) movq 24(%rsi), %rax - bts $62, %rax movq %rax, 24(%rsp) movq (%rdx), %r8 @@ -637,70 +631,57 @@ S2N_BN_SYMBOL(curve25519_x25519): movq 16(%rdx), %r10 movq 24(%rdx), %r11 btr $63, %r11 - movq $19, %r12 - xorq %r13, %r13 - xorq %r14, %r14 - xorq %r15, %r15 - addq %r8, %r12 - adcq %r9, %r13 - adcq %r10, %r14 - adcq %r11, %r15 - btr $63, %r15 // x >= 2^255 - 19 <=> x + 19 >= 2^255 - cmovcq %r12, %r8 movq %r8, 32(%rsp) - cmovcq %r13, %r9 movq %r9, 40(%rsp) - cmovcq %r14, %r10 movq %r10, 48(%rsp) - cmovcq %r15, %r11 movq %r11, 56(%rsp) -// Initialize (xn,zn) = (1,0) and (xm,zm) = (x,1) with swap = 0 -// We use the fact that the point x coordinate is still in registers +// Initialize with explicit doubling in order to handle set bit 254. +// Set swap = 1 and (xm,zm) = (x,1) then double as (xn,zn) = 2 * (x,1). +// We use the fact that the point x coordinate is still in registers. +// Since zm = 1 we could do the doubling with an operation count of +// 2 * S + M instead of 2 * S + 2 * M, but it doesn't seem worth +// the slight complication arising from a different linear combination. - movq $1, %rax - movq %rax, 320(%rsp) - movq %rax, 96(%rsp) - xorl %eax, %eax + movl $1, %eax movq %rax, swap - movq %rax, 160(%rsp) - movq %rax, 328(%rsp) - movq %rax, 104(%rsp) - movq %rax, 168(%rsp) - movq %rax, 336(%rsp) - movq %rax, 112(%rsp) - movq %rax, 176(%rsp) - movq %rax, 344(%rsp) - movq %rax, 120(%rsp) - movq %rax, 184(%rsp) - movq 32(%rsp), %rax movq %r8, 256(%rsp) + movq %rax, 96(%rsp) + xorl %eax, %eax movq %r9, 264(%rsp) + movq %rax, 104(%rsp) movq %r10, 272(%rsp) + movq %rax, 112(%rsp) movq %r11, 280(%rsp) + movq %rax, 120(%rsp) -// The outer loop over scalar bits from i = 254, ..., i = 0 (inclusive) -// This starts at 254, and so implicitly masks bit 255 of the scalar. + sub_twice4(d,xm,zm) + add_twice4(s,xm,zm) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + cmadd_4(e,0x1db42,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + +// The main loop over unmodified bits from i = 253, ..., i = 3 (inclusive). +// This is a classic Montgomery ladder, with the main coordinates only +// reduced mod 2 * p_25519, some intermediate results even more loosely. - movl $254, %eax + movl $253, %eax movq %rax, i scalarloop: // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn -// The adds don't need any normalization as they're fed to muls -// Just make sure the subs fit in 4 digits. 
- sub_4(dm,xm,zm) - add_4(sn,xn,zn) - sub_4(dn,xn,zn) - add_4(sm,xm,zm) + sub_twice4(dm,xm,zm) + add_twice4(sn,xn,zn) + sub_twice4(dn,xn,zn) + add_twice4(sm,xm,zm) -// ADDING: dmsn = dm * sn; dnsm = sm * dn // DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) - mul_5(dmsn,sn,dm) - movq i, %rdx movq %rdx, %rcx shrq $6, %rdx @@ -709,22 +690,25 @@ scalarloop: andq $1, %rdx cmpq swap, %rdx movq %rdx, swap - mux_4(d,dm,dn) mux_4(s,sm,sn) +// ADDING: dmsn = dm * sn; dnsm = sm * dn + + mul_5(dmsn,sn,dm) + mul_5(dnsm,sm,dn) -// DOUBLING: d = (xt - zt)^2 normalized only to 4 digits +// DOUBLING: d = (xt - zt)^2 sqr_4(d,d) // ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 -// DOUBLING: s = (xt + zt)^2, normalized only to 4 digits +// DOUBLING: s = (xt + zt)^2 sub5_4(dpro,dmsn,dnsm) - sqr_4(s,s) add5_4(spro,dmsn,dnsm) + sqr_4(s,s) sqr_4(dpro,dpro) // DOUBLING: p = 4 * xt * zt = s - d @@ -733,7 +717,7 @@ scalarloop: // ADDING: xm' = (dmsn + dnsm)^2 - sqr_p25519(xm,spro) + sqr_4(xm,spro) // DOUBLING: e = 121666 * p + d @@ -741,28 +725,63 @@ scalarloop: // DOUBLING: xn' = (xt + zt)^2 * (xt - zt)^2 = s * d - mul_p25519(xn,s,d) - -// ADDING: zm' = x * (dmsn - dnsm)^2 - - mul_p25519(zm,dpro,pointx) + mul_4(xn,s,d) // DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) // = p * (d + 121666 * p) - mul_p25519(zn,p,e) + mul_4(zn,p,e) + +// ADDING: zm' = x * (dmsn - dnsm)^2 -// Loop down as far as 0 (inclusive) + mul_4(zm,dpro,pointx) + +// Loop down as far as 3 (inclusive) movq i, %rax subq $1, %rax movq %rax, i + cmpq $3, %rax jnc scalarloop -// Since the scalar was forced to be a multiple of 8, we know it's even. -// Hence there is no need to multiplex: the projective answer is (xn,zn) -// and we can ignore (xm,zm); indeed we could have avoided the last three -// differential additions and just done the doublings. +// Multiplex directly into (xn,zn) then do three pure doubling steps; +// this accounts for the implicit zeroing of the three lowest bits +// of the scalar. On the very last doubling we *fully* reduce zn mod +// p_25519 to ease checking for degeneracy below. + + movq swap, %rdx + testq %rdx, %rdx + mux_4(xn,xm,xn) + mux_4(zn,zm,zn) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + cmadd_4(e,0x1db42,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + cmadd_4(e,0x1db42,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + cmadd_4(e,0x1db42,p,d) + mul_4(xn,s,d) + mul_p25519(zn,p,e) + +// The projective result of the scalar multiplication is now (xn,zn). // First set up the constant sn = 2^255 - 19 for the modular inverse. movq $-19, %rax @@ -1200,7 +1219,7 @@ fliploop: cmovzq %rax, %rcx movq %rcx, 344(%rsp) -// Now the result is xn * (1/zn). +// Now the result is xn * (1/zn), fully reduced modulo p. movq res, %rbp mul_p25519(resx,xn,zm) diff --git a/x86_att/curve25519/curve25519_x25519_alt.S b/x86_att/curve25519/curve25519_x25519_alt.S index 1d132ab051..5abfaf0180 100644 --- a/x86_att/curve25519/curve25519_x25519_alt.S +++ b/x86_att/curve25519/curve25519_x25519_alt.S @@ -78,10 +78,8 @@ #define NSPACE (13*NUMSIZE) -// Macros wrapping up the basic field operation calls -// bignum_mul_p25519_alt and bignum_sqr_p25519_alt. -// These two are only trivially different from pure -// function calls to those subroutines. 
+// Macro wrapping up the basic field operation bignum_mul_p25519_alt, only +// trivially different from a pure function call to that subroutine. #define mul_p25519(P0,P1,P2) \ movq P1, %rax ; \ @@ -214,77 +212,91 @@ movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 -#define sqr_p25519(P0,P1) \ +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +#define mul_4(P0,P1,P2) \ movq P1, %rax ; \ - mulq %rax; \ + mulq P2; \ movq %rax, %r8 ; \ movq %rdx, %r9 ; \ xorq %r10, %r10 ; \ xorq %r11, %r11 ; \ movq P1, %rax ; \ - mulq 0x8+P1; \ - addq %rax, %rax ; \ - adcq %rdx, %rdx ; \ - adcq $0x0, %r11 ; \ + mulq 0x8+P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + movq 0x8+P1, %rax ; \ + mulq P2; \ addq %rax, %r9 ; \ adcq %rdx, %r10 ; \ adcq $0x0, %r11 ; \ xorq %r12, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq %r12, %r12 ; \ movq 0x8+P1, %rax ; \ - mulq %rax; \ + mulq 0x8+P2; \ addq %rax, %r10 ; \ adcq %rdx, %r11 ; \ adcq $0x0, %r12 ; \ - movq P1, %rax ; \ - mulq 0x10+P1; \ - addq %rax, %rax ; \ - adcq %rdx, %rdx ; \ - adcq $0x0, %r12 ; \ + movq 0x10+P1, %rax ; \ + mulq P2; \ addq %rax, %r10 ; \ adcq %rdx, %r11 ; \ adcq $0x0, %r12 ; \ xorq %r13, %r13 ; \ movq P1, %rax ; \ - mulq 0x18+P1; \ - addq %rax, %rax ; \ - adcq %rdx, %rdx ; \ - adcq $0x0, %r13 ; \ + mulq 0x18+P2; \ addq %rax, %r11 ; \ adcq %rdx, %r12 ; \ - adcq $0x0, %r13 ; \ + adcq %r13, %r13 ; \ movq 0x8+P1, %rax ; \ - mulq 0x10+P1; \ - addq %rax, %rax ; \ - adcq %rdx, %rdx ; \ + mulq 0x10+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ adcq $0x0, %r13 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x18+P1, %rax ; \ + mulq P2; \ addq %rax, %r11 ; \ adcq %rdx, %r12 ; \ adcq $0x0, %r13 ; \ xorq %r14, %r14 ; \ movq 0x8+P1, %rax ; \ - mulq 0x18+P1; \ - addq %rax, %rax ; \ - adcq %rdx, %rdx ; \ - adcq $0x0, %r14 ; \ + mulq 0x18+P2; \ addq %rax, %r12 ; \ adcq %rdx, %r13 ; \ - adcq $0x0, %r14 ; \ + adcq %r14, %r14 ; \ movq 0x10+P1, %rax ; \ - mulq %rax; \ + mulq 0x10+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x8+P2; \ addq %rax, %r12 ; \ adcq %rdx, %r13 ; \ adcq $0x0, %r14 ; \ xorq %r15, %r15 ; \ movq 0x10+P1, %rax ; \ - mulq 0x18+P1; \ - addq %rax, %rax ; \ - adcq %rdx, %rdx ; \ - adcq $0x0, %r15 ; \ + mulq 0x18+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq %r15, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x10+P2; \ addq %rax, %r13 ; \ adcq %rdx, %r14 ; \ adcq $0x0, %r15 ; \ movq 0x18+P1, %rax ; \ - mulq %rax; \ + mulq 0x18+P2; \ addq %rax, %r14 ; \ adcq %rdx, %r15 ; \ movl $0x26, %esi ; \ @@ -313,25 +325,16 @@ movq %rdx, %r12 ; \ adcq %rcx, %r12 ; \ shldq $0x1, %r11, %r12 ; \ - leaq 0x1(%r12), %rax ; \ - movl $0x13, %esi ; \ - bts $63, %r11 ; \ - imulq %rsi, %rax ; \ - addq %rax, %r8 ; \ - adcq %rcx, %r9 ; \ - adcq %rcx, %r10 ; \ - adcq %rcx, %r11 ; \ - sbbq %rax, %rax ; \ - notq %rax; \ - andq %rsi, %rax ; \ - subq %rax, %r8 ; \ - sbbq %rcx, %r9 ; \ - sbbq %rcx, %r10 ; \ - sbbq %rcx, %r11 ; \ - btr $63, %r11 ; \ - movq %r8, P0 ; \ - movq %r9, 0x8+P0 ; \ - movq %r10, 0x10+P0 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 // Multiplication just giving a 5-digit 
result (actually < 39 * p_25519) @@ -567,23 +570,6 @@ movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 -// Plain 4-digit add without any normalization -// With inputs < p_25519 (indeed < 2^255) it still gives a 4-digit result - -#define add_4(P0,P1,P2) \ - movq P1, %rax ; \ - addq P2, %rax ; \ - movq %rax, P0 ; \ - movq 8+P1, %rax ; \ - adcq 8+P2, %rax ; \ - movq %rax, 8+P0 ; \ - movq 16+P1, %rax ; \ - adcq 16+P2, %rax ; \ - movq %rax, 16+P0 ; \ - movq 24+P1, %rax ; \ - adcq 24+P2, %rax ; \ - movq %rax, 24+P0 - // Add 5-digit inputs and normalize to 4 digits #define add5_4(P0,P1,P2) \ @@ -611,29 +597,32 @@ movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 -// Subtraction of a pair of numbers < p_25519 just sufficient -// to give a 4-digit result. It actually always does (x - z) + (2^255-19) -// which in turn is done by (x - z) - (2^255+19) discarding the 2^256 -// implicitly +// Modular addition with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. -#define sub_4(P0,P1,P2) \ +#define add_twice4(P0,P1,P2) \ movq P1, %r8 ; \ - subq P2, %r8 ; \ - movq 8+P1, %r9 ; \ - sbbq 8+P2, %r9 ; \ - movq 16+P1, %r10 ; \ - sbbq 16+P2, %r10 ; \ - movq 24+P1, %rax ; \ - sbbq 24+P2, %rax ; \ - subq $19, %r8 ; \ + xorl %ecx, %ecx ; \ + addq P2, %r8 ; \ + movq 0x8+P1, %r9 ; \ + adcq 0x8+P2, %r9 ; \ + movq 0x10+P1, %r10 ; \ + adcq 0x10+P2, %r10 ; \ + movq 0x18+P1, %r11 ; \ + adcq 0x18+P2, %r11 ; \ + movl $38, %eax ; \ + cmovncq %rcx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ movq %r8, P0 ; \ - sbbq $0, %r9 ; \ - movq %r9, 8+P0 ; \ - sbbq $0, %r10 ; \ - movq %r10, 16+P0 ; \ - sbbq $0, %rax ; \ - btc $63, %rax ; \ - movq %rax, 24+P0 + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 // Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 @@ -779,23 +768,22 @@ S2N_BN_SYMBOL(curve25519_x25519_alt): movq %rdi, res -// Copy the inputs to the local variables while mangling them: +// Copy the inputs to the local variables with minimal mangling: // -// - The scalar gets turned into 01xxx...xxx000 by tweaking bits. -// Actually the top zero doesn't matter since the loop below -// never looks at it, so we don't literally modify that. +// - The scalar is in principle turned into 01xxx...xxx000 but +// in the structure below the special handling of these bits is +// explicit in the main computation; the scalar is just copied. // -// - The point x coord is reduced mod 2^255 *then* mod 2^255-19 +// - The point x coord is reduced mod 2^255 by masking off the +// top bit. In the main loop we only need reduction < 2 * p_25519. 
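
(A small Python model of the input conditioning this comment refers to,
illustration only: clamp shows the RFC 7748 "01xxx...xxx000" shape that
is now realized implicitly by the explicit doublings, while mask_x is the
only mangling the routine still performs on its inputs.)

    def clamp(k):                       # the shape 01xxx...xxx000
        k &= ~7                         # clear bits 2..0 (cofactor)
        k &= ~(1 << 255)                # clear bit 255
        return k | (1 << 254)           # set bit 254

    def mask_x(u):                      # reduce mod 2^255 via the top bit
        return u & ((1 << 255) - 1)     # < 2^255, so already < 2 * p_25519
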
movq (%rsi), %rax - andq $~7, %rax movq %rax, (%rsp) movq 8(%rsi), %rax movq %rax, 8(%rsp) movq 16(%rsi), %rax movq %rax, 16(%rsp) movq 24(%rsi), %rax - bts $62, %rax movq %rax, 24(%rsp) movq (%rdx), %r8 @@ -803,70 +791,57 @@ S2N_BN_SYMBOL(curve25519_x25519_alt): movq 16(%rdx), %r10 movq 24(%rdx), %r11 btr $63, %r11 - movq $19, %r12 - xorq %r13, %r13 - xorq %r14, %r14 - xorq %r15, %r15 - addq %r8, %r12 - adcq %r9, %r13 - adcq %r10, %r14 - adcq %r11, %r15 - btr $63, %r15 // x >= 2^255 - 19 <=> x + 19 >= 2^255 - cmovcq %r12, %r8 movq %r8, 32(%rsp) - cmovcq %r13, %r9 movq %r9, 40(%rsp) - cmovcq %r14, %r10 movq %r10, 48(%rsp) - cmovcq %r15, %r11 movq %r11, 56(%rsp) -// Initialize (xn,zn) = (1,0) and (xm,zm) = (x,1) with swap = 0 -// We use the fact that the point x coordinate is still in registers +// Initialize with explicit doubling in order to handle set bit 254. +// Set swap = 1 and (xm,zm) = (x,1) then double as (xn,zn) = 2 * (x,1). +// We use the fact that the point x coordinate is still in registers. +// Since zm = 1 we could do the doubling with an operation count of +// 2 * S + M instead of 2 * S + 2 * M, but it doesn't seem worth +// the slight complication arising from a different linear combination. - movq $1, %rax - movq %rax, 320(%rsp) - movq %rax, 96(%rsp) - xorl %eax, %eax + movl $1, %eax movq %rax, swap - movq %rax, 160(%rsp) - movq %rax, 328(%rsp) - movq %rax, 104(%rsp) - movq %rax, 168(%rsp) - movq %rax, 336(%rsp) - movq %rax, 112(%rsp) - movq %rax, 176(%rsp) - movq %rax, 344(%rsp) - movq %rax, 120(%rsp) - movq %rax, 184(%rsp) - movq 32(%rsp), %rax movq %r8, 256(%rsp) + movq %rax, 96(%rsp) + xorl %eax, %eax movq %r9, 264(%rsp) + movq %rax, 104(%rsp) movq %r10, 272(%rsp) + movq %rax, 112(%rsp) movq %r11, 280(%rsp) + movq %rax, 120(%rsp) + + sub_twice4(d,xm,zm) + add_twice4(s,xm,zm) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + cmadd_4(e,0x1db42,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) -// The outer loop over scalar bits from i = 254, ..., i = 0 (inclusive) -// This starts at 254, and so implicitly masks bit 255 of the scalar. +// The main loop over unmodified bits from i = 253, ..., i = 3 (inclusive). +// This is a classic Montgomery ladder, with the main coordinates only +// reduced mod 2 * p_25519, some intermediate results even more loosely. - movl $254, %eax + movl $253, %eax movq %rax, i scalarloop: // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn -// The adds don't need any normalization as they're fed to muls -// Just make sure the subs fit in 4 digits. 
- sub_4(dm,xm,zm) - add_4(sn,xn,zn) - sub_4(dn,xn,zn) - add_4(sm,xm,zm) + sub_twice4(dm,xm,zm) + add_twice4(sn,xn,zn) + sub_twice4(dn,xn,zn) + add_twice4(sm,xm,zm) -// ADDING: dmsn = dm * sn; dnsm = sm * dn // DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) - mul_5(dmsn,sn,dm) - movq i, %rdx movq %rdx, %rcx shrq $6, %rdx @@ -875,22 +850,25 @@ scalarloop: andq $1, %rdx cmpq swap, %rdx movq %rdx, swap - mux_4(d,dm,dn) mux_4(s,sm,sn) +// ADDING: dmsn = dm * sn; dnsm = sm * dn + + mul_5(dmsn,sn,dm) + mul_5(dnsm,sm,dn) -// DOUBLING: d = (xt - zt)^2 normalized only to 4 digits +// DOUBLING: d = (xt - zt)^2 sqr_4(d,d) // ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 -// DOUBLING: s = (xt + zt)^2, normalized only to 4 digits +// DOUBLING: s = (xt + zt)^2 sub5_4(dpro,dmsn,dnsm) - sqr_4(s,s) add5_4(spro,dmsn,dnsm) + sqr_4(s,s) sqr_4(dpro,dpro) // DOUBLING: p = 4 * xt * zt = s - d @@ -899,7 +877,7 @@ scalarloop: // ADDING: xm' = (dmsn + dnsm)^2 - sqr_p25519(xm,spro) + sqr_4(xm,spro) // DOUBLING: e = 121666 * p + d @@ -907,28 +885,63 @@ scalarloop: // DOUBLING: xn' = (xt + zt)^2 * (xt - zt)^2 = s * d - mul_p25519(xn,s,d) - -// ADDING: zm' = x * (dmsn - dnsm)^2 - - mul_p25519(zm,dpro,pointx) + mul_4(xn,s,d) // DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) // = p * (d + 121666 * p) - mul_p25519(zn,p,e) + mul_4(zn,p,e) + +// ADDING: zm' = x * (dmsn - dnsm)^2 -// Loop down as far as 0 (inclusive) + mul_4(zm,dpro,pointx) + +// Loop down as far as 3 (inclusive) movq i, %rax subq $1, %rax movq %rax, i + cmpq $3, %rax jnc scalarloop -// Since the scalar was forced to be a multiple of 8, we know it's even. -// Hence there is no need to multiplex: the projective answer is (xn,zn) -// and we can ignore (xm,zm); indeed we could have avoided the last three -// differential additions and just done the doublings. +// Multiplex directly into (xn,zn) then do three pure doubling steps; +// this accounts for the implicit zeroing of the three lowest bits +// of the scalar. On the very last doubling we *fully* reduce zn mod +// p_25519 to ease checking for degeneracy below. + + movq swap, %rdx + testq %rdx, %rdx + mux_4(xn,xm,xn) + mux_4(zn,zm,zn) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + cmadd_4(e,0x1db42,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + cmadd_4(e,0x1db42,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + cmadd_4(e,0x1db42,p,d) + mul_4(xn,s,d) + mul_p25519(zn,p,e) + +// The projective result of the scalar multiplication is now (xn,zn). // First set up the constant sn = 2^255 - 19 for the modular inverse. movq $-19, %rax @@ -1366,7 +1379,7 @@ fliploop: cmovzq %rax, %rcx movq %rcx, 344(%rsp) -// Now the result is xn * (1/zn). +// Now the result is xn * (1/zn), fully reduced modulo p. movq res, %rbp mul_p25519(resx,xn,zm) From 26b8398ca7d348ee1ceeda37dda9de04a79d25ad Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 7 Mar 2023 21:04:14 -0800 Subject: [PATCH 27/42] Reorder some field operations in X25519 functions In slightly different ways for ARM and x86. 
s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/bf52775d206ecc9710ae439f131d0d879a273c9a --- arm/curve25519/curve25519_x25519.S | 18 +++++++++--------- arm/curve25519/curve25519_x25519_alt.S | 18 +++++++++--------- x86_att/curve25519/curve25519_x25519.S | 4 +--- x86_att/curve25519/curve25519_x25519_alt.S | 3 +-- 4 files changed, 20 insertions(+), 23 deletions(-) diff --git a/arm/curve25519/curve25519_x25519.S b/arm/curve25519/curve25519_x25519.S index 27e41644db..02407400a8 100644 --- a/arm/curve25519/curve25519_x25519.S +++ b/arm/curve25519/curve25519_x25519.S @@ -889,7 +889,6 @@ S2N_BN_SYMBOL(curve25519_x25519): scalarloop: - // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn sub_twice4(dm,xm,zm) @@ -897,8 +896,11 @@ scalarloop: sub_twice4(dn,xn,zn) add_twice4(sm,xm,zm) +// ADDING: dmsn = dm * sn // DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) + mul_5(dmsn,sn,dm) + lsr x0, i, #6 ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly lsr x2, x2, i @@ -910,9 +912,7 @@ scalarloop: mux_4(d,dm,dn) mux_4(s,sm,sn) -// ADDING: dmsn = dm * sn; dnsm = sm * dn - - mul_5(dmsn,sn,dm) +// ADDING: dnsm = sm * dn mul_5(dnsm,sm,dn) @@ -924,8 +924,8 @@ scalarloop: // DOUBLING: s = (xt + zt)^2 sub5_4(dpro,dmsn,dnsm) - add5_4(spro,dmsn,dnsm) sqr_4(s,s) + add5_4(spro,dmsn,dnsm) sqr_4(dpro,dpro) // DOUBLING: p = 4 * xt * zt = s - d @@ -946,15 +946,15 @@ scalarloop: mul_4(xn,s,d) +// ADDING: zm' = x * (dmsn - dnsm)^2 + + mul_4(zm,dpro,pointx) + // DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) // = p * (d + 121666 * p) mul_4(zn,p,e) -// ADDING: zm' = x * (dmsn - dnsm)^2 - - mul_4(zm,dpro,pointx) - // Loop down as far as 3 (inclusive) sub i, i, #1 diff --git a/arm/curve25519/curve25519_x25519_alt.S b/arm/curve25519/curve25519_x25519_alt.S index 5ae6c9fa19..97e9ddc2cd 100644 --- a/arm/curve25519/curve25519_x25519_alt.S +++ b/arm/curve25519/curve25519_x25519_alt.S @@ -678,7 +678,6 @@ S2N_BN_SYMBOL(curve25519_x25519_alt): scalarloop: - // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn sub_twice4(dm,xm,zm) @@ -686,8 +685,11 @@ scalarloop: sub_twice4(dn,xn,zn) add_twice4(sm,xm,zm) +// ADDING: dmsn = dm * sn // DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) + mul_5(dmsn,sn,dm) + lsr x0, i, #6 ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly lsr x2, x2, i @@ -699,9 +701,7 @@ scalarloop: mux_4(d,dm,dn) mux_4(s,sm,sn) -// ADDING: dmsn = dm * sn; dnsm = sm * dn - - mul_5(dmsn,sn,dm) +// ADDING: dnsm = sm * dn mul_5(dnsm,sm,dn) @@ -713,8 +713,8 @@ scalarloop: // DOUBLING: s = (xt + zt)^2 sub5_4(dpro,dmsn,dnsm) - add5_4(spro,dmsn,dnsm) sqr_4(s,s) + add5_4(spro,dmsn,dnsm) sqr_4(dpro,dpro) // DOUBLING: p = 4 * xt * zt = s - d @@ -735,15 +735,15 @@ scalarloop: mul_4(xn,s,d) +// ADDING: zm' = x * (dmsn - dnsm)^2 + + mul_4(zm,dpro,pointx) + // DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) // = p * (d + 121666 * p) mul_4(zn,p,e) -// ADDING: zm' = x * (dmsn - dnsm)^2 - - mul_4(zm,dpro,pointx) - // Loop down as far as 3 (inclusive) sub i, i, #1 diff --git a/x86_att/curve25519/curve25519_x25519.S b/x86_att/curve25519/curve25519_x25519.S index d83479a80a..d103ec911c 100644 --- a/x86_att/curve25519/curve25519_x25519.S +++ b/x86_att/curve25519/curve25519_x25519.S @@ -174,7 +174,6 @@ movq %r10, 0x10+P0 ; \ movq %r11, 0x18+P0 - // A version of multiplication that only guarantees output < 2 * p_25519. // This basically skips the +1 and final correction in quotient estimation. 
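
(The relaxed reduction in Python, illustration only: an 8-digit product
2^256*h + l is folded once using 2^256 == 38 (mod p_25519) down to the
5-digit stage, and the quotient estimate is then just the bits from
position 255 up, without the +1 and conditional subtraction that the
fully reducing mul_p25519 performs.)

    P25519 = 2**255 - 19

    def reduce_to_2p(h, l):
        t = l + 38 * h                       # < 39 * 2^256, 5-digit stage
        q = t >> 255                         # quotient estimate, no +1
        return (t & (2**255 - 1)) + 19 * q   # == t (mod p), < 2 * p_25519

    a = b = P25519 - 1                       # sample operands
    wide = a * b
    r = reduce_to_2p(wide >> 256, wide % 2**256)
    assert r % P25519 == wide % P25519 and r < 2 * P25519
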
@@ -695,9 +694,8 @@ scalarloop: // ADDING: dmsn = dm * sn; dnsm = sm * dn - mul_5(dmsn,sn,dm) - mul_5(dnsm,sm,dn) + mul_5(dmsn,sn,dm) // DOUBLING: d = (xt - zt)^2 diff --git a/x86_att/curve25519/curve25519_x25519_alt.S b/x86_att/curve25519/curve25519_x25519_alt.S index 5abfaf0180..1d4ab64bc4 100644 --- a/x86_att/curve25519/curve25519_x25519_alt.S +++ b/x86_att/curve25519/curve25519_x25519_alt.S @@ -855,9 +855,8 @@ scalarloop: // ADDING: dmsn = dm * sn; dnsm = sm * dn - mul_5(dmsn,sn,dm) - mul_5(dnsm,sm,dn) + mul_5(dmsn,sn,dm) // DOUBLING: d = (xt - zt)^2 From c323a2d834a9103e9ddce4e1328b6cf66508ea61 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 8 Mar 2023 16:26:18 -0800 Subject: [PATCH 28/42] Eliminate 5-digit intermediates in ARM X25519 functions This is simpler and if anything slightly faster. That seems less clear on x86, so there is no analogous change there. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/a69657bdad9673491b9f2dee103d90f582ca1bed --- arm/curve25519/curve25519_x25519.S | 237 ++----------------------- arm/curve25519/curve25519_x25519_alt.S | 178 ++----------------- 2 files changed, 24 insertions(+), 391 deletions(-) diff --git a/arm/curve25519/curve25519_x25519.S b/arm/curve25519/curve25519_x25519.S index 02407400a8..e1d17f4a0a 100644 --- a/arm/curve25519/curve25519_x25519.S +++ b/arm/curve25519/curve25519_x25519.S @@ -41,7 +41,6 @@ #define resx res, #0 // Pointer-offset pairs for temporaries on stack with some aliasing. -// Both dmsn and dnsm need space for >= 5 digits, and we allocate 8 #define scalar sp, #(0*NUMSIZE) @@ -62,18 +61,18 @@ #define dmsn sp, #(6*NUMSIZE) #define p sp, #(6*NUMSIZE) -#define xm sp, #(8*NUMSIZE) -#define dnsm sp, #(8*NUMSIZE) -#define spro sp, #(8*NUMSIZE) +#define xm sp, #(7*NUMSIZE) +#define dnsm sp, #(7*NUMSIZE) +#define spro sp, #(7*NUMSIZE) -#define xn sp, #(10*NUMSIZE) -#define s sp, #(10*NUMSIZE) +#define d sp, #(8*NUMSIZE) -#define d sp, #(11*NUMSIZE) +#define xn sp, #(9*NUMSIZE) +#define s sp, #(9*NUMSIZE) // Total size to reserve on the stack -#define NSPACE (12*NUMSIZE) +#define NSPACE (10*NUMSIZE) // Macro wrapping up the basic field operation bignum_mul_p25519, only // trivially different from a pure function call to that subroutine. 
@@ -401,158 +400,6 @@ stp x7, x8, [P0]; \ stp x9, x10, [P0+16] -// Multiplication just giving a 5-digit result (actually < 39 * 2^256) -// by not doing anything beyond the first stage of reduction - -#define mul_5(p0,p1,p2) \ - ldp x3, x4, [p1]; \ - ldp x5, x6, [p2]; \ - mul x7, x3, x5; \ - umulh x8, x3, x5; \ - mul x9, x4, x6; \ - umulh x10, x4, x6; \ - subs x4, x4, x3; \ - cneg x4, x4, cc; \ - csetm x16, cc; \ - adds x9, x9, x8; \ - adc x10, x10, xzr; \ - subs x3, x5, x6; \ - cneg x3, x3, cc; \ - cinv x16, x16, cc; \ - mul x15, x4, x3; \ - umulh x3, x4, x3; \ - adds x8, x7, x9; \ - adcs x9, x9, x10; \ - adc x10, x10, xzr; \ - cmn x16, #0x1; \ - eor x15, x15, x16; \ - adcs x8, x15, x8; \ - eor x3, x3, x16; \ - adcs x9, x3, x9; \ - adc x10, x10, x16; \ - ldp x3, x4, [p1+16]; \ - ldp x5, x6, [p2+16]; \ - mul x11, x3, x5; \ - umulh x12, x3, x5; \ - mul x13, x4, x6; \ - umulh x14, x4, x6; \ - subs x4, x4, x3; \ - cneg x4, x4, cc; \ - csetm x16, cc; \ - adds x13, x13, x12; \ - adc x14, x14, xzr; \ - subs x3, x5, x6; \ - cneg x3, x3, cc; \ - cinv x16, x16, cc; \ - mul x15, x4, x3; \ - umulh x3, x4, x3; \ - adds x12, x11, x13; \ - adcs x13, x13, x14; \ - adc x14, x14, xzr; \ - cmn x16, #0x1; \ - eor x15, x15, x16; \ - adcs x12, x15, x12; \ - eor x3, x3, x16; \ - adcs x13, x3, x13; \ - adc x14, x14, x16; \ - ldp x3, x4, [p1+16]; \ - ldp x15, x16, [p1]; \ - subs x3, x3, x15; \ - sbcs x4, x4, x16; \ - csetm x16, cc; \ - ldp x15, x0, [p2]; \ - subs x5, x15, x5; \ - sbcs x6, x0, x6; \ - csetm x0, cc; \ - eor x3, x3, x16; \ - subs x3, x3, x16; \ - eor x4, x4, x16; \ - sbc x4, x4, x16; \ - eor x5, x5, x0; \ - subs x5, x5, x0; \ - eor x6, x6, x0; \ - sbc x6, x6, x0; \ - eor x16, x0, x16; \ - adds x11, x11, x9; \ - adcs x12, x12, x10; \ - adcs x13, x13, xzr; \ - adc x14, x14, xzr; \ - mul x2, x3, x5; \ - umulh x0, x3, x5; \ - mul x15, x4, x6; \ - umulh x1, x4, x6; \ - subs x4, x4, x3; \ - cneg x4, x4, cc; \ - csetm x9, cc; \ - adds x15, x15, x0; \ - adc x1, x1, xzr; \ - subs x6, x5, x6; \ - cneg x6, x6, cc; \ - cinv x9, x9, cc; \ - mul x5, x4, x6; \ - umulh x6, x4, x6; \ - adds x0, x2, x15; \ - adcs x15, x15, x1; \ - adc x1, x1, xzr; \ - cmn x9, #0x1; \ - eor x5, x5, x9; \ - adcs x0, x5, x0; \ - eor x6, x6, x9; \ - adcs x15, x6, x15; \ - adc x1, x1, x9; \ - adds x9, x11, x7; \ - adcs x10, x12, x8; \ - adcs x11, x13, x11; \ - adcs x12, x14, x12; \ - adcs x13, x13, xzr; \ - adc x14, x14, xzr; \ - cmn x16, #0x1; \ - eor x2, x2, x16; \ - adcs x9, x2, x9; \ - eor x0, x0, x16; \ - adcs x10, x0, x10; \ - eor x15, x15, x16; \ - adcs x11, x15, x11; \ - eor x1, x1, x16; \ - adcs x12, x1, x12; \ - adcs x13, x13, x16; \ - adc x14, x14, x16; \ - mov x3, #0x26; \ - and x5, x11, #0xffffffff; \ - lsr x4, x11, #32; \ - mul x5, x3, x5; \ - mul x4, x3, x4; \ - adds x7, x7, x5; \ - and x5, x12, #0xffffffff; \ - lsr x12, x12, #32; \ - mul x5, x3, x5; \ - mul x12, x3, x12; \ - adcs x8, x8, x5; \ - and x5, x13, #0xffffffff; \ - lsr x13, x13, #32; \ - mul x5, x3, x5; \ - mul x13, x3, x13; \ - adcs x9, x9, x5; \ - and x5, x14, #0xffffffff; \ - lsr x14, x14, #32; \ - mul x5, x3, x5; \ - mul x14, x3, x14; \ - adcs x10, x10, x5; \ - cset x11, cs; \ - lsl x5, x4, #32; \ - adds x7, x7, x5; \ - extr x5, x12, x4, #32; \ - adcs x8, x8, x5; \ - extr x5, x13, x12, #32; \ - adcs x9, x9, x5; \ - extr x5, x14, x13, #32; \ - adcs x10, x10, x5; \ - lsr x5, x14, #32; \ - adc x11, x11, x5; \ - stp x7, x8, [p0]; \ - stp x9, x10, [p0+16]; \ - str x11, [p0+32] - // Squaring just giving a result < 2 * p_25519, which is done by // basically skipping 
the +1 in the quotient estimate and the final // optional correction. @@ -667,33 +514,7 @@ stp x2, x3, [p0]; \ stp x4, x5, [p0+16] -// Add 5-digit inputs and normalize to 4 digits - -#define add5_4(p0,p1,p2) \ - ldp x0, x1, [p1]; \ - ldp x4, x5, [p2]; \ - adds x0, x0, x4; \ - adcs x1, x1, x5; \ - ldp x2, x3, [p1+16]; \ - ldp x6, x7, [p2+16]; \ - adcs x2, x2, x6; \ - adcs x3, x3, x7; \ - ldr x4, [p1+32]; \ - ldr x5, [p2+32]; \ - adc x4, x4, x5; \ - cmn x3, x3; \ - bic x3, x3, #0x8000000000000000; \ - adc x8, x4, x4; \ - mov x7, #19; \ - mul x11, x7, x8; \ - adds x0, x0, x11; \ - adcs x1, x1, xzr; \ - adcs x2, x2, xzr; \ - adc x3, x3, xzr; \ - stp x0, x1, [p0]; \ - stp x2, x3, [p0+16] - -// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// Modular addition with double modulus 2 * p_25519 = 2^256 - 38. // This only ensures that the result fits in 4 digits, not that it is reduced // even w.r.t. double modulus. The result is always correct modulo provided // the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided @@ -737,40 +558,6 @@ stp x5, x6, [p0]; \ stp x7, x8, [p0+16] -// 5-digit subtraction with upward bias to make it positive, adding -// 1000 * (2^255 - 19) = 2^256 * 500 - 19000, then normalizing to 4 digits - -#define sub5_4(p0,p1,p2) \ - ldp x0, x1, [p1]; \ - ldp x4, x5, [p2]; \ - subs x0, x0, x4; \ - sbcs x1, x1, x5; \ - ldp x2, x3, [p1+16]; \ - ldp x6, x7, [p2+16]; \ - sbcs x2, x2, x6; \ - sbcs x3, x3, x7; \ - ldr x4, [p1+32]; \ - ldr x5, [p2+32]; \ - sbc x4, x4, x5; \ - mov x7, -19000; \ - adds x0, x0, x7; \ - sbcs x1, x1, xzr; \ - sbcs x2, x2, xzr; \ - sbcs x3, x3, xzr; \ - mov x7, 499; \ - adc x4, x4, x7; \ - cmn x3, x3; \ - bic x3, x3, #0x8000000000000000; \ - adc x8, x4, x4; \ - mov x7, #19; \ - mul x11, x7, x8; \ - adds x0, x0, x11; \ - adcs x1, x1, xzr; \ - adcs x2, x2, xzr; \ - adc x3, x3, xzr; \ - stp x0, x1, [p0]; \ - stp x2, x3, [p0+16] - // Combined z = c * x + y with reduction only < 2 * p_25519 // where c is initially in the X1 register. It is assumed // that 19 * (c * x + y) < 2^60 * 2^256 so we don't need a @@ -899,7 +686,7 @@ scalarloop: // ADDING: dmsn = dm * sn // DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) - mul_5(dmsn,sn,dm) + mul_4(dmsn,sn,dm) lsr x0, i, #6 ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly @@ -914,7 +701,7 @@ scalarloop: // ADDING: dnsm = sm * dn - mul_5(dnsm,sm,dn) + mul_4(dnsm,sm,dn) // DOUBLING: d = (xt - zt)^2 @@ -923,9 +710,9 @@ scalarloop: // ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 // DOUBLING: s = (xt + zt)^2 - sub5_4(dpro,dmsn,dnsm) + sub_twice4(dpro,dmsn,dnsm) sqr_4(s,s) - add5_4(spro,dmsn,dnsm) + add_twice4(spro,dmsn,dnsm) sqr_4(dpro,dpro) // DOUBLING: p = 4 * xt * zt = s - d diff --git a/arm/curve25519/curve25519_x25519_alt.S b/arm/curve25519/curve25519_x25519_alt.S index 97e9ddc2cd..046d56122e 100644 --- a/arm/curve25519/curve25519_x25519_alt.S +++ b/arm/curve25519/curve25519_x25519_alt.S @@ -41,7 +41,6 @@ #define resx res, #0 // Pointer-offset pairs for temporaries on stack with some aliasing. 
-// Both dmsn and dnsm need space for >= 5 digits, and we allocate 8 #define scalar sp, #(0*NUMSIZE) @@ -62,18 +61,18 @@ #define dmsn sp, #(6*NUMSIZE) #define p sp, #(6*NUMSIZE) -#define xm sp, #(8*NUMSIZE) -#define dnsm sp, #(8*NUMSIZE) -#define spro sp, #(8*NUMSIZE) +#define xm sp, #(7*NUMSIZE) +#define dnsm sp, #(7*NUMSIZE) +#define spro sp, #(7*NUMSIZE) -#define xn sp, #(10*NUMSIZE) -#define s sp, #(10*NUMSIZE) +#define d sp, #(8*NUMSIZE) -#define d sp, #(11*NUMSIZE) +#define xn sp, #(9*NUMSIZE) +#define s sp, #(9*NUMSIZE) // Total size to reserve on the stack -#define NSPACE (12*NUMSIZE) +#define NSPACE (10*NUMSIZE) // Macro wrapping up the basic field operation bignum_mul_p25519_alt, only // trivially different from a pure function call to that subroutine. @@ -283,99 +282,6 @@ stp x12, x13, [P0]; \ stp x14, x15, [P0+16] -// Multiplication just giving a 5-digit result (actually < 39 * 2^256) -// by not doing anything beyond the first stage of reduction - -#define mul_5(p0,p1,p2) \ - ldp x3, x4, [p1]; \ - ldp x7, x8, [p2]; \ - mul x12, x3, x7; \ - umulh x13, x3, x7; \ - mul x11, x3, x8; \ - umulh x14, x3, x8; \ - adds x13, x13, x11; \ - ldp x9, x10, [p2+16]; \ - mul x11, x3, x9; \ - umulh x15, x3, x9; \ - adcs x14, x14, x11; \ - mul x11, x3, x10; \ - umulh x16, x3, x10; \ - adcs x15, x15, x11; \ - adc x16, x16, xzr; \ - ldp x5, x6, [p1+16]; \ - mul x11, x4, x7; \ - adds x13, x13, x11; \ - mul x11, x4, x8; \ - adcs x14, x14, x11; \ - mul x11, x4, x9; \ - adcs x15, x15, x11; \ - mul x11, x4, x10; \ - adcs x16, x16, x11; \ - umulh x3, x4, x10; \ - adc x3, x3, xzr; \ - umulh x11, x4, x7; \ - adds x14, x14, x11; \ - umulh x11, x4, x8; \ - adcs x15, x15, x11; \ - umulh x11, x4, x9; \ - adcs x16, x16, x11; \ - adc x3, x3, xzr; \ - mul x11, x5, x7; \ - adds x14, x14, x11; \ - mul x11, x5, x8; \ - adcs x15, x15, x11; \ - mul x11, x5, x9; \ - adcs x16, x16, x11; \ - mul x11, x5, x10; \ - adcs x3, x3, x11; \ - umulh x4, x5, x10; \ - adc x4, x4, xzr; \ - umulh x11, x5, x7; \ - adds x15, x15, x11; \ - umulh x11, x5, x8; \ - adcs x16, x16, x11; \ - umulh x11, x5, x9; \ - adcs x3, x3, x11; \ - adc x4, x4, xzr; \ - mul x11, x6, x7; \ - adds x15, x15, x11; \ - mul x11, x6, x8; \ - adcs x16, x16, x11; \ - mul x11, x6, x9; \ - adcs x3, x3, x11; \ - mul x11, x6, x10; \ - adcs x4, x4, x11; \ - umulh x5, x6, x10; \ - adc x5, x5, xzr; \ - umulh x11, x6, x7; \ - adds x16, x16, x11; \ - umulh x11, x6, x8; \ - adcs x3, x3, x11; \ - umulh x11, x6, x9; \ - adcs x4, x4, x11; \ - adc x5, x5, xzr; \ - mov x7, #38; \ - mul x11, x7, x16; \ - umulh x9, x7, x16; \ - adds x12, x12, x11; \ - mul x11, x7, x3; \ - umulh x3, x7, x3; \ - adcs x13, x13, x11; \ - mul x11, x7, x4; \ - umulh x4, x7, x4; \ - adcs x14, x14, x11; \ - mul x11, x7, x5; \ - umulh x5, x7, x5; \ - adcs x15, x15, x11; \ - cset x16, hs; \ - adds x13, x13, x9; \ - adcs x14, x14, x3; \ - adcs x15, x15, x4; \ - adc x16, x16, x5; \ - stp x12, x13, [p0]; \ - stp x14, x15, [p0+16]; \ - str x16, [p0+32] - // Squaring just giving a result < 2 * p_25519, which is done by // basically skipping the +1 in the quotient estimate and the final // optional correction. 
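// As a sketch of the weak-reduction idea shared by sqr_4 and mul_4 (the
// names x, q and r below are mathematical values, not registers): since
// 2^255 == 19 (mod p_25519), any 4-digit value x < 2^256 splits as
// x = 2^255 * q + r with q in {0,1} and r < 2^255, so that
//
//      x == 19 * q + r (mod p_25519)   and   19 * q + r < 2 * p_25519.
//
// The strict mul_p25519 instead uses the quotient estimate q + 1 and a
// final conditional correction to land in the fully reduced range.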
@@ -456,33 +362,7 @@ stp x8, x9, [p0]; \ stp x10, x11, [p0+16] -// Add 5-digit inputs and normalize to 4 digits - -#define add5_4(p0,p1,p2) \ - ldp x0, x1, [p1]; \ - ldp x4, x5, [p2]; \ - adds x0, x0, x4; \ - adcs x1, x1, x5; \ - ldp x2, x3, [p1+16]; \ - ldp x6, x7, [p2+16]; \ - adcs x2, x2, x6; \ - adcs x3, x3, x7; \ - ldr x4, [p1+32]; \ - ldr x5, [p2+32]; \ - adc x4, x4, x5; \ - cmn x3, x3; \ - bic x3, x3, #0x8000000000000000; \ - adc x8, x4, x4; \ - mov x7, #19; \ - mul x11, x7, x8; \ - adds x0, x0, x11; \ - adcs x1, x1, xzr; \ - adcs x2, x2, xzr; \ - adc x3, x3, xzr; \ - stp x0, x1, [p0]; \ - stp x2, x3, [p0+16] - -// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// Modular addition with double modulus 2 * p_25519 = 2^256 - 38. // This only ensures that the result fits in 4 digits, not that it is reduced // even w.r.t. double modulus. The result is always correct modulo provided // the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided @@ -526,40 +406,6 @@ stp x5, x6, [p0]; \ stp x7, x8, [p0+16] -// 5-digit subtraction with upward bias to make it positive, adding -// 1000 * (2^255 - 19) = 2^256 * 500 - 19000, then normalizing to 4 digits - -#define sub5_4(p0,p1,p2) \ - ldp x0, x1, [p1]; \ - ldp x4, x5, [p2]; \ - subs x0, x0, x4; \ - sbcs x1, x1, x5; \ - ldp x2, x3, [p1+16]; \ - ldp x6, x7, [p2+16]; \ - sbcs x2, x2, x6; \ - sbcs x3, x3, x7; \ - ldr x4, [p1+32]; \ - ldr x5, [p2+32]; \ - sbc x4, x4, x5; \ - mov x7, -19000; \ - adds x0, x0, x7; \ - sbcs x1, x1, xzr; \ - sbcs x2, x2, xzr; \ - sbcs x3, x3, xzr; \ - mov x7, 499; \ - adc x4, x4, x7; \ - cmn x3, x3; \ - bic x3, x3, #0x8000000000000000; \ - adc x8, x4, x4; \ - mov x7, #19; \ - mul x11, x7, x8; \ - adds x0, x0, x11; \ - adcs x1, x1, xzr; \ - adcs x2, x2, xzr; \ - adc x3, x3, xzr; \ - stp x0, x1, [p0]; \ - stp x2, x3, [p0+16] - // Combined z = c * x + y with reduction only < 2 * p_25519 // where c is initially in the X1 register. It is assumed // that 19 * (c * x + y) < 2^60 * 2^256 so we don't need a @@ -688,7 +534,7 @@ scalarloop: // ADDING: dmsn = dm * sn // DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) - mul_5(dmsn,sn,dm) + mul_4(dmsn,sn,dm) lsr x0, i, #6 ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly @@ -703,7 +549,7 @@ scalarloop: // ADDING: dnsm = sm * dn - mul_5(dnsm,sm,dn) + mul_4(dnsm,sm,dn) // DOUBLING: d = (xt - zt)^2 @@ -712,9 +558,9 @@ scalarloop: // ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 // DOUBLING: s = (xt + zt)^2 - sub5_4(dpro,dmsn,dnsm) + sub_twice4(dpro,dmsn,dnsm) sqr_4(s,s) - add5_4(spro,dmsn,dnsm) + add_twice4(spro,dmsn,dnsm) sqr_4(dpro,dpro) // DOUBLING: p = 4 * xt * zt = s - d From 04a48dd18d9b6ccca1706532da807f1d63503ad0 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 9 Mar 2023 08:33:40 -0800 Subject: [PATCH 29/42] Propagate field operation improvements to ARM X25519 functions Again, the analogous change on x86 seems a less clear benefit. 
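The improvement in question, visible in the umull/umaddl sequences in the
diff below, rebuilds each 64x64->128-bit product from 32x32->64-bit
multiplies. A hedged C sketch of the decomposition (illustrative only,
not code from the patch):

    #include <stdint.h>

    /* Split a = 2^32*ah + al and b = 2^32*bh + bl, so that
       a*b = al*bl + 2^32*(ah*bl + al*bh) + 2^64*ah*bh. */
    static void mul_64x64_128(uint64_t a, uint64_t b,
                              uint64_t *hi, uint64_t *lo)
    {
        uint64_t al = a & 0xffffffffu, ah = a >> 32;
        uint64_t bl = b & 0xffffffffu, bh = b >> 32;
        uint64_t ll = al * bl, lh = al * bh, hl = ah * bl, hh = ah * bh;
        uint64_t t = ll + (hl << 32);          /* fold low half of ah*bl */
        uint64_t c = (t < ll);                 /* carry out of low word */
        *lo = t + (lh << 32);                  /* fold low half of al*bh */
        c += (*lo < t);
        *hi = hh + (hl >> 32) + (lh >> 32) + c;
    }

On some ARM microarchitectures the umull/umaddl form issues better than
the mul/umulh pair it replaces, which is presumably why the benefit is
clearer here than on x86.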
s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/389bc7214ffa107f352947be7399b7d039c0bca0 --- arm/curve25519/curve25519_x25519.S | 438 ++++++++++++--------- arm/curve25519/curve25519_x25519_alt.S | 70 ++-- arm/curve25519/curve25519_x25519base.S | 204 ++++++---- arm/curve25519/curve25519_x25519base_alt.S | 34 +- 4 files changed, 415 insertions(+), 331 deletions(-) diff --git a/arm/curve25519/curve25519_x25519.S b/arm/curve25519/curve25519_x25519.S index e1d17f4a0a..fadc512553 100644 --- a/arm/curve25519/curve25519_x25519.S +++ b/arm/curve25519/curve25519_x25519.S @@ -77,11 +77,21 @@ // Macro wrapping up the basic field operation bignum_mul_p25519, only // trivially different from a pure function call to that subroutine. -#define mul_p25519(p0,p1,p2) \ - ldp x3, x4, [p1]; \ - ldp x5, x6, [p2]; \ - mul x7, x3, x5; \ - umulh x8, x3, x5; \ +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ mul x9, x4, x6; \ umulh x10, x4, x6; \ subs x4, x4, x3; \ @@ -103,10 +113,20 @@ eor x3, x3, x16; \ adcs x9, x3, x9; \ adc x10, x10, x16; \ - ldp x3, x4, [p1+16]; \ - ldp x5, x6, [p2+16]; \ - mul x11, x3, x5; \ - umulh x12, x3, x5; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ mul x13, x4, x6; \ umulh x14, x4, x6; \ subs x4, x4, x3; \ @@ -128,12 +148,12 @@ eor x3, x3, x16; \ adcs x13, x3, x13; \ adc x14, x14, x16; \ - ldp x3, x4, [p1+16]; \ - ldp x15, x16, [p1]; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ subs x3, x3, x15; \ sbcs x4, x4, x16; \ csetm x16, cc; \ - ldp x15, x0, [p2]; \ + ldp x15, x0, [P2]; \ subs x5, x15, x5; \ sbcs x6, x0, x6; \ csetm x0, cc; \ @@ -191,54 +211,53 @@ adcs x13, x13, x16; \ adc x14, x14, x16; \ mov x3, #0x26; \ - and x5, x11, #0xffffffff; \ - lsr x4, x11, #32; \ - mul x5, x3, x5; \ - mul x4, x3, x4; \ - adds x7, x7, x5; \ - and x5, x12, #0xffffffff; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ lsr x12, x12, #32; \ - mul x5, x3, x5; \ - mul x12, x3, x12; \ - adcs x8, x8, x5; \ - and x5, x13, #0xffffffff; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ lsr x13, x13, #32; \ - mul x5, x3, x5; \ - mul x13, x3, x13; \ - adcs x9, x9, x5; \ - and x5, x14, #0xffffffff; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ lsr x14, x14, #32; \ - mul x5, x3, x5; \ - mul x14, x3, x14; \ - adcs x10, x10, x5; \ - cset x11, cs; \ - lsl x5, x4, #32; \ - adds x7, x7, x5; \ - extr x5, x12, x4, #32; \ - adcs x8, x8, x5; \ - extr x5, x13, x12, #32; \ - adcs x9, x9, x5; \ - extr x5, x14, x13, #32; \ - adcs x10, x10, x5; \ - lsr x5, x14, #32; \ - adc x11, x11, x5; \ - cmn x10, x10; \ - orr x10, x10, #0x8000000000000000; \ - adc x0, x11, x11; \ + umaddl x14, w14, w3, x10; \ + mov 
x10, x4; \ + lsr x0, x14, #31; \ + mov x5, #0x13; \ + umaddl x5, w5, w0, x5; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ mov x3, #0x13; \ - madd x5, x3, x0, x3; \ - adds x7, x7, x5; \ - adcs x8, x8, xzr; \ - adcs x9, x9, xzr; \ - adcs x10, x10, xzr; \ - csel x3, x3, xzr, cc; \ + tst x10, #0x8000000000000000; \ + csel x3, x3, xzr, pl; \ subs x7, x7, x3; \ sbcs x8, x8, xzr; \ sbcs x9, x9, xzr; \ sbc x10, x10, xzr; \ and x10, x10, #0x7fffffffffffffff; \ - stp x7, x8, [p0]; \ - stp x9, x10, [p0+16] + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] // A version of multiplication that only guarantees output < 2 * p_25519. // This basically skips the +1 and final correction in quotient estimation. @@ -246,8 +265,18 @@ #define mul_4(P0,P1,P2) \ ldp x3, x4, [P1]; \ ldp x5, x6, [P2]; \ - mul x7, x3, x5; \ - umulh x8, x3, x5; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ mul x9, x4, x6; \ umulh x10, x4, x6; \ subs x4, x4, x3; \ @@ -271,8 +300,18 @@ adc x10, x10, x16; \ ldp x3, x4, [P1+16]; \ ldp x5, x6, [P2+16]; \ - mul x11, x3, x5; \ - umulh x12, x3, x5; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ mul x13, x4, x6; \ umulh x14, x4, x6; \ subs x4, x4, x3; \ @@ -357,46 +396,43 @@ adcs x13, x13, x16; \ adc x14, x14, x16; \ mov x3, #0x26; \ - and x5, x11, #0xffffffff; \ - lsr x4, x11, #32; \ - mul x5, x3, x5; \ - mul x4, x3, x4; \ - adds x7, x7, x5; \ - and x5, x12, #0xffffffff; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ lsr x12, x12, #32; \ - mul x5, x3, x5; \ - mul x12, x3, x12; \ - adcs x8, x8, x5; \ - and x5, x13, #0xffffffff; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ lsr x13, x13, #32; \ - mul x5, x3, x5; \ - mul x13, x3, x13; \ - adcs x9, x9, x5; \ - and x5, x14, #0xffffffff; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ lsr x14, x14, #32; \ - mul x5, x3, x5; \ - mul x14, x3, x14; \ - adcs x10, x10, x5; \ - cset x11, cs; \ - lsl x5, x4, #32; \ - adds x7, x7, x5; \ - extr x5, x12, x4, #32; \ - adcs x8, x8, x5; \ - extr x5, x13, x12, #32; \ - adcs x9, x9, x5; \ - extr x5, x14, x13, #32; \ - adcs x10, x10, x5; \ - lsr x5, x14, #32; \ - adc x11, x11, x5; \ - cmn x10, x10; \ - bic x10, x10, #0x8000000000000000; \ - adc x0, x11, x11; \ - mov x3, #19; \ - mul x5, x3, x0; \ - adds x7, x7, x5; \ - adcs x8, x8, xzr; \ - adcs x9, x9, xzr; \ - adc x10, x10, xzr; \ + umaddl x14, w14, w3, x10; \ + mov x10, x4; \ + lsr x0, x14, #31; \ + mov x5, #0x13; \ + umull x5, w5, w0; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr 
x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ stp x7, x8, [P0]; \ stp x9, x10, [P0+16] @@ -404,115 +440,137 @@ // basically skipping the +1 in the quotient estimate and the final // optional correction. -#define sqr_4(p0,p1) \ - ldp x6, x7, [p1]; \ - ldp x10, x11, [p1+16]; \ - mul x4, x6, x10; \ - mul x9, x7, x11; \ - umulh x12, x6, x10; \ - subs x13, x6, x7; \ - cneg x13, x13, cc; \ - csetm x3, cc; \ - subs x2, x11, x10; \ - cneg x2, x2, cc; \ - mul x8, x13, x2; \ - umulh x2, x13, x2; \ - cinv x3, x3, cc; \ - eor x8, x8, x3; \ - eor x2, x2, x3; \ - adds x5, x4, x12; \ - adc x12, x12, xzr; \ - umulh x13, x7, x11; \ - adds x5, x5, x9; \ - adcs x12, x12, x13; \ - adc x13, x13, xzr; \ - adds x12, x12, x9; \ - adc x13, x13, xzr; \ - cmn x3, #0x1; \ - adcs x5, x5, x8; \ - adcs x12, x12, x2; \ - adc x13, x13, x3; \ - adds x4, x4, x4; \ - adcs x5, x5, x5; \ - adcs x12, x12, x12; \ - adcs x13, x13, x13; \ - adc x14, xzr, xzr; \ - mul x2, x6, x6; \ - mul x8, x7, x7; \ - mul x15, x6, x7; \ - umulh x3, x6, x6; \ - umulh x9, x7, x7; \ - umulh x16, x6, x7; \ - adds x3, x3, x15; \ - adcs x8, x8, x16; \ - adc x9, x9, xzr; \ - adds x3, x3, x15; \ - adcs x8, x8, x16; \ - adc x9, x9, xzr; \ - adds x4, x4, x8; \ - adcs x5, x5, x9; \ - adcs x12, x12, xzr; \ - adcs x13, x13, xzr; \ - adc x14, x14, xzr; \ - mul x6, x10, x10; \ - mul x8, x11, x11; \ +#define sqr_4(P0,P1) \ + ldp x10, x11, [P1]; \ + ldp x12, x13, [P1+16]; \ + umull x2, w10, w10; \ + lsr x14, x10, #32; \ + umull x3, w14, w14; \ + umull x14, w10, w14; \ + adds x2, x2, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x3, x3, x14; \ + umull x4, w11, w11; \ + lsr x14, x11, #32; \ + umull x5, w14, w14; \ + umull x14, w11, w14; \ mul x15, x10, x11; \ - umulh x7, x10, x10; \ - umulh x9, x11, x11; \ umulh x16, x10, x11; \ - adds x7, x7, x15; \ - adcs x8, x8, x16; \ + adds x4, x4, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x5, x5, x14; \ + adds x15, x15, x15; \ + adcs x16, x16, x16; \ + adc x5, x5, xzr; \ + adds x3, x3, x15; \ + adcs x4, x4, x16; \ + adc x5, x5, xzr; \ + umull x6, w12, w12; \ + lsr x14, x12, #32; \ + umull x7, w14, w14; \ + umull x14, w12, w14; \ + adds x6, x6, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x7, x7, x14; \ + umull x8, w13, w13; \ + lsr x14, x13, #32; \ + umull x9, w14, w14; \ + umull x14, w13, w14; \ + mul x15, x12, x13; \ + umulh x16, x12, x13; \ + adds x8, x8, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x9, x9, x14; \ + adds x15, x15, x15; \ + adcs x16, x16, x16; \ adc x9, x9, xzr; \ adds x7, x7, x15; \ adcs x8, x8, x16; \ adc x9, x9, xzr; \ - adds x6, x6, x12; \ - adcs x7, x7, x13; \ - adcs x8, x8, x14; \ + subs x10, x10, x12; \ + sbcs x11, x11, x13; \ + csetm x16, cc; \ + eor x10, x10, x16; \ + subs x10, x10, x16; \ + eor x11, x11, x16; \ + sbc x11, x11, x16; \ + adds x6, x6, x4; \ + adcs x7, x7, x5; \ + adcs x8, x8, xzr; \ adc x9, x9, xzr; \ + umull x12, w10, w10; \ + lsr x5, x10, #32; \ + umull x13, w5, w5; \ + umull x5, w10, w5; \ + adds x12, x12, x5, lsl #33; \ + lsr x5, x5, #31; \ + adc x13, x13, x5; \ + umull x15, w11, w11; \ + lsr x5, x11, #32; \ + umull x14, w5, w5; \ + umull x5, w11, w5; \ + mul x4, x10, x11; \ + umulh x16, x10, x11; \ + adds x15, x15, x5, lsl #33; \ + lsr x5, x5, #31; \ + adc x14, x14, x5; \ + adds x4, x4, x4; \ + adcs x16, x16, x16; \ + adc x14, x14, xzr; \ + adds x13, x13, x4; \ + adcs x15, x15, x16; \ + adc x14, x14, xzr; \ + adds x4, x2, x6; \ + adcs x5, x3, x7; \ + adcs x6, x6, x8; \ + adcs x7, x7, x9; \ + csetm x16, cc; \ + subs x4, x4, x12; \ + sbcs 
x5, x5, x13; \ + sbcs x6, x6, x15; \ + sbcs x7, x7, x14; \ + adcs x8, x8, x16; \ + adc x9, x9, x16; \ mov x10, #0x26; \ - and x11, x6, #0xffffffff; \ - lsr x12, x6, #32; \ - mul x11, x10, x11; \ - mul x12, x10, x12; \ - adds x2, x2, x11; \ - and x11, x7, #0xffffffff; \ + umull x12, w6, w10; \ + add x12, x12, w2, uxtw; \ + lsr x2, x2, #32; \ + lsr x6, x6, #32; \ + umaddl x6, w6, w10, x2; \ + mov x2, x12; \ + umull x12, w7, w10; \ + add x12, x12, w3, uxtw; \ + lsr x3, x3, #32; \ lsr x7, x7, #32; \ - mul x11, x10, x11; \ - mul x7, x10, x7; \ - adcs x3, x3, x11; \ - and x11, x8, #0xffffffff; \ + umaddl x7, w7, w10, x3; \ + mov x3, x12; \ + umull x12, w8, w10; \ + add x12, x12, w4, uxtw; \ + lsr x4, x4, #32; \ lsr x8, x8, #32; \ - mul x11, x10, x11; \ - mul x8, x10, x8; \ - adcs x4, x4, x11; \ - and x11, x9, #0xffffffff; \ + umaddl x8, w8, w10, x4; \ + mov x4, x12; \ + umull x12, w9, w10; \ + add x12, x12, w5, uxtw; \ + lsr x5, x5, #32; \ lsr x9, x9, #32; \ - mul x11, x10, x11; \ - mul x9, x10, x9; \ - adcs x5, x5, x11; \ - cset x6, cs; \ - lsl x11, x12, #32; \ - adds x2, x2, x11; \ - extr x11, x7, x12, #32; \ - adcs x3, x3, x11; \ - extr x11, x8, x7, #32; \ - adcs x4, x4, x11; \ - extr x11, x9, x8, #32; \ - adcs x5, x5, x11; \ - lsr x11, x9, #32; \ - adc x6, x6, x11; \ - cmn x5, x5; \ - bic x5, x5, #0x8000000000000000; \ - adc x13, x6, x6; \ - mov x10, #0x13; \ - mul x11, x13, x10; \ - adds x2, x2, x11; \ - adcs x3, x3, xzr; \ - adcs x4, x4, xzr; \ - adc x5, x5, xzr; \ - stp x2, x3, [p0]; \ - stp x4, x5, [p0+16] + umaddl x9, w9, w10, x5; \ + mov x5, x12; \ + lsr x13, x9, #31; \ + mov x11, #0x13; \ + umull x11, w11, w13; \ + add x2, x2, x11; \ + adds x2, x2, x6, lsl #32; \ + extr x10, x7, x6, #32; \ + adcs x3, x3, x10; \ + extr x10, x8, x7, #32; \ + adcs x4, x4, x10; \ + extr x10, x9, x8, #32; \ + lsl x11, x13, #63; \ + eor x5, x5, x11; \ + adc x5, x5, x10; \ + stp x2, x3, [P0]; \ + stp x4, x5, [P0+16] // Modular addition with double modulus 2 * p_25519 = 2^256 - 38. // This only ensures that the result fits in 4 digits, not that it is reduced diff --git a/arm/curve25519/curve25519_x25519_alt.S b/arm/curve25519/curve25519_x25519_alt.S index 046d56122e..8072c0fe7f 100644 --- a/arm/curve25519/curve25519_x25519_alt.S +++ b/arm/curve25519/curve25519_x25519_alt.S @@ -77,15 +77,15 @@ // Macro wrapping up the basic field operation bignum_mul_p25519_alt, only // trivially different from a pure function call to that subroutine. 
-#define mul_p25519(p0,p1,p2) \ - ldp x3, x4, [p1]; \ - ldp x7, x8, [p2]; \ +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ mul x12, x3, x7; \ umulh x13, x3, x7; \ mul x11, x3, x8; \ umulh x14, x3, x8; \ adds x13, x13, x11; \ - ldp x9, x10, [p2+16]; \ + ldp x9, x10, [P2+16]; \ mul x11, x3, x9; \ umulh x15, x3, x9; \ adcs x14, x14, x11; \ @@ -93,7 +93,7 @@ umulh x16, x3, x10; \ adcs x15, x15, x11; \ adc x16, x16, xzr; \ - ldp x5, x6, [p1+16]; \ + ldp x5, x6, [P1+16]; \ mul x11, x4, x7; \ adds x13, x13, x11; \ mul x11, x4, x8; \ @@ -145,7 +145,7 @@ umulh x11, x6, x9; \ adcs x4, x4, x11; \ adc x5, x5, xzr; \ - mov x7, #38; \ + mov x7, #0x26; \ mul x11, x7, x16; \ umulh x9, x7, x16; \ adds x12, x12, x11; \ @@ -158,28 +158,26 @@ mul x11, x7, x5; \ umulh x5, x7, x5; \ adcs x15, x15, x11; \ - cset x16, hs; \ - adds x13, x13, x9; \ - adcs x14, x14, x3; \ - adcs x15, x15, x4; \ + cset x16, cs; \ + adds x15, x15, x4; \ adc x16, x16, x5; \ cmn x15, x15; \ orr x15, x15, #0x8000000000000000; \ adc x8, x16, x16; \ - mov x7, #19; \ + mov x7, #0x13; \ madd x11, x7, x8, x7; \ adds x12, x12, x11; \ - adcs x13, x13, xzr; \ - adcs x14, x14, xzr; \ + adcs x13, x13, x9; \ + adcs x14, x14, x3; \ adcs x15, x15, xzr; \ - csel x7, x7, xzr, lo; \ + csel x7, x7, xzr, cc; \ subs x12, x12, x7; \ sbcs x13, x13, xzr; \ sbcs x14, x14, xzr; \ sbc x15, x15, xzr; \ and x15, x15, #0x7fffffffffffffff; \ - stp x12, x13, [p0]; \ - stp x14, x15, [p0+16] + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] // A version of multiplication that only guarantees output < 2 * p_25519. // This basically skips the +1 and final correction in quotient estimation. @@ -252,7 +250,7 @@ umulh x11, x6, x9; \ adcs x4, x4, x11; \ adc x5, x5, xzr; \ - mov x7, #38; \ + mov x7, #0x26; \ mul x11, x7, x16; \ umulh x9, x7, x16; \ adds x12, x12, x11; \ @@ -265,19 +263,17 @@ mul x11, x7, x5; \ umulh x5, x7, x5; \ adcs x15, x15, x11; \ - cset x16, hs; \ - adds x13, x13, x9; \ - adcs x14, x14, x3; \ - adcs x15, x15, x4; \ + cset x16, cs; \ + adds x15, x15, x4; \ adc x16, x16, x5; \ cmn x15, x15; \ bic x15, x15, #0x8000000000000000; \ adc x8, x16, x16; \ - mov x7, #19; \ + mov x7, #0x13; \ mul x11, x7, x8; \ adds x12, x12, x11; \ - adcs x13, x13, xzr; \ - adcs x14, x14, xzr; \ + adcs x13, x13, x9; \ + adcs x14, x14, x3; \ adc x15, x15, xzr; \ stp x12, x13, [P0]; \ stp x14, x15, [P0+16] @@ -286,11 +282,11 @@ // basically skipping the +1 in the quotient estimate and the final // optional correction. 
-#define sqr_4(p0,p1) \ - ldp x2, x3, [p1]; \ +#define sqr_4(P0,P1) \ + ldp x2, x3, [P1]; \ mul x9, x2, x3; \ umulh x10, x2, x3; \ - ldp x4, x5, [p1+16]; \ + ldp x4, x5, [P1+16]; \ mul x11, x2, x5; \ umulh x12, x2, x5; \ mul x7, x2, x4; \ @@ -316,7 +312,7 @@ adcs x12, x12, x12; \ adcs x13, x13, x13; \ adcs x14, x14, x14; \ - cset x6, hs; \ + cset x6, cs; \ umulh x7, x2, x2; \ mul x8, x2, x2; \ adds x9, x9, x7; \ @@ -332,7 +328,7 @@ adcs x14, x14, x7; \ umulh x7, x5, x5; \ adc x6, x6, x7; \ - mov x3, #38; \ + mov x3, #0x26; \ mul x7, x3, x12; \ umulh x4, x3, x12; \ adds x8, x8, x7; \ @@ -345,22 +341,20 @@ mul x7, x3, x6; \ umulh x6, x3, x6; \ adcs x11, x11, x7; \ - cset x12, hs; \ - adds x9, x9, x4; \ - adcs x10, x10, x13; \ - adcs x11, x11, x14; \ + cset x12, cs; \ + adds x11, x11, x14; \ adc x12, x12, x6; \ cmn x11, x11; \ bic x11, x11, #0x8000000000000000; \ adc x2, x12, x12; \ - mov x3, #19; \ + mov x3, #0x13; \ mul x7, x3, x2; \ adds x8, x8, x7; \ - adcs x9, x9, xzr; \ - adcs x10, x10, xzr; \ + adcs x9, x9, x4; \ + adcs x10, x10, x13; \ adc x11, x11, xzr; \ - stp x8, x9, [p0]; \ - stp x10, x11, [p0+16] + stp x8, x9, [P0]; \ + stp x10, x11, [P0+16] // Modular addition with double modulus 2 * p_25519 = 2^256 - 38. // This only ensures that the result fits in 4 digits, not that it is reduced diff --git a/arm/curve25519/curve25519_x25519base.S b/arm/curve25519/curve25519_x25519base.S index 7076630698..41d5f5b414 100644 --- a/arm/curve25519/curve25519_x25519base.S +++ b/arm/curve25519/curve25519_x25519base.S @@ -74,14 +74,24 @@ #define NSPACE (14*NUMSIZE) -// Macro wrapping up the basic field multiplication, only trivially -// different from a pure function call to bignum_mul_p25519. +// Macro wrapping up the basic field operation bignum_mul_p25519, only +// trivially different from a pure function call to that subroutine. 
#define mul_p25519(P0,P1,P2) \ ldp x3, x4, [P1]; \ ldp x5, x6, [P2]; \ - mul x7, x3, x5; \ - umulh x8, x3, x5; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ mul x9, x4, x6; \ umulh x10, x4, x6; \ subs x4, x4, x3; \ @@ -105,8 +115,18 @@ adc x10, x10, x16; \ ldp x3, x4, [P1+16]; \ ldp x5, x6, [P2+16]; \ - mul x11, x3, x5; \ - umulh x12, x3, x5; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ mul x13, x4, x6; \ umulh x14, x4, x6; \ subs x4, x4, x3; \ @@ -191,47 +211,46 @@ adcs x13, x13, x16; \ adc x14, x14, x16; \ mov x3, #0x26; \ - and x5, x11, #0xffffffff; \ - lsr x4, x11, #32; \ - mul x5, x3, x5; \ - mul x4, x3, x4; \ - adds x7, x7, x5; \ - and x5, x12, #0xffffffff; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ lsr x12, x12, #32; \ - mul x5, x3, x5; \ - mul x12, x3, x12; \ - adcs x8, x8, x5; \ - and x5, x13, #0xffffffff; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ lsr x13, x13, #32; \ - mul x5, x3, x5; \ - mul x13, x3, x13; \ - adcs x9, x9, x5; \ - and x5, x14, #0xffffffff; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ lsr x14, x14, #32; \ - mul x5, x3, x5; \ - mul x14, x3, x14; \ - adcs x10, x10, x5; \ - cset x11, cs; \ - lsl x5, x4, #32; \ - adds x7, x7, x5; \ - extr x5, x12, x4, #32; \ - adcs x8, x8, x5; \ - extr x5, x13, x12, #32; \ - adcs x9, x9, x5; \ - extr x5, x14, x13, #32; \ - adcs x10, x10, x5; \ - lsr x5, x14, #32; \ - adc x11, x11, x5; \ - cmn x10, x10; \ - orr x10, x10, #0x8000000000000000; \ - adc x0, x11, x11; \ + umaddl x14, w14, w3, x10; \ + mov x10, x4; \ + lsr x0, x14, #31; \ + mov x5, #0x13; \ + umaddl x5, w5, w0, x5; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ mov x3, #0x13; \ - madd x5, x3, x0, x3; \ - adds x7, x7, x5; \ - adcs x8, x8, xzr; \ - adcs x9, x9, xzr; \ - adcs x10, x10, xzr; \ - csel x3, x3, xzr, cc; \ + tst x10, #0x8000000000000000; \ + csel x3, x3, xzr, pl; \ subs x7, x7, x3; \ sbcs x8, x8, xzr; \ sbcs x9, x9, xzr; \ @@ -246,8 +265,18 @@ #define mul_4(P0,P1,P2) \ ldp x3, x4, [P1]; \ ldp x5, x6, [P2]; \ - mul x7, x3, x5; \ - umulh x8, x3, x5; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ mul x9, x4, x6; \ umulh x10, x4, x6; \ subs x4, x4, x3; \ @@ -271,8 +300,18 @@ adc x10, x10, x16; \ ldp x3, x4, [P1+16]; \ ldp x5, x6, [P2+16]; \ - mul x11, x3, x5; \ - umulh x12, x3, x5; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; 
\ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ mul x13, x4, x6; \ umulh x14, x4, x6; \ subs x4, x4, x3; \ @@ -357,46 +396,43 @@ adcs x13, x13, x16; \ adc x14, x14, x16; \ mov x3, #0x26; \ - and x5, x11, #0xffffffff; \ - lsr x4, x11, #32; \ - mul x5, x3, x5; \ - mul x4, x3, x4; \ - adds x7, x7, x5; \ - and x5, x12, #0xffffffff; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ lsr x12, x12, #32; \ - mul x5, x3, x5; \ - mul x12, x3, x12; \ - adcs x8, x8, x5; \ - and x5, x13, #0xffffffff; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ lsr x13, x13, #32; \ - mul x5, x3, x5; \ - mul x13, x3, x13; \ - adcs x9, x9, x5; \ - and x5, x14, #0xffffffff; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ lsr x14, x14, #32; \ - mul x5, x3, x5; \ - mul x14, x3, x14; \ - adcs x10, x10, x5; \ - cset x11, cs; \ - lsl x5, x4, #32; \ - adds x7, x7, x5; \ - extr x5, x12, x4, #32; \ - adcs x8, x8, x5; \ - extr x5, x13, x12, #32; \ - adcs x9, x9, x5; \ - extr x5, x14, x13, #32; \ - adcs x10, x10, x5; \ - lsr x5, x14, #32; \ - adc x11, x11, x5; \ - cmn x10, x10; \ - bic x10, x10, #0x8000000000000000; \ - adc x0, x11, x11; \ - mov x3, #19; \ - mul x5, x3, x0; \ - adds x7, x7, x5; \ - adcs x8, x8, xzr; \ - adcs x9, x9, xzr; \ - adc x10, x10, xzr; \ + umaddl x14, w14, w3, x10; \ + mov x10, x4; \ + lsr x0, x14, #31; \ + mov x5, #0x13; \ + umull x5, w5, w0; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ stp x7, x8, [P0]; \ stp x9, x10, [P0+16] diff --git a/arm/curve25519/curve25519_x25519base_alt.S b/arm/curve25519/curve25519_x25519base_alt.S index 0631ac0279..cb8354c824 100644 --- a/arm/curve25519/curve25519_x25519base_alt.S +++ b/arm/curve25519/curve25519_x25519base_alt.S @@ -74,8 +74,8 @@ #define NSPACE (14*NUMSIZE) -// Macro wrapping up the basic field multiplication, only trivially -// different from a pure function call to bignum_mul_p25519_alt. +// Macro wrapping up the basic field operation bignum_mul_p25519_alt, only +// trivially different from a pure function call to that subroutine. 
#define mul_p25519(P0,P1,P2) \ ldp x3, x4, [P1]; \ @@ -145,7 +145,7 @@ umulh x11, x6, x9; \ adcs x4, x4, x11; \ adc x5, x5, xzr; \ - mov x7, #38; \ + mov x7, #0x26; \ mul x11, x7, x16; \ umulh x9, x7, x16; \ adds x12, x12, x11; \ @@ -158,21 +158,19 @@ mul x11, x7, x5; \ umulh x5, x7, x5; \ adcs x15, x15, x11; \ - cset x16, hs; \ - adds x13, x13, x9; \ - adcs x14, x14, x3; \ - adcs x15, x15, x4; \ + cset x16, cs; \ + adds x15, x15, x4; \ adc x16, x16, x5; \ cmn x15, x15; \ orr x15, x15, #0x8000000000000000; \ adc x8, x16, x16; \ - mov x7, #19; \ + mov x7, #0x13; \ madd x11, x7, x8, x7; \ adds x12, x12, x11; \ - adcs x13, x13, xzr; \ - adcs x14, x14, xzr; \ + adcs x13, x13, x9; \ + adcs x14, x14, x3; \ adcs x15, x15, xzr; \ - csel x7, x7, xzr, lo; \ + csel x7, x7, xzr, cc; \ subs x12, x12, x7; \ sbcs x13, x13, xzr; \ sbcs x14, x14, xzr; \ @@ -252,7 +250,7 @@ umulh x11, x6, x9; \ adcs x4, x4, x11; \ adc x5, x5, xzr; \ - mov x7, #38; \ + mov x7, #0x26; \ mul x11, x7, x16; \ umulh x9, x7, x16; \ adds x12, x12, x11; \ @@ -265,19 +263,17 @@ mul x11, x7, x5; \ umulh x5, x7, x5; \ adcs x15, x15, x11; \ - cset x16, hs; \ - adds x13, x13, x9; \ - adcs x14, x14, x3; \ - adcs x15, x15, x4; \ + cset x16, cs; \ + adds x15, x15, x4; \ adc x16, x16, x5; \ cmn x15, x15; \ bic x15, x15, #0x8000000000000000; \ adc x8, x16, x16; \ - mov x7, #19; \ + mov x7, #0x13; \ mul x11, x7, x8; \ adds x12, x12, x11; \ - adcs x13, x13, xzr; \ - adcs x14, x14, xzr; \ + adcs x13, x13, x9; \ + adcs x14, x14, x3; \ adc x15, x15, xzr; \ stp x12, x13, [P0]; \ stp x14, x15, [P0+16] From b10cd6c7bae7d5e1169b80431485ebb5e264373d Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 16 Mar 2023 21:02:41 -0700 Subject: [PATCH 30/42] Add basic SM2 point operations The same trio as for the NIST curves: a point doubling function, point addition function and point mixed addition function, all using Jacobian coordinates, and all with input nondegeneracy assumed (see the formal spec for the exact assumptions). s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/1cdd6ff1a7cd77b6bc73f690ee89db41bcd787ed --- x86_att/p384/p384_montjdouble.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x86_att/p384/p384_montjdouble.S b/x86_att/p384/p384_montjdouble.S index d06d22bd8f..80e0b6cc88 100644 --- a/x86_att/p384/p384_montjdouble.S +++ b/x86_att/p384/p384_montjdouble.S @@ -913,7 +913,7 @@ S2N_BN_SYMBOL(p384_montjdouble): #endif // Save registers and make room on stack for temporary variables -// Save the outpuy pointer %rdi which gets overwritten in earlier +// Save the output pointer %rdi which gets overwritten in earlier // operations before it is used. pushq %rbx From b233c58e785b70092692389b33d3ff1114313b75 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 21 Mar 2023 16:56:34 -0700 Subject: [PATCH 31/42] Add byte-level interfaces for X25519 functions These provide alternative interfaces at the C level, with "_byte" in their names to distinguish them, treating the arguments as arrays of bytes (uint8_t) rather than of 64-bit words (uint64_t). This better reflects how the X25519 function is generally specified and used. 
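As a hedged usage sketch (the wrapper function here is hypothetical, not
part of the patch), generating a public key from a 32-byte private key
becomes a direct call on byte buffers:

    #include <stdint.h>

    extern void curve25519_x25519base_byte(uint8_t res[static 32],
                                           uint8_t scalar[static 32]);

    void x25519_public_key(uint8_t pub[32], uint8_t priv[32])
    {
        /* Per the file header comments, the function itself applies the
           RFC 7748 mangling of the scalar, so priv needs no pre-clamping. */
        curve25519_x25519base_byte(pub, priv);
    }

The four byte-level entry points are: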
void curve25519_x25519_byte(uint8_t res[static 32],uint8_t scalar[static 32],uint8_t point[static 32]); void curve25519_x25519_byte_alt(uint8_t res[static 32],uint8_t scalar[static 32],uint8_t point[static 32]); void curve25519_x25519base_byte(uint8_t res[static 32],uint8_t scalar[static 32]); void curve25519_x25519base_byte_alt(uint8_t res[static 32],uint8_t scalar[static 32]); The underlying code is exactly the same in the x86 case, since the platform is guaranteed to be little-endian, and the proofs just rephrase the same results in terms of byte arrays. The ARM functions are actually different code, using byte-level loads and stores (ldrb, strb) at the beginning and end, and so their proofs are also slightly different. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/6cdfdde71663913f2b505d287cad66cf7346c0f2 --- arm/curve25519/curve25519_x25519_byte.S | 1421 +++ arm/curve25519/curve25519_x25519_byte_alt.S | 1205 +++ arm/curve25519/curve25519_x25519base_byte.S | 9020 +++++++++++++++++ .../curve25519_x25519base_byte_alt.S | 8861 ++++++++++++++++ x86_att/curve25519/curve25519_x25519.S | 10 + x86_att/curve25519/curve25519_x25519_alt.S | 10 + x86_att/curve25519/curve25519_x25519base.S | 12 +- .../curve25519/curve25519_x25519base_alt.S | 10 + 8 files changed, 20548 insertions(+), 1 deletion(-) create mode 100644 arm/curve25519/curve25519_x25519_byte.S create mode 100644 arm/curve25519/curve25519_x25519_byte_alt.S create mode 100644 arm/curve25519/curve25519_x25519base_byte.S create mode 100644 arm/curve25519/curve25519_x25519base_byte_alt.S diff --git a/arm/curve25519/curve25519_x25519_byte.S b/arm/curve25519/curve25519_x25519_byte.S new file mode 100644 index 0000000000..accdf93ecf --- /dev/null +++ b/arm/curve25519/curve25519_x25519_byte.S @@ -0,0 +1,1421 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// The x25519 function for curve25519 (byte array arguments) +// Inputs scalar[32] (bytes), point[32] (bytes); output res[32] (bytes) +// +// extern void curve25519_x25519_byte +// (uint8_t res[static 32],uint8_t scalar[static 32],uint8_t point[static 32]) +// +// Given a scalar n and the X coordinate of an input point P = (X,Y) on +// curve25519 (Y can live in any extension field of characteristic 2^255-19), +// this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the +// point at infinity. Both n and X inputs are first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748); +// in particular the lower three bits of n are set to zero. +// +// Standard ARM ABI: X0 = res, X1 = scalar, X2 = point +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519_byte) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519_byte) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable homes for the input result argument during the whole body +// and other variables that are only needed prior to the modular inverse. + +#define res x23 +#define i x20 +#define swap x21 + +// Pointers to result x coord to be written + +#define resx res, #0 + +// Pointer-offset pairs for temporaries on stack with some aliasing. 
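+// (The aliasing is deliberate: temporaries with disjoint live ranges
+// share slots, e.g. zm, sm and dpro all sit at sp+2*NUMSIZE below, and
+// zn, dn and e all share sp+5*NUMSIZE.)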
+ +#define scalar sp, #(0*NUMSIZE) + +#define pointx sp, #(1*NUMSIZE) + +#define zm sp, #(2*NUMSIZE) +#define sm sp, #(2*NUMSIZE) +#define dpro sp, #(2*NUMSIZE) + +#define sn sp, #(3*NUMSIZE) + +#define dm sp, #(4*NUMSIZE) + +#define zn sp, #(5*NUMSIZE) +#define dn sp, #(5*NUMSIZE) +#define e sp, #(5*NUMSIZE) + +#define dmsn sp, #(6*NUMSIZE) +#define p sp, #(6*NUMSIZE) + +#define xm sp, #(7*NUMSIZE) +#define dnsm sp, #(7*NUMSIZE) +#define spro sp, #(7*NUMSIZE) + +#define d sp, #(8*NUMSIZE) + +#define xn sp, #(9*NUMSIZE) +#define s sp, #(9*NUMSIZE) + +// Total size to reserve on the stack + +#define NSPACE (10*NUMSIZE) + +// Macro wrapping up the basic field operation bignum_mul_p25519, only +// trivially different from a pure function call to that subroutine. + +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [P2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ 
+ eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ + lsr x12, x12, #32; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ + lsr x13, x13, #32; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ + lsr x14, x14, #32; \ + umaddl x14, w14, w3, x10; \ + mov x10, x4; \ + lsr x0, x14, #31; \ + mov x5, #0x13; \ + umaddl x5, w5, w0, x5; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ + mov x3, #0x13; \ + tst x10, #0x8000000000000000; \ + csel x3, x3, xzr, pl; \ + subs x7, x7, x3; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbc x10, x10, xzr; \ + and x10, x10, #0x7fffffffffffffff; \ + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [P2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc 
x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ + lsr x12, x12, #32; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ + lsr x13, x13, #32; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ + lsr x14, x14, #32; \ + umaddl x14, w14, w3, x10; \ + mov x10, x4; \ + lsr x0, x14, #31; \ + mov x5, #0x13; \ + umull x5, w5, w0; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the final +// optional correction. 
+ +#define sqr_4(P0,P1) \ + ldp x10, x11, [P1]; \ + ldp x12, x13, [P1+16]; \ + umull x2, w10, w10; \ + lsr x14, x10, #32; \ + umull x3, w14, w14; \ + umull x14, w10, w14; \ + adds x2, x2, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x3, x3, x14; \ + umull x4, w11, w11; \ + lsr x14, x11, #32; \ + umull x5, w14, w14; \ + umull x14, w11, w14; \ + mul x15, x10, x11; \ + umulh x16, x10, x11; \ + adds x4, x4, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x5, x5, x14; \ + adds x15, x15, x15; \ + adcs x16, x16, x16; \ + adc x5, x5, xzr; \ + adds x3, x3, x15; \ + adcs x4, x4, x16; \ + adc x5, x5, xzr; \ + umull x6, w12, w12; \ + lsr x14, x12, #32; \ + umull x7, w14, w14; \ + umull x14, w12, w14; \ + adds x6, x6, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x7, x7, x14; \ + umull x8, w13, w13; \ + lsr x14, x13, #32; \ + umull x9, w14, w14; \ + umull x14, w13, w14; \ + mul x15, x12, x13; \ + umulh x16, x12, x13; \ + adds x8, x8, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x9, x9, x14; \ + adds x15, x15, x15; \ + adcs x16, x16, x16; \ + adc x9, x9, xzr; \ + adds x7, x7, x15; \ + adcs x8, x8, x16; \ + adc x9, x9, xzr; \ + subs x10, x10, x12; \ + sbcs x11, x11, x13; \ + csetm x16, cc; \ + eor x10, x10, x16; \ + subs x10, x10, x16; \ + eor x11, x11, x16; \ + sbc x11, x11, x16; \ + adds x6, x6, x4; \ + adcs x7, x7, x5; \ + adcs x8, x8, xzr; \ + adc x9, x9, xzr; \ + umull x12, w10, w10; \ + lsr x5, x10, #32; \ + umull x13, w5, w5; \ + umull x5, w10, w5; \ + adds x12, x12, x5, lsl #33; \ + lsr x5, x5, #31; \ + adc x13, x13, x5; \ + umull x15, w11, w11; \ + lsr x5, x11, #32; \ + umull x14, w5, w5; \ + umull x5, w11, w5; \ + mul x4, x10, x11; \ + umulh x16, x10, x11; \ + adds x15, x15, x5, lsl #33; \ + lsr x5, x5, #31; \ + adc x14, x14, x5; \ + adds x4, x4, x4; \ + adcs x16, x16, x16; \ + adc x14, x14, xzr; \ + adds x13, x13, x4; \ + adcs x15, x15, x16; \ + adc x14, x14, xzr; \ + adds x4, x2, x6; \ + adcs x5, x3, x7; \ + adcs x6, x6, x8; \ + adcs x7, x7, x9; \ + csetm x16, cc; \ + subs x4, x4, x12; \ + sbcs x5, x5, x13; \ + sbcs x6, x6, x15; \ + sbcs x7, x7, x14; \ + adcs x8, x8, x16; \ + adc x9, x9, x16; \ + mov x10, #0x26; \ + umull x12, w6, w10; \ + add x12, x12, w2, uxtw; \ + lsr x2, x2, #32; \ + lsr x6, x6, #32; \ + umaddl x6, w6, w10, x2; \ + mov x2, x12; \ + umull x12, w7, w10; \ + add x12, x12, w3, uxtw; \ + lsr x3, x3, #32; \ + lsr x7, x7, #32; \ + umaddl x7, w7, w10, x3; \ + mov x3, x12; \ + umull x12, w8, w10; \ + add x12, x12, w4, uxtw; \ + lsr x4, x4, #32; \ + lsr x8, x8, #32; \ + umaddl x8, w8, w10, x4; \ + mov x4, x12; \ + umull x12, w9, w10; \ + add x12, x12, w5, uxtw; \ + lsr x5, x5, #32; \ + lsr x9, x9, #32; \ + umaddl x9, w9, w10, x5; \ + mov x5, x12; \ + lsr x13, x9, #31; \ + mov x11, #0x13; \ + umull x11, w11, w13; \ + add x2, x2, x11; \ + adds x2, x2, x6, lsl #32; \ + extr x10, x7, x6, #32; \ + adcs x3, x3, x10; \ + extr x10, x8, x7, #32; \ + adcs x4, x4, x10; \ + extr x10, x9, x8, #32; \ + lsl x11, x13, #63; \ + eor x5, x5, x11; \ + adc x5, x5, x10; \ + stp x2, x3, [P0]; \ + stp x4, x5, [P0+16] + +// Modular addition with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. 
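+// As a pseudocode sketch (x, y and s here are values, not registers):
+//
+//      s = x + y                 // 4-digit sum, carry-out in CF
+//      if (CF) s += 38           // since 2^256 == 38 (mod 2 * p_25519)
+//
+// Under the stated input bound the reinjected 38 cannot carry out
+// again, so the result genuinely fits in 4 digits.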
+ +#define add_twice4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x5, x6, [P1+16]; \ + ldp x7, x8, [P2+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(p0,p1,p2) \ + ldp x5, x6, [p1]; \ + ldp x4, x3, [p2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [p1+16]; \ + ldp x4, x3, [p2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x4, #38; \ + csel x3, x4, xzr, lo; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbc x8, x8, xzr; \ + stp x5, x6, [p0]; \ + stp x7, x8, [p0+16] + +// Combined z = c * x + y with reduction only < 2 * p_25519 +// where c is initially in the X1 register. It is assumed +// that 19 * (c * x + y) < 2^60 * 2^256 so we don't need a +// high mul in the final part. + +#define cmadd_4(p0,p2,p3) \ + ldp x7, x8, [p2]; \ + ldp x9, x10, [p2+16]; \ + mul x3, x1, x7; \ + mul x4, x1, x8; \ + mul x5, x1, x9; \ + mul x6, x1, x10; \ + umulh x7, x1, x7; \ + umulh x8, x1, x8; \ + umulh x9, x1, x9; \ + umulh x10, x1, x10; \ + adds x4, x4, x7; \ + adcs x5, x5, x8; \ + adcs x6, x6, x9; \ + adc x10, x10, xzr; \ + ldp x7, x8, [p3]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x7, x8, [p3+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + adc x10, x10, xzr; \ + cmn x6, x6; \ + bic x6, x6, #0x8000000000000000; \ + adc x8, x10, x10; \ + mov x9, #19; \ + mul x7, x8, x9; \ + adds x3, x3, x7; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [p0]; \ + stp x5, x6, [p0+16] + +// Multiplex: z := if NZ then x else y + +#define mux_4(p0,p1,p2) \ + ldp x0, x1, [p1]; \ + ldp x2, x3, [p2]; \ + csel x0, x0, x2, ne; \ + csel x1, x1, x3, ne; \ + stp x0, x1, [p0]; \ + ldp x0, x1, [p1+16]; \ + ldp x2, x3, [p2+16]; \ + csel x0, x0, x2, ne; \ + csel x1, x1, x3, ne; \ + stp x0, x1, [p0+16] + +S2N_BN_SYMBOL(curve25519_x25519_byte): + +// Save regs and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + sub sp, sp, #NSPACE + +// Move the output pointer to a stable place + + mov res, x0 + +// Copy the inputs to the local variables with minimal mangling: +// +// - The scalar is in principle turned into 01xxx...xxx000 but +// in the structure below the special handling of these bits is +// explicit in the main computation; the scalar is just copied. +// +// - The point x coord is reduced mod 2^255 by masking off the +// top bit. In the main loop we only need reduction < 2 * p_25519. 
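+// (For reference, the RFC 7748 clamping that produces the 01xxx...xxx000
+// pattern would read as follows in C, with n a 32-byte little-endian
+// scalar:
+//
+//      n[0]  &= 0xf8;            // clear the low three bits
+//      n[31] &= 0x7f;            // clear bit 255
+//      n[31] |= 0x40;            // set bit 254
+//
+// Here the cleared low bits are realized by the three pure doublings
+// after the main loop, and the set bit 254 by the initial explicit
+// doubling below.)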
+ + ldrb w10, [x1] + ldrb w0, [x1, #1] + orr x10, x10, x0, lsl #8 + ldrb w0, [x1, #2] + orr x10, x10, x0, lsl #16 + ldrb w0, [x1, #3] + orr x10, x10, x0, lsl #24 + ldrb w0, [x1, #4] + orr x10, x10, x0, lsl #32 + ldrb w0, [x1, #5] + orr x10, x10, x0, lsl #40 + ldrb w0, [x1, #6] + orr x10, x10, x0, lsl #48 + ldrb w0, [x1, #7] + orr x10, x10, x0, lsl #56 + ldrb w11, [x1, #8] + ldrb w0, [x1, #9] + orr x11, x11, x0, lsl #8 + ldrb w0, [x1, #10] + orr x11, x11, x0, lsl #16 + ldrb w0, [x1, #11] + orr x11, x11, x0, lsl #24 + ldrb w0, [x1, #12] + orr x11, x11, x0, lsl #32 + ldrb w0, [x1, #13] + orr x11, x11, x0, lsl #40 + ldrb w0, [x1, #14] + orr x11, x11, x0, lsl #48 + ldrb w0, [x1, #15] + orr x11, x11, x0, lsl #56 + stp x10, x11, [scalar] + + ldrb w12, [x1, #16] + ldrb w0, [x1, #17] + orr x12, x12, x0, lsl #8 + ldrb w0, [x1, #18] + orr x12, x12, x0, lsl #16 + ldrb w0, [x1, #19] + orr x12, x12, x0, lsl #24 + ldrb w0, [x1, #20] + orr x12, x12, x0, lsl #32 + ldrb w0, [x1, #21] + orr x12, x12, x0, lsl #40 + ldrb w0, [x1, #22] + orr x12, x12, x0, lsl #48 + ldrb w0, [x1, #23] + orr x12, x12, x0, lsl #56 + ldrb w13, [x1, #24] + ldrb w0, [x1, #25] + orr x13, x13, x0, lsl #8 + ldrb w0, [x1, #26] + orr x13, x13, x0, lsl #16 + ldrb w0, [x1, #27] + orr x13, x13, x0, lsl #24 + ldrb w0, [x1, #28] + orr x13, x13, x0, lsl #32 + ldrb w0, [x1, #29] + orr x13, x13, x0, lsl #40 + ldrb w0, [x1, #30] + orr x13, x13, x0, lsl #48 + ldrb w0, [x1, #31] + orr x13, x13, x0, lsl #56 + stp x12, x13, [scalar+16] + + ldrb w10, [x2] + ldrb w0, [x2, #1] + orr x10, x10, x0, lsl #8 + ldrb w0, [x2, #2] + orr x10, x10, x0, lsl #16 + ldrb w0, [x2, #3] + orr x10, x10, x0, lsl #24 + ldrb w0, [x2, #4] + orr x10, x10, x0, lsl #32 + ldrb w0, [x2, #5] + orr x10, x10, x0, lsl #40 + ldrb w0, [x2, #6] + orr x10, x10, x0, lsl #48 + ldrb w0, [x2, #7] + orr x10, x10, x0, lsl #56 + ldrb w11, [x2, #8] + ldrb w0, [x2, #9] + orr x11, x11, x0, lsl #8 + ldrb w0, [x2, #10] + orr x11, x11, x0, lsl #16 + ldrb w0, [x2, #11] + orr x11, x11, x0, lsl #24 + ldrb w0, [x2, #12] + orr x11, x11, x0, lsl #32 + ldrb w0, [x2, #13] + orr x11, x11, x0, lsl #40 + ldrb w0, [x2, #14] + orr x11, x11, x0, lsl #48 + ldrb w0, [x2, #15] + orr x11, x11, x0, lsl #56 + stp x10, x11, [pointx] + + ldrb w12, [x2, #16] + ldrb w0, [x2, #17] + orr x12, x12, x0, lsl #8 + ldrb w0, [x2, #18] + orr x12, x12, x0, lsl #16 + ldrb w0, [x2, #19] + orr x12, x12, x0, lsl #24 + ldrb w0, [x2, #20] + orr x12, x12, x0, lsl #32 + ldrb w0, [x2, #21] + orr x12, x12, x0, lsl #40 + ldrb w0, [x2, #22] + orr x12, x12, x0, lsl #48 + ldrb w0, [x2, #23] + orr x12, x12, x0, lsl #56 + ldrb w13, [x2, #24] + ldrb w0, [x2, #25] + orr x13, x13, x0, lsl #8 + ldrb w0, [x2, #26] + orr x13, x13, x0, lsl #16 + ldrb w0, [x2, #27] + orr x13, x13, x0, lsl #24 + ldrb w0, [x2, #28] + orr x13, x13, x0, lsl #32 + ldrb w0, [x2, #29] + orr x13, x13, x0, lsl #40 + ldrb w0, [x2, #30] + orr x13, x13, x0, lsl #48 + ldrb w0, [x2, #31] + orr x13, x13, x0, lsl #56 + and x13, x13, #0x7fffffffffffffff + stp x12, x13, [pointx+16] + +// Initialize with explicit doubling in order to handle set bit 254. +// Set swap = 1 and (xm,zm) = (x,1) then double as (xn,zn) = 2 * (x,1). +// We use the fact that the point x coordinate is still in registers. +// Since zm = 1 we could do the doubling with an operation count of +// 2 * S + M instead of 2 * S + 2 * M, but it doesn't seem worth +// the slight complication arising from a different linear combination. 
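+// The doubling computed by the block below, writing the input as (x,z)
+// with arithmetic mod p_25519, is:
+//
+//      s = (x + z)^2,  d = (x - z)^2,  p = s - d = 4*x*z
+//      x' = s * d
+//      z' = p * (d + 121666 * p)
+//
+// where 121666 = 0x1db42 is the constant assembled by the mov/orr pair.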
+ + mov swap, #1 + stp x10, x11, [xm] + stp x12, x13, [xm+16] + stp swap, xzr, [zm] + stp xzr, xzr, [zm+16] + + sub_twice4(d,xm,zm) + add_twice4(s,xm,zm) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + +// The main loop over unmodified bits from i = 253, ..., i = 3 (inclusive). +// This is a classic Montgomery ladder, with the main coordinates only +// reduced mod 2 * p_25519, some intermediate results even more loosely. + + mov i, #253 + +scalarloop: + +// sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn + + sub_twice4(dm,xm,zm) + add_twice4(sn,xn,zn) + sub_twice4(dn,xn,zn) + add_twice4(sm,xm,zm) + +// ADDING: dmsn = dm * sn +// DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) + + mul_4(dmsn,sn,dm) + + lsr x0, i, #6 + ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly + lsr x2, x2, i + and x2, x2, #1 + + cmp swap, x2 + mov swap, x2 + + mux_4(d,dm,dn) + mux_4(s,sm,sn) + +// ADDING: dnsm = sm * dn + + mul_4(dnsm,sm,dn) + +// DOUBLING: d = (xt - zt)^2 + + sqr_4(d,d) + +// ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 +// DOUBLING: s = (xt + zt)^2 + + sub_twice4(dpro,dmsn,dnsm) + sqr_4(s,s) + add_twice4(spro,dmsn,dnsm) + sqr_4(dpro,dpro) + +// DOUBLING: p = 4 * xt * zt = s - d + + sub_twice4(p,s,d) + +// ADDING: xm' = (dmsn + dnsm)^2 + + sqr_4(xm,spro) + +// DOUBLING: e = 121666 * p + d + + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + +// DOUBLING: xn' = (xt + zt)^2 * (xt - zt)^2 = s * d + + mul_4(xn,s,d) + +// ADDING: zm' = x * (dmsn - dnsm)^2 + + mul_4(zm,dpro,pointx) + +// DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) +// = p * (d + 121666 * p) + + mul_4(zn,p,e) + +// Loop down as far as 3 (inclusive) + + sub i, i, #1 + cmp i, #3 + bcs scalarloop + +// Multiplex directly into (xn,zn) then do three pure doubling steps; +// this accounts for the implicit zeroing of the three lowest bits +// of the scalar. On the very last doubling we *fully* reduce zn mod +// p_25519 to ease checking for degeneracy below. + + cmp swap, xzr + mux_4(xn,xm,xn) + mux_4(zn,zm,zn) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_p25519(zn,p,e) + +// The projective result of the scalar multiplication is now (xn,zn). +// First set up the constant sn = 2^255 - 19 for the modular inverse. + + mov x0, #-19 + mov x1, #-1 + mov x2, #0x7fffffffffffffff + stp x0, x1, [sn] + stp x1, x2, [sn+16] + +// Prepare to call the modular inverse function to get zm = 1/zn + + mov x0, #4 + add x1, zm + add x2, zn + add x3, sn + add x4, p + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "arm/generic/bignum_modinv.S". 
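+// (In effect this computes zm = zn^-1 mod p_25519; the affine result is
+// then xn * zm = xn / zn. As a C-level sketch, the call being inlined
+// amounts to
+//
+//      bignum_modinv(4, zm, zn, sn, p);  // zm = zn^-1 mod sn, scratch in p
+//
+// matching the register setup of x0..x4 above.)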
+ + lsl x10, x0, #3 + add x21, x4, x10 + add x22, x21, x10 + mov x10, xzr +copyloop: + ldr x11, [x2, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + str x11, [x21, x10, lsl #3] + str x12, [x22, x10, lsl #3] + str x12, [x4, x10, lsl #3] + str xzr, [x1, x10, lsl #3] + add x10, x10, #0x1 + cmp x10, x0 + b.cc copyloop + ldr x11, [x4] + sub x12, x11, #0x1 + str x12, [x4] + lsl x20, x11, #2 + sub x20, x11, x20 + eor x20, x20, #0x2 + mov x12, #0x1 + madd x12, x11, x20, x12 + mul x11, x12, x12 + madd x20, x12, x20, x20 + mul x12, x11, x11 + madd x20, x11, x20, x20 + mul x11, x12, x12 + madd x20, x12, x20, x20 + madd x20, x11, x20, x20 + lsl x2, x0, #7 +outerloop: + add x10, x2, #0x3f + lsr x5, x10, #6 + cmp x5, x0 + csel x5, x0, x5, cs + mov x13, xzr + mov x15, xzr + mov x14, xzr + mov x16, xzr + mov x19, xzr + mov x10, xzr +toploop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + orr x17, x11, x12 + cmp x17, xzr + and x17, x19, x13 + csel x15, x17, x15, ne + and x17, x19, x14 + csel x16, x17, x16, ne + csel x13, x11, x13, ne + csel x14, x12, x14, ne + csetm x19, ne + add x10, x10, #0x1 + cmp x10, x5 + b.cc toploop + orr x11, x13, x14 + clz x12, x11 + negs x17, x12 + lsl x13, x13, x12 + csel x15, x15, xzr, ne + lsl x14, x14, x12 + csel x16, x16, xzr, ne + lsr x15, x15, x17 + lsr x16, x16, x17 + orr x13, x13, x15 + orr x14, x14, x16 + ldr x15, [x21] + ldr x16, [x22] + mov x6, #0x1 + mov x7, xzr + mov x8, xzr + mov x9, #0x1 + mov x10, #0x3a + tst x15, #0x1 +innerloop: + csel x11, x14, xzr, ne + csel x12, x16, xzr, ne + csel x17, x8, xzr, ne + csel x19, x9, xzr, ne + ccmp x13, x14, #0x2, ne + sub x11, x13, x11 + sub x12, x15, x12 + csel x14, x14, x13, cs + cneg x11, x11, cc + csel x16, x16, x15, cs + cneg x15, x12, cc + csel x8, x8, x6, cs + csel x9, x9, x7, cs + tst x12, #0x2 + add x6, x6, x17 + add x7, x7, x19 + lsr x13, x11, #1 + lsr x15, x15, #1 + add x8, x8, x8 + add x9, x9, x9 + sub x10, x10, #0x1 + cbnz x10, innerloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +congloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + adds x15, x15, x16 + extr x17, x15, x17, #58 + str x17, [x4, x10, lsl #3] + mov x17, x15 + umulh x15, x7, x12 + adc x13, x13, x15 + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + adds x15, x15, x16 + extr x19, x15, x19, #58 + str x19, [x1, x10, lsl #3] + mov x19, x15 + umulh x15, x9, x12 + adc x14, x14, x15 + add x10, x10, #0x1 + cmp x10, x0 + b.cc congloop + extr x13, x13, x17, #58 + extr x14, x14, x19, #58 + ldr x11, [x4] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, wmontend +wmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x4, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wmontloop +wmontend: + adcs x16, x16, x13 + adc x13, xzr, xzr + sub x15, x10, #0x1 + str x16, [x4, x15, lsl #3] + negs x10, xzr +wcmploop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcmploop + sbcs xzr, x13, xzr + csetm x13, cs + negs x10, xzr +wcorrloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x13 + sbcs x11, x11, 
x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcorrloop + ldr x11, [x1] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, zmontend +zmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x1, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zmontloop +zmontend: + adcs x16, x16, x14 + adc x14, xzr, xzr + sub x15, x10, #0x1 + str x16, [x1, x15, lsl #3] + negs x10, xzr +zcmploop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcmploop + sbcs xzr, x14, xzr + csetm x14, cs + negs x10, xzr +zcorrloop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x14 + sbcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcorrloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +crossloop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + subs x15, x15, x16 + str x15, [x21, x10, lsl #3] + umulh x15, x7, x12 + sub x17, x15, x17 + sbcs x13, x13, x17 + csetm x17, cc + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + subs x15, x15, x16 + str x15, [x22, x10, lsl #3] + umulh x15, x9, x12 + sub x19, x15, x19 + sbcs x14, x14, x19 + csetm x19, cc + add x10, x10, #0x1 + cmp x10, x5 + b.cc crossloop + cmn x17, x17 + ldr x15, [x21] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip1 +negloop1: + add x11, x10, #0x8 + ldr x12, [x21, x11] + extr x15, x12, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop1 +negskip1: + extr x15, x13, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + cmn x19, x19 + ldr x15, [x22] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip2 +negloop2: + add x11, x10, #0x8 + ldr x12, [x22, x11] + extr x15, x12, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop2 +negskip2: + extr x15, x14, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x10, xzr + cmn x17, x17 +wfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + and x11, x11, x17 + eor x12, x12, x17 + adcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wfliploop + mvn x19, x19 + mov x10, xzr + cmn x19, x19 +zfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + and x11, x11, x19 + eor x12, x12, x19 + adcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zfliploop + subs x2, x2, #0x3a + b.hi outerloop + +// Since we eventually want to return 0 when the result is the point at +// infinity, we force xn = 0 whenever zn = 0. This avoids building in a +// dependency on the behavior of modular inverse in out-of-scope cases. 
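+//
+// In constant-time terms the code below is simply: if zn == 0 then
+// xn := 0, with the zero test realized by OR-ing the digits of zn.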
+ + ldp x0, x1, [zn] + ldp x2, x3, [zn+16] + orr x0, x0, x1 + orr x2, x2, x3 + orr x4, x0, x2 + cmp x4, xzr + ldp x0, x1, [xn] + csel x0, x0, xzr, ne + csel x1, x1, xzr, ne + ldp x2, x3, [xn+16] + stp x0, x1, [xn] + csel x2, x2, xzr, ne + csel x3, x3, xzr, ne + stp x2, x3, [xn+16] + +// Now the result is xn * (1/zn), fully reduced modulo p. + + mul_p25519(zn,xn,zm) + + ldp x10, x11, [zn] + strb w10, [resx] + lsr x10, x10, #8 + strb w10, [resx+1] + lsr x10, x10, #8 + strb w10, [resx+2] + lsr x10, x10, #8 + strb w10, [resx+3] + lsr x10, x10, #8 + strb w10, [resx+4] + lsr x10, x10, #8 + strb w10, [resx+5] + lsr x10, x10, #8 + strb w10, [resx+6] + lsr x10, x10, #8 + strb w10, [resx+7] + + strb w11, [resx+8] + lsr x11, x11, #8 + strb w11, [resx+9] + lsr x11, x11, #8 + strb w11, [resx+10] + lsr x11, x11, #8 + strb w11, [resx+11] + lsr x11, x11, #8 + strb w11, [resx+12] + lsr x11, x11, #8 + strb w11, [resx+13] + lsr x11, x11, #8 + strb w11, [resx+14] + lsr x11, x11, #8 + strb w11, [resx+15] + + ldp x12, x13, [zn+16] + strb w12, [resx+16] + lsr x12, x12, #8 + strb w12, [resx+17] + lsr x12, x12, #8 + strb w12, [resx+18] + lsr x12, x12, #8 + strb w12, [resx+19] + lsr x12, x12, #8 + strb w12, [resx+20] + lsr x12, x12, #8 + strb w12, [resx+21] + lsr x12, x12, #8 + strb w12, [resx+22] + lsr x12, x12, #8 + strb w12, [resx+23] + + strb w13, [resx+24] + lsr x13, x13, #8 + strb w13, [resx+25] + lsr x13, x13, #8 + strb w13, [resx+26] + lsr x13, x13, #8 + strb w13, [resx+27] + lsr x13, x13, #8 + strb w13, [resx+28] + lsr x13, x13, #8 + strb w13, [resx+29] + lsr x13, x13, #8 + strb w13, [resx+30] + lsr x13, x13, #8 + strb w13, [resx+31] + +// Restore stack and registers + + add sp, sp, #NSPACE + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/curve25519/curve25519_x25519_byte_alt.S b/arm/curve25519/curve25519_x25519_byte_alt.S new file mode 100644 index 0000000000..c291f8d828 --- /dev/null +++ b/arm/curve25519/curve25519_x25519_byte_alt.S @@ -0,0 +1,1205 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// The x25519 function for curve25519 (byte array arguments) +// Inputs scalar[32] (bytes), point[32] (bytes); output res[32] (bytes) +// +// extern void curve25519_x25519_byte_alt +// (uint8_t res[static 32],uint8_t scalar[static 32],uint8_t point[static 32]) +// +// Given a scalar n and the X coordinate of an input point P = (X,Y) on +// curve25519 (Y can live in any extension field of characteristic 2^255-19), +// this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the +// point at infinity. Both n and X inputs are first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748); +// in particular the lower three bits of n are set to zero. 
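+//
+// An illustrative call from C (buffer names are examples only):
+//
+//   uint8_t res[32], scalar[32], point[32];
+//   ...                                    // fill scalar and point
+//   curve25519_x25519_byte_alt(res, scalar, point);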
+// +// Standard ARM ABI: X0 = res, X1 = scalar, X2 = point +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519_byte_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519_byte_alt) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable homes for the input result argument during the whole body +// and other variables that are only needed prior to the modular inverse. + +#define res x23 +#define i x20 +#define swap x21 + +// Pointers to result x coord to be written + +#define resx res, #0 + +// Pointer-offset pairs for temporaries on stack with some aliasing. + +#define scalar sp, #(0*NUMSIZE) + +#define pointx sp, #(1*NUMSIZE) + +#define zm sp, #(2*NUMSIZE) +#define sm sp, #(2*NUMSIZE) +#define dpro sp, #(2*NUMSIZE) + +#define sn sp, #(3*NUMSIZE) + +#define dm sp, #(4*NUMSIZE) + +#define zn sp, #(5*NUMSIZE) +#define dn sp, #(5*NUMSIZE) +#define e sp, #(5*NUMSIZE) + +#define dmsn sp, #(6*NUMSIZE) +#define p sp, #(6*NUMSIZE) + +#define xm sp, #(7*NUMSIZE) +#define dnsm sp, #(7*NUMSIZE) +#define spro sp, #(7*NUMSIZE) + +#define d sp, #(8*NUMSIZE) + +#define xn sp, #(9*NUMSIZE) +#define s sp, #(9*NUMSIZE) + +// Total size to reserve on the stack + +#define NSPACE (10*NUMSIZE) + +// Macro wrapping up the basic field operation bignum_mul_p25519_alt, only +// trivially different from a pure function call to that subroutine. + +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [P2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [P1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #0x26; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, cs; \ + adds x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + orr x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #0x13; \ + madd x11, x7, x8, x7; \ + adds x12, x12, x11; \ + 
adcs x13, x13, x9; \ + adcs x14, x14, x3; \ + adcs x15, x15, xzr; \ + csel x7, x7, xzr, cc; \ + subs x12, x12, x7; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbc x15, x15, xzr; \ + and x15, x15, #0x7fffffffffffffff; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [P2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [P1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #0x26; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, cs; \ + adds x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + bic x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #0x13; \ + mul x11, x7, x8; \ + adds x12, x12, x11; \ + adcs x13, x13, x9; \ + adcs x14, x14, x3; \ + adc x15, x15, xzr; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the final +// optional correction. 
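+//
+// (Both reductions rest on the identity 2^256 == 38 (mod p_25519),
+// following from 2^255 == 19: a 512-bit value 2^256 * h + l is folded
+// to l + 38 * h, which is where the constant 0x26 = 38 comes from.)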
+ +#define sqr_4(P0,P1) \ + ldp x2, x3, [P1]; \ + mul x9, x2, x3; \ + umulh x10, x2, x3; \ + ldp x4, x5, [P1+16]; \ + mul x11, x2, x5; \ + umulh x12, x2, x5; \ + mul x7, x2, x4; \ + umulh x6, x2, x4; \ + adds x10, x10, x7; \ + adcs x11, x11, x6; \ + mul x7, x3, x4; \ + umulh x6, x3, x4; \ + adc x6, x6, xzr; \ + adds x11, x11, x7; \ + mul x13, x4, x5; \ + umulh x14, x4, x5; \ + adcs x12, x12, x6; \ + mul x7, x3, x5; \ + umulh x6, x3, x5; \ + adc x6, x6, xzr; \ + adds x12, x12, x7; \ + adcs x13, x13, x6; \ + adc x14, x14, xzr; \ + adds x9, x9, x9; \ + adcs x10, x10, x10; \ + adcs x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + cset x6, cs; \ + umulh x7, x2, x2; \ + mul x8, x2, x2; \ + adds x9, x9, x7; \ + mul x7, x3, x3; \ + adcs x10, x10, x7; \ + umulh x7, x3, x3; \ + adcs x11, x11, x7; \ + mul x7, x4, x4; \ + adcs x12, x12, x7; \ + umulh x7, x4, x4; \ + adcs x13, x13, x7; \ + mul x7, x5, x5; \ + adcs x14, x14, x7; \ + umulh x7, x5, x5; \ + adc x6, x6, x7; \ + mov x3, #0x26; \ + mul x7, x3, x12; \ + umulh x4, x3, x12; \ + adds x8, x8, x7; \ + mul x7, x3, x13; \ + umulh x13, x3, x13; \ + adcs x9, x9, x7; \ + mul x7, x3, x14; \ + umulh x14, x3, x14; \ + adcs x10, x10, x7; \ + mul x7, x3, x6; \ + umulh x6, x3, x6; \ + adcs x11, x11, x7; \ + cset x12, cs; \ + adds x11, x11, x14; \ + adc x12, x12, x6; \ + cmn x11, x11; \ + bic x11, x11, #0x8000000000000000; \ + adc x2, x12, x12; \ + mov x3, #0x13; \ + mul x7, x3, x2; \ + adds x8, x8, x7; \ + adcs x9, x9, x4; \ + adcs x10, x10, x13; \ + adc x11, x11, xzr; \ + stp x8, x9, [P0]; \ + stp x10, x11, [P0+16] + +// Modular addition with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. + +#define add_twice4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x5, x6, [P1+16]; \ + ldp x7, x8, [P2+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(p0,p1,p2) \ + ldp x5, x6, [p1]; \ + ldp x4, x3, [p2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [p1+16]; \ + ldp x4, x3, [p2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x4, #38; \ + csel x3, x4, xzr, lo; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbc x8, x8, xzr; \ + stp x5, x6, [p0]; \ + stp x7, x8, [p0+16] + +// Combined z = c * x + y with reduction only < 2 * p_25519 +// where c is initially in the X1 register. It is assumed +// that 19 * (c * x + y) < 2^60 * 2^256 so we don't need a +// high mul in the final part. 
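+//
+// (For the uses below, c = 121666 < 2^17 and x, y < 2^256, so
+// 19 * (c * x + y) < 19 * (2^17 + 1) * 2^256, comfortably below the
+// assumed bound of 2^60 * 2^256.)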
+ +#define cmadd_4(p0,p2,p3) \ + ldp x7, x8, [p2]; \ + ldp x9, x10, [p2+16]; \ + mul x3, x1, x7; \ + mul x4, x1, x8; \ + mul x5, x1, x9; \ + mul x6, x1, x10; \ + umulh x7, x1, x7; \ + umulh x8, x1, x8; \ + umulh x9, x1, x9; \ + umulh x10, x1, x10; \ + adds x4, x4, x7; \ + adcs x5, x5, x8; \ + adcs x6, x6, x9; \ + adc x10, x10, xzr; \ + ldp x7, x8, [p3]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x7, x8, [p3+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + adc x10, x10, xzr; \ + cmn x6, x6; \ + bic x6, x6, #0x8000000000000000; \ + adc x8, x10, x10; \ + mov x9, #19; \ + mul x7, x8, x9; \ + adds x3, x3, x7; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [p0]; \ + stp x5, x6, [p0+16] + +// Multiplex: z := if NZ then x else y + +#define mux_4(p0,p1,p2) \ + ldp x0, x1, [p1]; \ + ldp x2, x3, [p2]; \ + csel x0, x0, x2, ne; \ + csel x1, x1, x3, ne; \ + stp x0, x1, [p0]; \ + ldp x0, x1, [p1+16]; \ + ldp x2, x3, [p2+16]; \ + csel x0, x0, x2, ne; \ + csel x1, x1, x3, ne; \ + stp x0, x1, [p0+16] + +S2N_BN_SYMBOL(curve25519_x25519_byte_alt): + +// Save regs and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + sub sp, sp, #NSPACE + +// Move the output pointer to a stable place + + mov res, x0 + +// Copy the inputs to the local variables with minimal mangling: +// +// - The scalar is in principle turned into 01xxx...xxx000 but +// in the structure below the special handling of these bits is +// explicit in the main computation; the scalar is just copied. +// +// - The point x coord is reduced mod 2^255 by masking off the +// top bit. In the main loop we only need reduction < 2 * p_25519. + + ldrb w10, [x1] + ldrb w0, [x1, #1] + orr x10, x10, x0, lsl #8 + ldrb w0, [x1, #2] + orr x10, x10, x0, lsl #16 + ldrb w0, [x1, #3] + orr x10, x10, x0, lsl #24 + ldrb w0, [x1, #4] + orr x10, x10, x0, lsl #32 + ldrb w0, [x1, #5] + orr x10, x10, x0, lsl #40 + ldrb w0, [x1, #6] + orr x10, x10, x0, lsl #48 + ldrb w0, [x1, #7] + orr x10, x10, x0, lsl #56 + ldrb w11, [x1, #8] + ldrb w0, [x1, #9] + orr x11, x11, x0, lsl #8 + ldrb w0, [x1, #10] + orr x11, x11, x0, lsl #16 + ldrb w0, [x1, #11] + orr x11, x11, x0, lsl #24 + ldrb w0, [x1, #12] + orr x11, x11, x0, lsl #32 + ldrb w0, [x1, #13] + orr x11, x11, x0, lsl #40 + ldrb w0, [x1, #14] + orr x11, x11, x0, lsl #48 + ldrb w0, [x1, #15] + orr x11, x11, x0, lsl #56 + stp x10, x11, [scalar] + + ldrb w12, [x1, #16] + ldrb w0, [x1, #17] + orr x12, x12, x0, lsl #8 + ldrb w0, [x1, #18] + orr x12, x12, x0, lsl #16 + ldrb w0, [x1, #19] + orr x12, x12, x0, lsl #24 + ldrb w0, [x1, #20] + orr x12, x12, x0, lsl #32 + ldrb w0, [x1, #21] + orr x12, x12, x0, lsl #40 + ldrb w0, [x1, #22] + orr x12, x12, x0, lsl #48 + ldrb w0, [x1, #23] + orr x12, x12, x0, lsl #56 + ldrb w13, [x1, #24] + ldrb w0, [x1, #25] + orr x13, x13, x0, lsl #8 + ldrb w0, [x1, #26] + orr x13, x13, x0, lsl #16 + ldrb w0, [x1, #27] + orr x13, x13, x0, lsl #24 + ldrb w0, [x1, #28] + orr x13, x13, x0, lsl #32 + ldrb w0, [x1, #29] + orr x13, x13, x0, lsl #40 + ldrb w0, [x1, #30] + orr x13, x13, x0, lsl #48 + ldrb w0, [x1, #31] + orr x13, x13, x0, lsl #56 + stp x12, x13, [scalar+16] + + ldrb w10, [x2] + ldrb w0, [x2, #1] + orr x10, x10, x0, lsl #8 + ldrb w0, [x2, #2] + orr x10, x10, x0, lsl #16 + ldrb w0, [x2, #3] + orr x10, x10, x0, lsl #24 + ldrb w0, [x2, #4] + orr x10, x10, x0, lsl #32 + ldrb w0, [x2, #5] + orr x10, x10, x0, lsl #40 + ldrb w0, [x2, #6] + orr x10, x10, x0, lsl #48 + ldrb w0, [x2, #7] + orr x10, x10, x0, lsl 
#56 + ldrb w11, [x2, #8] + ldrb w0, [x2, #9] + orr x11, x11, x0, lsl #8 + ldrb w0, [x2, #10] + orr x11, x11, x0, lsl #16 + ldrb w0, [x2, #11] + orr x11, x11, x0, lsl #24 + ldrb w0, [x2, #12] + orr x11, x11, x0, lsl #32 + ldrb w0, [x2, #13] + orr x11, x11, x0, lsl #40 + ldrb w0, [x2, #14] + orr x11, x11, x0, lsl #48 + ldrb w0, [x2, #15] + orr x11, x11, x0, lsl #56 + stp x10, x11, [pointx] + + ldrb w12, [x2, #16] + ldrb w0, [x2, #17] + orr x12, x12, x0, lsl #8 + ldrb w0, [x2, #18] + orr x12, x12, x0, lsl #16 + ldrb w0, [x2, #19] + orr x12, x12, x0, lsl #24 + ldrb w0, [x2, #20] + orr x12, x12, x0, lsl #32 + ldrb w0, [x2, #21] + orr x12, x12, x0, lsl #40 + ldrb w0, [x2, #22] + orr x12, x12, x0, lsl #48 + ldrb w0, [x2, #23] + orr x12, x12, x0, lsl #56 + ldrb w13, [x2, #24] + ldrb w0, [x2, #25] + orr x13, x13, x0, lsl #8 + ldrb w0, [x2, #26] + orr x13, x13, x0, lsl #16 + ldrb w0, [x2, #27] + orr x13, x13, x0, lsl #24 + ldrb w0, [x2, #28] + orr x13, x13, x0, lsl #32 + ldrb w0, [x2, #29] + orr x13, x13, x0, lsl #40 + ldrb w0, [x2, #30] + orr x13, x13, x0, lsl #48 + ldrb w0, [x2, #31] + orr x13, x13, x0, lsl #56 + and x13, x13, #0x7fffffffffffffff + stp x12, x13, [pointx+16] + +// Initialize with explicit doubling in order to handle set bit 254. +// Set swap = 1 and (xm,zm) = (x,1) then double as (xn,zn) = 2 * (x,1). +// We use the fact that the point x coordinate is still in registers. +// Since zm = 1 we could do the doubling with an operation count of +// 2 * S + M instead of 2 * S + 2 * M, but it doesn't seem worth +// the slight complication arising from a different linear combination. + + mov swap, #1 + stp x10, x11, [xm] + stp x12, x13, [xm+16] + stp swap, xzr, [zm] + stp xzr, xzr, [zm+16] + + sub_twice4(d,xm,zm) + add_twice4(s,xm,zm) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + +// The main loop over unmodified bits from i = 253, ..., i = 3 (inclusive). +// This is a classic Montgomery ladder, with the main coordinates only +// reduced mod 2 * p_25519, some intermediate results even more loosely. 
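+//
+// One iteration in ladder terms, as a rough sketch: letting b be the
+// current scalar bit, conditionally swap the pairs (xm,zm) and (xn,zn),
+// then take (xm,zm) through a differential addition (the "ADDING" lines)
+// and (xn,zn) through a doubling (the "DOUBLING" lines), interleaved
+// below for scheduling. The fixed difference of the two points is the
+// input x coordinate, which is why pointx enters the zm' computation.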
+ + mov i, #253 + +scalarloop: + +// sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn + + sub_twice4(dm,xm,zm) + add_twice4(sn,xn,zn) + sub_twice4(dn,xn,zn) + add_twice4(sm,xm,zm) + +// ADDING: dmsn = dm * sn +// DOUBLING: mux d = xt - zt and s = xt + zt for appropriate choice of (xt,zt) + + mul_4(dmsn,sn,dm) + + lsr x0, i, #6 + ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly + lsr x2, x2, i + and x2, x2, #1 + + cmp swap, x2 + mov swap, x2 + + mux_4(d,dm,dn) + mux_4(s,sm,sn) + +// ADDING: dnsm = sm * dn + + mul_4(dnsm,sm,dn) + +// DOUBLING: d = (xt - zt)^2 + + sqr_4(d,d) + +// ADDING: dpro = (dmsn - dnsm)^2, spro = (dmsn + dnsm)^2 +// DOUBLING: s = (xt + zt)^2 + + sub_twice4(dpro,dmsn,dnsm) + sqr_4(s,s) + add_twice4(spro,dmsn,dnsm) + sqr_4(dpro,dpro) + +// DOUBLING: p = 4 * xt * zt = s - d + + sub_twice4(p,s,d) + +// ADDING: xm' = (dmsn + dnsm)^2 + + sqr_4(xm,spro) + +// DOUBLING: e = 121666 * p + d + + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + +// DOUBLING: xn' = (xt + zt)^2 * (xt - zt)^2 = s * d + + mul_4(xn,s,d) + +// ADDING: zm' = x * (dmsn - dnsm)^2 + + mul_4(zm,dpro,pointx) + +// DOUBLING: zn' = (4 * xt * zt) * ((xt - zt)^2 + 121666 * (4 * xt * zt)) +// = p * (d + 121666 * p) + + mul_4(zn,p,e) + +// Loop down as far as 3 (inclusive) + + sub i, i, #1 + cmp i, #3 + bcs scalarloop + +// Multiplex directly into (xn,zn) then do three pure doubling steps; +// this accounts for the implicit zeroing of the three lowest bits +// of the scalar. On the very last doubling we *fully* reduce zn mod +// p_25519 to ease checking for degeneracy below. + + cmp swap, xzr + mux_4(xn,xm,xn) + mux_4(zn,zm,zn) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_4(zn,p,e) + + sub_twice4(d,xn,zn) + add_twice4(s,xn,zn) + sqr_4(d,d) + sqr_4(s,s) + sub_twice4(p,s,d) + mov x1, 0xdb42 + orr x1, x1, 0x10000 + cmadd_4(e,p,d) + mul_4(xn,s,d) + mul_p25519(zn,p,e) + +// The projective result of the scalar multiplication is now (xn,zn). +// First set up the constant sn = 2^255 - 19 for the modular inverse. + + mov x0, #-19 + mov x1, #-1 + mov x2, #0x7fffffffffffffff + stp x0, x1, [sn] + stp x1, x2, [sn+16] + +// Prepare to call the modular inverse function to get zm = 1/zn + + mov x0, #4 + add x1, zm + add x2, zn + add x3, sn + add x4, p + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "arm/generic/bignum_modinv.S". 
+ + lsl x10, x0, #3 + add x21, x4, x10 + add x22, x21, x10 + mov x10, xzr +copyloop: + ldr x11, [x2, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + str x11, [x21, x10, lsl #3] + str x12, [x22, x10, lsl #3] + str x12, [x4, x10, lsl #3] + str xzr, [x1, x10, lsl #3] + add x10, x10, #0x1 + cmp x10, x0 + b.cc copyloop + ldr x11, [x4] + sub x12, x11, #0x1 + str x12, [x4] + lsl x20, x11, #2 + sub x20, x11, x20 + eor x20, x20, #0x2 + mov x12, #0x1 + madd x12, x11, x20, x12 + mul x11, x12, x12 + madd x20, x12, x20, x20 + mul x12, x11, x11 + madd x20, x11, x20, x20 + mul x11, x12, x12 + madd x20, x12, x20, x20 + madd x20, x11, x20, x20 + lsl x2, x0, #7 +outerloop: + add x10, x2, #0x3f + lsr x5, x10, #6 + cmp x5, x0 + csel x5, x0, x5, cs + mov x13, xzr + mov x15, xzr + mov x14, xzr + mov x16, xzr + mov x19, xzr + mov x10, xzr +toploop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + orr x17, x11, x12 + cmp x17, xzr + and x17, x19, x13 + csel x15, x17, x15, ne + and x17, x19, x14 + csel x16, x17, x16, ne + csel x13, x11, x13, ne + csel x14, x12, x14, ne + csetm x19, ne + add x10, x10, #0x1 + cmp x10, x5 + b.cc toploop + orr x11, x13, x14 + clz x12, x11 + negs x17, x12 + lsl x13, x13, x12 + csel x15, x15, xzr, ne + lsl x14, x14, x12 + csel x16, x16, xzr, ne + lsr x15, x15, x17 + lsr x16, x16, x17 + orr x13, x13, x15 + orr x14, x14, x16 + ldr x15, [x21] + ldr x16, [x22] + mov x6, #0x1 + mov x7, xzr + mov x8, xzr + mov x9, #0x1 + mov x10, #0x3a + tst x15, #0x1 +innerloop: + csel x11, x14, xzr, ne + csel x12, x16, xzr, ne + csel x17, x8, xzr, ne + csel x19, x9, xzr, ne + ccmp x13, x14, #0x2, ne + sub x11, x13, x11 + sub x12, x15, x12 + csel x14, x14, x13, cs + cneg x11, x11, cc + csel x16, x16, x15, cs + cneg x15, x12, cc + csel x8, x8, x6, cs + csel x9, x9, x7, cs + tst x12, #0x2 + add x6, x6, x17 + add x7, x7, x19 + lsr x13, x11, #1 + lsr x15, x15, #1 + add x8, x8, x8 + add x9, x9, x9 + sub x10, x10, #0x1 + cbnz x10, innerloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +congloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + adds x15, x15, x16 + extr x17, x15, x17, #58 + str x17, [x4, x10, lsl #3] + mov x17, x15 + umulh x15, x7, x12 + adc x13, x13, x15 + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + adds x15, x15, x16 + extr x19, x15, x19, #58 + str x19, [x1, x10, lsl #3] + mov x19, x15 + umulh x15, x9, x12 + adc x14, x14, x15 + add x10, x10, #0x1 + cmp x10, x0 + b.cc congloop + extr x13, x13, x17, #58 + extr x14, x14, x19, #58 + ldr x11, [x4] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, wmontend +wmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x4, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wmontloop +wmontend: + adcs x16, x16, x13 + adc x13, xzr, xzr + sub x15, x10, #0x1 + str x16, [x4, x15, lsl #3] + negs x10, xzr +wcmploop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcmploop + sbcs xzr, x13, xzr + csetm x13, cs + negs x10, xzr +wcorrloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x13 + sbcs x11, x11, 
x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcorrloop + ldr x11, [x1] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, zmontend +zmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x1, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zmontloop +zmontend: + adcs x16, x16, x14 + adc x14, xzr, xzr + sub x15, x10, #0x1 + str x16, [x1, x15, lsl #3] + negs x10, xzr +zcmploop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcmploop + sbcs xzr, x14, xzr + csetm x14, cs + negs x10, xzr +zcorrloop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x14 + sbcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcorrloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +crossloop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + subs x15, x15, x16 + str x15, [x21, x10, lsl #3] + umulh x15, x7, x12 + sub x17, x15, x17 + sbcs x13, x13, x17 + csetm x17, cc + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + subs x15, x15, x16 + str x15, [x22, x10, lsl #3] + umulh x15, x9, x12 + sub x19, x15, x19 + sbcs x14, x14, x19 + csetm x19, cc + add x10, x10, #0x1 + cmp x10, x5 + b.cc crossloop + cmn x17, x17 + ldr x15, [x21] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip1 +negloop1: + add x11, x10, #0x8 + ldr x12, [x21, x11] + extr x15, x12, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop1 +negskip1: + extr x15, x13, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + cmn x19, x19 + ldr x15, [x22] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip2 +negloop2: + add x11, x10, #0x8 + ldr x12, [x22, x11] + extr x15, x12, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop2 +negskip2: + extr x15, x14, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x10, xzr + cmn x17, x17 +wfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + and x11, x11, x17 + eor x12, x12, x17 + adcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wfliploop + mvn x19, x19 + mov x10, xzr + cmn x19, x19 +zfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + and x11, x11, x19 + eor x12, x12, x19 + adcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zfliploop + subs x2, x2, #0x3a + b.hi outerloop + +// Since we eventually want to return 0 when the result is the point at +// infinity, we force xn = 0 whenever zn = 0. This avoids building in a +// dependency on the behavior of modular inverse in out-of-scope cases. 
+ + ldp x0, x1, [zn] + ldp x2, x3, [zn+16] + orr x0, x0, x1 + orr x2, x2, x3 + orr x4, x0, x2 + cmp x4, xzr + ldp x0, x1, [xn] + csel x0, x0, xzr, ne + csel x1, x1, xzr, ne + ldp x2, x3, [xn+16] + stp x0, x1, [xn] + csel x2, x2, xzr, ne + csel x3, x3, xzr, ne + stp x2, x3, [xn+16] + +// Now the result is xn * (1/zn), fully reduced modulo p. + + mul_p25519(zn,xn,zm) + + ldp x10, x11, [zn] + strb w10, [resx] + lsr x10, x10, #8 + strb w10, [resx+1] + lsr x10, x10, #8 + strb w10, [resx+2] + lsr x10, x10, #8 + strb w10, [resx+3] + lsr x10, x10, #8 + strb w10, [resx+4] + lsr x10, x10, #8 + strb w10, [resx+5] + lsr x10, x10, #8 + strb w10, [resx+6] + lsr x10, x10, #8 + strb w10, [resx+7] + + strb w11, [resx+8] + lsr x11, x11, #8 + strb w11, [resx+9] + lsr x11, x11, #8 + strb w11, [resx+10] + lsr x11, x11, #8 + strb w11, [resx+11] + lsr x11, x11, #8 + strb w11, [resx+12] + lsr x11, x11, #8 + strb w11, [resx+13] + lsr x11, x11, #8 + strb w11, [resx+14] + lsr x11, x11, #8 + strb w11, [resx+15] + + ldp x12, x13, [zn+16] + strb w12, [resx+16] + lsr x12, x12, #8 + strb w12, [resx+17] + lsr x12, x12, #8 + strb w12, [resx+18] + lsr x12, x12, #8 + strb w12, [resx+19] + lsr x12, x12, #8 + strb w12, [resx+20] + lsr x12, x12, #8 + strb w12, [resx+21] + lsr x12, x12, #8 + strb w12, [resx+22] + lsr x12, x12, #8 + strb w12, [resx+23] + + strb w13, [resx+24] + lsr x13, x13, #8 + strb w13, [resx+25] + lsr x13, x13, #8 + strb w13, [resx+26] + lsr x13, x13, #8 + strb w13, [resx+27] + lsr x13, x13, #8 + strb w13, [resx+28] + lsr x13, x13, #8 + strb w13, [resx+29] + lsr x13, x13, #8 + strb w13, [resx+30] + lsr x13, x13, #8 + strb w13, [resx+31] + +// Restore stack and registers + + add sp, sp, #NSPACE + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/curve25519/curve25519_x25519base_byte.S b/arm/curve25519/curve25519_x25519base_byte.S new file mode 100644 index 0000000000..651aea49c2 --- /dev/null +++ b/arm/curve25519/curve25519_x25519base_byte.S @@ -0,0 +1,9020 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// The x25519 function for curve25519 on base element 9 (byte array arguments) +// Input scalar[32] (bytes); output res[32] (bytes) +// +// extern void curve25519_x25519base_byte +// (uint8_t res[static 32],uint8_t scalar[static 32]) +// +// Given a scalar n, returns the X coordinate of n * G where G = (9,...) is +// the standard generator. The scalar is first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748). +// +// Standard ARM ABI: X0 = res, X1 = scalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519base_byte) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519base_byte) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable home for the input result argument during the whole body + +#define res x23 + +// Other variables that are only needed prior to the modular inverse. + +#define tab x19 + +#define i x20 + +#define bias x21 + +#define bf x22 +#define ix x22 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. 
+ +#define resx res, #(0*NUMSIZE) + +#define scalar sp, #(0*NUMSIZE) + +#define tabent sp, #(1*NUMSIZE) +#define ymx_2 sp, #(1*NUMSIZE) +#define xpy_2 sp, #(2*NUMSIZE) +#define kxy_2 sp, #(3*NUMSIZE) + +#define acc sp, #(4*NUMSIZE) +#define x_1 sp, #(4*NUMSIZE) +#define y_1 sp, #(5*NUMSIZE) +#define z_1 sp, #(6*NUMSIZE) +#define w_1 sp, #(7*NUMSIZE) +#define x_3 sp, #(4*NUMSIZE) +#define y_3 sp, #(5*NUMSIZE) +#define z_3 sp, #(6*NUMSIZE) +#define w_3 sp, #(7*NUMSIZE) + +#define tmpspace sp, #(8*NUMSIZE) +#define t0 sp, #(8*NUMSIZE) +#define t1 sp, #(9*NUMSIZE) +#define t2 sp, #(10*NUMSIZE) +#define t3 sp, #(11*NUMSIZE) +#define t4 sp, #(12*NUMSIZE) +#define t5 sp, #(13*NUMSIZE) + +// Total size to reserve on the stack + +#define NSPACE (14*NUMSIZE) + +// Macro wrapping up the basic field operation bignum_mul_p25519, only +// trivially different from a pure function call to that subroutine. + +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [P2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ 
+ adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ + lsr x12, x12, #32; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ + lsr x13, x13, #32; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ + lsr x14, x14, #32; \ + umaddl x14, w14, w3, x10; \ + mov x10, x4; \ + lsr x0, x14, #31; \ + mov x5, #0x13; \ + umaddl x5, w5, w0, x5; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ + mov x3, #0x13; \ + tst x10, #0x8000000000000000; \ + csel x3, x3, xzr, pl; \ + subs x7, x7, x3; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbc x10, x10, xzr; \ + and x10, x10, #0x7fffffffffffffff; \ + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [P2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, 
x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ + lsr x12, x12, #32; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ + lsr x13, x13, #32; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ + lsr x14, x14, #32; \ + umaddl x14, w14, w3, x10; \ + mov x10, x4; \ + lsr x0, x14, #31; \ + mov x5, #0x13; \ + umull x5, w5, w0; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x4, #38; \ + csel x3, x4, xzr, lo; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbc x8, x8, xzr; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16] + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. 
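+//
+// (For instance, one arbitrary 4-digit input x < 2^256 plus one input
+// y < 2 * p_25519 = 2^256 - 38 gives x + y < 2^257 - 38, exactly the
+// stated bound, so the single conditional correction suffices.)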
+ +#define add_twice4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x5, x6, [P1+16]; \ + ldp x7, x8, [P2+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +#define double_twice4(P0,P1) \ + ldp x3, x4, [P1]; \ + adds x3, x3, x3; \ + adcs x4, x4, x4; \ + ldp x5, x6, [P1+16]; \ + adcs x5, x5, x5; \ + adcs x6, x6, x6; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +S2N_BN_SYMBOL(curve25519_x25519base_byte): + +// Save regs and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + sub sp, sp, #NSPACE + +// Move the output pointer to a stable place + + mov res, x0 + +// Copy the input scalar to its local variable while mangling it. +// In principle the mangling is into 01xxx...xxx000, but actually +// we only clear the top two bits so 00xxx...xxxxxx. The additional +// 2^254 * G is taken care of by the starting value for the addition +// chain below, while we never look at the three low bits at all. + + ldrb w10, [x1] + ldrb w0, [x1, #1] + orr x10, x10, x0, lsl #8 + ldrb w0, [x1, #2] + orr x10, x10, x0, lsl #16 + ldrb w0, [x1, #3] + orr x10, x10, x0, lsl #24 + ldrb w0, [x1, #4] + orr x10, x10, x0, lsl #32 + ldrb w0, [x1, #5] + orr x10, x10, x0, lsl #40 + ldrb w0, [x1, #6] + orr x10, x10, x0, lsl #48 + ldrb w0, [x1, #7] + orr x10, x10, x0, lsl #56 + ldrb w11, [x1, #8] + ldrb w0, [x1, #9] + orr x11, x11, x0, lsl #8 + ldrb w0, [x1, #10] + orr x11, x11, x0, lsl #16 + ldrb w0, [x1, #11] + orr x11, x11, x0, lsl #24 + ldrb w0, [x1, #12] + orr x11, x11, x0, lsl #32 + ldrb w0, [x1, #13] + orr x11, x11, x0, lsl #40 + ldrb w0, [x1, #14] + orr x11, x11, x0, lsl #48 + ldrb w0, [x1, #15] + orr x11, x11, x0, lsl #56 + stp x10, x11, [scalar] + + ldrb w12, [x1, #16] + ldrb w0, [x1, #17] + orr x12, x12, x0, lsl #8 + ldrb w0, [x1, #18] + orr x12, x12, x0, lsl #16 + ldrb w0, [x1, #19] + orr x12, x12, x0, lsl #24 + ldrb w0, [x1, #20] + orr x12, x12, x0, lsl #32 + ldrb w0, [x1, #21] + orr x12, x12, x0, lsl #40 + ldrb w0, [x1, #22] + orr x12, x12, x0, lsl #48 + ldrb w0, [x1, #23] + orr x12, x12, x0, lsl #56 + ldrb w13, [x1, #24] + ldrb w0, [x1, #25] + orr x13, x13, x0, lsl #8 + ldrb w0, [x1, #26] + orr x13, x13, x0, lsl #16 + ldrb w0, [x1, #27] + orr x13, x13, x0, lsl #24 + ldrb w0, [x1, #28] + orr x13, x13, x0, lsl #32 + ldrb w0, [x1, #29] + orr x13, x13, x0, lsl #40 + ldrb w0, [x1, #30] + orr x13, x13, x0, lsl #48 + ldrb w0, [x1, #31] + orr x13, x13, x0, lsl #56 + bic x13, x13, #0xc000000000000000 + stp x12, x13, [scalar+16] + +// The main part of the computation is on the edwards25519 curve in +// extended-projective coordinates (X,Y,Z,T), representing a point +// (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z). +// Only at the very end do we translate back to curve25519. So G +// below means the generator within edwards25519 corresponding to +// (9,...) for curve25519, via the standard isomorphism. +// +// Initialize accumulator "acc" to either (2^254 + 8) * G or just 2^254 * G +// depending on bit 3 of the scalar, the only nonzero bit of the bottom 4. +// Thus, we have effectively dealt with bits 0, 1, 2, 3, 254 and 255. 
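+//
+// In equation form, writing b_i for bit i of the mangled scalar n:
+//
+//   n * G = (2^254 + 8 * b_3) * G + (sum_{i=4..253} b_i * 2^i) * G
+//
+// and the first summand is exactly the starting value selected below.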
+
+ ldr x0, [scalar]
+ ands xzr, x0, #8
+
+ adr x10, edwards25519_0g
+ adr x11, edwards25519_8g
+ ldp x0, x1, [x10]
+ ldp x2, x3, [x11]
+ csel x0, x0, x2, eq
+ csel x1, x1, x3, eq
+ stp x0, x1, [acc]
+
+ ldp x0, x1, [x10, 1*16]
+ ldp x2, x3, [x11, 1*16]
+ csel x0, x0, x2, eq
+ csel x1, x1, x3, eq
+ stp x0, x1, [acc+1*16]
+
+ ldp x0, x1, [x10, 2*16]
+ ldp x2, x3, [x11, 2*16]
+ csel x0, x0, x2, eq
+ csel x1, x1, x3, eq
+ stp x0, x1, [acc+2*16]
+
+ ldp x0, x1, [x10, 3*16]
+ ldp x2, x3, [x11, 3*16]
+ csel x0, x0, x2, eq
+ csel x1, x1, x3, eq
+ stp x0, x1, [acc+3*16]
+
+ mov x0, #1
+ stp x0, xzr, [acc+4*16]
+ stp xzr, xzr, [acc+5*16]
+
+ ldp x0, x1, [x10, 4*16]
+ ldp x2, x3, [x11, 4*16]
+ csel x0, x0, x2, eq
+ csel x1, x1, x3, eq
+ stp x0, x1, [acc+6*16]
+
+ ldp x0, x1, [x10, 5*16]
+ ldp x2, x3, [x11, 5*16]
+ csel x0, x0, x2, eq
+ csel x1, x1, x3, eq
+ stp x0, x1, [acc+7*16]
+
+// The counter "i" tracks the bit position for which the scalar has
+// already been absorbed, starting at 4 and going up in chunks of 4.
+//
+// The pointer "tab" points at the current block of the table for
+// multiples (2^i * j) * G at the current bit position i; 1 <= j <= 8.
+//
+// The bias is always either 0 or 1 and needs to be added to the
+// partially processed scalar implicitly. This is used to absorb 4 bits
+// of scalar per iteration from 3-bit table indexing by exploiting
+// negation: (16 * h + l) * G = (16 * (h + 1) - (16 - l)) * G is used
+// when l >= 9. Note that we can't have any bias left over at the
+// end because of the clearing of bit 255 of the scalar, meaning the
+// l >= 9 case cannot arise on the last iteration.
+
+ mov i, 4
+ adr tab, edwards25519_gtable
+ mov bias, xzr
+
+// Start of the main loop, repeated 63 times for i = 4, 8, ..., 252
+
+scalarloop:
+
+// Look at the next 4-bit field "bf", adding the previous bias as well.
+// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9,
+// setting the bias to 1 for the next iteration in the latter case.
+
+ lsr x0, i, #6
+ ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly
+ lsr x2, x2, i
+ and x2, x2, #15
+ add bf, x2, bias
+
+ cmp bf, 9
+ cset bias, cs
+
+ mov x0, 16
+ sub x0, x0, bf
+ cmp bias, xzr
+ csel ix, x0, bf, ne
+
+// Perform constant-time lookup in the table to get element number "ix".
+// The table entry for the affine point (x,y) is actually a triple
+// (y - x,x + y,2 * d * x * y) to precompute parts of the addition.
+// Note that "ix" can be 0, so we set up the appropriate identity first.
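+//
+// (The identity of edwards25519 is (0,1), whose triple encoding is
+// (y - x, x + y, 2 * d * x * y) = (1,1,0); the initial register values
+// below represent exactly that, before the eight comparison blocks
+// overwrite them in constant time for ix = 1..8.)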
+
+        mov x0, #1
+        mov x1, xzr
+        mov x2, xzr
+        mov x3, xzr
+        mov x4, #1
+        mov x5, xzr
+        mov x6, xzr
+        mov x7, xzr
+        mov x8, xzr
+        mov x9, xzr
+        mov x10, xzr
+        mov x11, xzr
+
+        cmp ix, #1
+        ldp x12, x13, [tab]
+        csel x0, x0, x12, ne
+        csel x1, x1, x13, ne
+        ldp x12, x13, [tab, #16]
+        csel x2, x2, x12, ne
+        csel x3, x3, x13, ne
+        ldp x12, x13, [tab, #32]
+        csel x4, x4, x12, ne
+        csel x5, x5, x13, ne
+        ldp x12, x13, [tab, #48]
+        csel x6, x6, x12, ne
+        csel x7, x7, x13, ne
+        ldp x12, x13, [tab, #64]
+        csel x8, x8, x12, ne
+        csel x9, x9, x13, ne
+        ldp x12, x13, [tab, #80]
+        csel x10, x10, x12, ne
+        csel x11, x11, x13, ne
+        add tab, tab, #96
+
+        cmp ix, #2
+        ldp x12, x13, [tab]
+        csel x0, x0, x12, ne
+        csel x1, x1, x13, ne
+        ldp x12, x13, [tab, #16]
+        csel x2, x2, x12, ne
+        csel x3, x3, x13, ne
+        ldp x12, x13, [tab, #32]
+        csel x4, x4, x12, ne
+        csel x5, x5, x13, ne
+        ldp x12, x13, [tab, #48]
+        csel x6, x6, x12, ne
+        csel x7, x7, x13, ne
+        ldp x12, x13, [tab, #64]
+        csel x8, x8, x12, ne
+        csel x9, x9, x13, ne
+        ldp x12, x13, [tab, #80]
+        csel x10, x10, x12, ne
+        csel x11, x11, x13, ne
+        add tab, tab, #96
+
+        cmp ix, #3
+        ldp x12, x13, [tab]
+        csel x0, x0, x12, ne
+        csel x1, x1, x13, ne
+        ldp x12, x13, [tab, #16]
+        csel x2, x2, x12, ne
+        csel x3, x3, x13, ne
+        ldp x12, x13, [tab, #32]
+        csel x4, x4, x12, ne
+        csel x5, x5, x13, ne
+        ldp x12, x13, [tab, #48]
+        csel x6, x6, x12, ne
+        csel x7, x7, x13, ne
+        ldp x12, x13, [tab, #64]
+        csel x8, x8, x12, ne
+        csel x9, x9, x13, ne
+        ldp x12, x13, [tab, #80]
+        csel x10, x10, x12, ne
+        csel x11, x11, x13, ne
+        add tab, tab, #96
+
+        cmp ix, #4
+        ldp x12, x13, [tab]
+        csel x0, x0, x12, ne
+        csel x1, x1, x13, ne
+        ldp x12, x13, [tab, #16]
+        csel x2, x2, x12, ne
+        csel x3, x3, x13, ne
+        ldp x12, x13, [tab, #32]
+        csel x4, x4, x12, ne
+        csel x5, x5, x13, ne
+        ldp x12, x13, [tab, #48]
+        csel x6, x6, x12, ne
+        csel x7, x7, x13, ne
+        ldp x12, x13, [tab, #64]
+        csel x8, x8, x12, ne
+        csel x9, x9, x13, ne
+        ldp x12, x13, [tab, #80]
+        csel x10, x10, x12, ne
+        csel x11, x11, x13, ne
+        add tab, tab, #96
+
+        cmp ix, #5
+        ldp x12, x13, [tab]
+        csel x0, x0, x12, ne
+        csel x1, x1, x13, ne
+        ldp x12, x13, [tab, #16]
+        csel x2, x2, x12, ne
+        csel x3, x3, x13, ne
+        ldp x12, x13, [tab, #32]
+        csel x4, x4, x12, ne
+        csel x5, x5, x13, ne
+        ldp x12, x13, [tab, #48]
+        csel x6, x6, x12, ne
+        csel x7, x7, x13, ne
+        ldp x12, x13, [tab, #64]
+        csel x8, x8, x12, ne
+        csel x9, x9, x13, ne
+        ldp x12, x13, [tab, #80]
+        csel x10, x10, x12, ne
+        csel x11, x11, x13, ne
+        add tab, tab, #96
+
+        cmp ix, #6
+        ldp x12, x13, [tab]
+        csel x0, x0, x12, ne
+        csel x1, x1, x13, ne
+        ldp x12, x13, [tab, #16]
+        csel x2, x2, x12, ne
+        csel x3, x3, x13, ne
+        ldp x12, x13, [tab, #32]
+        csel x4, x4, x12, ne
+        csel x5, x5, x13, ne
+        ldp x12, x13, [tab, #48]
+        csel x6, x6, x12, ne
+        csel x7, x7, x13, ne
+        ldp x12, x13, [tab, #64]
+        csel x8, x8, x12, ne
+        csel x9, x9, x13, ne
+        ldp x12, x13, [tab, #80]
+        csel x10, x10, x12, ne
+        csel x11, x11, x13, ne
+        add tab, tab, #96
+
+        cmp ix, #7
+        ldp x12, x13, [tab]
+        csel x0, x0, x12, ne
+        csel x1, x1, x13, ne
+        ldp x12, x13, [tab, #16]
+        csel x2, x2, x12, ne
+        csel x3, x3, x13, ne
+        ldp x12, x13, [tab, #32]
+        csel x4, x4, x12, ne
+        csel x5, x5, x13, ne
+        ldp x12, x13, [tab, #48]
+        csel x6, x6, x12, ne
+        csel x7, x7, x13, ne
+        ldp x12, x13, [tab, #64]
+        csel x8, x8, x12, ne
+        csel x9, x9, x13, ne
+        ldp x12, x13, [tab, #80]
+        csel x10, x10, x12, ne
+        csel x11, x11, x13, ne
+        add tab, tab, #96
+
+        cmp ix, #8
+        ldp x12, x13, [tab]
+        csel x0, x0, x12, ne
+        csel x1, x1, x13, ne
+        ldp x12, x13, [tab, #16]
+        csel x2, x2, x12, ne
+        csel x3, x3, x13, ne
+        ldp x12, x13, [tab, #32]
+        csel x4, x4, x12, ne
+        csel x5, x5, x13, ne
+        ldp x12, x13, [tab, #48]
+        csel x6, x6, x12, ne
+        csel x7, x7, x13, ne
+        ldp x12, x13, [tab, #64]
+        csel x8, x8, x12, ne
+        csel x9, x9, x13, ne
+        ldp x12, x13, [tab, #80]
+        csel x10, x10, x12, ne
+        csel x11, x11, x13, ne
+        add tab, tab, #96
+
+// We now have the triple from the table in registers as follows
+//
+// [x3;x2;x1;x0] = y - x
+// [x7;x6;x5;x4] = x + y
+// [x11;x10;x9;x8] = 2 * d * x * y
+//
+// In case bias = 1 we need to negate this. For Edwards curves
+// -(x,y) = (-x,y), i.e. we need to negate the x coordinate.
+// In this processed encoding, that amounts to swapping the
+// first two fields and negating the third.
+//
+// The optional negation here also pretends bias = 0 whenever
+// ix = 0 so that it doesn't need to handle the case of zero
+// inputs, since no non-trivial table entries are zero. Note
+// that in the zero case the whole negation is trivial, and
+// so indeed is the swapping.
+
+        cmp bias, #0
+
+        csel x12, x0, x4, eq
+        csel x13, x1, x5, eq
+        csel x14, x2, x6, eq
+        csel x15, x3, x7, eq
+        stp x12, x13, [tabent]
+        stp x14, x15, [tabent+16]
+
+        csel x12, x0, x4, ne
+        csel x13, x1, x5, ne
+        csel x14, x2, x6, ne
+        csel x15, x3, x7, ne
+        stp x12, x13, [tabent+32]
+        stp x14, x15, [tabent+48]
+
+        mov x0, #-19
+        subs x0, x0, x8
+        mov x2, #-1
+        sbcs x1, x2, x9
+        sbcs x2, x2, x10
+        mov x3, #0x7FFFFFFFFFFFFFFF
+        sbc x3, x3, x11
+
+        cmp ix, xzr
+        ccmp bias, xzr, #4, ne
+
+        csel x0, x0, x8, ne
+        csel x1, x1, x9, ne
+        stp x0, x1, [tabent+64]
+        csel x2, x2, x10, ne
+        csel x3, x3, x11, ne
+        stp x2, x3, [tabent+80]
+
+// Extended-projective and precomputed mixed addition.
+// This is effectively the same as calling the standalone
+// function edwards25519_pepadd(acc,acc,tabent), but we
+// only retain slightly weaker normalization < 2 * p_25519
+// throughout the inner loop, so the computation is
+// slightly different, and faster overall.
+
+        double_twice4(t0,z_1)
+        sub_twice4(t1,y_1,x_1)
+        add_twice4(t2,y_1,x_1)
+        mul_4(t3,w_1,kxy_2)
+        mul_4(t1,t1,ymx_2)
+        mul_4(t2,t2,xpy_2)
+        sub_twice4(t4,t0,t3)
+        add_twice4(t0,t0,t3)
+        sub_twice4(t5,t2,t1)
+        add_twice4(t1,t2,t1)
+        mul_4(z_3,t4,t0)
+        mul_4(x_3,t5,t4)
+        mul_4(y_3,t0,t1)
+        mul_4(w_3,t5,t1)
+
+// End of the main loop; move on by 4 bits.
+
+        add i, i, 4
+        cmp i, 256
+        bcc scalarloop
+
+// Now we need to translate from Edwards curve edwards25519 back
+// to the Montgomery form curve25519. The mapping in the affine
+// representations is
+//
+// (x,y) |-> ((1 + y) / (1 - y), c * (1 + y) / ((1 - y) * x))
+//
+// For x25519, we only need the x coordinate, and we compute this as
+//
+// (1 + y) / (1 - y) = (x + x * y) / (x - x * y)
+//                   = (X/Z + T/Z) / (X/Z - T/Z)
+//                   = (X + T) / (X - T)
+//                   = (X + T) * inverse(X - T)
+//
+// We could equally well use (Z + Y) / (Z - Y), but the above has the
+// same cost, and it more explicitly forces zero output whenever X = 0,
+// regardless of how the modular inverse behaves on zero inputs. In
+// the present setting (base point 9, mangled scalar) that doesn't
+// really matter anyway since X = 0 never arises, but it seems a
+// little bit tidier. Note that the Edwards point (0,1), which maps to
+// the Montgomery point at infinity, and the Edwards point (0,-1), which
+// maps to Montgomery (0,0) [the 2-torsion point], are both by definition
+// mapped to 0 by the X coordinate mapping used to define curve25519.
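+//
+// As a rough C-level sketch, the remaining computation is essentially the
+// following (illustrative pseudocode only: xpt, xmt, inv, tmp and u are
+// placeholder names, while add_twice4, sub_twice4, bignum_modinv and
+// mul_p25519 are the actual macros/function used below):
+//
+//     add_twice4(xpt, X, T)                      // X + T
+//     sub_twice4(xmt, X, T)                      // X - T, reduced mod 2*p_25519
+//     bignum_modinv(4, inv, xmt, p_25519, tmp)   // inv = (X - T)^-1 mod p_25519
+//     mul_p25519(u, xpt, inv)                    // canonical (X + T) / (X - T)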
+//
+// First the addition and subtraction:
+
+        add_twice4(y_3,x_3,w_3)
+        sub_twice4(z_3,x_3,w_3)
+
+// Prepare to call the modular inverse function to get x_3 = 1/z_3
+// Note that this works equally well for the weakly normalized z_3.
+// The non-coprime case z_3 == 0 (mod p_25519) cannot arise anyway.
+
+        mov x0, 4
+        add x1, x_3
+        add x2, z_3
+        adr x3, p_25519
+        add x4, tmpspace
+
+// Inline copy of bignum_modinv, identical except for stripping out the
+// prologue and epilogue saving and restoring registers and the initial
+// test for k = 0 (which is trivially false here since k = 4). For more
+// details and explanations see "arm/generic/bignum_modinv.S".
+
+        lsl x10, x0, #3
+        add x21, x4, x10
+        add x22, x21, x10
+        mov x10, xzr
+copyloop:
+        ldr x11, [x2, x10, lsl #3]
+        ldr x12, [x3, x10, lsl #3]
+        str x11, [x21, x10, lsl #3]
+        str x12, [x22, x10, lsl #3]
+        str x12, [x4, x10, lsl #3]
+        str xzr, [x1, x10, lsl #3]
+        add x10, x10, #0x1
+        cmp x10, x0
+        b.cc copyloop
+        ldr x11, [x4]
+        sub x12, x11, #0x1
+        str x12, [x4]
+        lsl x20, x11, #2
+        sub x20, x11, x20
+        eor x20, x20, #0x2
+        mov x12, #0x1
+        madd x12, x11, x20, x12
+        mul x11, x12, x12
+        madd x20, x12, x20, x20
+        mul x12, x11, x11
+        madd x20, x11, x20, x20
+        mul x11, x12, x12
+        madd x20, x12, x20, x20
+        madd x20, x11, x20, x20
+        lsl x2, x0, #7
+outerloop:
+        add x10, x2, #0x3f
+        lsr x5, x10, #6
+        cmp x5, x0
+        csel x5, x0, x5, cs
+        mov x13, xzr
+        mov x15, xzr
+        mov x14, xzr
+        mov x16, xzr
+        mov x19, xzr
+        mov x10, xzr
+toploop:
+        ldr x11, [x21, x10, lsl #3]
+        ldr x12, [x22, x10, lsl #3]
+        orr x17, x11, x12
+        cmp x17, xzr
+        and x17, x19, x13
+        csel x15, x17, x15, ne
+        and x17, x19, x14
+        csel x16, x17, x16, ne
+        csel x13, x11, x13, ne
+        csel x14, x12, x14, ne
+        csetm x19, ne
+        add x10, x10, #0x1
+        cmp x10, x5
+        b.cc toploop
+        orr x11, x13, x14
+        clz x12, x11
+        negs x17, x12
+        lsl x13, x13, x12
+        csel x15, x15, xzr, ne
+        lsl x14, x14, x12
+        csel x16, x16, xzr, ne
+        lsr x15, x15, x17
+        lsr x16, x16, x17
+        orr x13, x13, x15
+        orr x14, x14, x16
+        ldr x15, [x21]
+        ldr x16, [x22]
+        mov x6, #0x1
+        mov x7, xzr
+        mov x8, xzr
+        mov x9, #0x1
+        mov x10, #0x3a
+        tst x15, #0x1
+innerloop:
+        csel x11, x14, xzr, ne
+        csel x12, x16, xzr, ne
+        csel x17, x8, xzr, ne
+        csel x19, x9, xzr, ne
+        ccmp x13, x14, #0x2, ne
+        sub x11, x13, x11
+        sub x12, x15, x12
+        csel x14, x14, x13, cs
+        cneg x11, x11, cc
+        csel x16, x16, x15, cs
+        cneg x15, x12, cc
+        csel x8, x8, x6, cs
+        csel x9, x9, x7, cs
+        tst x12, #0x2
+        add x6, x6, x17
+        add x7, x7, x19
+        lsr x13, x11, #1
+        lsr x15, x15, #1
+        add x8, x8, x8
+        add x9, x9, x9
+        sub x10, x10, #0x1
+        cbnz x10, innerloop
+        mov x13, xzr
+        mov x14, xzr
+        mov x17, xzr
+        mov x19, xzr
+        mov x10, xzr
+congloop:
+        ldr x11, [x4, x10, lsl #3]
+        ldr x12, [x1, x10, lsl #3]
+        mul x15, x6, x11
+        mul x16, x7, x12
+        adds x15, x15, x13
+        umulh x13, x6, x11
+        adc x13, x13, xzr
+        adds x15, x15, x16
+        extr x17, x15, x17, #58
+        str x17, [x4, x10, lsl #3]
+        mov x17, x15
+        umulh x15, x7, x12
+        adc x13, x13, x15
+        mul x15, x8, x11
+        mul x16, x9, x12
+        adds x15, x15, x14
+        umulh x14, x8, x11
+        adc x14, x14, xzr
+        adds x15, x15, x16
+        extr x19, x15, x19, #58
+        str x19, [x1, x10, lsl #3]
+        mov x19, x15
+        umulh x15, x9, x12
+        adc x14, x14, x15
+        add x10, x10, #0x1
+        cmp x10, x0
+        b.cc congloop
+        extr x13, x13, x17, #58
+        extr x14, x14, x19, #58
+        ldr x11, [x4]
+        mul x17, x11, x20
+        ldr x12, [x3]
+        mul x15, x17, x12
+        umulh x16, x17, x12
+        adds x11, x11, x15
+        mov x10, #0x1
+        sub x11, x0, #0x1
+        cbz x11, wmontend
+wmontloop:
+        ldr x11, [x3, x10, lsl #3]
+        ldr x12, [x4, x10, lsl #3]
+        mul x15, x17, x11
+        adcs x12, x12, x16
+        umulh x16, x17, x11
+        adc x16, x16, xzr
+        adds x12, x12, x15
+        sub x15, x10, #0x1
+        str x12, [x4, x15, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, wmontloop
+wmontend:
+        adcs x16, x16, x13
+        adc x13, xzr, xzr
+        sub x15, x10, #0x1
+        str x16, [x4, x15, lsl #3]
+        negs x10, xzr
+wcmploop:
+        ldr x11, [x4, x10, lsl #3]
+        ldr x12, [x3, x10, lsl #3]
+        sbcs xzr, x11, x12
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, wcmploop
+        sbcs xzr, x13, xzr
+        csetm x13, cs
+        negs x10, xzr
+wcorrloop:
+        ldr x11, [x4, x10, lsl #3]
+        ldr x12, [x3, x10, lsl #3]
+        and x12, x12, x13
+        sbcs x11, x11, x12
+        str x11, [x4, x10, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, wcorrloop
+        ldr x11, [x1]
+        mul x17, x11, x20
+        ldr x12, [x3]
+        mul x15, x17, x12
+        umulh x16, x17, x12
+        adds x11, x11, x15
+        mov x10, #0x1
+        sub x11, x0, #0x1
+        cbz x11, zmontend
+zmontloop:
+        ldr x11, [x3, x10, lsl #3]
+        ldr x12, [x1, x10, lsl #3]
+        mul x15, x17, x11
+        adcs x12, x12, x16
+        umulh x16, x17, x11
+        adc x16, x16, xzr
+        adds x12, x12, x15
+        sub x15, x10, #0x1
+        str x12, [x1, x15, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, zmontloop
+zmontend:
+        adcs x16, x16, x14
+        adc x14, xzr, xzr
+        sub x15, x10, #0x1
+        str x16, [x1, x15, lsl #3]
+        negs x10, xzr
+zcmploop:
+        ldr x11, [x1, x10, lsl #3]
+        ldr x12, [x3, x10, lsl #3]
+        sbcs xzr, x11, x12
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, zcmploop
+        sbcs xzr, x14, xzr
+        csetm x14, cs
+        negs x10, xzr
+zcorrloop:
+        ldr x11, [x1, x10, lsl #3]
+        ldr x12, [x3, x10, lsl #3]
+        and x12, x12, x14
+        sbcs x11, x11, x12
+        str x11, [x1, x10, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, zcorrloop
+        mov x13, xzr
+        mov x14, xzr
+        mov x17, xzr
+        mov x19, xzr
+        mov x10, xzr
+crossloop:
+        ldr x11, [x21, x10, lsl #3]
+        ldr x12, [x22, x10, lsl #3]
+        mul x15, x6, x11
+        mul x16, x7, x12
+        adds x15, x15, x13
+        umulh x13, x6, x11
+        adc x13, x13, xzr
+        subs x15, x15, x16
+        str x15, [x21, x10, lsl #3]
+        umulh x15, x7, x12
+        sub x17, x15, x17
+        sbcs x13, x13, x17
+        csetm x17, cc
+        mul x15, x8, x11
+        mul x16, x9, x12
+        adds x15, x15, x14
+        umulh x14, x8, x11
+        adc x14, x14, xzr
+        subs x15, x15, x16
+        str x15, [x22, x10, lsl #3]
+        umulh x15, x9, x12
+        sub x19, x15, x19
+        sbcs x14, x14, x19
+        csetm x19, cc
+        add x10, x10, #0x1
+        cmp x10, x5
+        b.cc crossloop
+        cmn x17, x17
+        ldr x15, [x21]
+        mov x10, xzr
+        sub x6, x5, #0x1
+        cbz x6, negskip1
+negloop1:
+        add x11, x10, #0x8
+        ldr x12, [x21, x11]
+        extr x15, x12, x15, #58
+        eor x15, x15, x17
+        adcs x15, x15, xzr
+        str x15, [x21, x10]
+        mov x15, x12
+        add x10, x10, #0x8
+        sub x6, x6, #0x1
+        cbnz x6, negloop1
+negskip1:
+        extr x15, x13, x15, #58
+        eor x15, x15, x17
+        adcs x15, x15, xzr
+        str x15, [x21, x10]
+        cmn x19, x19
+        ldr x15, [x22]
+        mov x10, xzr
+        sub x6, x5, #0x1
+        cbz x6, negskip2
+negloop2:
+        add x11, x10, #0x8
+        ldr x12, [x22, x11]
+        extr x15, x12, x15, #58
+        eor x15, x15, x19
+        adcs x15, x15, xzr
+        str x15, [x22, x10]
+        mov x15, x12
+        add x10, x10, #0x8
+        sub x6, x6, #0x1
+        cbnz x6, negloop2
+negskip2:
+        extr x15, x14, x15, #58
+        eor x15, x15, x19
+        adcs x15, x15, xzr
+        str x15, [x22, x10]
+        mov x10, xzr
+        cmn x17, x17
+wfliploop:
+        ldr x11, [x3, x10, lsl #3]
+        ldr x12, [x4, x10, lsl #3]
+        and x11, x11, x17
+        eor x12, x12, x17
+        adcs x11, x11, x12
+        str x11, [x4, x10, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, wfliploop
+        mvn x19, x19
+        mov x10, xzr
+        cmn x19, x19
+zfliploop:
+        ldr x11, [x3, x10, lsl #3]
+        ldr x12, [x1, x10, lsl #3]
+        and x11, x11, x19
+        eor x12, x12, x19
+        adcs x11, x11, x12
+        str x11, [x1, x10, lsl #3]
+        add x10, x10, #0x1
+        sub x11, x10, x0
+        cbnz x11, zfliploop
+        subs x2, x2, #0x3a
+        b.hi outerloop
+
+// The final result is (X + T) / (X - T)
+// This is the only operation in the whole computation that
+// fully reduces modulo p_25519 since now we want the canonical
+// answer as output.
+
+        mul_p25519(x_1,y_3,x_3)
+
+        ldp x10, x11, [x_1]
+        strb w10, [resx]
+        lsr x10, x10, #8
+        strb w10, [resx+1]
+        lsr x10, x10, #8
+        strb w10, [resx+2]
+        lsr x10, x10, #8
+        strb w10, [resx+3]
+        lsr x10, x10, #8
+        strb w10, [resx+4]
+        lsr x10, x10, #8
+        strb w10, [resx+5]
+        lsr x10, x10, #8
+        strb w10, [resx+6]
+        lsr x10, x10, #8
+        strb w10, [resx+7]
+
+        strb w11, [resx+8]
+        lsr x11, x11, #8
+        strb w11, [resx+9]
+        lsr x11, x11, #8
+        strb w11, [resx+10]
+        lsr x11, x11, #8
+        strb w11, [resx+11]
+        lsr x11, x11, #8
+        strb w11, [resx+12]
+        lsr x11, x11, #8
+        strb w11, [resx+13]
+        lsr x11, x11, #8
+        strb w11, [resx+14]
+        lsr x11, x11, #8
+        strb w11, [resx+15]
+
+        ldp x12, x13, [x_1+16]
+        strb w12, [resx+16]
+        lsr x12, x12, #8
+        strb w12, [resx+17]
+        lsr x12, x12, #8
+        strb w12, [resx+18]
+        lsr x12, x12, #8
+        strb w12, [resx+19]
+        lsr x12, x12, #8
+        strb w12, [resx+20]
+        lsr x12, x12, #8
+        strb w12, [resx+21]
+        lsr x12, x12, #8
+        strb w12, [resx+22]
+        lsr x12, x12, #8
+        strb w12, [resx+23]
+
+        strb w13, [resx+24]
+        lsr x13, x13, #8
+        strb w13, [resx+25]
+        lsr x13, x13, #8
+        strb w13, [resx+26]
+        lsr x13, x13, #8
+        strb w13, [resx+27]
+        lsr x13, x13, #8
+        strb w13, [resx+28]
+        lsr x13, x13, #8
+        strb w13, [resx+29]
+        lsr x13, x13, #8
+        strb w13, [resx+30]
+        lsr x13, x13, #8
+        strb w13, [resx+31]
+
+
+// Restore stack and registers
+
+        add sp, sp, #NSPACE
+        ldp x23, x24, [sp], 16
+        ldp x21, x22, [sp], 16
+        ldp x19, x20, [sp], 16
+
+        ret
+
+// ****************************************************************************
+// The precomputed data (all read-only). This is currently part of the same
+// text section, which gives position-independent code with simple PC-relative
+// addressing. However it could be put in a separate section via something like
+//
+// .section .rodata
+// ****************************************************************************
+
+// The modulus p_25519 = 2^255 - 19, for the modular inverse
+
+p_25519:
+        .quad 0xffffffffffffffed
+        .quad 0xffffffffffffffff
+        .quad 0xffffffffffffffff
+        .quad 0x7fffffffffffffff
+
+// 2^254 * G and (2^254 + 8) * G in extended-projective coordinates
+// but with Z = 1 assumed and hence left out, so they are (X,Y,T) only.
+
+edwards25519_0g:
+
+        .quad 0x251037f7cf4e861d
+        .quad 0x10ede0fb19fb128f
+        .quad 0x96c033b175f5e2c8
+        .quad 0x055f070d6c15fb0d
+
+        .quad 0x7c52af2c97473e69
+        .quad 0x022f82391bad8378
+        .quad 0x9991e1b02adb476f
+        .quad 0x511144a03a99b855
+
+        .quad 0x5fafc3b88ff2e4ae
+        .quad 0x855e4ff0de1230ff
+        .quad 0x72e302a348492870
+        .quad 0x1253c19e53dbe1bc
+
+edwards25519_8g:
+
+        .quad 0x331d086e0d9abcaa
+        .quad 0x1e23c96d311a10c9
+        .quad 0x96d0f95e58c13478
+        .quad 0x2f72f7384fcfcc59
+
+        .quad 0x39a6cd1cfd7d87c9
+        .quad 0x9867a0abd8ae153a
+        .quad 0xa49d2a5f35986745
+        .quad 0x57012940cdfe82e1
+
+        .quad 0x5046a6532ec5544a
+        .quad 0x6d674004739ff6c9
+        .quad 0x9bbaa44b234a70e3
+        .quad 0x5e6d8901138cf386
+
+// Precomputed table of multiples of generator for edwards25519
+// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples.
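+//
+// As a C-level picture of the layout (illustrative only; the struct and
+// field names below are hypothetical, not anything used by the code),
+// each 96-byte table entry is
+//
+//     struct entry {
+//         uint64_t ymx[4];   // y - x
+//         uint64_t xpy[4];   // x + y
+//         uint64_t kxy[4];   // 2 * d * x * y
+//     };
+//
+// matching the "add tab, tab, #96" steps in the selection code above, with
+// 8 consecutive entries per 4-bit window holding the multiples for
+// j = 1,...,8 as the per-entry comments indicate (the j = 0 case is covered
+// by the neutral-element initialization before the selection).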
+
+edwards25519_gtable:
+
+        // 2^4 * 1 * G
+
+        .quad 0x7ec851ca553e2df3
+        .quad 0xa71284cba64878b3
+        .quad 0xe6b5e4193288d1e7
+        .quad 0x4cf210ec5a9a8883
+        .quad 0x322d04a52d9021f6
+        .quad 0xb9c19f3375c6bf9c
+        .quad 0x587a3a4342d20b09
+        .quad 0x143b1cf8aa64fe61
+        .quad 0x9f867c7d968acaab
+        .quad 0x5f54258e27092729
+        .quad 0xd0a7d34bea180975
+        .quad 0x21b546a3374126e1
+
+        // 2^4 * 2 * G
+
+        .quad 0xa94ff858a2888343
+        .quad 0xce0ed4565313ed3c
+        .quad 0xf55c3dcfb5bf34fa
+        .quad 0x0a653ca5c9eab371
+        .quad 0x490a7a45d185218f
+        .quad 0x9a15377846049335
+        .quad 0x0060ea09cc31e1f6
+        .quad 0x7e041577f86ee965
+        .quad 0x66b2a496ce5b67f3
+        .quad 0xff5492d8bd569796
+        .quad 0x503cec294a592cd0
+        .quad 0x566943650813acb2
+
+        // 2^4 * 3 * G
+
+        .quad 0xb818db0c26620798
+        .quad 0x5d5c31d9606e354a
+        .quad 0x0982fa4f00a8cdc7
+        .quad 0x17e12bcd4653e2d4
+        .quad 0x5672f9eb1dabb69d
+        .quad 0xba70b535afe853fc
+        .quad 0x47ac0f752796d66d
+        .quad 0x32a5351794117275
+        .quad 0xd3a644a6df648437
+        .quad 0x703b6559880fbfdd
+        .quad 0xcb852540ad3a1aa5
+        .quad 0x0900b3f78e4c6468
+
+        // 2^4 * 4 * G
+
+        .quad 0x0a851b9f679d651b
+        .quad 0xe108cb61033342f2
+        .quad 0xd601f57fe88b30a3
+        .quad 0x371f3acaed2dd714
+        .quad 0xed280fbec816ad31
+        .quad 0x52d9595bd8e6efe3
+        .quad 0x0fe71772f6c623f5
+        .quad 0x4314030b051e293c
+        .quad 0xd560005efbf0bcad
+        .quad 0x8eb70f2ed1870c5e
+        .quad 0x201f9033d084e6a0
+        .quad 0x4c3a5ae1ce7b6670
+
+        // 2^4 * 5 * G
+
+        .quad 0x4138a434dcb8fa95
+        .quad 0x870cf67d6c96840b
+        .quad 0xde388574297be82c
+        .quad 0x7c814db27262a55a
+        .quad 0xbaf875e4c93da0dd
+        .quad 0xb93282a771b9294d
+        .quad 0x80d63fb7f4c6c460
+        .quad 0x6de9c73dea66c181
+        .quad 0x478904d5a04df8f2
+        .quad 0xfafbae4ab10142d3
+        .quad 0xf6c8ac63555d0998
+        .quad 0x5aac4a412f90b104
+
+        // 2^4 * 6 * G
+
+        .quad 0xc64f326b3ac92908
+        .quad 0x5551b282e663e1e0
+        .quad 0x476b35f54a1a4b83
+        .quad 0x1b9da3fe189f68c2
+        .quad 0x603a0d0abd7f5134
+        .quad 0x8089c932e1d3ae46
+        .quad 0xdf2591398798bd63
+        .quad 0x1c145cd274ba0235
+        .quad 0x32e8386475f3d743
+        .quad 0x365b8baf6ae5d9ef
+        .quad 0x825238b6385b681e
+        .quad 0x234929c1167d65e1
+
+        // 2^4 * 7 * G
+
+        .quad 0x984decaba077ade8
+        .quad 0x383f77ad19eb389d
+        .quad 0xc7ec6b7e2954d794
+        .quad 0x59c77b3aeb7c3a7a
+        .quad 0x48145cc21d099fcf
+        .quad 0x4535c192cc28d7e5
+        .quad 0x80e7c1e548247e01
+        .quad 0x4a5f28743b2973ee
+        .quad 0xd3add725225ccf62
+        .quad 0x911a3381b2152c5d
+        .quad 0xd8b39fad5b08f87d
+        .quad 0x6f05606b4799fe3b
+
+        // 2^4 * 8 * G
+
+        .quad 0x9ffe9e92177ba962
+        .quad 0x98aee71d0de5cae1
+        .quad 0x3ff4ae942d831044
+        .quad 0x714de12e58533ac8
+        .quad 0x5b433149f91b6483
+        .quad 0xadb5dc655a2cbf62
+        .quad 0x87fa8412632827b3
+        .quad 0x60895e91ab49f8d8
+        .quad 0xe9ecf2ed0cf86c18
+        .quad 0xb46d06120735dfd4
+        .quad 0xbc9da09804b96be7
+        .quad 0x73e2e62fd96dc26b
+
+        // 2^8 * 1 * G
+
+        .quad 0xed5b635449aa515e
+        .quad 0xa865c49f0bc6823a
+        .quad 0x850c1fe95b42d1c4
+        .quad 0x30d76d6f03d315b9
+        .quad 0x2eccdd0e632f9c1d
+        .quad 0x51d0b69676893115
+        .quad 0x52dfb76ba8637a58
+        .quad 0x6dd37d49a00eef39
+        .quad 0x6c4444172106e4c7
+        .quad 0xfb53d680928d7f69
+        .quad 0xb4739ea4694d3f26
+        .quad 0x10c697112e864bb0
+
+        // 2^8 * 2 * G
+
+        .quad 0x6493c4277dbe5fde
+        .quad 0x265d4fad19ad7ea2
+        .quad 0x0e00dfc846304590
+        .quad 0x25e61cabed66fe09
+        .quad 0x0ca62aa08358c805
+        .quad 0x6a3d4ae37a204247
+        .quad 0x7464d3a63b11eddc
+        .quad 0x03bf9baf550806ef
+        .quad 0x3f13e128cc586604
+        .quad 0x6f5873ecb459747e
+        .quad 0xa0b63dedcc1268f5
+        .quad 0x566d78634586e22c
+
+        // 2^8 * 3 * G
+
+        .quad 0x1637a49f9cc10834
+        .quad 0xbc8e56d5a89bc451
.quad 0x1cb5ec0f7f7fd2db + .quad 0x33975bca5ecc35d9 + .quad 0xa1054285c65a2fd0 + .quad 0x6c64112af31667c3 + .quad 0x680ae240731aee58 + .quad 0x14fba5f34793b22a + .quad 0x3cd746166985f7d4 + .quad 0x593e5e84c9c80057 + .quad 0x2fc3f2b67b61131e + .quad 0x14829cea83fc526c + + // 2^8 * 4 * G + + .quad 0xff437b8497dd95c2 + .quad 0x6c744e30aa4eb5a7 + .quad 0x9e0c5d613c85e88b + .quad 0x2fd9c71e5f758173 + .quad 0x21e70b2f4e71ecb8 + .quad 0xe656ddb940a477e3 + .quad 0xbf6556cece1d4f80 + .quad 0x05fc3bc4535d7b7e + .quad 0x24b8b3ae52afdedd + .quad 0x3495638ced3b30cf + .quad 0x33a4bc83a9be8195 + .quad 0x373767475c651f04 + + // 2^8 * 5 * G + + .quad 0x2fba99fd40d1add9 + .quad 0xb307166f96f4d027 + .quad 0x4363f05215f03bae + .quad 0x1fbea56c3b18f999 + .quad 0x634095cb14246590 + .quad 0xef12144016c15535 + .quad 0x9e38140c8910bc60 + .quad 0x6bf5905730907c8c + .quad 0x0fa778f1e1415b8a + .quad 0x06409ff7bac3a77e + .quad 0x6f52d7b89aa29a50 + .quad 0x02521cf67a635a56 + + // 2^8 * 6 * G + + .quad 0x513fee0b0a9d5294 + .quad 0x8f98e75c0fdf5a66 + .quad 0xd4618688bfe107ce + .quad 0x3fa00a7e71382ced + .quad 0xb1146720772f5ee4 + .quad 0xe8f894b196079ace + .quad 0x4af8224d00ac824a + .quad 0x001753d9f7cd6cc4 + .quad 0x3c69232d963ddb34 + .quad 0x1dde87dab4973858 + .quad 0xaad7d1f9a091f285 + .quad 0x12b5fe2fa048edb6 + + // 2^8 * 7 * G + + .quad 0x71f0fbc496fce34d + .quad 0x73b9826badf35bed + .quad 0xd2047261ff28c561 + .quad 0x749b76f96fb1206f + .quad 0xdf2b7c26ad6f1e92 + .quad 0x4b66d323504b8913 + .quad 0x8c409dc0751c8bc3 + .quad 0x6f7e93c20796c7b8 + .quad 0x1f5af604aea6ae05 + .quad 0xc12351f1bee49c99 + .quad 0x61a808b5eeff6b66 + .quad 0x0fcec10f01e02151 + + // 2^8 * 8 * G + + .quad 0x644d58a649fe1e44 + .quad 0x21fcaea231ad777e + .quad 0x02441c5a887fd0d2 + .quad 0x4901aa7183c511f3 + .quad 0x3df2d29dc4244e45 + .quad 0x2b020e7493d8de0a + .quad 0x6cc8067e820c214d + .quad 0x413779166feab90a + .quad 0x08b1b7548c1af8f0 + .quad 0xce0f7a7c246299b4 + .quad 0xf760b0f91e06d939 + .quad 0x41bb887b726d1213 + + // 2^12 * 1 * G + + .quad 0x9267806c567c49d8 + .quad 0x066d04ccca791e6a + .quad 0xa69f5645e3cc394b + .quad 0x5c95b686a0788cd2 + .quad 0x97d980e0aa39f7d2 + .quad 0x35d0384252c6b51c + .quad 0x7d43f49307cd55aa + .quad 0x56bd36cfb78ac362 + .quad 0x2ac519c10d14a954 + .quad 0xeaf474b494b5fa90 + .quad 0xe6af8382a9f87a5a + .quad 0x0dea6db1879be094 + + // 2^12 * 2 * G + + .quad 0xaa66bf547344e5ab + .quad 0xda1258888f1b4309 + .quad 0x5e87d2b3fd564b2f + .quad 0x5b2c78885483b1dd + .quad 0x15baeb74d6a8797a + .quad 0x7ef55cf1fac41732 + .quad 0x29001f5a3c8b05c5 + .quad 0x0ad7cc8752eaccfb + .quad 0x52151362793408cf + .quad 0xeb0f170319963d94 + .quad 0xa833b2fa883d9466 + .quad 0x093a7fa775003c78 + + // 2^12 * 3 * G + + .quad 0xe5107de63a16d7be + .quad 0xa377ffdc9af332cf + .quad 0x70d5bf18440b677f + .quad 0x6a252b19a4a31403 + .quad 0xb8e9604460a91286 + .quad 0x7f3fd8047778d3de + .quad 0x67d01e31bf8a5e2d + .quad 0x7b038a06c27b653e + .quad 0x9ed919d5d36990f3 + .quad 0x5213aebbdb4eb9f2 + .quad 0xc708ea054cb99135 + .quad 0x58ded57f72260e56 + + // 2^12 * 4 * G + + .quad 0x78e79dade9413d77 + .quad 0xf257f9d59729e67d + .quad 0x59db910ee37aa7e6 + .quad 0x6aa11b5bbb9e039c + .quad 0xda6d53265b0fd48b + .quad 0x8960823193bfa988 + .quad 0xd78ac93261d57e28 + .quad 0x79f2942d3a5c8143 + .quad 0x97da2f25b6c88de9 + .quad 0x251ba7eaacf20169 + .quad 0x09b44f87ef4eb4e4 + .quad 0x7d90ab1bbc6a7da5 + + // 2^12 * 5 * G + + .quad 0x9acca683a7016bfe + .quad 0x90505f4df2c50b6d + .quad 0x6b610d5fcce435aa + .quad 0x19a10d446198ff96 + .quad 0x1a07a3f496b3c397 + .quad 
0x11ceaa188f4e2532 + .quad 0x7d9498d5a7751bf0 + .quad 0x19ed161f508dd8a0 + .quad 0x560a2cd687dce6ca + .quad 0x7f3568c48664cf4d + .quad 0x8741e95222803a38 + .quad 0x483bdab1595653fc + + // 2^12 * 6 * G + + .quad 0xfa780f148734fa49 + .quad 0x106f0b70360534e0 + .quad 0x2210776fe3e307bd + .quad 0x3286c109dde6a0fe + .quad 0xd6cf4d0ab4da80f6 + .quad 0x82483e45f8307fe0 + .quad 0x05005269ae6f9da4 + .quad 0x1c7052909cf7877a + .quad 0x32ee7de2874e98d4 + .quad 0x14c362e9b97e0c60 + .quad 0x5781dcde6a60a38a + .quad 0x217dd5eaaa7aa840 + + // 2^12 * 7 * G + + .quad 0x9db7c4d0248e1eb0 + .quad 0xe07697e14d74bf52 + .quad 0x1e6a9b173c562354 + .quad 0x7fa7c21f795a4965 + .quad 0x8bdf1fb9be8c0ec8 + .quad 0x00bae7f8e30a0282 + .quad 0x4963991dad6c4f6c + .quad 0x07058a6e5df6f60a + .quad 0xe9eb02c4db31f67f + .quad 0xed25fd8910bcfb2b + .quad 0x46c8131f5c5cddb4 + .quad 0x33b21c13a0cb9bce + + // 2^12 * 8 * G + + .quad 0x360692f8087d8e31 + .quad 0xf4dcc637d27163f7 + .quad 0x25a4e62065ea5963 + .quad 0x659bf72e5ac160d9 + .quad 0x9aafb9b05ee38c5b + .quad 0xbf9d2d4e071a13c7 + .quad 0x8eee6e6de933290a + .quad 0x1c3bab17ae109717 + .quad 0x1c9ab216c7cab7b0 + .quad 0x7d65d37407bbc3cc + .quad 0x52744750504a58d5 + .quad 0x09f2606b131a2990 + + // 2^16 * 1 * G + + .quad 0x40e87d44744346be + .quad 0x1d48dad415b52b25 + .quad 0x7c3a8a18a13b603e + .quad 0x4eb728c12fcdbdf7 + .quad 0x7e234c597c6691ae + .quad 0x64889d3d0a85b4c8 + .quad 0xdae2c90c354afae7 + .quad 0x0a871e070c6a9e1d + .quad 0x3301b5994bbc8989 + .quad 0x736bae3a5bdd4260 + .quad 0x0d61ade219d59e3c + .quad 0x3ee7300f2685d464 + + // 2^16 * 2 * G + + .quad 0xf5d255e49e7dd6b7 + .quad 0x8016115c610b1eac + .quad 0x3c99975d92e187ca + .quad 0x13815762979125c2 + .quad 0x43fa7947841e7518 + .quad 0xe5c6fa59639c46d7 + .quad 0xa1065e1de3052b74 + .quad 0x7d47c6a2cfb89030 + .quad 0x3fdad0148ef0d6e0 + .quad 0x9d3e749a91546f3c + .quad 0x71ec621026bb8157 + .quad 0x148cf58d34c9ec80 + + // 2^16 * 3 * G + + .quad 0x46a492f67934f027 + .quad 0x469984bef6840aa9 + .quad 0x5ca1bc2a89611854 + .quad 0x3ff2fa1ebd5dbbd4 + .quad 0xe2572f7d9ae4756d + .quad 0x56c345bb88f3487f + .quad 0x9fd10b6d6960a88d + .quad 0x278febad4eaea1b9 + .quad 0xb1aa681f8c933966 + .quad 0x8c21949c20290c98 + .quad 0x39115291219d3c52 + .quad 0x4104dd02fe9c677b + + // 2^16 * 4 * G + + .quad 0x72b2bf5e1124422a + .quad 0xa1fa0c3398a33ab5 + .quad 0x94cb6101fa52b666 + .quad 0x2c863b00afaf53d5 + .quad 0x81214e06db096ab8 + .quad 0x21a8b6c90ce44f35 + .quad 0x6524c12a409e2af5 + .quad 0x0165b5a48efca481 + .quad 0xf190a474a0846a76 + .quad 0x12eff984cd2f7cc0 + .quad 0x695e290658aa2b8f + .quad 0x591b67d9bffec8b8 + + // 2^16 * 5 * G + + .quad 0x312f0d1c80b49bfa + .quad 0x5979515eabf3ec8a + .quad 0x727033c09ef01c88 + .quad 0x3de02ec7ca8f7bcb + .quad 0x99b9b3719f18b55d + .quad 0xe465e5faa18c641e + .quad 0x61081136c29f05ed + .quad 0x489b4f867030128b + .quad 0xd232102d3aeb92ef + .quad 0xe16253b46116a861 + .quad 0x3d7eabe7190baa24 + .quad 0x49f5fbba496cbebf + + // 2^16 * 6 * G + + .quad 0x30949a108a5bcfd4 + .quad 0xdc40dd70bc6473eb + .quad 0x92c294c1307c0d1c + .quad 0x5604a86dcbfa6e74 + .quad 0x155d628c1e9c572e + .quad 0x8a4d86acc5884741 + .quad 0x91a352f6515763eb + .quad 0x06a1a6c28867515b + .quad 0x7288d1d47c1764b6 + .quad 0x72541140e0418b51 + .quad 0x9f031a6018acf6d1 + .quad 0x20989e89fe2742c6 + + // 2^16 * 7 * G + + .quad 0x499777fd3a2dcc7f + .quad 0x32857c2ca54fd892 + .quad 0xa279d864d207e3a0 + .quad 0x0403ed1d0ca67e29 + .quad 0x1674278b85eaec2e + .quad 0x5621dc077acb2bdf + .quad 0x640a4c1661cbf45a + .quad 0x730b9950f70595d3 + .quad 
0xc94b2d35874ec552 + .quad 0xc5e6c8cf98246f8d + .quad 0xf7cb46fa16c035ce + .quad 0x5bd7454308303dcc + + // 2^16 * 8 * G + + .quad 0x7f9ad19528b24cc2 + .quad 0x7f6b54656335c181 + .quad 0x66b8b66e4fc07236 + .quad 0x133a78007380ad83 + .quad 0x85c4932115e7792a + .quad 0xc64c89a2bdcdddc9 + .quad 0x9d1e3da8ada3d762 + .quad 0x5bb7db123067f82c + .quad 0x0961f467c6ca62be + .quad 0x04ec21d6211952ee + .quad 0x182360779bd54770 + .quad 0x740dca6d58f0e0d2 + + // 2^20 * 1 * G + + .quad 0x50b70bf5d3f0af0b + .quad 0x4feaf48ae32e71f7 + .quad 0x60e84ed3a55bbd34 + .quad 0x00ed489b3f50d1ed + .quad 0x3906c72aed261ae5 + .quad 0x9ab68fd988e100f7 + .quad 0xf5e9059af3360197 + .quad 0x0e53dc78bf2b6d47 + .quad 0xb90829bf7971877a + .quad 0x5e4444636d17e631 + .quad 0x4d05c52e18276893 + .quad 0x27632d9a5a4a4af5 + + // 2^20 * 2 * G + + .quad 0xd11ff05154b260ce + .quad 0xd86dc38e72f95270 + .quad 0x601fcd0d267cc138 + .quad 0x2b67916429e90ccd + .quad 0xa98285d187eaffdb + .quad 0xa5b4fbbbd8d0a864 + .quad 0xb658f27f022663f7 + .quad 0x3bbc2b22d99ce282 + .quad 0xb917c952583c0a58 + .quad 0x653ff9b80fe4c6f3 + .quad 0x9b0da7d7bcdf3c0c + .quad 0x43a0eeb6ab54d60e + + // 2^20 * 3 * G + + .quad 0x396966a46d4a5487 + .quad 0xf811a18aac2bb3ba + .quad 0x66e4685b5628b26b + .quad 0x70a477029d929b92 + .quad 0x3ac6322357875fe8 + .quad 0xd9d4f4ecf5fbcb8f + .quad 0x8dee8493382bb620 + .quad 0x50c5eaa14c799fdc + .quad 0xdd0edc8bd6f2fb3c + .quad 0x54c63aa79cc7b7a0 + .quad 0xae0b032b2c8d9f1a + .quad 0x6f9ce107602967fb + + // 2^20 * 4 * G + + .quad 0xad1054b1cde1c22a + .quad 0xc4a8e90248eb32df + .quad 0x5f3e7b33accdc0ea + .quad 0x72364713fc79963e + .quad 0x139693063520e0b5 + .quad 0x437fcf7c88ea03fe + .quad 0xf7d4c40bd3c959bc + .quad 0x699154d1f893ded9 + .quad 0x315d5c75b4b27526 + .quad 0xcccb842d0236daa5 + .quad 0x22f0c8a3345fee8e + .quad 0x73975a617d39dbed + + // 2^20 * 5 * G + + .quad 0xe4024df96375da10 + .quad 0x78d3251a1830c870 + .quad 0x902b1948658cd91c + .quad 0x7e18b10b29b7438a + .quad 0x6f37f392f4433e46 + .quad 0x0e19b9a11f566b18 + .quad 0x220fb78a1fd1d662 + .quad 0x362a4258a381c94d + .quad 0x9071d9132b6beb2f + .quad 0x0f26e9ad28418247 + .quad 0xeab91ec9bdec925d + .quad 0x4be65bc8f48af2de + + // 2^20 * 6 * G + + .quad 0x78487feba36e7028 + .quad 0x5f3f13001dd8ce34 + .quad 0x934fb12d4b30c489 + .quad 0x056c244d397f0a2b + .quad 0x1d50fba257c26234 + .quad 0x7bd4823adeb0678b + .quad 0xc2b0dc6ea6538af5 + .quad 0x5665eec6351da73e + .quad 0xdb3ee00943bfb210 + .quad 0x4972018720800ac2 + .quad 0x26ab5d6173bd8667 + .quad 0x20b209c2ab204938 + + // 2^20 * 7 * G + + .quad 0x549e342ac07fb34b + .quad 0x02d8220821373d93 + .quad 0xbc262d70acd1f567 + .quad 0x7a92c9fdfbcac784 + .quad 0x1fcca94516bd3289 + .quad 0x448d65aa41420428 + .quad 0x59c3b7b216a55d62 + .quad 0x49992cc64e612cd8 + .quad 0x65bd1bea70f801de + .quad 0x1befb7c0fe49e28a + .quad 0xa86306cdb1b2ae4a + .quad 0x3b7ac0cd265c2a09 + + // 2^20 * 8 * G + + .quad 0x822bee438c01bcec + .quad 0x530cb525c0fbc73b + .quad 0x48519034c1953fe9 + .quad 0x265cc261e09a0f5b + .quad 0xf0d54e4f22ed39a7 + .quad 0xa2aae91e5608150a + .quad 0xf421b2e9eddae875 + .quad 0x31bc531d6b7de992 + .quad 0xdf3d134da980f971 + .quad 0x7a4fb8d1221a22a7 + .quad 0x3df7d42035aad6d8 + .quad 0x2a14edcc6a1a125e + + // 2^24 * 1 * G + + .quad 0xdf48ee0752cfce4e + .quad 0xc3fffaf306ec08b7 + .quad 0x05710b2ab95459c4 + .quad 0x161d25fa963ea38d + .quad 0x231a8c570478433c + .quad 0xb7b5270ec281439d + .quad 0xdbaa99eae3d9079f + .quad 0x2c03f5256c2b03d9 + .quad 0x790f18757b53a47d + .quad 0x307b0130cf0c5879 + .quad 0x31903d77257ef7f9 + .quad 
0x699468bdbd96bbaf + + // 2^24 * 2 * G + + .quad 0xbd1f2f46f4dafecf + .quad 0x7cef0114a47fd6f7 + .quad 0xd31ffdda4a47b37f + .quad 0x525219a473905785 + .quad 0xd8dd3de66aa91948 + .quad 0x485064c22fc0d2cc + .quad 0x9b48246634fdea2f + .quad 0x293e1c4e6c4a2e3a + .quad 0x376e134b925112e1 + .quad 0x703778b5dca15da0 + .quad 0xb04589af461c3111 + .quad 0x5b605c447f032823 + + // 2^24 * 3 * G + + .quad 0xb965805920c47c89 + .quad 0xe7f0100c923b8fcc + .quad 0x0001256502e2ef77 + .quad 0x24a76dcea8aeb3ee + .quad 0x3be9fec6f0e7f04c + .quad 0x866a579e75e34962 + .quad 0x5542ef161e1de61a + .quad 0x2f12fef4cc5abdd5 + .quad 0x0a4522b2dfc0c740 + .quad 0x10d06e7f40c9a407 + .quad 0xc6cf144178cff668 + .quad 0x5e607b2518a43790 + + // 2^24 * 4 * G + + .quad 0x58b31d8f6cdf1818 + .quad 0x35cfa74fc36258a2 + .quad 0xe1b3ff4f66e61d6e + .quad 0x5067acab6ccdd5f7 + .quad 0xa02c431ca596cf14 + .quad 0xe3c42d40aed3e400 + .quad 0xd24526802e0f26db + .quad 0x201f33139e457068 + .quad 0xfd527f6b08039d51 + .quad 0x18b14964017c0006 + .quad 0xd5220eb02e25a4a8 + .quad 0x397cba8862460375 + + // 2^24 * 5 * G + + .quad 0x30c13093f05959b2 + .quad 0xe23aa18de9a97976 + .quad 0x222fd491721d5e26 + .quad 0x2339d320766e6c3a + .quad 0x7815c3fbc81379e7 + .quad 0xa6619420dde12af1 + .quad 0xffa9c0f885a8fdd5 + .quad 0x771b4022c1e1c252 + .quad 0xd87dd986513a2fa7 + .quad 0xf5ac9b71f9d4cf08 + .quad 0xd06bc31b1ea283b3 + .quad 0x331a189219971a76 + + // 2^24 * 6 * G + + .quad 0xf5166f45fb4f80c6 + .quad 0x9c36c7de61c775cf + .quad 0xe3d4e81b9041d91c + .quad 0x31167c6b83bdfe21 + .quad 0x26512f3a9d7572af + .quad 0x5bcbe28868074a9e + .quad 0x84edc1c11180f7c4 + .quad 0x1ac9619ff649a67b + .quad 0xf22b3842524b1068 + .quad 0x5068343bee9ce987 + .quad 0xfc9d71844a6250c8 + .quad 0x612436341f08b111 + + // 2^24 * 7 * G + + .quad 0xd99d41db874e898d + .quad 0x09fea5f16c07dc20 + .quad 0x793d2c67d00f9bbc + .quad 0x46ebe2309e5eff40 + .quad 0x8b6349e31a2d2638 + .quad 0x9ddfb7009bd3fd35 + .quad 0x7f8bf1b8a3a06ba4 + .quad 0x1522aa3178d90445 + .quad 0x2c382f5369614938 + .quad 0xdafe409ab72d6d10 + .quad 0xe8c83391b646f227 + .quad 0x45fe70f50524306c + + // 2^24 * 8 * G + + .quad 0xda4875a6960c0b8c + .quad 0x5b68d076ef0e2f20 + .quad 0x07fb51cf3d0b8fd4 + .quad 0x428d1623a0e392d4 + .quad 0x62f24920c8951491 + .quad 0x05f007c83f630ca2 + .quad 0x6fbb45d2f5c9d4b8 + .quad 0x16619f6db57a2245 + .quad 0x084f4a4401a308fd + .quad 0xa82219c376a5caac + .quad 0xdeb8de4643d1bc7d + .quad 0x1d81592d60bd38c6 + + // 2^28 * 1 * G + + .quad 0xd833d7beec2a4c38 + .quad 0x2c9162830acc20ed + .quad 0xe93a47aa92df7581 + .quad 0x702d67a3333c4a81 + .quad 0x3a4a369a2f89c8a1 + .quad 0x63137a1d7c8de80d + .quad 0xbcac008a78eda015 + .quad 0x2cb8b3a5b483b03f + .quad 0x36e417cbcb1b90a1 + .quad 0x33b3ddaa7f11794e + .quad 0x3f510808885bc607 + .quad 0x24141dc0e6a8020d + + // 2^28 * 2 * G + + .quad 0x59f73c773fefee9d + .quad 0xb3f1ef89c1cf989d + .quad 0xe35dfb42e02e545f + .quad 0x5766120b47a1b47c + .quad 0x91925dccbd83157d + .quad 0x3ca1205322cc8094 + .quad 0x28e57f183f90d6e4 + .quad 0x1a4714cede2e767b + .quad 0xdb20ba0fb8b6b7ff + .quad 0xb732c3b677511fa1 + .quad 0xa92b51c099f02d89 + .quad 0x4f3875ad489ca5f1 + + // 2^28 * 3 * G + + .quad 0xc7fc762f4932ab22 + .quad 0x7ac0edf72f4c3c1b + .quad 0x5f6b55aa9aa895e8 + .quad 0x3680274dad0a0081 + .quad 0x79ed13f6ee73eec0 + .quad 0xa5c6526d69110bb1 + .quad 0xe48928c38603860c + .quad 0x722a1446fd7059f5 + .quad 0xd0959fe9a8cf8819 + .quad 0xd0a995508475a99c + .quad 0x6eac173320b09cc5 + .quad 0x628ecf04331b1095 + + // 2^28 * 4 * G + + .quad 0x98bcb118a9d0ddbc + .quad 0xee449e3408b4802b 
+ .quad 0x87089226b8a6b104 + .quad 0x685f349a45c7915d + .quad 0x9b41acf85c74ccf1 + .quad 0xb673318108265251 + .quad 0x99c92aed11adb147 + .quad 0x7a47d70d34ecb40f + .quad 0x60a0c4cbcc43a4f5 + .quad 0x775c66ca3677bea9 + .quad 0xa17aa1752ff8f5ed + .quad 0x11ded9020e01fdc0 + + // 2^28 * 5 * G + + .quad 0x890e7809caefe704 + .quad 0x8728296de30e8c6c + .quad 0x4c5cd2a392aeb1c9 + .quad 0x194263d15771531f + .quad 0x471f95b03bea93b7 + .quad 0x0552d7d43313abd3 + .quad 0xbd9370e2e17e3f7b + .quad 0x7b120f1db20e5bec + .quad 0x17d2fb3d86502d7a + .quad 0xb564d84450a69352 + .quad 0x7da962c8a60ed75d + .quad 0x00d0f85b318736aa + + // 2^28 * 6 * G + + .quad 0x978b142e777c84fd + .quad 0xf402644705a8c062 + .quad 0xa67ad51be7e612c7 + .quad 0x2f7b459698dd6a33 + .quad 0xa6753c1efd7621c1 + .quad 0x69c0b4a7445671f5 + .quad 0x971f527405b23c11 + .quad 0x387bc74851a8c7cd + .quad 0x81894b4d4a52a9a8 + .quad 0xadd93e12f6b8832f + .quad 0x184d8548b61bd638 + .quad 0x3f1c62dbd6c9f6cd + + // 2^28 * 7 * G + + .quad 0x2e8f1f0091910c1f + .quad 0xa4df4fe0bff2e12c + .quad 0x60c6560aee927438 + .quad 0x6338283facefc8fa + .quad 0x3fad3e40148f693d + .quad 0x052656e194eb9a72 + .quad 0x2f4dcbfd184f4e2f + .quad 0x406f8db1c482e18b + .quad 0x9e630d2c7f191ee4 + .quad 0x4fbf8301bc3ff670 + .quad 0x787d8e4e7afb73c4 + .quad 0x50d83d5be8f58fa5 + + // 2^28 * 8 * G + + .quad 0x85683916c11a1897 + .quad 0x2d69a4efe506d008 + .quad 0x39af1378f664bd01 + .quad 0x65942131361517c6 + .quad 0xc0accf90b4d3b66d + .quad 0xa7059de561732e60 + .quad 0x033d1f7870c6b0ba + .quad 0x584161cd26d946e4 + .quad 0xbbf2b1a072d27ca2 + .quad 0xbf393c59fbdec704 + .quad 0xe98dbbcee262b81e + .quad 0x02eebd0b3029b589 + + // 2^32 * 1 * G + + .quad 0x61368756a60dac5f + .quad 0x17e02f6aebabdc57 + .quad 0x7f193f2d4cce0f7d + .quad 0x20234a7789ecdcf0 + .quad 0x8765b69f7b85c5e8 + .quad 0x6ff0678bd168bab2 + .quad 0x3a70e77c1d330f9b + .quad 0x3a5f6d51b0af8e7c + .quad 0x76d20db67178b252 + .quad 0x071c34f9d51ed160 + .quad 0xf62a4a20b3e41170 + .quad 0x7cd682353cffe366 + + // 2^32 * 2 * G + + .quad 0x0be1a45bd887fab6 + .quad 0x2a846a32ba403b6e + .quad 0xd9921012e96e6000 + .quad 0x2838c8863bdc0943 + .quad 0xa665cd6068acf4f3 + .quad 0x42d92d183cd7e3d3 + .quad 0x5759389d336025d9 + .quad 0x3ef0253b2b2cd8ff + .quad 0xd16bb0cf4a465030 + .quad 0xfa496b4115c577ab + .quad 0x82cfae8af4ab419d + .quad 0x21dcb8a606a82812 + + // 2^32 * 3 * G + + .quad 0x5c6004468c9d9fc8 + .quad 0x2540096ed42aa3cb + .quad 0x125b4d4c12ee2f9c + .quad 0x0bc3d08194a31dab + .quad 0x9a8d00fabe7731ba + .quad 0x8203607e629e1889 + .quad 0xb2cc023743f3d97f + .quad 0x5d840dbf6c6f678b + .quad 0x706e380d309fe18b + .quad 0x6eb02da6b9e165c7 + .quad 0x57bbba997dae20ab + .quad 0x3a4276232ac196dd + + // 2^32 * 4 * G + + .quad 0x4b42432c8a7084fa + .quad 0x898a19e3dfb9e545 + .quad 0xbe9f00219c58e45d + .quad 0x1ff177cea16debd1 + .quad 0x3bf8c172db447ecb + .quad 0x5fcfc41fc6282dbd + .quad 0x80acffc075aa15fe + .quad 0x0770c9e824e1a9f9 + .quad 0xcf61d99a45b5b5fd + .quad 0x860984e91b3a7924 + .quad 0xe7300919303e3e89 + .quad 0x39f264fd41500b1e + + // 2^32 * 5 * G + + .quad 0xa7ad3417dbe7e29c + .quad 0xbd94376a2b9c139c + .quad 0xa0e91b8e93597ba9 + .quad 0x1712d73468889840 + .quad 0xd19b4aabfe097be1 + .quad 0xa46dfce1dfe01929 + .quad 0xc3c908942ca6f1ff + .quad 0x65c621272c35f14e + .quad 0xe72b89f8ce3193dd + .quad 0x4d103356a125c0bb + .quad 0x0419a93d2e1cfe83 + .quad 0x22f9800ab19ce272 + + // 2^32 * 6 * G + + .quad 0x605a368a3e9ef8cb + .quad 0xe3e9c022a5504715 + .quad 0x553d48b05f24248f + .quad 0x13f416cd647626e5 + .quad 0x42029fdd9a6efdac + .quad 
0xb912cebe34a54941 + .quad 0x640f64b987bdf37b + .quad 0x4171a4d38598cab4 + .quad 0xfa2758aa99c94c8c + .quad 0x23006f6fb000b807 + .quad 0xfbd291ddadda5392 + .quad 0x508214fa574bd1ab + + // 2^32 * 7 * G + + .quad 0xc20269153ed6fe4b + .quad 0xa65a6739511d77c4 + .quad 0xcbde26462c14af94 + .quad 0x22f960ec6faba74b + .quad 0x461a15bb53d003d6 + .quad 0xb2102888bcf3c965 + .quad 0x27c576756c683a5a + .quad 0x3a7758a4c86cb447 + .quad 0x548111f693ae5076 + .quad 0x1dae21df1dfd54a6 + .quad 0x12248c90f3115e65 + .quad 0x5d9fd15f8de7f494 + + // 2^32 * 8 * G + + .quad 0x031408d36d63727f + .quad 0x6a379aefd7c7b533 + .quad 0xa9e18fc5ccaee24b + .quad 0x332f35914f8fbed3 + .quad 0x3f244d2aeed7521e + .quad 0x8e3a9028432e9615 + .quad 0xe164ba772e9c16d4 + .quad 0x3bc187fa47eb98d8 + .quad 0x6d470115ea86c20c + .quad 0x998ab7cb6c46d125 + .quad 0xd77832b53a660188 + .quad 0x450d81ce906fba03 + + // 2^36 * 1 * G + + .quad 0xf8ae4d2ad8453902 + .quad 0x7018058ee8db2d1d + .quad 0xaab3995fc7d2c11e + .quad 0x53b16d2324ccca79 + .quad 0x23264d66b2cae0b5 + .quad 0x7dbaed33ebca6576 + .quad 0x030ebed6f0d24ac8 + .quad 0x2a887f78f7635510 + .quad 0x2a23b9e75c012d4f + .quad 0x0c974651cae1f2ea + .quad 0x2fb63273675d70ca + .quad 0x0ba7250b864403f5 + + // 2^36 * 2 * G + + .quad 0xbb0d18fd029c6421 + .quad 0xbc2d142189298f02 + .quad 0x8347f8e68b250e96 + .quad 0x7b9f2fe8032d71c9 + .quad 0xdd63589386f86d9c + .quad 0x61699176e13a85a4 + .quad 0x2e5111954eaa7d57 + .quad 0x32c21b57fb60bdfb + .quad 0xd87823cd319e0780 + .quad 0xefc4cfc1897775c5 + .quad 0x4854fb129a0ab3f7 + .quad 0x12c49d417238c371 + + // 2^36 * 3 * G + + .quad 0x0950b533ffe83769 + .quad 0x21861c1d8e1d6bd1 + .quad 0xf022d8381302e510 + .quad 0x2509200c6391cab4 + .quad 0x09b3a01783799542 + .quad 0x626dd08faad5ee3f + .quad 0xba00bceeeb70149f + .quad 0x1421b246a0a444c9 + .quad 0x4aa43a8e8c24a7c7 + .quad 0x04c1f540d8f05ef5 + .quad 0xadba5e0c0b3eb9dc + .quad 0x2ab5504448a49ce3 + + // 2^36 * 4 * G + + .quad 0x2ed227266f0f5dec + .quad 0x9824ee415ed50824 + .quad 0x807bec7c9468d415 + .quad 0x7093bae1b521e23f + .quad 0xdc07ac631c5d3afa + .quad 0x58615171f9df8c6c + .quad 0x72a079d89d73e2b0 + .quad 0x7301f4ceb4eae15d + .quad 0x6409e759d6722c41 + .quad 0xa674e1cf72bf729b + .quad 0xbc0a24eb3c21e569 + .quad 0x390167d24ebacb23 + + // 2^36 * 5 * G + + .quad 0x27f58e3bba353f1c + .quad 0x4c47764dbf6a4361 + .quad 0xafbbc4e56e562650 + .quad 0x07db2ee6aae1a45d + .quad 0xd7bb054ba2f2120b + .quad 0xe2b9ceaeb10589b7 + .quad 0x3fe8bac8f3c0edbe + .quad 0x4cbd40767112cb69 + .quad 0x0b603cc029c58176 + .quad 0x5988e3825cb15d61 + .quad 0x2bb61413dcf0ad8d + .quad 0x7b8eec6c74183287 + + // 2^36 * 6 * G + + .quad 0xe4ca40782cd27cb0 + .quad 0xdaf9c323fbe967bd + .quad 0xb29bd34a8ad41e9e + .quad 0x72810497626ede4d + .quad 0x32fee570fc386b73 + .quad 0xda8b0141da3a8cc7 + .quad 0x975ffd0ac8968359 + .quad 0x6ee809a1b132a855 + .quad 0x9444bb31fcfd863a + .quad 0x2fe3690a3e4e48c5 + .quad 0xdc29c867d088fa25 + .quad 0x13bd1e38d173292e + + // 2^36 * 7 * G + + .quad 0xd32b4cd8696149b5 + .quad 0xe55937d781d8aab7 + .quad 0x0bcb2127ae122b94 + .quad 0x41e86fcfb14099b0 + .quad 0x223fb5cf1dfac521 + .quad 0x325c25316f554450 + .quad 0x030b98d7659177ac + .quad 0x1ed018b64f88a4bd + .quad 0x3630dfa1b802a6b0 + .quad 0x880f874742ad3bd5 + .quad 0x0af90d6ceec5a4d4 + .quad 0x746a247a37cdc5d9 + + // 2^36 * 8 * G + + .quad 0xd531b8bd2b7b9af6 + .quad 0x5005093537fc5b51 + .quad 0x232fcf25c593546d + .quad 0x20a365142bb40f49 + .quad 0x6eccd85278d941ed + .quad 0x2254ae83d22f7843 + .quad 0xc522d02e7bbfcdb7 + .quad 0x681e3351bff0e4e2 + .quad 
0x8b64b59d83034f45 + .quad 0x2f8b71f21fa20efb + .quad 0x69249495ba6550e4 + .quad 0x539ef98e45d5472b + + // 2^40 * 1 * G + + .quad 0x6e7bb6a1a6205275 + .quad 0xaa4f21d7413c8e83 + .quad 0x6f56d155e88f5cb2 + .quad 0x2de25d4ba6345be1 + .quad 0xd074d8961cae743f + .quad 0xf86d18f5ee1c63ed + .quad 0x97bdc55be7f4ed29 + .quad 0x4cbad279663ab108 + .quad 0x80d19024a0d71fcd + .quad 0xc525c20afb288af8 + .quad 0xb1a3974b5f3a6419 + .quad 0x7d7fbcefe2007233 + + // 2^40 * 2 * G + + .quad 0xfaef1e6a266b2801 + .quad 0x866c68c4d5739f16 + .quad 0xf68a2fbc1b03762c + .quad 0x5975435e87b75a8d + .quad 0xcd7c5dc5f3c29094 + .quad 0xc781a29a2a9105ab + .quad 0x80c61d36421c3058 + .quad 0x4f9cd196dcd8d4d7 + .quad 0x199297d86a7b3768 + .quad 0xd0d058241ad17a63 + .quad 0xba029cad5c1c0c17 + .quad 0x7ccdd084387a0307 + + // 2^40 * 3 * G + + .quad 0xdca6422c6d260417 + .quad 0xae153d50948240bd + .quad 0xa9c0c1b4fb68c677 + .quad 0x428bd0ed61d0cf53 + .quad 0x9b0c84186760cc93 + .quad 0xcdae007a1ab32a99 + .quad 0xa88dec86620bda18 + .quad 0x3593ca848190ca44 + .quad 0x9213189a5e849aa7 + .quad 0xd4d8c33565d8facd + .quad 0x8c52545b53fdbbd1 + .quad 0x27398308da2d63e6 + + // 2^40 * 4 * G + + .quad 0x42c38d28435ed413 + .quad 0xbd50f3603278ccc9 + .quad 0xbb07ab1a79da03ef + .quad 0x269597aebe8c3355 + .quad 0xb9a10e4c0a702453 + .quad 0x0fa25866d57d1bde + .quad 0xffb9d9b5cd27daf7 + .quad 0x572c2945492c33fd + .quad 0xc77fc745d6cd30be + .quad 0xe4dfe8d3e3baaefb + .quad 0xa22c8830aa5dda0c + .quad 0x7f985498c05bca80 + + // 2^40 * 5 * G + + .quad 0x3849ce889f0be117 + .quad 0x8005ad1b7b54a288 + .quad 0x3da3c39f23fc921c + .quad 0x76c2ec470a31f304 + .quad 0xd35615520fbf6363 + .quad 0x08045a45cf4dfba6 + .quad 0xeec24fbc873fa0c2 + .quad 0x30f2653cd69b12e7 + .quad 0x8a08c938aac10c85 + .quad 0x46179b60db276bcb + .quad 0xa920c01e0e6fac70 + .quad 0x2f1273f1596473da + + // 2^40 * 6 * G + + .quad 0x4739fc7c8ae01e11 + .quad 0xfd5274904a6aab9f + .quad 0x41d98a8287728f2e + .quad 0x5d9e572ad85b69f2 + .quad 0x30488bd755a70bc0 + .quad 0x06d6b5a4f1d442e7 + .quad 0xead1a69ebc596162 + .quad 0x38ac1997edc5f784 + .quad 0x0666b517a751b13b + .quad 0x747d06867e9b858c + .quad 0xacacc011454dde49 + .quad 0x22dfcd9cbfe9e69c + + // 2^40 * 7 * G + + .quad 0x8ddbd2e0c30d0cd9 + .quad 0xad8e665facbb4333 + .quad 0x8f6b258c322a961f + .quad 0x6b2916c05448c1c7 + .quad 0x56ec59b4103be0a1 + .quad 0x2ee3baecd259f969 + .quad 0x797cb29413f5cd32 + .quad 0x0fe9877824cde472 + .quad 0x7edb34d10aba913b + .quad 0x4ea3cd822e6dac0e + .quad 0x66083dff6578f815 + .quad 0x4c303f307ff00a17 + + // 2^40 * 8 * G + + .quad 0xd30a3bd617b28c85 + .quad 0xc5d377b739773bea + .quad 0xc6c6e78c1e6a5cbf + .quad 0x0d61b8f78b2ab7c4 + .quad 0x29fc03580dd94500 + .quad 0xecd27aa46fbbec93 + .quad 0x130a155fc2e2a7f8 + .quad 0x416b151ab706a1d5 + .quad 0x56a8d7efe9c136b0 + .quad 0xbd07e5cd58e44b20 + .quad 0xafe62fda1b57e0ab + .quad 0x191a2af74277e8d2 + + // 2^44 * 1 * G + + .quad 0xd550095bab6f4985 + .quad 0x04f4cd5b4fbfaf1a + .quad 0x9d8e2ed12a0c7540 + .quad 0x2bc24e04b2212286 + .quad 0x09d4b60b2fe09a14 + .quad 0xc384f0afdbb1747e + .quad 0x58e2ea8978b5fd6e + .quad 0x519ef577b5e09b0a + .quad 0x1863d7d91124cca9 + .quad 0x7ac08145b88a708e + .quad 0x2bcd7309857031f5 + .quad 0x62337a6e8ab8fae5 + + // 2^44 * 2 * G + + .quad 0x4bcef17f06ffca16 + .quad 0xde06e1db692ae16a + .quad 0x0753702d614f42b0 + .quad 0x5f6041b45b9212d0 + .quad 0xd1ab324e1b3a1273 + .quad 0x18947cf181055340 + .quad 0x3b5d9567a98c196e + .quad 0x7fa00425802e1e68 + .quad 0x7d531574028c2705 + .quad 0x80317d69db0d75fe + .quad 0x30fface8ef8c8ddd + .quad 
0x7e9de97bb6c3e998 + + // 2^44 * 3 * G + + .quad 0x1558967b9e6585a3 + .quad 0x97c99ce098e98b92 + .quad 0x10af149b6eb3adad + .quad 0x42181fe8f4d38cfa + .quad 0xf004be62a24d40dd + .quad 0xba0659910452d41f + .quad 0x81c45ee162a44234 + .quad 0x4cb829d8a22266ef + .quad 0x1dbcaa8407b86681 + .quad 0x081f001e8b26753b + .quad 0x3cd7ce6a84048e81 + .quad 0x78af11633f25f22c + + // 2^44 * 4 * G + + .quad 0x8416ebd40b50babc + .quad 0x1508722628208bee + .quad 0xa3148fafb9c1c36d + .quad 0x0d07daacd32d7d5d + .quad 0x3241c00e7d65318c + .quad 0xe6bee5dcd0e86de7 + .quad 0x118b2dc2fbc08c26 + .quad 0x680d04a7fc603dc3 + .quad 0xf9c2414a695aa3eb + .quad 0xdaa42c4c05a68f21 + .quad 0x7c6c23987f93963e + .quad 0x210e8cd30c3954e3 + + // 2^44 * 5 * G + + .quad 0xac4201f210a71c06 + .quad 0x6a65e0aef3bfb021 + .quad 0xbc42c35c393632f7 + .quad 0x56ea8db1865f0742 + .quad 0x2b50f16137fe6c26 + .quad 0xe102bcd856e404d8 + .quad 0x12b0f1414c561f6b + .quad 0x51b17bc8d028ec91 + .quad 0xfff5fb4bcf535119 + .quad 0xf4989d79df1108a0 + .quad 0xbdfcea659a3ba325 + .quad 0x18a11f1174d1a6f2 + + // 2^44 * 6 * G + + .quad 0x407375ab3f6bba29 + .quad 0x9ec3b6d8991e482e + .quad 0x99c80e82e55f92e9 + .quad 0x307c13b6fb0c0ae1 + .quad 0xfbd63cdad27a5f2c + .quad 0xf00fc4bc8aa106d7 + .quad 0x53fb5c1a8e64a430 + .quad 0x04eaabe50c1a2e85 + .quad 0x24751021cb8ab5e7 + .quad 0xfc2344495c5010eb + .quad 0x5f1e717b4e5610a1 + .quad 0x44da5f18c2710cd5 + + // 2^44 * 7 * G + + .quad 0x033cc55ff1b82eb5 + .quad 0xb15ae36d411cae52 + .quad 0xba40b6198ffbacd3 + .quad 0x768edce1532e861f + .quad 0x9156fe6b89d8eacc + .quad 0xe6b79451e23126a1 + .quad 0xbd7463d93944eb4e + .quad 0x726373f6767203ae + .quad 0xe305ca72eb7ef68a + .quad 0x662cf31f70eadb23 + .quad 0x18f026fdb4c45b68 + .quad 0x513b5384b5d2ecbd + + // 2^44 * 8 * G + + .quad 0x46d46280c729989e + .quad 0x4b93fbd05368a5dd + .quad 0x63df3f81d1765a89 + .quad 0x34cebd64b9a0a223 + .quad 0x5e2702878af34ceb + .quad 0x900b0409b946d6ae + .quad 0x6512ebf7dabd8512 + .quad 0x61d9b76988258f81 + .quad 0xa6c5a71349b7d94b + .quad 0xa3f3d15823eb9446 + .quad 0x0416fbd277484834 + .quad 0x69d45e6f2c70812f + + // 2^48 * 1 * G + + .quad 0xce16f74bc53c1431 + .quad 0x2b9725ce2072edde + .quad 0xb8b9c36fb5b23ee7 + .quad 0x7e2e0e450b5cc908 + .quad 0x9fe62b434f460efb + .quad 0xded303d4a63607d6 + .quad 0xf052210eb7a0da24 + .quad 0x237e7dbe00545b93 + .quad 0x013575ed6701b430 + .quad 0x231094e69f0bfd10 + .quad 0x75320f1583e47f22 + .quad 0x71afa699b11155e3 + + // 2^48 * 2 * G + + .quad 0x65ce6f9b3953b61d + .quad 0xc65839eaafa141e6 + .quad 0x0f435ffda9f759fe + .quad 0x021142e9c2b1c28e + .quad 0xea423c1c473b50d6 + .quad 0x51e87a1f3b38ef10 + .quad 0x9b84bf5fb2c9be95 + .quad 0x00731fbc78f89a1c + .quad 0xe430c71848f81880 + .quad 0xbf960c225ecec119 + .quad 0xb6dae0836bba15e3 + .quad 0x4c4d6f3347e15808 + + // 2^48 * 3 * G + + .quad 0x18f7eccfc17d1fc9 + .quad 0x6c75f5a651403c14 + .quad 0xdbde712bf7ee0cdf + .quad 0x193fddaaa7e47a22 + .quad 0x2f0cddfc988f1970 + .quad 0x6b916227b0b9f51b + .quad 0x6ec7b6c4779176be + .quad 0x38bf9500a88f9fa8 + .quad 0x1fd2c93c37e8876f + .quad 0xa2f61e5a18d1462c + .quad 0x5080f58239241276 + .quad 0x6a6fb99ebf0d4969 + + // 2^48 * 4 * G + + .quad 0x6a46c1bb560855eb + .quad 0x2416bb38f893f09d + .quad 0xd71d11378f71acc1 + .quad 0x75f76914a31896ea + .quad 0xeeb122b5b6e423c6 + .quad 0x939d7010f286ff8e + .quad 0x90a92a831dcf5d8c + .quad 0x136fda9f42c5eb10 + .quad 0xf94cdfb1a305bdd1 + .quad 0x0f364b9d9ff82c08 + .quad 0x2a87d8a5c3bb588a + .quad 0x022183510be8dcba + + // 2^48 * 5 * G + + .quad 0x4af766385ead2d14 + .quad 0xa08ed880ca7c5830 
+ .quad 0x0d13a6e610211e3d + .quad 0x6a071ce17b806c03 + .quad 0x9d5a710143307a7f + .quad 0xb063de9ec47da45f + .quad 0x22bbfe52be927ad3 + .quad 0x1387c441fd40426c + .quad 0xb5d3c3d187978af8 + .quad 0x722b5a3d7f0e4413 + .quad 0x0d7b4848bb477ca0 + .quad 0x3171b26aaf1edc92 + + // 2^48 * 6 * G + + .quad 0xa92f319097564ca8 + .quad 0xff7bb84c2275e119 + .quad 0x4f55fe37a4875150 + .quad 0x221fd4873cf0835a + .quad 0xa60db7d8b28a47d1 + .quad 0xa6bf14d61770a4f1 + .quad 0xd4a1f89353ddbd58 + .quad 0x6c514a63344243e9 + .quad 0x2322204f3a156341 + .quad 0xfb73e0e9ba0a032d + .quad 0xfce0dd4c410f030e + .quad 0x48daa596fb924aaa + + // 2^48 * 7 * G + + .quad 0x6eca8e665ca59cc7 + .quad 0xa847254b2e38aca0 + .quad 0x31afc708d21e17ce + .quad 0x676dd6fccad84af7 + .quad 0x14f61d5dc84c9793 + .quad 0x9941f9e3ef418206 + .quad 0xcdf5b88f346277ac + .quad 0x58c837fa0e8a79a9 + .quad 0x0cf9688596fc9058 + .quad 0x1ddcbbf37b56a01b + .quad 0xdcc2e77d4935d66a + .quad 0x1c4f73f2c6a57f0a + + // 2^48 * 8 * G + + .quad 0x0e7a4fbd305fa0bb + .quad 0x829d4ce054c663ad + .quad 0xf421c3832fe33848 + .quad 0x795ac80d1bf64c42 + .quad 0xb36e706efc7c3484 + .quad 0x73dfc9b4c3c1cf61 + .quad 0xeb1d79c9781cc7e5 + .quad 0x70459adb7daf675c + .quad 0x1b91db4991b42bb3 + .quad 0x572696234b02dcca + .quad 0x9fdf9ee51f8c78dc + .quad 0x5fe162848ce21fd3 + + // 2^52 * 1 * G + + .quad 0xe2790aae4d077c41 + .quad 0x8b938270db7469a3 + .quad 0x6eb632dc8abd16a2 + .quad 0x720814ecaa064b72 + .quad 0x315c29c795115389 + .quad 0xd7e0e507862f74ce + .quad 0x0c4a762185927432 + .quad 0x72de6c984a25a1e4 + .quad 0xae9ab553bf6aa310 + .quad 0x050a50a9806d6e1b + .quad 0x92bb7403adff5139 + .quad 0x0394d27645be618b + + // 2^52 * 2 * G + + .quad 0x4d572251857eedf4 + .quad 0xe3724edde19e93c5 + .quad 0x8a71420e0b797035 + .quad 0x3b3c833687abe743 + .quad 0xf5396425b23545a4 + .quad 0x15a7a27e98fbb296 + .quad 0xab6c52bc636fdd86 + .quad 0x79d995a8419334ee + .quad 0xcd8a8ea61195dd75 + .quad 0xa504d8a81dd9a82f + .quad 0x540dca81a35879b6 + .quad 0x60dd16a379c86a8a + + // 2^52 * 3 * G + + .quad 0x35a2c8487381e559 + .quad 0x596ffea6d78082cb + .quad 0xcb9771ebdba7b653 + .quad 0x5a08b5019b4da685 + .quad 0x3501d6f8153e47b8 + .quad 0xb7a9675414a2f60c + .quad 0x112ee8b6455d9523 + .quad 0x4e62a3c18112ea8a + .quad 0xc8d4ac04516ab786 + .quad 0x595af3215295b23d + .quad 0xd6edd234db0230c1 + .quad 0x0929efe8825b41cc + + // 2^52 * 4 * G + + .quad 0x5f0601d1cbd0f2d3 + .quad 0x736e412f6132bb7f + .quad 0x83604432238dde87 + .quad 0x1e3a5272f5c0753c + .quad 0x8b3172b7ad56651d + .quad 0x01581b7a3fabd717 + .quad 0x2dc94df6424df6e4 + .quad 0x30376e5d2c29284f + .quad 0xd2918da78159a59c + .quad 0x6bdc1cd93f0713f3 + .quad 0x565f7a934acd6590 + .quad 0x53daacec4cb4c128 + + // 2^52 * 5 * G + + .quad 0x4ca73bd79cc8a7d6 + .quad 0x4d4a738f47e9a9b2 + .quad 0xf4cbf12942f5fe00 + .quad 0x01a13ff9bdbf0752 + .quad 0x99852bc3852cfdb0 + .quad 0x2cc12e9559d6ed0b + .quad 0x70f9e2bf9b5ac27b + .quad 0x4f3b8c117959ae99 + .quad 0x55b6c9c82ff26412 + .quad 0x1ac4a8c91fb667a8 + .quad 0xd527bfcfeb778bf2 + .quad 0x303337da7012a3be + + // 2^52 * 6 * G + + .quad 0x955422228c1c9d7c + .quad 0x01fac1371a9b340f + .quad 0x7e8d9177925b48d7 + .quad 0x53f8ad5661b3e31b + .quad 0x976d3ccbfad2fdd1 + .quad 0xcb88839737a640a8 + .quad 0x2ff00c1d6734cb25 + .quad 0x269ff4dc789c2d2b + .quad 0x0c003fbdc08d678d + .quad 0x4d982fa37ead2b17 + .quad 0xc07e6bcdb2e582f1 + .quad 0x296c7291df412a44 + + // 2^52 * 7 * G + + .quad 0x7903de2b33daf397 + .quad 0xd0ff0619c9a624b3 + .quad 0x8a1d252b555b3e18 + .quad 0x2b6d581c52e0b7c0 + .quad 0xdfb23205dab8b59e + .quad 
0x465aeaa0c8092250 + .quad 0xd133c1189a725d18 + .quad 0x2327370261f117d1 + .quad 0x3d0543d3623e7986 + .quad 0x679414c2c278a354 + .quad 0xae43f0cc726196f6 + .quad 0x7836c41f8245eaba + + // 2^52 * 8 * G + + .quad 0xe7a254db49e95a81 + .quad 0x5192d5d008b0ad73 + .quad 0x4d20e5b1d00afc07 + .quad 0x5d55f8012cf25f38 + .quad 0xca651e848011937c + .quad 0xc6b0c46e6ef41a28 + .quad 0xb7021ba75f3f8d52 + .quad 0x119dff99ead7b9fd + .quad 0x43eadfcbf4b31d4d + .quad 0xc6503f7411148892 + .quad 0xfeee68c5060d3b17 + .quad 0x329293b3dd4a0ac8 + + // 2^56 * 1 * G + + .quad 0x4e59214fe194961a + .quad 0x49be7dc70d71cd4f + .quad 0x9300cfd23b50f22d + .quad 0x4789d446fc917232 + .quad 0x2879852d5d7cb208 + .quad 0xb8dedd70687df2e7 + .quad 0xdc0bffab21687891 + .quad 0x2b44c043677daa35 + .quad 0x1a1c87ab074eb78e + .quad 0xfac6d18e99daf467 + .quad 0x3eacbbcd484f9067 + .quad 0x60c52eef2bb9a4e4 + + // 2^56 * 2 * G + + .quad 0x0b5d89bc3bfd8bf1 + .quad 0xb06b9237c9f3551a + .quad 0x0e4c16b0d53028f5 + .quad 0x10bc9c312ccfcaab + .quad 0x702bc5c27cae6d11 + .quad 0x44c7699b54a48cab + .quad 0xefbc4056ba492eb2 + .quad 0x70d77248d9b6676d + .quad 0xaa8ae84b3ec2a05b + .quad 0x98699ef4ed1781e0 + .quad 0x794513e4708e85d1 + .quad 0x63755bd3a976f413 + + // 2^56 * 3 * G + + .quad 0xb55fa03e2ad10853 + .quad 0x356f75909ee63569 + .quad 0x9ff9f1fdbe69b890 + .quad 0x0d8cc1c48bc16f84 + .quad 0x3dc7101897f1acb7 + .quad 0x5dda7d5ec165bbd8 + .quad 0x508e5b9c0fa1020f + .quad 0x2763751737c52a56 + .quad 0x029402d36eb419a9 + .quad 0xf0b44e7e77b460a5 + .quad 0xcfa86230d43c4956 + .quad 0x70c2dd8a7ad166e7 + + // 2^56 * 4 * G + + .quad 0x656194509f6fec0e + .quad 0xee2e7ea946c6518d + .quad 0x9733c1f367e09b5c + .quad 0x2e0fac6363948495 + .quad 0x91d4967db8ed7e13 + .quad 0x74252f0ad776817a + .quad 0xe40982e00d852564 + .quad 0x32b8613816a53ce5 + .quad 0x79e7f7bee448cd64 + .quad 0x6ac83a67087886d0 + .quad 0xf89fd4d9a0e4db2e + .quad 0x4179215c735a4f41 + + // 2^56 * 5 * G + + .quad 0x8c7094e7d7dced2a + .quad 0x97fb8ac347d39c70 + .quad 0xe13be033a906d902 + .quad 0x700344a30cd99d76 + .quad 0xe4ae33b9286bcd34 + .quad 0xb7ef7eb6559dd6dc + .quad 0x278b141fb3d38e1f + .quad 0x31fa85662241c286 + .quad 0xaf826c422e3622f4 + .quad 0xc12029879833502d + .quad 0x9bc1b7e12b389123 + .quad 0x24bb2312a9952489 + + // 2^56 * 6 * G + + .quad 0xb1a8ed1732de67c3 + .quad 0x3cb49418461b4948 + .quad 0x8ebd434376cfbcd2 + .quad 0x0fee3e871e188008 + .quad 0x41f80c2af5f85c6b + .quad 0x687284c304fa6794 + .quad 0x8945df99a3ba1bad + .quad 0x0d1d2af9ffeb5d16 + .quad 0xa9da8aa132621edf + .quad 0x30b822a159226579 + .quad 0x4004197ba79ac193 + .quad 0x16acd79718531d76 + + // 2^56 * 7 * G + + .quad 0x72df72af2d9b1d3d + .quad 0x63462a36a432245a + .quad 0x3ecea07916b39637 + .quad 0x123e0ef6b9302309 + .quad 0xc959c6c57887b6ad + .quad 0x94e19ead5f90feba + .quad 0x16e24e62a342f504 + .quad 0x164ed34b18161700 + .quad 0x487ed94c192fe69a + .quad 0x61ae2cea3a911513 + .quad 0x877bf6d3b9a4de27 + .quad 0x78da0fc61073f3eb + + // 2^56 * 8 * G + + .quad 0x5bf15d28e52bc66a + .quad 0x2c47e31870f01a8e + .quad 0x2419afbc06c28bdd + .quad 0x2d25deeb256b173a + .quad 0xa29f80f1680c3a94 + .quad 0x71f77e151ae9e7e6 + .quad 0x1100f15848017973 + .quad 0x054aa4b316b38ddd + .quad 0xdfc8468d19267cb8 + .quad 0x0b28789c66e54daf + .quad 0x2aeb1d2a666eec17 + .quad 0x134610a6ab7da760 + + // 2^60 * 1 * G + + .quad 0xcaf55ec27c59b23f + .quad 0x99aeed3e154d04f2 + .quad 0x68441d72e14141f4 + .quad 0x140345133932a0a2 + .quad 0xd91430e0dc028c3c + .quad 0x0eb955a85217c771 + .quad 0x4b09e1ed2c99a1fa + .quad 0x42881af2bd6a743c + .quad 
0x7bfec69aab5cad3d + .quad 0xc23e8cd34cb2cfad + .quad 0x685dd14bfb37d6a2 + .quad 0x0ad6d64415677a18 + + // 2^60 * 2 * G + + .quad 0x781a439e417becb5 + .quad 0x4ac5938cd10e0266 + .quad 0x5da385110692ac24 + .quad 0x11b065a2ade31233 + .quad 0x7914892847927e9f + .quad 0x33dad6ef370aa877 + .quad 0x1f8f24fa11122703 + .quad 0x5265ac2f2adf9592 + .quad 0x405fdd309afcb346 + .quad 0xd9723d4428e63f54 + .quad 0x94c01df05f65aaae + .quad 0x43e4dc3ae14c0809 + + // 2^60 * 3 * G + + .quad 0xbc12c7f1a938a517 + .quad 0x473028ab3180b2e1 + .quad 0x3f78571efbcd254a + .quad 0x74e534426ff6f90f + .quad 0xea6f7ac3adc2c6a3 + .quad 0xd0e928f6e9717c94 + .quad 0xe2d379ead645eaf5 + .quad 0x46dd8785c51ffbbe + .quad 0x709801be375c8898 + .quad 0x4b06dab5e3fd8348 + .quad 0x75880ced27230714 + .quad 0x2b09468fdd2f4c42 + + // 2^60 * 4 * G + + .quad 0x97c749eeb701cb96 + .quad 0x83f438d4b6a369c3 + .quad 0x62962b8b9a402cd9 + .quad 0x6976c7509888df7b + .quad 0x5b97946582ffa02a + .quad 0xda096a51fea8f549 + .quad 0xa06351375f77af9b + .quad 0x1bcfde61201d1e76 + .quad 0x4a4a5490246a59a2 + .quad 0xd63ebddee87fdd90 + .quad 0xd9437c670d2371fa + .quad 0x69e87308d30f8ed6 + + // 2^60 * 5 * G + + .quad 0x435a8bb15656beb0 + .quad 0xf8fac9ba4f4d5bca + .quad 0xb9b278c41548c075 + .quad 0x3eb0ef76e892b622 + .quad 0x0f80bf028bc80303 + .quad 0x6aae16b37a18cefb + .quad 0xdd47ea47d72cd6a3 + .quad 0x61943588f4ed39aa + .quad 0xd26e5c3e91039f85 + .quad 0xc0e9e77df6f33aa9 + .quad 0xe8968c5570066a93 + .quad 0x3c34d1881faaaddd + + // 2^60 * 6 * G + + .quad 0x3f9d2b5ea09f9ec0 + .quad 0x1dab3b6fb623a890 + .quad 0xa09ba3ea72d926c4 + .quad 0x374193513fd8b36d + .quad 0xbd5b0b8f2fffe0d9 + .quad 0x6aa254103ed24fb9 + .quad 0x2ac7d7bcb26821c4 + .quad 0x605b394b60dca36a + .quad 0xb4e856e45a9d1ed2 + .quad 0xefe848766c97a9a2 + .quad 0xb104cf641e5eee7d + .quad 0x2f50b81c88a71c8f + + // 2^60 * 7 * G + + .quad 0x31723c61fc6811bb + .quad 0x9cb450486211800f + .quad 0x768933d347995753 + .quad 0x3491a53502752fcd + .quad 0x2b552ca0a7da522a + .quad 0x3230b336449b0250 + .quad 0xf2c4c5bca4b99fb9 + .quad 0x7b2c674958074a22 + .quad 0xd55165883ed28cdf + .quad 0x12d84fd2d362de39 + .quad 0x0a874ad3e3378e4f + .quad 0x000d2b1f7c763e74 + + // 2^60 * 8 * G + + .quad 0x3d420811d06d4a67 + .quad 0xbefc048590e0ffe3 + .quad 0xf870c6b7bd487bde + .quad 0x6e2a7316319afa28 + .quad 0x9624778c3e94a8ab + .quad 0x0ad6f3cee9a78bec + .quad 0x948ac7810d743c4f + .quad 0x76627935aaecfccc + .quad 0x56a8ac24d6d59a9f + .quad 0xc8db753e3096f006 + .quad 0x477f41e68f4c5299 + .quad 0x588d851cf6c86114 + + // 2^64 * 1 * G + + .quad 0x51138ec78df6b0fe + .quad 0x5397da89e575f51b + .quad 0x09207a1d717af1b9 + .quad 0x2102fdba2b20d650 + .quad 0xcd2a65e777d1f515 + .quad 0x548991878faa60f1 + .quad 0xb1b73bbcdabc06e5 + .quad 0x654878cba97cc9fb + .quad 0x969ee405055ce6a1 + .quad 0x36bca7681251ad29 + .quad 0x3a1af517aa7da415 + .quad 0x0ad725db29ecb2ba + + // 2^64 * 2 * G + + .quad 0xdc4267b1834e2457 + .quad 0xb67544b570ce1bc5 + .quad 0x1af07a0bf7d15ed7 + .quad 0x4aefcffb71a03650 + .quad 0xfec7bc0c9b056f85 + .quad 0x537d5268e7f5ffd7 + .quad 0x77afc6624312aefa + .quad 0x4f675f5302399fd9 + .quad 0xc32d36360415171e + .quad 0xcd2bef118998483b + .quad 0x870a6eadd0945110 + .quad 0x0bccbb72a2a86561 + + // 2^64 * 3 * G + + .quad 0x185e962feab1a9c8 + .quad 0x86e7e63565147dcd + .quad 0xb092e031bb5b6df2 + .quad 0x4024f0ab59d6b73e + .quad 0x186d5e4c50fe1296 + .quad 0xe0397b82fee89f7e + .quad 0x3bc7f6c5507031b0 + .quad 0x6678fd69108f37c2 + .quad 0x1586fa31636863c2 + .quad 0x07f68c48572d33f2 + .quad 0x4f73cc9f789eaefc + .quad 
0x2d42e2108ead4701 + + // 2^64 * 4 * G + + .quad 0x97f5131594dfd29b + .quad 0x6155985d313f4c6a + .quad 0xeba13f0708455010 + .quad 0x676b2608b8d2d322 + .quad 0x21717b0d0f537593 + .quad 0x914e690b131e064c + .quad 0x1bb687ae752ae09f + .quad 0x420bf3a79b423c6e + .quad 0x8138ba651c5b2b47 + .quad 0x8671b6ec311b1b80 + .quad 0x7bff0cb1bc3135b0 + .quad 0x745d2ffa9c0cf1e0 + + // 2^64 * 5 * G + + .quad 0xbf525a1e2bc9c8bd + .quad 0xea5b260826479d81 + .quad 0xd511c70edf0155db + .quad 0x1ae23ceb960cf5d0 + .quad 0x6036df5721d34e6a + .quad 0xb1db8827997bb3d0 + .quad 0xd3c209c3c8756afa + .quad 0x06e15be54c1dc839 + .quad 0x5b725d871932994a + .quad 0x32351cb5ceb1dab0 + .quad 0x7dc41549dab7ca05 + .quad 0x58ded861278ec1f7 + + // 2^64 * 6 * G + + .quad 0xd8173793f266c55c + .quad 0xc8c976c5cc454e49 + .quad 0x5ce382f8bc26c3a8 + .quad 0x2ff39de85485f6f9 + .quad 0x2dfb5ba8b6c2c9a8 + .quad 0x48eeef8ef52c598c + .quad 0x33809107f12d1573 + .quad 0x08ba696b531d5bd8 + .quad 0x77ed3eeec3efc57a + .quad 0x04e05517d4ff4811 + .quad 0xea3d7a3ff1a671cb + .quad 0x120633b4947cfe54 + + // 2^64 * 7 * G + + .quad 0x0b94987891610042 + .quad 0x4ee7b13cecebfae8 + .quad 0x70be739594f0a4c0 + .quad 0x35d30a99b4d59185 + .quad 0x82bd31474912100a + .quad 0xde237b6d7e6fbe06 + .quad 0xe11e761911ea79c6 + .quad 0x07433be3cb393bde + .quad 0xff7944c05ce997f4 + .quad 0x575d3de4b05c51a3 + .quad 0x583381fd5a76847c + .quad 0x2d873ede7af6da9f + + // 2^64 * 8 * G + + .quad 0x157a316443373409 + .quad 0xfab8b7eef4aa81d9 + .quad 0xb093fee6f5a64806 + .quad 0x2e773654707fa7b6 + .quad 0xaa6202e14e5df981 + .quad 0xa20d59175015e1f5 + .quad 0x18a275d3bae21d6c + .quad 0x0543618a01600253 + .quad 0x0deabdf4974c23c1 + .quad 0xaa6f0a259dce4693 + .quad 0x04202cb8a29aba2c + .quad 0x4b1443362d07960d + + // 2^68 * 1 * G + + .quad 0x47b837f753242cec + .quad 0x256dc48cc04212f2 + .quad 0xe222fbfbe1d928c5 + .quad 0x48ea295bad8a2c07 + .quad 0x299b1c3f57c5715e + .quad 0x96cb929e6b686d90 + .quad 0x3004806447235ab3 + .quad 0x2c435c24a44d9fe1 + .quad 0x0607c97c80f8833f + .quad 0x0e851578ca25ec5b + .quad 0x54f7450b161ebb6f + .quad 0x7bcb4792a0def80e + + // 2^68 * 2 * G + + .quad 0x8487e3d02bc73659 + .quad 0x4baf8445059979df + .quad 0xd17c975adcad6fbf + .quad 0x57369f0bdefc96b6 + .quad 0x1cecd0a0045224c2 + .quad 0x757f1b1b69e53952 + .quad 0x775b7a925289f681 + .quad 0x1b6cc62016736148 + .quad 0xf1a9990175638698 + .quad 0x353dd1beeeaa60d3 + .quad 0x849471334c9ba488 + .quad 0x63fa6e6843ade311 + + // 2^68 * 3 * G + + .quad 0xd15c20536597c168 + .quad 0x9f73740098d28789 + .quad 0x18aee7f13257ba1f + .quad 0x3418bfda07346f14 + .quad 0x2195becdd24b5eb7 + .quad 0x5e41f18cc0cd44f9 + .quad 0xdf28074441ca9ede + .quad 0x07073b98f35b7d67 + .quad 0xd03c676c4ce530d4 + .quad 0x0b64c0473b5df9f4 + .quad 0x065cef8b19b3a31e + .quad 0x3084d661533102c9 + + // 2^68 * 4 * G + + .quad 0xe1f6b79ebf8469ad + .quad 0x15801004e2663135 + .quad 0x9a498330af74181b + .quad 0x3ba2504f049b673c + .quad 0x9a6ce876760321fd + .quad 0x7fe2b5109eb63ad8 + .quad 0x00e7d4ae8ac80592 + .quad 0x73d86b7abb6f723a + .quad 0x0b52b5606dba5ab6 + .quad 0xa9134f0fbbb1edab + .quad 0x30a9520d9b04a635 + .quad 0x6813b8f37973e5db + + // 2^68 * 5 * G + + .quad 0x9854b054334127c1 + .quad 0x105d047882fbff25 + .quad 0xdb49f7f944186f4f + .quad 0x1768e838bed0b900 + .quad 0xf194ca56f3157e29 + .quad 0x136d35705ef528a5 + .quad 0xdd4cef778b0599bc + .quad 0x7d5472af24f833ed + .quad 0xd0ef874daf33da47 + .quad 0x00d3be5db6e339f9 + .quad 0x3f2a8a2f9c9ceece + .quad 0x5d1aeb792352435a + + // 2^68 * 6 * G + + .quad 0xf59e6bb319cd63ca + .quad 0x670c159221d06839 
+ .quad 0xb06d565b2150cab6 + .quad 0x20fb199d104f12a3 + .quad 0x12c7bfaeb61ba775 + .quad 0xb84e621fe263bffd + .quad 0x0b47a5c35c840dcf + .quad 0x7e83be0bccaf8634 + .quad 0x61943dee6d99c120 + .quad 0x86101f2e460b9fe0 + .quad 0x6bb2f1518ee8598d + .quad 0x76b76289fcc475cc + + // 2^68 * 7 * G + + .quad 0x791b4cc1756286fa + .quad 0xdbced317d74a157c + .quad 0x7e732421ea72bde6 + .quad 0x01fe18491131c8e9 + .quad 0x4245f1a1522ec0b3 + .quad 0x558785b22a75656d + .quad 0x1d485a2548a1b3c0 + .quad 0x60959eccd58fe09f + .quad 0x3ebfeb7ba8ed7a09 + .quad 0x49fdc2bbe502789c + .quad 0x44ebce5d3c119428 + .quad 0x35e1eb55be947f4a + + // 2^68 * 8 * G + + .quad 0xdbdae701c5738dd3 + .quad 0xf9c6f635b26f1bee + .quad 0x61e96a8042f15ef4 + .quad 0x3aa1d11faf60a4d8 + .quad 0x14fd6dfa726ccc74 + .quad 0x3b084cfe2f53b965 + .quad 0xf33ae4f552a2c8b4 + .quad 0x59aab07a0d40166a + .quad 0x77bcec4c925eac25 + .quad 0x1848718460137738 + .quad 0x5b374337fea9f451 + .quad 0x1865e78ec8e6aa46 + + // 2^72 * 1 * G + + .quad 0xccc4b7c7b66e1f7a + .quad 0x44157e25f50c2f7e + .quad 0x3ef06dfc713eaf1c + .quad 0x582f446752da63f7 + .quad 0x967c54e91c529ccb + .quad 0x30f6269264c635fb + .quad 0x2747aff478121965 + .quad 0x17038418eaf66f5c + .quad 0xc6317bd320324ce4 + .quad 0xa81042e8a4488bc4 + .quad 0xb21ef18b4e5a1364 + .quad 0x0c2a1c4bcda28dc9 + + // 2^72 * 2 * G + + .quad 0xd24dc7d06f1f0447 + .quad 0xb2269e3edb87c059 + .quad 0xd15b0272fbb2d28f + .quad 0x7c558bd1c6f64877 + .quad 0xedc4814869bd6945 + .quad 0x0d6d907dbe1c8d22 + .quad 0xc63bd212d55cc5ab + .quad 0x5a6a9b30a314dc83 + .quad 0xd0ec1524d396463d + .quad 0x12bb628ac35a24f0 + .quad 0xa50c3a791cbc5fa4 + .quad 0x0404a5ca0afbafc3 + + // 2^72 * 3 * G + + .quad 0x8c1f40070aa743d6 + .quad 0xccbad0cb5b265ee8 + .quad 0x574b046b668fd2de + .quad 0x46395bfdcadd9633 + .quad 0x62bc9e1b2a416fd1 + .quad 0xb5c6f728e350598b + .quad 0x04343fd83d5d6967 + .quad 0x39527516e7f8ee98 + .quad 0x117fdb2d1a5d9a9c + .quad 0x9c7745bcd1005c2a + .quad 0xefd4bef154d56fea + .quad 0x76579a29e822d016 + + // 2^72 * 4 * G + + .quad 0x45b68e7e49c02a17 + .quad 0x23cd51a2bca9a37f + .quad 0x3ed65f11ec224c1b + .quad 0x43a384dc9e05bdb1 + .quad 0x333cb51352b434f2 + .quad 0xd832284993de80e1 + .quad 0xb5512887750d35ce + .quad 0x02c514bb2a2777c1 + .quad 0x684bd5da8bf1b645 + .quad 0xfb8bd37ef6b54b53 + .quad 0x313916d7a9b0d253 + .quad 0x1160920961548059 + + // 2^72 * 5 * G + + .quad 0xb44d166929dacfaa + .quad 0xda529f4c8413598f + .quad 0xe9ef63ca453d5559 + .quad 0x351e125bc5698e0b + .quad 0x7a385616369b4dcd + .quad 0x75c02ca7655c3563 + .quad 0x7dc21bf9d4f18021 + .quad 0x2f637d7491e6e042 + .quad 0xd4b49b461af67bbe + .quad 0xd603037ac8ab8961 + .quad 0x71dee19ff9a699fb + .quad 0x7f182d06e7ce2a9a + + // 2^72 * 6 * G + + .quad 0x7a7c8e64ab0168ec + .quad 0xcb5a4a5515edc543 + .quad 0x095519d347cd0eda + .quad 0x67d4ac8c343e93b0 + .quad 0x09454b728e217522 + .quad 0xaa58e8f4d484b8d8 + .quad 0xd358254d7f46903c + .quad 0x44acc043241c5217 + .quad 0x1c7d6bbb4f7a5777 + .quad 0x8b35fed4918313e1 + .quad 0x4adca1c6c96b4684 + .quad 0x556d1c8312ad71bd + + // 2^72 * 7 * G + + .quad 0x17ef40e30c8d3982 + .quad 0x31f7073e15a3fa34 + .quad 0x4f21f3cb0773646e + .quad 0x746c6c6d1d824eff + .quad 0x81f06756b11be821 + .quad 0x0faff82310a3f3dd + .quad 0xf8b2d0556a99465d + .quad 0x097abe38cc8c7f05 + .quad 0x0c49c9877ea52da4 + .quad 0x4c4369559bdc1d43 + .quad 0x022c3809f7ccebd2 + .quad 0x577e14a34bee84bd + + // 2^72 * 8 * G + + .quad 0xf0e268ac61a73b0a + .quad 0xf2fafa103791a5f5 + .quad 0xc1e13e826b6d00e9 + .quad 0x60fa7ee96fd78f42 + .quad 0x94fecebebd4dd72b + .quad 
0xf46a4fda060f2211 + .quad 0x124a5977c0c8d1ff + .quad 0x705304b8fb009295 + .quad 0xb63d1d354d296ec6 + .quad 0xf3c3053e5fad31d8 + .quad 0x670b958cb4bd42ec + .quad 0x21398e0ca16353fd + + // 2^76 * 1 * G + + .quad 0x216ab2ca8da7d2ef + .quad 0x366ad9dd99f42827 + .quad 0xae64b9004fdd3c75 + .quad 0x403a395b53909e62 + .quad 0x86c5fc16861b7e9a + .quad 0xf6a330476a27c451 + .quad 0x01667267a1e93597 + .quad 0x05ffb9cd6082dfeb + .quad 0xa617fa9ff53f6139 + .quad 0x60f2b5e513e66cb6 + .quad 0xd7a8beefb3448aa4 + .quad 0x7a2932856f5ea192 + + // 2^76 * 2 * G + + .quad 0x0b39d761b02de888 + .quad 0x5f550e7ed2414e1f + .quad 0xa6bfa45822e1a940 + .quad 0x050a2f7dfd447b99 + .quad 0xb89c444879639302 + .quad 0x4ae4f19350c67f2c + .quad 0xf0b35da8c81af9c6 + .quad 0x39d0003546871017 + .quad 0x437c3b33a650db77 + .quad 0x6bafe81dbac52bb2 + .quad 0xfe99402d2db7d318 + .quad 0x2b5b7eec372ba6ce + + // 2^76 * 3 * G + + .quad 0xb3bc4bbd83f50eef + .quad 0x508f0c998c927866 + .quad 0x43e76587c8b7e66e + .quad 0x0f7655a3a47f98d9 + .quad 0xa694404d613ac8f4 + .quad 0x500c3c2bfa97e72c + .quad 0x874104d21fcec210 + .quad 0x1b205fb38604a8ee + .quad 0x55ecad37d24b133c + .quad 0x441e147d6038c90b + .quad 0x656683a1d62c6fee + .quad 0x0157d5dc87e0ecae + + // 2^76 * 4 * G + + .quad 0xf2a7af510354c13d + .quad 0xd7a0b145aa372b60 + .quad 0x2869b96a05a3d470 + .quad 0x6528e42d82460173 + .quad 0x95265514d71eb524 + .quad 0xe603d8815df14593 + .quad 0x147cdf410d4de6b7 + .quad 0x5293b1730437c850 + .quad 0x23d0e0814bccf226 + .quad 0x92c745cd8196fb93 + .quad 0x8b61796c59541e5b + .quad 0x40a44df0c021f978 + + // 2^76 * 5 * G + + .quad 0xdaa869894f20ea6a + .quad 0xea14a3d14c620618 + .quad 0x6001fccb090bf8be + .quad 0x35f4e822947e9cf0 + .quad 0x86c96e514bc5d095 + .quad 0xf20d4098fca6804a + .quad 0x27363d89c826ea5d + .quad 0x39ca36565719cacf + .quad 0x97506f2f6f87b75c + .quad 0xc624aea0034ae070 + .quad 0x1ec856e3aad34dd6 + .quad 0x055b0be0e440e58f + + // 2^76 * 6 * G + + .quad 0x6469a17d89735d12 + .quad 0xdb6f27d5e662b9f1 + .quad 0x9fcba3286a395681 + .quad 0x363b8004d269af25 + .quad 0x4d12a04b6ea33da2 + .quad 0x57cf4c15e36126dd + .quad 0x90ec9675ee44d967 + .quad 0x64ca348d2a985aac + .quad 0x99588e19e4c4912d + .quad 0xefcc3b4e1ca5ce6b + .quad 0x4522ea60fa5b98d5 + .quad 0x7064bbab1de4a819 + + // 2^76 * 7 * G + + .quad 0xb919e1515a770641 + .quad 0xa9a2e2c74e7f8039 + .quad 0x7527250b3df23109 + .quad 0x756a7330ac27b78b + .quad 0xa290c06142542129 + .quad 0xf2e2c2aebe8d5b90 + .quad 0xcf2458db76abfe1b + .quad 0x02157ade83d626bf + .quad 0x3e46972a1b9a038b + .quad 0x2e4ee66a7ee03fb4 + .quad 0x81a248776edbb4ca + .quad 0x1a944ee88ecd0563 + + // 2^76 * 8 * G + + .quad 0xd5a91d1151039372 + .quad 0x2ed377b799ca26de + .quad 0xa17202acfd366b6b + .quad 0x0730291bd6901995 + .quad 0xbb40a859182362d6 + .quad 0xb99f55778a4d1abb + .quad 0x8d18b427758559f6 + .quad 0x26c20fe74d26235a + .quad 0x648d1d9fe9cc22f5 + .quad 0x66bc561928dd577c + .quad 0x47d3ed21652439d1 + .quad 0x49d271acedaf8b49 + + // 2^80 * 1 * G + + .quad 0x89f5058a382b33f3 + .quad 0x5ae2ba0bad48c0b4 + .quad 0x8f93b503a53db36e + .quad 0x5aa3ed9d95a232e6 + .quad 0x2798aaf9b4b75601 + .quad 0x5eac72135c8dad72 + .quad 0xd2ceaa6161b7a023 + .quad 0x1bbfb284e98f7d4e + .quad 0x656777e9c7d96561 + .quad 0xcb2b125472c78036 + .quad 0x65053299d9506eee + .quad 0x4a07e14e5e8957cc + + // 2^80 * 2 * G + + .quad 0x4ee412cb980df999 + .quad 0xa315d76f3c6ec771 + .quad 0xbba5edde925c77fd + .quad 0x3f0bac391d313402 + .quad 0x240b58cdc477a49b + .quad 0xfd38dade6447f017 + .quad 0x19928d32a7c86aad + .quad 0x50af7aed84afa081 + .quad 
0x6e4fde0115f65be5 + .quad 0x29982621216109b2 + .quad 0x780205810badd6d9 + .quad 0x1921a316baebd006 + + // 2^80 * 3 * G + + .quad 0x89422f7edfb870fc + .quad 0x2c296beb4f76b3bd + .quad 0x0738f1d436c24df7 + .quad 0x6458df41e273aeb0 + .quad 0xd75aad9ad9f3c18b + .quad 0x566a0eef60b1c19c + .quad 0x3e9a0bac255c0ed9 + .quad 0x7b049deca062c7f5 + .quad 0xdccbe37a35444483 + .quad 0x758879330fedbe93 + .quad 0x786004c312c5dd87 + .quad 0x6093dccbc2950e64 + + // 2^80 * 4 * G + + .quad 0x1ff39a8585e0706d + .quad 0x36d0a5d8b3e73933 + .quad 0x43b9f2e1718f453b + .quad 0x57d1ea084827a97c + .quad 0x6bdeeebe6084034b + .quad 0x3199c2b6780fb854 + .quad 0x973376abb62d0695 + .quad 0x6e3180c98b647d90 + .quad 0xee7ab6e7a128b071 + .quad 0xa4c1596d93a88baa + .quad 0xf7b4de82b2216130 + .quad 0x363e999ddd97bd18 + + // 2^80 * 5 * G + + .quad 0x96a843c135ee1fc4 + .quad 0x976eb35508e4c8cf + .quad 0xb42f6801b58cd330 + .quad 0x48ee9b78693a052b + .quad 0x2f1848dce24baec6 + .quad 0x769b7255babcaf60 + .quad 0x90cb3c6e3cefe931 + .quad 0x231f979bc6f9b355 + .quad 0x5c31de4bcc2af3c6 + .quad 0xb04bb030fe208d1f + .quad 0xb78d7009c14fb466 + .quad 0x079bfa9b08792413 + + // 2^80 * 6 * G + + .quad 0xe3903a51da300df4 + .quad 0x843964233da95ab0 + .quad 0xed3cf12d0b356480 + .quad 0x038c77f684817194 + .quad 0xf3c9ed80a2d54245 + .quad 0x0aa08b7877f63952 + .quad 0xd76dac63d1085475 + .quad 0x1ef4fb159470636b + .quad 0x854e5ee65b167bec + .quad 0x59590a4296d0cdc2 + .quad 0x72b2df3498102199 + .quad 0x575ee92a4a0bff56 + + // 2^80 * 7 * G + + .quad 0xd4c080908a182fcf + .quad 0x30e170c299489dbd + .quad 0x05babd5752f733de + .quad 0x43d4e7112cd3fd00 + .quad 0x5d46bc450aa4d801 + .quad 0xc3af1227a533b9d8 + .quad 0x389e3b262b8906c2 + .quad 0x200a1e7e382f581b + .quad 0x518db967eaf93ac5 + .quad 0x71bc989b056652c0 + .quad 0xfe2b85d9567197f5 + .quad 0x050eca52651e4e38 + + // 2^80 * 8 * G + + .quad 0xc3431ade453f0c9c + .quad 0xe9f5045eff703b9b + .quad 0xfcd97ac9ed847b3d + .quad 0x4b0ee6c21c58f4c6 + .quad 0x97ac397660e668ea + .quad 0x9b19bbfe153ab497 + .quad 0x4cb179b534eca79f + .quad 0x6151c09fa131ae57 + .quad 0x3af55c0dfdf05d96 + .quad 0xdd262ee02ab4ee7a + .quad 0x11b2bb8712171709 + .quad 0x1fef24fa800f030b + + // 2^84 * 1 * G + + .quad 0xb496123a6b6c6609 + .quad 0xa750fe8580ab5938 + .quad 0xf471bf39b7c27a5f + .quad 0x507903ce77ac193c + .quad 0xff91a66a90166220 + .quad 0xf22552ae5bf1e009 + .quad 0x7dff85d87f90df7c + .quad 0x4f620ffe0c736fb9 + .quad 0x62f90d65dfde3e34 + .quad 0xcf28c592b9fa5fad + .quad 0x99c86ef9c6164510 + .quad 0x25d448044a256c84 + + // 2^84 * 2 * G + + .quad 0xbd68230ec7e9b16f + .quad 0x0eb1b9c1c1c5795d + .quad 0x7943c8c495b6b1ff + .quad 0x2f9faf620bbacf5e + .quad 0x2c7c4415c9022b55 + .quad 0x56a0d241812eb1fe + .quad 0xf02ea1c9d7b65e0d + .quad 0x4180512fd5323b26 + .quad 0xa4ff3e698a48a5db + .quad 0xba6a3806bd95403b + .quad 0x9f7ce1af47d5b65d + .quad 0x15e087e55939d2fb + + // 2^84 * 3 * G + + .quad 0x12207543745c1496 + .quad 0xdaff3cfdda38610c + .quad 0xe4e797272c71c34f + .quad 0x39c07b1934bdede9 + .quad 0x8894186efb963f38 + .quad 0x48a00e80dc639bd5 + .quad 0xa4e8092be96c1c99 + .quad 0x5a097d54ca573661 + .quad 0x2d45892b17c9e755 + .quad 0xd033fd7289308df8 + .quad 0x6c2fe9d9525b8bd9 + .quad 0x2edbecf1c11cc079 + + // 2^84 * 4 * G + + .quad 0x1616a4e3c715a0d2 + .quad 0x53623cb0f8341d4d + .quad 0x96ef5329c7e899cb + .quad 0x3d4e8dbba668baa6 + .quad 0xee0f0fddd087a25f + .quad 0x9c7531555c3e34ee + .quad 0x660c572e8fab3ab5 + .quad 0x0854fc44544cd3b2 + .quad 0x61eba0c555edad19 + .quad 0x24b533fef0a83de6 + .quad 0x3b77042883baa5f8 + .quad 
0x678f82b898a47e8d + + // 2^84 * 5 * G + + .quad 0xb1491d0bd6900c54 + .quad 0x3539722c9d132636 + .quad 0x4db928920b362bc9 + .quad 0x4d7cd1fea68b69df + .quad 0x1e09d94057775696 + .quad 0xeed1265c3cd951db + .quad 0xfa9dac2b20bce16f + .quad 0x0f7f76e0e8d089f4 + .quad 0x36d9ebc5d485b00c + .quad 0xa2596492e4adb365 + .quad 0xc1659480c2119ccd + .quad 0x45306349186e0d5f + + // 2^84 * 6 * G + + .quad 0x94ddd0c1a6cdff1d + .quad 0x55f6f115e84213ae + .quad 0x6c935f85992fcf6a + .quad 0x067ee0f54a37f16f + .quad 0x96a414ec2b072491 + .quad 0x1bb2218127a7b65b + .quad 0x6d2849596e8a4af0 + .quad 0x65f3b08ccd27765f + .quad 0xecb29fff199801f7 + .quad 0x9d361d1fa2a0f72f + .quad 0x25f11d2375fd2f49 + .quad 0x124cefe80fe10fe2 + + // 2^84 * 7 * G + + .quad 0x4c126cf9d18df255 + .quad 0xc1d471e9147a63b6 + .quad 0x2c6d3c73f3c93b5f + .quad 0x6be3a6a2e3ff86a2 + .quad 0x1518e85b31b16489 + .quad 0x8faadcb7db710bfb + .quad 0x39b0bdf4a14ae239 + .quad 0x05f4cbea503d20c1 + .quad 0xce040e9ec04145bc + .quad 0xc71ff4e208f6834c + .quad 0xbd546e8dab8847a3 + .quad 0x64666aa0a4d2aba5 + + // 2^84 * 8 * G + + .quad 0x6841435a7c06d912 + .quad 0xca123c21bb3f830b + .quad 0xd4b37b27b1cbe278 + .quad 0x1d753b84c76f5046 + .quad 0xb0c53bf73337e94c + .quad 0x7cb5697e11e14f15 + .quad 0x4b84abac1930c750 + .quad 0x28dd4abfe0640468 + .quad 0x7dc0b64c44cb9f44 + .quad 0x18a3e1ace3925dbf + .quad 0x7a3034862d0457c4 + .quad 0x4c498bf78a0c892e + + // 2^88 * 1 * G + + .quad 0x37d653fb1aa73196 + .quad 0x0f9495303fd76418 + .quad 0xad200b09fb3a17b2 + .quad 0x544d49292fc8613e + .quad 0x22d2aff530976b86 + .quad 0x8d90b806c2d24604 + .quad 0xdca1896c4de5bae5 + .quad 0x28005fe6c8340c17 + .quad 0x6aefba9f34528688 + .quad 0x5c1bff9425107da1 + .quad 0xf75bbbcd66d94b36 + .quad 0x72e472930f316dfa + + // 2^88 * 2 * G + + .quad 0x2695208c9781084f + .quad 0xb1502a0b23450ee1 + .quad 0xfd9daea603efde02 + .quad 0x5a9d2e8c2733a34c + .quad 0x07f3f635d32a7627 + .quad 0x7aaa4d865f6566f0 + .quad 0x3c85e79728d04450 + .quad 0x1fee7f000fe06438 + .quad 0x765305da03dbf7e5 + .quad 0xa4daf2491434cdbd + .quad 0x7b4ad5cdd24a88ec + .quad 0x00f94051ee040543 + + // 2^88 * 3 * G + + .quad 0x8d356b23c3d330b2 + .quad 0xf21c8b9bb0471b06 + .quad 0xb36c316c6e42b83c + .quad 0x07d79c7e8beab10d + .quad 0xd7ef93bb07af9753 + .quad 0x583ed0cf3db766a7 + .quad 0xce6998bf6e0b1ec5 + .quad 0x47b7ffd25dd40452 + .quad 0x87fbfb9cbc08dd12 + .quad 0x8a066b3ae1eec29b + .quad 0x0d57242bdb1fc1bf + .quad 0x1c3520a35ea64bb6 + + // 2^88 * 4 * G + + .quad 0x80d253a6bccba34a + .quad 0x3e61c3a13838219b + .quad 0x90c3b6019882e396 + .quad 0x1c3d05775d0ee66f + .quad 0xcda86f40216bc059 + .quad 0x1fbb231d12bcd87e + .quad 0xb4956a9e17c70990 + .quad 0x38750c3b66d12e55 + .quad 0x692ef1409422e51a + .quad 0xcbc0c73c2b5df671 + .quad 0x21014fe7744ce029 + .quad 0x0621e2c7d330487c + + // 2^88 * 5 * G + + .quad 0xaf9860cc8259838d + .quad 0x90ea48c1c69f9adc + .quad 0x6526483765581e30 + .quad 0x0007d6097bd3a5bc + .quad 0xb7ae1796b0dbf0f3 + .quad 0x54dfafb9e17ce196 + .quad 0x25923071e9aaa3b4 + .quad 0x5d8e589ca1002e9d + .quad 0xc0bf1d950842a94b + .quad 0xb2d3c363588f2e3e + .quad 0x0a961438bb51e2ef + .quad 0x1583d7783c1cbf86 + + // 2^88 * 6 * G + + .quad 0xeceea2ef5da27ae1 + .quad 0x597c3a1455670174 + .quad 0xc9a62a126609167a + .quad 0x252a5f2e81ed8f70 + .quad 0x90034704cc9d28c7 + .quad 0x1d1b679ef72cc58f + .quad 0x16e12b5fbe5b8726 + .quad 0x4958064e83c5580a + .quad 0x0d2894265066e80d + .quad 0xfcc3f785307c8c6b + .quad 0x1b53da780c1112fd + .quad 0x079c170bd843b388 + + // 2^88 * 7 * G + + .quad 0x0506ece464fa6fff + .quad 0xbee3431e6205e523 
+ .quad 0x3579422451b8ea42 + .quad 0x6dec05e34ac9fb00 + .quad 0xcdd6cd50c0d5d056 + .quad 0x9af7686dbb03573b + .quad 0x3ca6723ff3c3ef48 + .quad 0x6768c0d7317b8acc + .quad 0x94b625e5f155c1b3 + .quad 0x417bf3a7997b7b91 + .quad 0xc22cbddc6d6b2600 + .quad 0x51445e14ddcd52f4 + + // 2^88 * 8 * G + + .quad 0x57502b4b3b144951 + .quad 0x8e67ff6b444bbcb3 + .quad 0xb8bd6927166385db + .quad 0x13186f31e39295c8 + .quad 0x893147ab2bbea455 + .quad 0x8c53a24f92079129 + .quad 0x4b49f948be30f7a7 + .quad 0x12e990086e4fd43d + .quad 0xf10c96b37fdfbb2e + .quad 0x9f9a935e121ceaf9 + .quad 0xdf1136c43a5b983f + .quad 0x77b2e3f05d3e99af + + // 2^92 * 1 * G + + .quad 0xfd0d75879cf12657 + .quad 0xe82fef94e53a0e29 + .quad 0xcc34a7f05bbb4be7 + .quad 0x0b251172a50c38a2 + .quad 0x9532f48fcc5cd29b + .quad 0x2ba851bea3ce3671 + .quad 0x32dacaa051122941 + .quad 0x478d99d9350004f2 + .quad 0x1d5ad94890bb02c0 + .quad 0x50e208b10ec25115 + .quad 0xa26a22894ef21702 + .quad 0x4dc923343b524805 + + // 2^92 * 2 * G + + .quad 0xe3828c400f8086b6 + .quad 0x3f77e6f7979f0dc8 + .quad 0x7ef6de304df42cb4 + .quad 0x5265797cb6abd784 + .quad 0x3ad3e3ebf36c4975 + .quad 0xd75d25a537862125 + .quad 0xe873943da025a516 + .quad 0x6bbc7cb4c411c847 + .quad 0x3c6f9cd1d4a50d56 + .quad 0xb6244077c6feab7e + .quad 0x6ff9bf483580972e + .quad 0x00375883b332acfb + + // 2^92 * 3 * G + + .quad 0x0001b2cd28cb0940 + .quad 0x63fb51a06f1c24c9 + .quad 0xb5ad8691dcd5ca31 + .quad 0x67238dbd8c450660 + .quad 0xc98bec856c75c99c + .quad 0xe44184c000e33cf4 + .quad 0x0a676b9bba907634 + .quad 0x669e2cb571f379d7 + .quad 0xcb116b73a49bd308 + .quad 0x025aad6b2392729e + .quad 0xb4793efa3f55d9b1 + .quad 0x72a1056140678bb9 + + // 2^92 * 4 * G + + .quad 0xa2b6812b1cc9249d + .quad 0x62866eee21211f58 + .quad 0x2cb5c5b85df10ece + .quad 0x03a6b259e263ae00 + .quad 0x0d8d2909e2e505b6 + .quad 0x98ca78abc0291230 + .quad 0x77ef5569a9b12327 + .quad 0x7c77897b81439b47 + .quad 0xf1c1b5e2de331cb5 + .quad 0x5a9f5d8e15fca420 + .quad 0x9fa438f17bd932b1 + .quad 0x2a381bf01c6146e7 + + // 2^92 * 5 * G + + .quad 0xac9b9879cfc811c1 + .quad 0x8b7d29813756e567 + .quad 0x50da4e607c70edfc + .quad 0x5dbca62f884400b6 + .quad 0xf7c0be32b534166f + .quad 0x27e6ca6419cf70d4 + .quad 0x934df7d7a957a759 + .quad 0x5701461dabdec2aa + .quad 0x2c6747402c915c25 + .quad 0x1bdcd1a80b0d340a + .quad 0x5e5601bd07b43f5f + .quad 0x2555b4e05539a242 + + // 2^92 * 6 * G + + .quad 0x6fc09f5266ddd216 + .quad 0xdce560a7c8e37048 + .quad 0xec65939da2df62fd + .quad 0x7a869ae7e52ed192 + .quad 0x78409b1d87e463d4 + .quad 0xad4da95acdfb639d + .quad 0xec28773755259b9c + .quad 0x69c806e9c31230ab + .quad 0x7b48f57414bb3f22 + .quad 0x68c7cee4aedccc88 + .quad 0xed2f936179ed80be + .quad 0x25d70b885f77bc4b + + // 2^92 * 7 * G + + .quad 0x4151c3d9762bf4de + .quad 0x083f435f2745d82b + .quad 0x29775a2e0d23ddd5 + .quad 0x138e3a6269a5db24 + .quad 0x98459d29bb1ae4d4 + .quad 0x56b9c4c739f954ec + .quad 0x832743f6c29b4b3e + .quad 0x21ea8e2798b6878a + .quad 0x87bef4b46a5a7b9c + .quad 0xd2299d1b5fc1d062 + .quad 0x82409818dd321648 + .quad 0x5c5abeb1e5a2e03d + + // 2^92 * 8 * G + + .quad 0x14722af4b73c2ddb + .quad 0xbc470c5f5a05060d + .quad 0x00943eac2581b02e + .quad 0x0e434b3b1f499c8f + .quad 0x02cde6de1306a233 + .quad 0x7b5a52a2116f8ec7 + .quad 0xe1c681f4c1163b5b + .quad 0x241d350660d32643 + .quad 0x6be4404d0ebc52c7 + .quad 0xae46233bb1a791f5 + .quad 0x2aec170ed25db42b + .quad 0x1d8dfd966645d694 + + // 2^96 * 1 * G + + .quad 0x296fa9c59c2ec4de + .quad 0xbc8b61bf4f84f3cb + .quad 0x1c7706d917a8f908 + .quad 0x63b795fc7ad3255d + .quad 0xd598639c12ddb0a4 + .quad 
0xa5d19f30c024866b + .quad 0xd17c2f0358fce460 + .quad 0x07a195152e095e8a + .quad 0xa8368f02389e5fc8 + .quad 0x90433b02cf8de43b + .quad 0xafa1fd5dc5412643 + .quad 0x3e8fe83d032f0137 + + // 2^96 * 2 * G + + .quad 0x2f8b15b90570a294 + .quad 0x94f2427067084549 + .quad 0xde1c5ae161bbfd84 + .quad 0x75ba3b797fac4007 + .quad 0x08704c8de8efd13c + .quad 0xdfc51a8e33e03731 + .quad 0xa59d5da51260cde3 + .quad 0x22d60899a6258c86 + .quad 0x6239dbc070cdd196 + .quad 0x60fe8a8b6c7d8a9a + .quad 0xb38847bceb401260 + .quad 0x0904d07b87779e5e + + // 2^96 * 3 * G + + .quad 0xb4ce1fd4ddba919c + .quad 0xcf31db3ec74c8daa + .quad 0x2c63cc63ad86cc51 + .quad 0x43e2143fbc1dde07 + .quad 0xf4322d6648f940b9 + .quad 0x06952f0cbd2d0c39 + .quad 0x167697ada081f931 + .quad 0x6240aacebaf72a6c + .quad 0xf834749c5ba295a0 + .quad 0xd6947c5bca37d25a + .quad 0x66f13ba7e7c9316a + .quad 0x56bdaf238db40cac + + // 2^96 * 4 * G + + .quad 0x362ab9e3f53533eb + .quad 0x338568d56eb93d40 + .quad 0x9e0e14521d5a5572 + .quad 0x1d24a86d83741318 + .quad 0x1310d36cc19d3bb2 + .quad 0x062a6bb7622386b9 + .quad 0x7c9b8591d7a14f5c + .quad 0x03aa31507e1e5754 + .quad 0xf4ec7648ffd4ce1f + .quad 0xe045eaf054ac8c1c + .quad 0x88d225821d09357c + .quad 0x43b261dc9aeb4859 + + // 2^96 * 5 * G + + .quad 0xe55b1e1988bb79bb + .quad 0xa09ed07dc17a359d + .quad 0xb02c2ee2603dea33 + .quad 0x326055cf5b276bc2 + .quad 0x19513d8b6c951364 + .quad 0x94fe7126000bf47b + .quad 0x028d10ddd54f9567 + .quad 0x02b4d5e242940964 + .quad 0xb4a155cb28d18df2 + .quad 0xeacc4646186ce508 + .quad 0xc49cf4936c824389 + .quad 0x27a6c809ae5d3410 + + // 2^96 * 6 * G + + .quad 0x8ba6ebcd1f0db188 + .quad 0x37d3d73a675a5be8 + .quad 0xf22edfa315f5585a + .quad 0x2cb67174ff60a17e + .quad 0xcd2c270ac43d6954 + .quad 0xdd4a3e576a66cab2 + .quad 0x79fa592469d7036c + .quad 0x221503603d8c2599 + .quad 0x59eecdf9390be1d0 + .quad 0xa9422044728ce3f1 + .quad 0x82891c667a94f0f4 + .quad 0x7b1df4b73890f436 + + // 2^96 * 7 * G + + .quad 0xe492f2e0b3b2a224 + .quad 0x7c6c9e062b551160 + .quad 0x15eb8fe20d7f7b0e + .quad 0x61fcef2658fc5992 + .quad 0x5f2e221807f8f58c + .quad 0xe3555c9fd49409d4 + .quad 0xb2aaa88d1fb6a630 + .quad 0x68698245d352e03d + .quad 0xdbb15d852a18187a + .quad 0xf3e4aad386ddacd7 + .quad 0x44bae2810ff6c482 + .quad 0x46cf4c473daf01cf + + // 2^96 * 8 * G + + .quad 0x426525ed9ec4e5f9 + .quad 0x0e5eda0116903303 + .quad 0x72b1a7f2cbe5cadc + .quad 0x29387bcd14eb5f40 + .quad 0x213c6ea7f1498140 + .quad 0x7c1e7ef8392b4854 + .quad 0x2488c38c5629ceba + .quad 0x1065aae50d8cc5bb + .quad 0x1c2c4525df200d57 + .quad 0x5c3b2dd6bfca674a + .quad 0x0a07e7b1e1834030 + .quad 0x69a198e64f1ce716 + + // 2^100 * 1 * G + + .quad 0x7afcd613efa9d697 + .quad 0x0cc45aa41c067959 + .quad 0xa56fe104c1fada96 + .quad 0x3a73b70472e40365 + .quad 0x7b26e56b9e2d4734 + .quad 0xc4c7132b81c61675 + .quad 0xef5c9525ec9cde7f + .quad 0x39c80b16e71743ad + .quad 0x0f196e0d1b826c68 + .quad 0xf71ff0e24960e3db + .quad 0x6113167023b7436c + .quad 0x0cf0ea5877da7282 + + // 2^100 * 2 * G + + .quad 0x196c80a4ddd4ccbd + .quad 0x22e6f55d95f2dd9d + .quad 0xc75e33c740d6c71b + .quad 0x7bb51279cb3c042f + .quad 0xe332ced43ba6945a + .quad 0xde0b1361e881c05d + .quad 0x1ad40f095e67ed3b + .quad 0x5da8acdab8c63d5d + .quad 0xc4b6664a3a70159f + .quad 0x76194f0f0a904e14 + .quad 0xa5614c39a4096c13 + .quad 0x6cd0ff50979feced + + // 2^100 * 3 * G + + .quad 0xc0e067e78f4428ac + .quad 0x14835ab0a61135e3 + .quad 0xf21d14f338062935 + .quad 0x6390a4c8df04849c + .quad 0x7fecfabdb04ba18e + .quad 0xd0fc7bfc3bddbcf7 + .quad 0xa41d486e057a131c + .quad 0x641a4391f2223a61 + .quad 
0xc5c6b95aa606a8db + .quad 0x914b7f9eb06825f1 + .quad 0x2a731f6b44fc9eff + .quad 0x30ddf38562705cfc + + // 2^100 * 4 * G + + .quad 0x4e3dcbdad1bff7f9 + .quad 0xc9118e8220645717 + .quad 0xbacccebc0f189d56 + .quad 0x1b4822e9d4467668 + .quad 0x33bef2bd68bcd52c + .quad 0xc649dbb069482ef2 + .quad 0xb5b6ee0c41cb1aee + .quad 0x5c294d270212a7e5 + .quad 0xab360a7f25563781 + .quad 0x2512228a480f7958 + .quad 0xc75d05276114b4e3 + .quad 0x222d9625d976fe2a + + // 2^100 * 5 * G + + .quad 0x1c717f85b372ace1 + .quad 0x81930e694638bf18 + .quad 0x239cad056bc08b58 + .quad 0x0b34271c87f8fff4 + .quad 0x0f94be7e0a344f85 + .quad 0xeb2faa8c87f22c38 + .quad 0x9ce1e75e4ee16f0f + .quad 0x43e64e5418a08dea + .quad 0x8155e2521a35ce63 + .quad 0xbe100d4df912028e + .quad 0xbff80bf8a57ddcec + .quad 0x57342dc96d6bc6e4 + + // 2^100 * 6 * G + + .quad 0xefeef065c8ce5998 + .quad 0xbf029510b5cbeaa2 + .quad 0x8c64a10620b7c458 + .quad 0x35134fb231c24855 + .quad 0xf3c3bcb71e707bf6 + .quad 0x351d9b8c7291a762 + .quad 0x00502e6edad69a33 + .quad 0x522f521f1ec8807f + .quad 0x272c1f46f9a3902b + .quad 0xc91ba3b799657bcc + .quad 0xae614b304f8a1c0e + .quad 0x7afcaad70b99017b + + // 2^100 * 7 * G + + .quad 0xc25ded54a4b8be41 + .quad 0x902d13e11bb0e2dd + .quad 0x41f43233cde82ab2 + .quad 0x1085faa5c3aae7cb + .quad 0xa88141ecef842b6b + .quad 0x55e7b14797abe6c5 + .quad 0x8c748f9703784ffe + .quad 0x5b50a1f7afcd00b7 + .quad 0x9b840f66f1361315 + .quad 0x18462242701003e9 + .quad 0x65ed45fae4a25080 + .quad 0x0a2862393fda7320 + + // 2^100 * 8 * G + + .quad 0x46ab13c8347cbc9d + .quad 0x3849e8d499c12383 + .quad 0x4cea314087d64ac9 + .quad 0x1f354134b1a29ee7 + .quad 0x960e737b6ecb9d17 + .quad 0xfaf24948d67ceae1 + .quad 0x37e7a9b4d55e1b89 + .quad 0x5cb7173cb46c59eb + .quad 0x4a89e68b82b7abf0 + .quad 0xf41cd9279ba6b7b9 + .quad 0x16e6c210e18d876f + .quad 0x7cacdb0f7f1b09c6 + + // 2^104 * 1 * G + + .quad 0x9062b2e0d91a78bc + .quad 0x47c9889cc8509667 + .quad 0x9df54a66405070b8 + .quad 0x7369e6a92493a1bf + .quad 0xe1014434dcc5caed + .quad 0x47ed5d963c84fb33 + .quad 0x70019576ed86a0e7 + .quad 0x25b2697bd267f9e4 + .quad 0x9d673ffb13986864 + .quad 0x3ca5fbd9415dc7b8 + .quad 0xe04ecc3bdf273b5e + .quad 0x1420683db54e4cd2 + + // 2^104 * 2 * G + + .quad 0xb478bd1e249dd197 + .quad 0x620c35005e58c102 + .quad 0xfb02d32fccbaac5c + .quad 0x60b63bebf508a72d + .quad 0x34eebb6fc1cc5ad0 + .quad 0x6a1b0ce99646ac8b + .quad 0xd3b0da49a66bde53 + .quad 0x31e83b4161d081c1 + .quad 0x97e8c7129e062b4f + .quad 0x49e48f4f29320ad8 + .quad 0x5bece14b6f18683f + .quad 0x55cf1eb62d550317 + + // 2^104 * 3 * G + + .quad 0x5879101065c23d58 + .quad 0x8b9d086d5094819c + .quad 0xe2402fa912c55fa7 + .quad 0x669a6564570891d4 + .quad 0x3076b5e37df58c52 + .quad 0xd73ab9dde799cc36 + .quad 0xbd831ce34913ee20 + .quad 0x1a56fbaa62ba0133 + .quad 0x943e6b505c9dc9ec + .quad 0x302557bba77c371a + .quad 0x9873ae5641347651 + .quad 0x13c4836799c58a5c + + // 2^104 * 4 * G + + .quad 0x423a5d465ab3e1b9 + .quad 0xfc13c187c7f13f61 + .quad 0x19f83664ecb5b9b6 + .quad 0x66f80c93a637b607 + .quad 0xc4dcfb6a5d8bd080 + .quad 0xdeebc4ec571a4842 + .quad 0xd4b2e883b8e55365 + .quad 0x50bdc87dc8e5b827 + .quad 0x606d37836edfe111 + .quad 0x32353e15f011abd9 + .quad 0x64b03ac325b73b96 + .quad 0x1dd56444725fd5ae + + // 2^104 * 5 * G + + .quad 0x8fa47ff83362127d + .quad 0xbc9f6ac471cd7c15 + .quad 0x6e71454349220c8b + .quad 0x0e645912219f732e + .quad 0xc297e60008bac89a + .quad 0x7d4cea11eae1c3e0 + .quad 0xf3e38be19fe7977c + .quad 0x3a3a450f63a305cd + .quad 0x078f2f31d8394627 + .quad 0x389d3183de94a510 + .quad 0xd1e36c6d17996f80 + .quad 
0x318c8d9393a9a87b + + // 2^104 * 6 * G + + .quad 0xf2745d032afffe19 + .quad 0x0c9f3c497f24db66 + .quad 0xbc98d3e3ba8598ef + .quad 0x224c7c679a1d5314 + .quad 0x5d669e29ab1dd398 + .quad 0xfc921658342d9e3b + .quad 0x55851dfdf35973cd + .quad 0x509a41c325950af6 + .quad 0xbdc06edca6f925e9 + .quad 0x793ef3f4641b1f33 + .quad 0x82ec12809d833e89 + .quad 0x05bff02328a11389 + + // 2^104 * 7 * G + + .quad 0x3632137023cae00b + .quad 0x544acf0ad1accf59 + .quad 0x96741049d21a1c88 + .quad 0x780b8cc3fa2a44a7 + .quad 0x6881a0dd0dc512e4 + .quad 0x4fe70dc844a5fafe + .quad 0x1f748e6b8f4a5240 + .quad 0x576277cdee01a3ea + .quad 0x1ef38abc234f305f + .quad 0x9a577fbd1405de08 + .quad 0x5e82a51434e62a0d + .quad 0x5ff418726271b7a1 + + // 2^104 * 8 * G + + .quad 0x398e080c1789db9d + .quad 0xa7602025f3e778f5 + .quad 0xfa98894c06bd035d + .quad 0x106a03dc25a966be + .quad 0xe5db47e813b69540 + .quad 0xf35d2a3b432610e1 + .quad 0xac1f26e938781276 + .quad 0x29d4db8ca0a0cb69 + .quad 0xd9ad0aaf333353d0 + .quad 0x38669da5acd309e5 + .quad 0x3c57658ac888f7f0 + .quad 0x4ab38a51052cbefa + + // 2^108 * 1 * G + + .quad 0xdfdacbee4324c0e9 + .quad 0x054442883f955bb7 + .quad 0xdef7aaa8ea31609f + .quad 0x68aee70642287cff + .quad 0xf68fe2e8809de054 + .quad 0xe3bc096a9c82bad1 + .quad 0x076353d40aadbf45 + .quad 0x7b9b1fb5dea1959e + .quad 0xf01cc8f17471cc0c + .quad 0x95242e37579082bb + .quad 0x27776093d3e46b5f + .quad 0x2d13d55a28bd85fb + + // 2^108 * 2 * G + + .quad 0xfac5d2065b35b8da + .quad 0xa8da8a9a85624bb7 + .quad 0xccd2ca913d21cd0f + .quad 0x6b8341ee8bf90d58 + .quad 0xbf019cce7aee7a52 + .quad 0xa8ded2b6e454ead3 + .quad 0x3c619f0b87a8bb19 + .quad 0x3619b5d7560916d8 + .quad 0x3579f26b0282c4b2 + .quad 0x64d592f24fafefae + .quad 0xb7cded7b28c8c7c0 + .quad 0x6a927b6b7173a8d7 + + // 2^108 * 3 * G + + .quad 0x1f6db24f986e4656 + .quad 0x1021c02ed1e9105b + .quad 0xf8ff3fff2cc0a375 + .quad 0x1d2a6bf8c6c82592 + .quad 0x8d7040863ece88eb + .quad 0xf0e307a980eec08c + .quad 0xac2250610d788fda + .quad 0x056d92a43a0d478d + .quad 0x1b05a196fc3da5a1 + .quad 0x77d7a8c243b59ed0 + .quad 0x06da3d6297d17918 + .quad 0x66fbb494f12353f7 + + // 2^108 * 4 * G + + .quad 0x751a50b9d85c0fb8 + .quad 0xd1afdc258bcf097b + .quad 0x2f16a6a38309a969 + .quad 0x14ddff9ee5b00659 + .quad 0xd6d70996f12309d6 + .quad 0xdbfb2385e9c3d539 + .quad 0x46d602b0f7552411 + .quad 0x270a0b0557843e0c + .quad 0x61ff0640a7862bcc + .quad 0x81cac09a5f11abfe + .quad 0x9047830455d12abb + .quad 0x19a4bde1945ae873 + + // 2^108 * 5 * G + + .quad 0x9b9f26f520a6200a + .quad 0x64804443cf13eaf8 + .quad 0x8a63673f8631edd3 + .quad 0x72bbbce11ed39dc1 + .quad 0x40c709dec076c49f + .quad 0x657bfaf27f3e53f6 + .quad 0x40662331eca042c4 + .quad 0x14b375487eb4df04 + .quad 0xae853c94ab66dc47 + .quad 0xeb62343edf762d6e + .quad 0xf08e0e186fb2f7d1 + .quad 0x4f0b1c02700ab37a + + // 2^108 * 6 * G + + .quad 0xe1706787d81951fa + .quad 0xa10a2c8eb290c77b + .quad 0xe7382fa03ed66773 + .quad 0x0a4d84710bcc4b54 + .quad 0x79fd21ccc1b2e23f + .quad 0x4ae7c281453df52a + .quad 0xc8172ec9d151486b + .quad 0x68abe9443e0a7534 + .quad 0xda12c6c407831dcb + .quad 0x0da230d74d5c510d + .quad 0x4ab1531e6bd404e1 + .quad 0x4106b166bcf440ef + + // 2^108 * 7 * G + + .quad 0x02e57a421cd23668 + .quad 0x4ad9fb5d0eaef6fd + .quad 0x954e6727b1244480 + .quad 0x7f792f9d2699f331 + .quad 0xa485ccd539e4ecf2 + .quad 0x5aa3f3ad0555bab5 + .quad 0x145e3439937df82d + .quad 0x1238b51e1214283f + .quad 0x0b886b925fd4d924 + .quad 0x60906f7a3626a80d + .quad 0xecd367b4b98abd12 + .quad 0x2876beb1def344cf + + // 2^108 * 8 * G + + .quad 0xdc84e93563144691 + .quad 
0x632fe8a0d61f23f4 + .quad 0x4caa800612a9a8d5 + .quad 0x48f9dbfa0e9918d3 + .quad 0xd594b3333a8a85f8 + .quad 0x4ea37689e78d7d58 + .quad 0x73bf9f455e8e351f + .quad 0x5507d7d2bc41ebb4 + .quad 0x1ceb2903299572fc + .quad 0x7c8ccaa29502d0ee + .quad 0x91bfa43411cce67b + .quad 0x5784481964a831e7 + + // 2^112 * 1 * G + + .quad 0xda7c2b256768d593 + .quad 0x98c1c0574422ca13 + .quad 0xf1a80bd5ca0ace1d + .quad 0x29cdd1adc088a690 + .quad 0xd6cfd1ef5fddc09c + .quad 0xe82b3efdf7575dce + .quad 0x25d56b5d201634c2 + .quad 0x3041c6bb04ed2b9b + .quad 0x0ff2f2f9d956e148 + .quad 0xade797759f356b2e + .quad 0x1a4698bb5f6c025c + .quad 0x104bbd6814049a7b + + // 2^112 * 2 * G + + .quad 0x51f0fd3168f1ed67 + .quad 0x2c811dcdd86f3bc2 + .quad 0x44dc5c4304d2f2de + .quad 0x5be8cc57092a7149 + .quad 0xa95d9a5fd67ff163 + .quad 0xe92be69d4cc75681 + .quad 0xb7f8024cde20f257 + .quad 0x204f2a20fb072df5 + .quad 0xc8143b3d30ebb079 + .quad 0x7589155abd652e30 + .quad 0x653c3c318f6d5c31 + .quad 0x2570fb17c279161f + + // 2^112 * 3 * G + + .quad 0x3efa367f2cb61575 + .quad 0xf5f96f761cd6026c + .quad 0xe8c7142a65b52562 + .quad 0x3dcb65ea53030acd + .quad 0x192ea9550bb8245a + .quad 0xc8e6fba88f9050d1 + .quad 0x7986ea2d88a4c935 + .quad 0x241c5f91de018668 + .quad 0x28d8172940de6caa + .quad 0x8fbf2cf022d9733a + .quad 0x16d7fcdd235b01d1 + .quad 0x08420edd5fcdf0e5 + + // 2^112 * 4 * G + + .quad 0xcdff20ab8362fa4a + .quad 0x57e118d4e21a3e6e + .quad 0xe3179617fc39e62b + .quad 0x0d9a53efbc1769fd + .quad 0x0358c34e04f410ce + .quad 0xb6135b5a276e0685 + .quad 0x5d9670c7ebb91521 + .quad 0x04d654f321db889c + .quad 0x5e7dc116ddbdb5d5 + .quad 0x2954deb68da5dd2d + .quad 0x1cb608173334a292 + .quad 0x4a7a4f2618991ad7 + + // 2^112 * 5 * G + + .quad 0xf4a718025fb15f95 + .quad 0x3df65f346b5c1b8f + .quad 0xcdfcf08500e01112 + .quad 0x11b50c4cddd31848 + .quad 0x24c3b291af372a4b + .quad 0x93da8270718147f2 + .quad 0xdd84856486899ef2 + .quad 0x4a96314223e0ee33 + .quad 0xa6e8274408a4ffd6 + .quad 0x738e177e9c1576d9 + .quad 0x773348b63d02b3f2 + .quad 0x4f4bce4dce6bcc51 + + // 2^112 * 6 * G + + .quad 0xa71fce5ae2242584 + .quad 0x26ea725692f58a9e + .quad 0xd21a09d71cea3cf4 + .quad 0x73fcdd14b71c01e6 + .quad 0x30e2616ec49d0b6f + .quad 0xe456718fcaec2317 + .quad 0x48eb409bf26b4fa6 + .quad 0x3042cee561595f37 + .quad 0x427e7079449bac41 + .quad 0x855ae36dbce2310a + .quad 0x4cae76215f841a7c + .quad 0x389e740c9a9ce1d6 + + // 2^112 * 7 * G + + .quad 0x64fcb3ae34dcb9ce + .quad 0x97500323e348d0ad + .quad 0x45b3f07d62c6381b + .quad 0x61545379465a6788 + .quad 0xc9bd78f6570eac28 + .quad 0xe55b0b3227919ce1 + .quad 0x65fc3eaba19b91ed + .quad 0x25c425e5d6263690 + .quad 0x3f3e06a6f1d7de6e + .quad 0x3ef976278e062308 + .quad 0x8c14f6264e8a6c77 + .quad 0x6539a08915484759 + + // 2^112 * 8 * G + + .quad 0xe9d21f74c3d2f773 + .quad 0xc150544125c46845 + .quad 0x624e5ce8f9b99e33 + .quad 0x11c5e4aac5cd186c + .quad 0xddc4dbd414bb4a19 + .quad 0x19b2bc3c98424f8e + .quad 0x48a89fd736ca7169 + .quad 0x0f65320ef019bd90 + .quad 0xd486d1b1cafde0c6 + .quad 0x4f3fe6e3163b5181 + .quad 0x59a8af0dfaf2939a + .quad 0x4cabc7bdec33072a + + // 2^116 * 1 * G + + .quad 0x16faa8fb532f7428 + .quad 0xdbd42ea046a4e272 + .quad 0x5337653b8b9ea480 + .quad 0x4065947223973f03 + .quad 0xf7c0a19c1a54a044 + .quad 0x4a1c5e2477bd9fbb + .quad 0xa6e3ca115af22972 + .quad 0x1819bb953f2e9e0d + .quad 0x498fbb795e042e84 + .quad 0x7d0dd89a7698b714 + .quad 0x8bfb0ba427fe6295 + .quad 0x36ba82e721200524 + + // 2^116 * 2 * G + + .quad 0xd60ecbb74245ec41 + .quad 0xfd9be89e34348716 + .quad 0xc9240afee42284de + .quad 0x4472f648d0531db4 + .quad 
0xc8d69d0a57274ed5 + .quad 0x45ba803260804b17 + .quad 0xdf3cda102255dfac + .quad 0x77d221232709b339 + .quad 0x498a6d7064ad94d8 + .quad 0xa5b5c8fd9af62263 + .quad 0x8ca8ed0545c141f4 + .quad 0x2c63bec3662d358c + + // 2^116 * 3 * G + + .quad 0x7fe60d8bea787955 + .quad 0xb9dc117eb5f401b7 + .quad 0x91c7c09a19355cce + .quad 0x22692ef59442bedf + .quad 0x9a518b3a8586f8bf + .quad 0x9ee71af6cbb196f0 + .quad 0xaa0625e6a2385cf2 + .quad 0x1deb2176ddd7c8d1 + .quad 0x8563d19a2066cf6c + .quad 0x401bfd8c4dcc7cd7 + .quad 0xd976a6becd0d8f62 + .quad 0x67cfd773a278b05e + + // 2^116 * 4 * G + + .quad 0x8dec31faef3ee475 + .quad 0x99dbff8a9e22fd92 + .quad 0x512d11594e26cab1 + .quad 0x0cde561eec4310b9 + .quad 0x2d5fa9855a4e586a + .quad 0x65f8f7a449beab7e + .quad 0xaa074dddf21d33d3 + .quad 0x185cba721bcb9dee + .quad 0x93869da3f4e3cb41 + .quad 0xbf0392f540f7977e + .quad 0x026204fcd0463b83 + .quad 0x3ec91a769eec6eed + + // 2^116 * 5 * G + + .quad 0x1e9df75bf78166ad + .quad 0x4dfda838eb0cd7af + .quad 0xba002ed8c1eaf988 + .quad 0x13fedb3e11f33cfc + .quad 0x0fad2fb7b0a3402f + .quad 0x46615ecbfb69f4a8 + .quad 0xf745bcc8c5f8eaa6 + .quad 0x7a5fa8794a94e896 + .quad 0x52958faa13cd67a1 + .quad 0x965ee0818bdbb517 + .quad 0x16e58daa2e8845b3 + .quad 0x357d397d5499da8f + + // 2^116 * 6 * G + + .quad 0x1ebfa05fb0bace6c + .quad 0xc934620c1caf9a1e + .quad 0xcc771cc41d82b61a + .quad 0x2d94a16aa5f74fec + .quad 0x481dacb4194bfbf8 + .quad 0x4d77e3f1bae58299 + .quad 0x1ef4612e7d1372a0 + .quad 0x3a8d867e70ff69e1 + .quad 0x6f58cd5d55aff958 + .quad 0xba3eaa5c75567721 + .quad 0x75c123999165227d + .quad 0x69be1343c2f2b35e + + // 2^116 * 7 * G + + .quad 0x0e091d5ee197c92a + .quad 0x4f51019f2945119f + .quad 0x143679b9f034e99c + .quad 0x7d88112e4d24c696 + .quad 0x82bbbdac684b8de3 + .quad 0xa2f4c7d03fca0718 + .quad 0x337f92fbe096aaa8 + .quad 0x200d4d8c63587376 + .quad 0x208aed4b4893b32b + .quad 0x3efbf23ebe59b964 + .quad 0xd762deb0dba5e507 + .quad 0x69607bd681bd9d94 + + // 2^116 * 8 * G + + .quad 0xf6be021068de1ce1 + .quad 0xe8d518e70edcbc1f + .quad 0xe3effdd01b5505a5 + .quad 0x35f63353d3ec3fd0 + .quad 0x3b7f3bd49323a902 + .quad 0x7c21b5566b2c6e53 + .quad 0xe5ba8ff53a7852a7 + .quad 0x28bc77a5838ece00 + .quad 0x63ba78a8e25d8036 + .quad 0x63651e0094333490 + .quad 0x48d82f20288ce532 + .quad 0x3a31abfa36b57524 + + // 2^120 * 1 * G + + .quad 0x239e9624089c0a2e + .quad 0xc748c4c03afe4738 + .quad 0x17dbed2a764fa12a + .quad 0x639b93f0321c8582 + .quad 0xc08f788f3f78d289 + .quad 0xfe30a72ca1404d9f + .quad 0xf2778bfccf65cc9d + .quad 0x7ee498165acb2021 + .quad 0x7bd508e39111a1c3 + .quad 0x2b2b90d480907489 + .quad 0xe7d2aec2ae72fd19 + .quad 0x0edf493c85b602a6 + + // 2^120 * 2 * G + + .quad 0xaecc8158599b5a68 + .quad 0xea574f0febade20e + .quad 0x4fe41d7422b67f07 + .quad 0x403b92e3019d4fb4 + .quad 0x6767c4d284764113 + .quad 0xa090403ff7f5f835 + .quad 0x1c8fcffacae6bede + .quad 0x04c00c54d1dfa369 + .quad 0x4dc22f818b465cf8 + .quad 0x71a0f35a1480eff8 + .quad 0xaee8bfad04c7d657 + .quad 0x355bb12ab26176f4 + + // 2^120 * 3 * G + + .quad 0xa71e64cc7493bbf4 + .quad 0xe5bd84d9eca3b0c3 + .quad 0x0a6bc50cfa05e785 + .quad 0x0f9b8132182ec312 + .quad 0xa301dac75a8c7318 + .quad 0xed90039db3ceaa11 + .quad 0x6f077cbf3bae3f2d + .quad 0x7518eaf8e052ad8e + .quad 0xa48859c41b7f6c32 + .quad 0x0f2d60bcf4383298 + .quad 0x1815a929c9b1d1d9 + .quad 0x47c3871bbb1755c4 + + // 2^120 * 4 * G + + .quad 0x5144539771ec4f48 + .quad 0xf805b17dc98c5d6e + .quad 0xf762c11a47c3c66b + .quad 0x00b89b85764699dc + .quad 0xfbe65d50c85066b0 + .quad 0x62ecc4b0b3a299b0 + .quad 0xe53754ea441ae8e0 + .quad 
0x08fea02ce8d48d5f + .quad 0x824ddd7668deead0 + .quad 0xc86445204b685d23 + .quad 0xb514cfcd5d89d665 + .quad 0x473829a74f75d537 + + // 2^120 * 5 * G + + .quad 0x82d2da754679c418 + .quad 0xe63bd7d8b2618df0 + .quad 0x355eef24ac47eb0a + .quad 0x2078684c4833c6b4 + .quad 0x23d9533aad3902c9 + .quad 0x64c2ddceef03588f + .quad 0x15257390cfe12fb4 + .quad 0x6c668b4d44e4d390 + .quad 0x3b48cf217a78820c + .quad 0xf76a0ab281273e97 + .quad 0xa96c65a78c8eed7b + .quad 0x7411a6054f8a433f + + // 2^120 * 6 * G + + .quad 0x4d659d32b99dc86d + .quad 0x044cdc75603af115 + .quad 0xb34c712cdcc2e488 + .quad 0x7c136574fb8134ff + .quad 0x579ae53d18b175b4 + .quad 0x68713159f392a102 + .quad 0x8455ecba1eef35f5 + .quad 0x1ec9a872458c398f + .quad 0xb8e6a4d400a2509b + .quad 0x9b81d7020bc882b4 + .quad 0x57e7cc9bf1957561 + .quad 0x3add88a5c7cd6460 + + // 2^120 * 7 * G + + .quad 0xab895770b635dcf2 + .quad 0x02dfef6cf66c1fbc + .quad 0x85530268beb6d187 + .quad 0x249929fccc879e74 + .quad 0x85c298d459393046 + .quad 0x8f7e35985ff659ec + .quad 0x1d2ca22af2f66e3a + .quad 0x61ba1131a406a720 + .quad 0xa3d0a0f116959029 + .quad 0x023b6b6cba7ebd89 + .quad 0x7bf15a3e26783307 + .quad 0x5620310cbbd8ece7 + + // 2^120 * 8 * G + + .quad 0x528993434934d643 + .quad 0xb9dbf806a51222f5 + .quad 0x8f6d878fc3f41c22 + .quad 0x37676a2a4d9d9730 + .quad 0x6646b5f477e285d6 + .quad 0x40e8ff676c8f6193 + .quad 0xa6ec7311abb594dd + .quad 0x7ec846f3658cec4d + .quad 0x9b5e8f3f1da22ec7 + .quad 0x130f1d776c01cd13 + .quad 0x214c8fcfa2989fb8 + .quad 0x6daaf723399b9dd5 + + // 2^124 * 1 * G + + .quad 0x591e4a5610628564 + .quad 0x2a4bb87ca8b4df34 + .quad 0xde2a2572e7a38e43 + .quad 0x3cbdabd9fee5046e + .quad 0x81aebbdd2cd13070 + .quad 0x962e4325f85a0e9e + .quad 0xde9391aacadffecb + .quad 0x53177fda52c230e6 + .quad 0xa7bc970650b9de79 + .quad 0x3d12a7fbc301b59b + .quad 0x02652e68d36ae38c + .quad 0x79d739835a6199dc + + // 2^124 * 2 * G + + .quad 0xd9354df64131c1bd + .quad 0x758094a186ec5822 + .quad 0x4464ee12e459f3c2 + .quad 0x6c11fce4cb133282 + .quad 0x21c9d9920d591737 + .quad 0x9bea41d2e9b46cd6 + .quad 0xe20e84200d89bfca + .quad 0x79d99f946eae5ff8 + .quad 0xf17b483568673205 + .quad 0x387deae83caad96c + .quad 0x61b471fd56ffe386 + .quad 0x31741195b745a599 + + // 2^124 * 3 * G + + .quad 0xe8d10190b77a360b + .quad 0x99b983209995e702 + .quad 0xbd4fdff8fa0247aa + .quad 0x2772e344e0d36a87 + .quad 0x17f8ba683b02a047 + .quad 0x50212096feefb6c8 + .quad 0x70139be21556cbe2 + .quad 0x203e44a11d98915b + .quad 0xd6863eba37b9e39f + .quad 0x105bc169723b5a23 + .quad 0x104f6459a65c0762 + .quad 0x567951295b4d38d4 + + // 2^124 * 4 * G + + .quad 0x535fd60613037524 + .quad 0xe210adf6b0fbc26a + .quad 0xac8d0a9b23e990ae + .quad 0x47204d08d72fdbf9 + .quad 0x07242eb30d4b497f + .quad 0x1ef96306b9bccc87 + .quad 0x37950934d8116f45 + .quad 0x05468d6201405b04 + .quad 0x00f565a9f93267de + .quad 0xcecfd78dc0d58e8a + .quad 0xa215e2dcf318e28e + .quad 0x4599ee919b633352 + + // 2^124 * 5 * G + + .quad 0xd3c220ca70e0e76b + .quad 0xb12bea58ea9f3094 + .quad 0x294ddec8c3271282 + .quad 0x0c3539e1a1d1d028 + .quad 0xac746d6b861ae579 + .quad 0x31ab0650f6aea9dc + .quad 0x241d661140256d4c + .quad 0x2f485e853d21a5de + .quad 0x329744839c0833f3 + .quad 0x6fe6257fd2abc484 + .quad 0x5327d1814b358817 + .quad 0x65712585893fe9bc + + // 2^124 * 6 * G + + .quad 0x9c102fb732a61161 + .quad 0xe48e10dd34d520a8 + .quad 0x365c63546f9a9176 + .quad 0x32f6fe4c046f6006 + .quad 0x81c29f1bd708ee3f + .quad 0xddcb5a05ae6407d0 + .quad 0x97aec1d7d2a3eba7 + .quad 0x1590521a91d50831 + .quad 0x40a3a11ec7910acc + .quad 0x9013dff8f16d27ae + .quad 
0x1a9720d8abb195d4 + .quad 0x1bb9fe452ea98463 + + // 2^124 * 7 * G + + .quad 0xe9d1d950b3d54f9e + .quad 0x2d5f9cbee00d33c1 + .quad 0x51c2c656a04fc6ac + .quad 0x65c091ee3c1cbcc9 + .quad 0xcf5e6c95cc36747c + .quad 0x294201536b0bc30d + .quad 0x453ac67cee797af0 + .quad 0x5eae6ab32a8bb3c9 + .quad 0x7083661114f118ea + .quad 0x2b37b87b94349cad + .quad 0x7273f51cb4e99f40 + .quad 0x78a2a95823d75698 + + // 2^124 * 8 * G + + .quad 0xa2b072e95c8c2ace + .quad 0x69cffc96651e9c4b + .quad 0x44328ef842e7b42b + .quad 0x5dd996c122aadeb3 + .quad 0xb4f23c425ef83207 + .quad 0xabf894d3c9a934b5 + .quad 0xd0708c1339fd87f7 + .quad 0x1876789117166130 + .quad 0x925b5ef0670c507c + .quad 0x819bc842b93c33bf + .quad 0x10792e9a70dd003f + .quad 0x59ad4b7a6e28dc74 + + // 2^128 * 1 * G + + .quad 0x5f3a7562eb3dbe47 + .quad 0xf7ea38548ebda0b8 + .quad 0x00c3e53145747299 + .quad 0x1304e9e71627d551 + .quad 0x583b04bfacad8ea2 + .quad 0x29b743e8148be884 + .quad 0x2b1e583b0810c5db + .quad 0x2b5449e58eb3bbaa + .quad 0x789814d26adc9cfe + .quad 0x3c1bab3f8b48dd0b + .quad 0xda0fe1fff979c60a + .quad 0x4468de2d7c2dd693 + + // 2^128 * 2 * G + + .quad 0x51bb355e9419469e + .quad 0x33e6dc4c23ddc754 + .quad 0x93a5b6d6447f9962 + .quad 0x6cce7c6ffb44bd63 + .quad 0x4b9ad8c6f86307ce + .quad 0x21113531435d0c28 + .quad 0xd4a866c5657a772c + .quad 0x5da6427e63247352 + .quad 0x1a94c688deac22ca + .quad 0xb9066ef7bbae1ff8 + .quad 0x88ad8c388d59580f + .quad 0x58f29abfe79f2ca8 + + // 2^128 * 3 * G + + .quad 0xe90ecfab8de73e68 + .quad 0x54036f9f377e76a5 + .quad 0xf0495b0bbe015982 + .quad 0x577629c4a7f41e36 + .quad 0x4b5a64bf710ecdf6 + .quad 0xb14ce538462c293c + .quad 0x3643d056d50b3ab9 + .quad 0x6af93724185b4870 + .quad 0x3220024509c6a888 + .quad 0xd2e036134b558973 + .quad 0x83e236233c33289f + .quad 0x701f25bb0caec18f + + // 2^128 * 4 * G + + .quad 0xc3a8b0f8e4616ced + .quad 0xf700660e9e25a87d + .quad 0x61e3061ff4bca59c + .quad 0x2e0c92bfbdc40be9 + .quad 0x9d18f6d97cbec113 + .quad 0x844a06e674bfdbe4 + .quad 0x20f5b522ac4e60d6 + .quad 0x720a5bc050955e51 + .quad 0x0c3f09439b805a35 + .quad 0xe84e8b376242abfc + .quad 0x691417f35c229346 + .quad 0x0e9b9cbb144ef0ec + + // 2^128 * 5 * G + + .quad 0xfbbad48ffb5720ad + .quad 0xee81916bdbf90d0e + .quad 0xd4813152635543bf + .quad 0x221104eb3f337bd8 + .quad 0x8dee9bd55db1beee + .quad 0xc9c3ab370a723fb9 + .quad 0x44a8f1bf1c68d791 + .quad 0x366d44191cfd3cde + .quad 0x9e3c1743f2bc8c14 + .quad 0x2eda26fcb5856c3b + .quad 0xccb82f0e68a7fb97 + .quad 0x4167a4e6bc593244 + + // 2^128 * 6 * G + + .quad 0x643b9d2876f62700 + .quad 0x5d1d9d400e7668eb + .quad 0x1b4b430321fc0684 + .quad 0x7938bb7e2255246a + .quad 0xc2be2665f8ce8fee + .quad 0xe967ff14e880d62c + .quad 0xf12e6e7e2f364eee + .quad 0x34b33370cb7ed2f6 + .quad 0xcdc591ee8681d6cc + .quad 0xce02109ced85a753 + .quad 0xed7485c158808883 + .quad 0x1176fc6e2dfe65e4 + + // 2^128 * 7 * G + + .quad 0xb4af6cd05b9c619b + .quad 0x2ddfc9f4b2a58480 + .quad 0x3d4fa502ebe94dc4 + .quad 0x08fc3a4c677d5f34 + .quad 0xdb90e28949770eb8 + .quad 0x98fbcc2aacf440a3 + .quad 0x21354ffeded7879b + .quad 0x1f6a3e54f26906b6 + .quad 0x60a4c199d30734ea + .quad 0x40c085b631165cd6 + .quad 0xe2333e23f7598295 + .quad 0x4f2fad0116b900d1 + + // 2^128 * 8 * G + + .quad 0x44beb24194ae4e54 + .quad 0x5f541c511857ef6c + .quad 0xa61e6b2d368d0498 + .quad 0x445484a4972ef7ab + .quad 0x962cd91db73bb638 + .quad 0xe60577aafc129c08 + .quad 0x6f619b39f3b61689 + .quad 0x3451995f2944ee81 + .quad 0x9152fcd09fea7d7c + .quad 0x4a816c94b0935cf6 + .quad 0x258e9aaa47285c40 + .quad 0x10b89ca6042893b7 + + // 2^132 * 1 * G + + .quad 
0x9b2a426e3b646025 + .quad 0x32127190385ce4cf + .quad 0xa25cffc2dd6dea45 + .quad 0x06409010bea8de75 + .quad 0xd67cded679d34aa0 + .quad 0xcc0b9ec0cc4db39f + .quad 0xa535a456e35d190f + .quad 0x2e05d9eaf61f6fef + .quad 0xc447901ad61beb59 + .quad 0x661f19bce5dc880a + .quad 0x24685482b7ca6827 + .quad 0x293c778cefe07f26 + + // 2^132 * 2 * G + + .quad 0x86809e7007069096 + .quad 0xaad75b15e4e50189 + .quad 0x07f35715a21a0147 + .quad 0x0487f3f112815d5e + .quad 0x16c795d6a11ff200 + .quad 0xcb70d0e2b15815c9 + .quad 0x89f293209b5395b5 + .quad 0x50b8c2d031e47b4f + .quad 0x48350c08068a4962 + .quad 0x6ffdd05351092c9a + .quad 0x17af4f4aaf6fc8dd + .quad 0x4b0553b53cdba58b + + // 2^132 * 3 * G + + .quad 0x9c65fcbe1b32ff79 + .quad 0xeb75ea9f03b50f9b + .quad 0xfced2a6c6c07e606 + .quad 0x35106cd551717908 + .quad 0xbf05211b27c152d4 + .quad 0x5ec26849bd1af639 + .quad 0x5e0b2caa8e6fab98 + .quad 0x054c8bdd50bd0840 + .quad 0x38a0b12f1dcf073d + .quad 0x4b60a8a3b7f6a276 + .quad 0xfed5ac25d3404f9a + .quad 0x72e82d5e5505c229 + + // 2^132 * 4 * G + + .quad 0x6b0b697ff0d844c8 + .quad 0xbb12f85cd979cb49 + .quad 0xd2a541c6c1da0f1f + .quad 0x7b7c242958ce7211 + .quad 0x00d9cdfd69771d02 + .quad 0x410276cd6cfbf17e + .quad 0x4c45306c1cb12ec7 + .quad 0x2857bf1627500861 + .quad 0x9f21903f0101689e + .quad 0xd779dfd3bf861005 + .quad 0xa122ee5f3deb0f1b + .quad 0x510df84b485a00d4 + + // 2^132 * 5 * G + + .quad 0xa54133bb9277a1fa + .quad 0x74ec3b6263991237 + .quad 0x1a3c54dc35d2f15a + .quad 0x2d347144e482ba3a + .quad 0x24b3c887c70ac15e + .quad 0xb0f3a557fb81b732 + .quad 0x9b2cde2fe578cc1b + .quad 0x4cf7ed0703b54f8e + .quad 0x6bd47c6598fbee0f + .quad 0x9e4733e2ab55be2d + .quad 0x1093f624127610c5 + .quad 0x4e05e26ad0a1eaa4 + + // 2^132 * 6 * G + + .quad 0xda9b6b624b531f20 + .quad 0x429a760e77509abb + .quad 0xdbe9f522e823cb80 + .quad 0x618f1856880c8f82 + .quad 0x1833c773e18fe6c0 + .quad 0xe3c4711ad3c87265 + .quad 0x3bfd3c4f0116b283 + .quad 0x1955875eb4cd4db8 + .quad 0x6da6de8f0e399799 + .quad 0x7ad61aa440fda178 + .quad 0xb32cd8105e3563dd + .quad 0x15f6beae2ae340ae + + // 2^132 * 7 * G + + .quad 0x862bcb0c31ec3a62 + .quad 0x810e2b451138f3c2 + .quad 0x788ec4b839dac2a4 + .quad 0x28f76867ae2a9281 + .quad 0xba9a0f7b9245e215 + .quad 0xf368612dd98c0dbb + .quad 0x2e84e4cbf220b020 + .quad 0x6ba92fe962d90eda + .quad 0x3e4df9655884e2aa + .quad 0xbd62fbdbdbd465a5 + .quad 0xd7596caa0de9e524 + .quad 0x6e8042ccb2b1b3d7 + + // 2^132 * 8 * G + + .quad 0xf10d3c29ce28ca6e + .quad 0xbad34540fcb6093d + .quad 0xe7426ed7a2ea2d3f + .quad 0x08af9d4e4ff298b9 + .quad 0x1530653616521f7e + .quad 0x660d06b896203dba + .quad 0x2d3989bc545f0879 + .quad 0x4b5303af78ebd7b0 + .quad 0x72f8a6c3bebcbde8 + .quad 0x4f0fca4adc3a8e89 + .quad 0x6fa9d4e8c7bfdf7a + .quad 0x0dcf2d679b624eb7 + + // 2^136 * 1 * G + + .quad 0x3d5947499718289c + .quad 0x12ebf8c524533f26 + .quad 0x0262bfcb14c3ef15 + .quad 0x20b878d577b7518e + .quad 0x753941be5a45f06e + .quad 0xd07caeed6d9c5f65 + .quad 0x11776b9c72ff51b6 + .quad 0x17d2d1d9ef0d4da9 + .quad 0x27f2af18073f3e6a + .quad 0xfd3fe519d7521069 + .quad 0x22e3b72c3ca60022 + .quad 0x72214f63cc65c6a7 + + // 2^136 * 2 * G + + .quad 0xb4e37f405307a693 + .quad 0xaba714d72f336795 + .quad 0xd6fbd0a773761099 + .quad 0x5fdf48c58171cbc9 + .quad 0x1d9db7b9f43b29c9 + .quad 0xd605824a4f518f75 + .quad 0xf2c072bd312f9dc4 + .quad 0x1f24ac855a1545b0 + .quad 0x24d608328e9505aa + .quad 0x4748c1d10c1420ee + .quad 0xc7ffe45c06fb25a2 + .quad 0x00ba739e2ae395e6 + + // 2^136 * 3 * G + + .quad 0x592e98de5c8790d6 + .quad 0xe5bfb7d345c2a2df + .quad 0x115a3b60f9b49922 + .quad 
0x03283a3e67ad78f3 + .quad 0xae4426f5ea88bb26 + .quad 0x360679d984973bfb + .quad 0x5c9f030c26694e50 + .quad 0x72297de7d518d226 + .quad 0x48241dc7be0cb939 + .quad 0x32f19b4d8b633080 + .quad 0xd3dfc90d02289308 + .quad 0x05e1296846271945 + + // 2^136 * 4 * G + + .quad 0xba82eeb32d9c495a + .quad 0xceefc8fcf12bb97c + .quad 0xb02dabae93b5d1e0 + .quad 0x39c00c9c13698d9b + .quad 0xadbfbbc8242c4550 + .quad 0xbcc80cecd03081d9 + .quad 0x843566a6f5c8df92 + .quad 0x78cf25d38258ce4c + .quad 0x15ae6b8e31489d68 + .quad 0xaa851cab9c2bf087 + .quad 0xc9a75a97f04efa05 + .quad 0x006b52076b3ff832 + + // 2^136 * 5 * G + + .quad 0x29e0cfe19d95781c + .quad 0xb681df18966310e2 + .quad 0x57df39d370516b39 + .quad 0x4d57e3443bc76122 + .quad 0xf5cb7e16b9ce082d + .quad 0x3407f14c417abc29 + .quad 0xd4b36bce2bf4a7ab + .quad 0x7de2e9561a9f75ce + .quad 0xde70d4f4b6a55ecb + .quad 0x4801527f5d85db99 + .quad 0xdbc9c440d3ee9a81 + .quad 0x6b2a90af1a6029ed + + // 2^136 * 6 * G + + .quad 0x6923f4fc9ae61e97 + .quad 0x5735281de03f5fd1 + .quad 0xa764ae43e6edd12d + .quad 0x5fd8f4e9d12d3e4a + .quad 0x77ebf3245bb2d80a + .quad 0xd8301b472fb9079b + .quad 0xc647e6f24cee7333 + .quad 0x465812c8276c2109 + .quad 0x4d43beb22a1062d9 + .quad 0x7065fb753831dc16 + .quad 0x180d4a7bde2968d7 + .quad 0x05b32c2b1cb16790 + + // 2^136 * 7 * G + + .quad 0xc8c05eccd24da8fd + .quad 0xa1cf1aac05dfef83 + .quad 0xdbbeeff27df9cd61 + .quad 0x3b5556a37b471e99 + .quad 0xf7fca42c7ad58195 + .quad 0x3214286e4333f3cc + .quad 0xb6c29d0d340b979d + .quad 0x31771a48567307e1 + .quad 0x32b0c524e14dd482 + .quad 0xedb351541a2ba4b6 + .quad 0xa3d16048282b5af3 + .quad 0x4fc079d27a7336eb + + // 2^136 * 8 * G + + .quad 0x51c938b089bf2f7f + .quad 0x2497bd6502dfe9a7 + .quad 0xffffc09c7880e453 + .quad 0x124567cecaf98e92 + .quad 0xdc348b440c86c50d + .quad 0x1337cbc9cc94e651 + .quad 0x6422f74d643e3cb9 + .quad 0x241170c2bae3cd08 + .quad 0x3ff9ab860ac473b4 + .quad 0xf0911dee0113e435 + .quad 0x4ae75060ebc6c4af + .quad 0x3f8612966c87000d + + // 2^140 * 1 * G + + .quad 0x0c9c5303f7957be4 + .quad 0xa3c31a20e085c145 + .quad 0xb0721d71d0850050 + .quad 0x0aba390eab0bf2da + .quad 0x529fdffe638c7bf3 + .quad 0xdf2b9e60388b4995 + .quad 0xe027b34f1bad0249 + .quad 0x7bc92fc9b9fa74ed + .quad 0x9f97ef2e801ad9f9 + .quad 0x83697d5479afda3a + .quad 0xe906b3ffbd596b50 + .quad 0x02672b37dd3fb8e0 + + // 2^140 * 2 * G + + .quad 0x48b2ca8b260885e4 + .quad 0xa4286bec82b34c1c + .quad 0x937e1a2617f58f74 + .quad 0x741d1fcbab2ca2a5 + .quad 0xee9ba729398ca7f5 + .quad 0xeb9ca6257a4849db + .quad 0x29eb29ce7ec544e1 + .quad 0x232ca21ef736e2c8 + .quad 0xbf61423d253fcb17 + .quad 0x08803ceafa39eb14 + .quad 0xf18602df9851c7af + .quad 0x0400f3a049e3414b + + // 2^140 * 3 * G + + .quad 0xabce0476ba61c55b + .quad 0x36a3d6d7c4d39716 + .quad 0x6eb259d5e8d82d09 + .quad 0x0c9176e984d756fb + .quad 0x2efba412a06e7b06 + .quad 0x146785452c8d2560 + .quad 0xdf9713ebd67a91c7 + .quad 0x32830ac7157eadf3 + .quad 0x0e782a7ab73769e8 + .quad 0x04a05d7875b18e2c + .quad 0x29525226ebcceae1 + .quad 0x0d794f8383eba820 + + // 2^140 * 4 * G + + .quad 0xff35f5cb9e1516f4 + .quad 0xee805bcf648aae45 + .quad 0xf0d73c2bb93a9ef3 + .quad 0x097b0bf22092a6c2 + .quad 0x7be44ce7a7a2e1ac + .quad 0x411fd93efad1b8b7 + .quad 0x1734a1d70d5f7c9b + .quad 0x0d6592233127db16 + .quad 0xc48bab1521a9d733 + .quad 0xa6c2eaead61abb25 + .quad 0x625c6c1cc6cb4305 + .quad 0x7fc90fea93eb3a67 + + // 2^140 * 5 * G + + .quad 0x0408f1fe1f5c5926 + .quad 0x1a8f2f5e3b258bf4 + .quad 0x40a951a2fdc71669 + .quad 0x6598ee93c98b577e + .quad 0xc527deb59c7cb23d + .quad 0x955391695328404e + .quad 
0xd64392817ccf2c7a + .quad 0x6ce97dabf7d8fa11 + .quad 0x25b5a8e50ef7c48f + .quad 0xeb6034116f2ce532 + .quad 0xc5e75173e53de537 + .quad 0x73119fa08c12bb03 + + // 2^140 * 6 * G + + .quad 0xed30129453f1a4cb + .quad 0xbce621c9c8f53787 + .quad 0xfacb2b1338bee7b9 + .quad 0x3025798a9ea8428c + .quad 0x7845b94d21f4774d + .quad 0xbf62f16c7897b727 + .quad 0x671857c03c56522b + .quad 0x3cd6a85295621212 + .quad 0x3fecde923aeca999 + .quad 0xbdaa5b0062e8c12f + .quad 0x67b99dfc96988ade + .quad 0x3f52c02852661036 + + // 2^140 * 7 * G + + .quad 0xffeaa48e2a1351c6 + .quad 0x28624754fa7f53d7 + .quad 0x0b5ba9e57582ddf1 + .quad 0x60c0104ba696ac59 + .quad 0x9258bf99eec416c6 + .quad 0xac8a5017a9d2f671 + .quad 0x629549ab16dea4ab + .quad 0x05d0e85c99091569 + .quad 0x051de020de9cbe97 + .quad 0xfa07fc56b50bcf74 + .quad 0x378cec9f0f11df65 + .quad 0x36853c69ab96de4d + + // 2^140 * 8 * G + + .quad 0x36d9b8de78f39b2d + .quad 0x7f42ed71a847b9ec + .quad 0x241cd1d679bd3fde + .quad 0x6a704fec92fbce6b + .quad 0x4433c0b0fac5e7be + .quad 0x724bae854c08dcbe + .quad 0xf1f24cc446978f9b + .quad 0x4a0aff6d62825fc8 + .quad 0xe917fb9e61095301 + .quad 0xc102df9402a092f8 + .quad 0xbf09e2f5fa66190b + .quad 0x681109bee0dcfe37 + + // 2^144 * 1 * G + + .quad 0x559a0cc9782a0dde + .quad 0x551dcdb2ea718385 + .quad 0x7f62865b31ef238c + .quad 0x504aa7767973613d + .quad 0x9c18fcfa36048d13 + .quad 0x29159db373899ddd + .quad 0xdc9f350b9f92d0aa + .quad 0x26f57eee878a19d4 + .quad 0x0cab2cd55687efb1 + .quad 0x5180d162247af17b + .quad 0x85c15a344f5a2467 + .quad 0x4041943d9dba3069 + + // 2^144 * 2 * G + + .quad 0xc3c0eeba43ebcc96 + .quad 0x8d749c9c26ea9caf + .quad 0xd9fa95ee1c77ccc6 + .quad 0x1420a1d97684340f + .quad 0x4b217743a26caadd + .quad 0x47a6b424648ab7ce + .quad 0xcb1d4f7a03fbc9e3 + .quad 0x12d931429800d019 + .quad 0x00c67799d337594f + .quad 0x5e3c5140b23aa47b + .quad 0x44182854e35ff395 + .quad 0x1b4f92314359a012 + + // 2^144 * 3 * G + + .quad 0x3e5c109d89150951 + .quad 0x39cefa912de9696a + .quad 0x20eae43f975f3020 + .quad 0x239b572a7f132dae + .quad 0x33cf3030a49866b1 + .quad 0x251f73d2215f4859 + .quad 0xab82aa4051def4f6 + .quad 0x5ff191d56f9a23f6 + .quad 0x819ed433ac2d9068 + .quad 0x2883ab795fc98523 + .quad 0xef4572805593eb3d + .quad 0x020c526a758f36cb + + // 2^144 * 4 * G + + .quad 0x779834f89ed8dbbc + .quad 0xc8f2aaf9dc7ca46c + .quad 0xa9524cdca3e1b074 + .quad 0x02aacc4615313877 + .quad 0xe931ef59f042cc89 + .quad 0x2c589c9d8e124bb6 + .quad 0xadc8e18aaec75997 + .quad 0x452cfe0a5602c50c + .quad 0x86a0f7a0647877df + .quad 0xbbc464270e607c9f + .quad 0xab17ea25f1fb11c9 + .quad 0x4cfb7d7b304b877b + + // 2^144 * 5 * G + + .quad 0x72b43d6cb89b75fe + .quad 0x54c694d99c6adc80 + .quad 0xb8c3aa373ee34c9f + .quad 0x14b4622b39075364 + .quad 0xe28699c29789ef12 + .quad 0x2b6ecd71df57190d + .quad 0xc343c857ecc970d0 + .quad 0x5b1d4cbc434d3ac5 + .quad 0xb6fb2615cc0a9f26 + .quad 0x3a4f0e2bb88dcce5 + .quad 0x1301498b3369a705 + .quad 0x2f98f71258592dd1 + + // 2^144 * 6 * G + + .quad 0x0c94a74cb50f9e56 + .quad 0x5b1ff4a98e8e1320 + .quad 0x9a2acc2182300f67 + .quad 0x3a6ae249d806aaf9 + .quad 0x2e12ae444f54a701 + .quad 0xfcfe3ef0a9cbd7de + .quad 0xcebf890d75835de0 + .quad 0x1d8062e9e7614554 + .quad 0x657ada85a9907c5a + .quad 0x1a0ea8b591b90f62 + .quad 0x8d0e1dfbdf34b4e9 + .quad 0x298b8ce8aef25ff3 + + // 2^144 * 7 * G + + .quad 0x2a927953eff70cb2 + .quad 0x4b89c92a79157076 + .quad 0x9418457a30a7cf6a + .quad 0x34b8a8404d5ce485 + .quad 0x837a72ea0a2165de + .quad 0x3fab07b40bcf79f6 + .quad 0x521636c77738ae70 + .quad 0x6ba6271803a7d7dc + .quad 0xc26eecb583693335 + .quad 
0xd5a813df63b5fefd + .quad 0xa293aa9aa4b22573 + .quad 0x71d62bdd465e1c6a + + // 2^144 * 8 * G + + .quad 0x6533cc28d378df80 + .quad 0xf6db43790a0fa4b4 + .quad 0xe3645ff9f701da5a + .quad 0x74d5f317f3172ba4 + .quad 0xcd2db5dab1f75ef5 + .quad 0xd77f95cf16b065f5 + .quad 0x14571fea3f49f085 + .quad 0x1c333621262b2b3d + .quad 0xa86fe55467d9ca81 + .quad 0x398b7c752b298c37 + .quad 0xda6d0892e3ac623b + .quad 0x4aebcc4547e9d98c + + // 2^148 * 1 * G + + .quad 0x53175a7205d21a77 + .quad 0xb0c04422d3b934d4 + .quad 0xadd9f24bdd5deadc + .quad 0x074f46e69f10ff8c + .quad 0x0de9b204a059a445 + .quad 0xe15cb4aa4b17ad0f + .quad 0xe1bbec521f79c557 + .quad 0x2633f1b9d071081b + .quad 0xc1fb4177018b9910 + .quad 0xa6ea20dc6c0fe140 + .quad 0xd661f3e74354c6ff + .quad 0x5ecb72e6f1a3407a + + // 2^148 * 2 * G + + .quad 0xa515a31b2259fb4e + .quad 0x0960f3972bcac52f + .quad 0xedb52fec8d3454cb + .quad 0x382e2720c476c019 + .quad 0xfeeae106e8e86997 + .quad 0x9863337f98d09383 + .quad 0x9470480eaa06ebef + .quad 0x038b6898d4c5c2d0 + .quad 0xf391c51d8ace50a6 + .quad 0x3142d0b9ae2d2948 + .quad 0xdb4d5a1a7f24ca80 + .quad 0x21aeba8b59250ea8 + + // 2^148 * 3 * G + + .quad 0x24f13b34cf405530 + .quad 0x3c44ea4a43088af7 + .quad 0x5dd5c5170006a482 + .quad 0x118eb8f8890b086d + .quad 0x53853600f0087f23 + .quad 0x4c461879da7d5784 + .quad 0x6af303deb41f6860 + .quad 0x0a3c16c5c27c18ed + .quad 0x17e49c17cc947f3d + .quad 0xccc6eda6aac1d27b + .quad 0xdf6092ceb0f08e56 + .quad 0x4909b3e22c67c36b + + // 2^148 * 4 * G + + .quad 0x9c9c85ea63fe2e89 + .quad 0xbe1baf910e9412ec + .quad 0x8f7baa8a86fbfe7b + .quad 0x0fb17f9fef968b6c + .quad 0x59a16676706ff64e + .quad 0x10b953dd0d86a53d + .quad 0x5848e1e6ce5c0b96 + .quad 0x2d8b78e712780c68 + .quad 0x79d5c62eafc3902b + .quad 0x773a215289e80728 + .quad 0xc38ae640e10120b9 + .quad 0x09ae23717b2b1a6d + + // 2^148 * 5 * G + + .quad 0xbb6a192a4e4d083c + .quad 0x34ace0630029e192 + .quad 0x98245a59aafabaeb + .quad 0x6d9c8a9ada97faac + .quad 0x10ab8fa1ad32b1d0 + .quad 0xe9aced1be2778b24 + .quad 0xa8856bc0373de90f + .quad 0x66f35ddddda53996 + .quad 0xd27d9afb24997323 + .quad 0x1bb7e07ef6f01d2e + .quad 0x2ba7472df52ecc7f + .quad 0x03019b4f646f9dc8 + + // 2^148 * 6 * G + + .quad 0x04a186b5565345cd + .quad 0xeee76610bcc4116a + .quad 0x689c73b478fb2a45 + .quad 0x387dcbff65697512 + .quad 0xaf09b214e6b3dc6b + .quad 0x3f7573b5ad7d2f65 + .quad 0xd019d988100a23b0 + .quad 0x392b63a58b5c35f7 + .quad 0x4093addc9c07c205 + .quad 0xc565be15f532c37e + .quad 0x63dbecfd1583402a + .quad 0x61722b4aef2e032e + + // 2^148 * 7 * G + + .quad 0x0012aafeecbd47af + .quad 0x55a266fb1cd46309 + .quad 0xf203eb680967c72c + .quad 0x39633944ca3c1429 + .quad 0xd6b07a5581cb0e3c + .quad 0x290ff006d9444969 + .quad 0x08680b6a16dcda1f + .quad 0x5568d2b75a06de59 + .quad 0x8d0cb88c1b37cfe1 + .quad 0x05b6a5a3053818f3 + .quad 0xf2e9bc04b787d959 + .quad 0x6beba1249add7f64 + + // 2^148 * 8 * G + + .quad 0x1d06005ca5b1b143 + .quad 0x6d4c6bb87fd1cda2 + .quad 0x6ef5967653fcffe7 + .quad 0x097c29e8c1ce1ea5 + .quad 0x5c3cecb943f5a53b + .quad 0x9cc9a61d06c08df2 + .quad 0xcfba639a85895447 + .quad 0x5a845ae80df09fd5 + .quad 0x4ce97dbe5deb94ca + .quad 0x38d0a4388c709c48 + .quad 0xc43eced4a169d097 + .quad 0x0a1249fff7e587c3 + + // 2^152 * 1 * G + + .quad 0x12f0071b276d01c9 + .quad 0xe7b8bac586c48c70 + .quad 0x5308129b71d6fba9 + .quad 0x5d88fbf95a3db792 + .quad 0x0b408d9e7354b610 + .quad 0x806b32535ba85b6e + .quad 0xdbe63a034a58a207 + .quad 0x173bd9ddc9a1df2c + .quad 0x2b500f1efe5872df + .quad 0x58d6582ed43918c1 + .quad 0xe6ed278ec9673ae0 + .quad 0x06e1cd13b19ea319 + + // 
2^152 * 2 * G + + .quad 0x40d0ad516f166f23 + .quad 0x118e32931fab6abe + .quad 0x3fe35e14a04d088e + .quad 0x3080603526e16266 + .quad 0x472baf629e5b0353 + .quad 0x3baa0b90278d0447 + .quad 0x0c785f469643bf27 + .quad 0x7f3a6a1a8d837b13 + .quad 0xf7e644395d3d800b + .quad 0x95a8d555c901edf6 + .quad 0x68cd7830592c6339 + .quad 0x30d0fded2e51307e + + // 2^152 * 3 * G + + .quad 0xe0594d1af21233b3 + .quad 0x1bdbe78ef0cc4d9c + .quad 0x6965187f8f499a77 + .quad 0x0a9214202c099868 + .quad 0x9cb4971e68b84750 + .quad 0xa09572296664bbcf + .quad 0x5c8de72672fa412b + .quad 0x4615084351c589d9 + .quad 0xbc9019c0aeb9a02e + .quad 0x55c7110d16034cae + .quad 0x0e6df501659932ec + .quad 0x3bca0d2895ca5dfe + + // 2^152 * 4 * G + + .quad 0x40f031bc3c5d62a4 + .quad 0x19fc8b3ecff07a60 + .quad 0x98183da2130fb545 + .quad 0x5631deddae8f13cd + .quad 0x9c688eb69ecc01bf + .quad 0xf0bc83ada644896f + .quad 0xca2d955f5f7a9fe2 + .quad 0x4ea8b4038df28241 + .quad 0x2aed460af1cad202 + .quad 0x46305305a48cee83 + .quad 0x9121774549f11a5f + .quad 0x24ce0930542ca463 + + // 2^152 * 5 * G + + .quad 0x1fe890f5fd06c106 + .quad 0xb5c468355d8810f2 + .quad 0x827808fe6e8caf3e + .quad 0x41d4e3c28a06d74b + .quad 0x3fcfa155fdf30b85 + .quad 0xd2f7168e36372ea4 + .quad 0xb2e064de6492f844 + .quad 0x549928a7324f4280 + .quad 0xf26e32a763ee1a2e + .quad 0xae91e4b7d25ffdea + .quad 0xbc3bd33bd17f4d69 + .quad 0x491b66dec0dcff6a + + // 2^152 * 6 * G + + .quad 0x98f5b13dc7ea32a7 + .quad 0xe3d5f8cc7e16db98 + .quad 0xac0abf52cbf8d947 + .quad 0x08f338d0c85ee4ac + .quad 0x75f04a8ed0da64a1 + .quad 0xed222caf67e2284b + .quad 0x8234a3791f7b7ba4 + .quad 0x4cf6b8b0b7018b67 + .quad 0xc383a821991a73bd + .quad 0xab27bc01df320c7a + .quad 0xc13d331b84777063 + .quad 0x530d4a82eb078a99 + + // 2^152 * 7 * G + + .quad 0x004c3630e1f94825 + .quad 0x7e2d78268cab535a + .quad 0xc7482323cc84ff8b + .quad 0x65ea753f101770b9 + .quad 0x6d6973456c9abf9e + .quad 0x257fb2fc4900a880 + .quad 0x2bacf412c8cfb850 + .quad 0x0db3e7e00cbfbd5b + .quad 0x3d66fc3ee2096363 + .quad 0x81d62c7f61b5cb6b + .quad 0x0fbe044213443b1a + .quad 0x02a4ec1921e1a1db + + // 2^152 * 8 * G + + .quad 0x5ce6259a3b24b8a2 + .quad 0xb8577acc45afa0b8 + .quad 0xcccbe6e88ba07037 + .quad 0x3d143c51127809bf + .quad 0xf5c86162f1cf795f + .quad 0x118c861926ee57f2 + .quad 0x172124851c063578 + .quad 0x36d12b5dec067fcf + .quad 0x126d279179154557 + .quad 0xd5e48f5cfc783a0a + .quad 0x36bdb6e8df179bac + .quad 0x2ef517885ba82859 + + // 2^156 * 1 * G + + .quad 0x88bd438cd11e0d4a + .quad 0x30cb610d43ccf308 + .quad 0xe09a0e3791937bcc + .quad 0x4559135b25b1720c + .quad 0x1ea436837c6da1e9 + .quad 0xf9c189af1fb9bdbe + .quad 0x303001fcce5dd155 + .quad 0x28a7c99ebc57be52 + .quad 0xb8fd9399e8d19e9d + .quad 0x908191cb962423ff + .quad 0xb2b948d747c742a3 + .quad 0x37f33226d7fb44c4 + + // 2^156 * 2 * G + + .quad 0x0dae8767b55f6e08 + .quad 0x4a43b3b35b203a02 + .quad 0xe3725a6e80af8c79 + .quad 0x0f7a7fd1705fa7a3 + .quad 0x33912553c821b11d + .quad 0x66ed42c241e301df + .quad 0x066fcc11104222fd + .quad 0x307a3b41c192168f + .quad 0x8eeb5d076eb55ce0 + .quad 0x2fc536bfaa0d925a + .quad 0xbe81830fdcb6c6e8 + .quad 0x556c7045827baf52 + + // 2^156 * 3 * G + + .quad 0x8e2b517302e9d8b7 + .quad 0xe3e52269248714e8 + .quad 0xbd4fbd774ca960b5 + .quad 0x6f4b4199c5ecada9 + .quad 0xb94b90022bf44406 + .quad 0xabd4237eff90b534 + .quad 0x7600a960faf86d3a + .quad 0x2f45abdac2322ee3 + .quad 0x61af4912c8ef8a6a + .quad 0xe58fa4fe43fb6e5e + .quad 0xb5afcc5d6fd427cf + .quad 0x6a5393281e1e11eb + + // 2^156 * 4 * G + + .quad 0xf3da5139a5d1ee89 + .quad 0x8145457cff936988 + .quad 
+        .quad   0x3f622fed00e188c4
+        .quad   0x0f513815db8b5a3d
+        .quad   0x0fff04fe149443cf
+        .quad   0x53cac6d9865cddd7
+        .quad   0x31385b03531ed1b7
+        .quad   0x5846a27cacd1039d
+        .quad   0x4ff5cdac1eb08717
+        .quad   0x67e8b29590f2e9bc
+        .quad   0x44093b5e237afa99
+        .quad   0x0d414bed8708b8b2
+
+// 2^156 * 5 * G
+
+        .quad   0xcfb68265fd0e75f6
+        .quad   0xe45b3e28bb90e707
+        .quad   0x7242a8de9ff92c7a
+        .quad   0x685b3201933202dd
+        .quad   0x81886a92294ac9e8
+        .quad   0x23162b45d55547be
+        .quad   0x94cfbc4403715983
+        .quad   0x50eb8fdb134bc401
+        .quad   0xc0b73ec6d6b330cd
+        .quad   0x84e44807132faff1
+        .quad   0x732b7352c4a5dee1
+        .quad   0x5d7c7cf1aa7cd2d2
+
+// 2^156 * 6 * G
+
+        .quad   0xaf3b46bf7a4aafa2
+        .quad   0xb78705ec4d40d411
+        .quad   0x114f0c6aca7c15e3
+        .quad   0x3f364faaa9489d4d
+        .quad   0x33d1013e9b73a562
+        .quad   0x925cef5748ec26e1
+        .quad   0xa7fce614dd468058
+        .quad   0x78b0fad41e9aa438
+        .quad   0xbf56a431ed05b488
+        .quad   0xa533e66c9c495c7e
+        .quad   0xe8652baf87f3651a
+        .quad   0x0241800059d66c33
+
+// 2^156 * 7 * G
+
+        .quad   0xceb077fea37a5be4
+        .quad   0xdb642f02e5a5eeb7
+        .quad   0xc2e6d0c5471270b8
+        .quad   0x4771b65538e4529c
+        .quad   0x28350c7dcf38ea01
+        .quad   0x7c6cdbc0b2917ab6
+        .quad   0xace7cfbe857082f7
+        .quad   0x4d2845aba2d9a1e0
+        .quad   0xbb537fe0447070de
+        .quad   0xcba744436dd557df
+        .quad   0xd3b5a3473600dbcb
+        .quad   0x4aeabbe6f9ffd7f8
+
+// 2^156 * 8 * G
+
+        .quad   0x4630119e40d8f78c
+        .quad   0xa01a9bc53c710e11
+        .quad   0x486d2b258910dd79
+        .quad   0x1e6c47b3db0324e5
+        .quad   0x6a2134bcc4a9c8f2
+        .quad   0xfbf8fd1c8ace2e37
+        .quad   0x000ae3049911a0ba
+        .quad   0x046e3a616bc89b9e
+        .quad   0x14e65442f03906be
+        .quad   0x4a019d54e362be2a
+        .quad   0x68ccdfec8dc230c7
+        .quad   0x7cfb7e3faf6b861c
+
+// 2^160 * 1 * G
+
+        .quad   0x4637974e8c58aedc
+        .quad   0xb9ef22fbabf041a4
+        .quad   0xe185d956e980718a
+        .quad   0x2f1b78fab143a8a6
+        .quad   0x96eebffb305b2f51
+        .quad   0xd3f938ad889596b8
+        .quad   0xf0f52dc746d5dd25
+        .quad   0x57968290bb3a0095
+        .quad   0xf71ab8430a20e101
+        .quad   0xf393658d24f0ec47
+        .quad   0xcf7509a86ee2eed1
+        .quad   0x7dc43e35dc2aa3e1
+
+// 2^160 * 2 * G
+
+        .quad   0x85966665887dd9c3
+        .quad   0xc90f9b314bb05355
+        .quad   0xc6e08df8ef2079b1
+        .quad   0x7ef72016758cc12f
+        .quad   0x5a782a5c273e9718
+        .quad   0x3576c6995e4efd94
+        .quad   0x0f2ed8051f237d3e
+        .quad   0x044fb81d82d50a99
+        .quad   0xc1df18c5a907e3d9
+        .quad   0x57b3371dce4c6359
+        .quad   0xca704534b201bb49
+        .quad   0x7f79823f9c30dd2e
+
+// 2^160 * 3 * G
+
+        .quad   0x8334d239a3b513e8
+        .quad   0xc13670d4b91fa8d8
+        .quad   0x12b54136f590bd33
+        .quad   0x0a4e0373d784d9b4
+        .quad   0x6a9c1ff068f587ba
+        .quad   0x0827894e0050c8de
+        .quad   0x3cbf99557ded5be7
+        .quad   0x64a9b0431c06d6f0
+        .quad   0x2eb3d6a15b7d2919
+        .quad   0xb0b4f6a0d53a8235
+        .quad   0x7156ce4389a45d47
+        .quad   0x071a7d0ace18346c
+
+// 2^160 * 4 * G
+
+        .quad   0xd3072daac887ba0b
+        .quad   0x01262905bfa562ee
+        .quad   0xcf543002c0ef768b
+        .quad   0x2c3bcc7146ea7e9c
+        .quad   0xcc0c355220e14431
+        .quad   0x0d65950709b15141
+        .quad   0x9af5621b209d5f36
+        .quad   0x7c69bcf7617755d3
+        .quad   0x07f0d7eb04e8295f
+        .quad   0x10db18252f50f37d
+        .quad   0xe951a9a3171798d7
+        .quad   0x6f5a9a7322aca51d
+
+// 2^160 * 5 * G
+
+        .quad   0x8ba1000c2f41c6c5
+        .quad   0xc49f79c10cfefb9b
+        .quad   0x4efa47703cc51c9f
+        .quad   0x494e21a2e147afca
+        .quad   0xe729d4eba3d944be
+        .quad   0x8d9e09408078af9e
+        .quad   0x4525567a47869c03
+        .quad   0x02ab9680ee8d3b24
+        .quad   0xefa48a85dde50d9a
+        .quad   0x219a224e0fb9a249
+        .quad   0xfa091f1dd91ef6d9
+        .quad   0x6b5d76cbea46bb34
+
+// 2^160 * 6 * G
+
+        .quad   0x8857556cec0cd994
+        .quad   0x6472dc6f5cd01dba
+        .quad   0xaf0169148f42b477
+        .quad   0x0ae333f685277354
+        .quad   0xe0f941171e782522
+        .quad   0xf1e6ae74036936d3
+        .quad   0x408b3ea2d0fcc746
+        .quad   0x16fb869c03dd313e
+        .quad   0x288e199733b60962
+        .quad   0x24fc72b4d8abe133
+        .quad   0x4811f7ed0991d03e
+        .quad   0x3f81e38b8f70d075
+
+// 2^160 * 7 * G
+
+        .quad   0x7f910fcc7ed9affe
+        .quad   0x545cb8a12465874b
+        .quad   0xa8397ed24b0c4704
+        .quad   0x50510fc104f50993
+        .quad   0x0adb7f355f17c824
+        .quad   0x74b923c3d74299a4
+        .quad   0xd57c3e8bcbf8eaf7
+        .quad   0x0ad3e2d34cdedc3d
+        .quad   0x6f0c0fc5336e249d
+        .quad   0x745ede19c331cfd9
+        .quad   0xf2d6fd0009eefe1c
+        .quad   0x127c158bf0fa1ebe
+
+// 2^160 * 8 * G
+
+        .quad   0xf6197c422e9879a2
+        .quad   0xa44addd452ca3647
+        .quad   0x9b413fc14b4eaccb
+        .quad   0x354ef87d07ef4f68
+        .quad   0xdea28fc4ae51b974
+        .quad   0x1d9973d3744dfe96
+        .quad   0x6240680b873848a8
+        .quad   0x4ed82479d167df95
+        .quad   0xfee3b52260c5d975
+        .quad   0x50352efceb41b0b8
+        .quad   0x8808ac30a9f6653c
+        .quad   0x302d92d20539236d
+
+// 2^164 * 1 * G
+
+        .quad   0x4c59023fcb3efb7c
+        .quad   0x6c2fcb99c63c2a94
+        .quad   0xba4190e2c3c7e084
+        .quad   0x0e545daea51874d9
+        .quad   0x957b8b8b0df53c30
+        .quad   0x2a1c770a8e60f098
+        .quad   0xbbc7a670345796de
+        .quad   0x22a48f9a90c99bc9
+        .quad   0x6b7dc0dc8d3fac58
+        .quad   0x5497cd6ce6e42bfd
+        .quad   0x542f7d1bf400d305
+        .quad   0x4159f47f048d9136
+
+// 2^164 * 2 * G
+
+        .quad   0x20ad660839e31e32
+        .quad   0xf81e1bd58405be50
+        .quad   0xf8064056f4dabc69
+        .quad   0x14d23dd4ce71b975
+        .quad   0x748515a8bbd24839
+        .quad   0x77128347afb02b55
+        .quad   0x50ba2ac649a2a17f
+        .quad   0x060525513ad730f1
+        .quad   0xf2398e098aa27f82
+        .quad   0x6d7982bb89a1b024
+        .quad   0xfa694084214dd24c
+        .quad   0x71ab966fa32301c3
+
+// 2^164 * 3 * G
+
+        .quad   0x2dcbd8e34ded02fc
+        .quad   0x1151f3ec596f22aa
+        .quad   0xbca255434e0328da
+        .quad   0x35768fbe92411b22
+        .quad   0xb1088a0702809955
+        .quad   0x43b273ea0b43c391
+        .quad   0xca9b67aefe0686ed
+        .quad   0x605eecbf8335f4ed
+        .quad   0x83200a656c340431
+        .quad   0x9fcd71678ee59c2f
+        .quad   0x75d4613f71300f8a
+        .quad   0x7a912faf60f542f9
+
+// 2^164 * 4 * G
+
+        .quad   0xb204585e5edc1a43
+        .quad   0x9f0e16ee5897c73c
+        .quad   0x5b82c0ae4e70483c
+        .quad   0x624a170e2bddf9be
+        .quad   0x253f4f8dfa2d5597
+        .quad   0x25e49c405477130c
+        .quad   0x00c052e5996b1102
+        .quad   0x33cb966e33bb6c4a
+        .quad   0x597028047f116909
+        .quad   0x828ac41c1e564467
+        .quad   0x70417dbde6217387
+        .quad   0x721627aefbac4384
+
+// 2^164 * 5 * G
+
+        .quad   0x97d03bc38736add5
+        .quad   0x2f1422afc532b130
+        .quad   0x3aa68a057101bbc4
+        .quad   0x4c946cf7e74f9fa7
+        .quad   0xfd3097bc410b2f22
+        .quad   0xf1a05da7b5cfa844
+        .quad   0x61289a1def57ca74
+        .quad   0x245ea199bb821902
+        .quad   0xaedca66978d477f8
+        .quad   0x1898ba3c29117fe1
+        .quad   0xcf73f983720cbd58
+        .quad   0x67da12e6b8b56351
+
+// 2^164 * 6 * G
+
+        .quad   0x7067e187b4bd6e07
+        .quad   0x6e8f0203c7d1fe74
+        .quad   0x93c6aa2f38c85a30
+        .quad   0x76297d1f3d75a78a
+        .quad   0x2b7ef3d38ec8308c
+        .quad   0x828fd7ec71eb94ab
+        .quad   0x807c3b36c5062abd
+        .quad   0x0cb64cb831a94141
+        .quad   0x3030fc33534c6378
+        .quad   0xb9635c5ce541e861
+        .quad   0x15d9a9bed9b2c728
+        .quad   0x49233ea3f3775dcb
+
+// 2^164 * 7 * G
+
+        .quad   0x629398fa8dbffc3a
+        .quad   0xe12fe52dd54db455
+        .quad   0xf3be11dfdaf25295
+        .quad   0x628b140dce5e7b51
+        .quad   0x7b3985fe1c9f249b
+        .quad   0x4fd6b2d5a1233293
+        .quad   0xceb345941adf4d62
+        .quad   0x6987ff6f542de50c
+        .quad   0x47e241428f83753c
+        .quad   0x6317bebc866af997
+        .quad   0xdabb5b433d1a9829
+        .quad   0x074d8d245287fb2d
+
+// 2^164 * 8 * G
+
+        .quad   0x8337d9cd440bfc31
+        .quad   0x729d2ca1af318fd7
+        .quad   0xa040a4a4772c2070
+        .quad   0x46002ef03a7349be
+        .quad   0x481875c6c0e31488
+        .quad   0x219429b2e22034b4
+        .quad   0x7223c98a31283b65
+        .quad   0x3420d60b342277f9
+        .quad   0xfaa23adeaffe65f7
+        .quad   0x78261ed45be0764c
+        .quad   0x441c0a1e2f164403
+        .quad   0x5aea8e567a87d395
+
+// 2^168 * 1 * G
+
+        .quad   0x7813c1a2bca4283d
+        .quad   0xed62f091a1863dd9
+        .quad   0xaec7bcb8c268fa86
+        .quad   0x10e5d3b76f1cae4c
+        .quad   0x2dbc6fb6e4e0f177
+        .quad   0x04e1bf29a4bd6a93
+        .quad   0x5e1966d4787af6e8
+        .quad   0x0edc5f5eb426d060
+        .quad   0x5453bfd653da8e67
+        .quad   0xe9dc1eec24a9f641
+        .quad   0xbf87263b03578a23
+        .quad   0x45b46c51361cba72
+
+// 2^168 * 2 * G
+
+        .quad   0xa9402abf314f7fa1
+        .quad   0xe257f1dc8e8cf450
+        .quad   0x1dbbd54b23a8be84
+        .quad   0x2177bfa36dcb713b
+        .quad   0xce9d4ddd8a7fe3e4
+        .quad   0xab13645676620e30
+        .quad   0x4b594f7bb30e9958
+        .quad   0x5c1c0aef321229df
+        .quad   0x37081bbcfa79db8f
+        .quad   0x6048811ec25f59b3
+        .quad   0x087a76659c832487
+        .quad   0x4ae619387d8ab5bb
+
+// 2^168 * 3 * G
+
+        .quad   0x8ddbf6aa5344a32e
+        .quad   0x7d88eab4b41b4078
+        .quad   0x5eb0eb974a130d60
+        .quad   0x1a00d91b17bf3e03
+        .quad   0x61117e44985bfb83
+        .quad   0xfce0462a71963136
+        .quad   0x83ac3448d425904b
+        .quad   0x75685abe5ba43d64
+        .quad   0x6e960933eb61f2b2
+        .quad   0x543d0fa8c9ff4952
+        .quad   0xdf7275107af66569
+        .quad   0x135529b623b0e6aa
+
+// 2^168 * 4 * G
+
+        .quad   0x18f0dbd7add1d518
+        .quad   0x979f7888cfc11f11
+        .quad   0x8732e1f07114759b
+        .quad   0x79b5b81a65ca3a01
+        .quad   0xf5c716bce22e83fe
+        .quad   0xb42beb19e80985c1
+        .quad   0xec9da63714254aae
+        .quad   0x5972ea051590a613
+        .quad   0x0fd4ac20dc8f7811
+        .quad   0x9a9ad294ac4d4fa8
+        .quad   0xc01b2d64b3360434
+        .quad   0x4f7e9c95905f3bdb
+
+// 2^168 * 5 * G
+
+        .quad   0x62674bbc5781302e
+        .quad   0xd8520f3989addc0f
+        .quad   0x8c2999ae53fbd9c6
+        .quad   0x31993ad92e638e4c
+        .quad   0x71c8443d355299fe
+        .quad   0x8bcd3b1cdbebead7
+        .quad   0x8092499ef1a49466
+        .quad   0x1942eec4a144adc8
+        .quad   0x7dac5319ae234992
+        .quad   0x2c1b3d910cea3e92
+        .quad   0x553ce494253c1122
+        .quad   0x2a0a65314ef9ca75
+
+// 2^168 * 6 * G
+
+        .quad   0x2db7937ff7f927c2
+        .quad   0xdb741f0617d0a635
+        .quad   0x5982f3a21155af76
+        .quad   0x4cf6e218647c2ded
+        .quad   0xcf361acd3c1c793a
+        .quad   0x2f9ebcac5a35bc3b
+        .quad   0x60e860e9a8cda6ab
+        .quad   0x055dc39b6dea1a13
+        .quad   0xb119227cc28d5bb6
+        .quad   0x07e24ebc774dffab
+        .quad   0xa83c78cee4a32c89
+        .quad   0x121a307710aa24b6
+
+// 2^168 * 7 * G
+
+        .quad   0xe4db5d5e9f034a97
+        .quad   0xe153fc093034bc2d
+        .quad   0x460546919551d3b1
+        .quad   0x333fc76c7a40e52d
+        .quad   0xd659713ec77483c9
+        .quad   0x88bfe077b82b96af
+        .quad   0x289e28231097bcd3
+        .quad   0x527bb94a6ced3a9b
+        .quad   0x563d992a995b482e
+        .quad   0x3405d07c6e383801
+        .quad   0x485035de2f64d8e5
+        .quad   0x6b89069b20a7a9f7
+
+// 2^168 * 8 * G
+
+        .quad   0x812aa0416270220d
+        .quad   0x995a89faf9245b4e
+        .quad   0xffadc4ce5072ef05
+        .quad   0x23bc2103aa73eb73
+        .quad   0x4082fa8cb5c7db77
+        .quad   0x068686f8c734c155
+        .quad   0x29e6c8d9f6e7a57e
+        .quad   0x0473d308a7639bcf
+        .quad   0xcaee792603589e05
+        .quad   0x2b4b421246dcc492
+        .quad   0x02a1ef74e601a94f
+        .quad   0x102f73bfde04341a
+
+// 2^172 * 1 * G
+
+        .quad   0xb5a2d50c7ec20d3e
+        .quad   0xc64bdd6ea0c97263
+        .quad   0x56e89052c1ff734d
+        .quad   0x4929c6f72b2ffaba
+        .quad   0x358ecba293a36247
+        .quad   0xaf8f9862b268fd65
+        .quad   0x412f7e9968a01c89
+        .quad   0x5786f312cd754524
+        .quad   0x337788ffca14032c
+        .quad   0xf3921028447f1ee3
+        .quad   0x8b14071f231bccad
+        .quad   0x4c817b4bf2344783
+
+// 2^172 * 2 * G
+
+        .quad   0x0ff853852871b96e
+        .quad   0xe13e9fab60c3f1bb
+        .quad   0xeefd595325344402
+        .quad   0x0a37c37075b7744b
+        .quad   0x413ba057a40b4484
+        .quad   0xba4c2e1a4f5f6a43
+        .quad   0x614ba0a5aee1d61c
+        .quad   0x78a1531a8b05dc53
+        .quad   0x6cbdf1703ad0562b
+        .quad   0x8ecf4830c92521a3
+        .quad   0xdaebd303fd8424e7
+        .quad   0x72ad82a42e5ec56f
+
+// 2^172 * 3 * G
+
+        .quad   0x3f9e8e35bafb65f6
+        .quad   0x39d69ec8f27293a1
+        .quad   0x6cb8cd958cf6a3d0
+        .quad   0x1734778173adae6d
+        .quad   0xc368939167024bc3
+        .quad   0x8e69d16d49502fda
+        .quad   0xfcf2ec3ce45f4b29
+        .quad   0x065f669ea3b4cbc4
+        .quad   0x8a00aec75532db4d
+        .quad   0xb869a4e443e31bb1
+        .quad   0x4a0f8552d3a7f515
+        .quad   0x19adeb7c303d7c08
+
+// 2^172 * 4 * G
+
+        .quad   0xc720cb6153ead9a3
+        .quad   0x55b2c97f512b636e
+        .quad   0xb1e35b5fd40290b1
+        .quad   0x2fd9ccf13b530ee2
+        .quad   0x9d05ba7d43c31794
+        .quad   0x2470c8ff93322526
+        .quad   0x8323dec816197438
+        .quad   0x2852709881569b53
+        .quad   0x07bd475b47f796b8
+        .quad   0xd2c7b013542c8f54
+        .quad   0x2dbd23f43b24f87e
+        .quad   0x6551afd77b0901d6
+
+// 2^172 * 5 * G
+
+        .quad   0x4546baaf54aac27f
+        .quad   0xf6f66fecb2a45a28
+        .quad   0x582d1b5b562bcfe8
+        .quad   0x44b123f3920f785f
+        .quad   0x68a24ce3a1d5c9ac
+        .quad   0xbb77a33d10ff6461
+        .quad   0x0f86ce4425d3166e
+        .quad   0x56507c0950b9623b
+        .quad   0x1206f0b7d1713e63
+        .quad   0x353fe3d915bafc74
+        .quad   0x194ceb970ad9d94d
+        .quad   0x62fadd7cf9d03ad3
+
+// 2^172 * 6 * G
+
+        .quad   0xc6b5967b5598a074
+        .quad   0x5efe91ce8e493e25
+        .quad   0xd4b72c4549280888
+        .quad   0x20ef1149a26740c2
+        .quad   0x3cd7bc61e7ce4594
+        .quad   0xcd6b35a9b7dd267e
+        .quad   0xa080abc84366ef27
+        .quad   0x6ec7c46f59c79711
+        .quad   0x2f07ad636f09a8a2
+        .quad   0x8697e6ce24205e7d
+        .quad   0xc0aefc05ee35a139
+        .quad   0x15e80958b5f9d897
+
+// 2^172 * 7 * G
+
+        .quad   0x25a5ef7d0c3e235b
+        .quad   0x6c39c17fbe134ee7
+        .quad   0xc774e1342dc5c327
+        .quad   0x021354b892021f39
+        .quad   0x4dd1ed355bb061c4
+        .quad   0x42dc0cef941c0700
+        .quad   0x61305dc1fd86340e
+        .quad   0x56b2cc930e55a443
+        .quad   0x1df79da6a6bfc5a2
+        .quad   0x02f3a2749fde4369
+        .quad   0xb323d9f2cda390a7
+        .quad   0x7be0847b8774d363
+
+// 2^172 * 8 * G
+
+        .quad   0x8c99cc5a8b3f55c3
+        .quad   0x0611d7253fded2a0
+        .quad   0xed2995ff36b70a36
+        .quad   0x1f699a54d78a2619
+        .quad   0x1466f5af5307fa11
+        .quad   0x817fcc7ded6c0af2
+        .quad   0x0a6de44ec3a4a3fb
+        .quad   0x74071475bc927d0b
+        .quad   0xe77292f373e7ea8a
+        .quad   0x296537d2cb045a31
+        .quad   0x1bd0653ed3274fde
+        .quad   0x2f9a2c4476bd2966
+
+// 2^176 * 1 * G
+
+        .quad   0xeb18b9ab7f5745c6
+        .quad   0x023a8aee5787c690
+        .quad   0xb72712da2df7afa9
+        .quad   0x36597d25ea5c013d
+        .quad   0xa2b4dae0b5511c9a
+        .quad   0x7ac860292bffff06
+        .quad   0x981f375df5504234
+        .quad   0x3f6bd725da4ea12d
+        .quad   0x734d8d7b106058ac
+        .quad   0xd940579e6fc6905f
+        .quad   0x6466f8f99202932d
+        .quad   0x7b7ecc19da60d6d0
+
+// 2^176 * 2 * G
+
+        .quad   0x78c2373c695c690d
+        .quad   0xdd252e660642906e
+        .quad   0x951d44444ae12bd2
+        .quad   0x4235ad7601743956
+        .quad   0x6dae4a51a77cfa9b
+        .quad   0x82263654e7a38650
+        .quad   0x09bbffcd8f2d82db
+        .quad   0x03bedc661bf5caba
+        .quad   0x6258cb0d078975f5
+        .quad   0x492942549189f298
+        .quad   0xa0cab423e2e36ee4
+        .quad   0x0e7ce2b0cdf066a1
+
+// 2^176 * 3 * G
+
+        .quad   0xc494643ac48c85a3
+        .quad   0xfd361df43c6139ad
+        .quad   0x09db17dd3ae94d48
+        .quad   0x666e0a5d8fb4674a
+        .quad   0xfea6fedfd94b70f9
+        .quad   0xf130c051c1fcba2d
+        .quad   0x4882d47e7f2fab89
+        .quad   0x615256138aeceeb5
+        .quad   0x2abbf64e4870cb0d
+        .quad   0xcd65bcf0aa458b6b
+        .quad   0x9abe4eba75e8985d
+        .quad   0x7f0bc810d514dee4
+
+// 2^176 * 4 * G
+
+        .quad   0xb9006ba426f4136f
+        .quad   0x8d67369e57e03035
+        .quad   0xcbc8dfd94f463c28
+        .quad   0x0d1f8dbcf8eedbf5
+        .quad   0x83ac9dad737213a0
+        .quad   0x9ff6f8ba2ef72e98
+        .quad   0x311e2edd43ec6957
+        .quad   0x1d3a907ddec5ab75
+        .quad   0xba1693313ed081dc
+        .quad   0x29329fad851b3480
+        .quad   0x0128013c030321cb
+        .quad   0x00011b44a31bfde3
+
+// 2^176 * 5 * G
+
+        .quad   0x3fdfa06c3fc66c0c
+        .quad   0x5d40e38e4dd60dd2
+        .quad   0x7ae38b38268e4d71
+        .quad   0x3ac48d916e8357e1
+        .quad   0x16561f696a0aa75c
+        .quad   0xc1bf725c5852bd6a
+        .quad   0x11a8dd7f9a7966ad
+        .quad   0x63d988a2d2851026
+        .quad   0x00120753afbd232e
+        .quad   0xe92bceb8fdd8f683
+        .quad   0xf81669b384e72b91
+        .quad   0x33fad52b2368a066
+
+// 2^176 * 6 * G
+
+        .quad   0x540649c6c5e41e16
+        .quad   0x0af86430333f7735
+        .quad   0xb2acfcd2f305e746
+        .quad   0x16c0f429a256dca7
+        .quad   0x8d2cc8d0c422cfe8
+        .quad   0x072b4f7b05a13acb
+        .quad   0xa3feb6e6ecf6a56f
+        .quad   0x3cc355ccb90a71e2
+        .quad   0xe9b69443903e9131
+        .quad   0xb8a494cb7a5637ce
+        .quad   0xc87cd1a4baba9244
+        .quad   0x631eaf426bae7568
+
+// 2^176 * 7 * G
+
+        .quad   0xb3e90410da66fe9f
+        .quad   0x85dd4b526c16e5a6
+        .quad   0xbc3d97611ef9bf83
+        .quad   0x5599648b1ea919b5
+        .quad   0x47d975b9a3700de8
+        .quad   0x7280c5fbe2f80552
+        .quad   0x53658f2732e45de1
+        .quad   0x431f2c7f665f80b5
+        .quad   0xd6026344858f7b19
+        .quad   0x14ab352fa1ea514a
+        .quad   0x8900441a2090a9d7
+        .quad   0x7b04715f91253b26
+
+// 2^176 * 8 * G
+
+        .quad   0x83edbd28acf6ae43
+        .quad   0x86357c8b7d5c7ab4
+        .quad   0xc0404769b7eb2c44
+        .quad   0x59b37bf5c2f6583f
+        .quad   0xb376c280c4e6bac6
+        .quad   0x970ed3dd6d1d9b0b
+        .quad   0xb09a9558450bf944
+        .quad   0x48d0acfa57cde223
+        .quad   0xb60f26e47dabe671
+        .quad   0xf1d1a197622f3a37
+        .quad   0x4208ce7ee9960394
+        .quad   0x16234191336d3bdb
+
+// 2^180 * 1 * G
+
+        .quad   0xf19aeac733a63aef
+        .quad   0x2c7fba5d4442454e
+        .quad   0x5da87aa04795e441
+        .quad   0x413051e1a4e0b0f5
+        .quad   0x852dd1fd3d578bbe
+        .quad   0x2b65ce72c3286108
+        .quad   0x658c07f4eace2273
+        .quad   0x0933f804ec38ab40
+        .quad   0xa7ab69798d496476
+        .quad   0x8121aadefcb5abc8
+        .quad   0xa5dc12ef7b539472
+        .quad   0x07fd47065e45351a
+
+// 2^180 * 2 * G
+
+        .quad   0xc8583c3d258d2bcd
+        .quad   0x17029a4daf60b73f
+        .quad   0xfa0fc9d6416a3781
+        .quad   0x1c1e5fba38b3fb23
+        .quad   0x304211559ae8e7c3
+        .quad   0xf281b229944882a5
+        .quad   0x8a13ac2e378250e4
+        .quad   0x014afa0954ba48f4
+        .quad   0xcb3197001bb3666c
+        .quad   0x330060524bffecb9
+        .quad   0x293711991a88233c
+        .quad   0x291884363d4ed364
+
+// 2^180 * 3 * G
+
+        .quad   0x033c6805dc4babfa
+        .quad   0x2c15bf5e5596ecc1
+        .quad   0x1bc70624b59b1d3b
+        .quad   0x3ede9850a19f0ec5
+        .quad   0xfb9d37c3bc1ab6eb
+        .quad   0x02be14534d57a240
+        .quad   0xf4d73415f8a5e1f6
+        .quad   0x5964f4300ccc8188
+        .quad   0xe44a23152d096800
+        .quad   0x5c08c55970866996
+        .quad   0xdf2db60a46affb6e
+        .quad   0x579155c1f856fd89
+
+// 2^180 * 4 * G
+
+        .quad   0x96324edd12e0c9ef
+        .quad   0x468b878df2420297
+        .quad   0x199a3776a4f573be
+        .quad   0x1e7fbcf18e91e92a
+        .quad   0xb5f16b630817e7a6
+        .quad   0x808c69233c351026
+        .quad   0x324a983b54cef201
+        .quad   0x53c092084a485345
+        .quad   0xd2d41481f1cbafbf
+        .quad   0x231d2db6716174e5
+        .quad   0x0b7d7656e2a55c98
+        .quad   0x3e955cd82aa495f6
+
+// 2^180 * 5 * G
+
+        .quad   0xe48f535e3ed15433
+        .quad   0xd075692a0d7270a3
+        .quad   0x40fbd21daade6387
+        .quad   0x14264887cf4495f5
+        .quad   0xab39f3ef61bb3a3f
+        .quad   0x8eb400652eb9193e
+        .quad   0xb5de6ecc38c11f74
+        .quad   0x654d7e9626f3c49f
+        .quad   0xe564cfdd5c7d2ceb
+        .quad   0x82eeafded737ccb9
+        .quad   0x6107db62d1f9b0ab
+        .quad   0x0b6baac3b4358dbb
+
+// 2^180 * 6 * G
+
+        .quad   0x7ae62bcb8622fe98
+        .quad   0x47762256ceb891af
+        .quad   0x1a5a92bcf2e406b4
+        .quad   0x7d29401784e41501
+        .quad   0x204abad63700a93b
+        .quad   0xbe0023d3da779373
+        .quad   0xd85f0346633ab709
+        .quad   0x00496dc490820412
+        .quad   0x1c74b88dc27e6360
+        .quad   0x074854268d14850c
+        .quad   0xa145fb7b3e0dcb30
+        .quad   0x10843f1b43803b23
+
+// 2^180 * 7 * G
+
+        .quad   0xc5f90455376276dd
+        .quad   0xce59158dd7645cd9
+        .quad   0x92f65d511d366b39
+        .quad   0x11574b6e526996c4
+        .quad   0xd56f672de324689b
+        .quad   0xd1da8aedb394a981
+        .quad   0xdd7b58fe9168cfed
+        .quad   0x7ce246cd4d56c1e8
+        .quad   0xb8f4308e7f80be53
+        .quad   0x5f3cb8cb34a9d397
+        .quad   0x18a961bd33cc2b2c
+        .quad   0x710045fb3a9af671
+
+// 2^180 * 8 * G
+
+        .quad   0x73f93d36101b95eb
+        .quad   0xfaef33794f6f4486
+        .quad   0x5651735f8f15e562
+        .quad   0x7fa3f19058b40da1
+        .quad   0xa03fc862059d699e
+        .quad   0x2370cfa19a619e69
+        .quad   0xc4fe3b122f823deb
+        .quad   0x1d1b056fa7f0844e
+        .quad   0x1bc64631e56bf61f
+        .quad   0xd379ab106e5382a3
+        .quad   0x4d58c57e0540168d
+        .quad   0x566256628442d8e4
+
+// 2^184 * 1 * G
+
+        .quad   0xb9e499def6267ff6
+        .quad   0x7772ca7b742c0843
+        .quad   0x23a0153fe9a4f2b1
+        .quad   0x2cdfdfecd5d05006
+        .quad   0xdd499cd61ff38640
+        .quad   0x29cd9bc3063625a0
+        .quad   0x51e2d8023dd73dc3
+        .quad   0x4a25707a203b9231
+        .quad   0x2ab7668a53f6ed6a
+        .quad   0x304242581dd170a1
+        .quad   0x4000144c3ae20161
+        .quad   0x5721896d248e49fc
+
+// 2^184 * 2 * G
+
+        .quad   0x0b6e5517fd181bae
+        .quad   0x9022629f2bb963b4
+        .quad   0x5509bce932064625
+        .quad   0x578edd74f63c13da
+        .quad   0x285d5091a1d0da4e
+        .quad   0x4baa6fa7b5fe3e08
+        .quad   0x63e5177ce19393b3
+        .quad   0x03c935afc4b030fd
+        .quad   0x997276c6492b0c3d
+        .quad   0x47ccc2c4dfe205fc
+        .quad   0xdcd29b84dd623a3c
+        .quad   0x3ec2ab590288c7a2
+
+// 2^184 * 3 * G
+
+        .quad   0xa1a0d27be4d87bb9
+        .quad   0xa98b4deb61391aed
+        .quad   0x99a0ddd073cb9b83
+        .quad   0x2dd5c25a200fcace
+        .quad   0xa7213a09ae32d1cb
+        .quad   0x0f2b87df40f5c2d5
+        .quad   0x0baea4c6e81eab29
+        .quad   0x0e1bf66c6adbac5e
+        .quad   0xe2abd5e9792c887e
+        .quad   0x1a020018cb926d5d
+        .quad   0xbfba69cdbaae5f1e
+        .quad   0x730548b35ae88f5f
+
+// 2^184 * 4 * G
+
+        .quad   0xc43551a3cba8b8ee
+        .quad   0x65a26f1db2115f16
+        .quad   0x760f4f52ab8c3850
+        .quad   0x3043443b411db8ca
+        .quad   0x805b094ba1d6e334
+        .quad   0xbf3ef17709353f19
+        .quad   0x423f06cb0622702b
+        .quad   0x585a2277d87845dd
+        .quad   0xa18a5f8233d48962
+        .quad   0x6698c4b5ec78257f
+        .quad   0xa78e6fa5373e41ff
+        .quad   0x7656278950ef981f
+
+// 2^184 * 5 * G
+
+        .quad   0x38c3cf59d51fc8c0
+        .quad   0x9bedd2fd0506b6f2
+        .quad   0x26bf109fab570e8f
+        .quad   0x3f4160a8c1b846a6
+        .quad   0xe17073a3ea86cf9d
+        .quad   0x3a8cfbb707155fdc
+        .quad   0x4853e7fc31838a8e
+        .quad   0x28bbf484b613f616
+        .quad   0xf2612f5c6f136c7c
+        .quad   0xafead107f6dd11be
+        .quad   0x527e9ad213de6f33
+        .quad   0x1e79cb358188f75d
+
+// 2^184 * 6 * G
+
+        .quad   0x013436c3eef7e3f1
+        .quad   0x828b6a7ffe9e10f8
+        .quad   0x7ff908e5bcf9defc
+        .quad   0x65d7951b3a3b3831
+        .quad   0x77e953d8f5e08181
+        .quad   0x84a50c44299dded9
+        .quad   0xdc6c2d0c864525e5
+        .quad   0x478ab52d39d1f2f4
+        .quad   0x66a6a4d39252d159
+        .quad   0xe5dde1bc871ac807
+        .quad   0xb82c6b40a6c1c96f
+        .quad   0x16d87a411a212214
+
+// 2^184 * 7 * G
+
+        .quad   0xb3bd7e5a42066215
+        .quad   0x879be3cd0c5a24c1
+        .quad   0x57c05db1d6f994b7
+        .quad   0x28f87c8165f38ca6
+        .quad   0xfba4d5e2d54e0583
+        .quad   0xe21fafd72ebd99fa
+        .quad   0x497ac2736ee9778f
+        .quad   0x1f990b577a5a6dde
+        .quad   0xa3344ead1be8f7d6
+        .quad   0x7d1e50ebacea798f
+        .quad   0x77c6569e520de052
+        .quad   0x45882fe1534d6d3e
+
+// 2^184 * 8 * G
+
+        .quad   0x6669345d757983d6
+        .quad   0x62b6ed1117aa11a6
+        .quad   0x7ddd1857985e128f
+        .quad   0x688fe5b8f626f6dd
+        .quad   0xd8ac9929943c6fe4
+        .quad   0xb5f9f161a38392a2
+        .quad   0x2699db13bec89af3
+        .quad   0x7dcf843ce405f074
+        .quad   0x6c90d6484a4732c0
+        .quad   0xd52143fdca563299
+        .quad   0xb3be28c3915dc6e1
+        .quad   0x6739687e7327191b
+
+// 2^188 * 1 * G
+
+        .quad   0x9f65c5ea200814cf
+        .quad   0x840536e169a31740
+        .quad   0x8b0ed13925c8b4ad
+        .quad   0x0080dbafe936361d
+        .quad   0x8ce5aad0c9cb971f
+        .quad   0x1156aaa99fd54a29
+        .quad   0x41f7247015af9b78
+        .quad   0x1fe8cca8420f49aa
+        .quad   0x72a1848f3c0cc82a
+        .quad   0x38c560c2877c9e54
+        .quad   0x5004e228ce554140
+        .quad   0x042418a103429d71
+
+// 2^188 * 2 * G
+
+        .quad   0x899dea51abf3ff5f
+        .quad   0x9b93a8672fc2d8ba
+        .quad   0x2c38cb97be6ebd5c
+        .quad   0x114d578497263b5d
+        .quad   0x58e84c6f20816247
+        .quad   0x8db2b2b6e36fd793
+        .quad   0x977182561d484d85
+        .quad   0x0822024f8632abd7
+        .quad   0xb301bb7c6b1beca3
+        .quad   0x55393f6dc6eb1375
+        .quad   0x910d281097b6e4eb
+        .quad   0x1ad4548d9d479ea3
+
+// 2^188 * 3 * G
+
+        .quad   0xcd5a7da0389a48fd
+        .quad   0xb38fa4aa9a78371e
+        .quad   0xc6d9761b2cdb8e6c
+        .quad   0x35cf51dbc97e1443
+        .quad   0xa06fe66d0fe9fed3
+        .quad   0xa8733a401c587909
+        .quad   0x30d14d800df98953
+        .quad   0x41ce5876c7b30258
+        .quad   0x59ac3bc5d670c022
+        .quad   0xeae67c109b119406
+        .quad   0x9798bdf0b3782fda
+        .quad   0x651e3201fd074092
+
+// 2^188 * 4 * G
+
+        .quad   0xd63d8483ef30c5cf
+        .quad   0x4cd4b4962361cc0c
+        .quad   0xee90e500a48426ac
+        .quad   0x0af51d7d18c14eeb
+        .quad   0xa57ba4a01efcae9e
+        .quad   0x769f4beedc308a94
+        .quad   0xd1f10eeb3603cb2e
+        .quad   0x4099ce5e7e441278
+        .quad   0x1ac98e4f8a5121e9
+        .quad   0x7dae9544dbfa2fe0
+        .quad   0x8320aa0dd6430df9
+        .quad   0x667282652c4a2fb5
+
+// 2^188 * 5 * G
+
+        .quad   0x874621f4d86bc9ab
+        .quad   0xb54c7bbe56fe6fea
+        .quad   0x077a24257fadc22c
+        .quad   0x1ab53be419b90d39
+        .quad   0xada8b6e02946db23
+        .quad   0x1c0ce51a7b253ab7
+        .quad   0x8448c85a66dd485b
+        .quad   0x7f1fc025d0675adf
+        .quad   0xd8ee1b18319ea6aa
+        .quad   0x004d88083a21f0da
+        .quad   0x3bd6aa1d883a4f4b
+        .quad   0x4db9a3a6dfd9fd14
+
+// 2^188 * 6 * G
+
+        .quad   0x8ce7b23bb99c0755
+        .quad   0x35c5d6edc4f50f7a
+        .quad   0x7e1e2ed2ed9b50c3
+        .quad   0x36305f16e8934da1
+        .quad   0xd95b00bbcbb77c68
+        .quad   0xddbc846a91f17849
+        .quad   0x7cf700aebe28d9b3
+        .quad   0x5ce1285c85d31f3e
+        .quad   0x31b6972d98b0bde8
+        .quad   0x7d920706aca6de5b
+        .quad   0xe67310f8908a659f
+        .quad   0x50fac2a6efdf0235
+
+// 2^188 * 7 * G
+
+        .quad   0xf3d3a9f35b880f5a
+        .quad   0xedec050cdb03e7c2
+        .quad   0xa896981ff9f0b1a2
+        .quad   0x49a4ae2bac5e34a4
+        .quad   0x295b1c86f6f449bc
+        .quad   0x51b2e84a1f0ab4dd
+        .quad   0xc001cb30aa8e551d
+        .quad   0x6a28d35944f43662
+        .quad   0x28bb12ee04a740e0
+        .quad   0x14313bbd9bce8174
+        .quad   0x72f5b5e4e8c10c40
+        .quad   0x7cbfb19936adcd5b
+
+// 2^188 * 8 * G
+
+        .quad   0xa311ddc26b89792d
+        .quad   0x1b30b4c6da512664
+        .quad   0x0ca77b4ccf150859
+        .quad   0x1de443df1b009408
+        .quad   0x8e793a7acc36e6e0
+        .quad   0xf9fab7a37d586eed
+        .quad   0x3a4f9692bae1f4e4
+        .quad   0x1c14b03eff5f447e
+        .quad   0x19647bd114a85291
+        .quad   0x57b76cb21034d3af
+        .quad   0x6329db440f9d6dfa
+        .quad   0x5ef43e586a571493
+
+// 2^192 * 1 * G
+
+        .quad   0xef782014385675a6
+        .quad   0xa2649f30aafda9e8
+        .quad   0x4cd1eb505cdfa8cb
+        .quad   0x46115aba1d4dc0b3
+        .quad   0xa66dcc9dc80c1ac0
+        .quad   0x97a05cf41b38a436
+        .quad   0xa7ebf3be95dbd7c6
+        .quad   0x7da0b8f68d7e7dab
+        .quad   0xd40f1953c3b5da76
+        .quad   0x1dac6f7321119e9b
+        .quad   0x03cc6021feb25960
+        .quad   0x5a5f887e83674b4b
+
+// 2^192 * 2 * G
+
+        .quad   0x8f6301cf70a13d11
+        .quad   0xcfceb815350dd0c4
+        .quad   0xf70297d4a4bca47e
+        .quad   0x3669b656e44d1434
+        .quad   0x9e9628d3a0a643b9
+        .quad   0xb5c3cb00e6c32064
+        .quad   0x9b5302897c2dec32
+        .quad   0x43e37ae2d5d1c70c
+        .quad   0x387e3f06eda6e133
+        .quad   0x67301d5199a13ac0
+        .quad   0xbd5ad8f836263811
+        .quad   0x6a21e6cd4fd5e9be
+
+// 2^192 * 3 * G
+
+        .quad   0xf1c6170a3046e65f
+        .quad   0x58712a2a00d23524
+        .quad   0x69dbbd3c8c82b755
+        .quad   0x586bf9f1a195ff57
+        .quad   0xef4129126699b2e3
+        .quad   0x71d30847708d1301
+        .quad   0x325432d01182b0bd
+        .quad   0x45371b07001e8b36
+        .quad   0xa6db088d5ef8790b
+        .quad   0x5278f0dc610937e5
+        .quad   0xac0349d261a16eb8
+        .quad   0x0eafb03790e52179
+
+// 2^192 * 4 * G
+
+        .quad   0x960555c13748042f
+        .quad   0x219a41e6820baa11
+        .quad   0x1c81f73873486d0c
+        .quad   0x309acc675a02c661
+        .quad   0x5140805e0f75ae1d
+        .quad   0xec02fbe32662cc30
+        .quad   0x2cebdf1eea92396d
+        .quad   0x44ae3344c5435bb3
+        .quad   0x9cf289b9bba543ee
+        .quad   0xf3760e9d5ac97142
+        .quad   0x1d82e5c64f9360aa
+        .quad   0x62d5221b7f94678f
+
+// 2^192 * 5 * G
+
+        .quad   0x524c299c18d0936d
+        .quad   0xc86bb56c8a0c1a0c
+        .quad   0xa375052edb4a8631
+        .quad   0x5c0efde4bc754562
+        .quad   0x7585d4263af77a3c
+        .quad   0xdfae7b11fee9144d
+        .quad   0xa506708059f7193d
+        .quad   0x14f29a5383922037
+        .quad   0xdf717edc25b2d7f5
+        .quad   0x21f970db99b53040
+        .quad   0xda9234b7c3ed4c62
+        .quad   0x5e72365c7bee093e
+
+// 2^192 * 6 * G
+
+        .quad   0x575bfc074571217f
+        .quad   0x3779675d0694d95b
+        .quad   0x9a0a37bbf4191e33
+        .quad   0x77f1104c47b4eabc
+        .quad   0x7d9339062f08b33e
+        .quad   0x5b9659e5df9f32be
+        .quad   0xacff3dad1f9ebdfd
+        .quad   0x70b20555cb7349b7
+        .quad   0xbe5113c555112c4c
+        .quad   0x6688423a9a881fcd
+        .quad   0x446677855e503b47
+        .quad   0x0e34398f4a06404a
+
+// 2^192 * 7 * G
+
+        .quad   0xb67d22d93ecebde8
+        .quad   0x09b3e84127822f07
+        .quad   0x743fa61fb05b6d8d
+        .quad   0x5e5405368a362372
+        .quad   0x18930b093e4b1928
+        .quad   0x7de3e10e73f3f640
+        .quad   0xf43217da73395d6f
+        .quad   0x6f8aded6ca379c3e
+        .quad   0xe340123dfdb7b29a
+        .quad   0x487b97e1a21ab291
+        .quad   0xf9967d02fde6949e
+        .quad   0x780de72ec8d3de97
+
+// 2^192 * 8 * G
+
+        .quad   0x0ae28545089ae7bc
+        .quad   0x388ddecf1c7f4d06
+        .quad   0x38ac15510a4811b8
+        .quad   0x0eb28bf671928ce4
+        .quad   0x671feaf300f42772
+        .quad   0x8f72eb2a2a8c41aa
+        .quad   0x29a17fd797373292
+        .quad   0x1defc6ad32b587a6
+        .quad   0xaf5bbe1aef5195a7
+        .quad   0x148c1277917b15ed
+        .quad   0x2991f7fb7ae5da2e
+        .quad   0x467d201bf8dd2867
+
+// 2^196 * 1 * G
+
+        .quad   0x7906ee72f7bd2e6b
+        .quad   0x05d270d6109abf4e
+        .quad   0x8d5cfe45b941a8a4
+        .quad   0x44c218671c974287
+        .quad   0x745f9d56296bc318
+        .quad   0x993580d4d8152e65
+        .quad   0xb0e5b13f5839e9ce
+        .quad   0x51fc2b28d43921c0
+        .quad   0x1b8fd11795e2a98c
+        .quad   0x1c4e5ee12b6b6291
+        .quad   0x5b30e7107424b572
+        .quad   0x6e6b9de84c4f4ac6
+
+// 2^196 * 2 * G
+
+        .quad   0xdff25fce4b1de151
+        .quad   0xd841c0c7e11c4025
+        .quad   0x2554b3c854749c87
+        .quad   0x2d292459908e0df9
+        .quad   0x6b7c5f10f80cb088
+        .quad   0x736b54dc56e42151
+        .quad   0xc2b620a5c6ef99c4
+        .quad   0x5f4c802cc3a06f42
+        .quad   0x9b65c8f17d0752da
+        .quad   0x881ce338c77ee800
+        .quad   0xc3b514f05b62f9e3
+        .quad   0x66ed5dd5bec10d48
+
+// 2^196 * 3 * G
+
+        .quad   0x7d38a1c20bb2089d
+        .quad   0x808334e196ccd412
+        .quad   0xc4a70b8c6c97d313
+        .quad   0x2eacf8bc03007f20
+        .quad   0xf0adf3c9cbca047d
+        .quad   0x81c3b2cbf4552f6b
+        .quad   0xcfda112d44735f93
+        .quad   0x1f23a0c77e20048c
+        .quad   0xf235467be5bc1570
+        .quad   0x03d2d9020dbab38c
+        .quad   0x27529aa2fcf9e09e
+        .quad   0x0840bef29d34bc50
+
+// 2^196 * 4 * G
+
+        .quad   0x796dfb35dc10b287
+        .quad   0x27176bcd5c7ff29d
+        .quad   0x7f3d43e8c7b24905
+        .quad   0x0304f5a191c54276
+        .quad   0xcd54e06b7f37e4eb
+        .quad   0x8cc15f87f5e96cca
+        .quad   0xb8248bb0d3597dce
+        .quad   0x246affa06074400c
+        .quad   0x37d88e68fbe45321
+        .quad   0x86097548c0d75032
+        .quad   0x4e9b13ef894a0d35
+        .quad   0x25a83cac5753d325
+
+// 2^196 * 5 * G
+
+        .quad   0x10222f48eed8165e
+        .quad   0x623fc1234b8bcf3a
+        .quad   0x1e145c09c221e8f0
+        .quad   0x7ccfa59fca782630
+        .quad   0x9f0f66293952b6e2
+        .quad   0x33db5e0e0934267b
+        .quad   0xff45252bd609fedc
+        .quad   0x06be10f5c506e0c9
+        .quad   0x1a9615a9b62a345f
+        .quad   0x22050c564a52fecc
+        .quad   0xa7a2788528bc0dfe
+        .quad   0x5e82770a1a1ee71d
+
+// 2^196 * 6 * G
+
+        .quad   0x35425183ad896a5c
+        .quad   0xe8673afbe78d52f6
+        .quad   0x2c66f25f92a35f64
+        .quad   0x09d04f3b3b86b102
+        .quad   0xe802e80a42339c74
+        .quad   0x34175166a7fffae5
+        .quad   0x34865d1f1c408cae
+        .quad   0x2cca982c605bc5ee
+        .quad   0xfd2d5d35197dbe6e
+        .quad   0x207c2eea8be4ffa3
+        .quad   0x2613d8db325ae918
+        .quad   0x7a325d1727741d3e
+
+// 2^196 * 7 * G
+
+        .quad   0xd036b9bbd16dfde2
+        .quad   0xa2055757c497a829
+        .quad   0x8e6cc966a7f12667
+        .quad   0x4d3b1a791239c180
+        .quad   0xecd27d017e2a076a
+        .quad   0xd788689f1636495e
+        .quad   0x52a61af0919233e5
+        .quad   0x2a479df17bb1ae64
+        .quad   0x9e5eee8e33db2710
+        .quad   0x189854ded6c43ca5
+        .quad   0xa41c22c592718138
+        .quad   0x27ad5538a43a5e9b
+
+// 2^196 * 8 * G
+
+        .quad   0x2746dd4b15350d61
+        .quad   0xd03fcbc8ee9521b7
+        .quad   0xe86e365a138672ca
+        .quad   0x510e987f7e7d89e2
+        .quad   0xcb5a7d638e47077c
+        .quad   0x8db7536120a1c059
+        .quad   0x549e1e4d8bedfdcc
+        .quad   0x080153b7503b179d
+        .quad   0xdda69d930a3ed3e3
+        .quad   0x3d386ef1cd60a722
+        .quad   0xc817ad58bdaa4ee6
+        .quad   0x23be8d554fe7372a
+
+// 2^200 * 1 * G
+
+        .quad   0x95fe919a74ef4fad
+        .quad   0x3a827becf6a308a2
+        .quad   0x964e01d309a47b01
+        .quad   0x71c43c4f5ba3c797
+        .quad   0xbc1ef4bd567ae7a9
+        .quad   0x3f624cb2d64498bd
+        .quad   0xe41064d22c1f4ec8
+        .quad   0x2ef9c5a5ba384001
+        .quad   0xb6fd6df6fa9e74cd
+        .quad   0xf18278bce4af267a
+        .quad   0x8255b3d0f1ef990e
+        .quad   0x5a758ca390c5f293
+
+// 2^200 * 2 * G
+
+        .quad   0xa2b72710d9462495
+        .quad   0x3aa8c6d2d57d5003
+        .quad   0xe3d400bfa0b487ca
+        .quad   0x2dbae244b3eb72ec
+        .quad   0x8ce0918b1d61dc94
+        .quad   0x8ded36469a813066
+        .quad   0xd4e6a829afe8aad3
+        .quad   0x0a738027f639d43f
+        .quad   0x980f4a2f57ffe1cc
+        .quad   0x00670d0de1839843
+        .quad   0x105c3f4a49fb15fd
+        .quad   0x2698ca635126a69c
+
+// 2^200 * 3 * G
+
+        .quad   0xe765318832b0ba78
+        .quad   0x381831f7925cff8b
+        .quad   0x08a81b91a0291fcc
+        .quad   0x1fb43dcc49caeb07
+        .quad   0x2e3d702f5e3dd90e
+        .quad   0x9e3f0918e4d25386
+        .quad   0x5e773ef6024da96a
+        .quad   0x3c004b0c4afa3332
+        .quad   0x9aa946ac06f4b82b
+        .quad   0x1ca284a5a806c4f3
+        .quad   0x3ed3265fc6cd4787
+        .quad   0x6b43fd01cd1fd217
+
+// 2^200 * 4 * G
+
+        .quad   0xc7a75d4b4697c544
+        .quad   0x15fdf848df0fffbf
+        .quad   0x2868b9ebaa46785a
+        .quad   0x5a68d7105b52f714
+        .quad   0xb5c742583e760ef3
+        .quad   0x75dc52b9ee0ab990
+        .quad   0xbf1427c2072b923f
+        .quad   0x73420b2d6ff0d9f0
+        .quad   0xaf2cf6cb9e851e06
+        .quad   0x8f593913c62238c4
+        .quad   0xda8ab89699fbf373
+        .quad   0x3db5632fea34bc9e
+
+// 2^200 * 5 * G
+
+        .quad   0xf46eee2bf75dd9d8
+        .quad   0x0d17b1f6396759a5
+        .quad   0x1bf2d131499e7273
+        .quad   0x04321adf49d75f13
+        .quad   0x2e4990b1829825d5
+        .quad   0xedeaeb873e9a8991
+        .quad   0xeef03d394c704af8
+        .quad   0x59197ea495df2b0e
+        .quad   0x04e16019e4e55aae
+        .quad   0xe77b437a7e2f92e9
+        .quad   0xc7ce2dc16f159aa4
+        .quad   0x45eafdc1f4d70cc0
+
+// 2^200 * 6 * G
+
+        .quad   0x698401858045d72b
+        .quad   0x4c22faa2cf2f0651
+        .quad   0x941a36656b222dc6
+        .quad   0x5a5eebc80362dade
+        .quad   0xb60e4624cfccb1ed
+        .quad   0x59dbc292bd5c0395
+        .quad   0x31a09d1ddc0481c9
+        .quad   0x3f73ceea5d56d940
+        .quad   0xb7a7bfd10a4e8dc6
+        .quad   0xbe57007e44c9b339
+        .quad   0x60c1207f1557aefa
+        .quad   0x26058891266218db
+
+// 2^200 * 7 * G
+
+        .quad   0x59f704a68360ff04
+        .quad   0xc3d93fde7661e6f4
+        .quad   0x831b2a7312873551
+        .quad   0x54ad0c2e4e615d57
+        .quad   0x4c818e3cc676e542
+        .quad   0x5e422c9303ceccad
+        .quad   0xec07cccab4129f08
+        .quad   0x0dedfa10b24443b8
+        .quad   0xee3b67d5b82b522a
+        .quad   0x36f163469fa5c1eb
+        .quad   0xa5b4d2f26ec19fd3
+        .quad   0x62ecb2baa77a9408
+
+// 2^200 * 8 * G
+
+        .quad   0xe5ed795261152b3d
+        .quad   0x4962357d0eddd7d1
+        .quad   0x7482c8d0b96b4c71
+        .quad   0x2e59f919a966d8be
+        .quad   0x92072836afb62874
+        .quad   0x5fcd5e8579e104a5
+        .quad   0x5aad01adc630a14a
+        .quad   0x61913d5075663f98
+        .quad   0x0dc62d361a3231da
+        .quad   0xfa47583294200270
+        .quad   0x02d801513f9594ce
+        .quad   0x3ddbc2a131c05d5c
+
+// 2^204 * 1 * G
+
+        .quad   0x3f50a50a4ffb81ef
+        .quad   0xb1e035093bf420bf
+        .quad   0x9baa8e1cc6aa2cd0
+        .quad   0x32239861fa237a40
+        .quad   0xfb735ac2004a35d1
+        .quad   0x31de0f433a6607c3
+        .quad   0x7b8591bfc528d599
+        .quad   0x55be9a25f5bb050c
+        .quad   0x0d005acd33db3dbf
+        .quad   0x0111b37c80ac35e2
+        .quad   0x4892d66c6f88ebeb
+        .quad   0x770eadb16508fbcd
+
+// 2^204 * 2 * G
+
+        .quad   0x8451f9e05e4e89dd
+        .quad   0xc06302ffbc793937
+        .quad   0x5d22749556a6495c
+        .quad   0x09a6755ca05603fb
+        .quad   0xf1d3b681a05071b9
+        .quad   0x2207659a3592ff3a
+        .quad   0x5f0169297881e40e
+        .quad   0x16bedd0e86ba374e
+        .quad   0x5ecccc4f2c2737b5
+        .quad   0x43b79e0c2dccb703
+        .quad   0x33e008bc4ec43df3
+        .quad   0x06c1b840f07566c0
+
+// 2^204 * 3 * G
+
+        .quad   0x7688a5c6a388f877
+        .quad   0x02a96c14deb2b6ac
+        .quad   0x64c9f3431b8c2af8
+        .quad   0x3628435554a1eed6
+        .quad   0x69ee9e7f9b02805c
+        .quad   0xcbff828a547d1640
+        .quad   0x3d93a869b2430968
+        .quad   0x46b7b8cd3fe26972
+        .quad   0xe9812086fe7eebe0
+        .quad   0x4cba6be72f515437
+        .quad   0x1d04168b516efae9
+        .quad   0x5ea1391043982cb9
+
+// 2^204 * 4 * G
+
+        .quad   0x49125c9cf4702ee1
+        .quad   0x4520b71f8b25b32d
+        .quad   0x33193026501fef7e
+        .quad   0x656d8997c8d2eb2b
+        .quad   0x6f2b3be4d5d3b002
+        .quad   0xafec33d96a09c880
+        .quad   0x035f73a4a8bcc4cc
+        .quad   0x22c5b9284662198b
+        .quad   0xcb58c8fe433d8939
+        .quad   0x89a0cb2e6a8d7e50
+        .quad   0x79ca955309fbbe5a
+        .quad   0x0c626616cd7fc106
+
+// 2^204 * 5 * G
+
+        .quad   0x1ffeb80a4879b61f
+        .quad   0x6396726e4ada21ed
+        .quad   0x33c7b093368025ba
+        .quad   0x471aa0c6f3c31788
+        .quad   0x8fdfc379fbf454b1
+        .quad   0x45a5a970f1a4b771
+        .quad   0xac921ef7bad35915
+        .quad   0x42d088dca81c2192
+        .quad   0x8fda0f37a0165199
+        .quad   0x0adadb77c8a0e343
+        .quad   0x20fbfdfcc875e820
+        .quad   0x1cf2bea80c2206e7
+
+// 2^204 * 6 * G
+
+        .quad   0xc2ddf1deb36202ac
+        .quad   0x92a5fe09d2e27aa5
+        .quad   0x7d1648f6fc09f1d3
+        .quad   0x74c2cc0513bc4959
+        .quad   0x982d6e1a02c0412f
+        .quad   0x90fa4c83db58e8fe
+        .quad   0x01c2f5bcdcb18bc0
+        .quad   0x686e0c90216abc66
+        .quad   0x1fadbadba54395a7
+        .quad   0xb41a02a0ae0da66a
+        .quad   0xbf19f598bba37c07
+        .quad   0x6a12b8acde48430d
+
+// 2^204 * 7 * G
+
+        .quad   0xf8daea1f39d495d9
+        .quad   0x592c190e525f1dfc
+        .quad   0xdb8cbd04c9991d1b
+        .quad   0x11f7fda3d88f0cb7
+        .quad   0x793bdd801aaeeb5f
+        .quad   0x00a2a0aac1518871
+        .quad   0xe8a373a31f2136b4
+        .quad   0x48aab888fc91ef19
+        .quad   0x041f7e925830f40e
+        .quad   0x002d6ca979661c06
+        .quad   0x86dc9ff92b046a2e
+        .quad   0x760360928b0493d1
+
+// 2^204 * 8 * G
+
+        .quad   0x21bb41c6120cf9c6
+        .quad   0xeab2aa12decda59b
+        .quad   0xc1a72d020aa48b34
+        .quad   0x215d4d27e87d3b68
+        .quad   0xb43108e5695a0b05
+        .quad   0x6cb00ee8ad37a38b
+        .quad   0x5edad6eea3537381
+        .quad   0x3f2602d4b6dc3224
+        .quad   0xc8b247b65bcaf19c
+        .quad   0x49779dc3b1b2c652
+        .quad   0x89a180bbd5ece2e2
+        .quad   0x13f098a3cec8e039
+
+// 2^208 * 1 * G
+
+        .quad   0x9adc0ff9ce5ec54b
+        .quad   0x039c2a6b8c2f130d
+        .quad   0x028007c7f0f89515
+        .quad   0x78968314ac04b36b
+        .quad   0xf3aa57a22796bb14
+        .quad   0x883abab79b07da21
+        .quad   0xe54be21831a0391c
+        .quad   0x5ee7fb38d83205f9
+        .quad   0x538dfdcb41446a8e
+        .quad   0xa5acfda9434937f9
+        .quad   0x46af908d263c8c78
+        .quad   0x61d0633c9bca0d09
+
+// 2^208 * 2 * G
+
+        .quad   0x63744935ffdb2566
+        .quad   0xc5bd6b89780b68bb
+        .quad   0x6f1b3280553eec03
+        .quad   0x6e965fd847aed7f5
+        .quad   0xada328bcf8fc73df
+        .quad   0xee84695da6f037fc
+        .quad   0x637fb4db38c2a909
+        .quad   0x5b23ac2df8067bdc
+        .quad   0x9ad2b953ee80527b
+        .quad   0xe88f19aafade6d8d
+        .quad   0x0e711704150e82cf
+        .quad   0x79b9bbb9dd95dedc
+
+// 2^208 * 3 * G
+
+        .quad   0xebb355406a3126c2
+        .quad   0xd26383a868c8c393
+        .quad   0x6c0c6429e5b97a82
+        .quad   0x5065f158c9fd2147
+        .quad   0xd1997dae8e9f7374
+        .quad   0xa032a2f8cfbb0816
+        .quad   0xcd6cba126d445f0a
+        .quad   0x1ba811460accb834
+        .quad   0x708169fb0c429954
+        .quad   0xe14600acd76ecf67
+        .quad   0x2eaab98a70e645ba
+        .quad   0x3981f39e58a4faf2
+
+// 2^208 * 4 * G
+
+        .quad   0x18fb8a7559230a93
+        .quad   0x1d168f6960e6f45d
+        .quad   0x3a85a94514a93cb5
+        .quad   0x38dc083705acd0fd
+        .quad   0xc845dfa56de66fde
+        .quad   0xe152a5002c40483a
+        .quad   0xe9d2e163c7b4f632
+        .quad   0x30f4452edcbc1b65
+        .quad   0x856d2782c5759740
+        .quad   0xfa134569f99cbecc
+        .quad   0x8844fc73c0ea4e71
+        .quad   0x632d9a1a593f2469
+
+// 2^208 * 5 * G
+
+        .quad   0xf6bb6b15b807cba6
+        .quad   0x1823c7dfbc54f0d7
+        .quad   0xbb1d97036e29670b
+        .quad   0x0b24f48847ed4a57
+        .quad   0xbf09fd11ed0c84a7
+        .quad   0x63f071810d9f693a
+        .quad   0x21908c2d57cf8779
+        .quad   0x3a5a7df28af64ba2
+        .quad   0xdcdad4be511beac7
+        .quad   0xa4538075ed26ccf2
+        .quad   0xe19cff9f005f9a65
+        .quad   0x34fcf74475481f63
+
+// 2^208 * 6 * G
+
+        .quad   0xc197e04c789767ca
+        .quad   0xb8714dcb38d9467d
+        .quad   0x55de888283f95fa8
+        .quad   0x3d3bdc164dfa63f7
+        .quad   0xa5bb1dab78cfaa98
+        .quad   0x5ceda267190b72f2
+        .quad   0x9309c9110a92608e
+        .quad   0x0119a3042fb374b0
+        .quad   0x67a2d89ce8c2177d
+        .quad   0x669da5f66895d0c1
+        .quad   0xf56598e5b282a2b0
+        .quad   0x56c088f1ede20a73
+
+// 2^208 * 7 * G
+
+        .quad   0x336d3d1110a86e17
+        .quad   0xd7f388320b75b2fa
+        .quad   0xf915337625072988
+        .quad   0x09674c6b99108b87
+        .quad   0x581b5fac24f38f02
+        .quad   0xa90be9febae30cbd
+        .quad   0x9a2169028acf92f0
+        .quad   0x038b7ea48359038f
+        .quad   0x9f4ef82199316ff8
+        .quad   0x2f49d282eaa78d4f
+        .quad   0x0971a5ab5aef3174
+        .quad   0x6e5e31025969eb65
+
+// 2^208 * 8 * G
+
+        .quad   0xb16c62f587e593fb
+        .quad   0x4999eddeca5d3e71
+        .quad   0xb491c1e014cc3e6d
+        .quad   0x08f5114789a8dba8
+        .quad   0x3304fb0e63066222
+        .quad   0xfb35068987acba3f
+        .quad   0xbd1924778c1061a3
+        .quad   0x3058ad43d1838620
+        .quad   0x323c0ffde57663d0
+        .quad   0x05c3df38a22ea610
+        .quad   0xbdc78abdac994f9a
+        .quad   0x26549fa4efe3dc99
+
+// 2^212 * 1 * G
+
+        .quad   0x738b38d787ce8f89
+        .quad   0xb62658e24179a88d
+        .quad   0x30738c9cf151316d
+        .quad   0x49128c7f727275c9
+        .quad   0x04dbbc17f75396b9
+        .quad   0x69e6a2d7d2f86746
+        .quad   0xc6409d99f53eabc6
+        .quad   0x606175f6332e25d2
+        .quad   0x4021370ef540e7dd
+        .quad   0x0910d6f5a1f1d0a5
+        .quad   0x4634aacd5b06b807
+        .quad   0x6a39e6356944f235
+
+// 2^212 * 2 * G
+
+        .quad   0x96cd5640df90f3e7
+        .quad   0x6c3a760edbfa25ea
+        .quad   0x24f3ef0959e33cc4
+        .quad   0x42889e7e530d2e58
+        .quad   0x1da1965774049e9d
+        .quad   0xfbcd6ea198fe352b
+        .quad   0xb1cbcd50cc5236a6
+        .quad   0x1f5ec83d3f9846e2
+        .quad   0x8efb23c3328ccb75
+        .quad   0xaf42a207dd876ee9
+        .quad   0x20fbdadc5dfae796
+        .quad   0x241e246b06bf9f51
+
+// 2^212 * 3 * G
+
+        .quad   0x29e68e57ad6e98f6
+        .quad   0x4c9260c80b462065
+        .quad   0x3f00862ea51ebb4b
+        .quad   0x5bc2c77fb38d9097
+        .quad   0x7eaafc9a6280bbb8
+        .quad   0x22a70f12f403d809
+        .quad   0x31ce40bb1bfc8d20
+        .quad   0x2bc65635e8bd53ee
+        .quad   0xe8d5dc9fa96bad93
+        .quad   0xe58fb17dde1947dc
+        .quad   0x681532ea65185fa3
+        .quad   0x1fdd6c3b034a7830
+
+// 2^212 * 4 * G
+
+        .quad   0x0a64e28c55dc18fe
+        .quad   0xe3df9e993399ebdd
+        .quad   0x79ac432370e2e652
+        .quad   0x35ff7fc33ae4cc0e
+        .quad   0x9c13a6a52dd8f7a9
+        .quad   0x2dbb1f8c3efdcabf
+        .quad   0x961e32405e08f7b5
+        .quad   0x48c8a121bbe6c9e5
+        .quad   0xfc415a7c59646445
+        .quad   0xd224b2d7c128b615
+        .quad   0x6035c9c905fbb912
+        .quad   0x42d7a91274429fab
+
+// 2^212 * 5 * G
+
+        .quad   0x4e6213e3eaf72ed3
+        .quad   0x6794981a43acd4e7
+        .quad   0xff547cde6eb508cb
+        .quad   0x6fed19dd10fcb532
+        .quad   0xa9a48947933da5bc
+        .quad   0x4a58920ec2e979ec
+        .quad   0x96d8800013e5ac4c
+        .quad   0x453692d74b48b147
+        .quad   0xdd775d99a8559c6f
+        .quad   0xf42a2140df003e24
+        .quad   0x5223e229da928a66
+        .quad   0x063f46ba6d38f22c
+
+// 2^212 * 6 * G
+
+        .quad   0xd2d242895f536694
+        .quad   0xca33a2c542939b2c
+        .quad   0x986fada6c7ddb95c
+        .quad   0x5a152c042f712d5d
+        .quad   0x39843cb737346921
+        .quad   0xa747fb0738c89447
+        .quad   0xcb8d8031a245307e
+        .quad   0x67810f8e6d82f068
+        .quad   0x3eeb8fbcd2287db4
+        .quad   0x72c7d3a301a03e93
+        .quad   0x5473e88cbd98265a
+        .quad   0x7324aa515921b403
+
+// 2^212 * 7 * G
+
+        .quad   0x857942f46c3cbe8e
+        .quad   0xa1d364b14730c046
+        .quad   0x1c8ed914d23c41bf
+        .quad   0x0838e161eef6d5d2
+        .quad   0xad23f6dae82354cb
+        .quad   0x6962502ab6571a6d
+        .quad   0x9b651636e38e37d1
+        .quad   0x5cac5005d1a3312f
+        .quad   0x8cc154cce9e39904
+        .quad   0x5b3a040b84de6846
+        .quad   0xc4d8a61cb1be5d6e
+        .quad   0x40fb897bd8861f02
+
+// 2^212 * 8 * G
+
+        .quad   0x84c5aa9062de37a1
+        .quad   0x421da5000d1d96e1
+        .quad   0x788286306a9242d9
+        .quad   0x3c5e464a690d10da
+        .quad   0xe57ed8475ab10761
+        .quad   0x71435e206fd13746
+        .quad   0x342f824ecd025632
+        .quad   0x4b16281ea8791e7b
+        .quad   0xd1c101d50b813381
+        .quad   0xdee60f1176ee6828
+        .quad   0x0cb68893383f6409
+        .quad   0x6183c565f6ff484a
+
+// 2^216 * 1 * G
+
+        .quad   0x741d5a461e6bf9d6
+        .quad   0x2305b3fc7777a581
+        .quad   0xd45574a26474d3d9
+        .quad   0x1926e1dc6401e0ff
+        .quad   0xdb468549af3f666e
+        .quad   0xd77fcf04f14a0ea5
+        .quad   0x3df23ff7a4ba0c47
+        .quad   0x3a10dfe132ce3c85
+        .quad   0xe07f4e8aea17cea0
+        .quad   0x2fd515463a1fc1fd
+        .quad   0x175322fd31f2c0f1
+        .quad   0x1fa1d01d861e5d15
+
+// 2^216 * 2 * G
+
+        .quad   0xcc8055947d599832
+        .quad   0x1e4656da37f15520
+        .quad   0x99f6f7744e059320
+        .quad   0x773563bc6a75cf33
+        .quad   0x38dcac00d1df94ab
+        .quad   0x2e712bddd1080de9
+        .quad   0x7f13e93efdd5e262
+        .quad   0x73fced18ee9a01e5
+        .quad   0x06b1e90863139cb3
+        .quad   0xa493da67c5a03ecd
+        .quad   0x8d77cec8ad638932
+        .quad   0x1f426b701b864f44
+
+// 2^216 * 3 * G
+
+        .quad   0xefc9264c41911c01
+        .quad   0xf1a3b7b817a22c25
+        .quad   0x5875da6bf30f1447
+        .quad   0x4e1af5271d31b090
+        .quad   0xf17e35c891a12552
+        .quad   0xb76b8153575e9c76
+        .quad   0xfa83406f0d9b723e
+        .quad   0x0b76bb1b3fa7e438
+        .quad   0x08b8c1f97f92939b
+        .quad   0xbe6771cbd444ab6e
+        .quad   0x22e5646399bb8017
+        .quad   0x7b6dd61eb772a955
+
+// 2^216 * 4 * G
+
+        .quad   0xb7adc1e850f33d92
+        .quad   0x7998fa4f608cd5cf
+        .quad   0xad962dbd8dfc5bdb
+        .quad   0x703e9bceaf1d2f4f
+        .quad   0x5730abf9ab01d2c7
+        .quad   0x16fb76dc40143b18
+        .quad   0x866cbe65a0cbb281
+        .quad   0x53fa9b659bff6afe
+        .quad   0x6c14c8e994885455
+        .quad   0x843a5d6665aed4e5
+        .quad   0x181bb73ebcd65af1
+        .quad   0x398d93e5c4c61f50
+
+// 2^216 * 5 * G
+
+        .quad   0x1c4bd16733e248f3
+        .quad   0xbd9e128715bf0a5f
+        .quad   0xd43f8cf0a10b0376
+        .quad   0x53b09b5ddf191b13
+        .quad   0xc3877c60d2e7e3f2
+        .quad   0x3b34aaa030828bb1
+        .quad   0x283e26e7739ef138
+        .quad   0x699c9c9002c30577
+        .quad   0xf306a7235946f1cc
+        .quad   0x921718b5cce5d97d
+        .quad   0x28cdd24781b4e975
+        .quad   0x51caf30c6fcdd907
+
+// 2^216 * 6 * G
+
+        .quad   0xa60ba7427674e00a
+        .quad   0x630e8570a17a7bf3
+        .quad   0x3758563dcf3324cc
+        .quad   0x5504aa292383fdaa
+        .quad   0x737af99a18ac54c7
+        .quad   0x903378dcc51cb30f
+        .quad   0x2b89bc334ce10cc7
+        .quad   0x12ae29c189f8e99a
+        .quad   0xa99ec0cb1f0d01cf
+        .quad   0x0dd1efcc3a34f7ae
+        .quad   0x55ca7521d09c4e22
+        .quad   0x5fd14fe958eba5ea
+
+// 2^216 * 7 * G
+
+        .quad   0xb5dc2ddf2845ab2c
+        .quad   0x069491b10a7fe993
+        .quad   0x4daaf3d64002e346
+        .quad   0x093ff26e586474d1
+        .quad   0x3c42fe5ebf93cb8e
+        .quad   0xbedfa85136d4565f
+        .quad   0xe0f0859e884220e8
+        .quad   0x7dd73f960725d128
+        .quad   0xb10d24fe68059829
+        .quad   0x75730672dbaf23e5
+        .quad   0x1367253ab457ac29
+        .quad   0x2f59bcbc86b470a4
+
+// 2^216 * 8 * G
+
+        .quad   0x83847d429917135f
+        .quad   0xad1b911f567d03d7
+        .quad   0x7e7748d9be77aad1
+        .quad   0x5458b42e2e51af4a
+        .quad   0x7041d560b691c301
+        .quad   0x85201b3fadd7e71e
+        .quad   0x16c2e16311335585
+        .quad   0x2aa55e3d010828b1
+        .quad   0xed5192e60c07444f
+        .quad   0x42c54e2d74421d10
+        .quad   0x352b4c82fdb5c864
+        .quad   0x13e9004a8a768664
+
+// 2^220 * 1 * G
+
+        .quad   0xcbb5b5556c032bff
+        .quad   0xdf7191b729297a3a
+        .quad   0xc1ff7326aded81bb
+        .quad   0x71ade8bb68be03f5
+        .quad   0x1e6284c5806b467c
+        .quad   0xc5f6997be75d607b
+        .quad   0x8b67d958b378d262
+        .quad   0x3d88d66a81cd8b70
+        .quad   0x8b767a93204ed789
+        .quad   0x762fcacb9fa0ae2a
+        .quad   0x771febcc6dce4887
+        .quad   0x343062158ff05fb3
+
+// 2^220 * 2 * G
+
+        .quad   0xe05da1a7e1f5bf49
+        .quad   0x26457d6dd4736092
+        .quad   0x77dcb07773cc32f6
+        .quad   0x0a5d94969cdd5fcd
+        .quad   0xfce219072a7b31b4
+        .quad   0x4d7adc75aa578016
+        .quad   0x0ec276a687479324
+        .quad   0x6d6d9d5d1fda4beb
+        .quad   0x22b1a58ae9b08183
+        .quad   0xfd95d071c15c388b
+        .quad   0xa9812376850a0517
+        .quad   0x33384cbabb7f335e
+
+// 2^220 * 3 * G
+
+        .quad   0x3c6fa2680ca2c7b5
+        .quad   0x1b5082046fb64fda
+        .quad   0xeb53349c5431d6de
+        .quad   0x5278b38f6b879c89
+        .quad   0x33bc627a26218b8d
+        .quad   0xea80b21fc7a80c61
+        .quad   0x9458b12b173e9ee6
+        .quad   0x076247be0e2f3059
+        .quad   0x52e105f61416375a
+        .quad   0xec97af3685abeba4
+        .quad   0x26e6b50623a67c36
+        .quad   0x5cf0e856f3d4fb01
+
+// 2^220 * 4 * G
+
+        .quad   0xf6c968731ae8cab4
+        .quad   0x5e20741ecb4f92c5
+        .quad   0x2da53be58ccdbc3e
+        .quad   0x2dddfea269970df7
+        .quad   0xbeaece313db342a8
+        .quad   0xcba3635b842db7ee
+        .quad   0xe88c6620817f13ef
+        .quad   0x1b9438aa4e76d5c6
+        .quad   0x8a50777e166f031a
+        .quad   0x067b39f10fb7a328
+        .quad   0x1925c9a6010fbd76
+        .quad   0x6df9b575cc740905
+
+// 2^220 * 5 * G
+
+        .quad   0x42c1192927f6bdcf
+        .quad   0x8f91917a403d61ca
+        .quad   0xdc1c5a668b9e1f61
+        .quad   0x1596047804ec0f8d
+        .quad   0xecdfc35b48cade41
+        .quad   0x6a88471fb2328270
+        .quad   0x740a4a2440a01b6a
+        .quad   0x471e5796003b5f29
+        .quad   0xda96bbb3aced37ac
+        .quad   0x7a2423b5e9208cea
+        .quad   0x24cc5c3038aebae2
+        .quad   0x50c356afdc5dae2f
+
+// 2^220 * 6 * G
+
+        .quad   0x09dcbf4341c30318
+        .quad   0xeeba061183181dce
+        .quad   0xc179c0cedc1e29a1
+        .quad   0x1dbf7b89073f35b0
+        .quad   0xcfed9cdf1b31b964
+        .quad   0xf486a9858ca51af3
+        .quad   0x14897265ea8c1f84
+        .quad   0x784a53dd932acc00
+        .quad   0x2d99f9df14fc4920
+        .quad   0x76ccb60cc4499fe5
+        .quad   0xa4132cbbe5cf0003
+        .quad   0x3f93d82354f000ea
+
+// 2^220 * 7 * G
+
+        .quad   0x8183e7689e04ce85
+        .quad   0x678fb71e04465341
+        .quad   0xad92058f6688edac
+        .quad   0x5da350d3532b099a
+        .quad   0xeaac12d179e14978
+        .quad   0xff923ff3bbebff5e
+        .quad   0x4af663e40663ce27
+        .quad   0x0fd381a811a5f5ff
+        .quad   0xf256aceca436df54
+        .quad   0x108b6168ae69d6e8
+        .quad   0x20d986cb6b5d036c
+        .quad   0x655957b9fee2af50
+
+// 2^220 * 8 * G
+
+        .quad   0xaea8b07fa902030f
+        .quad   0xf88c766af463d143
+        .quad   0x15b083663c787a60
+        .quad   0x08eab1148267a4a8
+        .quad   0xbdc1409bd002d0ac
+        .quad   0x66660245b5ccd9a6
+        .quad   0x82317dc4fade85ec
+        .quad   0x02fe934b6ad7df0d
+        .quad   0xef5cf100cfb7ea74
+        .quad   0x22897633a1cb42ac
+        .quad   0xd4ce0c54cef285e2
+        .quad   0x30408c048a146a55
+
+// 2^224 * 1 * G
+
+        .quad   0x739d8845832fcedb
+        .quad   0xfa38d6c9ae6bf863
+        .quad   0x32bc0dcab74ffef7
+        .quad   0x73937e8814bce45e
+        .quad   0xbb2e00c9193b877f
+        .quad   0xece3a890e0dc506b
+        .quad   0xecf3b7c036de649f
+        .quad   0x5f46040898de9e1a
+        .quad   0xb9037116297bf48d
+        .quad   0xa9d13b22d4f06834
+        .quad   0xe19715574696bdc6
+        .quad   0x2cf8a4e891d5e835
+
+// 2^224 * 2 * G
+
+        .quad   0x6d93fd8707110f67
+        .quad   0xdd4c09d37c38b549
+        .quad   0x7cb16a4cc2736a86
+        .quad   0x2049bd6e58252a09
+        .quad   0x2cb5487e17d06ba2
+        .quad   0x24d2381c3950196b
+        .quad   0xd7659c8185978a30
+        .quad   0x7a6f7f2891d6a4f6
+        .quad   0x7d09fd8d6a9aef49
+        .quad   0xf0ee60be5b3db90b
+        .quad   0x4c21b52c519ebfd4
+        .quad   0x6011aadfc545941d
+
+// 2^224 * 3 * G
+
+        .quad   0x5f67926dcf95f83c
+        .quad   0x7c7e856171289071
+        .quad   0xd6a1e7f3998f7a5b
+        .quad   0x6fc5cc1b0b62f9e0
+        .quad   0x63ded0c802cbf890
+        .quad   0xfbd098ca0dff6aaa
+        .quad   0x624d0afdb9b6ed99
+        .quad   0x69ce18b779340b1e
+        .quad   0xd1ef5528b29879cb
+        .quad   0xdd1aae3cd47e9092
+        .quad   0x127e0442189f2352
+        .quad   0x15596b3ae57101f1
+
+// 2^224 * 4 * G
+
+        .quad   0x462739d23f9179a2
+        .quad   0xff83123197d6ddcf
+        .quad   0x1307deb553f2148a
+        .quad   0x0d2237687b5f4dda
+        .quad   0x09ff31167e5124ca
+        .quad   0x0be4158bd9c745df
+        .quad   0x292b7d227ef556e5
+        .quad   0x3aa4e241afb6d138
+        .quad   0x2cc138bf2a3305f5
+        .quad   0x48583f8fa2e926c3
+        .quad   0x083ab1a25549d2eb
+        .quad   0x32fcaa6e4687a36c
+
+// 2^224 * 5 * G
+
+        .quad   0x7bc56e8dc57d9af5
+        .quad   0x3e0bd2ed9df0bdf2
+        .quad   0xaac014de22efe4a3
+        .quad   0x4627e9cefebd6a5c
+        .quad   0x3207a4732787ccdf
+        .quad   0x17e31908f213e3f8
+        .quad   0xd5b2ecd7f60d964e
+        .quad   0x746f6336c2600be9
+        .quad   0x3f4af345ab6c971c
+        .quad   0xe288eb729943731f
+        .quad   0x33596a8a0344186d
+        .quad   0x7b4917007ed66293
+
+// 2^224 * 6 * G
+
+        .quad   0x2d85fb5cab84b064
+        .quad   0x497810d289f3bc14
+        .quad   0x476adc447b15ce0c
+        .quad   0x122ba376f844fd7b
+        .quad   0x54341b28dd53a2dd
+        .quad   0xaa17905bdf42fc3f
+        .quad   0x0ff592d94dd2f8f4
+        .quad   0x1d03620fe08cd37d
+        .quad   0xc20232cda2b4e554
+        .quad   0x9ed0fd42115d187f
+        .quad   0x2eabb4be7dd479d9
+        .quad   0x02c70bf52b68ec4c
+
+// 2^224 * 7 * G
+
+        .quad   0xa287ec4b5d0b2fbb
+        .quad   0x415c5790074882ca
+        .quad   0xe044a61ec1d0815c
+        .quad   0x26334f0a409ef5e0
+        .quad   0xace532bf458d72e1
+        .quad   0x5be768e07cb73cb5
+        .quad   0x56cf7d94ee8bbde7
+        .quad   0x6b0697e3feb43a03
+        .quad   0xb6c8f04adf62a3c0
+        .quad   0x3ef000ef076da45d
+        .quad   0x9c9cb95849f0d2a9
+        .quad   0x1cc37f43441b2fae
+
+// 2^224 * 8 * G
+
+        .quad   0x508f565a5cc7324f
+        .quad   0xd061c4c0e506a922
+        .quad   0xfb18abdb5c45ac19
+        .quad   0x6c6809c10380314a
+        .quad   0xd76656f1c9ceaeb9
+        .quad   0x1c5b15f818e5656a
+        .quad   0x26e72832844c2334
+        .quad   0x3a346f772f196838
+        .quad   0xd2d55112e2da6ac8
+        .quad   0xe9bd0331b1e851ed
+        .quad   0x960746dd8ec67262
+        .quad   0x05911b9f6ef7c5d0
+
+// 2^228 * 1 * G
+
+        .quad   0xe9dcd756b637ff2d
+        .quad   0xec4c348fc987f0c4
+        .quad   0xced59285f3fbc7b7
+        .quad   0x3305354793e1ea87
+        .quad   0x01c18980c5fe9f94
+        .quad   0xcd656769716fd5c8
+        .quad   0x816045c3d195a086
+        .quad   0x6e2b7f3266cc7982
+        .quad   0xcc802468f7c3568f
+        .quad   0x9de9ba8219974cb3
+        .quad   0xabb7229cb5b81360
+        .quad   0x44e2017a6fbeba62
+
+// 2^228 * 2 * G
+
+        .quad   0xc4c2a74354dab774
+        .quad   0x8e5d4c3c4eaf031a
+        .quad   0xb76c23d242838f17
+        .quad   0x749a098f68dce4ea
+        .quad   0x87f82cf3b6ca6ecd
+        .quad   0x580f893e18f4a0c2
+        .quad   0x058930072604e557
+        .quad   0x6cab6ac256d19c1d
+        .quad   0xdcdfe0a02cc1de60
+        .quad   0x032665ff51c5575b
+        .quad   0x2c0c32f1073abeeb
+        .quad   0x6a882014cd7b8606
+
+// 2^228 * 3 * G
+
+        .quad   0xa52a92fea4747fb5
+        .quad   0xdc12a4491fa5ab89
+        .quad   0xd82da94bb847a4ce
+        .quad   0x4d77edce9512cc4e
+        .quad   0xd111d17caf4feb6e
+        .quad   0x050bba42b33aa4a3
+        .quad   0x17514c3ceeb46c30
+        .quad   0x54bedb8b1bc27d75
+        .quad   0x77c8e14577e2189c
+        .quad   0xa3e46f6aff99c445
+        .quad   0x3144dfc86d335343
+        .quad   0x3a96559e7c4216a9
+
+// 2^228 * 4 * G
+
+        .quad   0x12550d37f42ad2ee
+        .quad   0x8b78e00498a1fbf5
+        .quad   0x5d53078233894cb2
+        .quad   0x02c84e4e3e498d0c
+        .quad   0x4493896880baaa52
+        .quad   0x4c98afc4f285940e
+        .quad   0xef4aa79ba45448b6
+        .quad   0x5278c510a57aae7f
+        .quad   0xa54dd074294c0b94
+        .quad   0xf55d46b8df18ffb6
+        .quad   0xf06fecc58dae8366
+        .quad   0x588657668190d165
+
+// 2^228 * 5 * G
+
+        .quad   0xd47712311aef7117
+        .quad   0x50343101229e92c7
+        .quad   0x7a95e1849d159b97
+        .quad   0x2449959b8b5d29c9
+        .quad   0xbf5834f03de25cc3
+        .quad   0xb887c8aed6815496
+        .quad   0x5105221a9481e892
+        .quad   0x6760ed19f7723f93
+        .quad   0x669ba3b7ac35e160
+        .quad   0x2eccf73fba842056
+        .quad   0x1aec1f17c0804f07
+        .quad   0x0d96bc031856f4e7
+
+// 2^228 * 6 * G
+
+        .quad   0x3318be7775c52d82
+        .quad   0x4cb764b554d0aab9
+        .quad   0xabcf3d27cc773d91
+        .quad   0x3bf4d1848123288a
+        .quad   0xb1d534b0cc7505e1
+        .quad   0x32cd003416c35288
+        .quad   0xcb36a5800762c29d
+        .quad   0x5bfe69b9237a0bf8
+        .quad   0x183eab7e78a151ab
+        .quad   0xbbe990c999093763
+        .quad   0xff717d6e4ac7e335
+        .quad   0x4c5cddb325f39f88
+
+// 2^228 * 7 * G
+
+        .quad   0xc0f6b74d6190a6eb
+        .quad   0x20ea81a42db8f4e4
+        .quad   0xa8bd6f7d97315760
+        .quad   0x33b1d60262ac7c21
+        .quad   0x57750967e7a9f902
+        .quad   0x2c37fdfc4f5b467e
+        .quad   0xb261663a3177ba46
+        .quad   0x3a375e78dc2d532b
+        .quad   0x8141e72f2d4dddea
+        .quad   0xe6eafe9862c607c8
+        .quad   0x23c28458573cafd0
+        .quad   0x46b9476f4ff97346
+
+// 2^228 * 8 * G
+
+        .quad   0x0c1ffea44f901e5c
+        .quad   0x2b0b6fb72184b782
+        .quad   0xe587ff910114db88
+        .quad   0x37130f364785a142
+        .quad   0x1215505c0d58359f
+        .quad   0x2a2013c7fc28c46b
+        .quad   0x24a0a1af89ea664e
+        .quad   0x4400b638a1130e1f
+        .quad   0x3a01b76496ed19c3
+        .quad   0x31e00ab0ed327230
+        .quad   0x520a885783ca15b1
+        .quad   0x06aab9875accbec7
+
+// 2^232 * 1 * G
+
+        .quad   0xc1339983f5df0ebb
+        .quad   0xc0f3758f512c4cac
+        .quad   0x2cf1130a0bb398e1
+        .quad   0x6b3cecf9aa270c62
+        .quad   0x5349acf3512eeaef
+        .quad   0x20c141d31cc1cb49
+        .quad   0x24180c07a99a688d
+        .quad   0x555ef9d1c64b2d17
+        .quad   0x36a770ba3b73bd08
+        .quad   0x624aef08a3afbf0c
+        .quad   0x5737ff98b40946f2
+        .quad   0x675f4de13381749d
+
+// 2^232 * 2 * G
+
+        .quad   0x0e2c52036b1782fc
+        .quad   0x64816c816cad83b4
+        .quad   0xd0dcbdd96964073e
+        .quad   0x13d99df70164c520
+        .quad   0xa12ff6d93bdab31d
+        .quad   0x0725d80f9d652dfe
+        .quad   0x019c4ff39abe9487
+        .quad   0x60f450b882cd3c43
+        .quad   0x014b5ec321e5c0ca
+        .quad   0x4fcb69c9d719bfa2
+        .quad   0x4e5f1c18750023a0
+        .quad   0x1c06de9e55edac80
+
+// 2^232 * 3 * G
+
+        .quad   0x990f7ad6a33ec4e2
+        .quad   0x6608f938be2ee08e
+        .quad   0x9ca143c563284515
+        .quad   0x4cf38a1fec2db60d
+        .quad   0xffd52b40ff6d69aa
+        .quad   0x34530b18dc4049bb
+        .quad   0x5e4a5c2fa34d9897
+        .quad   0x78096f8e7d32ba2d
+        .quad   0xa0aaaa650dfa5ce7
+        .quad   0xf9c49e2a48b5478c
+        .quad   0x4f09cc7d7003725b
+        .quad   0x373cad3a26091abe
+
+// 2^232 * 4 * G
+
+        .quad   0xb294634d82c9f57c
+        .quad   0x1fcbfde124934536
+        .quad   0x9e9c4db3418cdb5a
+        .quad   0x0040f3d9454419fc
+        .quad   0xf1bea8fb89ddbbad
+        .quad   0x3bcb2cbc61aeaecb
+        .quad   0x8f58a7bb1f9b8d9d
+        .quad   0x21547eda5112a686
+        .quad   0xdefde939fd5986d3
+        .quad   0xf4272c89510a380c
+        .quad   0xb72ba407bb3119b9
+        .quad   0x63550a334a254df4
+
+// 2^232 * 5 * G
+
+        .quad   0x6507d6edb569cf37
+        .quad   0x178429b00ca52ee1
+        .quad   0xea7c0090eb6bd65d
+        .quad   0x3eea62c7daf78f51
+        .quad   0x9bba584572547b49
+        .quad   0xf305c6fae2c408e0
+        .quad   0x60e8fa69c734f18d
+        .quad   0x39a92bafaa7d767a
+        .quad   0x9d24c713e693274e
+        .quad   0x5f63857768dbd375
+        .quad   0x70525560eb8ab39a
+        .quad   0x68436a0665c9c4cd
+
+// 2^232 * 6 * G
+
+        .quad   0xbc0235e8202f3f27
+        .quad   0xc75c00e264f975b0
+        .quad   0x91a4e9d5a38c2416
+        .quad   0x17b6e7f68ab789f9
+        .quad   0x1e56d317e820107c
+        .quad   0xc5266844840ae965
+        .quad   0xc1e0a1c6320ffc7a
+        .quad   0x5373669c91611472
+        .quad   0x5d2814ab9a0e5257
+        .quad   0x908f2084c9cab3fc
+        .quad   0xafcaf5885b2d1eca
+        .quad   0x1cb4b5a678f87d11
+
+// 2^232 * 7 * G
+
+        .quad   0xb664c06b394afc6c
+        .quad   0x0c88de2498da5fb1
+        .quad   0x4f8d03164bcad834
+        .quad   0x330bca78de7434a2
+        .quad   0x6b74aa62a2a007e7
+        .quad   0xf311e0b0f071c7b1
+        .quad   0x5707e438000be223
+        .quad   0x2dc0fd2d82ef6eac
+        .quad   0x982eff841119744e
+        .quad   0xf9695e962b074724
+        .quad   0xc58ac14fbfc953fb
+        .quad   0x3c31be1b369f1cf5
+
+// 2^232 * 8 * G
+
+        .quad   0xb0f4864d08948aee
+        .quad   0x07dc19ee91ba1c6f
+        .quad   0x7975cdaea6aca158
+        .quad   0x330b61134262d4bb
+        .quad   0xc168bc93f9cb4272
+        .quad   0xaeb8711fc7cedb98
+        .quad   0x7f0e52aa34ac8d7a
+        .quad   0x41cec1097e7d55bb
+        .quad   0xf79619d7a26d808a
+        .quad   0xbb1fd49e1d9e156d
+        .quad   0x73d7c36cdba1df27
+        .quad   0x26b44cd91f28777d
+
+// 2^236 * 1 * G
+
+        .quad   0x300a9035393aa6d8
+        .quad   0x2b501131a12bb1cd
+        .quad   0x7b1ff677f093c222
+        .quad   0x4309c1f8cab82bad
+        .quad   0xaf44842db0285f37
+        .quad   0x8753189047efc8df
+        .quad   0x9574e091f820979a
+        .quad   0x0e378d6069615579
+        .quad   0xd9fa917183075a55
+        .quad   0x4bdb5ad26b009fdc
+        .quad   0x7829ad2cd63def0e
+        .quad   0x078fc54975fd3877
+
+// 2^236 * 2 * G
+
+        .quad   0x87dfbd1428878f2d
+        .quad   0x134636dd1e9421a1
+        .quad   0x4f17c951257341a3
+        .quad   0x5df98d4bad296cb8
+        .quad   0xe2004b5bb833a98a
+        .quad   0x44775dec2d4c3330
+        .quad   0x3aa244067eace913
+        .quad   0x272630e3d58e00a9
+        .quad   0xf3678fd0ecc90b54
+        .quad   0xf001459b12043599
+        .quad   0x26725fbc3758b89b
+        .quad   0x4325e4aa73a719ae
+
+// 2^236 * 3 * G
+
+        .quad   0x657dc6ef433c3493
+        .quad   0x65375e9f80dbf8c3
+        .quad   0x47fd2d465b372dae
+        .quad   0x4966ab79796e7947
+        .quad   0xed24629acf69f59d
+        .quad   0x2a4a1ccedd5abbf4
+        .quad   0x3535ca1f56b2d67b
+        .quad   0x5d8c68d043b1b42d
+        .quad   0xee332d4de3b42b0a
+        .quad   0xd84e5a2b16a4601c
+        .quad   0x78243877078ba3e4
+        .quad   0x77ed1eb4184ee437
+
+// 2^236 * 4 * G
+
+        .quad   0xbfd4e13f201839a0
+        .quad   0xaeefffe23e3df161
+        .quad   0xb65b04f06b5d1fe3
+        .quad   0x52e085fb2b62fbc0
+        .quad   0x185d43f89e92ed1a
+        .quad   0xb04a1eeafe4719c6
+        .quad   0x499fbe88a6f03f4f
+        .quad   0x5d8b0d2f3c859bdd
+        .quad   0x124079eaa54cf2ba
+        .quad   0xd72465eb001b26e7
+        .quad   0x6843bcfdc97af7fd
+        .quad   0x0524b42b55eacd02
+
+// 2^236 * 5 * G
+
+        .quad   0xfd0d5dbee45447b0
+        .quad   0x6cec351a092005ee
+        .quad   0x99a47844567579cb
+        .quad   0x59d242a216e7fa45
+        .quad   0xbc18dcad9b829eac
+        .quad   0x23ae7d28b5f579d0
+        .quad   0xc346122a69384233
+        .quad   0x1a6110b2e7d4ac89
+        .quad   0x4f833f6ae66997ac
+        .quad   0x6849762a361839a4
+        .quad   0x6985dec1970ab525
+        .quad   0x53045e89dcb1f546
+
+// 2^236 * 6 * G
+
+        .quad   0xcb8bb346d75353db
+        .quad   0xfcfcb24bae511e22
+        .quad   0xcba48d40d50ae6ef
+        .quad   0x26e3bae5f4f7cb5d
+        .quad   0x84da3cde8d45fe12
+        .quad   0xbd42c218e444e2d2
+        .quad   0xa85196781f7e3598
+        .quad   0x7642c93f5616e2b2
+        .quad   0x2323daa74595f8e4
+        .quad   0xde688c8b857abeb4
+        .quad   0x3fc48e961c59326e
+        .quad   0x0b2e73ca15c9b8ba
+
+// 2^236 * 7 * G
+
+        .quad   0xd6bb4428c17f5026
+        .quad   0x9eb27223fb5a9ca7
+        .quad   0xe37ba5031919c644
+        .quad   0x21ce380db59a6602
+        .quad   0x0e3fbfaf79c03a55
+        .quad   0x3077af054cbb5acf
+        .quad   0xd5c55245db3de39f
+        .quad   0x015e68c1476a4af7
+        .quad   0xc1d5285220066a38
+        .quad   0x95603e523570aef3
+        .quad   0x832659a7226b8a4d
+        .quad   0x5dd689091f8eedc9
+
+// 2^236 * 8 * G
+
+        .quad   0xcbac84debfd3c856
+        .quad   0x1624c348b35ff244
+        .quad   0xb7f88dca5d9cad07
+        .quad   0x3b0e574da2c2ebe8
+        .quad   0x1d022591a5313084
+        .quad   0xca2d4aaed6270872
+        .quad   0x86a12b852f0bfd20
+        .quad   0x56e6c439ad7da748
+        .quad   0xc704ff4942bdbae6
+        .quad   0x5e21ade2b2de1f79
+        .quad   0xe95db3f35652fad8
+        .quad   0x0822b5378f08ebc1
+
+// 2^240 * 1 * G
+
+        .quad   0x51f048478f387475
+        .quad   0xb25dbcf49cbecb3c
+        .quad   0x9aab1244d99f2055
+        .quad   0x2c709e6c1c10a5d6
+        .quad   0xe1b7f29362730383
+        .quad   0x4b5279ffebca8a2c
+        .quad   0xdafc778abfd41314
+        .quad   0x7deb10149c72610f
+        .quad   0xcb62af6a8766ee7a
+        .quad   0x66cbec045553cd0e
+        .quad   0x588001380f0be4b5
+        .quad   0x08e68e9ff62ce2ea
+
+// 2^240 * 2 * G
+
+        .quad   0x34ad500a4bc130ad
+        .quad   0x8d38db493d0bd49c
+        .quad   0xa25c3d98500a89be
+        .quad   0x2f1f3f87eeba3b09
+        .quad   0x2f2d09d50ab8f2f9
+        .quad   0xacb9218dc55923df
+        .quad   0x4a8f342673766cb9
+        .quad   0x4cb13bd738f719f5
+        .quad   0xf7848c75e515b64a
+        .quad   0xa59501badb4a9038
+        .quad   0xc20d313f3f751b50
+        .quad   0x19a1e353c0ae2ee8
+
+// 2^240 * 3 * G
+
+        .quad   0x7d1c7560bafa05c3
+        .quad   0xb3e1a0a0c6e55e61
+        .quad   0xe3529718c0d66473
+        .quad   0x41546b11c20c3486
+        .quad   0xb42172cdd596bdbd
+        .quad   0x93e0454398eefc40
+        .quad   0x9fb15347b44109b5
+        .quad   0x736bd3990266ae34
+        .quad   0x85532d509334b3b4
+        .quad   0x46fd114b60816573
+        .quad   0xcc5f5f30425c8375
+        .quad   0x412295a2b87fab5c
+
+// 2^240 * 4 * G
+
+        .quad   0x19c99b88f57ed6e9
+        .quad   0x5393cb266df8c825
+        .quad   0x5cee3213b30ad273
+        .quad   0x14e153ebb52d2e34
+        .quad   0x2e655261e293eac6
+        .quad   0x845a92032133acdb
+        .quad   0x460975cb7900996b
+        .quad   0x0760bb8d195add80
+        .quad   0x413e1a17cde6818a
+        .quad   0x57156da9ed69a084
+        .quad   0x2cbf268f46caccb1
+        .quad   0x6b34be9bc33ac5f2
+
+// 2^240 * 5 * G
+
+        .quad   0xf3df2f643a78c0b2
+        .quad   0x4c3e971ef22e027c
+        .quad   0xec7d1c5e49c1b5a3
+        .quad   0x2012c18f0922dd2d
+        .quad   0x11fc69656571f2d3
+        .quad   0xc6c9e845530e737a
+        .quad   0xe33ae7a2d4fe5035
+        .quad   0x01b9c7b62e6dd30b
+        .quad   0x880b55e55ac89d29
+        .quad   0x1483241f45a0a763
+        .quad   0x3d36efdfc2e76c1f
+        .quad   0x08af5b784e4bade8
+
+// 2^240 * 6 * G
+
+        .quad   0x283499dc881f2533
+        .quad   0x9d0525da779323b6
+        .quad   0x897addfb673441f4
+        .quad   0x32b79d71163a168d
+        .quad   0xe27314d289cc2c4b
+        .quad   0x4be4bd11a287178d
+        .quad   0x18d528d6fa3364ce
+        .quad   0x6423c1d5afd9826e
+        .quad   0xcc85f8d9edfcb36a
+        .quad   0x22bcc28f3746e5f9
+        .quad   0xe49de338f9e5d3cd
+        .quad   0x480a5efbc13e2dcc
+
+// 2^240 * 7 * G
+
+        .quad   0x0b51e70b01622071
+        .quad   0x06b505cf8b1dafc5
+        .quad   0x2c6bb061ef5aabcd
+        .quad   0x47aa27600cb7bf31
+        .quad   0xb6614ce442ce221f
+        .quad   0x6e199dcc4c053928
+        .quad   0x663fb4a4dc1cbe03
+        .quad   0x24b31d47691c8e06
+        .quad   0x2a541eedc015f8c3
+        .quad   0x11a4fe7e7c693f7c
+        .quad   0xf0af66134ea278d6
+        .quad   0x545b585d14dda094
+
+// 2^240 * 8 * G
+
+        .quad   0x67bf275ea0d43a0f
+        .quad   0xade68e34089beebe
+        .quad   0x4289134cd479e72e
+        .quad   0x0f62f9c332ba5454
+        .quad   0x6204e4d0e3b321e1
+        .quad   0x3baa637a28ff1e95
+        .quad   0x0b0ccffd5b99bd9e
+        .quad   0x4d22dc3e64c8d071
+        .quad   0xfcb46589d63b5f39
+        .quad   0x5cae6a3f57cbcf61
+        .quad   0xfebac2d2953afa05
+        .quad   0x1c0fa01a36371436
+
+// 2^244 * 1 * G
+
+        .quad   0xe7547449bc7cd692
+        .quad   0x0f9abeaae6f73ddf
+        .quad   0x4af01ca700837e29
+        .quad   0x63ab1b5d3f1bc183
+        .quad   0xc11ee5e854c53fae
+        .quad   0x6a0b06c12b4f3ff4
+        .quad   0x33540f80e0b67a72
+        .quad   0x15f18fc3cd07e3ef
+        .quad   0x32750763b028f48c
+        .quad   0x06020740556a065f
+        .quad   0xd53bd812c3495b58
+        .quad   0x08706c9b865f508d
+
+// 2^244 * 2 * G
+
+        .quad   0xf37ca2ab3d343dff
0x1a8c6a2d80abc617 + .quad 0x8e49e035d4ccffca + .quad 0x48b46beebaa1d1b9 + .quad 0xcc991b4138b41246 + .quad 0x243b9c526f9ac26b + .quad 0xb9ef494db7cbabbd + .quad 0x5fba433dd082ed00 + .quad 0x9c49e355c9941ad0 + .quad 0xb9734ade74498f84 + .quad 0x41c3fed066663e5c + .quad 0x0ecfedf8e8e710b3 + + // 2^244 * 3 * G + + .quad 0x76430f9f9cd470d9 + .quad 0xb62acc9ba42f6008 + .quad 0x1898297c59adad5e + .quad 0x7789dd2db78c5080 + .quad 0x744f7463e9403762 + .quad 0xf79a8dee8dfcc9c9 + .quad 0x163a649655e4cde3 + .quad 0x3b61788db284f435 + .quad 0xb22228190d6ef6b2 + .quad 0xa94a66b246ce4bfa + .quad 0x46c1a77a4f0b6cc7 + .quad 0x4236ccffeb7338cf + + // 2^244 * 4 * G + + .quad 0x8497404d0d55e274 + .quad 0x6c6663d9c4ad2b53 + .quad 0xec2fb0d9ada95734 + .quad 0x2617e120cdb8f73c + .quad 0x3bd82dbfda777df6 + .quad 0x71b177cc0b98369e + .quad 0x1d0e8463850c3699 + .quad 0x5a71945b48e2d1f1 + .quad 0x6f203dd5405b4b42 + .quad 0x327ec60410b24509 + .quad 0x9c347230ac2a8846 + .quad 0x77de29fc11ffeb6a + + // 2^244 * 5 * G + + .quad 0xb0ac57c983b778a8 + .quad 0x53cdcca9d7fe912c + .quad 0x61c2b854ff1f59dc + .quad 0x3a1a2cf0f0de7dac + .quad 0x835e138fecced2ca + .quad 0x8c9eaf13ea963b9a + .quad 0xc95fbfc0b2160ea6 + .quad 0x575e66f3ad877892 + .quad 0x99803a27c88fcb3a + .quad 0x345a6789275ec0b0 + .quad 0x459789d0ff6c2be5 + .quad 0x62f882651e70a8b2 + + // 2^244 * 6 * G + + .quad 0x085ae2c759ff1be4 + .quad 0x149145c93b0e40b7 + .quad 0xc467e7fa7ff27379 + .quad 0x4eeecf0ad5c73a95 + .quad 0x6d822986698a19e0 + .quad 0xdc9821e174d78a71 + .quad 0x41a85f31f6cb1f47 + .quad 0x352721c2bcda9c51 + .quad 0x48329952213fc985 + .quad 0x1087cf0d368a1746 + .quad 0x8e5261b166c15aa5 + .quad 0x2d5b2d842ed24c21 + + // 2^244 * 7 * G + + .quad 0x02cfebd9ebd3ded1 + .quad 0xd45b217739021974 + .quad 0x7576f813fe30a1b7 + .quad 0x5691b6f9a34ef6c2 + .quad 0x5eb7d13d196ac533 + .quad 0x377234ecdb80be2b + .quad 0xe144cffc7cf5ae24 + .quad 0x5226bcf9c441acec + .quad 0x79ee6c7223e5b547 + .quad 0x6f5f50768330d679 + .quad 0xed73e1e96d8adce9 + .quad 0x27c3da1e1d8ccc03 + + // 2^244 * 8 * G + + .quad 0x7eb9efb23fe24c74 + .quad 0x3e50f49f1651be01 + .quad 0x3ea732dc21858dea + .quad 0x17377bd75bb810f9 + .quad 0x28302e71630ef9f6 + .quad 0xc2d4a2032b64cee0 + .quad 0x090820304b6292be + .quad 0x5fca747aa82adf18 + .quad 0x232a03c35c258ea5 + .quad 0x86f23a2c6bcb0cf1 + .quad 0x3dad8d0d2e442166 + .quad 0x04a8933cab76862b + + // 2^248 * 1 * G + + .quad 0xd2c604b622943dff + .quad 0xbc8cbece44cfb3a0 + .quad 0x5d254ff397808678 + .quad 0x0fa3614f3b1ca6bf + .quad 0x69082b0e8c936a50 + .quad 0xf9c9a035c1dac5b6 + .quad 0x6fb73e54c4dfb634 + .quad 0x4005419b1d2bc140 + .quad 0xa003febdb9be82f0 + .quad 0x2089c1af3a44ac90 + .quad 0xf8499f911954fa8e + .quad 0x1fba218aef40ab42 + + // 2^248 * 2 * G + + .quad 0xab549448fac8f53e + .quad 0x81f6e89a7ba63741 + .quad 0x74fd6c7d6c2b5e01 + .quad 0x392e3acaa8c86e42 + .quad 0x4f3e57043e7b0194 + .quad 0xa81d3eee08daaf7f + .quad 0xc839c6ab99dcdef1 + .quad 0x6c535d13ff7761d5 + .quad 0x4cbd34e93e8a35af + .quad 0x2e0781445887e816 + .quad 0x19319c76f29ab0ab + .quad 0x25e17fe4d50ac13b + + // 2^248 * 3 * G + + .quad 0x0a289bd71e04f676 + .quad 0x208e1c52d6420f95 + .quad 0x5186d8b034691fab + .quad 0x255751442a9fb351 + .quad 0x915f7ff576f121a7 + .quad 0xc34a32272fcd87e3 + .quad 0xccba2fde4d1be526 + .quad 0x6bba828f8969899b + .quad 0xe2d1bc6690fe3901 + .quad 0x4cb54a18a0997ad5 + .quad 0x971d6914af8460d4 + .quad 0x559d504f7f6b7be4 + + // 2^248 * 4 * G + + .quad 0xa7738378b3eb54d5 + .quad 0x1d69d366a5553c7c + .quad 0x0a26cf62f92800ba + .quad 0x01ab12d5807e3217 + .quad 
0x9c4891e7f6d266fd + .quad 0x0744a19b0307781b + .quad 0x88388f1d6061e23b + .quad 0x123ea6a3354bd50e + .quad 0x118d189041e32d96 + .quad 0xb9ede3c2d8315848 + .quad 0x1eab4271d83245d9 + .quad 0x4a3961e2c918a154 + + // 2^248 * 5 * G + + .quad 0x71dc3be0f8e6bba0 + .quad 0xd6cef8347effe30a + .quad 0xa992425fe13a476a + .quad 0x2cd6bce3fb1db763 + .quad 0x0327d644f3233f1e + .quad 0x499a260e34fcf016 + .quad 0x83b5a716f2dab979 + .quad 0x68aceead9bd4111f + .quad 0x38b4c90ef3d7c210 + .quad 0x308e6e24b7ad040c + .quad 0x3860d9f1b7e73e23 + .quad 0x595760d5b508f597 + + // 2^248 * 6 * G + + .quad 0x6129bfe104aa6397 + .quad 0x8f960008a4a7fccb + .quad 0x3f8bc0897d909458 + .quad 0x709fa43edcb291a9 + .quad 0x882acbebfd022790 + .quad 0x89af3305c4115760 + .quad 0x65f492e37d3473f4 + .quad 0x2cb2c5df54515a2b + .quad 0xeb0a5d8c63fd2aca + .quad 0xd22bc1662e694eff + .quad 0x2723f36ef8cbb03a + .quad 0x70f029ecf0c8131f + + // 2^248 * 7 * G + + .quad 0x461307b32eed3e33 + .quad 0xae042f33a45581e7 + .quad 0xc94449d3195f0366 + .quad 0x0b7d5d8a6c314858 + .quad 0x2a6aafaa5e10b0b9 + .quad 0x78f0a370ef041aa9 + .quad 0x773efb77aa3ad61f + .quad 0x44eca5a2a74bd9e1 + .quad 0x25d448327b95d543 + .quad 0x70d38300a3340f1d + .quad 0xde1c531c60e1c52b + .quad 0x272224512c7de9e4 + + // 2^248 * 8 * G + + .quad 0x1abc92af49c5342e + .quad 0xffeed811b2e6fad0 + .quad 0xefa28c8dfcc84e29 + .quad 0x11b5df18a44cc543 + .quad 0xbf7bbb8a42a975fc + .quad 0x8c5c397796ada358 + .quad 0xe27fc76fcdedaa48 + .quad 0x19735fd7f6bc20a6 + .quad 0xe3ab90d042c84266 + .quad 0xeb848e0f7f19547e + .quad 0x2503a1d065a497b9 + .quad 0x0fef911191df895f + + // 2^252 * 1 * G + + .quad 0xb1507ca1ab1c6eb9 + .quad 0xbd448f3e16b687b3 + .quad 0x3455fb7f2c7a91ab + .quad 0x7579229e2f2adec1 + .quad 0x6ab5dcb85b1c16b7 + .quad 0x94c0fce83c7b27a5 + .quad 0xa4b11c1a735517be + .quad 0x499238d0ba0eafaa + .quad 0xecf46e527aba8b57 + .quad 0x15a08c478bd1647b + .quad 0x7af1c6a65f706fef + .quad 0x6345fa78f03a30d5 + + // 2^252 * 2 * G + + .quad 0xdf02f95f1015e7a1 + .quad 0x790ec41da9b40263 + .quad 0x4d3a0ea133ea1107 + .quad 0x54f70be7e33af8c9 + .quad 0x93d3cbe9bdd8f0a4 + .quad 0xdb152c1bfd177302 + .quad 0x7dbddc6d7f17a875 + .quad 0x3e1a71cc8f426efe + .quad 0xc83ca3e390babd62 + .quad 0x80ede3670291c833 + .quad 0xc88038ccd37900c4 + .quad 0x2c5fc0231ec31fa1 + + // 2^252 * 3 * G + + .quad 0xfeba911717038b4f + .quad 0xe5123721c9deef81 + .quad 0x1c97e4e75d0d8834 + .quad 0x68afae7a23dc3bc6 + .quad 0xc422e4d102456e65 + .quad 0x87414ac1cad47b91 + .quad 0x1592e2bba2b6ffdd + .quad 0x75d9d2bff5c2100f + .quad 0x5bd9b4763626e81c + .quad 0x89966936bca02edd + .quad 0x0a41193d61f077b3 + .quad 0x3097a24200ce5471 + + // 2^252 * 4 * G + + .quad 0x57427734c7f8b84c + .quad 0xf141a13e01b270e9 + .quad 0x02d1adfeb4e564a6 + .quad 0x4bb23d92ce83bd48 + .quad 0xa162e7246695c486 + .quad 0x131d633435a89607 + .quad 0x30521561a0d12a37 + .quad 0x56704bada6afb363 + .quad 0xaf6c4aa752f912b9 + .quad 0x5e665f6cd86770c8 + .quad 0x4c35ac83a3c8cd58 + .quad 0x2b7a29c010a58a7e + + // 2^252 * 5 * G + + .quad 0xc4007f77d0c1cec3 + .quad 0x8d1020b6bac492f8 + .quad 0x32ec29d57e69daaf + .quad 0x599408759d95fce0 + .quad 0x33810a23bf00086e + .quad 0xafce925ee736ff7c + .quad 0x3d60e670e24922d4 + .quad 0x11ce9e714f96061b + .quad 0x219ef713d815bac1 + .quad 0xf141465d485be25c + .quad 0x6d5447cc4e513c51 + .quad 0x174926be5ef44393 + + // 2^252 * 6 * G + + .quad 0xb5deb2f9fc5bd5bb + .quad 0x92daa72ae1d810e1 + .quad 0xafc4cfdcb72a1c59 + .quad 0x497d78813fc22a24 + .quad 0x3ef5d41593ea022e + .quad 0x5cbcc1a20ed0eed6 + .quad 0x8fd24ecf07382c8c + .quad 
0x6fa42ead06d8e1ad + .quad 0xe276824a1f73371f + .quad 0x7f7cf01c4f5b6736 + .quad 0x7e201fe304fa46e7 + .quad 0x785a36a357808c96 + + // 2^252 * 7 * G + + .quad 0x825fbdfd63014d2b + .quad 0xc852369c6ca7578b + .quad 0x5b2fcd285c0b5df0 + .quad 0x12ab214c58048c8f + .quad 0x070442985d517bc3 + .quad 0x6acd56c7ae653678 + .quad 0x00a27983985a7763 + .quad 0x5167effae512662b + .quad 0xbd4ea9e10f53c4b6 + .quad 0x1673dc5f8ac91a14 + .quad 0xa8f81a4e2acc1aba + .quad 0x33a92a7924332a25 + + // 2^252 * 8 * G + + .quad 0x9dd1f49927996c02 + .quad 0x0cb3b058e04d1752 + .quad 0x1f7e88967fd02c3e + .quad 0x2f964268cb8b3eb1 + .quad 0x7ba95ba0218f2ada + .quad 0xcff42287330fb9ca + .quad 0xdada496d56c6d907 + .quad 0x5380c296f4beee54 + .quad 0x9d4f270466898d0a + .quad 0x3d0987990aff3f7a + .quad 0xd09ef36267daba45 + .quad 0x7761455e7b1c669c + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/curve25519/curve25519_x25519base_byte_alt.S b/arm/curve25519/curve25519_x25519base_byte_alt.S new file mode 100644 index 0000000000..c3fec7581e --- /dev/null +++ b/arm/curve25519/curve25519_x25519base_byte_alt.S @@ -0,0 +1,8861 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// The x25519 function for curve25519 on base element 9 (byte array arguments) +// Input scalar[32] (bytes); output res[32] (bytes) +// +// extern void curve25519_x25519base_byte_alt +// (uint8_t res[static 32],uint8_t scalar[static 32]) +// +// Given a scalar n, returns the X coordinate of n * G where G = (9,...) is +// the standard generator. The scalar is first slightly modified/mangled +// as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748). +// +// Standard ARM ABI: X0 = res, X1 = scalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519base_byte_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519base_byte_alt) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable home for the input result argument during the whole body + +#define res x23 + +// Other variables that are only needed prior to the modular inverse. + +#define tab x19 + +#define i x20 + +#define bias x21 + +#define bf x22 +#define ix x22 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. + +#define resx res, #(0*NUMSIZE) + +#define scalar sp, #(0*NUMSIZE) + +#define tabent sp, #(1*NUMSIZE) +#define ymx_2 sp, #(1*NUMSIZE) +#define xpy_2 sp, #(2*NUMSIZE) +#define kxy_2 sp, #(3*NUMSIZE) + +#define acc sp, #(4*NUMSIZE) +#define x_1 sp, #(4*NUMSIZE) +#define y_1 sp, #(5*NUMSIZE) +#define z_1 sp, #(6*NUMSIZE) +#define w_1 sp, #(7*NUMSIZE) +#define x_3 sp, #(4*NUMSIZE) +#define y_3 sp, #(5*NUMSIZE) +#define z_3 sp, #(6*NUMSIZE) +#define w_3 sp, #(7*NUMSIZE) + +#define tmpspace sp, #(8*NUMSIZE) +#define t0 sp, #(8*NUMSIZE) +#define t1 sp, #(9*NUMSIZE) +#define t2 sp, #(10*NUMSIZE) +#define t3 sp, #(11*NUMSIZE) +#define t4 sp, #(12*NUMSIZE) +#define t5 sp, #(13*NUMSIZE) + +// Total size to reserve on the stack + +#define NSPACE (14*NUMSIZE) + +// Macro wrapping up the basic field operation bignum_mul_p25519_alt, only +// trivially different from a pure function call to that subroutine. 
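+//
+// As an informal cross-check, the reduction idea can be modeled in a few
+// lines of Python (an illustrative sketch of the mathematics only, not
+// generated from this code): fold the top 256 bits of the 512-bit product
+// back in with a factor of 38, since 2^256 == 38 (mod p_25519), fold the
+// bits from position 255 upwards back in with a factor of 19 likewise,
+// and then make a single final conditional correction:
+//
+//     P = (1 << 255) - 19
+//
+//     def mul_p25519_model(a, b):                 # assumes a, b < P
+//         z = a * b                               # 512-bit product
+//         z = (z & ((1 << 256) - 1)) + 38 * (z >> 256)   # 2^256 == 38
+//         z = (z & ((1 << 255) - 1)) + 19 * (z >> 255)   # 2^255 == 19
+//         return z - P if z >= P else z           # here z < 2 * P
+//
+// The macro below computes the same result branch-free, folding the
+// final correction into the usual +1 quotient-estimate trick.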
+ +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [P2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [P1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #0x26; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, cs; \ + adds x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + orr x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #0x13; \ + madd x11, x7, x8, x7; \ + adds x12, x12, x11; \ + adcs x13, x13, x9; \ + adcs x14, x14, x3; \ + adcs x15, x15, xzr; \ + csel x7, x7, xzr, cc; \ + subs x12, x12, x7; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbc x15, x15, xzr; \ + and x15, x15, #0x7fffffffffffffff; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. 
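+// In terms of the Python model sketched above, that amounts to stopping
+// one step earlier (again illustrative only):
+//
+//     def mul_4_model(a, b):
+//         z = a * b
+//         z = (z & ((1 << 256) - 1)) + 38 * (z >> 256)   # 2^256 == 38
+//         z = (z & ((1 << 255) - 1)) + 19 * (z >> 255)   # 2^255 == 19
+//         return z                    # < 2 * p_25519, but maybe >= p_25519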
+ +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [P2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [P1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #0x26; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, cs; \ + adds x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + bic x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #0x13; \ + mul x11, x7, x8; \ + adds x12, x12, x11; \ + adcs x13, x13, x9; \ + adcs x14, x14, x3; \ + adc x15, x15, xzr; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x4, #38; \ + csel x3, x4, xzr, lo; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbc x8, x8, xzr; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16] + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. 
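+//
+// Concretely, sub_twice4 above and add_twice4/double_twice4 below just
+// compensate a borrow or carry out of the four digits by -/+38, since
+// 2^256 - 38 = 2 * p_25519 exactly. As a small Python model (illustrative
+// only, with inputs taken as < 2^256):
+//
+//     M = (1 << 256) - 38                         # 2 * p_25519
+//
+//     def sub_twice4_model(a, b):                 # nominally a, b < M
+//         s = a - b
+//         return s + M if s < 0 else s            # add back 2 * p_25519
+//
+//     def add_twice4_model(a, b):                 # needs a + b < 2^256 + M
+//         s = a + b
+//         return s - M if s >> 256 else s         # take off 2 * p_25519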
+ +#define add_twice4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x5, x6, [P1+16]; \ + ldp x7, x8, [P2+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +#define double_twice4(P0,P1) \ + ldp x3, x4, [P1]; \ + adds x3, x3, x3; \ + adcs x4, x4, x4; \ + ldp x5, x6, [P1+16]; \ + adcs x5, x5, x5; \ + adcs x6, x6, x6; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +S2N_BN_SYMBOL(curve25519_x25519base_byte_alt): + +// Save regs and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + sub sp, sp, #NSPACE + +// Move the output pointer to a stable place + + mov res, x0 + +// Copy the input scalar to its local variable while mangling it. +// In principle the mangling is into 01xxx...xxx000, but actually +// we only clear the top two bits so 00xxx...xxxxxx. The additional +// 2^254 * G is taken care of by the starting value for the addition +// chain below, while we never look at the three low bits at all. + + ldrb w10, [x1] + ldrb w0, [x1, #1] + orr x10, x10, x0, lsl #8 + ldrb w0, [x1, #2] + orr x10, x10, x0, lsl #16 + ldrb w0, [x1, #3] + orr x10, x10, x0, lsl #24 + ldrb w0, [x1, #4] + orr x10, x10, x0, lsl #32 + ldrb w0, [x1, #5] + orr x10, x10, x0, lsl #40 + ldrb w0, [x1, #6] + orr x10, x10, x0, lsl #48 + ldrb w0, [x1, #7] + orr x10, x10, x0, lsl #56 + ldrb w11, [x1, #8] + ldrb w0, [x1, #9] + orr x11, x11, x0, lsl #8 + ldrb w0, [x1, #10] + orr x11, x11, x0, lsl #16 + ldrb w0, [x1, #11] + orr x11, x11, x0, lsl #24 + ldrb w0, [x1, #12] + orr x11, x11, x0, lsl #32 + ldrb w0, [x1, #13] + orr x11, x11, x0, lsl #40 + ldrb w0, [x1, #14] + orr x11, x11, x0, lsl #48 + ldrb w0, [x1, #15] + orr x11, x11, x0, lsl #56 + stp x10, x11, [scalar] + + ldrb w12, [x1, #16] + ldrb w0, [x1, #17] + orr x12, x12, x0, lsl #8 + ldrb w0, [x1, #18] + orr x12, x12, x0, lsl #16 + ldrb w0, [x1, #19] + orr x12, x12, x0, lsl #24 + ldrb w0, [x1, #20] + orr x12, x12, x0, lsl #32 + ldrb w0, [x1, #21] + orr x12, x12, x0, lsl #40 + ldrb w0, [x1, #22] + orr x12, x12, x0, lsl #48 + ldrb w0, [x1, #23] + orr x12, x12, x0, lsl #56 + ldrb w13, [x1, #24] + ldrb w0, [x1, #25] + orr x13, x13, x0, lsl #8 + ldrb w0, [x1, #26] + orr x13, x13, x0, lsl #16 + ldrb w0, [x1, #27] + orr x13, x13, x0, lsl #24 + ldrb w0, [x1, #28] + orr x13, x13, x0, lsl #32 + ldrb w0, [x1, #29] + orr x13, x13, x0, lsl #40 + ldrb w0, [x1, #30] + orr x13, x13, x0, lsl #48 + ldrb w0, [x1, #31] + orr x13, x13, x0, lsl #56 + bic x13, x13, #0xc000000000000000 + stp x12, x13, [scalar+16] + +// The main part of the computation is on the edwards25519 curve in +// extended-projective coordinates (X,Y,Z,T), representing a point +// (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z). +// Only at the very end do we translate back to curve25519. So G +// below means the generator within edwards25519 corresponding to +// (9,...) for curve25519, via the standard isomorphism. +// +// Initialize accumulator "acc" to either (2^254 + 8) * G or just 2^254 * G +// depending on bit 3 of the scalar, the only nonzero bit of the bottom 4. +// Thus, we have effectively dealt with bits 0, 1, 2, 3, 254 and 255. 
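+//
+// For reference, the "in principle" mangling is exactly the RFC 7748
+// clamping, which in Python would be (illustrative; "raw" is the 32-byte
+// scalar read as a little-endian integer):
+//
+//     def clamp(raw):
+//         n = raw & ((1 << 254) - 8)      # clear bits 255, 254 and 0..2
+//         return n | (1 << 254)           # set bit 254
+//
+// so the clamped scalar is n = 2^254 + 8 * m, with bit 3 of n folded into
+// the choice of starting value below and bits 4..253 absorbed by the loop.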
+
+        ldr x0, [scalar]
+        ands xzr, x0, #8
+
+        adr x10, edwards25519_0g
+        adr x11, edwards25519_8g
+        ldp x0, x1, [x10]
+        ldp x2, x3, [x11]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc]
+
+        ldp x0, x1, [x10, 1*16]
+        ldp x2, x3, [x11, 1*16]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc+1*16]
+
+        ldp x0, x1, [x10, 2*16]
+        ldp x2, x3, [x11, 2*16]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc+2*16]
+
+        ldp x0, x1, [x10, 3*16]
+        ldp x2, x3, [x11, 3*16]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc+3*16]
+
+        mov x0, #1
+        stp x0, xzr, [acc+4*16]
+        stp xzr, xzr, [acc+5*16]
+
+        ldp x0, x1, [x10, 4*16]
+        ldp x2, x3, [x11, 4*16]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc+6*16]
+
+        ldp x0, x1, [x10, 5*16]
+        ldp x2, x3, [x11, 5*16]
+        csel x0, x0, x2, eq
+        csel x1, x1, x3, eq
+        stp x0, x1, [acc+7*16]
+
+// The counter "i" tracks the bit position for which the scalar has
+// already been absorbed, starting at 4 and going up in chunks of 4.
+//
+// The pointer "tab" points at the current block of the table for
+// multiples (2^i * j) * G at the current bit position i; 1 <= j <= 8.
+//
+// The bias is always either 0 or 1 and needs to be added to the
+// partially processed scalar implicitly. This is used to absorb 4 bits
+// of scalar per iteration from 3-bit table indexing by exploiting
+// negation: (16 * h + l) * G = (16 * (h + 1) - (16 - l)) * G is used
+// when l >= 9. Note that we can't have any bias left over at the
+// end because of the clearing of bit 255 of the scalar, meaning the
+// l >= 9 case cannot arise on the last iteration.
+
+        mov i, 4
+        adr tab, edwards25519_gtable
+        mov bias, xzr
+
+// Start of the main loop, repeated 63 times for i = 4, 8, ..., 252
+
+scalarloop:
+
+// Look at the next 4-bit field "bf", adding the previous bias as well.
+// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9,
+// setting the bias to 1 for the next iteration in the latter case.
+
+        lsr x0, i, #6
+        ldr x2, [sp, x0, lsl #3]        // Exploiting scalar = sp exactly
+        lsr x2, x2, i
+        and x2, x2, #15
+        add bf, x2, bias
+
+        cmp bf, 9
+        cset bias, cs
+
+        mov x0, 16
+        sub x0, x0, bf
+        cmp bias, xzr
+        csel ix, x0, bf, ne
+
+// Perform constant-time lookup in the table to get element number "ix".
+// The table entry for the affine point (x,y) is actually a triple
+// (y - x,x + y,2 * d * x * y) to precompute parts of the addition.
+// Note that "ix" can be 0, so we set up the appropriate identity first.
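+//
+// An informal Python model of the recoding and the lookup (a sketch of
+// the logic only; IDENTITY stands for the (1,1,0) triple encoding the
+// neutral element, which is what the register setup below produces):
+//
+//     def recode(bf, bias):               # bf = next 4 scalar bits
+//         bf = bf + bias
+//         if bf >= 9:                     # represent as -(16 - bf) and
+//             return 16 - bf, 1           # carry 1 into the next window
+//         return bf, 0                    # table index 0..8, no carry
+//
+//     def lookup(table, ix):              # table[j-1] = triple for j * G
+//         entry = IDENTITY
+//         for j in range(1, 9):           # touch every entry; the code
+//             if ix == j:                 # below uses csel, not branches
+//                 entry = table[j - 1]
+//         return entry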
+ + mov x0, #1 + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + + cmp ix, #1 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #2 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #3 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #4 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #5 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #6 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #7 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #8 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, 
ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + +// We now have the triple from the table in registers as follows +// +// [x3;x2;x1;x0] = y - x +// [x7;x6;x5;x4] = x + y +// [x11;x10;x9;x8] = 2 * d * x * y +// +// In case bias = 1 we need to negate this. For Edwards curves +// -(x,y) = (-x,y), i.e. we need to negate the x coordinate. +// In this processed encoding, that amounts to swapping the +// first two fields and negating the third. +// +// The optional negation here also pretends bias = 0 whenever +// ix = 0 so that it doesn't need to handle the case of zero +// inputs, since no non-trivial table entries are zero. Note +// that in the zero case the whole negation is trivial, and +// so indeed is the swapping. + + cmp bias, #0 + + csel x12, x0, x4, eq + csel x13, x1, x5, eq + csel x14, x2, x6, eq + csel x15, x3, x7, eq + stp x12, x13, [tabent] + stp x14, x15, [tabent+16] + + csel x12, x0, x4, ne + csel x13, x1, x5, ne + csel x14, x2, x6, ne + csel x15, x3, x7, ne + stp x12, x13, [tabent+32] + stp x14, x15, [tabent+48] + + mov x0, #-19 + subs x0, x0, x8 + mov x2, #-1 + sbcs x1, x2, x9 + sbcs x2, x2, x10 + mov x3, #0x7FFFFFFFFFFFFFFF + sbc x3, x3, x11 + + cmp ix, xzr + ccmp bias, xzr, #4, ne + + csel x0, x0, x8, ne + csel x1, x1, x9, ne + stp x0, x1, [tabent+64] + csel x2, x2, x10, ne + csel x3, x3, x11, ne + stp x2, x3, [tabent+80] + +// Extended-projective and precomputed mixed addition. +// This is effectively the same as calling the standalone +// function edwards25519_pepadd_alt(acc,acc,tabent), but we +// only retain slightly weaker normalization < 2 * p_25519 +// throughout the inner loop, so the computation is +// slightly different, and faster overall. + + double_twice4(t0,z_1) + sub_twice4(t1,y_1,x_1) + add_twice4(t2,y_1,x_1) + mul_4(t3,w_1,kxy_2) + mul_4(t1,t1,ymx_2) + mul_4(t2,t2,xpy_2) + sub_twice4(t4,t0,t3) + add_twice4(t0,t0,t3) + sub_twice4(t5,t2,t1) + add_twice4(t1,t2,t1) + mul_4(z_3,t4,t0) + mul_4(x_3,t5,t4) + mul_4(y_3,t0,t1) + mul_4(w_3,t5,t1) + +// End of the main loop; move on by 4 bits. + + add i, i, 4 + cmp i, 256 + bcc scalarloop + +// Now we need to translate from Edwards curve edwards25519 back +// to the Montgomery form curve25519. The mapping in the affine +// representations is +// +// (x,y) |-> ((1 + y) / (1 - y), c * (1 + y) / ((1 - y) * x)) +// +// For x25519, we only need the x coordinate, and we compute this as +// +// (1 + y) / (1 - y) = (x + x * y) / (x - x * y) +// = (X/Z + T/Z) / (X/Z - T/Z) +// = (X + T) / (X - T) +// = (X + T) * inverse(X - T) +// +// We could equally well use (Z + Y) / (Z - Y), but the above has the +// same cost, and it more explicitly forces zero output whenever X = 0, +// regardless of how the modular inverse behaves on zero inputs. In +// the present setting (base point 9, mangled scalar) that doesn't +// really matter anyway since X = 0 never arises, but it seems a +// little bit tidier. Note that both Edwards point (0,1) which maps to +// the Montgomery point at infinity, and Edwards (0,-1) which maps to +// Montgomery (0,0) [this is the 2-torsion point] are both by definition +// mapped to 0 by the X coordinate mapping used to define curve25519. 
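+//
+// In Python terms the whole conversion comes down to (illustrative model
+// using the Python 3.8+ modular inverse; X - T is nonzero mod p_25519 in
+// this setting, as the surrounding comments note):
+//
+//     P = (1 << 255) - 19
+//     u = ((X + T) * pow(X - T, -1, P)) % P       # the x25519 output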
+// +// First the addition and subtraction: + + add_twice4(y_3,x_3,w_3) + sub_twice4(z_3,x_3,w_3) + +// Prepare to call the modular inverse function to get x_3 = 1/z_3 +// Note that this works for the weakly normalized z_3 equally well. +// The non-coprime case z_3 == 0 (mod p_25519) cannot arise anyway. + + mov x0, 4 + add x1, x_3 + add x2, z_3 + adr x3, p_25519 + add x4, tmpspace + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "arm/generic/bignum_modinv.S". + + lsl x10, x0, #3 + add x21, x4, x10 + add x22, x21, x10 + mov x10, xzr +copyloop: + ldr x11, [x2, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + str x11, [x21, x10, lsl #3] + str x12, [x22, x10, lsl #3] + str x12, [x4, x10, lsl #3] + str xzr, [x1, x10, lsl #3] + add x10, x10, #0x1 + cmp x10, x0 + b.cc copyloop + ldr x11, [x4] + sub x12, x11, #0x1 + str x12, [x4] + lsl x20, x11, #2 + sub x20, x11, x20 + eor x20, x20, #0x2 + mov x12, #0x1 + madd x12, x11, x20, x12 + mul x11, x12, x12 + madd x20, x12, x20, x20 + mul x12, x11, x11 + madd x20, x11, x20, x20 + mul x11, x12, x12 + madd x20, x12, x20, x20 + madd x20, x11, x20, x20 + lsl x2, x0, #7 +outerloop: + add x10, x2, #0x3f + lsr x5, x10, #6 + cmp x5, x0 + csel x5, x0, x5, cs + mov x13, xzr + mov x15, xzr + mov x14, xzr + mov x16, xzr + mov x19, xzr + mov x10, xzr +toploop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + orr x17, x11, x12 + cmp x17, xzr + and x17, x19, x13 + csel x15, x17, x15, ne + and x17, x19, x14 + csel x16, x17, x16, ne + csel x13, x11, x13, ne + csel x14, x12, x14, ne + csetm x19, ne + add x10, x10, #0x1 + cmp x10, x5 + b.cc toploop + orr x11, x13, x14 + clz x12, x11 + negs x17, x12 + lsl x13, x13, x12 + csel x15, x15, xzr, ne + lsl x14, x14, x12 + csel x16, x16, xzr, ne + lsr x15, x15, x17 + lsr x16, x16, x17 + orr x13, x13, x15 + orr x14, x14, x16 + ldr x15, [x21] + ldr x16, [x22] + mov x6, #0x1 + mov x7, xzr + mov x8, xzr + mov x9, #0x1 + mov x10, #0x3a + tst x15, #0x1 +innerloop: + csel x11, x14, xzr, ne + csel x12, x16, xzr, ne + csel x17, x8, xzr, ne + csel x19, x9, xzr, ne + ccmp x13, x14, #0x2, ne + sub x11, x13, x11 + sub x12, x15, x12 + csel x14, x14, x13, cs + cneg x11, x11, cc + csel x16, x16, x15, cs + cneg x15, x12, cc + csel x8, x8, x6, cs + csel x9, x9, x7, cs + tst x12, #0x2 + add x6, x6, x17 + add x7, x7, x19 + lsr x13, x11, #1 + lsr x15, x15, #1 + add x8, x8, x8 + add x9, x9, x9 + sub x10, x10, #0x1 + cbnz x10, innerloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +congloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + adds x15, x15, x16 + extr x17, x15, x17, #58 + str x17, [x4, x10, lsl #3] + mov x17, x15 + umulh x15, x7, x12 + adc x13, x13, x15 + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + adds x15, x15, x16 + extr x19, x15, x19, #58 + str x19, [x1, x10, lsl #3] + mov x19, x15 + umulh x15, x9, x12 + adc x14, x14, x15 + add x10, x10, #0x1 + cmp x10, x0 + b.cc congloop + extr x13, x13, x17, #58 + extr x14, x14, x19, #58 + ldr x11, [x4] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, wmontend +wmontloop: + ldr x11, [x3, x10, lsl #3] + 
ldr x12, [x4, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x4, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wmontloop +wmontend: + adcs x16, x16, x13 + adc x13, xzr, xzr + sub x15, x10, #0x1 + str x16, [x4, x15, lsl #3] + negs x10, xzr +wcmploop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcmploop + sbcs xzr, x13, xzr + csetm x13, cs + negs x10, xzr +wcorrloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x13 + sbcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcorrloop + ldr x11, [x1] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, zmontend +zmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x1, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zmontloop +zmontend: + adcs x16, x16, x14 + adc x14, xzr, xzr + sub x15, x10, #0x1 + str x16, [x1, x15, lsl #3] + negs x10, xzr +zcmploop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcmploop + sbcs xzr, x14, xzr + csetm x14, cs + negs x10, xzr +zcorrloop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x14 + sbcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcorrloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +crossloop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + subs x15, x15, x16 + str x15, [x21, x10, lsl #3] + umulh x15, x7, x12 + sub x17, x15, x17 + sbcs x13, x13, x17 + csetm x17, cc + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + subs x15, x15, x16 + str x15, [x22, x10, lsl #3] + umulh x15, x9, x12 + sub x19, x15, x19 + sbcs x14, x14, x19 + csetm x19, cc + add x10, x10, #0x1 + cmp x10, x5 + b.cc crossloop + cmn x17, x17 + ldr x15, [x21] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip1 +negloop1: + add x11, x10, #0x8 + ldr x12, [x21, x11] + extr x15, x12, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop1 +negskip1: + extr x15, x13, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + cmn x19, x19 + ldr x15, [x22] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip2 +negloop2: + add x11, x10, #0x8 + ldr x12, [x22, x11] + extr x15, x12, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop2 +negskip2: + extr x15, x14, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x10, xzr + cmn x17, x17 +wfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + and x11, x11, x17 + eor x12, x12, x17 + adcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wfliploop + mvn x19, x19 + mov x10, xzr + cmn x19, x19 +zfliploop: + ldr x11, [x3, x10, lsl #3] + ldr 
x12, [x1, x10, lsl #3] + and x11, x11, x19 + eor x12, x12, x19 + adcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zfliploop + subs x2, x2, #0x3a + b.hi outerloop + +// The final result is (X + T) / (X - T) +// This is the only operation in the whole computation that +// fully reduces modulo p_25519 since now we want the canonical +// answer as output. + + mul_p25519(x_1,y_3,x_3) + + ldp x10, x11, [x_1] + strb w10, [resx] + lsr x10, x10, #8 + strb w10, [resx+1] + lsr x10, x10, #8 + strb w10, [resx+2] + lsr x10, x10, #8 + strb w10, [resx+3] + lsr x10, x10, #8 + strb w10, [resx+4] + lsr x10, x10, #8 + strb w10, [resx+5] + lsr x10, x10, #8 + strb w10, [resx+6] + lsr x10, x10, #8 + strb w10, [resx+7] + + strb w11, [resx+8] + lsr x11, x11, #8 + strb w11, [resx+9] + lsr x11, x11, #8 + strb w11, [resx+10] + lsr x11, x11, #8 + strb w11, [resx+11] + lsr x11, x11, #8 + strb w11, [resx+12] + lsr x11, x11, #8 + strb w11, [resx+13] + lsr x11, x11, #8 + strb w11, [resx+14] + lsr x11, x11, #8 + strb w11, [resx+15] + + ldp x12, x13, [x_1+16] + strb w12, [resx+16] + lsr x12, x12, #8 + strb w12, [resx+17] + lsr x12, x12, #8 + strb w12, [resx+18] + lsr x12, x12, #8 + strb w12, [resx+19] + lsr x12, x12, #8 + strb w12, [resx+20] + lsr x12, x12, #8 + strb w12, [resx+21] + lsr x12, x12, #8 + strb w12, [resx+22] + lsr x12, x12, #8 + strb w12, [resx+23] + + strb w13, [resx+24] + lsr x13, x13, #8 + strb w13, [resx+25] + lsr x13, x13, #8 + strb w13, [resx+26] + lsr x13, x13, #8 + strb w13, [resx+27] + lsr x13, x13, #8 + strb w13, [resx+28] + lsr x13, x13, #8 + strb w13, [resx+29] + lsr x13, x13, #8 + strb w13, [resx+30] + lsr x13, x13, #8 + strb w13, [resx+31] + +// Restore stack and registers + + add sp, sp, #NSPACE + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// The modulus p_25519 = 2^255 - 19, for the modular inverse + +p_25519: + .quad 0xffffffffffffffed + .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x7fffffffffffffff + +// 2^254 * G and (2^254 + 8) * G in extended-projective coordinates +// but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. + +edwards25519_0g: + + .quad 0x251037f7cf4e861d + .quad 0x10ede0fb19fb128f + .quad 0x96c033b175f5e2c8 + .quad 0x055f070d6c15fb0d + + .quad 0x7c52af2c97473e69 + .quad 0x022f82391bad8378 + .quad 0x9991e1b02adb476f + .quad 0x511144a03a99b855 + + .quad 0x5fafc3b88ff2e4ae + .quad 0x855e4ff0de1230ff + .quad 0x72e302a348492870 + .quad 0x1253c19e53dbe1bc + +edwards25519_8g: + + .quad 0x331d086e0d9abcaa + .quad 0x1e23c96d311a10c9 + .quad 0x96d0f95e58c13478 + .quad 0x2f72f7384fcfcc59 + + .quad 0x39a6cd1cfd7d87c9 + .quad 0x9867a0abd8ae153a + .quad 0xa49d2a5f35986745 + .quad 0x57012940cdfe82e1 + + .quad 0x5046a6532ec5544a + .quad 0x6d674004739ff6c9 + .quad 0x9bbaa44b234a70e3 + .quad 0x5e6d8901138cf386 + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
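+//
+// For reference, such a triple would be obtained from an affine point
+// (x,y) on edwards25519 as follows (Python sketch describing the entry
+// format; d is the usual curve constant, and this is not a claim about
+// how the table itself was generated):
+//
+//     P = (1 << 255) - 19
+//     D = (-121665 * pow(121666, -1, P)) % P      # edwards25519 d
+//
+//     def table_triple(x, y):
+//         return ((y - x) % P, (x + y) % P, (2 * D * x * y) % P)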
+ +edwards25519_gtable: + + // 2^4 * 1 * G + + .quad 0x7ec851ca553e2df3 + .quad 0xa71284cba64878b3 + .quad 0xe6b5e4193288d1e7 + .quad 0x4cf210ec5a9a8883 + .quad 0x322d04a52d9021f6 + .quad 0xb9c19f3375c6bf9c + .quad 0x587a3a4342d20b09 + .quad 0x143b1cf8aa64fe61 + .quad 0x9f867c7d968acaab + .quad 0x5f54258e27092729 + .quad 0xd0a7d34bea180975 + .quad 0x21b546a3374126e1 + + // 2^4 * 2 * G + + .quad 0xa94ff858a2888343 + .quad 0xce0ed4565313ed3c + .quad 0xf55c3dcfb5bf34fa + .quad 0x0a653ca5c9eab371 + .quad 0x490a7a45d185218f + .quad 0x9a15377846049335 + .quad 0x0060ea09cc31e1f6 + .quad 0x7e041577f86ee965 + .quad 0x66b2a496ce5b67f3 + .quad 0xff5492d8bd569796 + .quad 0x503cec294a592cd0 + .quad 0x566943650813acb2 + + // 2^4 * 3 * G + + .quad 0xb818db0c26620798 + .quad 0x5d5c31d9606e354a + .quad 0x0982fa4f00a8cdc7 + .quad 0x17e12bcd4653e2d4 + .quad 0x5672f9eb1dabb69d + .quad 0xba70b535afe853fc + .quad 0x47ac0f752796d66d + .quad 0x32a5351794117275 + .quad 0xd3a644a6df648437 + .quad 0x703b6559880fbfdd + .quad 0xcb852540ad3a1aa5 + .quad 0x0900b3f78e4c6468 + + // 2^4 * 4 * G + + .quad 0x0a851b9f679d651b + .quad 0xe108cb61033342f2 + .quad 0xd601f57fe88b30a3 + .quad 0x371f3acaed2dd714 + .quad 0xed280fbec816ad31 + .quad 0x52d9595bd8e6efe3 + .quad 0x0fe71772f6c623f5 + .quad 0x4314030b051e293c + .quad 0xd560005efbf0bcad + .quad 0x8eb70f2ed1870c5e + .quad 0x201f9033d084e6a0 + .quad 0x4c3a5ae1ce7b6670 + + // 2^4 * 5 * G + + .quad 0x4138a434dcb8fa95 + .quad 0x870cf67d6c96840b + .quad 0xde388574297be82c + .quad 0x7c814db27262a55a + .quad 0xbaf875e4c93da0dd + .quad 0xb93282a771b9294d + .quad 0x80d63fb7f4c6c460 + .quad 0x6de9c73dea66c181 + .quad 0x478904d5a04df8f2 + .quad 0xfafbae4ab10142d3 + .quad 0xf6c8ac63555d0998 + .quad 0x5aac4a412f90b104 + + // 2^4 * 6 * G + + .quad 0xc64f326b3ac92908 + .quad 0x5551b282e663e1e0 + .quad 0x476b35f54a1a4b83 + .quad 0x1b9da3fe189f68c2 + .quad 0x603a0d0abd7f5134 + .quad 0x8089c932e1d3ae46 + .quad 0xdf2591398798bd63 + .quad 0x1c145cd274ba0235 + .quad 0x32e8386475f3d743 + .quad 0x365b8baf6ae5d9ef + .quad 0x825238b6385b681e + .quad 0x234929c1167d65e1 + + // 2^4 * 7 * G + + .quad 0x984decaba077ade8 + .quad 0x383f77ad19eb389d + .quad 0xc7ec6b7e2954d794 + .quad 0x59c77b3aeb7c3a7a + .quad 0x48145cc21d099fcf + .quad 0x4535c192cc28d7e5 + .quad 0x80e7c1e548247e01 + .quad 0x4a5f28743b2973ee + .quad 0xd3add725225ccf62 + .quad 0x911a3381b2152c5d + .quad 0xd8b39fad5b08f87d + .quad 0x6f05606b4799fe3b + + // 2^4 * 8 * G + + .quad 0x9ffe9e92177ba962 + .quad 0x98aee71d0de5cae1 + .quad 0x3ff4ae942d831044 + .quad 0x714de12e58533ac8 + .quad 0x5b433149f91b6483 + .quad 0xadb5dc655a2cbf62 + .quad 0x87fa8412632827b3 + .quad 0x60895e91ab49f8d8 + .quad 0xe9ecf2ed0cf86c18 + .quad 0xb46d06120735dfd4 + .quad 0xbc9da09804b96be7 + .quad 0x73e2e62fd96dc26b + + // 2^8 * 1 * G + + .quad 0xed5b635449aa515e + .quad 0xa865c49f0bc6823a + .quad 0x850c1fe95b42d1c4 + .quad 0x30d76d6f03d315b9 + .quad 0x2eccdd0e632f9c1d + .quad 0x51d0b69676893115 + .quad 0x52dfb76ba8637a58 + .quad 0x6dd37d49a00eef39 + .quad 0x6c4444172106e4c7 + .quad 0xfb53d680928d7f69 + .quad 0xb4739ea4694d3f26 + .quad 0x10c697112e864bb0 + + // 2^8 * 2 * G + + .quad 0x6493c4277dbe5fde + .quad 0x265d4fad19ad7ea2 + .quad 0x0e00dfc846304590 + .quad 0x25e61cabed66fe09 + .quad 0x0ca62aa08358c805 + .quad 0x6a3d4ae37a204247 + .quad 0x7464d3a63b11eddc + .quad 0x03bf9baf550806ef + .quad 0x3f13e128cc586604 + .quad 0x6f5873ecb459747e + .quad 0xa0b63dedcc1268f5 + .quad 0x566d78634586e22c + + // 2^8 * 3 * G + + .quad 0x1637a49f9cc10834 + .quad 0xbc8e56d5a89bc451 + 
.quad 0x1cb5ec0f7f7fd2db + .quad 0x33975bca5ecc35d9 + .quad 0xa1054285c65a2fd0 + .quad 0x6c64112af31667c3 + .quad 0x680ae240731aee58 + .quad 0x14fba5f34793b22a + .quad 0x3cd746166985f7d4 + .quad 0x593e5e84c9c80057 + .quad 0x2fc3f2b67b61131e + .quad 0x14829cea83fc526c + + // 2^8 * 4 * G + + .quad 0xff437b8497dd95c2 + .quad 0x6c744e30aa4eb5a7 + .quad 0x9e0c5d613c85e88b + .quad 0x2fd9c71e5f758173 + .quad 0x21e70b2f4e71ecb8 + .quad 0xe656ddb940a477e3 + .quad 0xbf6556cece1d4f80 + .quad 0x05fc3bc4535d7b7e + .quad 0x24b8b3ae52afdedd + .quad 0x3495638ced3b30cf + .quad 0x33a4bc83a9be8195 + .quad 0x373767475c651f04 + + // 2^8 * 5 * G + + .quad 0x2fba99fd40d1add9 + .quad 0xb307166f96f4d027 + .quad 0x4363f05215f03bae + .quad 0x1fbea56c3b18f999 + .quad 0x634095cb14246590 + .quad 0xef12144016c15535 + .quad 0x9e38140c8910bc60 + .quad 0x6bf5905730907c8c + .quad 0x0fa778f1e1415b8a + .quad 0x06409ff7bac3a77e + .quad 0x6f52d7b89aa29a50 + .quad 0x02521cf67a635a56 + + // 2^8 * 6 * G + + .quad 0x513fee0b0a9d5294 + .quad 0x8f98e75c0fdf5a66 + .quad 0xd4618688bfe107ce + .quad 0x3fa00a7e71382ced + .quad 0xb1146720772f5ee4 + .quad 0xe8f894b196079ace + .quad 0x4af8224d00ac824a + .quad 0x001753d9f7cd6cc4 + .quad 0x3c69232d963ddb34 + .quad 0x1dde87dab4973858 + .quad 0xaad7d1f9a091f285 + .quad 0x12b5fe2fa048edb6 + + // 2^8 * 7 * G + + .quad 0x71f0fbc496fce34d + .quad 0x73b9826badf35bed + .quad 0xd2047261ff28c561 + .quad 0x749b76f96fb1206f + .quad 0xdf2b7c26ad6f1e92 + .quad 0x4b66d323504b8913 + .quad 0x8c409dc0751c8bc3 + .quad 0x6f7e93c20796c7b8 + .quad 0x1f5af604aea6ae05 + .quad 0xc12351f1bee49c99 + .quad 0x61a808b5eeff6b66 + .quad 0x0fcec10f01e02151 + + // 2^8 * 8 * G + + .quad 0x644d58a649fe1e44 + .quad 0x21fcaea231ad777e + .quad 0x02441c5a887fd0d2 + .quad 0x4901aa7183c511f3 + .quad 0x3df2d29dc4244e45 + .quad 0x2b020e7493d8de0a + .quad 0x6cc8067e820c214d + .quad 0x413779166feab90a + .quad 0x08b1b7548c1af8f0 + .quad 0xce0f7a7c246299b4 + .quad 0xf760b0f91e06d939 + .quad 0x41bb887b726d1213 + + // 2^12 * 1 * G + + .quad 0x9267806c567c49d8 + .quad 0x066d04ccca791e6a + .quad 0xa69f5645e3cc394b + .quad 0x5c95b686a0788cd2 + .quad 0x97d980e0aa39f7d2 + .quad 0x35d0384252c6b51c + .quad 0x7d43f49307cd55aa + .quad 0x56bd36cfb78ac362 + .quad 0x2ac519c10d14a954 + .quad 0xeaf474b494b5fa90 + .quad 0xe6af8382a9f87a5a + .quad 0x0dea6db1879be094 + + // 2^12 * 2 * G + + .quad 0xaa66bf547344e5ab + .quad 0xda1258888f1b4309 + .quad 0x5e87d2b3fd564b2f + .quad 0x5b2c78885483b1dd + .quad 0x15baeb74d6a8797a + .quad 0x7ef55cf1fac41732 + .quad 0x29001f5a3c8b05c5 + .quad 0x0ad7cc8752eaccfb + .quad 0x52151362793408cf + .quad 0xeb0f170319963d94 + .quad 0xa833b2fa883d9466 + .quad 0x093a7fa775003c78 + + // 2^12 * 3 * G + + .quad 0xe5107de63a16d7be + .quad 0xa377ffdc9af332cf + .quad 0x70d5bf18440b677f + .quad 0x6a252b19a4a31403 + .quad 0xb8e9604460a91286 + .quad 0x7f3fd8047778d3de + .quad 0x67d01e31bf8a5e2d + .quad 0x7b038a06c27b653e + .quad 0x9ed919d5d36990f3 + .quad 0x5213aebbdb4eb9f2 + .quad 0xc708ea054cb99135 + .quad 0x58ded57f72260e56 + + // 2^12 * 4 * G + + .quad 0x78e79dade9413d77 + .quad 0xf257f9d59729e67d + .quad 0x59db910ee37aa7e6 + .quad 0x6aa11b5bbb9e039c + .quad 0xda6d53265b0fd48b + .quad 0x8960823193bfa988 + .quad 0xd78ac93261d57e28 + .quad 0x79f2942d3a5c8143 + .quad 0x97da2f25b6c88de9 + .quad 0x251ba7eaacf20169 + .quad 0x09b44f87ef4eb4e4 + .quad 0x7d90ab1bbc6a7da5 + + // 2^12 * 5 * G + + .quad 0x9acca683a7016bfe + .quad 0x90505f4df2c50b6d + .quad 0x6b610d5fcce435aa + .quad 0x19a10d446198ff96 + .quad 0x1a07a3f496b3c397 + .quad 
0x11ceaa188f4e2532 + .quad 0x7d9498d5a7751bf0 + .quad 0x19ed161f508dd8a0 + .quad 0x560a2cd687dce6ca + .quad 0x7f3568c48664cf4d + .quad 0x8741e95222803a38 + .quad 0x483bdab1595653fc + + // 2^12 * 6 * G + + .quad 0xfa780f148734fa49 + .quad 0x106f0b70360534e0 + .quad 0x2210776fe3e307bd + .quad 0x3286c109dde6a0fe + .quad 0xd6cf4d0ab4da80f6 + .quad 0x82483e45f8307fe0 + .quad 0x05005269ae6f9da4 + .quad 0x1c7052909cf7877a + .quad 0x32ee7de2874e98d4 + .quad 0x14c362e9b97e0c60 + .quad 0x5781dcde6a60a38a + .quad 0x217dd5eaaa7aa840 + + // 2^12 * 7 * G + + .quad 0x9db7c4d0248e1eb0 + .quad 0xe07697e14d74bf52 + .quad 0x1e6a9b173c562354 + .quad 0x7fa7c21f795a4965 + .quad 0x8bdf1fb9be8c0ec8 + .quad 0x00bae7f8e30a0282 + .quad 0x4963991dad6c4f6c + .quad 0x07058a6e5df6f60a + .quad 0xe9eb02c4db31f67f + .quad 0xed25fd8910bcfb2b + .quad 0x46c8131f5c5cddb4 + .quad 0x33b21c13a0cb9bce + + // 2^12 * 8 * G + + .quad 0x360692f8087d8e31 + .quad 0xf4dcc637d27163f7 + .quad 0x25a4e62065ea5963 + .quad 0x659bf72e5ac160d9 + .quad 0x9aafb9b05ee38c5b + .quad 0xbf9d2d4e071a13c7 + .quad 0x8eee6e6de933290a + .quad 0x1c3bab17ae109717 + .quad 0x1c9ab216c7cab7b0 + .quad 0x7d65d37407bbc3cc + .quad 0x52744750504a58d5 + .quad 0x09f2606b131a2990 + + // 2^16 * 1 * G + + .quad 0x40e87d44744346be + .quad 0x1d48dad415b52b25 + .quad 0x7c3a8a18a13b603e + .quad 0x4eb728c12fcdbdf7 + .quad 0x7e234c597c6691ae + .quad 0x64889d3d0a85b4c8 + .quad 0xdae2c90c354afae7 + .quad 0x0a871e070c6a9e1d + .quad 0x3301b5994bbc8989 + .quad 0x736bae3a5bdd4260 + .quad 0x0d61ade219d59e3c + .quad 0x3ee7300f2685d464 + + // 2^16 * 2 * G + + .quad 0xf5d255e49e7dd6b7 + .quad 0x8016115c610b1eac + .quad 0x3c99975d92e187ca + .quad 0x13815762979125c2 + .quad 0x43fa7947841e7518 + .quad 0xe5c6fa59639c46d7 + .quad 0xa1065e1de3052b74 + .quad 0x7d47c6a2cfb89030 + .quad 0x3fdad0148ef0d6e0 + .quad 0x9d3e749a91546f3c + .quad 0x71ec621026bb8157 + .quad 0x148cf58d34c9ec80 + + // 2^16 * 3 * G + + .quad 0x46a492f67934f027 + .quad 0x469984bef6840aa9 + .quad 0x5ca1bc2a89611854 + .quad 0x3ff2fa1ebd5dbbd4 + .quad 0xe2572f7d9ae4756d + .quad 0x56c345bb88f3487f + .quad 0x9fd10b6d6960a88d + .quad 0x278febad4eaea1b9 + .quad 0xb1aa681f8c933966 + .quad 0x8c21949c20290c98 + .quad 0x39115291219d3c52 + .quad 0x4104dd02fe9c677b + + // 2^16 * 4 * G + + .quad 0x72b2bf5e1124422a + .quad 0xa1fa0c3398a33ab5 + .quad 0x94cb6101fa52b666 + .quad 0x2c863b00afaf53d5 + .quad 0x81214e06db096ab8 + .quad 0x21a8b6c90ce44f35 + .quad 0x6524c12a409e2af5 + .quad 0x0165b5a48efca481 + .quad 0xf190a474a0846a76 + .quad 0x12eff984cd2f7cc0 + .quad 0x695e290658aa2b8f + .quad 0x591b67d9bffec8b8 + + // 2^16 * 5 * G + + .quad 0x312f0d1c80b49bfa + .quad 0x5979515eabf3ec8a + .quad 0x727033c09ef01c88 + .quad 0x3de02ec7ca8f7bcb + .quad 0x99b9b3719f18b55d + .quad 0xe465e5faa18c641e + .quad 0x61081136c29f05ed + .quad 0x489b4f867030128b + .quad 0xd232102d3aeb92ef + .quad 0xe16253b46116a861 + .quad 0x3d7eabe7190baa24 + .quad 0x49f5fbba496cbebf + + // 2^16 * 6 * G + + .quad 0x30949a108a5bcfd4 + .quad 0xdc40dd70bc6473eb + .quad 0x92c294c1307c0d1c + .quad 0x5604a86dcbfa6e74 + .quad 0x155d628c1e9c572e + .quad 0x8a4d86acc5884741 + .quad 0x91a352f6515763eb + .quad 0x06a1a6c28867515b + .quad 0x7288d1d47c1764b6 + .quad 0x72541140e0418b51 + .quad 0x9f031a6018acf6d1 + .quad 0x20989e89fe2742c6 + + // 2^16 * 7 * G + + .quad 0x499777fd3a2dcc7f + .quad 0x32857c2ca54fd892 + .quad 0xa279d864d207e3a0 + .quad 0x0403ed1d0ca67e29 + .quad 0x1674278b85eaec2e + .quad 0x5621dc077acb2bdf + .quad 0x640a4c1661cbf45a + .quad 0x730b9950f70595d3 + .quad 
0xc94b2d35874ec552 + .quad 0xc5e6c8cf98246f8d + .quad 0xf7cb46fa16c035ce + .quad 0x5bd7454308303dcc + + // 2^16 * 8 * G + + .quad 0x7f9ad19528b24cc2 + .quad 0x7f6b54656335c181 + .quad 0x66b8b66e4fc07236 + .quad 0x133a78007380ad83 + .quad 0x85c4932115e7792a + .quad 0xc64c89a2bdcdddc9 + .quad 0x9d1e3da8ada3d762 + .quad 0x5bb7db123067f82c + .quad 0x0961f467c6ca62be + .quad 0x04ec21d6211952ee + .quad 0x182360779bd54770 + .quad 0x740dca6d58f0e0d2 + + // 2^20 * 1 * G + + .quad 0x50b70bf5d3f0af0b + .quad 0x4feaf48ae32e71f7 + .quad 0x60e84ed3a55bbd34 + .quad 0x00ed489b3f50d1ed + .quad 0x3906c72aed261ae5 + .quad 0x9ab68fd988e100f7 + .quad 0xf5e9059af3360197 + .quad 0x0e53dc78bf2b6d47 + .quad 0xb90829bf7971877a + .quad 0x5e4444636d17e631 + .quad 0x4d05c52e18276893 + .quad 0x27632d9a5a4a4af5 + + // 2^20 * 2 * G + + .quad 0xd11ff05154b260ce + .quad 0xd86dc38e72f95270 + .quad 0x601fcd0d267cc138 + .quad 0x2b67916429e90ccd + .quad 0xa98285d187eaffdb + .quad 0xa5b4fbbbd8d0a864 + .quad 0xb658f27f022663f7 + .quad 0x3bbc2b22d99ce282 + .quad 0xb917c952583c0a58 + .quad 0x653ff9b80fe4c6f3 + .quad 0x9b0da7d7bcdf3c0c + .quad 0x43a0eeb6ab54d60e + + // 2^20 * 3 * G + + .quad 0x396966a46d4a5487 + .quad 0xf811a18aac2bb3ba + .quad 0x66e4685b5628b26b + .quad 0x70a477029d929b92 + .quad 0x3ac6322357875fe8 + .quad 0xd9d4f4ecf5fbcb8f + .quad 0x8dee8493382bb620 + .quad 0x50c5eaa14c799fdc + .quad 0xdd0edc8bd6f2fb3c + .quad 0x54c63aa79cc7b7a0 + .quad 0xae0b032b2c8d9f1a + .quad 0x6f9ce107602967fb + + // 2^20 * 4 * G + + .quad 0xad1054b1cde1c22a + .quad 0xc4a8e90248eb32df + .quad 0x5f3e7b33accdc0ea + .quad 0x72364713fc79963e + .quad 0x139693063520e0b5 + .quad 0x437fcf7c88ea03fe + .quad 0xf7d4c40bd3c959bc + .quad 0x699154d1f893ded9 + .quad 0x315d5c75b4b27526 + .quad 0xcccb842d0236daa5 + .quad 0x22f0c8a3345fee8e + .quad 0x73975a617d39dbed + + // 2^20 * 5 * G + + .quad 0xe4024df96375da10 + .quad 0x78d3251a1830c870 + .quad 0x902b1948658cd91c + .quad 0x7e18b10b29b7438a + .quad 0x6f37f392f4433e46 + .quad 0x0e19b9a11f566b18 + .quad 0x220fb78a1fd1d662 + .quad 0x362a4258a381c94d + .quad 0x9071d9132b6beb2f + .quad 0x0f26e9ad28418247 + .quad 0xeab91ec9bdec925d + .quad 0x4be65bc8f48af2de + + // 2^20 * 6 * G + + .quad 0x78487feba36e7028 + .quad 0x5f3f13001dd8ce34 + .quad 0x934fb12d4b30c489 + .quad 0x056c244d397f0a2b + .quad 0x1d50fba257c26234 + .quad 0x7bd4823adeb0678b + .quad 0xc2b0dc6ea6538af5 + .quad 0x5665eec6351da73e + .quad 0xdb3ee00943bfb210 + .quad 0x4972018720800ac2 + .quad 0x26ab5d6173bd8667 + .quad 0x20b209c2ab204938 + + // 2^20 * 7 * G + + .quad 0x549e342ac07fb34b + .quad 0x02d8220821373d93 + .quad 0xbc262d70acd1f567 + .quad 0x7a92c9fdfbcac784 + .quad 0x1fcca94516bd3289 + .quad 0x448d65aa41420428 + .quad 0x59c3b7b216a55d62 + .quad 0x49992cc64e612cd8 + .quad 0x65bd1bea70f801de + .quad 0x1befb7c0fe49e28a + .quad 0xa86306cdb1b2ae4a + .quad 0x3b7ac0cd265c2a09 + + // 2^20 * 8 * G + + .quad 0x822bee438c01bcec + .quad 0x530cb525c0fbc73b + .quad 0x48519034c1953fe9 + .quad 0x265cc261e09a0f5b + .quad 0xf0d54e4f22ed39a7 + .quad 0xa2aae91e5608150a + .quad 0xf421b2e9eddae875 + .quad 0x31bc531d6b7de992 + .quad 0xdf3d134da980f971 + .quad 0x7a4fb8d1221a22a7 + .quad 0x3df7d42035aad6d8 + .quad 0x2a14edcc6a1a125e + + // 2^24 * 1 * G + + .quad 0xdf48ee0752cfce4e + .quad 0xc3fffaf306ec08b7 + .quad 0x05710b2ab95459c4 + .quad 0x161d25fa963ea38d + .quad 0x231a8c570478433c + .quad 0xb7b5270ec281439d + .quad 0xdbaa99eae3d9079f + .quad 0x2c03f5256c2b03d9 + .quad 0x790f18757b53a47d + .quad 0x307b0130cf0c5879 + .quad 0x31903d77257ef7f9 + .quad 
0x699468bdbd96bbaf + + // 2^24 * 2 * G + + .quad 0xbd1f2f46f4dafecf + .quad 0x7cef0114a47fd6f7 + .quad 0xd31ffdda4a47b37f + .quad 0x525219a473905785 + .quad 0xd8dd3de66aa91948 + .quad 0x485064c22fc0d2cc + .quad 0x9b48246634fdea2f + .quad 0x293e1c4e6c4a2e3a + .quad 0x376e134b925112e1 + .quad 0x703778b5dca15da0 + .quad 0xb04589af461c3111 + .quad 0x5b605c447f032823 + + // 2^24 * 3 * G + + .quad 0xb965805920c47c89 + .quad 0xe7f0100c923b8fcc + .quad 0x0001256502e2ef77 + .quad 0x24a76dcea8aeb3ee + .quad 0x3be9fec6f0e7f04c + .quad 0x866a579e75e34962 + .quad 0x5542ef161e1de61a + .quad 0x2f12fef4cc5abdd5 + .quad 0x0a4522b2dfc0c740 + .quad 0x10d06e7f40c9a407 + .quad 0xc6cf144178cff668 + .quad 0x5e607b2518a43790 + + // 2^24 * 4 * G + + .quad 0x58b31d8f6cdf1818 + .quad 0x35cfa74fc36258a2 + .quad 0xe1b3ff4f66e61d6e + .quad 0x5067acab6ccdd5f7 + .quad 0xa02c431ca596cf14 + .quad 0xe3c42d40aed3e400 + .quad 0xd24526802e0f26db + .quad 0x201f33139e457068 + .quad 0xfd527f6b08039d51 + .quad 0x18b14964017c0006 + .quad 0xd5220eb02e25a4a8 + .quad 0x397cba8862460375 + + // 2^24 * 5 * G + + .quad 0x30c13093f05959b2 + .quad 0xe23aa18de9a97976 + .quad 0x222fd491721d5e26 + .quad 0x2339d320766e6c3a + .quad 0x7815c3fbc81379e7 + .quad 0xa6619420dde12af1 + .quad 0xffa9c0f885a8fdd5 + .quad 0x771b4022c1e1c252 + .quad 0xd87dd986513a2fa7 + .quad 0xf5ac9b71f9d4cf08 + .quad 0xd06bc31b1ea283b3 + .quad 0x331a189219971a76 + + // 2^24 * 6 * G + + .quad 0xf5166f45fb4f80c6 + .quad 0x9c36c7de61c775cf + .quad 0xe3d4e81b9041d91c + .quad 0x31167c6b83bdfe21 + .quad 0x26512f3a9d7572af + .quad 0x5bcbe28868074a9e + .quad 0x84edc1c11180f7c4 + .quad 0x1ac9619ff649a67b + .quad 0xf22b3842524b1068 + .quad 0x5068343bee9ce987 + .quad 0xfc9d71844a6250c8 + .quad 0x612436341f08b111 + + // 2^24 * 7 * G + + .quad 0xd99d41db874e898d + .quad 0x09fea5f16c07dc20 + .quad 0x793d2c67d00f9bbc + .quad 0x46ebe2309e5eff40 + .quad 0x8b6349e31a2d2638 + .quad 0x9ddfb7009bd3fd35 + .quad 0x7f8bf1b8a3a06ba4 + .quad 0x1522aa3178d90445 + .quad 0x2c382f5369614938 + .quad 0xdafe409ab72d6d10 + .quad 0xe8c83391b646f227 + .quad 0x45fe70f50524306c + + // 2^24 * 8 * G + + .quad 0xda4875a6960c0b8c + .quad 0x5b68d076ef0e2f20 + .quad 0x07fb51cf3d0b8fd4 + .quad 0x428d1623a0e392d4 + .quad 0x62f24920c8951491 + .quad 0x05f007c83f630ca2 + .quad 0x6fbb45d2f5c9d4b8 + .quad 0x16619f6db57a2245 + .quad 0x084f4a4401a308fd + .quad 0xa82219c376a5caac + .quad 0xdeb8de4643d1bc7d + .quad 0x1d81592d60bd38c6 + + // 2^28 * 1 * G + + .quad 0xd833d7beec2a4c38 + .quad 0x2c9162830acc20ed + .quad 0xe93a47aa92df7581 + .quad 0x702d67a3333c4a81 + .quad 0x3a4a369a2f89c8a1 + .quad 0x63137a1d7c8de80d + .quad 0xbcac008a78eda015 + .quad 0x2cb8b3a5b483b03f + .quad 0x36e417cbcb1b90a1 + .quad 0x33b3ddaa7f11794e + .quad 0x3f510808885bc607 + .quad 0x24141dc0e6a8020d + + // 2^28 * 2 * G + + .quad 0x59f73c773fefee9d + .quad 0xb3f1ef89c1cf989d + .quad 0xe35dfb42e02e545f + .quad 0x5766120b47a1b47c + .quad 0x91925dccbd83157d + .quad 0x3ca1205322cc8094 + .quad 0x28e57f183f90d6e4 + .quad 0x1a4714cede2e767b + .quad 0xdb20ba0fb8b6b7ff + .quad 0xb732c3b677511fa1 + .quad 0xa92b51c099f02d89 + .quad 0x4f3875ad489ca5f1 + + // 2^28 * 3 * G + + .quad 0xc7fc762f4932ab22 + .quad 0x7ac0edf72f4c3c1b + .quad 0x5f6b55aa9aa895e8 + .quad 0x3680274dad0a0081 + .quad 0x79ed13f6ee73eec0 + .quad 0xa5c6526d69110bb1 + .quad 0xe48928c38603860c + .quad 0x722a1446fd7059f5 + .quad 0xd0959fe9a8cf8819 + .quad 0xd0a995508475a99c + .quad 0x6eac173320b09cc5 + .quad 0x628ecf04331b1095 + + // 2^28 * 4 * G + + .quad 0x98bcb118a9d0ddbc + .quad 0xee449e3408b4802b 
+ .quad 0x87089226b8a6b104 + .quad 0x685f349a45c7915d + .quad 0x9b41acf85c74ccf1 + .quad 0xb673318108265251 + .quad 0x99c92aed11adb147 + .quad 0x7a47d70d34ecb40f + .quad 0x60a0c4cbcc43a4f5 + .quad 0x775c66ca3677bea9 + .quad 0xa17aa1752ff8f5ed + .quad 0x11ded9020e01fdc0 + + // 2^28 * 5 * G + + .quad 0x890e7809caefe704 + .quad 0x8728296de30e8c6c + .quad 0x4c5cd2a392aeb1c9 + .quad 0x194263d15771531f + .quad 0x471f95b03bea93b7 + .quad 0x0552d7d43313abd3 + .quad 0xbd9370e2e17e3f7b + .quad 0x7b120f1db20e5bec + .quad 0x17d2fb3d86502d7a + .quad 0xb564d84450a69352 + .quad 0x7da962c8a60ed75d + .quad 0x00d0f85b318736aa + + // 2^28 * 6 * G + + .quad 0x978b142e777c84fd + .quad 0xf402644705a8c062 + .quad 0xa67ad51be7e612c7 + .quad 0x2f7b459698dd6a33 + .quad 0xa6753c1efd7621c1 + .quad 0x69c0b4a7445671f5 + .quad 0x971f527405b23c11 + .quad 0x387bc74851a8c7cd + .quad 0x81894b4d4a52a9a8 + .quad 0xadd93e12f6b8832f + .quad 0x184d8548b61bd638 + .quad 0x3f1c62dbd6c9f6cd + + // 2^28 * 7 * G + + .quad 0x2e8f1f0091910c1f + .quad 0xa4df4fe0bff2e12c + .quad 0x60c6560aee927438 + .quad 0x6338283facefc8fa + .quad 0x3fad3e40148f693d + .quad 0x052656e194eb9a72 + .quad 0x2f4dcbfd184f4e2f + .quad 0x406f8db1c482e18b + .quad 0x9e630d2c7f191ee4 + .quad 0x4fbf8301bc3ff670 + .quad 0x787d8e4e7afb73c4 + .quad 0x50d83d5be8f58fa5 + + // 2^28 * 8 * G + + .quad 0x85683916c11a1897 + .quad 0x2d69a4efe506d008 + .quad 0x39af1378f664bd01 + .quad 0x65942131361517c6 + .quad 0xc0accf90b4d3b66d + .quad 0xa7059de561732e60 + .quad 0x033d1f7870c6b0ba + .quad 0x584161cd26d946e4 + .quad 0xbbf2b1a072d27ca2 + .quad 0xbf393c59fbdec704 + .quad 0xe98dbbcee262b81e + .quad 0x02eebd0b3029b589 + + // 2^32 * 1 * G + + .quad 0x61368756a60dac5f + .quad 0x17e02f6aebabdc57 + .quad 0x7f193f2d4cce0f7d + .quad 0x20234a7789ecdcf0 + .quad 0x8765b69f7b85c5e8 + .quad 0x6ff0678bd168bab2 + .quad 0x3a70e77c1d330f9b + .quad 0x3a5f6d51b0af8e7c + .quad 0x76d20db67178b252 + .quad 0x071c34f9d51ed160 + .quad 0xf62a4a20b3e41170 + .quad 0x7cd682353cffe366 + + // 2^32 * 2 * G + + .quad 0x0be1a45bd887fab6 + .quad 0x2a846a32ba403b6e + .quad 0xd9921012e96e6000 + .quad 0x2838c8863bdc0943 + .quad 0xa665cd6068acf4f3 + .quad 0x42d92d183cd7e3d3 + .quad 0x5759389d336025d9 + .quad 0x3ef0253b2b2cd8ff + .quad 0xd16bb0cf4a465030 + .quad 0xfa496b4115c577ab + .quad 0x82cfae8af4ab419d + .quad 0x21dcb8a606a82812 + + // 2^32 * 3 * G + + .quad 0x5c6004468c9d9fc8 + .quad 0x2540096ed42aa3cb + .quad 0x125b4d4c12ee2f9c + .quad 0x0bc3d08194a31dab + .quad 0x9a8d00fabe7731ba + .quad 0x8203607e629e1889 + .quad 0xb2cc023743f3d97f + .quad 0x5d840dbf6c6f678b + .quad 0x706e380d309fe18b + .quad 0x6eb02da6b9e165c7 + .quad 0x57bbba997dae20ab + .quad 0x3a4276232ac196dd + + // 2^32 * 4 * G + + .quad 0x4b42432c8a7084fa + .quad 0x898a19e3dfb9e545 + .quad 0xbe9f00219c58e45d + .quad 0x1ff177cea16debd1 + .quad 0x3bf8c172db447ecb + .quad 0x5fcfc41fc6282dbd + .quad 0x80acffc075aa15fe + .quad 0x0770c9e824e1a9f9 + .quad 0xcf61d99a45b5b5fd + .quad 0x860984e91b3a7924 + .quad 0xe7300919303e3e89 + .quad 0x39f264fd41500b1e + + // 2^32 * 5 * G + + .quad 0xa7ad3417dbe7e29c + .quad 0xbd94376a2b9c139c + .quad 0xa0e91b8e93597ba9 + .quad 0x1712d73468889840 + .quad 0xd19b4aabfe097be1 + .quad 0xa46dfce1dfe01929 + .quad 0xc3c908942ca6f1ff + .quad 0x65c621272c35f14e + .quad 0xe72b89f8ce3193dd + .quad 0x4d103356a125c0bb + .quad 0x0419a93d2e1cfe83 + .quad 0x22f9800ab19ce272 + + // 2^32 * 6 * G + + .quad 0x605a368a3e9ef8cb + .quad 0xe3e9c022a5504715 + .quad 0x553d48b05f24248f + .quad 0x13f416cd647626e5 + .quad 0x42029fdd9a6efdac + .quad 
0xb912cebe34a54941 + .quad 0x640f64b987bdf37b + .quad 0x4171a4d38598cab4 + .quad 0xfa2758aa99c94c8c + .quad 0x23006f6fb000b807 + .quad 0xfbd291ddadda5392 + .quad 0x508214fa574bd1ab + + // 2^32 * 7 * G + + .quad 0xc20269153ed6fe4b + .quad 0xa65a6739511d77c4 + .quad 0xcbde26462c14af94 + .quad 0x22f960ec6faba74b + .quad 0x461a15bb53d003d6 + .quad 0xb2102888bcf3c965 + .quad 0x27c576756c683a5a + .quad 0x3a7758a4c86cb447 + .quad 0x548111f693ae5076 + .quad 0x1dae21df1dfd54a6 + .quad 0x12248c90f3115e65 + .quad 0x5d9fd15f8de7f494 + + // 2^32 * 8 * G + + .quad 0x031408d36d63727f + .quad 0x6a379aefd7c7b533 + .quad 0xa9e18fc5ccaee24b + .quad 0x332f35914f8fbed3 + .quad 0x3f244d2aeed7521e + .quad 0x8e3a9028432e9615 + .quad 0xe164ba772e9c16d4 + .quad 0x3bc187fa47eb98d8 + .quad 0x6d470115ea86c20c + .quad 0x998ab7cb6c46d125 + .quad 0xd77832b53a660188 + .quad 0x450d81ce906fba03 + + // 2^36 * 1 * G + + .quad 0xf8ae4d2ad8453902 + .quad 0x7018058ee8db2d1d + .quad 0xaab3995fc7d2c11e + .quad 0x53b16d2324ccca79 + .quad 0x23264d66b2cae0b5 + .quad 0x7dbaed33ebca6576 + .quad 0x030ebed6f0d24ac8 + .quad 0x2a887f78f7635510 + .quad 0x2a23b9e75c012d4f + .quad 0x0c974651cae1f2ea + .quad 0x2fb63273675d70ca + .quad 0x0ba7250b864403f5 + + // 2^36 * 2 * G + + .quad 0xbb0d18fd029c6421 + .quad 0xbc2d142189298f02 + .quad 0x8347f8e68b250e96 + .quad 0x7b9f2fe8032d71c9 + .quad 0xdd63589386f86d9c + .quad 0x61699176e13a85a4 + .quad 0x2e5111954eaa7d57 + .quad 0x32c21b57fb60bdfb + .quad 0xd87823cd319e0780 + .quad 0xefc4cfc1897775c5 + .quad 0x4854fb129a0ab3f7 + .quad 0x12c49d417238c371 + + // 2^36 * 3 * G + + .quad 0x0950b533ffe83769 + .quad 0x21861c1d8e1d6bd1 + .quad 0xf022d8381302e510 + .quad 0x2509200c6391cab4 + .quad 0x09b3a01783799542 + .quad 0x626dd08faad5ee3f + .quad 0xba00bceeeb70149f + .quad 0x1421b246a0a444c9 + .quad 0x4aa43a8e8c24a7c7 + .quad 0x04c1f540d8f05ef5 + .quad 0xadba5e0c0b3eb9dc + .quad 0x2ab5504448a49ce3 + + // 2^36 * 4 * G + + .quad 0x2ed227266f0f5dec + .quad 0x9824ee415ed50824 + .quad 0x807bec7c9468d415 + .quad 0x7093bae1b521e23f + .quad 0xdc07ac631c5d3afa + .quad 0x58615171f9df8c6c + .quad 0x72a079d89d73e2b0 + .quad 0x7301f4ceb4eae15d + .quad 0x6409e759d6722c41 + .quad 0xa674e1cf72bf729b + .quad 0xbc0a24eb3c21e569 + .quad 0x390167d24ebacb23 + + // 2^36 * 5 * G + + .quad 0x27f58e3bba353f1c + .quad 0x4c47764dbf6a4361 + .quad 0xafbbc4e56e562650 + .quad 0x07db2ee6aae1a45d + .quad 0xd7bb054ba2f2120b + .quad 0xe2b9ceaeb10589b7 + .quad 0x3fe8bac8f3c0edbe + .quad 0x4cbd40767112cb69 + .quad 0x0b603cc029c58176 + .quad 0x5988e3825cb15d61 + .quad 0x2bb61413dcf0ad8d + .quad 0x7b8eec6c74183287 + + // 2^36 * 6 * G + + .quad 0xe4ca40782cd27cb0 + .quad 0xdaf9c323fbe967bd + .quad 0xb29bd34a8ad41e9e + .quad 0x72810497626ede4d + .quad 0x32fee570fc386b73 + .quad 0xda8b0141da3a8cc7 + .quad 0x975ffd0ac8968359 + .quad 0x6ee809a1b132a855 + .quad 0x9444bb31fcfd863a + .quad 0x2fe3690a3e4e48c5 + .quad 0xdc29c867d088fa25 + .quad 0x13bd1e38d173292e + + // 2^36 * 7 * G + + .quad 0xd32b4cd8696149b5 + .quad 0xe55937d781d8aab7 + .quad 0x0bcb2127ae122b94 + .quad 0x41e86fcfb14099b0 + .quad 0x223fb5cf1dfac521 + .quad 0x325c25316f554450 + .quad 0x030b98d7659177ac + .quad 0x1ed018b64f88a4bd + .quad 0x3630dfa1b802a6b0 + .quad 0x880f874742ad3bd5 + .quad 0x0af90d6ceec5a4d4 + .quad 0x746a247a37cdc5d9 + + // 2^36 * 8 * G + + .quad 0xd531b8bd2b7b9af6 + .quad 0x5005093537fc5b51 + .quad 0x232fcf25c593546d + .quad 0x20a365142bb40f49 + .quad 0x6eccd85278d941ed + .quad 0x2254ae83d22f7843 + .quad 0xc522d02e7bbfcdb7 + .quad 0x681e3351bff0e4e2 + .quad 
0x8b64b59d83034f45 + .quad 0x2f8b71f21fa20efb + .quad 0x69249495ba6550e4 + .quad 0x539ef98e45d5472b + + // 2^40 * 1 * G + + .quad 0x6e7bb6a1a6205275 + .quad 0xaa4f21d7413c8e83 + .quad 0x6f56d155e88f5cb2 + .quad 0x2de25d4ba6345be1 + .quad 0xd074d8961cae743f + .quad 0xf86d18f5ee1c63ed + .quad 0x97bdc55be7f4ed29 + .quad 0x4cbad279663ab108 + .quad 0x80d19024a0d71fcd + .quad 0xc525c20afb288af8 + .quad 0xb1a3974b5f3a6419 + .quad 0x7d7fbcefe2007233 + + // 2^40 * 2 * G + + .quad 0xfaef1e6a266b2801 + .quad 0x866c68c4d5739f16 + .quad 0xf68a2fbc1b03762c + .quad 0x5975435e87b75a8d + .quad 0xcd7c5dc5f3c29094 + .quad 0xc781a29a2a9105ab + .quad 0x80c61d36421c3058 + .quad 0x4f9cd196dcd8d4d7 + .quad 0x199297d86a7b3768 + .quad 0xd0d058241ad17a63 + .quad 0xba029cad5c1c0c17 + .quad 0x7ccdd084387a0307 + + // 2^40 * 3 * G + + .quad 0xdca6422c6d260417 + .quad 0xae153d50948240bd + .quad 0xa9c0c1b4fb68c677 + .quad 0x428bd0ed61d0cf53 + .quad 0x9b0c84186760cc93 + .quad 0xcdae007a1ab32a99 + .quad 0xa88dec86620bda18 + .quad 0x3593ca848190ca44 + .quad 0x9213189a5e849aa7 + .quad 0xd4d8c33565d8facd + .quad 0x8c52545b53fdbbd1 + .quad 0x27398308da2d63e6 + + // 2^40 * 4 * G + + .quad 0x42c38d28435ed413 + .quad 0xbd50f3603278ccc9 + .quad 0xbb07ab1a79da03ef + .quad 0x269597aebe8c3355 + .quad 0xb9a10e4c0a702453 + .quad 0x0fa25866d57d1bde + .quad 0xffb9d9b5cd27daf7 + .quad 0x572c2945492c33fd + .quad 0xc77fc745d6cd30be + .quad 0xe4dfe8d3e3baaefb + .quad 0xa22c8830aa5dda0c + .quad 0x7f985498c05bca80 + + // 2^40 * 5 * G + + .quad 0x3849ce889f0be117 + .quad 0x8005ad1b7b54a288 + .quad 0x3da3c39f23fc921c + .quad 0x76c2ec470a31f304 + .quad 0xd35615520fbf6363 + .quad 0x08045a45cf4dfba6 + .quad 0xeec24fbc873fa0c2 + .quad 0x30f2653cd69b12e7 + .quad 0x8a08c938aac10c85 + .quad 0x46179b60db276bcb + .quad 0xa920c01e0e6fac70 + .quad 0x2f1273f1596473da + + // 2^40 * 6 * G + + .quad 0x4739fc7c8ae01e11 + .quad 0xfd5274904a6aab9f + .quad 0x41d98a8287728f2e + .quad 0x5d9e572ad85b69f2 + .quad 0x30488bd755a70bc0 + .quad 0x06d6b5a4f1d442e7 + .quad 0xead1a69ebc596162 + .quad 0x38ac1997edc5f784 + .quad 0x0666b517a751b13b + .quad 0x747d06867e9b858c + .quad 0xacacc011454dde49 + .quad 0x22dfcd9cbfe9e69c + + // 2^40 * 7 * G + + .quad 0x8ddbd2e0c30d0cd9 + .quad 0xad8e665facbb4333 + .quad 0x8f6b258c322a961f + .quad 0x6b2916c05448c1c7 + .quad 0x56ec59b4103be0a1 + .quad 0x2ee3baecd259f969 + .quad 0x797cb29413f5cd32 + .quad 0x0fe9877824cde472 + .quad 0x7edb34d10aba913b + .quad 0x4ea3cd822e6dac0e + .quad 0x66083dff6578f815 + .quad 0x4c303f307ff00a17 + + // 2^40 * 8 * G + + .quad 0xd30a3bd617b28c85 + .quad 0xc5d377b739773bea + .quad 0xc6c6e78c1e6a5cbf + .quad 0x0d61b8f78b2ab7c4 + .quad 0x29fc03580dd94500 + .quad 0xecd27aa46fbbec93 + .quad 0x130a155fc2e2a7f8 + .quad 0x416b151ab706a1d5 + .quad 0x56a8d7efe9c136b0 + .quad 0xbd07e5cd58e44b20 + .quad 0xafe62fda1b57e0ab + .quad 0x191a2af74277e8d2 + + // 2^44 * 1 * G + + .quad 0xd550095bab6f4985 + .quad 0x04f4cd5b4fbfaf1a + .quad 0x9d8e2ed12a0c7540 + .quad 0x2bc24e04b2212286 + .quad 0x09d4b60b2fe09a14 + .quad 0xc384f0afdbb1747e + .quad 0x58e2ea8978b5fd6e + .quad 0x519ef577b5e09b0a + .quad 0x1863d7d91124cca9 + .quad 0x7ac08145b88a708e + .quad 0x2bcd7309857031f5 + .quad 0x62337a6e8ab8fae5 + + // 2^44 * 2 * G + + .quad 0x4bcef17f06ffca16 + .quad 0xde06e1db692ae16a + .quad 0x0753702d614f42b0 + .quad 0x5f6041b45b9212d0 + .quad 0xd1ab324e1b3a1273 + .quad 0x18947cf181055340 + .quad 0x3b5d9567a98c196e + .quad 0x7fa00425802e1e68 + .quad 0x7d531574028c2705 + .quad 0x80317d69db0d75fe + .quad 0x30fface8ef8c8ddd + .quad 
0x7e9de97bb6c3e998 + + // 2^44 * 3 * G + + .quad 0x1558967b9e6585a3 + .quad 0x97c99ce098e98b92 + .quad 0x10af149b6eb3adad + .quad 0x42181fe8f4d38cfa + .quad 0xf004be62a24d40dd + .quad 0xba0659910452d41f + .quad 0x81c45ee162a44234 + .quad 0x4cb829d8a22266ef + .quad 0x1dbcaa8407b86681 + .quad 0x081f001e8b26753b + .quad 0x3cd7ce6a84048e81 + .quad 0x78af11633f25f22c + + // 2^44 * 4 * G + + .quad 0x8416ebd40b50babc + .quad 0x1508722628208bee + .quad 0xa3148fafb9c1c36d + .quad 0x0d07daacd32d7d5d + .quad 0x3241c00e7d65318c + .quad 0xe6bee5dcd0e86de7 + .quad 0x118b2dc2fbc08c26 + .quad 0x680d04a7fc603dc3 + .quad 0xf9c2414a695aa3eb + .quad 0xdaa42c4c05a68f21 + .quad 0x7c6c23987f93963e + .quad 0x210e8cd30c3954e3 + + // 2^44 * 5 * G + + .quad 0xac4201f210a71c06 + .quad 0x6a65e0aef3bfb021 + .quad 0xbc42c35c393632f7 + .quad 0x56ea8db1865f0742 + .quad 0x2b50f16137fe6c26 + .quad 0xe102bcd856e404d8 + .quad 0x12b0f1414c561f6b + .quad 0x51b17bc8d028ec91 + .quad 0xfff5fb4bcf535119 + .quad 0xf4989d79df1108a0 + .quad 0xbdfcea659a3ba325 + .quad 0x18a11f1174d1a6f2 + + // 2^44 * 6 * G + + .quad 0x407375ab3f6bba29 + .quad 0x9ec3b6d8991e482e + .quad 0x99c80e82e55f92e9 + .quad 0x307c13b6fb0c0ae1 + .quad 0xfbd63cdad27a5f2c + .quad 0xf00fc4bc8aa106d7 + .quad 0x53fb5c1a8e64a430 + .quad 0x04eaabe50c1a2e85 + .quad 0x24751021cb8ab5e7 + .quad 0xfc2344495c5010eb + .quad 0x5f1e717b4e5610a1 + .quad 0x44da5f18c2710cd5 + + // 2^44 * 7 * G + + .quad 0x033cc55ff1b82eb5 + .quad 0xb15ae36d411cae52 + .quad 0xba40b6198ffbacd3 + .quad 0x768edce1532e861f + .quad 0x9156fe6b89d8eacc + .quad 0xe6b79451e23126a1 + .quad 0xbd7463d93944eb4e + .quad 0x726373f6767203ae + .quad 0xe305ca72eb7ef68a + .quad 0x662cf31f70eadb23 + .quad 0x18f026fdb4c45b68 + .quad 0x513b5384b5d2ecbd + + // 2^44 * 8 * G + + .quad 0x46d46280c729989e + .quad 0x4b93fbd05368a5dd + .quad 0x63df3f81d1765a89 + .quad 0x34cebd64b9a0a223 + .quad 0x5e2702878af34ceb + .quad 0x900b0409b946d6ae + .quad 0x6512ebf7dabd8512 + .quad 0x61d9b76988258f81 + .quad 0xa6c5a71349b7d94b + .quad 0xa3f3d15823eb9446 + .quad 0x0416fbd277484834 + .quad 0x69d45e6f2c70812f + + // 2^48 * 1 * G + + .quad 0xce16f74bc53c1431 + .quad 0x2b9725ce2072edde + .quad 0xb8b9c36fb5b23ee7 + .quad 0x7e2e0e450b5cc908 + .quad 0x9fe62b434f460efb + .quad 0xded303d4a63607d6 + .quad 0xf052210eb7a0da24 + .quad 0x237e7dbe00545b93 + .quad 0x013575ed6701b430 + .quad 0x231094e69f0bfd10 + .quad 0x75320f1583e47f22 + .quad 0x71afa699b11155e3 + + // 2^48 * 2 * G + + .quad 0x65ce6f9b3953b61d + .quad 0xc65839eaafa141e6 + .quad 0x0f435ffda9f759fe + .quad 0x021142e9c2b1c28e + .quad 0xea423c1c473b50d6 + .quad 0x51e87a1f3b38ef10 + .quad 0x9b84bf5fb2c9be95 + .quad 0x00731fbc78f89a1c + .quad 0xe430c71848f81880 + .quad 0xbf960c225ecec119 + .quad 0xb6dae0836bba15e3 + .quad 0x4c4d6f3347e15808 + + // 2^48 * 3 * G + + .quad 0x18f7eccfc17d1fc9 + .quad 0x6c75f5a651403c14 + .quad 0xdbde712bf7ee0cdf + .quad 0x193fddaaa7e47a22 + .quad 0x2f0cddfc988f1970 + .quad 0x6b916227b0b9f51b + .quad 0x6ec7b6c4779176be + .quad 0x38bf9500a88f9fa8 + .quad 0x1fd2c93c37e8876f + .quad 0xa2f61e5a18d1462c + .quad 0x5080f58239241276 + .quad 0x6a6fb99ebf0d4969 + + // 2^48 * 4 * G + + .quad 0x6a46c1bb560855eb + .quad 0x2416bb38f893f09d + .quad 0xd71d11378f71acc1 + .quad 0x75f76914a31896ea + .quad 0xeeb122b5b6e423c6 + .quad 0x939d7010f286ff8e + .quad 0x90a92a831dcf5d8c + .quad 0x136fda9f42c5eb10 + .quad 0xf94cdfb1a305bdd1 + .quad 0x0f364b9d9ff82c08 + .quad 0x2a87d8a5c3bb588a + .quad 0x022183510be8dcba + + // 2^48 * 5 * G + + .quad 0x4af766385ead2d14 + .quad 0xa08ed880ca7c5830 
+ .quad 0x0d13a6e610211e3d + .quad 0x6a071ce17b806c03 + .quad 0x9d5a710143307a7f + .quad 0xb063de9ec47da45f + .quad 0x22bbfe52be927ad3 + .quad 0x1387c441fd40426c + .quad 0xb5d3c3d187978af8 + .quad 0x722b5a3d7f0e4413 + .quad 0x0d7b4848bb477ca0 + .quad 0x3171b26aaf1edc92 + + // 2^48 * 6 * G + + .quad 0xa92f319097564ca8 + .quad 0xff7bb84c2275e119 + .quad 0x4f55fe37a4875150 + .quad 0x221fd4873cf0835a + .quad 0xa60db7d8b28a47d1 + .quad 0xa6bf14d61770a4f1 + .quad 0xd4a1f89353ddbd58 + .quad 0x6c514a63344243e9 + .quad 0x2322204f3a156341 + .quad 0xfb73e0e9ba0a032d + .quad 0xfce0dd4c410f030e + .quad 0x48daa596fb924aaa + + // 2^48 * 7 * G + + .quad 0x6eca8e665ca59cc7 + .quad 0xa847254b2e38aca0 + .quad 0x31afc708d21e17ce + .quad 0x676dd6fccad84af7 + .quad 0x14f61d5dc84c9793 + .quad 0x9941f9e3ef418206 + .quad 0xcdf5b88f346277ac + .quad 0x58c837fa0e8a79a9 + .quad 0x0cf9688596fc9058 + .quad 0x1ddcbbf37b56a01b + .quad 0xdcc2e77d4935d66a + .quad 0x1c4f73f2c6a57f0a + + // 2^48 * 8 * G + + .quad 0x0e7a4fbd305fa0bb + .quad 0x829d4ce054c663ad + .quad 0xf421c3832fe33848 + .quad 0x795ac80d1bf64c42 + .quad 0xb36e706efc7c3484 + .quad 0x73dfc9b4c3c1cf61 + .quad 0xeb1d79c9781cc7e5 + .quad 0x70459adb7daf675c + .quad 0x1b91db4991b42bb3 + .quad 0x572696234b02dcca + .quad 0x9fdf9ee51f8c78dc + .quad 0x5fe162848ce21fd3 + + // 2^52 * 1 * G + + .quad 0xe2790aae4d077c41 + .quad 0x8b938270db7469a3 + .quad 0x6eb632dc8abd16a2 + .quad 0x720814ecaa064b72 + .quad 0x315c29c795115389 + .quad 0xd7e0e507862f74ce + .quad 0x0c4a762185927432 + .quad 0x72de6c984a25a1e4 + .quad 0xae9ab553bf6aa310 + .quad 0x050a50a9806d6e1b + .quad 0x92bb7403adff5139 + .quad 0x0394d27645be618b + + // 2^52 * 2 * G + + .quad 0x4d572251857eedf4 + .quad 0xe3724edde19e93c5 + .quad 0x8a71420e0b797035 + .quad 0x3b3c833687abe743 + .quad 0xf5396425b23545a4 + .quad 0x15a7a27e98fbb296 + .quad 0xab6c52bc636fdd86 + .quad 0x79d995a8419334ee + .quad 0xcd8a8ea61195dd75 + .quad 0xa504d8a81dd9a82f + .quad 0x540dca81a35879b6 + .quad 0x60dd16a379c86a8a + + // 2^52 * 3 * G + + .quad 0x35a2c8487381e559 + .quad 0x596ffea6d78082cb + .quad 0xcb9771ebdba7b653 + .quad 0x5a08b5019b4da685 + .quad 0x3501d6f8153e47b8 + .quad 0xb7a9675414a2f60c + .quad 0x112ee8b6455d9523 + .quad 0x4e62a3c18112ea8a + .quad 0xc8d4ac04516ab786 + .quad 0x595af3215295b23d + .quad 0xd6edd234db0230c1 + .quad 0x0929efe8825b41cc + + // 2^52 * 4 * G + + .quad 0x5f0601d1cbd0f2d3 + .quad 0x736e412f6132bb7f + .quad 0x83604432238dde87 + .quad 0x1e3a5272f5c0753c + .quad 0x8b3172b7ad56651d + .quad 0x01581b7a3fabd717 + .quad 0x2dc94df6424df6e4 + .quad 0x30376e5d2c29284f + .quad 0xd2918da78159a59c + .quad 0x6bdc1cd93f0713f3 + .quad 0x565f7a934acd6590 + .quad 0x53daacec4cb4c128 + + // 2^52 * 5 * G + + .quad 0x4ca73bd79cc8a7d6 + .quad 0x4d4a738f47e9a9b2 + .quad 0xf4cbf12942f5fe00 + .quad 0x01a13ff9bdbf0752 + .quad 0x99852bc3852cfdb0 + .quad 0x2cc12e9559d6ed0b + .quad 0x70f9e2bf9b5ac27b + .quad 0x4f3b8c117959ae99 + .quad 0x55b6c9c82ff26412 + .quad 0x1ac4a8c91fb667a8 + .quad 0xd527bfcfeb778bf2 + .quad 0x303337da7012a3be + + // 2^52 * 6 * G + + .quad 0x955422228c1c9d7c + .quad 0x01fac1371a9b340f + .quad 0x7e8d9177925b48d7 + .quad 0x53f8ad5661b3e31b + .quad 0x976d3ccbfad2fdd1 + .quad 0xcb88839737a640a8 + .quad 0x2ff00c1d6734cb25 + .quad 0x269ff4dc789c2d2b + .quad 0x0c003fbdc08d678d + .quad 0x4d982fa37ead2b17 + .quad 0xc07e6bcdb2e582f1 + .quad 0x296c7291df412a44 + + // 2^52 * 7 * G + + .quad 0x7903de2b33daf397 + .quad 0xd0ff0619c9a624b3 + .quad 0x8a1d252b555b3e18 + .quad 0x2b6d581c52e0b7c0 + .quad 0xdfb23205dab8b59e + .quad 
0x465aeaa0c8092250 + .quad 0xd133c1189a725d18 + .quad 0x2327370261f117d1 + .quad 0x3d0543d3623e7986 + .quad 0x679414c2c278a354 + .quad 0xae43f0cc726196f6 + .quad 0x7836c41f8245eaba + + // 2^52 * 8 * G + + .quad 0xe7a254db49e95a81 + .quad 0x5192d5d008b0ad73 + .quad 0x4d20e5b1d00afc07 + .quad 0x5d55f8012cf25f38 + .quad 0xca651e848011937c + .quad 0xc6b0c46e6ef41a28 + .quad 0xb7021ba75f3f8d52 + .quad 0x119dff99ead7b9fd + .quad 0x43eadfcbf4b31d4d + .quad 0xc6503f7411148892 + .quad 0xfeee68c5060d3b17 + .quad 0x329293b3dd4a0ac8 + + // 2^56 * 1 * G + + .quad 0x4e59214fe194961a + .quad 0x49be7dc70d71cd4f + .quad 0x9300cfd23b50f22d + .quad 0x4789d446fc917232 + .quad 0x2879852d5d7cb208 + .quad 0xb8dedd70687df2e7 + .quad 0xdc0bffab21687891 + .quad 0x2b44c043677daa35 + .quad 0x1a1c87ab074eb78e + .quad 0xfac6d18e99daf467 + .quad 0x3eacbbcd484f9067 + .quad 0x60c52eef2bb9a4e4 + + // 2^56 * 2 * G + + .quad 0x0b5d89bc3bfd8bf1 + .quad 0xb06b9237c9f3551a + .quad 0x0e4c16b0d53028f5 + .quad 0x10bc9c312ccfcaab + .quad 0x702bc5c27cae6d11 + .quad 0x44c7699b54a48cab + .quad 0xefbc4056ba492eb2 + .quad 0x70d77248d9b6676d + .quad 0xaa8ae84b3ec2a05b + .quad 0x98699ef4ed1781e0 + .quad 0x794513e4708e85d1 + .quad 0x63755bd3a976f413 + + // 2^56 * 3 * G + + .quad 0xb55fa03e2ad10853 + .quad 0x356f75909ee63569 + .quad 0x9ff9f1fdbe69b890 + .quad 0x0d8cc1c48bc16f84 + .quad 0x3dc7101897f1acb7 + .quad 0x5dda7d5ec165bbd8 + .quad 0x508e5b9c0fa1020f + .quad 0x2763751737c52a56 + .quad 0x029402d36eb419a9 + .quad 0xf0b44e7e77b460a5 + .quad 0xcfa86230d43c4956 + .quad 0x70c2dd8a7ad166e7 + + // 2^56 * 4 * G + + .quad 0x656194509f6fec0e + .quad 0xee2e7ea946c6518d + .quad 0x9733c1f367e09b5c + .quad 0x2e0fac6363948495 + .quad 0x91d4967db8ed7e13 + .quad 0x74252f0ad776817a + .quad 0xe40982e00d852564 + .quad 0x32b8613816a53ce5 + .quad 0x79e7f7bee448cd64 + .quad 0x6ac83a67087886d0 + .quad 0xf89fd4d9a0e4db2e + .quad 0x4179215c735a4f41 + + // 2^56 * 5 * G + + .quad 0x8c7094e7d7dced2a + .quad 0x97fb8ac347d39c70 + .quad 0xe13be033a906d902 + .quad 0x700344a30cd99d76 + .quad 0xe4ae33b9286bcd34 + .quad 0xb7ef7eb6559dd6dc + .quad 0x278b141fb3d38e1f + .quad 0x31fa85662241c286 + .quad 0xaf826c422e3622f4 + .quad 0xc12029879833502d + .quad 0x9bc1b7e12b389123 + .quad 0x24bb2312a9952489 + + // 2^56 * 6 * G + + .quad 0xb1a8ed1732de67c3 + .quad 0x3cb49418461b4948 + .quad 0x8ebd434376cfbcd2 + .quad 0x0fee3e871e188008 + .quad 0x41f80c2af5f85c6b + .quad 0x687284c304fa6794 + .quad 0x8945df99a3ba1bad + .quad 0x0d1d2af9ffeb5d16 + .quad 0xa9da8aa132621edf + .quad 0x30b822a159226579 + .quad 0x4004197ba79ac193 + .quad 0x16acd79718531d76 + + // 2^56 * 7 * G + + .quad 0x72df72af2d9b1d3d + .quad 0x63462a36a432245a + .quad 0x3ecea07916b39637 + .quad 0x123e0ef6b9302309 + .quad 0xc959c6c57887b6ad + .quad 0x94e19ead5f90feba + .quad 0x16e24e62a342f504 + .quad 0x164ed34b18161700 + .quad 0x487ed94c192fe69a + .quad 0x61ae2cea3a911513 + .quad 0x877bf6d3b9a4de27 + .quad 0x78da0fc61073f3eb + + // 2^56 * 8 * G + + .quad 0x5bf15d28e52bc66a + .quad 0x2c47e31870f01a8e + .quad 0x2419afbc06c28bdd + .quad 0x2d25deeb256b173a + .quad 0xa29f80f1680c3a94 + .quad 0x71f77e151ae9e7e6 + .quad 0x1100f15848017973 + .quad 0x054aa4b316b38ddd + .quad 0xdfc8468d19267cb8 + .quad 0x0b28789c66e54daf + .quad 0x2aeb1d2a666eec17 + .quad 0x134610a6ab7da760 + + // 2^60 * 1 * G + + .quad 0xcaf55ec27c59b23f + .quad 0x99aeed3e154d04f2 + .quad 0x68441d72e14141f4 + .quad 0x140345133932a0a2 + .quad 0xd91430e0dc028c3c + .quad 0x0eb955a85217c771 + .quad 0x4b09e1ed2c99a1fa + .quad 0x42881af2bd6a743c + .quad 
0x7bfec69aab5cad3d + .quad 0xc23e8cd34cb2cfad + .quad 0x685dd14bfb37d6a2 + .quad 0x0ad6d64415677a18 + + // 2^60 * 2 * G + + .quad 0x781a439e417becb5 + .quad 0x4ac5938cd10e0266 + .quad 0x5da385110692ac24 + .quad 0x11b065a2ade31233 + .quad 0x7914892847927e9f + .quad 0x33dad6ef370aa877 + .quad 0x1f8f24fa11122703 + .quad 0x5265ac2f2adf9592 + .quad 0x405fdd309afcb346 + .quad 0xd9723d4428e63f54 + .quad 0x94c01df05f65aaae + .quad 0x43e4dc3ae14c0809 + + // 2^60 * 3 * G + + .quad 0xbc12c7f1a938a517 + .quad 0x473028ab3180b2e1 + .quad 0x3f78571efbcd254a + .quad 0x74e534426ff6f90f + .quad 0xea6f7ac3adc2c6a3 + .quad 0xd0e928f6e9717c94 + .quad 0xe2d379ead645eaf5 + .quad 0x46dd8785c51ffbbe + .quad 0x709801be375c8898 + .quad 0x4b06dab5e3fd8348 + .quad 0x75880ced27230714 + .quad 0x2b09468fdd2f4c42 + + // 2^60 * 4 * G + + .quad 0x97c749eeb701cb96 + .quad 0x83f438d4b6a369c3 + .quad 0x62962b8b9a402cd9 + .quad 0x6976c7509888df7b + .quad 0x5b97946582ffa02a + .quad 0xda096a51fea8f549 + .quad 0xa06351375f77af9b + .quad 0x1bcfde61201d1e76 + .quad 0x4a4a5490246a59a2 + .quad 0xd63ebddee87fdd90 + .quad 0xd9437c670d2371fa + .quad 0x69e87308d30f8ed6 + + // 2^60 * 5 * G + + .quad 0x435a8bb15656beb0 + .quad 0xf8fac9ba4f4d5bca + .quad 0xb9b278c41548c075 + .quad 0x3eb0ef76e892b622 + .quad 0x0f80bf028bc80303 + .quad 0x6aae16b37a18cefb + .quad 0xdd47ea47d72cd6a3 + .quad 0x61943588f4ed39aa + .quad 0xd26e5c3e91039f85 + .quad 0xc0e9e77df6f33aa9 + .quad 0xe8968c5570066a93 + .quad 0x3c34d1881faaaddd + + // 2^60 * 6 * G + + .quad 0x3f9d2b5ea09f9ec0 + .quad 0x1dab3b6fb623a890 + .quad 0xa09ba3ea72d926c4 + .quad 0x374193513fd8b36d + .quad 0xbd5b0b8f2fffe0d9 + .quad 0x6aa254103ed24fb9 + .quad 0x2ac7d7bcb26821c4 + .quad 0x605b394b60dca36a + .quad 0xb4e856e45a9d1ed2 + .quad 0xefe848766c97a9a2 + .quad 0xb104cf641e5eee7d + .quad 0x2f50b81c88a71c8f + + // 2^60 * 7 * G + + .quad 0x31723c61fc6811bb + .quad 0x9cb450486211800f + .quad 0x768933d347995753 + .quad 0x3491a53502752fcd + .quad 0x2b552ca0a7da522a + .quad 0x3230b336449b0250 + .quad 0xf2c4c5bca4b99fb9 + .quad 0x7b2c674958074a22 + .quad 0xd55165883ed28cdf + .quad 0x12d84fd2d362de39 + .quad 0x0a874ad3e3378e4f + .quad 0x000d2b1f7c763e74 + + // 2^60 * 8 * G + + .quad 0x3d420811d06d4a67 + .quad 0xbefc048590e0ffe3 + .quad 0xf870c6b7bd487bde + .quad 0x6e2a7316319afa28 + .quad 0x9624778c3e94a8ab + .quad 0x0ad6f3cee9a78bec + .quad 0x948ac7810d743c4f + .quad 0x76627935aaecfccc + .quad 0x56a8ac24d6d59a9f + .quad 0xc8db753e3096f006 + .quad 0x477f41e68f4c5299 + .quad 0x588d851cf6c86114 + + // 2^64 * 1 * G + + .quad 0x51138ec78df6b0fe + .quad 0x5397da89e575f51b + .quad 0x09207a1d717af1b9 + .quad 0x2102fdba2b20d650 + .quad 0xcd2a65e777d1f515 + .quad 0x548991878faa60f1 + .quad 0xb1b73bbcdabc06e5 + .quad 0x654878cba97cc9fb + .quad 0x969ee405055ce6a1 + .quad 0x36bca7681251ad29 + .quad 0x3a1af517aa7da415 + .quad 0x0ad725db29ecb2ba + + // 2^64 * 2 * G + + .quad 0xdc4267b1834e2457 + .quad 0xb67544b570ce1bc5 + .quad 0x1af07a0bf7d15ed7 + .quad 0x4aefcffb71a03650 + .quad 0xfec7bc0c9b056f85 + .quad 0x537d5268e7f5ffd7 + .quad 0x77afc6624312aefa + .quad 0x4f675f5302399fd9 + .quad 0xc32d36360415171e + .quad 0xcd2bef118998483b + .quad 0x870a6eadd0945110 + .quad 0x0bccbb72a2a86561 + + // 2^64 * 3 * G + + .quad 0x185e962feab1a9c8 + .quad 0x86e7e63565147dcd + .quad 0xb092e031bb5b6df2 + .quad 0x4024f0ab59d6b73e + .quad 0x186d5e4c50fe1296 + .quad 0xe0397b82fee89f7e + .quad 0x3bc7f6c5507031b0 + .quad 0x6678fd69108f37c2 + .quad 0x1586fa31636863c2 + .quad 0x07f68c48572d33f2 + .quad 0x4f73cc9f789eaefc + .quad 
0x2d42e2108ead4701 + + // 2^64 * 4 * G + + .quad 0x97f5131594dfd29b + .quad 0x6155985d313f4c6a + .quad 0xeba13f0708455010 + .quad 0x676b2608b8d2d322 + .quad 0x21717b0d0f537593 + .quad 0x914e690b131e064c + .quad 0x1bb687ae752ae09f + .quad 0x420bf3a79b423c6e + .quad 0x8138ba651c5b2b47 + .quad 0x8671b6ec311b1b80 + .quad 0x7bff0cb1bc3135b0 + .quad 0x745d2ffa9c0cf1e0 + + // 2^64 * 5 * G + + .quad 0xbf525a1e2bc9c8bd + .quad 0xea5b260826479d81 + .quad 0xd511c70edf0155db + .quad 0x1ae23ceb960cf5d0 + .quad 0x6036df5721d34e6a + .quad 0xb1db8827997bb3d0 + .quad 0xd3c209c3c8756afa + .quad 0x06e15be54c1dc839 + .quad 0x5b725d871932994a + .quad 0x32351cb5ceb1dab0 + .quad 0x7dc41549dab7ca05 + .quad 0x58ded861278ec1f7 + + // 2^64 * 6 * G + + .quad 0xd8173793f266c55c + .quad 0xc8c976c5cc454e49 + .quad 0x5ce382f8bc26c3a8 + .quad 0x2ff39de85485f6f9 + .quad 0x2dfb5ba8b6c2c9a8 + .quad 0x48eeef8ef52c598c + .quad 0x33809107f12d1573 + .quad 0x08ba696b531d5bd8 + .quad 0x77ed3eeec3efc57a + .quad 0x04e05517d4ff4811 + .quad 0xea3d7a3ff1a671cb + .quad 0x120633b4947cfe54 + + // 2^64 * 7 * G + + .quad 0x0b94987891610042 + .quad 0x4ee7b13cecebfae8 + .quad 0x70be739594f0a4c0 + .quad 0x35d30a99b4d59185 + .quad 0x82bd31474912100a + .quad 0xde237b6d7e6fbe06 + .quad 0xe11e761911ea79c6 + .quad 0x07433be3cb393bde + .quad 0xff7944c05ce997f4 + .quad 0x575d3de4b05c51a3 + .quad 0x583381fd5a76847c + .quad 0x2d873ede7af6da9f + + // 2^64 * 8 * G + + .quad 0x157a316443373409 + .quad 0xfab8b7eef4aa81d9 + .quad 0xb093fee6f5a64806 + .quad 0x2e773654707fa7b6 + .quad 0xaa6202e14e5df981 + .quad 0xa20d59175015e1f5 + .quad 0x18a275d3bae21d6c + .quad 0x0543618a01600253 + .quad 0x0deabdf4974c23c1 + .quad 0xaa6f0a259dce4693 + .quad 0x04202cb8a29aba2c + .quad 0x4b1443362d07960d + + // 2^68 * 1 * G + + .quad 0x47b837f753242cec + .quad 0x256dc48cc04212f2 + .quad 0xe222fbfbe1d928c5 + .quad 0x48ea295bad8a2c07 + .quad 0x299b1c3f57c5715e + .quad 0x96cb929e6b686d90 + .quad 0x3004806447235ab3 + .quad 0x2c435c24a44d9fe1 + .quad 0x0607c97c80f8833f + .quad 0x0e851578ca25ec5b + .quad 0x54f7450b161ebb6f + .quad 0x7bcb4792a0def80e + + // 2^68 * 2 * G + + .quad 0x8487e3d02bc73659 + .quad 0x4baf8445059979df + .quad 0xd17c975adcad6fbf + .quad 0x57369f0bdefc96b6 + .quad 0x1cecd0a0045224c2 + .quad 0x757f1b1b69e53952 + .quad 0x775b7a925289f681 + .quad 0x1b6cc62016736148 + .quad 0xf1a9990175638698 + .quad 0x353dd1beeeaa60d3 + .quad 0x849471334c9ba488 + .quad 0x63fa6e6843ade311 + + // 2^68 * 3 * G + + .quad 0xd15c20536597c168 + .quad 0x9f73740098d28789 + .quad 0x18aee7f13257ba1f + .quad 0x3418bfda07346f14 + .quad 0x2195becdd24b5eb7 + .quad 0x5e41f18cc0cd44f9 + .quad 0xdf28074441ca9ede + .quad 0x07073b98f35b7d67 + .quad 0xd03c676c4ce530d4 + .quad 0x0b64c0473b5df9f4 + .quad 0x065cef8b19b3a31e + .quad 0x3084d661533102c9 + + // 2^68 * 4 * G + + .quad 0xe1f6b79ebf8469ad + .quad 0x15801004e2663135 + .quad 0x9a498330af74181b + .quad 0x3ba2504f049b673c + .quad 0x9a6ce876760321fd + .quad 0x7fe2b5109eb63ad8 + .quad 0x00e7d4ae8ac80592 + .quad 0x73d86b7abb6f723a + .quad 0x0b52b5606dba5ab6 + .quad 0xa9134f0fbbb1edab + .quad 0x30a9520d9b04a635 + .quad 0x6813b8f37973e5db + + // 2^68 * 5 * G + + .quad 0x9854b054334127c1 + .quad 0x105d047882fbff25 + .quad 0xdb49f7f944186f4f + .quad 0x1768e838bed0b900 + .quad 0xf194ca56f3157e29 + .quad 0x136d35705ef528a5 + .quad 0xdd4cef778b0599bc + .quad 0x7d5472af24f833ed + .quad 0xd0ef874daf33da47 + .quad 0x00d3be5db6e339f9 + .quad 0x3f2a8a2f9c9ceece + .quad 0x5d1aeb792352435a + + // 2^68 * 6 * G + + .quad 0xf59e6bb319cd63ca + .quad 0x670c159221d06839 
+ .quad 0xb06d565b2150cab6 + .quad 0x20fb199d104f12a3 + .quad 0x12c7bfaeb61ba775 + .quad 0xb84e621fe263bffd + .quad 0x0b47a5c35c840dcf + .quad 0x7e83be0bccaf8634 + .quad 0x61943dee6d99c120 + .quad 0x86101f2e460b9fe0 + .quad 0x6bb2f1518ee8598d + .quad 0x76b76289fcc475cc + + // 2^68 * 7 * G + + .quad 0x791b4cc1756286fa + .quad 0xdbced317d74a157c + .quad 0x7e732421ea72bde6 + .quad 0x01fe18491131c8e9 + .quad 0x4245f1a1522ec0b3 + .quad 0x558785b22a75656d + .quad 0x1d485a2548a1b3c0 + .quad 0x60959eccd58fe09f + .quad 0x3ebfeb7ba8ed7a09 + .quad 0x49fdc2bbe502789c + .quad 0x44ebce5d3c119428 + .quad 0x35e1eb55be947f4a + + // 2^68 * 8 * G + + .quad 0xdbdae701c5738dd3 + .quad 0xf9c6f635b26f1bee + .quad 0x61e96a8042f15ef4 + .quad 0x3aa1d11faf60a4d8 + .quad 0x14fd6dfa726ccc74 + .quad 0x3b084cfe2f53b965 + .quad 0xf33ae4f552a2c8b4 + .quad 0x59aab07a0d40166a + .quad 0x77bcec4c925eac25 + .quad 0x1848718460137738 + .quad 0x5b374337fea9f451 + .quad 0x1865e78ec8e6aa46 + + // 2^72 * 1 * G + + .quad 0xccc4b7c7b66e1f7a + .quad 0x44157e25f50c2f7e + .quad 0x3ef06dfc713eaf1c + .quad 0x582f446752da63f7 + .quad 0x967c54e91c529ccb + .quad 0x30f6269264c635fb + .quad 0x2747aff478121965 + .quad 0x17038418eaf66f5c + .quad 0xc6317bd320324ce4 + .quad 0xa81042e8a4488bc4 + .quad 0xb21ef18b4e5a1364 + .quad 0x0c2a1c4bcda28dc9 + + // 2^72 * 2 * G + + .quad 0xd24dc7d06f1f0447 + .quad 0xb2269e3edb87c059 + .quad 0xd15b0272fbb2d28f + .quad 0x7c558bd1c6f64877 + .quad 0xedc4814869bd6945 + .quad 0x0d6d907dbe1c8d22 + .quad 0xc63bd212d55cc5ab + .quad 0x5a6a9b30a314dc83 + .quad 0xd0ec1524d396463d + .quad 0x12bb628ac35a24f0 + .quad 0xa50c3a791cbc5fa4 + .quad 0x0404a5ca0afbafc3 + + // 2^72 * 3 * G + + .quad 0x8c1f40070aa743d6 + .quad 0xccbad0cb5b265ee8 + .quad 0x574b046b668fd2de + .quad 0x46395bfdcadd9633 + .quad 0x62bc9e1b2a416fd1 + .quad 0xb5c6f728e350598b + .quad 0x04343fd83d5d6967 + .quad 0x39527516e7f8ee98 + .quad 0x117fdb2d1a5d9a9c + .quad 0x9c7745bcd1005c2a + .quad 0xefd4bef154d56fea + .quad 0x76579a29e822d016 + + // 2^72 * 4 * G + + .quad 0x45b68e7e49c02a17 + .quad 0x23cd51a2bca9a37f + .quad 0x3ed65f11ec224c1b + .quad 0x43a384dc9e05bdb1 + .quad 0x333cb51352b434f2 + .quad 0xd832284993de80e1 + .quad 0xb5512887750d35ce + .quad 0x02c514bb2a2777c1 + .quad 0x684bd5da8bf1b645 + .quad 0xfb8bd37ef6b54b53 + .quad 0x313916d7a9b0d253 + .quad 0x1160920961548059 + + // 2^72 * 5 * G + + .quad 0xb44d166929dacfaa + .quad 0xda529f4c8413598f + .quad 0xe9ef63ca453d5559 + .quad 0x351e125bc5698e0b + .quad 0x7a385616369b4dcd + .quad 0x75c02ca7655c3563 + .quad 0x7dc21bf9d4f18021 + .quad 0x2f637d7491e6e042 + .quad 0xd4b49b461af67bbe + .quad 0xd603037ac8ab8961 + .quad 0x71dee19ff9a699fb + .quad 0x7f182d06e7ce2a9a + + // 2^72 * 6 * G + + .quad 0x7a7c8e64ab0168ec + .quad 0xcb5a4a5515edc543 + .quad 0x095519d347cd0eda + .quad 0x67d4ac8c343e93b0 + .quad 0x09454b728e217522 + .quad 0xaa58e8f4d484b8d8 + .quad 0xd358254d7f46903c + .quad 0x44acc043241c5217 + .quad 0x1c7d6bbb4f7a5777 + .quad 0x8b35fed4918313e1 + .quad 0x4adca1c6c96b4684 + .quad 0x556d1c8312ad71bd + + // 2^72 * 7 * G + + .quad 0x17ef40e30c8d3982 + .quad 0x31f7073e15a3fa34 + .quad 0x4f21f3cb0773646e + .quad 0x746c6c6d1d824eff + .quad 0x81f06756b11be821 + .quad 0x0faff82310a3f3dd + .quad 0xf8b2d0556a99465d + .quad 0x097abe38cc8c7f05 + .quad 0x0c49c9877ea52da4 + .quad 0x4c4369559bdc1d43 + .quad 0x022c3809f7ccebd2 + .quad 0x577e14a34bee84bd + + // 2^72 * 8 * G + + .quad 0xf0e268ac61a73b0a + .quad 0xf2fafa103791a5f5 + .quad 0xc1e13e826b6d00e9 + .quad 0x60fa7ee96fd78f42 + .quad 0x94fecebebd4dd72b + .quad 
0xf46a4fda060f2211 + .quad 0x124a5977c0c8d1ff + .quad 0x705304b8fb009295 + .quad 0xb63d1d354d296ec6 + .quad 0xf3c3053e5fad31d8 + .quad 0x670b958cb4bd42ec + .quad 0x21398e0ca16353fd + + // 2^76 * 1 * G + + .quad 0x216ab2ca8da7d2ef + .quad 0x366ad9dd99f42827 + .quad 0xae64b9004fdd3c75 + .quad 0x403a395b53909e62 + .quad 0x86c5fc16861b7e9a + .quad 0xf6a330476a27c451 + .quad 0x01667267a1e93597 + .quad 0x05ffb9cd6082dfeb + .quad 0xa617fa9ff53f6139 + .quad 0x60f2b5e513e66cb6 + .quad 0xd7a8beefb3448aa4 + .quad 0x7a2932856f5ea192 + + // 2^76 * 2 * G + + .quad 0x0b39d761b02de888 + .quad 0x5f550e7ed2414e1f + .quad 0xa6bfa45822e1a940 + .quad 0x050a2f7dfd447b99 + .quad 0xb89c444879639302 + .quad 0x4ae4f19350c67f2c + .quad 0xf0b35da8c81af9c6 + .quad 0x39d0003546871017 + .quad 0x437c3b33a650db77 + .quad 0x6bafe81dbac52bb2 + .quad 0xfe99402d2db7d318 + .quad 0x2b5b7eec372ba6ce + + // 2^76 * 3 * G + + .quad 0xb3bc4bbd83f50eef + .quad 0x508f0c998c927866 + .quad 0x43e76587c8b7e66e + .quad 0x0f7655a3a47f98d9 + .quad 0xa694404d613ac8f4 + .quad 0x500c3c2bfa97e72c + .quad 0x874104d21fcec210 + .quad 0x1b205fb38604a8ee + .quad 0x55ecad37d24b133c + .quad 0x441e147d6038c90b + .quad 0x656683a1d62c6fee + .quad 0x0157d5dc87e0ecae + + // 2^76 * 4 * G + + .quad 0xf2a7af510354c13d + .quad 0xd7a0b145aa372b60 + .quad 0x2869b96a05a3d470 + .quad 0x6528e42d82460173 + .quad 0x95265514d71eb524 + .quad 0xe603d8815df14593 + .quad 0x147cdf410d4de6b7 + .quad 0x5293b1730437c850 + .quad 0x23d0e0814bccf226 + .quad 0x92c745cd8196fb93 + .quad 0x8b61796c59541e5b + .quad 0x40a44df0c021f978 + + // 2^76 * 5 * G + + .quad 0xdaa869894f20ea6a + .quad 0xea14a3d14c620618 + .quad 0x6001fccb090bf8be + .quad 0x35f4e822947e9cf0 + .quad 0x86c96e514bc5d095 + .quad 0xf20d4098fca6804a + .quad 0x27363d89c826ea5d + .quad 0x39ca36565719cacf + .quad 0x97506f2f6f87b75c + .quad 0xc624aea0034ae070 + .quad 0x1ec856e3aad34dd6 + .quad 0x055b0be0e440e58f + + // 2^76 * 6 * G + + .quad 0x6469a17d89735d12 + .quad 0xdb6f27d5e662b9f1 + .quad 0x9fcba3286a395681 + .quad 0x363b8004d269af25 + .quad 0x4d12a04b6ea33da2 + .quad 0x57cf4c15e36126dd + .quad 0x90ec9675ee44d967 + .quad 0x64ca348d2a985aac + .quad 0x99588e19e4c4912d + .quad 0xefcc3b4e1ca5ce6b + .quad 0x4522ea60fa5b98d5 + .quad 0x7064bbab1de4a819 + + // 2^76 * 7 * G + + .quad 0xb919e1515a770641 + .quad 0xa9a2e2c74e7f8039 + .quad 0x7527250b3df23109 + .quad 0x756a7330ac27b78b + .quad 0xa290c06142542129 + .quad 0xf2e2c2aebe8d5b90 + .quad 0xcf2458db76abfe1b + .quad 0x02157ade83d626bf + .quad 0x3e46972a1b9a038b + .quad 0x2e4ee66a7ee03fb4 + .quad 0x81a248776edbb4ca + .quad 0x1a944ee88ecd0563 + + // 2^76 * 8 * G + + .quad 0xd5a91d1151039372 + .quad 0x2ed377b799ca26de + .quad 0xa17202acfd366b6b + .quad 0x0730291bd6901995 + .quad 0xbb40a859182362d6 + .quad 0xb99f55778a4d1abb + .quad 0x8d18b427758559f6 + .quad 0x26c20fe74d26235a + .quad 0x648d1d9fe9cc22f5 + .quad 0x66bc561928dd577c + .quad 0x47d3ed21652439d1 + .quad 0x49d271acedaf8b49 + + // 2^80 * 1 * G + + .quad 0x89f5058a382b33f3 + .quad 0x5ae2ba0bad48c0b4 + .quad 0x8f93b503a53db36e + .quad 0x5aa3ed9d95a232e6 + .quad 0x2798aaf9b4b75601 + .quad 0x5eac72135c8dad72 + .quad 0xd2ceaa6161b7a023 + .quad 0x1bbfb284e98f7d4e + .quad 0x656777e9c7d96561 + .quad 0xcb2b125472c78036 + .quad 0x65053299d9506eee + .quad 0x4a07e14e5e8957cc + + // 2^80 * 2 * G + + .quad 0x4ee412cb980df999 + .quad 0xa315d76f3c6ec771 + .quad 0xbba5edde925c77fd + .quad 0x3f0bac391d313402 + .quad 0x240b58cdc477a49b + .quad 0xfd38dade6447f017 + .quad 0x19928d32a7c86aad + .quad 0x50af7aed84afa081 + .quad 
0x6e4fde0115f65be5 + .quad 0x29982621216109b2 + .quad 0x780205810badd6d9 + .quad 0x1921a316baebd006 + + // 2^80 * 3 * G + + .quad 0x89422f7edfb870fc + .quad 0x2c296beb4f76b3bd + .quad 0x0738f1d436c24df7 + .quad 0x6458df41e273aeb0 + .quad 0xd75aad9ad9f3c18b + .quad 0x566a0eef60b1c19c + .quad 0x3e9a0bac255c0ed9 + .quad 0x7b049deca062c7f5 + .quad 0xdccbe37a35444483 + .quad 0x758879330fedbe93 + .quad 0x786004c312c5dd87 + .quad 0x6093dccbc2950e64 + + // 2^80 * 4 * G + + .quad 0x1ff39a8585e0706d + .quad 0x36d0a5d8b3e73933 + .quad 0x43b9f2e1718f453b + .quad 0x57d1ea084827a97c + .quad 0x6bdeeebe6084034b + .quad 0x3199c2b6780fb854 + .quad 0x973376abb62d0695 + .quad 0x6e3180c98b647d90 + .quad 0xee7ab6e7a128b071 + .quad 0xa4c1596d93a88baa + .quad 0xf7b4de82b2216130 + .quad 0x363e999ddd97bd18 + + // 2^80 * 5 * G + + .quad 0x96a843c135ee1fc4 + .quad 0x976eb35508e4c8cf + .quad 0xb42f6801b58cd330 + .quad 0x48ee9b78693a052b + .quad 0x2f1848dce24baec6 + .quad 0x769b7255babcaf60 + .quad 0x90cb3c6e3cefe931 + .quad 0x231f979bc6f9b355 + .quad 0x5c31de4bcc2af3c6 + .quad 0xb04bb030fe208d1f + .quad 0xb78d7009c14fb466 + .quad 0x079bfa9b08792413 + + // 2^80 * 6 * G + + .quad 0xe3903a51da300df4 + .quad 0x843964233da95ab0 + .quad 0xed3cf12d0b356480 + .quad 0x038c77f684817194 + .quad 0xf3c9ed80a2d54245 + .quad 0x0aa08b7877f63952 + .quad 0xd76dac63d1085475 + .quad 0x1ef4fb159470636b + .quad 0x854e5ee65b167bec + .quad 0x59590a4296d0cdc2 + .quad 0x72b2df3498102199 + .quad 0x575ee92a4a0bff56 + + // 2^80 * 7 * G + + .quad 0xd4c080908a182fcf + .quad 0x30e170c299489dbd + .quad 0x05babd5752f733de + .quad 0x43d4e7112cd3fd00 + .quad 0x5d46bc450aa4d801 + .quad 0xc3af1227a533b9d8 + .quad 0x389e3b262b8906c2 + .quad 0x200a1e7e382f581b + .quad 0x518db967eaf93ac5 + .quad 0x71bc989b056652c0 + .quad 0xfe2b85d9567197f5 + .quad 0x050eca52651e4e38 + + // 2^80 * 8 * G + + .quad 0xc3431ade453f0c9c + .quad 0xe9f5045eff703b9b + .quad 0xfcd97ac9ed847b3d + .quad 0x4b0ee6c21c58f4c6 + .quad 0x97ac397660e668ea + .quad 0x9b19bbfe153ab497 + .quad 0x4cb179b534eca79f + .quad 0x6151c09fa131ae57 + .quad 0x3af55c0dfdf05d96 + .quad 0xdd262ee02ab4ee7a + .quad 0x11b2bb8712171709 + .quad 0x1fef24fa800f030b + + // 2^84 * 1 * G + + .quad 0xb496123a6b6c6609 + .quad 0xa750fe8580ab5938 + .quad 0xf471bf39b7c27a5f + .quad 0x507903ce77ac193c + .quad 0xff91a66a90166220 + .quad 0xf22552ae5bf1e009 + .quad 0x7dff85d87f90df7c + .quad 0x4f620ffe0c736fb9 + .quad 0x62f90d65dfde3e34 + .quad 0xcf28c592b9fa5fad + .quad 0x99c86ef9c6164510 + .quad 0x25d448044a256c84 + + // 2^84 * 2 * G + + .quad 0xbd68230ec7e9b16f + .quad 0x0eb1b9c1c1c5795d + .quad 0x7943c8c495b6b1ff + .quad 0x2f9faf620bbacf5e + .quad 0x2c7c4415c9022b55 + .quad 0x56a0d241812eb1fe + .quad 0xf02ea1c9d7b65e0d + .quad 0x4180512fd5323b26 + .quad 0xa4ff3e698a48a5db + .quad 0xba6a3806bd95403b + .quad 0x9f7ce1af47d5b65d + .quad 0x15e087e55939d2fb + + // 2^84 * 3 * G + + .quad 0x12207543745c1496 + .quad 0xdaff3cfdda38610c + .quad 0xe4e797272c71c34f + .quad 0x39c07b1934bdede9 + .quad 0x8894186efb963f38 + .quad 0x48a00e80dc639bd5 + .quad 0xa4e8092be96c1c99 + .quad 0x5a097d54ca573661 + .quad 0x2d45892b17c9e755 + .quad 0xd033fd7289308df8 + .quad 0x6c2fe9d9525b8bd9 + .quad 0x2edbecf1c11cc079 + + // 2^84 * 4 * G + + .quad 0x1616a4e3c715a0d2 + .quad 0x53623cb0f8341d4d + .quad 0x96ef5329c7e899cb + .quad 0x3d4e8dbba668baa6 + .quad 0xee0f0fddd087a25f + .quad 0x9c7531555c3e34ee + .quad 0x660c572e8fab3ab5 + .quad 0x0854fc44544cd3b2 + .quad 0x61eba0c555edad19 + .quad 0x24b533fef0a83de6 + .quad 0x3b77042883baa5f8 + .quad 
0x678f82b898a47e8d + + // 2^84 * 5 * G + + .quad 0xb1491d0bd6900c54 + .quad 0x3539722c9d132636 + .quad 0x4db928920b362bc9 + .quad 0x4d7cd1fea68b69df + .quad 0x1e09d94057775696 + .quad 0xeed1265c3cd951db + .quad 0xfa9dac2b20bce16f + .quad 0x0f7f76e0e8d089f4 + .quad 0x36d9ebc5d485b00c + .quad 0xa2596492e4adb365 + .quad 0xc1659480c2119ccd + .quad 0x45306349186e0d5f + + // 2^84 * 6 * G + + .quad 0x94ddd0c1a6cdff1d + .quad 0x55f6f115e84213ae + .quad 0x6c935f85992fcf6a + .quad 0x067ee0f54a37f16f + .quad 0x96a414ec2b072491 + .quad 0x1bb2218127a7b65b + .quad 0x6d2849596e8a4af0 + .quad 0x65f3b08ccd27765f + .quad 0xecb29fff199801f7 + .quad 0x9d361d1fa2a0f72f + .quad 0x25f11d2375fd2f49 + .quad 0x124cefe80fe10fe2 + + // 2^84 * 7 * G + + .quad 0x4c126cf9d18df255 + .quad 0xc1d471e9147a63b6 + .quad 0x2c6d3c73f3c93b5f + .quad 0x6be3a6a2e3ff86a2 + .quad 0x1518e85b31b16489 + .quad 0x8faadcb7db710bfb + .quad 0x39b0bdf4a14ae239 + .quad 0x05f4cbea503d20c1 + .quad 0xce040e9ec04145bc + .quad 0xc71ff4e208f6834c + .quad 0xbd546e8dab8847a3 + .quad 0x64666aa0a4d2aba5 + + // 2^84 * 8 * G + + .quad 0x6841435a7c06d912 + .quad 0xca123c21bb3f830b + .quad 0xd4b37b27b1cbe278 + .quad 0x1d753b84c76f5046 + .quad 0xb0c53bf73337e94c + .quad 0x7cb5697e11e14f15 + .quad 0x4b84abac1930c750 + .quad 0x28dd4abfe0640468 + .quad 0x7dc0b64c44cb9f44 + .quad 0x18a3e1ace3925dbf + .quad 0x7a3034862d0457c4 + .quad 0x4c498bf78a0c892e + + // 2^88 * 1 * G + + .quad 0x37d653fb1aa73196 + .quad 0x0f9495303fd76418 + .quad 0xad200b09fb3a17b2 + .quad 0x544d49292fc8613e + .quad 0x22d2aff530976b86 + .quad 0x8d90b806c2d24604 + .quad 0xdca1896c4de5bae5 + .quad 0x28005fe6c8340c17 + .quad 0x6aefba9f34528688 + .quad 0x5c1bff9425107da1 + .quad 0xf75bbbcd66d94b36 + .quad 0x72e472930f316dfa + + // 2^88 * 2 * G + + .quad 0x2695208c9781084f + .quad 0xb1502a0b23450ee1 + .quad 0xfd9daea603efde02 + .quad 0x5a9d2e8c2733a34c + .quad 0x07f3f635d32a7627 + .quad 0x7aaa4d865f6566f0 + .quad 0x3c85e79728d04450 + .quad 0x1fee7f000fe06438 + .quad 0x765305da03dbf7e5 + .quad 0xa4daf2491434cdbd + .quad 0x7b4ad5cdd24a88ec + .quad 0x00f94051ee040543 + + // 2^88 * 3 * G + + .quad 0x8d356b23c3d330b2 + .quad 0xf21c8b9bb0471b06 + .quad 0xb36c316c6e42b83c + .quad 0x07d79c7e8beab10d + .quad 0xd7ef93bb07af9753 + .quad 0x583ed0cf3db766a7 + .quad 0xce6998bf6e0b1ec5 + .quad 0x47b7ffd25dd40452 + .quad 0x87fbfb9cbc08dd12 + .quad 0x8a066b3ae1eec29b + .quad 0x0d57242bdb1fc1bf + .quad 0x1c3520a35ea64bb6 + + // 2^88 * 4 * G + + .quad 0x80d253a6bccba34a + .quad 0x3e61c3a13838219b + .quad 0x90c3b6019882e396 + .quad 0x1c3d05775d0ee66f + .quad 0xcda86f40216bc059 + .quad 0x1fbb231d12bcd87e + .quad 0xb4956a9e17c70990 + .quad 0x38750c3b66d12e55 + .quad 0x692ef1409422e51a + .quad 0xcbc0c73c2b5df671 + .quad 0x21014fe7744ce029 + .quad 0x0621e2c7d330487c + + // 2^88 * 5 * G + + .quad 0xaf9860cc8259838d + .quad 0x90ea48c1c69f9adc + .quad 0x6526483765581e30 + .quad 0x0007d6097bd3a5bc + .quad 0xb7ae1796b0dbf0f3 + .quad 0x54dfafb9e17ce196 + .quad 0x25923071e9aaa3b4 + .quad 0x5d8e589ca1002e9d + .quad 0xc0bf1d950842a94b + .quad 0xb2d3c363588f2e3e + .quad 0x0a961438bb51e2ef + .quad 0x1583d7783c1cbf86 + + // 2^88 * 6 * G + + .quad 0xeceea2ef5da27ae1 + .quad 0x597c3a1455670174 + .quad 0xc9a62a126609167a + .quad 0x252a5f2e81ed8f70 + .quad 0x90034704cc9d28c7 + .quad 0x1d1b679ef72cc58f + .quad 0x16e12b5fbe5b8726 + .quad 0x4958064e83c5580a + .quad 0x0d2894265066e80d + .quad 0xfcc3f785307c8c6b + .quad 0x1b53da780c1112fd + .quad 0x079c170bd843b388 + + // 2^88 * 7 * G + + .quad 0x0506ece464fa6fff + .quad 0xbee3431e6205e523 
+ .quad 0x3579422451b8ea42 + .quad 0x6dec05e34ac9fb00 + .quad 0xcdd6cd50c0d5d056 + .quad 0x9af7686dbb03573b + .quad 0x3ca6723ff3c3ef48 + .quad 0x6768c0d7317b8acc + .quad 0x94b625e5f155c1b3 + .quad 0x417bf3a7997b7b91 + .quad 0xc22cbddc6d6b2600 + .quad 0x51445e14ddcd52f4 + + // 2^88 * 8 * G + + .quad 0x57502b4b3b144951 + .quad 0x8e67ff6b444bbcb3 + .quad 0xb8bd6927166385db + .quad 0x13186f31e39295c8 + .quad 0x893147ab2bbea455 + .quad 0x8c53a24f92079129 + .quad 0x4b49f948be30f7a7 + .quad 0x12e990086e4fd43d + .quad 0xf10c96b37fdfbb2e + .quad 0x9f9a935e121ceaf9 + .quad 0xdf1136c43a5b983f + .quad 0x77b2e3f05d3e99af + + // 2^92 * 1 * G + + .quad 0xfd0d75879cf12657 + .quad 0xe82fef94e53a0e29 + .quad 0xcc34a7f05bbb4be7 + .quad 0x0b251172a50c38a2 + .quad 0x9532f48fcc5cd29b + .quad 0x2ba851bea3ce3671 + .quad 0x32dacaa051122941 + .quad 0x478d99d9350004f2 + .quad 0x1d5ad94890bb02c0 + .quad 0x50e208b10ec25115 + .quad 0xa26a22894ef21702 + .quad 0x4dc923343b524805 + + // 2^92 * 2 * G + + .quad 0xe3828c400f8086b6 + .quad 0x3f77e6f7979f0dc8 + .quad 0x7ef6de304df42cb4 + .quad 0x5265797cb6abd784 + .quad 0x3ad3e3ebf36c4975 + .quad 0xd75d25a537862125 + .quad 0xe873943da025a516 + .quad 0x6bbc7cb4c411c847 + .quad 0x3c6f9cd1d4a50d56 + .quad 0xb6244077c6feab7e + .quad 0x6ff9bf483580972e + .quad 0x00375883b332acfb + + // 2^92 * 3 * G + + .quad 0x0001b2cd28cb0940 + .quad 0x63fb51a06f1c24c9 + .quad 0xb5ad8691dcd5ca31 + .quad 0x67238dbd8c450660 + .quad 0xc98bec856c75c99c + .quad 0xe44184c000e33cf4 + .quad 0x0a676b9bba907634 + .quad 0x669e2cb571f379d7 + .quad 0xcb116b73a49bd308 + .quad 0x025aad6b2392729e + .quad 0xb4793efa3f55d9b1 + .quad 0x72a1056140678bb9 + + // 2^92 * 4 * G + + .quad 0xa2b6812b1cc9249d + .quad 0x62866eee21211f58 + .quad 0x2cb5c5b85df10ece + .quad 0x03a6b259e263ae00 + .quad 0x0d8d2909e2e505b6 + .quad 0x98ca78abc0291230 + .quad 0x77ef5569a9b12327 + .quad 0x7c77897b81439b47 + .quad 0xf1c1b5e2de331cb5 + .quad 0x5a9f5d8e15fca420 + .quad 0x9fa438f17bd932b1 + .quad 0x2a381bf01c6146e7 + + // 2^92 * 5 * G + + .quad 0xac9b9879cfc811c1 + .quad 0x8b7d29813756e567 + .quad 0x50da4e607c70edfc + .quad 0x5dbca62f884400b6 + .quad 0xf7c0be32b534166f + .quad 0x27e6ca6419cf70d4 + .quad 0x934df7d7a957a759 + .quad 0x5701461dabdec2aa + .quad 0x2c6747402c915c25 + .quad 0x1bdcd1a80b0d340a + .quad 0x5e5601bd07b43f5f + .quad 0x2555b4e05539a242 + + // 2^92 * 6 * G + + .quad 0x6fc09f5266ddd216 + .quad 0xdce560a7c8e37048 + .quad 0xec65939da2df62fd + .quad 0x7a869ae7e52ed192 + .quad 0x78409b1d87e463d4 + .quad 0xad4da95acdfb639d + .quad 0xec28773755259b9c + .quad 0x69c806e9c31230ab + .quad 0x7b48f57414bb3f22 + .quad 0x68c7cee4aedccc88 + .quad 0xed2f936179ed80be + .quad 0x25d70b885f77bc4b + + // 2^92 * 7 * G + + .quad 0x4151c3d9762bf4de + .quad 0x083f435f2745d82b + .quad 0x29775a2e0d23ddd5 + .quad 0x138e3a6269a5db24 + .quad 0x98459d29bb1ae4d4 + .quad 0x56b9c4c739f954ec + .quad 0x832743f6c29b4b3e + .quad 0x21ea8e2798b6878a + .quad 0x87bef4b46a5a7b9c + .quad 0xd2299d1b5fc1d062 + .quad 0x82409818dd321648 + .quad 0x5c5abeb1e5a2e03d + + // 2^92 * 8 * G + + .quad 0x14722af4b73c2ddb + .quad 0xbc470c5f5a05060d + .quad 0x00943eac2581b02e + .quad 0x0e434b3b1f499c8f + .quad 0x02cde6de1306a233 + .quad 0x7b5a52a2116f8ec7 + .quad 0xe1c681f4c1163b5b + .quad 0x241d350660d32643 + .quad 0x6be4404d0ebc52c7 + .quad 0xae46233bb1a791f5 + .quad 0x2aec170ed25db42b + .quad 0x1d8dfd966645d694 + + // 2^96 * 1 * G + + .quad 0x296fa9c59c2ec4de + .quad 0xbc8b61bf4f84f3cb + .quad 0x1c7706d917a8f908 + .quad 0x63b795fc7ad3255d + .quad 0xd598639c12ddb0a4 + .quad 
0xa5d19f30c024866b + .quad 0xd17c2f0358fce460 + .quad 0x07a195152e095e8a + .quad 0xa8368f02389e5fc8 + .quad 0x90433b02cf8de43b + .quad 0xafa1fd5dc5412643 + .quad 0x3e8fe83d032f0137 + + // 2^96 * 2 * G + + .quad 0x2f8b15b90570a294 + .quad 0x94f2427067084549 + .quad 0xde1c5ae161bbfd84 + .quad 0x75ba3b797fac4007 + .quad 0x08704c8de8efd13c + .quad 0xdfc51a8e33e03731 + .quad 0xa59d5da51260cde3 + .quad 0x22d60899a6258c86 + .quad 0x6239dbc070cdd196 + .quad 0x60fe8a8b6c7d8a9a + .quad 0xb38847bceb401260 + .quad 0x0904d07b87779e5e + + // 2^96 * 3 * G + + .quad 0xb4ce1fd4ddba919c + .quad 0xcf31db3ec74c8daa + .quad 0x2c63cc63ad86cc51 + .quad 0x43e2143fbc1dde07 + .quad 0xf4322d6648f940b9 + .quad 0x06952f0cbd2d0c39 + .quad 0x167697ada081f931 + .quad 0x6240aacebaf72a6c + .quad 0xf834749c5ba295a0 + .quad 0xd6947c5bca37d25a + .quad 0x66f13ba7e7c9316a + .quad 0x56bdaf238db40cac + + // 2^96 * 4 * G + + .quad 0x362ab9e3f53533eb + .quad 0x338568d56eb93d40 + .quad 0x9e0e14521d5a5572 + .quad 0x1d24a86d83741318 + .quad 0x1310d36cc19d3bb2 + .quad 0x062a6bb7622386b9 + .quad 0x7c9b8591d7a14f5c + .quad 0x03aa31507e1e5754 + .quad 0xf4ec7648ffd4ce1f + .quad 0xe045eaf054ac8c1c + .quad 0x88d225821d09357c + .quad 0x43b261dc9aeb4859 + + // 2^96 * 5 * G + + .quad 0xe55b1e1988bb79bb + .quad 0xa09ed07dc17a359d + .quad 0xb02c2ee2603dea33 + .quad 0x326055cf5b276bc2 + .quad 0x19513d8b6c951364 + .quad 0x94fe7126000bf47b + .quad 0x028d10ddd54f9567 + .quad 0x02b4d5e242940964 + .quad 0xb4a155cb28d18df2 + .quad 0xeacc4646186ce508 + .quad 0xc49cf4936c824389 + .quad 0x27a6c809ae5d3410 + + // 2^96 * 6 * G + + .quad 0x8ba6ebcd1f0db188 + .quad 0x37d3d73a675a5be8 + .quad 0xf22edfa315f5585a + .quad 0x2cb67174ff60a17e + .quad 0xcd2c270ac43d6954 + .quad 0xdd4a3e576a66cab2 + .quad 0x79fa592469d7036c + .quad 0x221503603d8c2599 + .quad 0x59eecdf9390be1d0 + .quad 0xa9422044728ce3f1 + .quad 0x82891c667a94f0f4 + .quad 0x7b1df4b73890f436 + + // 2^96 * 7 * G + + .quad 0xe492f2e0b3b2a224 + .quad 0x7c6c9e062b551160 + .quad 0x15eb8fe20d7f7b0e + .quad 0x61fcef2658fc5992 + .quad 0x5f2e221807f8f58c + .quad 0xe3555c9fd49409d4 + .quad 0xb2aaa88d1fb6a630 + .quad 0x68698245d352e03d + .quad 0xdbb15d852a18187a + .quad 0xf3e4aad386ddacd7 + .quad 0x44bae2810ff6c482 + .quad 0x46cf4c473daf01cf + + // 2^96 * 8 * G + + .quad 0x426525ed9ec4e5f9 + .quad 0x0e5eda0116903303 + .quad 0x72b1a7f2cbe5cadc + .quad 0x29387bcd14eb5f40 + .quad 0x213c6ea7f1498140 + .quad 0x7c1e7ef8392b4854 + .quad 0x2488c38c5629ceba + .quad 0x1065aae50d8cc5bb + .quad 0x1c2c4525df200d57 + .quad 0x5c3b2dd6bfca674a + .quad 0x0a07e7b1e1834030 + .quad 0x69a198e64f1ce716 + + // 2^100 * 1 * G + + .quad 0x7afcd613efa9d697 + .quad 0x0cc45aa41c067959 + .quad 0xa56fe104c1fada96 + .quad 0x3a73b70472e40365 + .quad 0x7b26e56b9e2d4734 + .quad 0xc4c7132b81c61675 + .quad 0xef5c9525ec9cde7f + .quad 0x39c80b16e71743ad + .quad 0x0f196e0d1b826c68 + .quad 0xf71ff0e24960e3db + .quad 0x6113167023b7436c + .quad 0x0cf0ea5877da7282 + + // 2^100 * 2 * G + + .quad 0x196c80a4ddd4ccbd + .quad 0x22e6f55d95f2dd9d + .quad 0xc75e33c740d6c71b + .quad 0x7bb51279cb3c042f + .quad 0xe332ced43ba6945a + .quad 0xde0b1361e881c05d + .quad 0x1ad40f095e67ed3b + .quad 0x5da8acdab8c63d5d + .quad 0xc4b6664a3a70159f + .quad 0x76194f0f0a904e14 + .quad 0xa5614c39a4096c13 + .quad 0x6cd0ff50979feced + + // 2^100 * 3 * G + + .quad 0xc0e067e78f4428ac + .quad 0x14835ab0a61135e3 + .quad 0xf21d14f338062935 + .quad 0x6390a4c8df04849c + .quad 0x7fecfabdb04ba18e + .quad 0xd0fc7bfc3bddbcf7 + .quad 0xa41d486e057a131c + .quad 0x641a4391f2223a61 + .quad 
0xc5c6b95aa606a8db + .quad 0x914b7f9eb06825f1 + .quad 0x2a731f6b44fc9eff + .quad 0x30ddf38562705cfc + + // 2^100 * 4 * G + + .quad 0x4e3dcbdad1bff7f9 + .quad 0xc9118e8220645717 + .quad 0xbacccebc0f189d56 + .quad 0x1b4822e9d4467668 + .quad 0x33bef2bd68bcd52c + .quad 0xc649dbb069482ef2 + .quad 0xb5b6ee0c41cb1aee + .quad 0x5c294d270212a7e5 + .quad 0xab360a7f25563781 + .quad 0x2512228a480f7958 + .quad 0xc75d05276114b4e3 + .quad 0x222d9625d976fe2a + + // 2^100 * 5 * G + + .quad 0x1c717f85b372ace1 + .quad 0x81930e694638bf18 + .quad 0x239cad056bc08b58 + .quad 0x0b34271c87f8fff4 + .quad 0x0f94be7e0a344f85 + .quad 0xeb2faa8c87f22c38 + .quad 0x9ce1e75e4ee16f0f + .quad 0x43e64e5418a08dea + .quad 0x8155e2521a35ce63 + .quad 0xbe100d4df912028e + .quad 0xbff80bf8a57ddcec + .quad 0x57342dc96d6bc6e4 + + // 2^100 * 6 * G + + .quad 0xefeef065c8ce5998 + .quad 0xbf029510b5cbeaa2 + .quad 0x8c64a10620b7c458 + .quad 0x35134fb231c24855 + .quad 0xf3c3bcb71e707bf6 + .quad 0x351d9b8c7291a762 + .quad 0x00502e6edad69a33 + .quad 0x522f521f1ec8807f + .quad 0x272c1f46f9a3902b + .quad 0xc91ba3b799657bcc + .quad 0xae614b304f8a1c0e + .quad 0x7afcaad70b99017b + + // 2^100 * 7 * G + + .quad 0xc25ded54a4b8be41 + .quad 0x902d13e11bb0e2dd + .quad 0x41f43233cde82ab2 + .quad 0x1085faa5c3aae7cb + .quad 0xa88141ecef842b6b + .quad 0x55e7b14797abe6c5 + .quad 0x8c748f9703784ffe + .quad 0x5b50a1f7afcd00b7 + .quad 0x9b840f66f1361315 + .quad 0x18462242701003e9 + .quad 0x65ed45fae4a25080 + .quad 0x0a2862393fda7320 + + // 2^100 * 8 * G + + .quad 0x46ab13c8347cbc9d + .quad 0x3849e8d499c12383 + .quad 0x4cea314087d64ac9 + .quad 0x1f354134b1a29ee7 + .quad 0x960e737b6ecb9d17 + .quad 0xfaf24948d67ceae1 + .quad 0x37e7a9b4d55e1b89 + .quad 0x5cb7173cb46c59eb + .quad 0x4a89e68b82b7abf0 + .quad 0xf41cd9279ba6b7b9 + .quad 0x16e6c210e18d876f + .quad 0x7cacdb0f7f1b09c6 + + // 2^104 * 1 * G + + .quad 0x9062b2e0d91a78bc + .quad 0x47c9889cc8509667 + .quad 0x9df54a66405070b8 + .quad 0x7369e6a92493a1bf + .quad 0xe1014434dcc5caed + .quad 0x47ed5d963c84fb33 + .quad 0x70019576ed86a0e7 + .quad 0x25b2697bd267f9e4 + .quad 0x9d673ffb13986864 + .quad 0x3ca5fbd9415dc7b8 + .quad 0xe04ecc3bdf273b5e + .quad 0x1420683db54e4cd2 + + // 2^104 * 2 * G + + .quad 0xb478bd1e249dd197 + .quad 0x620c35005e58c102 + .quad 0xfb02d32fccbaac5c + .quad 0x60b63bebf508a72d + .quad 0x34eebb6fc1cc5ad0 + .quad 0x6a1b0ce99646ac8b + .quad 0xd3b0da49a66bde53 + .quad 0x31e83b4161d081c1 + .quad 0x97e8c7129e062b4f + .quad 0x49e48f4f29320ad8 + .quad 0x5bece14b6f18683f + .quad 0x55cf1eb62d550317 + + // 2^104 * 3 * G + + .quad 0x5879101065c23d58 + .quad 0x8b9d086d5094819c + .quad 0xe2402fa912c55fa7 + .quad 0x669a6564570891d4 + .quad 0x3076b5e37df58c52 + .quad 0xd73ab9dde799cc36 + .quad 0xbd831ce34913ee20 + .quad 0x1a56fbaa62ba0133 + .quad 0x943e6b505c9dc9ec + .quad 0x302557bba77c371a + .quad 0x9873ae5641347651 + .quad 0x13c4836799c58a5c + + // 2^104 * 4 * G + + .quad 0x423a5d465ab3e1b9 + .quad 0xfc13c187c7f13f61 + .quad 0x19f83664ecb5b9b6 + .quad 0x66f80c93a637b607 + .quad 0xc4dcfb6a5d8bd080 + .quad 0xdeebc4ec571a4842 + .quad 0xd4b2e883b8e55365 + .quad 0x50bdc87dc8e5b827 + .quad 0x606d37836edfe111 + .quad 0x32353e15f011abd9 + .quad 0x64b03ac325b73b96 + .quad 0x1dd56444725fd5ae + + // 2^104 * 5 * G + + .quad 0x8fa47ff83362127d + .quad 0xbc9f6ac471cd7c15 + .quad 0x6e71454349220c8b + .quad 0x0e645912219f732e + .quad 0xc297e60008bac89a + .quad 0x7d4cea11eae1c3e0 + .quad 0xf3e38be19fe7977c + .quad 0x3a3a450f63a305cd + .quad 0x078f2f31d8394627 + .quad 0x389d3183de94a510 + .quad 0xd1e36c6d17996f80 + .quad 
0x318c8d9393a9a87b + + // 2^104 * 6 * G + + .quad 0xf2745d032afffe19 + .quad 0x0c9f3c497f24db66 + .quad 0xbc98d3e3ba8598ef + .quad 0x224c7c679a1d5314 + .quad 0x5d669e29ab1dd398 + .quad 0xfc921658342d9e3b + .quad 0x55851dfdf35973cd + .quad 0x509a41c325950af6 + .quad 0xbdc06edca6f925e9 + .quad 0x793ef3f4641b1f33 + .quad 0x82ec12809d833e89 + .quad 0x05bff02328a11389 + + // 2^104 * 7 * G + + .quad 0x3632137023cae00b + .quad 0x544acf0ad1accf59 + .quad 0x96741049d21a1c88 + .quad 0x780b8cc3fa2a44a7 + .quad 0x6881a0dd0dc512e4 + .quad 0x4fe70dc844a5fafe + .quad 0x1f748e6b8f4a5240 + .quad 0x576277cdee01a3ea + .quad 0x1ef38abc234f305f + .quad 0x9a577fbd1405de08 + .quad 0x5e82a51434e62a0d + .quad 0x5ff418726271b7a1 + + // 2^104 * 8 * G + + .quad 0x398e080c1789db9d + .quad 0xa7602025f3e778f5 + .quad 0xfa98894c06bd035d + .quad 0x106a03dc25a966be + .quad 0xe5db47e813b69540 + .quad 0xf35d2a3b432610e1 + .quad 0xac1f26e938781276 + .quad 0x29d4db8ca0a0cb69 + .quad 0xd9ad0aaf333353d0 + .quad 0x38669da5acd309e5 + .quad 0x3c57658ac888f7f0 + .quad 0x4ab38a51052cbefa + + // 2^108 * 1 * G + + .quad 0xdfdacbee4324c0e9 + .quad 0x054442883f955bb7 + .quad 0xdef7aaa8ea31609f + .quad 0x68aee70642287cff + .quad 0xf68fe2e8809de054 + .quad 0xe3bc096a9c82bad1 + .quad 0x076353d40aadbf45 + .quad 0x7b9b1fb5dea1959e + .quad 0xf01cc8f17471cc0c + .quad 0x95242e37579082bb + .quad 0x27776093d3e46b5f + .quad 0x2d13d55a28bd85fb + + // 2^108 * 2 * G + + .quad 0xfac5d2065b35b8da + .quad 0xa8da8a9a85624bb7 + .quad 0xccd2ca913d21cd0f + .quad 0x6b8341ee8bf90d58 + .quad 0xbf019cce7aee7a52 + .quad 0xa8ded2b6e454ead3 + .quad 0x3c619f0b87a8bb19 + .quad 0x3619b5d7560916d8 + .quad 0x3579f26b0282c4b2 + .quad 0x64d592f24fafefae + .quad 0xb7cded7b28c8c7c0 + .quad 0x6a927b6b7173a8d7 + + // 2^108 * 3 * G + + .quad 0x1f6db24f986e4656 + .quad 0x1021c02ed1e9105b + .quad 0xf8ff3fff2cc0a375 + .quad 0x1d2a6bf8c6c82592 + .quad 0x8d7040863ece88eb + .quad 0xf0e307a980eec08c + .quad 0xac2250610d788fda + .quad 0x056d92a43a0d478d + .quad 0x1b05a196fc3da5a1 + .quad 0x77d7a8c243b59ed0 + .quad 0x06da3d6297d17918 + .quad 0x66fbb494f12353f7 + + // 2^108 * 4 * G + + .quad 0x751a50b9d85c0fb8 + .quad 0xd1afdc258bcf097b + .quad 0x2f16a6a38309a969 + .quad 0x14ddff9ee5b00659 + .quad 0xd6d70996f12309d6 + .quad 0xdbfb2385e9c3d539 + .quad 0x46d602b0f7552411 + .quad 0x270a0b0557843e0c + .quad 0x61ff0640a7862bcc + .quad 0x81cac09a5f11abfe + .quad 0x9047830455d12abb + .quad 0x19a4bde1945ae873 + + // 2^108 * 5 * G + + .quad 0x9b9f26f520a6200a + .quad 0x64804443cf13eaf8 + .quad 0x8a63673f8631edd3 + .quad 0x72bbbce11ed39dc1 + .quad 0x40c709dec076c49f + .quad 0x657bfaf27f3e53f6 + .quad 0x40662331eca042c4 + .quad 0x14b375487eb4df04 + .quad 0xae853c94ab66dc47 + .quad 0xeb62343edf762d6e + .quad 0xf08e0e186fb2f7d1 + .quad 0x4f0b1c02700ab37a + + // 2^108 * 6 * G + + .quad 0xe1706787d81951fa + .quad 0xa10a2c8eb290c77b + .quad 0xe7382fa03ed66773 + .quad 0x0a4d84710bcc4b54 + .quad 0x79fd21ccc1b2e23f + .quad 0x4ae7c281453df52a + .quad 0xc8172ec9d151486b + .quad 0x68abe9443e0a7534 + .quad 0xda12c6c407831dcb + .quad 0x0da230d74d5c510d + .quad 0x4ab1531e6bd404e1 + .quad 0x4106b166bcf440ef + + // 2^108 * 7 * G + + .quad 0x02e57a421cd23668 + .quad 0x4ad9fb5d0eaef6fd + .quad 0x954e6727b1244480 + .quad 0x7f792f9d2699f331 + .quad 0xa485ccd539e4ecf2 + .quad 0x5aa3f3ad0555bab5 + .quad 0x145e3439937df82d + .quad 0x1238b51e1214283f + .quad 0x0b886b925fd4d924 + .quad 0x60906f7a3626a80d + .quad 0xecd367b4b98abd12 + .quad 0x2876beb1def344cf + + // 2^108 * 8 * G + + .quad 0xdc84e93563144691 + .quad 
0x632fe8a0d61f23f4 + .quad 0x4caa800612a9a8d5 + .quad 0x48f9dbfa0e9918d3 + .quad 0xd594b3333a8a85f8 + .quad 0x4ea37689e78d7d58 + .quad 0x73bf9f455e8e351f + .quad 0x5507d7d2bc41ebb4 + .quad 0x1ceb2903299572fc + .quad 0x7c8ccaa29502d0ee + .quad 0x91bfa43411cce67b + .quad 0x5784481964a831e7 + + // 2^112 * 1 * G + + .quad 0xda7c2b256768d593 + .quad 0x98c1c0574422ca13 + .quad 0xf1a80bd5ca0ace1d + .quad 0x29cdd1adc088a690 + .quad 0xd6cfd1ef5fddc09c + .quad 0xe82b3efdf7575dce + .quad 0x25d56b5d201634c2 + .quad 0x3041c6bb04ed2b9b + .quad 0x0ff2f2f9d956e148 + .quad 0xade797759f356b2e + .quad 0x1a4698bb5f6c025c + .quad 0x104bbd6814049a7b + + // 2^112 * 2 * G + + .quad 0x51f0fd3168f1ed67 + .quad 0x2c811dcdd86f3bc2 + .quad 0x44dc5c4304d2f2de + .quad 0x5be8cc57092a7149 + .quad 0xa95d9a5fd67ff163 + .quad 0xe92be69d4cc75681 + .quad 0xb7f8024cde20f257 + .quad 0x204f2a20fb072df5 + .quad 0xc8143b3d30ebb079 + .quad 0x7589155abd652e30 + .quad 0x653c3c318f6d5c31 + .quad 0x2570fb17c279161f + + // 2^112 * 3 * G + + .quad 0x3efa367f2cb61575 + .quad 0xf5f96f761cd6026c + .quad 0xe8c7142a65b52562 + .quad 0x3dcb65ea53030acd + .quad 0x192ea9550bb8245a + .quad 0xc8e6fba88f9050d1 + .quad 0x7986ea2d88a4c935 + .quad 0x241c5f91de018668 + .quad 0x28d8172940de6caa + .quad 0x8fbf2cf022d9733a + .quad 0x16d7fcdd235b01d1 + .quad 0x08420edd5fcdf0e5 + + // 2^112 * 4 * G + + .quad 0xcdff20ab8362fa4a + .quad 0x57e118d4e21a3e6e + .quad 0xe3179617fc39e62b + .quad 0x0d9a53efbc1769fd + .quad 0x0358c34e04f410ce + .quad 0xb6135b5a276e0685 + .quad 0x5d9670c7ebb91521 + .quad 0x04d654f321db889c + .quad 0x5e7dc116ddbdb5d5 + .quad 0x2954deb68da5dd2d + .quad 0x1cb608173334a292 + .quad 0x4a7a4f2618991ad7 + + // 2^112 * 5 * G + + .quad 0xf4a718025fb15f95 + .quad 0x3df65f346b5c1b8f + .quad 0xcdfcf08500e01112 + .quad 0x11b50c4cddd31848 + .quad 0x24c3b291af372a4b + .quad 0x93da8270718147f2 + .quad 0xdd84856486899ef2 + .quad 0x4a96314223e0ee33 + .quad 0xa6e8274408a4ffd6 + .quad 0x738e177e9c1576d9 + .quad 0x773348b63d02b3f2 + .quad 0x4f4bce4dce6bcc51 + + // 2^112 * 6 * G + + .quad 0xa71fce5ae2242584 + .quad 0x26ea725692f58a9e + .quad 0xd21a09d71cea3cf4 + .quad 0x73fcdd14b71c01e6 + .quad 0x30e2616ec49d0b6f + .quad 0xe456718fcaec2317 + .quad 0x48eb409bf26b4fa6 + .quad 0x3042cee561595f37 + .quad 0x427e7079449bac41 + .quad 0x855ae36dbce2310a + .quad 0x4cae76215f841a7c + .quad 0x389e740c9a9ce1d6 + + // 2^112 * 7 * G + + .quad 0x64fcb3ae34dcb9ce + .quad 0x97500323e348d0ad + .quad 0x45b3f07d62c6381b + .quad 0x61545379465a6788 + .quad 0xc9bd78f6570eac28 + .quad 0xe55b0b3227919ce1 + .quad 0x65fc3eaba19b91ed + .quad 0x25c425e5d6263690 + .quad 0x3f3e06a6f1d7de6e + .quad 0x3ef976278e062308 + .quad 0x8c14f6264e8a6c77 + .quad 0x6539a08915484759 + + // 2^112 * 8 * G + + .quad 0xe9d21f74c3d2f773 + .quad 0xc150544125c46845 + .quad 0x624e5ce8f9b99e33 + .quad 0x11c5e4aac5cd186c + .quad 0xddc4dbd414bb4a19 + .quad 0x19b2bc3c98424f8e + .quad 0x48a89fd736ca7169 + .quad 0x0f65320ef019bd90 + .quad 0xd486d1b1cafde0c6 + .quad 0x4f3fe6e3163b5181 + .quad 0x59a8af0dfaf2939a + .quad 0x4cabc7bdec33072a + + // 2^116 * 1 * G + + .quad 0x16faa8fb532f7428 + .quad 0xdbd42ea046a4e272 + .quad 0x5337653b8b9ea480 + .quad 0x4065947223973f03 + .quad 0xf7c0a19c1a54a044 + .quad 0x4a1c5e2477bd9fbb + .quad 0xa6e3ca115af22972 + .quad 0x1819bb953f2e9e0d + .quad 0x498fbb795e042e84 + .quad 0x7d0dd89a7698b714 + .quad 0x8bfb0ba427fe6295 + .quad 0x36ba82e721200524 + + // 2^116 * 2 * G + + .quad 0xd60ecbb74245ec41 + .quad 0xfd9be89e34348716 + .quad 0xc9240afee42284de + .quad 0x4472f648d0531db4 + .quad 
0xc8d69d0a57274ed5 + .quad 0x45ba803260804b17 + .quad 0xdf3cda102255dfac + .quad 0x77d221232709b339 + .quad 0x498a6d7064ad94d8 + .quad 0xa5b5c8fd9af62263 + .quad 0x8ca8ed0545c141f4 + .quad 0x2c63bec3662d358c + + // 2^116 * 3 * G + + .quad 0x7fe60d8bea787955 + .quad 0xb9dc117eb5f401b7 + .quad 0x91c7c09a19355cce + .quad 0x22692ef59442bedf + .quad 0x9a518b3a8586f8bf + .quad 0x9ee71af6cbb196f0 + .quad 0xaa0625e6a2385cf2 + .quad 0x1deb2176ddd7c8d1 + .quad 0x8563d19a2066cf6c + .quad 0x401bfd8c4dcc7cd7 + .quad 0xd976a6becd0d8f62 + .quad 0x67cfd773a278b05e + + // 2^116 * 4 * G + + .quad 0x8dec31faef3ee475 + .quad 0x99dbff8a9e22fd92 + .quad 0x512d11594e26cab1 + .quad 0x0cde561eec4310b9 + .quad 0x2d5fa9855a4e586a + .quad 0x65f8f7a449beab7e + .quad 0xaa074dddf21d33d3 + .quad 0x185cba721bcb9dee + .quad 0x93869da3f4e3cb41 + .quad 0xbf0392f540f7977e + .quad 0x026204fcd0463b83 + .quad 0x3ec91a769eec6eed + + // 2^116 * 5 * G + + .quad 0x1e9df75bf78166ad + .quad 0x4dfda838eb0cd7af + .quad 0xba002ed8c1eaf988 + .quad 0x13fedb3e11f33cfc + .quad 0x0fad2fb7b0a3402f + .quad 0x46615ecbfb69f4a8 + .quad 0xf745bcc8c5f8eaa6 + .quad 0x7a5fa8794a94e896 + .quad 0x52958faa13cd67a1 + .quad 0x965ee0818bdbb517 + .quad 0x16e58daa2e8845b3 + .quad 0x357d397d5499da8f + + // 2^116 * 6 * G + + .quad 0x1ebfa05fb0bace6c + .quad 0xc934620c1caf9a1e + .quad 0xcc771cc41d82b61a + .quad 0x2d94a16aa5f74fec + .quad 0x481dacb4194bfbf8 + .quad 0x4d77e3f1bae58299 + .quad 0x1ef4612e7d1372a0 + .quad 0x3a8d867e70ff69e1 + .quad 0x6f58cd5d55aff958 + .quad 0xba3eaa5c75567721 + .quad 0x75c123999165227d + .quad 0x69be1343c2f2b35e + + // 2^116 * 7 * G + + .quad 0x0e091d5ee197c92a + .quad 0x4f51019f2945119f + .quad 0x143679b9f034e99c + .quad 0x7d88112e4d24c696 + .quad 0x82bbbdac684b8de3 + .quad 0xa2f4c7d03fca0718 + .quad 0x337f92fbe096aaa8 + .quad 0x200d4d8c63587376 + .quad 0x208aed4b4893b32b + .quad 0x3efbf23ebe59b964 + .quad 0xd762deb0dba5e507 + .quad 0x69607bd681bd9d94 + + // 2^116 * 8 * G + + .quad 0xf6be021068de1ce1 + .quad 0xe8d518e70edcbc1f + .quad 0xe3effdd01b5505a5 + .quad 0x35f63353d3ec3fd0 + .quad 0x3b7f3bd49323a902 + .quad 0x7c21b5566b2c6e53 + .quad 0xe5ba8ff53a7852a7 + .quad 0x28bc77a5838ece00 + .quad 0x63ba78a8e25d8036 + .quad 0x63651e0094333490 + .quad 0x48d82f20288ce532 + .quad 0x3a31abfa36b57524 + + // 2^120 * 1 * G + + .quad 0x239e9624089c0a2e + .quad 0xc748c4c03afe4738 + .quad 0x17dbed2a764fa12a + .quad 0x639b93f0321c8582 + .quad 0xc08f788f3f78d289 + .quad 0xfe30a72ca1404d9f + .quad 0xf2778bfccf65cc9d + .quad 0x7ee498165acb2021 + .quad 0x7bd508e39111a1c3 + .quad 0x2b2b90d480907489 + .quad 0xe7d2aec2ae72fd19 + .quad 0x0edf493c85b602a6 + + // 2^120 * 2 * G + + .quad 0xaecc8158599b5a68 + .quad 0xea574f0febade20e + .quad 0x4fe41d7422b67f07 + .quad 0x403b92e3019d4fb4 + .quad 0x6767c4d284764113 + .quad 0xa090403ff7f5f835 + .quad 0x1c8fcffacae6bede + .quad 0x04c00c54d1dfa369 + .quad 0x4dc22f818b465cf8 + .quad 0x71a0f35a1480eff8 + .quad 0xaee8bfad04c7d657 + .quad 0x355bb12ab26176f4 + + // 2^120 * 3 * G + + .quad 0xa71e64cc7493bbf4 + .quad 0xe5bd84d9eca3b0c3 + .quad 0x0a6bc50cfa05e785 + .quad 0x0f9b8132182ec312 + .quad 0xa301dac75a8c7318 + .quad 0xed90039db3ceaa11 + .quad 0x6f077cbf3bae3f2d + .quad 0x7518eaf8e052ad8e + .quad 0xa48859c41b7f6c32 + .quad 0x0f2d60bcf4383298 + .quad 0x1815a929c9b1d1d9 + .quad 0x47c3871bbb1755c4 + + // 2^120 * 4 * G + + .quad 0x5144539771ec4f48 + .quad 0xf805b17dc98c5d6e + .quad 0xf762c11a47c3c66b + .quad 0x00b89b85764699dc + .quad 0xfbe65d50c85066b0 + .quad 0x62ecc4b0b3a299b0 + .quad 0xe53754ea441ae8e0 + .quad 
0x08fea02ce8d48d5f + .quad 0x824ddd7668deead0 + .quad 0xc86445204b685d23 + .quad 0xb514cfcd5d89d665 + .quad 0x473829a74f75d537 + + // 2^120 * 5 * G + + .quad 0x82d2da754679c418 + .quad 0xe63bd7d8b2618df0 + .quad 0x355eef24ac47eb0a + .quad 0x2078684c4833c6b4 + .quad 0x23d9533aad3902c9 + .quad 0x64c2ddceef03588f + .quad 0x15257390cfe12fb4 + .quad 0x6c668b4d44e4d390 + .quad 0x3b48cf217a78820c + .quad 0xf76a0ab281273e97 + .quad 0xa96c65a78c8eed7b + .quad 0x7411a6054f8a433f + + // 2^120 * 6 * G + + .quad 0x4d659d32b99dc86d + .quad 0x044cdc75603af115 + .quad 0xb34c712cdcc2e488 + .quad 0x7c136574fb8134ff + .quad 0x579ae53d18b175b4 + .quad 0x68713159f392a102 + .quad 0x8455ecba1eef35f5 + .quad 0x1ec9a872458c398f + .quad 0xb8e6a4d400a2509b + .quad 0x9b81d7020bc882b4 + .quad 0x57e7cc9bf1957561 + .quad 0x3add88a5c7cd6460 + + // 2^120 * 7 * G + + .quad 0xab895770b635dcf2 + .quad 0x02dfef6cf66c1fbc + .quad 0x85530268beb6d187 + .quad 0x249929fccc879e74 + .quad 0x85c298d459393046 + .quad 0x8f7e35985ff659ec + .quad 0x1d2ca22af2f66e3a + .quad 0x61ba1131a406a720 + .quad 0xa3d0a0f116959029 + .quad 0x023b6b6cba7ebd89 + .quad 0x7bf15a3e26783307 + .quad 0x5620310cbbd8ece7 + + // 2^120 * 8 * G + + .quad 0x528993434934d643 + .quad 0xb9dbf806a51222f5 + .quad 0x8f6d878fc3f41c22 + .quad 0x37676a2a4d9d9730 + .quad 0x6646b5f477e285d6 + .quad 0x40e8ff676c8f6193 + .quad 0xa6ec7311abb594dd + .quad 0x7ec846f3658cec4d + .quad 0x9b5e8f3f1da22ec7 + .quad 0x130f1d776c01cd13 + .quad 0x214c8fcfa2989fb8 + .quad 0x6daaf723399b9dd5 + + // 2^124 * 1 * G + + .quad 0x591e4a5610628564 + .quad 0x2a4bb87ca8b4df34 + .quad 0xde2a2572e7a38e43 + .quad 0x3cbdabd9fee5046e + .quad 0x81aebbdd2cd13070 + .quad 0x962e4325f85a0e9e + .quad 0xde9391aacadffecb + .quad 0x53177fda52c230e6 + .quad 0xa7bc970650b9de79 + .quad 0x3d12a7fbc301b59b + .quad 0x02652e68d36ae38c + .quad 0x79d739835a6199dc + + // 2^124 * 2 * G + + .quad 0xd9354df64131c1bd + .quad 0x758094a186ec5822 + .quad 0x4464ee12e459f3c2 + .quad 0x6c11fce4cb133282 + .quad 0x21c9d9920d591737 + .quad 0x9bea41d2e9b46cd6 + .quad 0xe20e84200d89bfca + .quad 0x79d99f946eae5ff8 + .quad 0xf17b483568673205 + .quad 0x387deae83caad96c + .quad 0x61b471fd56ffe386 + .quad 0x31741195b745a599 + + // 2^124 * 3 * G + + .quad 0xe8d10190b77a360b + .quad 0x99b983209995e702 + .quad 0xbd4fdff8fa0247aa + .quad 0x2772e344e0d36a87 + .quad 0x17f8ba683b02a047 + .quad 0x50212096feefb6c8 + .quad 0x70139be21556cbe2 + .quad 0x203e44a11d98915b + .quad 0xd6863eba37b9e39f + .quad 0x105bc169723b5a23 + .quad 0x104f6459a65c0762 + .quad 0x567951295b4d38d4 + + // 2^124 * 4 * G + + .quad 0x535fd60613037524 + .quad 0xe210adf6b0fbc26a + .quad 0xac8d0a9b23e990ae + .quad 0x47204d08d72fdbf9 + .quad 0x07242eb30d4b497f + .quad 0x1ef96306b9bccc87 + .quad 0x37950934d8116f45 + .quad 0x05468d6201405b04 + .quad 0x00f565a9f93267de + .quad 0xcecfd78dc0d58e8a + .quad 0xa215e2dcf318e28e + .quad 0x4599ee919b633352 + + // 2^124 * 5 * G + + .quad 0xd3c220ca70e0e76b + .quad 0xb12bea58ea9f3094 + .quad 0x294ddec8c3271282 + .quad 0x0c3539e1a1d1d028 + .quad 0xac746d6b861ae579 + .quad 0x31ab0650f6aea9dc + .quad 0x241d661140256d4c + .quad 0x2f485e853d21a5de + .quad 0x329744839c0833f3 + .quad 0x6fe6257fd2abc484 + .quad 0x5327d1814b358817 + .quad 0x65712585893fe9bc + + // 2^124 * 6 * G + + .quad 0x9c102fb732a61161 + .quad 0xe48e10dd34d520a8 + .quad 0x365c63546f9a9176 + .quad 0x32f6fe4c046f6006 + .quad 0x81c29f1bd708ee3f + .quad 0xddcb5a05ae6407d0 + .quad 0x97aec1d7d2a3eba7 + .quad 0x1590521a91d50831 + .quad 0x40a3a11ec7910acc + .quad 0x9013dff8f16d27ae + .quad 
0x1a9720d8abb195d4 + .quad 0x1bb9fe452ea98463 + + // 2^124 * 7 * G + + .quad 0xe9d1d950b3d54f9e + .quad 0x2d5f9cbee00d33c1 + .quad 0x51c2c656a04fc6ac + .quad 0x65c091ee3c1cbcc9 + .quad 0xcf5e6c95cc36747c + .quad 0x294201536b0bc30d + .quad 0x453ac67cee797af0 + .quad 0x5eae6ab32a8bb3c9 + .quad 0x7083661114f118ea + .quad 0x2b37b87b94349cad + .quad 0x7273f51cb4e99f40 + .quad 0x78a2a95823d75698 + + // 2^124 * 8 * G + + .quad 0xa2b072e95c8c2ace + .quad 0x69cffc96651e9c4b + .quad 0x44328ef842e7b42b + .quad 0x5dd996c122aadeb3 + .quad 0xb4f23c425ef83207 + .quad 0xabf894d3c9a934b5 + .quad 0xd0708c1339fd87f7 + .quad 0x1876789117166130 + .quad 0x925b5ef0670c507c + .quad 0x819bc842b93c33bf + .quad 0x10792e9a70dd003f + .quad 0x59ad4b7a6e28dc74 + + // 2^128 * 1 * G + + .quad 0x5f3a7562eb3dbe47 + .quad 0xf7ea38548ebda0b8 + .quad 0x00c3e53145747299 + .quad 0x1304e9e71627d551 + .quad 0x583b04bfacad8ea2 + .quad 0x29b743e8148be884 + .quad 0x2b1e583b0810c5db + .quad 0x2b5449e58eb3bbaa + .quad 0x789814d26adc9cfe + .quad 0x3c1bab3f8b48dd0b + .quad 0xda0fe1fff979c60a + .quad 0x4468de2d7c2dd693 + + // 2^128 * 2 * G + + .quad 0x51bb355e9419469e + .quad 0x33e6dc4c23ddc754 + .quad 0x93a5b6d6447f9962 + .quad 0x6cce7c6ffb44bd63 + .quad 0x4b9ad8c6f86307ce + .quad 0x21113531435d0c28 + .quad 0xd4a866c5657a772c + .quad 0x5da6427e63247352 + .quad 0x1a94c688deac22ca + .quad 0xb9066ef7bbae1ff8 + .quad 0x88ad8c388d59580f + .quad 0x58f29abfe79f2ca8 + + // 2^128 * 3 * G + + .quad 0xe90ecfab8de73e68 + .quad 0x54036f9f377e76a5 + .quad 0xf0495b0bbe015982 + .quad 0x577629c4a7f41e36 + .quad 0x4b5a64bf710ecdf6 + .quad 0xb14ce538462c293c + .quad 0x3643d056d50b3ab9 + .quad 0x6af93724185b4870 + .quad 0x3220024509c6a888 + .quad 0xd2e036134b558973 + .quad 0x83e236233c33289f + .quad 0x701f25bb0caec18f + + // 2^128 * 4 * G + + .quad 0xc3a8b0f8e4616ced + .quad 0xf700660e9e25a87d + .quad 0x61e3061ff4bca59c + .quad 0x2e0c92bfbdc40be9 + .quad 0x9d18f6d97cbec113 + .quad 0x844a06e674bfdbe4 + .quad 0x20f5b522ac4e60d6 + .quad 0x720a5bc050955e51 + .quad 0x0c3f09439b805a35 + .quad 0xe84e8b376242abfc + .quad 0x691417f35c229346 + .quad 0x0e9b9cbb144ef0ec + + // 2^128 * 5 * G + + .quad 0xfbbad48ffb5720ad + .quad 0xee81916bdbf90d0e + .quad 0xd4813152635543bf + .quad 0x221104eb3f337bd8 + .quad 0x8dee9bd55db1beee + .quad 0xc9c3ab370a723fb9 + .quad 0x44a8f1bf1c68d791 + .quad 0x366d44191cfd3cde + .quad 0x9e3c1743f2bc8c14 + .quad 0x2eda26fcb5856c3b + .quad 0xccb82f0e68a7fb97 + .quad 0x4167a4e6bc593244 + + // 2^128 * 6 * G + + .quad 0x643b9d2876f62700 + .quad 0x5d1d9d400e7668eb + .quad 0x1b4b430321fc0684 + .quad 0x7938bb7e2255246a + .quad 0xc2be2665f8ce8fee + .quad 0xe967ff14e880d62c + .quad 0xf12e6e7e2f364eee + .quad 0x34b33370cb7ed2f6 + .quad 0xcdc591ee8681d6cc + .quad 0xce02109ced85a753 + .quad 0xed7485c158808883 + .quad 0x1176fc6e2dfe65e4 + + // 2^128 * 7 * G + + .quad 0xb4af6cd05b9c619b + .quad 0x2ddfc9f4b2a58480 + .quad 0x3d4fa502ebe94dc4 + .quad 0x08fc3a4c677d5f34 + .quad 0xdb90e28949770eb8 + .quad 0x98fbcc2aacf440a3 + .quad 0x21354ffeded7879b + .quad 0x1f6a3e54f26906b6 + .quad 0x60a4c199d30734ea + .quad 0x40c085b631165cd6 + .quad 0xe2333e23f7598295 + .quad 0x4f2fad0116b900d1 + + // 2^128 * 8 * G + + .quad 0x44beb24194ae4e54 + .quad 0x5f541c511857ef6c + .quad 0xa61e6b2d368d0498 + .quad 0x445484a4972ef7ab + .quad 0x962cd91db73bb638 + .quad 0xe60577aafc129c08 + .quad 0x6f619b39f3b61689 + .quad 0x3451995f2944ee81 + .quad 0x9152fcd09fea7d7c + .quad 0x4a816c94b0935cf6 + .quad 0x258e9aaa47285c40 + .quad 0x10b89ca6042893b7 + + // 2^132 * 1 * G + + .quad 
0x9b2a426e3b646025 + .quad 0x32127190385ce4cf + .quad 0xa25cffc2dd6dea45 + .quad 0x06409010bea8de75 + .quad 0xd67cded679d34aa0 + .quad 0xcc0b9ec0cc4db39f + .quad 0xa535a456e35d190f + .quad 0x2e05d9eaf61f6fef + .quad 0xc447901ad61beb59 + .quad 0x661f19bce5dc880a + .quad 0x24685482b7ca6827 + .quad 0x293c778cefe07f26 + + // 2^132 * 2 * G + + .quad 0x86809e7007069096 + .quad 0xaad75b15e4e50189 + .quad 0x07f35715a21a0147 + .quad 0x0487f3f112815d5e + .quad 0x16c795d6a11ff200 + .quad 0xcb70d0e2b15815c9 + .quad 0x89f293209b5395b5 + .quad 0x50b8c2d031e47b4f + .quad 0x48350c08068a4962 + .quad 0x6ffdd05351092c9a + .quad 0x17af4f4aaf6fc8dd + .quad 0x4b0553b53cdba58b + + // 2^132 * 3 * G + + .quad 0x9c65fcbe1b32ff79 + .quad 0xeb75ea9f03b50f9b + .quad 0xfced2a6c6c07e606 + .quad 0x35106cd551717908 + .quad 0xbf05211b27c152d4 + .quad 0x5ec26849bd1af639 + .quad 0x5e0b2caa8e6fab98 + .quad 0x054c8bdd50bd0840 + .quad 0x38a0b12f1dcf073d + .quad 0x4b60a8a3b7f6a276 + .quad 0xfed5ac25d3404f9a + .quad 0x72e82d5e5505c229 + + // 2^132 * 4 * G + + .quad 0x6b0b697ff0d844c8 + .quad 0xbb12f85cd979cb49 + .quad 0xd2a541c6c1da0f1f + .quad 0x7b7c242958ce7211 + .quad 0x00d9cdfd69771d02 + .quad 0x410276cd6cfbf17e + .quad 0x4c45306c1cb12ec7 + .quad 0x2857bf1627500861 + .quad 0x9f21903f0101689e + .quad 0xd779dfd3bf861005 + .quad 0xa122ee5f3deb0f1b + .quad 0x510df84b485a00d4 + + // 2^132 * 5 * G + + .quad 0xa54133bb9277a1fa + .quad 0x74ec3b6263991237 + .quad 0x1a3c54dc35d2f15a + .quad 0x2d347144e482ba3a + .quad 0x24b3c887c70ac15e + .quad 0xb0f3a557fb81b732 + .quad 0x9b2cde2fe578cc1b + .quad 0x4cf7ed0703b54f8e + .quad 0x6bd47c6598fbee0f + .quad 0x9e4733e2ab55be2d + .quad 0x1093f624127610c5 + .quad 0x4e05e26ad0a1eaa4 + + // 2^132 * 6 * G + + .quad 0xda9b6b624b531f20 + .quad 0x429a760e77509abb + .quad 0xdbe9f522e823cb80 + .quad 0x618f1856880c8f82 + .quad 0x1833c773e18fe6c0 + .quad 0xe3c4711ad3c87265 + .quad 0x3bfd3c4f0116b283 + .quad 0x1955875eb4cd4db8 + .quad 0x6da6de8f0e399799 + .quad 0x7ad61aa440fda178 + .quad 0xb32cd8105e3563dd + .quad 0x15f6beae2ae340ae + + // 2^132 * 7 * G + + .quad 0x862bcb0c31ec3a62 + .quad 0x810e2b451138f3c2 + .quad 0x788ec4b839dac2a4 + .quad 0x28f76867ae2a9281 + .quad 0xba9a0f7b9245e215 + .quad 0xf368612dd98c0dbb + .quad 0x2e84e4cbf220b020 + .quad 0x6ba92fe962d90eda + .quad 0x3e4df9655884e2aa + .quad 0xbd62fbdbdbd465a5 + .quad 0xd7596caa0de9e524 + .quad 0x6e8042ccb2b1b3d7 + + // 2^132 * 8 * G + + .quad 0xf10d3c29ce28ca6e + .quad 0xbad34540fcb6093d + .quad 0xe7426ed7a2ea2d3f + .quad 0x08af9d4e4ff298b9 + .quad 0x1530653616521f7e + .quad 0x660d06b896203dba + .quad 0x2d3989bc545f0879 + .quad 0x4b5303af78ebd7b0 + .quad 0x72f8a6c3bebcbde8 + .quad 0x4f0fca4adc3a8e89 + .quad 0x6fa9d4e8c7bfdf7a + .quad 0x0dcf2d679b624eb7 + + // 2^136 * 1 * G + + .quad 0x3d5947499718289c + .quad 0x12ebf8c524533f26 + .quad 0x0262bfcb14c3ef15 + .quad 0x20b878d577b7518e + .quad 0x753941be5a45f06e + .quad 0xd07caeed6d9c5f65 + .quad 0x11776b9c72ff51b6 + .quad 0x17d2d1d9ef0d4da9 + .quad 0x27f2af18073f3e6a + .quad 0xfd3fe519d7521069 + .quad 0x22e3b72c3ca60022 + .quad 0x72214f63cc65c6a7 + + // 2^136 * 2 * G + + .quad 0xb4e37f405307a693 + .quad 0xaba714d72f336795 + .quad 0xd6fbd0a773761099 + .quad 0x5fdf48c58171cbc9 + .quad 0x1d9db7b9f43b29c9 + .quad 0xd605824a4f518f75 + .quad 0xf2c072bd312f9dc4 + .quad 0x1f24ac855a1545b0 + .quad 0x24d608328e9505aa + .quad 0x4748c1d10c1420ee + .quad 0xc7ffe45c06fb25a2 + .quad 0x00ba739e2ae395e6 + + // 2^136 * 3 * G + + .quad 0x592e98de5c8790d6 + .quad 0xe5bfb7d345c2a2df + .quad 0x115a3b60f9b49922 + .quad 
0x03283a3e67ad78f3 + .quad 0xae4426f5ea88bb26 + .quad 0x360679d984973bfb + .quad 0x5c9f030c26694e50 + .quad 0x72297de7d518d226 + .quad 0x48241dc7be0cb939 + .quad 0x32f19b4d8b633080 + .quad 0xd3dfc90d02289308 + .quad 0x05e1296846271945 + + // 2^136 * 4 * G + + .quad 0xba82eeb32d9c495a + .quad 0xceefc8fcf12bb97c + .quad 0xb02dabae93b5d1e0 + .quad 0x39c00c9c13698d9b + .quad 0xadbfbbc8242c4550 + .quad 0xbcc80cecd03081d9 + .quad 0x843566a6f5c8df92 + .quad 0x78cf25d38258ce4c + .quad 0x15ae6b8e31489d68 + .quad 0xaa851cab9c2bf087 + .quad 0xc9a75a97f04efa05 + .quad 0x006b52076b3ff832 + + // 2^136 * 5 * G + + .quad 0x29e0cfe19d95781c + .quad 0xb681df18966310e2 + .quad 0x57df39d370516b39 + .quad 0x4d57e3443bc76122 + .quad 0xf5cb7e16b9ce082d + .quad 0x3407f14c417abc29 + .quad 0xd4b36bce2bf4a7ab + .quad 0x7de2e9561a9f75ce + .quad 0xde70d4f4b6a55ecb + .quad 0x4801527f5d85db99 + .quad 0xdbc9c440d3ee9a81 + .quad 0x6b2a90af1a6029ed + + // 2^136 * 6 * G + + .quad 0x6923f4fc9ae61e97 + .quad 0x5735281de03f5fd1 + .quad 0xa764ae43e6edd12d + .quad 0x5fd8f4e9d12d3e4a + .quad 0x77ebf3245bb2d80a + .quad 0xd8301b472fb9079b + .quad 0xc647e6f24cee7333 + .quad 0x465812c8276c2109 + .quad 0x4d43beb22a1062d9 + .quad 0x7065fb753831dc16 + .quad 0x180d4a7bde2968d7 + .quad 0x05b32c2b1cb16790 + + // 2^136 * 7 * G + + .quad 0xc8c05eccd24da8fd + .quad 0xa1cf1aac05dfef83 + .quad 0xdbbeeff27df9cd61 + .quad 0x3b5556a37b471e99 + .quad 0xf7fca42c7ad58195 + .quad 0x3214286e4333f3cc + .quad 0xb6c29d0d340b979d + .quad 0x31771a48567307e1 + .quad 0x32b0c524e14dd482 + .quad 0xedb351541a2ba4b6 + .quad 0xa3d16048282b5af3 + .quad 0x4fc079d27a7336eb + + // 2^136 * 8 * G + + .quad 0x51c938b089bf2f7f + .quad 0x2497bd6502dfe9a7 + .quad 0xffffc09c7880e453 + .quad 0x124567cecaf98e92 + .quad 0xdc348b440c86c50d + .quad 0x1337cbc9cc94e651 + .quad 0x6422f74d643e3cb9 + .quad 0x241170c2bae3cd08 + .quad 0x3ff9ab860ac473b4 + .quad 0xf0911dee0113e435 + .quad 0x4ae75060ebc6c4af + .quad 0x3f8612966c87000d + + // 2^140 * 1 * G + + .quad 0x0c9c5303f7957be4 + .quad 0xa3c31a20e085c145 + .quad 0xb0721d71d0850050 + .quad 0x0aba390eab0bf2da + .quad 0x529fdffe638c7bf3 + .quad 0xdf2b9e60388b4995 + .quad 0xe027b34f1bad0249 + .quad 0x7bc92fc9b9fa74ed + .quad 0x9f97ef2e801ad9f9 + .quad 0x83697d5479afda3a + .quad 0xe906b3ffbd596b50 + .quad 0x02672b37dd3fb8e0 + + // 2^140 * 2 * G + + .quad 0x48b2ca8b260885e4 + .quad 0xa4286bec82b34c1c + .quad 0x937e1a2617f58f74 + .quad 0x741d1fcbab2ca2a5 + .quad 0xee9ba729398ca7f5 + .quad 0xeb9ca6257a4849db + .quad 0x29eb29ce7ec544e1 + .quad 0x232ca21ef736e2c8 + .quad 0xbf61423d253fcb17 + .quad 0x08803ceafa39eb14 + .quad 0xf18602df9851c7af + .quad 0x0400f3a049e3414b + + // 2^140 * 3 * G + + .quad 0xabce0476ba61c55b + .quad 0x36a3d6d7c4d39716 + .quad 0x6eb259d5e8d82d09 + .quad 0x0c9176e984d756fb + .quad 0x2efba412a06e7b06 + .quad 0x146785452c8d2560 + .quad 0xdf9713ebd67a91c7 + .quad 0x32830ac7157eadf3 + .quad 0x0e782a7ab73769e8 + .quad 0x04a05d7875b18e2c + .quad 0x29525226ebcceae1 + .quad 0x0d794f8383eba820 + + // 2^140 * 4 * G + + .quad 0xff35f5cb9e1516f4 + .quad 0xee805bcf648aae45 + .quad 0xf0d73c2bb93a9ef3 + .quad 0x097b0bf22092a6c2 + .quad 0x7be44ce7a7a2e1ac + .quad 0x411fd93efad1b8b7 + .quad 0x1734a1d70d5f7c9b + .quad 0x0d6592233127db16 + .quad 0xc48bab1521a9d733 + .quad 0xa6c2eaead61abb25 + .quad 0x625c6c1cc6cb4305 + .quad 0x7fc90fea93eb3a67 + + // 2^140 * 5 * G + + .quad 0x0408f1fe1f5c5926 + .quad 0x1a8f2f5e3b258bf4 + .quad 0x40a951a2fdc71669 + .quad 0x6598ee93c98b577e + .quad 0xc527deb59c7cb23d + .quad 0x955391695328404e + .quad 
0xd64392817ccf2c7a + .quad 0x6ce97dabf7d8fa11 + .quad 0x25b5a8e50ef7c48f + .quad 0xeb6034116f2ce532 + .quad 0xc5e75173e53de537 + .quad 0x73119fa08c12bb03 + + // 2^140 * 6 * G + + .quad 0xed30129453f1a4cb + .quad 0xbce621c9c8f53787 + .quad 0xfacb2b1338bee7b9 + .quad 0x3025798a9ea8428c + .quad 0x7845b94d21f4774d + .quad 0xbf62f16c7897b727 + .quad 0x671857c03c56522b + .quad 0x3cd6a85295621212 + .quad 0x3fecde923aeca999 + .quad 0xbdaa5b0062e8c12f + .quad 0x67b99dfc96988ade + .quad 0x3f52c02852661036 + + // 2^140 * 7 * G + + .quad 0xffeaa48e2a1351c6 + .quad 0x28624754fa7f53d7 + .quad 0x0b5ba9e57582ddf1 + .quad 0x60c0104ba696ac59 + .quad 0x9258bf99eec416c6 + .quad 0xac8a5017a9d2f671 + .quad 0x629549ab16dea4ab + .quad 0x05d0e85c99091569 + .quad 0x051de020de9cbe97 + .quad 0xfa07fc56b50bcf74 + .quad 0x378cec9f0f11df65 + .quad 0x36853c69ab96de4d + + // 2^140 * 8 * G + + .quad 0x36d9b8de78f39b2d + .quad 0x7f42ed71a847b9ec + .quad 0x241cd1d679bd3fde + .quad 0x6a704fec92fbce6b + .quad 0x4433c0b0fac5e7be + .quad 0x724bae854c08dcbe + .quad 0xf1f24cc446978f9b + .quad 0x4a0aff6d62825fc8 + .quad 0xe917fb9e61095301 + .quad 0xc102df9402a092f8 + .quad 0xbf09e2f5fa66190b + .quad 0x681109bee0dcfe37 + + // 2^144 * 1 * G + + .quad 0x559a0cc9782a0dde + .quad 0x551dcdb2ea718385 + .quad 0x7f62865b31ef238c + .quad 0x504aa7767973613d + .quad 0x9c18fcfa36048d13 + .quad 0x29159db373899ddd + .quad 0xdc9f350b9f92d0aa + .quad 0x26f57eee878a19d4 + .quad 0x0cab2cd55687efb1 + .quad 0x5180d162247af17b + .quad 0x85c15a344f5a2467 + .quad 0x4041943d9dba3069 + + // 2^144 * 2 * G + + .quad 0xc3c0eeba43ebcc96 + .quad 0x8d749c9c26ea9caf + .quad 0xd9fa95ee1c77ccc6 + .quad 0x1420a1d97684340f + .quad 0x4b217743a26caadd + .quad 0x47a6b424648ab7ce + .quad 0xcb1d4f7a03fbc9e3 + .quad 0x12d931429800d019 + .quad 0x00c67799d337594f + .quad 0x5e3c5140b23aa47b + .quad 0x44182854e35ff395 + .quad 0x1b4f92314359a012 + + // 2^144 * 3 * G + + .quad 0x3e5c109d89150951 + .quad 0x39cefa912de9696a + .quad 0x20eae43f975f3020 + .quad 0x239b572a7f132dae + .quad 0x33cf3030a49866b1 + .quad 0x251f73d2215f4859 + .quad 0xab82aa4051def4f6 + .quad 0x5ff191d56f9a23f6 + .quad 0x819ed433ac2d9068 + .quad 0x2883ab795fc98523 + .quad 0xef4572805593eb3d + .quad 0x020c526a758f36cb + + // 2^144 * 4 * G + + .quad 0x779834f89ed8dbbc + .quad 0xc8f2aaf9dc7ca46c + .quad 0xa9524cdca3e1b074 + .quad 0x02aacc4615313877 + .quad 0xe931ef59f042cc89 + .quad 0x2c589c9d8e124bb6 + .quad 0xadc8e18aaec75997 + .quad 0x452cfe0a5602c50c + .quad 0x86a0f7a0647877df + .quad 0xbbc464270e607c9f + .quad 0xab17ea25f1fb11c9 + .quad 0x4cfb7d7b304b877b + + // 2^144 * 5 * G + + .quad 0x72b43d6cb89b75fe + .quad 0x54c694d99c6adc80 + .quad 0xb8c3aa373ee34c9f + .quad 0x14b4622b39075364 + .quad 0xe28699c29789ef12 + .quad 0x2b6ecd71df57190d + .quad 0xc343c857ecc970d0 + .quad 0x5b1d4cbc434d3ac5 + .quad 0xb6fb2615cc0a9f26 + .quad 0x3a4f0e2bb88dcce5 + .quad 0x1301498b3369a705 + .quad 0x2f98f71258592dd1 + + // 2^144 * 6 * G + + .quad 0x0c94a74cb50f9e56 + .quad 0x5b1ff4a98e8e1320 + .quad 0x9a2acc2182300f67 + .quad 0x3a6ae249d806aaf9 + .quad 0x2e12ae444f54a701 + .quad 0xfcfe3ef0a9cbd7de + .quad 0xcebf890d75835de0 + .quad 0x1d8062e9e7614554 + .quad 0x657ada85a9907c5a + .quad 0x1a0ea8b591b90f62 + .quad 0x8d0e1dfbdf34b4e9 + .quad 0x298b8ce8aef25ff3 + + // 2^144 * 7 * G + + .quad 0x2a927953eff70cb2 + .quad 0x4b89c92a79157076 + .quad 0x9418457a30a7cf6a + .quad 0x34b8a8404d5ce485 + .quad 0x837a72ea0a2165de + .quad 0x3fab07b40bcf79f6 + .quad 0x521636c77738ae70 + .quad 0x6ba6271803a7d7dc + .quad 0xc26eecb583693335 + .quad 
0xd5a813df63b5fefd + .quad 0xa293aa9aa4b22573 + .quad 0x71d62bdd465e1c6a + + // 2^144 * 8 * G + + .quad 0x6533cc28d378df80 + .quad 0xf6db43790a0fa4b4 + .quad 0xe3645ff9f701da5a + .quad 0x74d5f317f3172ba4 + .quad 0xcd2db5dab1f75ef5 + .quad 0xd77f95cf16b065f5 + .quad 0x14571fea3f49f085 + .quad 0x1c333621262b2b3d + .quad 0xa86fe55467d9ca81 + .quad 0x398b7c752b298c37 + .quad 0xda6d0892e3ac623b + .quad 0x4aebcc4547e9d98c + + // 2^148 * 1 * G + + .quad 0x53175a7205d21a77 + .quad 0xb0c04422d3b934d4 + .quad 0xadd9f24bdd5deadc + .quad 0x074f46e69f10ff8c + .quad 0x0de9b204a059a445 + .quad 0xe15cb4aa4b17ad0f + .quad 0xe1bbec521f79c557 + .quad 0x2633f1b9d071081b + .quad 0xc1fb4177018b9910 + .quad 0xa6ea20dc6c0fe140 + .quad 0xd661f3e74354c6ff + .quad 0x5ecb72e6f1a3407a + + // 2^148 * 2 * G + + .quad 0xa515a31b2259fb4e + .quad 0x0960f3972bcac52f + .quad 0xedb52fec8d3454cb + .quad 0x382e2720c476c019 + .quad 0xfeeae106e8e86997 + .quad 0x9863337f98d09383 + .quad 0x9470480eaa06ebef + .quad 0x038b6898d4c5c2d0 + .quad 0xf391c51d8ace50a6 + .quad 0x3142d0b9ae2d2948 + .quad 0xdb4d5a1a7f24ca80 + .quad 0x21aeba8b59250ea8 + + // 2^148 * 3 * G + + .quad 0x24f13b34cf405530 + .quad 0x3c44ea4a43088af7 + .quad 0x5dd5c5170006a482 + .quad 0x118eb8f8890b086d + .quad 0x53853600f0087f23 + .quad 0x4c461879da7d5784 + .quad 0x6af303deb41f6860 + .quad 0x0a3c16c5c27c18ed + .quad 0x17e49c17cc947f3d + .quad 0xccc6eda6aac1d27b + .quad 0xdf6092ceb0f08e56 + .quad 0x4909b3e22c67c36b + + // 2^148 * 4 * G + + .quad 0x9c9c85ea63fe2e89 + .quad 0xbe1baf910e9412ec + .quad 0x8f7baa8a86fbfe7b + .quad 0x0fb17f9fef968b6c + .quad 0x59a16676706ff64e + .quad 0x10b953dd0d86a53d + .quad 0x5848e1e6ce5c0b96 + .quad 0x2d8b78e712780c68 + .quad 0x79d5c62eafc3902b + .quad 0x773a215289e80728 + .quad 0xc38ae640e10120b9 + .quad 0x09ae23717b2b1a6d + + // 2^148 * 5 * G + + .quad 0xbb6a192a4e4d083c + .quad 0x34ace0630029e192 + .quad 0x98245a59aafabaeb + .quad 0x6d9c8a9ada97faac + .quad 0x10ab8fa1ad32b1d0 + .quad 0xe9aced1be2778b24 + .quad 0xa8856bc0373de90f + .quad 0x66f35ddddda53996 + .quad 0xd27d9afb24997323 + .quad 0x1bb7e07ef6f01d2e + .quad 0x2ba7472df52ecc7f + .quad 0x03019b4f646f9dc8 + + // 2^148 * 6 * G + + .quad 0x04a186b5565345cd + .quad 0xeee76610bcc4116a + .quad 0x689c73b478fb2a45 + .quad 0x387dcbff65697512 + .quad 0xaf09b214e6b3dc6b + .quad 0x3f7573b5ad7d2f65 + .quad 0xd019d988100a23b0 + .quad 0x392b63a58b5c35f7 + .quad 0x4093addc9c07c205 + .quad 0xc565be15f532c37e + .quad 0x63dbecfd1583402a + .quad 0x61722b4aef2e032e + + // 2^148 * 7 * G + + .quad 0x0012aafeecbd47af + .quad 0x55a266fb1cd46309 + .quad 0xf203eb680967c72c + .quad 0x39633944ca3c1429 + .quad 0xd6b07a5581cb0e3c + .quad 0x290ff006d9444969 + .quad 0x08680b6a16dcda1f + .quad 0x5568d2b75a06de59 + .quad 0x8d0cb88c1b37cfe1 + .quad 0x05b6a5a3053818f3 + .quad 0xf2e9bc04b787d959 + .quad 0x6beba1249add7f64 + + // 2^148 * 8 * G + + .quad 0x1d06005ca5b1b143 + .quad 0x6d4c6bb87fd1cda2 + .quad 0x6ef5967653fcffe7 + .quad 0x097c29e8c1ce1ea5 + .quad 0x5c3cecb943f5a53b + .quad 0x9cc9a61d06c08df2 + .quad 0xcfba639a85895447 + .quad 0x5a845ae80df09fd5 + .quad 0x4ce97dbe5deb94ca + .quad 0x38d0a4388c709c48 + .quad 0xc43eced4a169d097 + .quad 0x0a1249fff7e587c3 + + // 2^152 * 1 * G + + .quad 0x12f0071b276d01c9 + .quad 0xe7b8bac586c48c70 + .quad 0x5308129b71d6fba9 + .quad 0x5d88fbf95a3db792 + .quad 0x0b408d9e7354b610 + .quad 0x806b32535ba85b6e + .quad 0xdbe63a034a58a207 + .quad 0x173bd9ddc9a1df2c + .quad 0x2b500f1efe5872df + .quad 0x58d6582ed43918c1 + .quad 0xe6ed278ec9673ae0 + .quad 0x06e1cd13b19ea319 + + // 
2^152 * 2 * G + + .quad 0x40d0ad516f166f23 + .quad 0x118e32931fab6abe + .quad 0x3fe35e14a04d088e + .quad 0x3080603526e16266 + .quad 0x472baf629e5b0353 + .quad 0x3baa0b90278d0447 + .quad 0x0c785f469643bf27 + .quad 0x7f3a6a1a8d837b13 + .quad 0xf7e644395d3d800b + .quad 0x95a8d555c901edf6 + .quad 0x68cd7830592c6339 + .quad 0x30d0fded2e51307e + + // 2^152 * 3 * G + + .quad 0xe0594d1af21233b3 + .quad 0x1bdbe78ef0cc4d9c + .quad 0x6965187f8f499a77 + .quad 0x0a9214202c099868 + .quad 0x9cb4971e68b84750 + .quad 0xa09572296664bbcf + .quad 0x5c8de72672fa412b + .quad 0x4615084351c589d9 + .quad 0xbc9019c0aeb9a02e + .quad 0x55c7110d16034cae + .quad 0x0e6df501659932ec + .quad 0x3bca0d2895ca5dfe + + // 2^152 * 4 * G + + .quad 0x40f031bc3c5d62a4 + .quad 0x19fc8b3ecff07a60 + .quad 0x98183da2130fb545 + .quad 0x5631deddae8f13cd + .quad 0x9c688eb69ecc01bf + .quad 0xf0bc83ada644896f + .quad 0xca2d955f5f7a9fe2 + .quad 0x4ea8b4038df28241 + .quad 0x2aed460af1cad202 + .quad 0x46305305a48cee83 + .quad 0x9121774549f11a5f + .quad 0x24ce0930542ca463 + + // 2^152 * 5 * G + + .quad 0x1fe890f5fd06c106 + .quad 0xb5c468355d8810f2 + .quad 0x827808fe6e8caf3e + .quad 0x41d4e3c28a06d74b + .quad 0x3fcfa155fdf30b85 + .quad 0xd2f7168e36372ea4 + .quad 0xb2e064de6492f844 + .quad 0x549928a7324f4280 + .quad 0xf26e32a763ee1a2e + .quad 0xae91e4b7d25ffdea + .quad 0xbc3bd33bd17f4d69 + .quad 0x491b66dec0dcff6a + + // 2^152 * 6 * G + + .quad 0x98f5b13dc7ea32a7 + .quad 0xe3d5f8cc7e16db98 + .quad 0xac0abf52cbf8d947 + .quad 0x08f338d0c85ee4ac + .quad 0x75f04a8ed0da64a1 + .quad 0xed222caf67e2284b + .quad 0x8234a3791f7b7ba4 + .quad 0x4cf6b8b0b7018b67 + .quad 0xc383a821991a73bd + .quad 0xab27bc01df320c7a + .quad 0xc13d331b84777063 + .quad 0x530d4a82eb078a99 + + // 2^152 * 7 * G + + .quad 0x004c3630e1f94825 + .quad 0x7e2d78268cab535a + .quad 0xc7482323cc84ff8b + .quad 0x65ea753f101770b9 + .quad 0x6d6973456c9abf9e + .quad 0x257fb2fc4900a880 + .quad 0x2bacf412c8cfb850 + .quad 0x0db3e7e00cbfbd5b + .quad 0x3d66fc3ee2096363 + .quad 0x81d62c7f61b5cb6b + .quad 0x0fbe044213443b1a + .quad 0x02a4ec1921e1a1db + + // 2^152 * 8 * G + + .quad 0x5ce6259a3b24b8a2 + .quad 0xb8577acc45afa0b8 + .quad 0xcccbe6e88ba07037 + .quad 0x3d143c51127809bf + .quad 0xf5c86162f1cf795f + .quad 0x118c861926ee57f2 + .quad 0x172124851c063578 + .quad 0x36d12b5dec067fcf + .quad 0x126d279179154557 + .quad 0xd5e48f5cfc783a0a + .quad 0x36bdb6e8df179bac + .quad 0x2ef517885ba82859 + + // 2^156 * 1 * G + + .quad 0x88bd438cd11e0d4a + .quad 0x30cb610d43ccf308 + .quad 0xe09a0e3791937bcc + .quad 0x4559135b25b1720c + .quad 0x1ea436837c6da1e9 + .quad 0xf9c189af1fb9bdbe + .quad 0x303001fcce5dd155 + .quad 0x28a7c99ebc57be52 + .quad 0xb8fd9399e8d19e9d + .quad 0x908191cb962423ff + .quad 0xb2b948d747c742a3 + .quad 0x37f33226d7fb44c4 + + // 2^156 * 2 * G + + .quad 0x0dae8767b55f6e08 + .quad 0x4a43b3b35b203a02 + .quad 0xe3725a6e80af8c79 + .quad 0x0f7a7fd1705fa7a3 + .quad 0x33912553c821b11d + .quad 0x66ed42c241e301df + .quad 0x066fcc11104222fd + .quad 0x307a3b41c192168f + .quad 0x8eeb5d076eb55ce0 + .quad 0x2fc536bfaa0d925a + .quad 0xbe81830fdcb6c6e8 + .quad 0x556c7045827baf52 + + // 2^156 * 3 * G + + .quad 0x8e2b517302e9d8b7 + .quad 0xe3e52269248714e8 + .quad 0xbd4fbd774ca960b5 + .quad 0x6f4b4199c5ecada9 + .quad 0xb94b90022bf44406 + .quad 0xabd4237eff90b534 + .quad 0x7600a960faf86d3a + .quad 0x2f45abdac2322ee3 + .quad 0x61af4912c8ef8a6a + .quad 0xe58fa4fe43fb6e5e + .quad 0xb5afcc5d6fd427cf + .quad 0x6a5393281e1e11eb + + // 2^156 * 4 * G + + .quad 0xf3da5139a5d1ee89 + .quad 0x8145457cff936988 + .quad 
0x3f622fed00e188c4 + .quad 0x0f513815db8b5a3d + .quad 0x0fff04fe149443cf + .quad 0x53cac6d9865cddd7 + .quad 0x31385b03531ed1b7 + .quad 0x5846a27cacd1039d + .quad 0x4ff5cdac1eb08717 + .quad 0x67e8b29590f2e9bc + .quad 0x44093b5e237afa99 + .quad 0x0d414bed8708b8b2 + + // 2^156 * 5 * G + + .quad 0xcfb68265fd0e75f6 + .quad 0xe45b3e28bb90e707 + .quad 0x7242a8de9ff92c7a + .quad 0x685b3201933202dd + .quad 0x81886a92294ac9e8 + .quad 0x23162b45d55547be + .quad 0x94cfbc4403715983 + .quad 0x50eb8fdb134bc401 + .quad 0xc0b73ec6d6b330cd + .quad 0x84e44807132faff1 + .quad 0x732b7352c4a5dee1 + .quad 0x5d7c7cf1aa7cd2d2 + + // 2^156 * 6 * G + + .quad 0xaf3b46bf7a4aafa2 + .quad 0xb78705ec4d40d411 + .quad 0x114f0c6aca7c15e3 + .quad 0x3f364faaa9489d4d + .quad 0x33d1013e9b73a562 + .quad 0x925cef5748ec26e1 + .quad 0xa7fce614dd468058 + .quad 0x78b0fad41e9aa438 + .quad 0xbf56a431ed05b488 + .quad 0xa533e66c9c495c7e + .quad 0xe8652baf87f3651a + .quad 0x0241800059d66c33 + + // 2^156 * 7 * G + + .quad 0xceb077fea37a5be4 + .quad 0xdb642f02e5a5eeb7 + .quad 0xc2e6d0c5471270b8 + .quad 0x4771b65538e4529c + .quad 0x28350c7dcf38ea01 + .quad 0x7c6cdbc0b2917ab6 + .quad 0xace7cfbe857082f7 + .quad 0x4d2845aba2d9a1e0 + .quad 0xbb537fe0447070de + .quad 0xcba744436dd557df + .quad 0xd3b5a3473600dbcb + .quad 0x4aeabbe6f9ffd7f8 + + // 2^156 * 8 * G + + .quad 0x4630119e40d8f78c + .quad 0xa01a9bc53c710e11 + .quad 0x486d2b258910dd79 + .quad 0x1e6c47b3db0324e5 + .quad 0x6a2134bcc4a9c8f2 + .quad 0xfbf8fd1c8ace2e37 + .quad 0x000ae3049911a0ba + .quad 0x046e3a616bc89b9e + .quad 0x14e65442f03906be + .quad 0x4a019d54e362be2a + .quad 0x68ccdfec8dc230c7 + .quad 0x7cfb7e3faf6b861c + + // 2^160 * 1 * G + + .quad 0x4637974e8c58aedc + .quad 0xb9ef22fbabf041a4 + .quad 0xe185d956e980718a + .quad 0x2f1b78fab143a8a6 + .quad 0x96eebffb305b2f51 + .quad 0xd3f938ad889596b8 + .quad 0xf0f52dc746d5dd25 + .quad 0x57968290bb3a0095 + .quad 0xf71ab8430a20e101 + .quad 0xf393658d24f0ec47 + .quad 0xcf7509a86ee2eed1 + .quad 0x7dc43e35dc2aa3e1 + + // 2^160 * 2 * G + + .quad 0x85966665887dd9c3 + .quad 0xc90f9b314bb05355 + .quad 0xc6e08df8ef2079b1 + .quad 0x7ef72016758cc12f + .quad 0x5a782a5c273e9718 + .quad 0x3576c6995e4efd94 + .quad 0x0f2ed8051f237d3e + .quad 0x044fb81d82d50a99 + .quad 0xc1df18c5a907e3d9 + .quad 0x57b3371dce4c6359 + .quad 0xca704534b201bb49 + .quad 0x7f79823f9c30dd2e + + // 2^160 * 3 * G + + .quad 0x8334d239a3b513e8 + .quad 0xc13670d4b91fa8d8 + .quad 0x12b54136f590bd33 + .quad 0x0a4e0373d784d9b4 + .quad 0x6a9c1ff068f587ba + .quad 0x0827894e0050c8de + .quad 0x3cbf99557ded5be7 + .quad 0x64a9b0431c06d6f0 + .quad 0x2eb3d6a15b7d2919 + .quad 0xb0b4f6a0d53a8235 + .quad 0x7156ce4389a45d47 + .quad 0x071a7d0ace18346c + + // 2^160 * 4 * G + + .quad 0xd3072daac887ba0b + .quad 0x01262905bfa562ee + .quad 0xcf543002c0ef768b + .quad 0x2c3bcc7146ea7e9c + .quad 0xcc0c355220e14431 + .quad 0x0d65950709b15141 + .quad 0x9af5621b209d5f36 + .quad 0x7c69bcf7617755d3 + .quad 0x07f0d7eb04e8295f + .quad 0x10db18252f50f37d + .quad 0xe951a9a3171798d7 + .quad 0x6f5a9a7322aca51d + + // 2^160 * 5 * G + + .quad 0x8ba1000c2f41c6c5 + .quad 0xc49f79c10cfefb9b + .quad 0x4efa47703cc51c9f + .quad 0x494e21a2e147afca + .quad 0xe729d4eba3d944be + .quad 0x8d9e09408078af9e + .quad 0x4525567a47869c03 + .quad 0x02ab9680ee8d3b24 + .quad 0xefa48a85dde50d9a + .quad 0x219a224e0fb9a249 + .quad 0xfa091f1dd91ef6d9 + .quad 0x6b5d76cbea46bb34 + + // 2^160 * 6 * G + + .quad 0x8857556cec0cd994 + .quad 0x6472dc6f5cd01dba + .quad 0xaf0169148f42b477 + .quad 0x0ae333f685277354 + .quad 0xe0f941171e782522 + .quad 
0xf1e6ae74036936d3 + .quad 0x408b3ea2d0fcc746 + .quad 0x16fb869c03dd313e + .quad 0x288e199733b60962 + .quad 0x24fc72b4d8abe133 + .quad 0x4811f7ed0991d03e + .quad 0x3f81e38b8f70d075 + + // 2^160 * 7 * G + + .quad 0x7f910fcc7ed9affe + .quad 0x545cb8a12465874b + .quad 0xa8397ed24b0c4704 + .quad 0x50510fc104f50993 + .quad 0x0adb7f355f17c824 + .quad 0x74b923c3d74299a4 + .quad 0xd57c3e8bcbf8eaf7 + .quad 0x0ad3e2d34cdedc3d + .quad 0x6f0c0fc5336e249d + .quad 0x745ede19c331cfd9 + .quad 0xf2d6fd0009eefe1c + .quad 0x127c158bf0fa1ebe + + // 2^160 * 8 * G + + .quad 0xf6197c422e9879a2 + .quad 0xa44addd452ca3647 + .quad 0x9b413fc14b4eaccb + .quad 0x354ef87d07ef4f68 + .quad 0xdea28fc4ae51b974 + .quad 0x1d9973d3744dfe96 + .quad 0x6240680b873848a8 + .quad 0x4ed82479d167df95 + .quad 0xfee3b52260c5d975 + .quad 0x50352efceb41b0b8 + .quad 0x8808ac30a9f6653c + .quad 0x302d92d20539236d + + // 2^164 * 1 * G + + .quad 0x4c59023fcb3efb7c + .quad 0x6c2fcb99c63c2a94 + .quad 0xba4190e2c3c7e084 + .quad 0x0e545daea51874d9 + .quad 0x957b8b8b0df53c30 + .quad 0x2a1c770a8e60f098 + .quad 0xbbc7a670345796de + .quad 0x22a48f9a90c99bc9 + .quad 0x6b7dc0dc8d3fac58 + .quad 0x5497cd6ce6e42bfd + .quad 0x542f7d1bf400d305 + .quad 0x4159f47f048d9136 + + // 2^164 * 2 * G + + .quad 0x20ad660839e31e32 + .quad 0xf81e1bd58405be50 + .quad 0xf8064056f4dabc69 + .quad 0x14d23dd4ce71b975 + .quad 0x748515a8bbd24839 + .quad 0x77128347afb02b55 + .quad 0x50ba2ac649a2a17f + .quad 0x060525513ad730f1 + .quad 0xf2398e098aa27f82 + .quad 0x6d7982bb89a1b024 + .quad 0xfa694084214dd24c + .quad 0x71ab966fa32301c3 + + // 2^164 * 3 * G + + .quad 0x2dcbd8e34ded02fc + .quad 0x1151f3ec596f22aa + .quad 0xbca255434e0328da + .quad 0x35768fbe92411b22 + .quad 0xb1088a0702809955 + .quad 0x43b273ea0b43c391 + .quad 0xca9b67aefe0686ed + .quad 0x605eecbf8335f4ed + .quad 0x83200a656c340431 + .quad 0x9fcd71678ee59c2f + .quad 0x75d4613f71300f8a + .quad 0x7a912faf60f542f9 + + // 2^164 * 4 * G + + .quad 0xb204585e5edc1a43 + .quad 0x9f0e16ee5897c73c + .quad 0x5b82c0ae4e70483c + .quad 0x624a170e2bddf9be + .quad 0x253f4f8dfa2d5597 + .quad 0x25e49c405477130c + .quad 0x00c052e5996b1102 + .quad 0x33cb966e33bb6c4a + .quad 0x597028047f116909 + .quad 0x828ac41c1e564467 + .quad 0x70417dbde6217387 + .quad 0x721627aefbac4384 + + // 2^164 * 5 * G + + .quad 0x97d03bc38736add5 + .quad 0x2f1422afc532b130 + .quad 0x3aa68a057101bbc4 + .quad 0x4c946cf7e74f9fa7 + .quad 0xfd3097bc410b2f22 + .quad 0xf1a05da7b5cfa844 + .quad 0x61289a1def57ca74 + .quad 0x245ea199bb821902 + .quad 0xaedca66978d477f8 + .quad 0x1898ba3c29117fe1 + .quad 0xcf73f983720cbd58 + .quad 0x67da12e6b8b56351 + + // 2^164 * 6 * G + + .quad 0x7067e187b4bd6e07 + .quad 0x6e8f0203c7d1fe74 + .quad 0x93c6aa2f38c85a30 + .quad 0x76297d1f3d75a78a + .quad 0x2b7ef3d38ec8308c + .quad 0x828fd7ec71eb94ab + .quad 0x807c3b36c5062abd + .quad 0x0cb64cb831a94141 + .quad 0x3030fc33534c6378 + .quad 0xb9635c5ce541e861 + .quad 0x15d9a9bed9b2c728 + .quad 0x49233ea3f3775dcb + + // 2^164 * 7 * G + + .quad 0x629398fa8dbffc3a + .quad 0xe12fe52dd54db455 + .quad 0xf3be11dfdaf25295 + .quad 0x628b140dce5e7b51 + .quad 0x7b3985fe1c9f249b + .quad 0x4fd6b2d5a1233293 + .quad 0xceb345941adf4d62 + .quad 0x6987ff6f542de50c + .quad 0x47e241428f83753c + .quad 0x6317bebc866af997 + .quad 0xdabb5b433d1a9829 + .quad 0x074d8d245287fb2d + + // 2^164 * 8 * G + + .quad 0x8337d9cd440bfc31 + .quad 0x729d2ca1af318fd7 + .quad 0xa040a4a4772c2070 + .quad 0x46002ef03a7349be + .quad 0x481875c6c0e31488 + .quad 0x219429b2e22034b4 + .quad 0x7223c98a31283b65 + .quad 0x3420d60b342277f9 + .quad 
0xfaa23adeaffe65f7 + .quad 0x78261ed45be0764c + .quad 0x441c0a1e2f164403 + .quad 0x5aea8e567a87d395 + + // 2^168 * 1 * G + + .quad 0x7813c1a2bca4283d + .quad 0xed62f091a1863dd9 + .quad 0xaec7bcb8c268fa86 + .quad 0x10e5d3b76f1cae4c + .quad 0x2dbc6fb6e4e0f177 + .quad 0x04e1bf29a4bd6a93 + .quad 0x5e1966d4787af6e8 + .quad 0x0edc5f5eb426d060 + .quad 0x5453bfd653da8e67 + .quad 0xe9dc1eec24a9f641 + .quad 0xbf87263b03578a23 + .quad 0x45b46c51361cba72 + + // 2^168 * 2 * G + + .quad 0xa9402abf314f7fa1 + .quad 0xe257f1dc8e8cf450 + .quad 0x1dbbd54b23a8be84 + .quad 0x2177bfa36dcb713b + .quad 0xce9d4ddd8a7fe3e4 + .quad 0xab13645676620e30 + .quad 0x4b594f7bb30e9958 + .quad 0x5c1c0aef321229df + .quad 0x37081bbcfa79db8f + .quad 0x6048811ec25f59b3 + .quad 0x087a76659c832487 + .quad 0x4ae619387d8ab5bb + + // 2^168 * 3 * G + + .quad 0x8ddbf6aa5344a32e + .quad 0x7d88eab4b41b4078 + .quad 0x5eb0eb974a130d60 + .quad 0x1a00d91b17bf3e03 + .quad 0x61117e44985bfb83 + .quad 0xfce0462a71963136 + .quad 0x83ac3448d425904b + .quad 0x75685abe5ba43d64 + .quad 0x6e960933eb61f2b2 + .quad 0x543d0fa8c9ff4952 + .quad 0xdf7275107af66569 + .quad 0x135529b623b0e6aa + + // 2^168 * 4 * G + + .quad 0x18f0dbd7add1d518 + .quad 0x979f7888cfc11f11 + .quad 0x8732e1f07114759b + .quad 0x79b5b81a65ca3a01 + .quad 0xf5c716bce22e83fe + .quad 0xb42beb19e80985c1 + .quad 0xec9da63714254aae + .quad 0x5972ea051590a613 + .quad 0x0fd4ac20dc8f7811 + .quad 0x9a9ad294ac4d4fa8 + .quad 0xc01b2d64b3360434 + .quad 0x4f7e9c95905f3bdb + + // 2^168 * 5 * G + + .quad 0x62674bbc5781302e + .quad 0xd8520f3989addc0f + .quad 0x8c2999ae53fbd9c6 + .quad 0x31993ad92e638e4c + .quad 0x71c8443d355299fe + .quad 0x8bcd3b1cdbebead7 + .quad 0x8092499ef1a49466 + .quad 0x1942eec4a144adc8 + .quad 0x7dac5319ae234992 + .quad 0x2c1b3d910cea3e92 + .quad 0x553ce494253c1122 + .quad 0x2a0a65314ef9ca75 + + // 2^168 * 6 * G + + .quad 0x2db7937ff7f927c2 + .quad 0xdb741f0617d0a635 + .quad 0x5982f3a21155af76 + .quad 0x4cf6e218647c2ded + .quad 0xcf361acd3c1c793a + .quad 0x2f9ebcac5a35bc3b + .quad 0x60e860e9a8cda6ab + .quad 0x055dc39b6dea1a13 + .quad 0xb119227cc28d5bb6 + .quad 0x07e24ebc774dffab + .quad 0xa83c78cee4a32c89 + .quad 0x121a307710aa24b6 + + // 2^168 * 7 * G + + .quad 0xe4db5d5e9f034a97 + .quad 0xe153fc093034bc2d + .quad 0x460546919551d3b1 + .quad 0x333fc76c7a40e52d + .quad 0xd659713ec77483c9 + .quad 0x88bfe077b82b96af + .quad 0x289e28231097bcd3 + .quad 0x527bb94a6ced3a9b + .quad 0x563d992a995b482e + .quad 0x3405d07c6e383801 + .quad 0x485035de2f64d8e5 + .quad 0x6b89069b20a7a9f7 + + // 2^168 * 8 * G + + .quad 0x812aa0416270220d + .quad 0x995a89faf9245b4e + .quad 0xffadc4ce5072ef05 + .quad 0x23bc2103aa73eb73 + .quad 0x4082fa8cb5c7db77 + .quad 0x068686f8c734c155 + .quad 0x29e6c8d9f6e7a57e + .quad 0x0473d308a7639bcf + .quad 0xcaee792603589e05 + .quad 0x2b4b421246dcc492 + .quad 0x02a1ef74e601a94f + .quad 0x102f73bfde04341a + + // 2^172 * 1 * G + + .quad 0xb5a2d50c7ec20d3e + .quad 0xc64bdd6ea0c97263 + .quad 0x56e89052c1ff734d + .quad 0x4929c6f72b2ffaba + .quad 0x358ecba293a36247 + .quad 0xaf8f9862b268fd65 + .quad 0x412f7e9968a01c89 + .quad 0x5786f312cd754524 + .quad 0x337788ffca14032c + .quad 0xf3921028447f1ee3 + .quad 0x8b14071f231bccad + .quad 0x4c817b4bf2344783 + + // 2^172 * 2 * G + + .quad 0x0ff853852871b96e + .quad 0xe13e9fab60c3f1bb + .quad 0xeefd595325344402 + .quad 0x0a37c37075b7744b + .quad 0x413ba057a40b4484 + .quad 0xba4c2e1a4f5f6a43 + .quad 0x614ba0a5aee1d61c + .quad 0x78a1531a8b05dc53 + .quad 0x6cbdf1703ad0562b + .quad 0x8ecf4830c92521a3 + .quad 0xdaebd303fd8424e7 + .quad 
0x72ad82a42e5ec56f + + // 2^172 * 3 * G + + .quad 0x3f9e8e35bafb65f6 + .quad 0x39d69ec8f27293a1 + .quad 0x6cb8cd958cf6a3d0 + .quad 0x1734778173adae6d + .quad 0xc368939167024bc3 + .quad 0x8e69d16d49502fda + .quad 0xfcf2ec3ce45f4b29 + .quad 0x065f669ea3b4cbc4 + .quad 0x8a00aec75532db4d + .quad 0xb869a4e443e31bb1 + .quad 0x4a0f8552d3a7f515 + .quad 0x19adeb7c303d7c08 + + // 2^172 * 4 * G + + .quad 0xc720cb6153ead9a3 + .quad 0x55b2c97f512b636e + .quad 0xb1e35b5fd40290b1 + .quad 0x2fd9ccf13b530ee2 + .quad 0x9d05ba7d43c31794 + .quad 0x2470c8ff93322526 + .quad 0x8323dec816197438 + .quad 0x2852709881569b53 + .quad 0x07bd475b47f796b8 + .quad 0xd2c7b013542c8f54 + .quad 0x2dbd23f43b24f87e + .quad 0x6551afd77b0901d6 + + // 2^172 * 5 * G + + .quad 0x4546baaf54aac27f + .quad 0xf6f66fecb2a45a28 + .quad 0x582d1b5b562bcfe8 + .quad 0x44b123f3920f785f + .quad 0x68a24ce3a1d5c9ac + .quad 0xbb77a33d10ff6461 + .quad 0x0f86ce4425d3166e + .quad 0x56507c0950b9623b + .quad 0x1206f0b7d1713e63 + .quad 0x353fe3d915bafc74 + .quad 0x194ceb970ad9d94d + .quad 0x62fadd7cf9d03ad3 + + // 2^172 * 6 * G + + .quad 0xc6b5967b5598a074 + .quad 0x5efe91ce8e493e25 + .quad 0xd4b72c4549280888 + .quad 0x20ef1149a26740c2 + .quad 0x3cd7bc61e7ce4594 + .quad 0xcd6b35a9b7dd267e + .quad 0xa080abc84366ef27 + .quad 0x6ec7c46f59c79711 + .quad 0x2f07ad636f09a8a2 + .quad 0x8697e6ce24205e7d + .quad 0xc0aefc05ee35a139 + .quad 0x15e80958b5f9d897 + + // 2^172 * 7 * G + + .quad 0x25a5ef7d0c3e235b + .quad 0x6c39c17fbe134ee7 + .quad 0xc774e1342dc5c327 + .quad 0x021354b892021f39 + .quad 0x4dd1ed355bb061c4 + .quad 0x42dc0cef941c0700 + .quad 0x61305dc1fd86340e + .quad 0x56b2cc930e55a443 + .quad 0x1df79da6a6bfc5a2 + .quad 0x02f3a2749fde4369 + .quad 0xb323d9f2cda390a7 + .quad 0x7be0847b8774d363 + + // 2^172 * 8 * G + + .quad 0x8c99cc5a8b3f55c3 + .quad 0x0611d7253fded2a0 + .quad 0xed2995ff36b70a36 + .quad 0x1f699a54d78a2619 + .quad 0x1466f5af5307fa11 + .quad 0x817fcc7ded6c0af2 + .quad 0x0a6de44ec3a4a3fb + .quad 0x74071475bc927d0b + .quad 0xe77292f373e7ea8a + .quad 0x296537d2cb045a31 + .quad 0x1bd0653ed3274fde + .quad 0x2f9a2c4476bd2966 + + // 2^176 * 1 * G + + .quad 0xeb18b9ab7f5745c6 + .quad 0x023a8aee5787c690 + .quad 0xb72712da2df7afa9 + .quad 0x36597d25ea5c013d + .quad 0xa2b4dae0b5511c9a + .quad 0x7ac860292bffff06 + .quad 0x981f375df5504234 + .quad 0x3f6bd725da4ea12d + .quad 0x734d8d7b106058ac + .quad 0xd940579e6fc6905f + .quad 0x6466f8f99202932d + .quad 0x7b7ecc19da60d6d0 + + // 2^176 * 2 * G + + .quad 0x78c2373c695c690d + .quad 0xdd252e660642906e + .quad 0x951d44444ae12bd2 + .quad 0x4235ad7601743956 + .quad 0x6dae4a51a77cfa9b + .quad 0x82263654e7a38650 + .quad 0x09bbffcd8f2d82db + .quad 0x03bedc661bf5caba + .quad 0x6258cb0d078975f5 + .quad 0x492942549189f298 + .quad 0xa0cab423e2e36ee4 + .quad 0x0e7ce2b0cdf066a1 + + // 2^176 * 3 * G + + .quad 0xc494643ac48c85a3 + .quad 0xfd361df43c6139ad + .quad 0x09db17dd3ae94d48 + .quad 0x666e0a5d8fb4674a + .quad 0xfea6fedfd94b70f9 + .quad 0xf130c051c1fcba2d + .quad 0x4882d47e7f2fab89 + .quad 0x615256138aeceeb5 + .quad 0x2abbf64e4870cb0d + .quad 0xcd65bcf0aa458b6b + .quad 0x9abe4eba75e8985d + .quad 0x7f0bc810d514dee4 + + // 2^176 * 4 * G + + .quad 0xb9006ba426f4136f + .quad 0x8d67369e57e03035 + .quad 0xcbc8dfd94f463c28 + .quad 0x0d1f8dbcf8eedbf5 + .quad 0x83ac9dad737213a0 + .quad 0x9ff6f8ba2ef72e98 + .quad 0x311e2edd43ec6957 + .quad 0x1d3a907ddec5ab75 + .quad 0xba1693313ed081dc + .quad 0x29329fad851b3480 + .quad 0x0128013c030321cb + .quad 0x00011b44a31bfde3 + + // 2^176 * 5 * G + + .quad 0x3fdfa06c3fc66c0c + .quad 
0x5d40e38e4dd60dd2 + .quad 0x7ae38b38268e4d71 + .quad 0x3ac48d916e8357e1 + .quad 0x16561f696a0aa75c + .quad 0xc1bf725c5852bd6a + .quad 0x11a8dd7f9a7966ad + .quad 0x63d988a2d2851026 + .quad 0x00120753afbd232e + .quad 0xe92bceb8fdd8f683 + .quad 0xf81669b384e72b91 + .quad 0x33fad52b2368a066 + + // 2^176 * 6 * G + + .quad 0x540649c6c5e41e16 + .quad 0x0af86430333f7735 + .quad 0xb2acfcd2f305e746 + .quad 0x16c0f429a256dca7 + .quad 0x8d2cc8d0c422cfe8 + .quad 0x072b4f7b05a13acb + .quad 0xa3feb6e6ecf6a56f + .quad 0x3cc355ccb90a71e2 + .quad 0xe9b69443903e9131 + .quad 0xb8a494cb7a5637ce + .quad 0xc87cd1a4baba9244 + .quad 0x631eaf426bae7568 + + // 2^176 * 7 * G + + .quad 0xb3e90410da66fe9f + .quad 0x85dd4b526c16e5a6 + .quad 0xbc3d97611ef9bf83 + .quad 0x5599648b1ea919b5 + .quad 0x47d975b9a3700de8 + .quad 0x7280c5fbe2f80552 + .quad 0x53658f2732e45de1 + .quad 0x431f2c7f665f80b5 + .quad 0xd6026344858f7b19 + .quad 0x14ab352fa1ea514a + .quad 0x8900441a2090a9d7 + .quad 0x7b04715f91253b26 + + // 2^176 * 8 * G + + .quad 0x83edbd28acf6ae43 + .quad 0x86357c8b7d5c7ab4 + .quad 0xc0404769b7eb2c44 + .quad 0x59b37bf5c2f6583f + .quad 0xb376c280c4e6bac6 + .quad 0x970ed3dd6d1d9b0b + .quad 0xb09a9558450bf944 + .quad 0x48d0acfa57cde223 + .quad 0xb60f26e47dabe671 + .quad 0xf1d1a197622f3a37 + .quad 0x4208ce7ee9960394 + .quad 0x16234191336d3bdb + + // 2^180 * 1 * G + + .quad 0xf19aeac733a63aef + .quad 0x2c7fba5d4442454e + .quad 0x5da87aa04795e441 + .quad 0x413051e1a4e0b0f5 + .quad 0x852dd1fd3d578bbe + .quad 0x2b65ce72c3286108 + .quad 0x658c07f4eace2273 + .quad 0x0933f804ec38ab40 + .quad 0xa7ab69798d496476 + .quad 0x8121aadefcb5abc8 + .quad 0xa5dc12ef7b539472 + .quad 0x07fd47065e45351a + + // 2^180 * 2 * G + + .quad 0xc8583c3d258d2bcd + .quad 0x17029a4daf60b73f + .quad 0xfa0fc9d6416a3781 + .quad 0x1c1e5fba38b3fb23 + .quad 0x304211559ae8e7c3 + .quad 0xf281b229944882a5 + .quad 0x8a13ac2e378250e4 + .quad 0x014afa0954ba48f4 + .quad 0xcb3197001bb3666c + .quad 0x330060524bffecb9 + .quad 0x293711991a88233c + .quad 0x291884363d4ed364 + + // 2^180 * 3 * G + + .quad 0x033c6805dc4babfa + .quad 0x2c15bf5e5596ecc1 + .quad 0x1bc70624b59b1d3b + .quad 0x3ede9850a19f0ec5 + .quad 0xfb9d37c3bc1ab6eb + .quad 0x02be14534d57a240 + .quad 0xf4d73415f8a5e1f6 + .quad 0x5964f4300ccc8188 + .quad 0xe44a23152d096800 + .quad 0x5c08c55970866996 + .quad 0xdf2db60a46affb6e + .quad 0x579155c1f856fd89 + + // 2^180 * 4 * G + + .quad 0x96324edd12e0c9ef + .quad 0x468b878df2420297 + .quad 0x199a3776a4f573be + .quad 0x1e7fbcf18e91e92a + .quad 0xb5f16b630817e7a6 + .quad 0x808c69233c351026 + .quad 0x324a983b54cef201 + .quad 0x53c092084a485345 + .quad 0xd2d41481f1cbafbf + .quad 0x231d2db6716174e5 + .quad 0x0b7d7656e2a55c98 + .quad 0x3e955cd82aa495f6 + + // 2^180 * 5 * G + + .quad 0xe48f535e3ed15433 + .quad 0xd075692a0d7270a3 + .quad 0x40fbd21daade6387 + .quad 0x14264887cf4495f5 + .quad 0xab39f3ef61bb3a3f + .quad 0x8eb400652eb9193e + .quad 0xb5de6ecc38c11f74 + .quad 0x654d7e9626f3c49f + .quad 0xe564cfdd5c7d2ceb + .quad 0x82eeafded737ccb9 + .quad 0x6107db62d1f9b0ab + .quad 0x0b6baac3b4358dbb + + // 2^180 * 6 * G + + .quad 0x7ae62bcb8622fe98 + .quad 0x47762256ceb891af + .quad 0x1a5a92bcf2e406b4 + .quad 0x7d29401784e41501 + .quad 0x204abad63700a93b + .quad 0xbe0023d3da779373 + .quad 0xd85f0346633ab709 + .quad 0x00496dc490820412 + .quad 0x1c74b88dc27e6360 + .quad 0x074854268d14850c + .quad 0xa145fb7b3e0dcb30 + .quad 0x10843f1b43803b23 + + // 2^180 * 7 * G + + .quad 0xc5f90455376276dd + .quad 0xce59158dd7645cd9 + .quad 0x92f65d511d366b39 + .quad 0x11574b6e526996c4 + .quad 
0xd56f672de324689b + .quad 0xd1da8aedb394a981 + .quad 0xdd7b58fe9168cfed + .quad 0x7ce246cd4d56c1e8 + .quad 0xb8f4308e7f80be53 + .quad 0x5f3cb8cb34a9d397 + .quad 0x18a961bd33cc2b2c + .quad 0x710045fb3a9af671 + + // 2^180 * 8 * G + + .quad 0x73f93d36101b95eb + .quad 0xfaef33794f6f4486 + .quad 0x5651735f8f15e562 + .quad 0x7fa3f19058b40da1 + .quad 0xa03fc862059d699e + .quad 0x2370cfa19a619e69 + .quad 0xc4fe3b122f823deb + .quad 0x1d1b056fa7f0844e + .quad 0x1bc64631e56bf61f + .quad 0xd379ab106e5382a3 + .quad 0x4d58c57e0540168d + .quad 0x566256628442d8e4 + + // 2^184 * 1 * G + + .quad 0xb9e499def6267ff6 + .quad 0x7772ca7b742c0843 + .quad 0x23a0153fe9a4f2b1 + .quad 0x2cdfdfecd5d05006 + .quad 0xdd499cd61ff38640 + .quad 0x29cd9bc3063625a0 + .quad 0x51e2d8023dd73dc3 + .quad 0x4a25707a203b9231 + .quad 0x2ab7668a53f6ed6a + .quad 0x304242581dd170a1 + .quad 0x4000144c3ae20161 + .quad 0x5721896d248e49fc + + // 2^184 * 2 * G + + .quad 0x0b6e5517fd181bae + .quad 0x9022629f2bb963b4 + .quad 0x5509bce932064625 + .quad 0x578edd74f63c13da + .quad 0x285d5091a1d0da4e + .quad 0x4baa6fa7b5fe3e08 + .quad 0x63e5177ce19393b3 + .quad 0x03c935afc4b030fd + .quad 0x997276c6492b0c3d + .quad 0x47ccc2c4dfe205fc + .quad 0xdcd29b84dd623a3c + .quad 0x3ec2ab590288c7a2 + + // 2^184 * 3 * G + + .quad 0xa1a0d27be4d87bb9 + .quad 0xa98b4deb61391aed + .quad 0x99a0ddd073cb9b83 + .quad 0x2dd5c25a200fcace + .quad 0xa7213a09ae32d1cb + .quad 0x0f2b87df40f5c2d5 + .quad 0x0baea4c6e81eab29 + .quad 0x0e1bf66c6adbac5e + .quad 0xe2abd5e9792c887e + .quad 0x1a020018cb926d5d + .quad 0xbfba69cdbaae5f1e + .quad 0x730548b35ae88f5f + + // 2^184 * 4 * G + + .quad 0xc43551a3cba8b8ee + .quad 0x65a26f1db2115f16 + .quad 0x760f4f52ab8c3850 + .quad 0x3043443b411db8ca + .quad 0x805b094ba1d6e334 + .quad 0xbf3ef17709353f19 + .quad 0x423f06cb0622702b + .quad 0x585a2277d87845dd + .quad 0xa18a5f8233d48962 + .quad 0x6698c4b5ec78257f + .quad 0xa78e6fa5373e41ff + .quad 0x7656278950ef981f + + // 2^184 * 5 * G + + .quad 0x38c3cf59d51fc8c0 + .quad 0x9bedd2fd0506b6f2 + .quad 0x26bf109fab570e8f + .quad 0x3f4160a8c1b846a6 + .quad 0xe17073a3ea86cf9d + .quad 0x3a8cfbb707155fdc + .quad 0x4853e7fc31838a8e + .quad 0x28bbf484b613f616 + .quad 0xf2612f5c6f136c7c + .quad 0xafead107f6dd11be + .quad 0x527e9ad213de6f33 + .quad 0x1e79cb358188f75d + + // 2^184 * 6 * G + + .quad 0x013436c3eef7e3f1 + .quad 0x828b6a7ffe9e10f8 + .quad 0x7ff908e5bcf9defc + .quad 0x65d7951b3a3b3831 + .quad 0x77e953d8f5e08181 + .quad 0x84a50c44299dded9 + .quad 0xdc6c2d0c864525e5 + .quad 0x478ab52d39d1f2f4 + .quad 0x66a6a4d39252d159 + .quad 0xe5dde1bc871ac807 + .quad 0xb82c6b40a6c1c96f + .quad 0x16d87a411a212214 + + // 2^184 * 7 * G + + .quad 0xb3bd7e5a42066215 + .quad 0x879be3cd0c5a24c1 + .quad 0x57c05db1d6f994b7 + .quad 0x28f87c8165f38ca6 + .quad 0xfba4d5e2d54e0583 + .quad 0xe21fafd72ebd99fa + .quad 0x497ac2736ee9778f + .quad 0x1f990b577a5a6dde + .quad 0xa3344ead1be8f7d6 + .quad 0x7d1e50ebacea798f + .quad 0x77c6569e520de052 + .quad 0x45882fe1534d6d3e + + // 2^184 * 8 * G + + .quad 0x6669345d757983d6 + .quad 0x62b6ed1117aa11a6 + .quad 0x7ddd1857985e128f + .quad 0x688fe5b8f626f6dd + .quad 0xd8ac9929943c6fe4 + .quad 0xb5f9f161a38392a2 + .quad 0x2699db13bec89af3 + .quad 0x7dcf843ce405f074 + .quad 0x6c90d6484a4732c0 + .quad 0xd52143fdca563299 + .quad 0xb3be28c3915dc6e1 + .quad 0x6739687e7327191b + + // 2^188 * 1 * G + + .quad 0x9f65c5ea200814cf + .quad 0x840536e169a31740 + .quad 0x8b0ed13925c8b4ad + .quad 0x0080dbafe936361d + .quad 0x8ce5aad0c9cb971f + .quad 0x1156aaa99fd54a29 + .quad 0x41f7247015af9b78 + .quad 
0x1fe8cca8420f49aa + .quad 0x72a1848f3c0cc82a + .quad 0x38c560c2877c9e54 + .quad 0x5004e228ce554140 + .quad 0x042418a103429d71 + + // 2^188 * 2 * G + + .quad 0x899dea51abf3ff5f + .quad 0x9b93a8672fc2d8ba + .quad 0x2c38cb97be6ebd5c + .quad 0x114d578497263b5d + .quad 0x58e84c6f20816247 + .quad 0x8db2b2b6e36fd793 + .quad 0x977182561d484d85 + .quad 0x0822024f8632abd7 + .quad 0xb301bb7c6b1beca3 + .quad 0x55393f6dc6eb1375 + .quad 0x910d281097b6e4eb + .quad 0x1ad4548d9d479ea3 + + // 2^188 * 3 * G + + .quad 0xcd5a7da0389a48fd + .quad 0xb38fa4aa9a78371e + .quad 0xc6d9761b2cdb8e6c + .quad 0x35cf51dbc97e1443 + .quad 0xa06fe66d0fe9fed3 + .quad 0xa8733a401c587909 + .quad 0x30d14d800df98953 + .quad 0x41ce5876c7b30258 + .quad 0x59ac3bc5d670c022 + .quad 0xeae67c109b119406 + .quad 0x9798bdf0b3782fda + .quad 0x651e3201fd074092 + + // 2^188 * 4 * G + + .quad 0xd63d8483ef30c5cf + .quad 0x4cd4b4962361cc0c + .quad 0xee90e500a48426ac + .quad 0x0af51d7d18c14eeb + .quad 0xa57ba4a01efcae9e + .quad 0x769f4beedc308a94 + .quad 0xd1f10eeb3603cb2e + .quad 0x4099ce5e7e441278 + .quad 0x1ac98e4f8a5121e9 + .quad 0x7dae9544dbfa2fe0 + .quad 0x8320aa0dd6430df9 + .quad 0x667282652c4a2fb5 + + // 2^188 * 5 * G + + .quad 0x874621f4d86bc9ab + .quad 0xb54c7bbe56fe6fea + .quad 0x077a24257fadc22c + .quad 0x1ab53be419b90d39 + .quad 0xada8b6e02946db23 + .quad 0x1c0ce51a7b253ab7 + .quad 0x8448c85a66dd485b + .quad 0x7f1fc025d0675adf + .quad 0xd8ee1b18319ea6aa + .quad 0x004d88083a21f0da + .quad 0x3bd6aa1d883a4f4b + .quad 0x4db9a3a6dfd9fd14 + + // 2^188 * 6 * G + + .quad 0x8ce7b23bb99c0755 + .quad 0x35c5d6edc4f50f7a + .quad 0x7e1e2ed2ed9b50c3 + .quad 0x36305f16e8934da1 + .quad 0xd95b00bbcbb77c68 + .quad 0xddbc846a91f17849 + .quad 0x7cf700aebe28d9b3 + .quad 0x5ce1285c85d31f3e + .quad 0x31b6972d98b0bde8 + .quad 0x7d920706aca6de5b + .quad 0xe67310f8908a659f + .quad 0x50fac2a6efdf0235 + + // 2^188 * 7 * G + + .quad 0xf3d3a9f35b880f5a + .quad 0xedec050cdb03e7c2 + .quad 0xa896981ff9f0b1a2 + .quad 0x49a4ae2bac5e34a4 + .quad 0x295b1c86f6f449bc + .quad 0x51b2e84a1f0ab4dd + .quad 0xc001cb30aa8e551d + .quad 0x6a28d35944f43662 + .quad 0x28bb12ee04a740e0 + .quad 0x14313bbd9bce8174 + .quad 0x72f5b5e4e8c10c40 + .quad 0x7cbfb19936adcd5b + + // 2^188 * 8 * G + + .quad 0xa311ddc26b89792d + .quad 0x1b30b4c6da512664 + .quad 0x0ca77b4ccf150859 + .quad 0x1de443df1b009408 + .quad 0x8e793a7acc36e6e0 + .quad 0xf9fab7a37d586eed + .quad 0x3a4f9692bae1f4e4 + .quad 0x1c14b03eff5f447e + .quad 0x19647bd114a85291 + .quad 0x57b76cb21034d3af + .quad 0x6329db440f9d6dfa + .quad 0x5ef43e586a571493 + + // 2^192 * 1 * G + + .quad 0xef782014385675a6 + .quad 0xa2649f30aafda9e8 + .quad 0x4cd1eb505cdfa8cb + .quad 0x46115aba1d4dc0b3 + .quad 0xa66dcc9dc80c1ac0 + .quad 0x97a05cf41b38a436 + .quad 0xa7ebf3be95dbd7c6 + .quad 0x7da0b8f68d7e7dab + .quad 0xd40f1953c3b5da76 + .quad 0x1dac6f7321119e9b + .quad 0x03cc6021feb25960 + .quad 0x5a5f887e83674b4b + + // 2^192 * 2 * G + + .quad 0x8f6301cf70a13d11 + .quad 0xcfceb815350dd0c4 + .quad 0xf70297d4a4bca47e + .quad 0x3669b656e44d1434 + .quad 0x9e9628d3a0a643b9 + .quad 0xb5c3cb00e6c32064 + .quad 0x9b5302897c2dec32 + .quad 0x43e37ae2d5d1c70c + .quad 0x387e3f06eda6e133 + .quad 0x67301d5199a13ac0 + .quad 0xbd5ad8f836263811 + .quad 0x6a21e6cd4fd5e9be + + // 2^192 * 3 * G + + .quad 0xf1c6170a3046e65f + .quad 0x58712a2a00d23524 + .quad 0x69dbbd3c8c82b755 + .quad 0x586bf9f1a195ff57 + .quad 0xef4129126699b2e3 + .quad 0x71d30847708d1301 + .quad 0x325432d01182b0bd + .quad 0x45371b07001e8b36 + .quad 0xa6db088d5ef8790b + .quad 0x5278f0dc610937e5 + .quad 
0xac0349d261a16eb8 + .quad 0x0eafb03790e52179 + + // 2^192 * 4 * G + + .quad 0x960555c13748042f + .quad 0x219a41e6820baa11 + .quad 0x1c81f73873486d0c + .quad 0x309acc675a02c661 + .quad 0x5140805e0f75ae1d + .quad 0xec02fbe32662cc30 + .quad 0x2cebdf1eea92396d + .quad 0x44ae3344c5435bb3 + .quad 0x9cf289b9bba543ee + .quad 0xf3760e9d5ac97142 + .quad 0x1d82e5c64f9360aa + .quad 0x62d5221b7f94678f + + // 2^192 * 5 * G + + .quad 0x524c299c18d0936d + .quad 0xc86bb56c8a0c1a0c + .quad 0xa375052edb4a8631 + .quad 0x5c0efde4bc754562 + .quad 0x7585d4263af77a3c + .quad 0xdfae7b11fee9144d + .quad 0xa506708059f7193d + .quad 0x14f29a5383922037 + .quad 0xdf717edc25b2d7f5 + .quad 0x21f970db99b53040 + .quad 0xda9234b7c3ed4c62 + .quad 0x5e72365c7bee093e + + // 2^192 * 6 * G + + .quad 0x575bfc074571217f + .quad 0x3779675d0694d95b + .quad 0x9a0a37bbf4191e33 + .quad 0x77f1104c47b4eabc + .quad 0x7d9339062f08b33e + .quad 0x5b9659e5df9f32be + .quad 0xacff3dad1f9ebdfd + .quad 0x70b20555cb7349b7 + .quad 0xbe5113c555112c4c + .quad 0x6688423a9a881fcd + .quad 0x446677855e503b47 + .quad 0x0e34398f4a06404a + + // 2^192 * 7 * G + + .quad 0xb67d22d93ecebde8 + .quad 0x09b3e84127822f07 + .quad 0x743fa61fb05b6d8d + .quad 0x5e5405368a362372 + .quad 0x18930b093e4b1928 + .quad 0x7de3e10e73f3f640 + .quad 0xf43217da73395d6f + .quad 0x6f8aded6ca379c3e + .quad 0xe340123dfdb7b29a + .quad 0x487b97e1a21ab291 + .quad 0xf9967d02fde6949e + .quad 0x780de72ec8d3de97 + + // 2^192 * 8 * G + + .quad 0x0ae28545089ae7bc + .quad 0x388ddecf1c7f4d06 + .quad 0x38ac15510a4811b8 + .quad 0x0eb28bf671928ce4 + .quad 0x671feaf300f42772 + .quad 0x8f72eb2a2a8c41aa + .quad 0x29a17fd797373292 + .quad 0x1defc6ad32b587a6 + .quad 0xaf5bbe1aef5195a7 + .quad 0x148c1277917b15ed + .quad 0x2991f7fb7ae5da2e + .quad 0x467d201bf8dd2867 + + // 2^196 * 1 * G + + .quad 0x7906ee72f7bd2e6b + .quad 0x05d270d6109abf4e + .quad 0x8d5cfe45b941a8a4 + .quad 0x44c218671c974287 + .quad 0x745f9d56296bc318 + .quad 0x993580d4d8152e65 + .quad 0xb0e5b13f5839e9ce + .quad 0x51fc2b28d43921c0 + .quad 0x1b8fd11795e2a98c + .quad 0x1c4e5ee12b6b6291 + .quad 0x5b30e7107424b572 + .quad 0x6e6b9de84c4f4ac6 + + // 2^196 * 2 * G + + .quad 0xdff25fce4b1de151 + .quad 0xd841c0c7e11c4025 + .quad 0x2554b3c854749c87 + .quad 0x2d292459908e0df9 + .quad 0x6b7c5f10f80cb088 + .quad 0x736b54dc56e42151 + .quad 0xc2b620a5c6ef99c4 + .quad 0x5f4c802cc3a06f42 + .quad 0x9b65c8f17d0752da + .quad 0x881ce338c77ee800 + .quad 0xc3b514f05b62f9e3 + .quad 0x66ed5dd5bec10d48 + + // 2^196 * 3 * G + + .quad 0x7d38a1c20bb2089d + .quad 0x808334e196ccd412 + .quad 0xc4a70b8c6c97d313 + .quad 0x2eacf8bc03007f20 + .quad 0xf0adf3c9cbca047d + .quad 0x81c3b2cbf4552f6b + .quad 0xcfda112d44735f93 + .quad 0x1f23a0c77e20048c + .quad 0xf235467be5bc1570 + .quad 0x03d2d9020dbab38c + .quad 0x27529aa2fcf9e09e + .quad 0x0840bef29d34bc50 + + // 2^196 * 4 * G + + .quad 0x796dfb35dc10b287 + .quad 0x27176bcd5c7ff29d + .quad 0x7f3d43e8c7b24905 + .quad 0x0304f5a191c54276 + .quad 0xcd54e06b7f37e4eb + .quad 0x8cc15f87f5e96cca + .quad 0xb8248bb0d3597dce + .quad 0x246affa06074400c + .quad 0x37d88e68fbe45321 + .quad 0x86097548c0d75032 + .quad 0x4e9b13ef894a0d35 + .quad 0x25a83cac5753d325 + + // 2^196 * 5 * G + + .quad 0x10222f48eed8165e + .quad 0x623fc1234b8bcf3a + .quad 0x1e145c09c221e8f0 + .quad 0x7ccfa59fca782630 + .quad 0x9f0f66293952b6e2 + .quad 0x33db5e0e0934267b + .quad 0xff45252bd609fedc + .quad 0x06be10f5c506e0c9 + .quad 0x1a9615a9b62a345f + .quad 0x22050c564a52fecc + .quad 0xa7a2788528bc0dfe + .quad 0x5e82770a1a1ee71d + + // 2^196 * 6 * G + + .quad 
0x35425183ad896a5c + .quad 0xe8673afbe78d52f6 + .quad 0x2c66f25f92a35f64 + .quad 0x09d04f3b3b86b102 + .quad 0xe802e80a42339c74 + .quad 0x34175166a7fffae5 + .quad 0x34865d1f1c408cae + .quad 0x2cca982c605bc5ee + .quad 0xfd2d5d35197dbe6e + .quad 0x207c2eea8be4ffa3 + .quad 0x2613d8db325ae918 + .quad 0x7a325d1727741d3e + + // 2^196 * 7 * G + + .quad 0xd036b9bbd16dfde2 + .quad 0xa2055757c497a829 + .quad 0x8e6cc966a7f12667 + .quad 0x4d3b1a791239c180 + .quad 0xecd27d017e2a076a + .quad 0xd788689f1636495e + .quad 0x52a61af0919233e5 + .quad 0x2a479df17bb1ae64 + .quad 0x9e5eee8e33db2710 + .quad 0x189854ded6c43ca5 + .quad 0xa41c22c592718138 + .quad 0x27ad5538a43a5e9b + + // 2^196 * 8 * G + + .quad 0x2746dd4b15350d61 + .quad 0xd03fcbc8ee9521b7 + .quad 0xe86e365a138672ca + .quad 0x510e987f7e7d89e2 + .quad 0xcb5a7d638e47077c + .quad 0x8db7536120a1c059 + .quad 0x549e1e4d8bedfdcc + .quad 0x080153b7503b179d + .quad 0xdda69d930a3ed3e3 + .quad 0x3d386ef1cd60a722 + .quad 0xc817ad58bdaa4ee6 + .quad 0x23be8d554fe7372a + + // 2^200 * 1 * G + + .quad 0x95fe919a74ef4fad + .quad 0x3a827becf6a308a2 + .quad 0x964e01d309a47b01 + .quad 0x71c43c4f5ba3c797 + .quad 0xbc1ef4bd567ae7a9 + .quad 0x3f624cb2d64498bd + .quad 0xe41064d22c1f4ec8 + .quad 0x2ef9c5a5ba384001 + .quad 0xb6fd6df6fa9e74cd + .quad 0xf18278bce4af267a + .quad 0x8255b3d0f1ef990e + .quad 0x5a758ca390c5f293 + + // 2^200 * 2 * G + + .quad 0xa2b72710d9462495 + .quad 0x3aa8c6d2d57d5003 + .quad 0xe3d400bfa0b487ca + .quad 0x2dbae244b3eb72ec + .quad 0x8ce0918b1d61dc94 + .quad 0x8ded36469a813066 + .quad 0xd4e6a829afe8aad3 + .quad 0x0a738027f639d43f + .quad 0x980f4a2f57ffe1cc + .quad 0x00670d0de1839843 + .quad 0x105c3f4a49fb15fd + .quad 0x2698ca635126a69c + + // 2^200 * 3 * G + + .quad 0xe765318832b0ba78 + .quad 0x381831f7925cff8b + .quad 0x08a81b91a0291fcc + .quad 0x1fb43dcc49caeb07 + .quad 0x2e3d702f5e3dd90e + .quad 0x9e3f0918e4d25386 + .quad 0x5e773ef6024da96a + .quad 0x3c004b0c4afa3332 + .quad 0x9aa946ac06f4b82b + .quad 0x1ca284a5a806c4f3 + .quad 0x3ed3265fc6cd4787 + .quad 0x6b43fd01cd1fd217 + + // 2^200 * 4 * G + + .quad 0xc7a75d4b4697c544 + .quad 0x15fdf848df0fffbf + .quad 0x2868b9ebaa46785a + .quad 0x5a68d7105b52f714 + .quad 0xb5c742583e760ef3 + .quad 0x75dc52b9ee0ab990 + .quad 0xbf1427c2072b923f + .quad 0x73420b2d6ff0d9f0 + .quad 0xaf2cf6cb9e851e06 + .quad 0x8f593913c62238c4 + .quad 0xda8ab89699fbf373 + .quad 0x3db5632fea34bc9e + + // 2^200 * 5 * G + + .quad 0xf46eee2bf75dd9d8 + .quad 0x0d17b1f6396759a5 + .quad 0x1bf2d131499e7273 + .quad 0x04321adf49d75f13 + .quad 0x2e4990b1829825d5 + .quad 0xedeaeb873e9a8991 + .quad 0xeef03d394c704af8 + .quad 0x59197ea495df2b0e + .quad 0x04e16019e4e55aae + .quad 0xe77b437a7e2f92e9 + .quad 0xc7ce2dc16f159aa4 + .quad 0x45eafdc1f4d70cc0 + + // 2^200 * 6 * G + + .quad 0x698401858045d72b + .quad 0x4c22faa2cf2f0651 + .quad 0x941a36656b222dc6 + .quad 0x5a5eebc80362dade + .quad 0xb60e4624cfccb1ed + .quad 0x59dbc292bd5c0395 + .quad 0x31a09d1ddc0481c9 + .quad 0x3f73ceea5d56d940 + .quad 0xb7a7bfd10a4e8dc6 + .quad 0xbe57007e44c9b339 + .quad 0x60c1207f1557aefa + .quad 0x26058891266218db + + // 2^200 * 7 * G + + .quad 0x59f704a68360ff04 + .quad 0xc3d93fde7661e6f4 + .quad 0x831b2a7312873551 + .quad 0x54ad0c2e4e615d57 + .quad 0x4c818e3cc676e542 + .quad 0x5e422c9303ceccad + .quad 0xec07cccab4129f08 + .quad 0x0dedfa10b24443b8 + .quad 0xee3b67d5b82b522a + .quad 0x36f163469fa5c1eb + .quad 0xa5b4d2f26ec19fd3 + .quad 0x62ecb2baa77a9408 + + // 2^200 * 8 * G + + .quad 0xe5ed795261152b3d + .quad 0x4962357d0eddd7d1 + .quad 0x7482c8d0b96b4c71 + .quad 
0x2e59f919a966d8be + .quad 0x92072836afb62874 + .quad 0x5fcd5e8579e104a5 + .quad 0x5aad01adc630a14a + .quad 0x61913d5075663f98 + .quad 0x0dc62d361a3231da + .quad 0xfa47583294200270 + .quad 0x02d801513f9594ce + .quad 0x3ddbc2a131c05d5c + + // 2^204 * 1 * G + + .quad 0x3f50a50a4ffb81ef + .quad 0xb1e035093bf420bf + .quad 0x9baa8e1cc6aa2cd0 + .quad 0x32239861fa237a40 + .quad 0xfb735ac2004a35d1 + .quad 0x31de0f433a6607c3 + .quad 0x7b8591bfc528d599 + .quad 0x55be9a25f5bb050c + .quad 0x0d005acd33db3dbf + .quad 0x0111b37c80ac35e2 + .quad 0x4892d66c6f88ebeb + .quad 0x770eadb16508fbcd + + // 2^204 * 2 * G + + .quad 0x8451f9e05e4e89dd + .quad 0xc06302ffbc793937 + .quad 0x5d22749556a6495c + .quad 0x09a6755ca05603fb + .quad 0xf1d3b681a05071b9 + .quad 0x2207659a3592ff3a + .quad 0x5f0169297881e40e + .quad 0x16bedd0e86ba374e + .quad 0x5ecccc4f2c2737b5 + .quad 0x43b79e0c2dccb703 + .quad 0x33e008bc4ec43df3 + .quad 0x06c1b840f07566c0 + + // 2^204 * 3 * G + + .quad 0x7688a5c6a388f877 + .quad 0x02a96c14deb2b6ac + .quad 0x64c9f3431b8c2af8 + .quad 0x3628435554a1eed6 + .quad 0x69ee9e7f9b02805c + .quad 0xcbff828a547d1640 + .quad 0x3d93a869b2430968 + .quad 0x46b7b8cd3fe26972 + .quad 0xe9812086fe7eebe0 + .quad 0x4cba6be72f515437 + .quad 0x1d04168b516efae9 + .quad 0x5ea1391043982cb9 + + // 2^204 * 4 * G + + .quad 0x49125c9cf4702ee1 + .quad 0x4520b71f8b25b32d + .quad 0x33193026501fef7e + .quad 0x656d8997c8d2eb2b + .quad 0x6f2b3be4d5d3b002 + .quad 0xafec33d96a09c880 + .quad 0x035f73a4a8bcc4cc + .quad 0x22c5b9284662198b + .quad 0xcb58c8fe433d8939 + .quad 0x89a0cb2e6a8d7e50 + .quad 0x79ca955309fbbe5a + .quad 0x0c626616cd7fc106 + + // 2^204 * 5 * G + + .quad 0x1ffeb80a4879b61f + .quad 0x6396726e4ada21ed + .quad 0x33c7b093368025ba + .quad 0x471aa0c6f3c31788 + .quad 0x8fdfc379fbf454b1 + .quad 0x45a5a970f1a4b771 + .quad 0xac921ef7bad35915 + .quad 0x42d088dca81c2192 + .quad 0x8fda0f37a0165199 + .quad 0x0adadb77c8a0e343 + .quad 0x20fbfdfcc875e820 + .quad 0x1cf2bea80c2206e7 + + // 2^204 * 6 * G + + .quad 0xc2ddf1deb36202ac + .quad 0x92a5fe09d2e27aa5 + .quad 0x7d1648f6fc09f1d3 + .quad 0x74c2cc0513bc4959 + .quad 0x982d6e1a02c0412f + .quad 0x90fa4c83db58e8fe + .quad 0x01c2f5bcdcb18bc0 + .quad 0x686e0c90216abc66 + .quad 0x1fadbadba54395a7 + .quad 0xb41a02a0ae0da66a + .quad 0xbf19f598bba37c07 + .quad 0x6a12b8acde48430d + + // 2^204 * 7 * G + + .quad 0xf8daea1f39d495d9 + .quad 0x592c190e525f1dfc + .quad 0xdb8cbd04c9991d1b + .quad 0x11f7fda3d88f0cb7 + .quad 0x793bdd801aaeeb5f + .quad 0x00a2a0aac1518871 + .quad 0xe8a373a31f2136b4 + .quad 0x48aab888fc91ef19 + .quad 0x041f7e925830f40e + .quad 0x002d6ca979661c06 + .quad 0x86dc9ff92b046a2e + .quad 0x760360928b0493d1 + + // 2^204 * 8 * G + + .quad 0x21bb41c6120cf9c6 + .quad 0xeab2aa12decda59b + .quad 0xc1a72d020aa48b34 + .quad 0x215d4d27e87d3b68 + .quad 0xb43108e5695a0b05 + .quad 0x6cb00ee8ad37a38b + .quad 0x5edad6eea3537381 + .quad 0x3f2602d4b6dc3224 + .quad 0xc8b247b65bcaf19c + .quad 0x49779dc3b1b2c652 + .quad 0x89a180bbd5ece2e2 + .quad 0x13f098a3cec8e039 + + // 2^208 * 1 * G + + .quad 0x9adc0ff9ce5ec54b + .quad 0x039c2a6b8c2f130d + .quad 0x028007c7f0f89515 + .quad 0x78968314ac04b36b + .quad 0xf3aa57a22796bb14 + .quad 0x883abab79b07da21 + .quad 0xe54be21831a0391c + .quad 0x5ee7fb38d83205f9 + .quad 0x538dfdcb41446a8e + .quad 0xa5acfda9434937f9 + .quad 0x46af908d263c8c78 + .quad 0x61d0633c9bca0d09 + + // 2^208 * 2 * G + + .quad 0x63744935ffdb2566 + .quad 0xc5bd6b89780b68bb + .quad 0x6f1b3280553eec03 + .quad 0x6e965fd847aed7f5 + .quad 0xada328bcf8fc73df + .quad 0xee84695da6f037fc + .quad 
0x637fb4db38c2a909 + .quad 0x5b23ac2df8067bdc + .quad 0x9ad2b953ee80527b + .quad 0xe88f19aafade6d8d + .quad 0x0e711704150e82cf + .quad 0x79b9bbb9dd95dedc + + // 2^208 * 3 * G + + .quad 0xebb355406a3126c2 + .quad 0xd26383a868c8c393 + .quad 0x6c0c6429e5b97a82 + .quad 0x5065f158c9fd2147 + .quad 0xd1997dae8e9f7374 + .quad 0xa032a2f8cfbb0816 + .quad 0xcd6cba126d445f0a + .quad 0x1ba811460accb834 + .quad 0x708169fb0c429954 + .quad 0xe14600acd76ecf67 + .quad 0x2eaab98a70e645ba + .quad 0x3981f39e58a4faf2 + + // 2^208 * 4 * G + + .quad 0x18fb8a7559230a93 + .quad 0x1d168f6960e6f45d + .quad 0x3a85a94514a93cb5 + .quad 0x38dc083705acd0fd + .quad 0xc845dfa56de66fde + .quad 0xe152a5002c40483a + .quad 0xe9d2e163c7b4f632 + .quad 0x30f4452edcbc1b65 + .quad 0x856d2782c5759740 + .quad 0xfa134569f99cbecc + .quad 0x8844fc73c0ea4e71 + .quad 0x632d9a1a593f2469 + + // 2^208 * 5 * G + + .quad 0xf6bb6b15b807cba6 + .quad 0x1823c7dfbc54f0d7 + .quad 0xbb1d97036e29670b + .quad 0x0b24f48847ed4a57 + .quad 0xbf09fd11ed0c84a7 + .quad 0x63f071810d9f693a + .quad 0x21908c2d57cf8779 + .quad 0x3a5a7df28af64ba2 + .quad 0xdcdad4be511beac7 + .quad 0xa4538075ed26ccf2 + .quad 0xe19cff9f005f9a65 + .quad 0x34fcf74475481f63 + + // 2^208 * 6 * G + + .quad 0xc197e04c789767ca + .quad 0xb8714dcb38d9467d + .quad 0x55de888283f95fa8 + .quad 0x3d3bdc164dfa63f7 + .quad 0xa5bb1dab78cfaa98 + .quad 0x5ceda267190b72f2 + .quad 0x9309c9110a92608e + .quad 0x0119a3042fb374b0 + .quad 0x67a2d89ce8c2177d + .quad 0x669da5f66895d0c1 + .quad 0xf56598e5b282a2b0 + .quad 0x56c088f1ede20a73 + + // 2^208 * 7 * G + + .quad 0x336d3d1110a86e17 + .quad 0xd7f388320b75b2fa + .quad 0xf915337625072988 + .quad 0x09674c6b99108b87 + .quad 0x581b5fac24f38f02 + .quad 0xa90be9febae30cbd + .quad 0x9a2169028acf92f0 + .quad 0x038b7ea48359038f + .quad 0x9f4ef82199316ff8 + .quad 0x2f49d282eaa78d4f + .quad 0x0971a5ab5aef3174 + .quad 0x6e5e31025969eb65 + + // 2^208 * 8 * G + + .quad 0xb16c62f587e593fb + .quad 0x4999eddeca5d3e71 + .quad 0xb491c1e014cc3e6d + .quad 0x08f5114789a8dba8 + .quad 0x3304fb0e63066222 + .quad 0xfb35068987acba3f + .quad 0xbd1924778c1061a3 + .quad 0x3058ad43d1838620 + .quad 0x323c0ffde57663d0 + .quad 0x05c3df38a22ea610 + .quad 0xbdc78abdac994f9a + .quad 0x26549fa4efe3dc99 + + // 2^212 * 1 * G + + .quad 0x738b38d787ce8f89 + .quad 0xb62658e24179a88d + .quad 0x30738c9cf151316d + .quad 0x49128c7f727275c9 + .quad 0x04dbbc17f75396b9 + .quad 0x69e6a2d7d2f86746 + .quad 0xc6409d99f53eabc6 + .quad 0x606175f6332e25d2 + .quad 0x4021370ef540e7dd + .quad 0x0910d6f5a1f1d0a5 + .quad 0x4634aacd5b06b807 + .quad 0x6a39e6356944f235 + + // 2^212 * 2 * G + + .quad 0x96cd5640df90f3e7 + .quad 0x6c3a760edbfa25ea + .quad 0x24f3ef0959e33cc4 + .quad 0x42889e7e530d2e58 + .quad 0x1da1965774049e9d + .quad 0xfbcd6ea198fe352b + .quad 0xb1cbcd50cc5236a6 + .quad 0x1f5ec83d3f9846e2 + .quad 0x8efb23c3328ccb75 + .quad 0xaf42a207dd876ee9 + .quad 0x20fbdadc5dfae796 + .quad 0x241e246b06bf9f51 + + // 2^212 * 3 * G + + .quad 0x29e68e57ad6e98f6 + .quad 0x4c9260c80b462065 + .quad 0x3f00862ea51ebb4b + .quad 0x5bc2c77fb38d9097 + .quad 0x7eaafc9a6280bbb8 + .quad 0x22a70f12f403d809 + .quad 0x31ce40bb1bfc8d20 + .quad 0x2bc65635e8bd53ee + .quad 0xe8d5dc9fa96bad93 + .quad 0xe58fb17dde1947dc + .quad 0x681532ea65185fa3 + .quad 0x1fdd6c3b034a7830 + + // 2^212 * 4 * G + + .quad 0x0a64e28c55dc18fe + .quad 0xe3df9e993399ebdd + .quad 0x79ac432370e2e652 + .quad 0x35ff7fc33ae4cc0e + .quad 0x9c13a6a52dd8f7a9 + .quad 0x2dbb1f8c3efdcabf + .quad 0x961e32405e08f7b5 + .quad 0x48c8a121bbe6c9e5 + .quad 0xfc415a7c59646445 + .quad 
0xd224b2d7c128b615 + .quad 0x6035c9c905fbb912 + .quad 0x42d7a91274429fab + + // 2^212 * 5 * G + + .quad 0x4e6213e3eaf72ed3 + .quad 0x6794981a43acd4e7 + .quad 0xff547cde6eb508cb + .quad 0x6fed19dd10fcb532 + .quad 0xa9a48947933da5bc + .quad 0x4a58920ec2e979ec + .quad 0x96d8800013e5ac4c + .quad 0x453692d74b48b147 + .quad 0xdd775d99a8559c6f + .quad 0xf42a2140df003e24 + .quad 0x5223e229da928a66 + .quad 0x063f46ba6d38f22c + + // 2^212 * 6 * G + + .quad 0xd2d242895f536694 + .quad 0xca33a2c542939b2c + .quad 0x986fada6c7ddb95c + .quad 0x5a152c042f712d5d + .quad 0x39843cb737346921 + .quad 0xa747fb0738c89447 + .quad 0xcb8d8031a245307e + .quad 0x67810f8e6d82f068 + .quad 0x3eeb8fbcd2287db4 + .quad 0x72c7d3a301a03e93 + .quad 0x5473e88cbd98265a + .quad 0x7324aa515921b403 + + // 2^212 * 7 * G + + .quad 0x857942f46c3cbe8e + .quad 0xa1d364b14730c046 + .quad 0x1c8ed914d23c41bf + .quad 0x0838e161eef6d5d2 + .quad 0xad23f6dae82354cb + .quad 0x6962502ab6571a6d + .quad 0x9b651636e38e37d1 + .quad 0x5cac5005d1a3312f + .quad 0x8cc154cce9e39904 + .quad 0x5b3a040b84de6846 + .quad 0xc4d8a61cb1be5d6e + .quad 0x40fb897bd8861f02 + + // 2^212 * 8 * G + + .quad 0x84c5aa9062de37a1 + .quad 0x421da5000d1d96e1 + .quad 0x788286306a9242d9 + .quad 0x3c5e464a690d10da + .quad 0xe57ed8475ab10761 + .quad 0x71435e206fd13746 + .quad 0x342f824ecd025632 + .quad 0x4b16281ea8791e7b + .quad 0xd1c101d50b813381 + .quad 0xdee60f1176ee6828 + .quad 0x0cb68893383f6409 + .quad 0x6183c565f6ff484a + + // 2^216 * 1 * G + + .quad 0x741d5a461e6bf9d6 + .quad 0x2305b3fc7777a581 + .quad 0xd45574a26474d3d9 + .quad 0x1926e1dc6401e0ff + .quad 0xdb468549af3f666e + .quad 0xd77fcf04f14a0ea5 + .quad 0x3df23ff7a4ba0c47 + .quad 0x3a10dfe132ce3c85 + .quad 0xe07f4e8aea17cea0 + .quad 0x2fd515463a1fc1fd + .quad 0x175322fd31f2c0f1 + .quad 0x1fa1d01d861e5d15 + + // 2^216 * 2 * G + + .quad 0xcc8055947d599832 + .quad 0x1e4656da37f15520 + .quad 0x99f6f7744e059320 + .quad 0x773563bc6a75cf33 + .quad 0x38dcac00d1df94ab + .quad 0x2e712bddd1080de9 + .quad 0x7f13e93efdd5e262 + .quad 0x73fced18ee9a01e5 + .quad 0x06b1e90863139cb3 + .quad 0xa493da67c5a03ecd + .quad 0x8d77cec8ad638932 + .quad 0x1f426b701b864f44 + + // 2^216 * 3 * G + + .quad 0xefc9264c41911c01 + .quad 0xf1a3b7b817a22c25 + .quad 0x5875da6bf30f1447 + .quad 0x4e1af5271d31b090 + .quad 0xf17e35c891a12552 + .quad 0xb76b8153575e9c76 + .quad 0xfa83406f0d9b723e + .quad 0x0b76bb1b3fa7e438 + .quad 0x08b8c1f97f92939b + .quad 0xbe6771cbd444ab6e + .quad 0x22e5646399bb8017 + .quad 0x7b6dd61eb772a955 + + // 2^216 * 4 * G + + .quad 0xb7adc1e850f33d92 + .quad 0x7998fa4f608cd5cf + .quad 0xad962dbd8dfc5bdb + .quad 0x703e9bceaf1d2f4f + .quad 0x5730abf9ab01d2c7 + .quad 0x16fb76dc40143b18 + .quad 0x866cbe65a0cbb281 + .quad 0x53fa9b659bff6afe + .quad 0x6c14c8e994885455 + .quad 0x843a5d6665aed4e5 + .quad 0x181bb73ebcd65af1 + .quad 0x398d93e5c4c61f50 + + // 2^216 * 5 * G + + .quad 0x1c4bd16733e248f3 + .quad 0xbd9e128715bf0a5f + .quad 0xd43f8cf0a10b0376 + .quad 0x53b09b5ddf191b13 + .quad 0xc3877c60d2e7e3f2 + .quad 0x3b34aaa030828bb1 + .quad 0x283e26e7739ef138 + .quad 0x699c9c9002c30577 + .quad 0xf306a7235946f1cc + .quad 0x921718b5cce5d97d + .quad 0x28cdd24781b4e975 + .quad 0x51caf30c6fcdd907 + + // 2^216 * 6 * G + + .quad 0xa60ba7427674e00a + .quad 0x630e8570a17a7bf3 + .quad 0x3758563dcf3324cc + .quad 0x5504aa292383fdaa + .quad 0x737af99a18ac54c7 + .quad 0x903378dcc51cb30f + .quad 0x2b89bc334ce10cc7 + .quad 0x12ae29c189f8e99a + .quad 0xa99ec0cb1f0d01cf + .quad 0x0dd1efcc3a34f7ae + .quad 0x55ca7521d09c4e22 + .quad 0x5fd14fe958eba5ea + + // 
2^216 * 7 * G + + .quad 0xb5dc2ddf2845ab2c + .quad 0x069491b10a7fe993 + .quad 0x4daaf3d64002e346 + .quad 0x093ff26e586474d1 + .quad 0x3c42fe5ebf93cb8e + .quad 0xbedfa85136d4565f + .quad 0xe0f0859e884220e8 + .quad 0x7dd73f960725d128 + .quad 0xb10d24fe68059829 + .quad 0x75730672dbaf23e5 + .quad 0x1367253ab457ac29 + .quad 0x2f59bcbc86b470a4 + + // 2^216 * 8 * G + + .quad 0x83847d429917135f + .quad 0xad1b911f567d03d7 + .quad 0x7e7748d9be77aad1 + .quad 0x5458b42e2e51af4a + .quad 0x7041d560b691c301 + .quad 0x85201b3fadd7e71e + .quad 0x16c2e16311335585 + .quad 0x2aa55e3d010828b1 + .quad 0xed5192e60c07444f + .quad 0x42c54e2d74421d10 + .quad 0x352b4c82fdb5c864 + .quad 0x13e9004a8a768664 + + // 2^220 * 1 * G + + .quad 0xcbb5b5556c032bff + .quad 0xdf7191b729297a3a + .quad 0xc1ff7326aded81bb + .quad 0x71ade8bb68be03f5 + .quad 0x1e6284c5806b467c + .quad 0xc5f6997be75d607b + .quad 0x8b67d958b378d262 + .quad 0x3d88d66a81cd8b70 + .quad 0x8b767a93204ed789 + .quad 0x762fcacb9fa0ae2a + .quad 0x771febcc6dce4887 + .quad 0x343062158ff05fb3 + + // 2^220 * 2 * G + + .quad 0xe05da1a7e1f5bf49 + .quad 0x26457d6dd4736092 + .quad 0x77dcb07773cc32f6 + .quad 0x0a5d94969cdd5fcd + .quad 0xfce219072a7b31b4 + .quad 0x4d7adc75aa578016 + .quad 0x0ec276a687479324 + .quad 0x6d6d9d5d1fda4beb + .quad 0x22b1a58ae9b08183 + .quad 0xfd95d071c15c388b + .quad 0xa9812376850a0517 + .quad 0x33384cbabb7f335e + + // 2^220 * 3 * G + + .quad 0x3c6fa2680ca2c7b5 + .quad 0x1b5082046fb64fda + .quad 0xeb53349c5431d6de + .quad 0x5278b38f6b879c89 + .quad 0x33bc627a26218b8d + .quad 0xea80b21fc7a80c61 + .quad 0x9458b12b173e9ee6 + .quad 0x076247be0e2f3059 + .quad 0x52e105f61416375a + .quad 0xec97af3685abeba4 + .quad 0x26e6b50623a67c36 + .quad 0x5cf0e856f3d4fb01 + + // 2^220 * 4 * G + + .quad 0xf6c968731ae8cab4 + .quad 0x5e20741ecb4f92c5 + .quad 0x2da53be58ccdbc3e + .quad 0x2dddfea269970df7 + .quad 0xbeaece313db342a8 + .quad 0xcba3635b842db7ee + .quad 0xe88c6620817f13ef + .quad 0x1b9438aa4e76d5c6 + .quad 0x8a50777e166f031a + .quad 0x067b39f10fb7a328 + .quad 0x1925c9a6010fbd76 + .quad 0x6df9b575cc740905 + + // 2^220 * 5 * G + + .quad 0x42c1192927f6bdcf + .quad 0x8f91917a403d61ca + .quad 0xdc1c5a668b9e1f61 + .quad 0x1596047804ec0f8d + .quad 0xecdfc35b48cade41 + .quad 0x6a88471fb2328270 + .quad 0x740a4a2440a01b6a + .quad 0x471e5796003b5f29 + .quad 0xda96bbb3aced37ac + .quad 0x7a2423b5e9208cea + .quad 0x24cc5c3038aebae2 + .quad 0x50c356afdc5dae2f + + // 2^220 * 6 * G + + .quad 0x09dcbf4341c30318 + .quad 0xeeba061183181dce + .quad 0xc179c0cedc1e29a1 + .quad 0x1dbf7b89073f35b0 + .quad 0xcfed9cdf1b31b964 + .quad 0xf486a9858ca51af3 + .quad 0x14897265ea8c1f84 + .quad 0x784a53dd932acc00 + .quad 0x2d99f9df14fc4920 + .quad 0x76ccb60cc4499fe5 + .quad 0xa4132cbbe5cf0003 + .quad 0x3f93d82354f000ea + + // 2^220 * 7 * G + + .quad 0x8183e7689e04ce85 + .quad 0x678fb71e04465341 + .quad 0xad92058f6688edac + .quad 0x5da350d3532b099a + .quad 0xeaac12d179e14978 + .quad 0xff923ff3bbebff5e + .quad 0x4af663e40663ce27 + .quad 0x0fd381a811a5f5ff + .quad 0xf256aceca436df54 + .quad 0x108b6168ae69d6e8 + .quad 0x20d986cb6b5d036c + .quad 0x655957b9fee2af50 + + // 2^220 * 8 * G + + .quad 0xaea8b07fa902030f + .quad 0xf88c766af463d143 + .quad 0x15b083663c787a60 + .quad 0x08eab1148267a4a8 + .quad 0xbdc1409bd002d0ac + .quad 0x66660245b5ccd9a6 + .quad 0x82317dc4fade85ec + .quad 0x02fe934b6ad7df0d + .quad 0xef5cf100cfb7ea74 + .quad 0x22897633a1cb42ac + .quad 0xd4ce0c54cef285e2 + .quad 0x30408c048a146a55 + + // 2^224 * 1 * G + + .quad 0x739d8845832fcedb + .quad 0xfa38d6c9ae6bf863 + .quad 
0x32bc0dcab74ffef7 + .quad 0x73937e8814bce45e + .quad 0xbb2e00c9193b877f + .quad 0xece3a890e0dc506b + .quad 0xecf3b7c036de649f + .quad 0x5f46040898de9e1a + .quad 0xb9037116297bf48d + .quad 0xa9d13b22d4f06834 + .quad 0xe19715574696bdc6 + .quad 0x2cf8a4e891d5e835 + + // 2^224 * 2 * G + + .quad 0x6d93fd8707110f67 + .quad 0xdd4c09d37c38b549 + .quad 0x7cb16a4cc2736a86 + .quad 0x2049bd6e58252a09 + .quad 0x2cb5487e17d06ba2 + .quad 0x24d2381c3950196b + .quad 0xd7659c8185978a30 + .quad 0x7a6f7f2891d6a4f6 + .quad 0x7d09fd8d6a9aef49 + .quad 0xf0ee60be5b3db90b + .quad 0x4c21b52c519ebfd4 + .quad 0x6011aadfc545941d + + // 2^224 * 3 * G + + .quad 0x5f67926dcf95f83c + .quad 0x7c7e856171289071 + .quad 0xd6a1e7f3998f7a5b + .quad 0x6fc5cc1b0b62f9e0 + .quad 0x63ded0c802cbf890 + .quad 0xfbd098ca0dff6aaa + .quad 0x624d0afdb9b6ed99 + .quad 0x69ce18b779340b1e + .quad 0xd1ef5528b29879cb + .quad 0xdd1aae3cd47e9092 + .quad 0x127e0442189f2352 + .quad 0x15596b3ae57101f1 + + // 2^224 * 4 * G + + .quad 0x462739d23f9179a2 + .quad 0xff83123197d6ddcf + .quad 0x1307deb553f2148a + .quad 0x0d2237687b5f4dda + .quad 0x09ff31167e5124ca + .quad 0x0be4158bd9c745df + .quad 0x292b7d227ef556e5 + .quad 0x3aa4e241afb6d138 + .quad 0x2cc138bf2a3305f5 + .quad 0x48583f8fa2e926c3 + .quad 0x083ab1a25549d2eb + .quad 0x32fcaa6e4687a36c + + // 2^224 * 5 * G + + .quad 0x7bc56e8dc57d9af5 + .quad 0x3e0bd2ed9df0bdf2 + .quad 0xaac014de22efe4a3 + .quad 0x4627e9cefebd6a5c + .quad 0x3207a4732787ccdf + .quad 0x17e31908f213e3f8 + .quad 0xd5b2ecd7f60d964e + .quad 0x746f6336c2600be9 + .quad 0x3f4af345ab6c971c + .quad 0xe288eb729943731f + .quad 0x33596a8a0344186d + .quad 0x7b4917007ed66293 + + // 2^224 * 6 * G + + .quad 0x2d85fb5cab84b064 + .quad 0x497810d289f3bc14 + .quad 0x476adc447b15ce0c + .quad 0x122ba376f844fd7b + .quad 0x54341b28dd53a2dd + .quad 0xaa17905bdf42fc3f + .quad 0x0ff592d94dd2f8f4 + .quad 0x1d03620fe08cd37d + .quad 0xc20232cda2b4e554 + .quad 0x9ed0fd42115d187f + .quad 0x2eabb4be7dd479d9 + .quad 0x02c70bf52b68ec4c + + // 2^224 * 7 * G + + .quad 0xa287ec4b5d0b2fbb + .quad 0x415c5790074882ca + .quad 0xe044a61ec1d0815c + .quad 0x26334f0a409ef5e0 + .quad 0xace532bf458d72e1 + .quad 0x5be768e07cb73cb5 + .quad 0x56cf7d94ee8bbde7 + .quad 0x6b0697e3feb43a03 + .quad 0xb6c8f04adf62a3c0 + .quad 0x3ef000ef076da45d + .quad 0x9c9cb95849f0d2a9 + .quad 0x1cc37f43441b2fae + + // 2^224 * 8 * G + + .quad 0x508f565a5cc7324f + .quad 0xd061c4c0e506a922 + .quad 0xfb18abdb5c45ac19 + .quad 0x6c6809c10380314a + .quad 0xd76656f1c9ceaeb9 + .quad 0x1c5b15f818e5656a + .quad 0x26e72832844c2334 + .quad 0x3a346f772f196838 + .quad 0xd2d55112e2da6ac8 + .quad 0xe9bd0331b1e851ed + .quad 0x960746dd8ec67262 + .quad 0x05911b9f6ef7c5d0 + + // 2^228 * 1 * G + + .quad 0xe9dcd756b637ff2d + .quad 0xec4c348fc987f0c4 + .quad 0xced59285f3fbc7b7 + .quad 0x3305354793e1ea87 + .quad 0x01c18980c5fe9f94 + .quad 0xcd656769716fd5c8 + .quad 0x816045c3d195a086 + .quad 0x6e2b7f3266cc7982 + .quad 0xcc802468f7c3568f + .quad 0x9de9ba8219974cb3 + .quad 0xabb7229cb5b81360 + .quad 0x44e2017a6fbeba62 + + // 2^228 * 2 * G + + .quad 0xc4c2a74354dab774 + .quad 0x8e5d4c3c4eaf031a + .quad 0xb76c23d242838f17 + .quad 0x749a098f68dce4ea + .quad 0x87f82cf3b6ca6ecd + .quad 0x580f893e18f4a0c2 + .quad 0x058930072604e557 + .quad 0x6cab6ac256d19c1d + .quad 0xdcdfe0a02cc1de60 + .quad 0x032665ff51c5575b + .quad 0x2c0c32f1073abeeb + .quad 0x6a882014cd7b8606 + + // 2^228 * 3 * G + + .quad 0xa52a92fea4747fb5 + .quad 0xdc12a4491fa5ab89 + .quad 0xd82da94bb847a4ce + .quad 0x4d77edce9512cc4e + .quad 0xd111d17caf4feb6e + .quad 
0x050bba42b33aa4a3 + .quad 0x17514c3ceeb46c30 + .quad 0x54bedb8b1bc27d75 + .quad 0x77c8e14577e2189c + .quad 0xa3e46f6aff99c445 + .quad 0x3144dfc86d335343 + .quad 0x3a96559e7c4216a9 + + // 2^228 * 4 * G + + .quad 0x12550d37f42ad2ee + .quad 0x8b78e00498a1fbf5 + .quad 0x5d53078233894cb2 + .quad 0x02c84e4e3e498d0c + .quad 0x4493896880baaa52 + .quad 0x4c98afc4f285940e + .quad 0xef4aa79ba45448b6 + .quad 0x5278c510a57aae7f + .quad 0xa54dd074294c0b94 + .quad 0xf55d46b8df18ffb6 + .quad 0xf06fecc58dae8366 + .quad 0x588657668190d165 + + // 2^228 * 5 * G + + .quad 0xd47712311aef7117 + .quad 0x50343101229e92c7 + .quad 0x7a95e1849d159b97 + .quad 0x2449959b8b5d29c9 + .quad 0xbf5834f03de25cc3 + .quad 0xb887c8aed6815496 + .quad 0x5105221a9481e892 + .quad 0x6760ed19f7723f93 + .quad 0x669ba3b7ac35e160 + .quad 0x2eccf73fba842056 + .quad 0x1aec1f17c0804f07 + .quad 0x0d96bc031856f4e7 + + // 2^228 * 6 * G + + .quad 0x3318be7775c52d82 + .quad 0x4cb764b554d0aab9 + .quad 0xabcf3d27cc773d91 + .quad 0x3bf4d1848123288a + .quad 0xb1d534b0cc7505e1 + .quad 0x32cd003416c35288 + .quad 0xcb36a5800762c29d + .quad 0x5bfe69b9237a0bf8 + .quad 0x183eab7e78a151ab + .quad 0xbbe990c999093763 + .quad 0xff717d6e4ac7e335 + .quad 0x4c5cddb325f39f88 + + // 2^228 * 7 * G + + .quad 0xc0f6b74d6190a6eb + .quad 0x20ea81a42db8f4e4 + .quad 0xa8bd6f7d97315760 + .quad 0x33b1d60262ac7c21 + .quad 0x57750967e7a9f902 + .quad 0x2c37fdfc4f5b467e + .quad 0xb261663a3177ba46 + .quad 0x3a375e78dc2d532b + .quad 0x8141e72f2d4dddea + .quad 0xe6eafe9862c607c8 + .quad 0x23c28458573cafd0 + .quad 0x46b9476f4ff97346 + + // 2^228 * 8 * G + + .quad 0x0c1ffea44f901e5c + .quad 0x2b0b6fb72184b782 + .quad 0xe587ff910114db88 + .quad 0x37130f364785a142 + .quad 0x1215505c0d58359f + .quad 0x2a2013c7fc28c46b + .quad 0x24a0a1af89ea664e + .quad 0x4400b638a1130e1f + .quad 0x3a01b76496ed19c3 + .quad 0x31e00ab0ed327230 + .quad 0x520a885783ca15b1 + .quad 0x06aab9875accbec7 + + // 2^232 * 1 * G + + .quad 0xc1339983f5df0ebb + .quad 0xc0f3758f512c4cac + .quad 0x2cf1130a0bb398e1 + .quad 0x6b3cecf9aa270c62 + .quad 0x5349acf3512eeaef + .quad 0x20c141d31cc1cb49 + .quad 0x24180c07a99a688d + .quad 0x555ef9d1c64b2d17 + .quad 0x36a770ba3b73bd08 + .quad 0x624aef08a3afbf0c + .quad 0x5737ff98b40946f2 + .quad 0x675f4de13381749d + + // 2^232 * 2 * G + + .quad 0x0e2c52036b1782fc + .quad 0x64816c816cad83b4 + .quad 0xd0dcbdd96964073e + .quad 0x13d99df70164c520 + .quad 0xa12ff6d93bdab31d + .quad 0x0725d80f9d652dfe + .quad 0x019c4ff39abe9487 + .quad 0x60f450b882cd3c43 + .quad 0x014b5ec321e5c0ca + .quad 0x4fcb69c9d719bfa2 + .quad 0x4e5f1c18750023a0 + .quad 0x1c06de9e55edac80 + + // 2^232 * 3 * G + + .quad 0x990f7ad6a33ec4e2 + .quad 0x6608f938be2ee08e + .quad 0x9ca143c563284515 + .quad 0x4cf38a1fec2db60d + .quad 0xffd52b40ff6d69aa + .quad 0x34530b18dc4049bb + .quad 0x5e4a5c2fa34d9897 + .quad 0x78096f8e7d32ba2d + .quad 0xa0aaaa650dfa5ce7 + .quad 0xf9c49e2a48b5478c + .quad 0x4f09cc7d7003725b + .quad 0x373cad3a26091abe + + // 2^232 * 4 * G + + .quad 0xb294634d82c9f57c + .quad 0x1fcbfde124934536 + .quad 0x9e9c4db3418cdb5a + .quad 0x0040f3d9454419fc + .quad 0xf1bea8fb89ddbbad + .quad 0x3bcb2cbc61aeaecb + .quad 0x8f58a7bb1f9b8d9d + .quad 0x21547eda5112a686 + .quad 0xdefde939fd5986d3 + .quad 0xf4272c89510a380c + .quad 0xb72ba407bb3119b9 + .quad 0x63550a334a254df4 + + // 2^232 * 5 * G + + .quad 0x6507d6edb569cf37 + .quad 0x178429b00ca52ee1 + .quad 0xea7c0090eb6bd65d + .quad 0x3eea62c7daf78f51 + .quad 0x9bba584572547b49 + .quad 0xf305c6fae2c408e0 + .quad 0x60e8fa69c734f18d + .quad 0x39a92bafaa7d767a + .quad 
0x9d24c713e693274e + .quad 0x5f63857768dbd375 + .quad 0x70525560eb8ab39a + .quad 0x68436a0665c9c4cd + + // 2^232 * 6 * G + + .quad 0xbc0235e8202f3f27 + .quad 0xc75c00e264f975b0 + .quad 0x91a4e9d5a38c2416 + .quad 0x17b6e7f68ab789f9 + .quad 0x1e56d317e820107c + .quad 0xc5266844840ae965 + .quad 0xc1e0a1c6320ffc7a + .quad 0x5373669c91611472 + .quad 0x5d2814ab9a0e5257 + .quad 0x908f2084c9cab3fc + .quad 0xafcaf5885b2d1eca + .quad 0x1cb4b5a678f87d11 + + // 2^232 * 7 * G + + .quad 0xb664c06b394afc6c + .quad 0x0c88de2498da5fb1 + .quad 0x4f8d03164bcad834 + .quad 0x330bca78de7434a2 + .quad 0x6b74aa62a2a007e7 + .quad 0xf311e0b0f071c7b1 + .quad 0x5707e438000be223 + .quad 0x2dc0fd2d82ef6eac + .quad 0x982eff841119744e + .quad 0xf9695e962b074724 + .quad 0xc58ac14fbfc953fb + .quad 0x3c31be1b369f1cf5 + + // 2^232 * 8 * G + + .quad 0xb0f4864d08948aee + .quad 0x07dc19ee91ba1c6f + .quad 0x7975cdaea6aca158 + .quad 0x330b61134262d4bb + .quad 0xc168bc93f9cb4272 + .quad 0xaeb8711fc7cedb98 + .quad 0x7f0e52aa34ac8d7a + .quad 0x41cec1097e7d55bb + .quad 0xf79619d7a26d808a + .quad 0xbb1fd49e1d9e156d + .quad 0x73d7c36cdba1df27 + .quad 0x26b44cd91f28777d + + // 2^236 * 1 * G + + .quad 0x300a9035393aa6d8 + .quad 0x2b501131a12bb1cd + .quad 0x7b1ff677f093c222 + .quad 0x4309c1f8cab82bad + .quad 0xaf44842db0285f37 + .quad 0x8753189047efc8df + .quad 0x9574e091f820979a + .quad 0x0e378d6069615579 + .quad 0xd9fa917183075a55 + .quad 0x4bdb5ad26b009fdc + .quad 0x7829ad2cd63def0e + .quad 0x078fc54975fd3877 + + // 2^236 * 2 * G + + .quad 0x87dfbd1428878f2d + .quad 0x134636dd1e9421a1 + .quad 0x4f17c951257341a3 + .quad 0x5df98d4bad296cb8 + .quad 0xe2004b5bb833a98a + .quad 0x44775dec2d4c3330 + .quad 0x3aa244067eace913 + .quad 0x272630e3d58e00a9 + .quad 0xf3678fd0ecc90b54 + .quad 0xf001459b12043599 + .quad 0x26725fbc3758b89b + .quad 0x4325e4aa73a719ae + + // 2^236 * 3 * G + + .quad 0x657dc6ef433c3493 + .quad 0x65375e9f80dbf8c3 + .quad 0x47fd2d465b372dae + .quad 0x4966ab79796e7947 + .quad 0xed24629acf69f59d + .quad 0x2a4a1ccedd5abbf4 + .quad 0x3535ca1f56b2d67b + .quad 0x5d8c68d043b1b42d + .quad 0xee332d4de3b42b0a + .quad 0xd84e5a2b16a4601c + .quad 0x78243877078ba3e4 + .quad 0x77ed1eb4184ee437 + + // 2^236 * 4 * G + + .quad 0xbfd4e13f201839a0 + .quad 0xaeefffe23e3df161 + .quad 0xb65b04f06b5d1fe3 + .quad 0x52e085fb2b62fbc0 + .quad 0x185d43f89e92ed1a + .quad 0xb04a1eeafe4719c6 + .quad 0x499fbe88a6f03f4f + .quad 0x5d8b0d2f3c859bdd + .quad 0x124079eaa54cf2ba + .quad 0xd72465eb001b26e7 + .quad 0x6843bcfdc97af7fd + .quad 0x0524b42b55eacd02 + + // 2^236 * 5 * G + + .quad 0xfd0d5dbee45447b0 + .quad 0x6cec351a092005ee + .quad 0x99a47844567579cb + .quad 0x59d242a216e7fa45 + .quad 0xbc18dcad9b829eac + .quad 0x23ae7d28b5f579d0 + .quad 0xc346122a69384233 + .quad 0x1a6110b2e7d4ac89 + .quad 0x4f833f6ae66997ac + .quad 0x6849762a361839a4 + .quad 0x6985dec1970ab525 + .quad 0x53045e89dcb1f546 + + // 2^236 * 6 * G + + .quad 0xcb8bb346d75353db + .quad 0xfcfcb24bae511e22 + .quad 0xcba48d40d50ae6ef + .quad 0x26e3bae5f4f7cb5d + .quad 0x84da3cde8d45fe12 + .quad 0xbd42c218e444e2d2 + .quad 0xa85196781f7e3598 + .quad 0x7642c93f5616e2b2 + .quad 0x2323daa74595f8e4 + .quad 0xde688c8b857abeb4 + .quad 0x3fc48e961c59326e + .quad 0x0b2e73ca15c9b8ba + + // 2^236 * 7 * G + + .quad 0xd6bb4428c17f5026 + .quad 0x9eb27223fb5a9ca7 + .quad 0xe37ba5031919c644 + .quad 0x21ce380db59a6602 + .quad 0x0e3fbfaf79c03a55 + .quad 0x3077af054cbb5acf + .quad 0xd5c55245db3de39f + .quad 0x015e68c1476a4af7 + .quad 0xc1d5285220066a38 + .quad 0x95603e523570aef3 + .quad 0x832659a7226b8a4d + .quad 
0x5dd689091f8eedc9 + + // 2^236 * 8 * G + + .quad 0xcbac84debfd3c856 + .quad 0x1624c348b35ff244 + .quad 0xb7f88dca5d9cad07 + .quad 0x3b0e574da2c2ebe8 + .quad 0x1d022591a5313084 + .quad 0xca2d4aaed6270872 + .quad 0x86a12b852f0bfd20 + .quad 0x56e6c439ad7da748 + .quad 0xc704ff4942bdbae6 + .quad 0x5e21ade2b2de1f79 + .quad 0xe95db3f35652fad8 + .quad 0x0822b5378f08ebc1 + + // 2^240 * 1 * G + + .quad 0x51f048478f387475 + .quad 0xb25dbcf49cbecb3c + .quad 0x9aab1244d99f2055 + .quad 0x2c709e6c1c10a5d6 + .quad 0xe1b7f29362730383 + .quad 0x4b5279ffebca8a2c + .quad 0xdafc778abfd41314 + .quad 0x7deb10149c72610f + .quad 0xcb62af6a8766ee7a + .quad 0x66cbec045553cd0e + .quad 0x588001380f0be4b5 + .quad 0x08e68e9ff62ce2ea + + // 2^240 * 2 * G + + .quad 0x34ad500a4bc130ad + .quad 0x8d38db493d0bd49c + .quad 0xa25c3d98500a89be + .quad 0x2f1f3f87eeba3b09 + .quad 0x2f2d09d50ab8f2f9 + .quad 0xacb9218dc55923df + .quad 0x4a8f342673766cb9 + .quad 0x4cb13bd738f719f5 + .quad 0xf7848c75e515b64a + .quad 0xa59501badb4a9038 + .quad 0xc20d313f3f751b50 + .quad 0x19a1e353c0ae2ee8 + + // 2^240 * 3 * G + + .quad 0x7d1c7560bafa05c3 + .quad 0xb3e1a0a0c6e55e61 + .quad 0xe3529718c0d66473 + .quad 0x41546b11c20c3486 + .quad 0xb42172cdd596bdbd + .quad 0x93e0454398eefc40 + .quad 0x9fb15347b44109b5 + .quad 0x736bd3990266ae34 + .quad 0x85532d509334b3b4 + .quad 0x46fd114b60816573 + .quad 0xcc5f5f30425c8375 + .quad 0x412295a2b87fab5c + + // 2^240 * 4 * G + + .quad 0x19c99b88f57ed6e9 + .quad 0x5393cb266df8c825 + .quad 0x5cee3213b30ad273 + .quad 0x14e153ebb52d2e34 + .quad 0x2e655261e293eac6 + .quad 0x845a92032133acdb + .quad 0x460975cb7900996b + .quad 0x0760bb8d195add80 + .quad 0x413e1a17cde6818a + .quad 0x57156da9ed69a084 + .quad 0x2cbf268f46caccb1 + .quad 0x6b34be9bc33ac5f2 + + // 2^240 * 5 * G + + .quad 0xf3df2f643a78c0b2 + .quad 0x4c3e971ef22e027c + .quad 0xec7d1c5e49c1b5a3 + .quad 0x2012c18f0922dd2d + .quad 0x11fc69656571f2d3 + .quad 0xc6c9e845530e737a + .quad 0xe33ae7a2d4fe5035 + .quad 0x01b9c7b62e6dd30b + .quad 0x880b55e55ac89d29 + .quad 0x1483241f45a0a763 + .quad 0x3d36efdfc2e76c1f + .quad 0x08af5b784e4bade8 + + // 2^240 * 6 * G + + .quad 0x283499dc881f2533 + .quad 0x9d0525da779323b6 + .quad 0x897addfb673441f4 + .quad 0x32b79d71163a168d + .quad 0xe27314d289cc2c4b + .quad 0x4be4bd11a287178d + .quad 0x18d528d6fa3364ce + .quad 0x6423c1d5afd9826e + .quad 0xcc85f8d9edfcb36a + .quad 0x22bcc28f3746e5f9 + .quad 0xe49de338f9e5d3cd + .quad 0x480a5efbc13e2dcc + + // 2^240 * 7 * G + + .quad 0x0b51e70b01622071 + .quad 0x06b505cf8b1dafc5 + .quad 0x2c6bb061ef5aabcd + .quad 0x47aa27600cb7bf31 + .quad 0xb6614ce442ce221f + .quad 0x6e199dcc4c053928 + .quad 0x663fb4a4dc1cbe03 + .quad 0x24b31d47691c8e06 + .quad 0x2a541eedc015f8c3 + .quad 0x11a4fe7e7c693f7c + .quad 0xf0af66134ea278d6 + .quad 0x545b585d14dda094 + + // 2^240 * 8 * G + + .quad 0x67bf275ea0d43a0f + .quad 0xade68e34089beebe + .quad 0x4289134cd479e72e + .quad 0x0f62f9c332ba5454 + .quad 0x6204e4d0e3b321e1 + .quad 0x3baa637a28ff1e95 + .quad 0x0b0ccffd5b99bd9e + .quad 0x4d22dc3e64c8d071 + .quad 0xfcb46589d63b5f39 + .quad 0x5cae6a3f57cbcf61 + .quad 0xfebac2d2953afa05 + .quad 0x1c0fa01a36371436 + + // 2^244 * 1 * G + + .quad 0xe7547449bc7cd692 + .quad 0x0f9abeaae6f73ddf + .quad 0x4af01ca700837e29 + .quad 0x63ab1b5d3f1bc183 + .quad 0xc11ee5e854c53fae + .quad 0x6a0b06c12b4f3ff4 + .quad 0x33540f80e0b67a72 + .quad 0x15f18fc3cd07e3ef + .quad 0x32750763b028f48c + .quad 0x06020740556a065f + .quad 0xd53bd812c3495b58 + .quad 0x08706c9b865f508d + + // 2^244 * 2 * G + + .quad 0xf37ca2ab3d343dff + .quad 
0x1a8c6a2d80abc617 + .quad 0x8e49e035d4ccffca + .quad 0x48b46beebaa1d1b9 + .quad 0xcc991b4138b41246 + .quad 0x243b9c526f9ac26b + .quad 0xb9ef494db7cbabbd + .quad 0x5fba433dd082ed00 + .quad 0x9c49e355c9941ad0 + .quad 0xb9734ade74498f84 + .quad 0x41c3fed066663e5c + .quad 0x0ecfedf8e8e710b3 + + // 2^244 * 3 * G + + .quad 0x76430f9f9cd470d9 + .quad 0xb62acc9ba42f6008 + .quad 0x1898297c59adad5e + .quad 0x7789dd2db78c5080 + .quad 0x744f7463e9403762 + .quad 0xf79a8dee8dfcc9c9 + .quad 0x163a649655e4cde3 + .quad 0x3b61788db284f435 + .quad 0xb22228190d6ef6b2 + .quad 0xa94a66b246ce4bfa + .quad 0x46c1a77a4f0b6cc7 + .quad 0x4236ccffeb7338cf + + // 2^244 * 4 * G + + .quad 0x8497404d0d55e274 + .quad 0x6c6663d9c4ad2b53 + .quad 0xec2fb0d9ada95734 + .quad 0x2617e120cdb8f73c + .quad 0x3bd82dbfda777df6 + .quad 0x71b177cc0b98369e + .quad 0x1d0e8463850c3699 + .quad 0x5a71945b48e2d1f1 + .quad 0x6f203dd5405b4b42 + .quad 0x327ec60410b24509 + .quad 0x9c347230ac2a8846 + .quad 0x77de29fc11ffeb6a + + // 2^244 * 5 * G + + .quad 0xb0ac57c983b778a8 + .quad 0x53cdcca9d7fe912c + .quad 0x61c2b854ff1f59dc + .quad 0x3a1a2cf0f0de7dac + .quad 0x835e138fecced2ca + .quad 0x8c9eaf13ea963b9a + .quad 0xc95fbfc0b2160ea6 + .quad 0x575e66f3ad877892 + .quad 0x99803a27c88fcb3a + .quad 0x345a6789275ec0b0 + .quad 0x459789d0ff6c2be5 + .quad 0x62f882651e70a8b2 + + // 2^244 * 6 * G + + .quad 0x085ae2c759ff1be4 + .quad 0x149145c93b0e40b7 + .quad 0xc467e7fa7ff27379 + .quad 0x4eeecf0ad5c73a95 + .quad 0x6d822986698a19e0 + .quad 0xdc9821e174d78a71 + .quad 0x41a85f31f6cb1f47 + .quad 0x352721c2bcda9c51 + .quad 0x48329952213fc985 + .quad 0x1087cf0d368a1746 + .quad 0x8e5261b166c15aa5 + .quad 0x2d5b2d842ed24c21 + + // 2^244 * 7 * G + + .quad 0x02cfebd9ebd3ded1 + .quad 0xd45b217739021974 + .quad 0x7576f813fe30a1b7 + .quad 0x5691b6f9a34ef6c2 + .quad 0x5eb7d13d196ac533 + .quad 0x377234ecdb80be2b + .quad 0xe144cffc7cf5ae24 + .quad 0x5226bcf9c441acec + .quad 0x79ee6c7223e5b547 + .quad 0x6f5f50768330d679 + .quad 0xed73e1e96d8adce9 + .quad 0x27c3da1e1d8ccc03 + + // 2^244 * 8 * G + + .quad 0x7eb9efb23fe24c74 + .quad 0x3e50f49f1651be01 + .quad 0x3ea732dc21858dea + .quad 0x17377bd75bb810f9 + .quad 0x28302e71630ef9f6 + .quad 0xc2d4a2032b64cee0 + .quad 0x090820304b6292be + .quad 0x5fca747aa82adf18 + .quad 0x232a03c35c258ea5 + .quad 0x86f23a2c6bcb0cf1 + .quad 0x3dad8d0d2e442166 + .quad 0x04a8933cab76862b + + // 2^248 * 1 * G + + .quad 0xd2c604b622943dff + .quad 0xbc8cbece44cfb3a0 + .quad 0x5d254ff397808678 + .quad 0x0fa3614f3b1ca6bf + .quad 0x69082b0e8c936a50 + .quad 0xf9c9a035c1dac5b6 + .quad 0x6fb73e54c4dfb634 + .quad 0x4005419b1d2bc140 + .quad 0xa003febdb9be82f0 + .quad 0x2089c1af3a44ac90 + .quad 0xf8499f911954fa8e + .quad 0x1fba218aef40ab42 + + // 2^248 * 2 * G + + .quad 0xab549448fac8f53e + .quad 0x81f6e89a7ba63741 + .quad 0x74fd6c7d6c2b5e01 + .quad 0x392e3acaa8c86e42 + .quad 0x4f3e57043e7b0194 + .quad 0xa81d3eee08daaf7f + .quad 0xc839c6ab99dcdef1 + .quad 0x6c535d13ff7761d5 + .quad 0x4cbd34e93e8a35af + .quad 0x2e0781445887e816 + .quad 0x19319c76f29ab0ab + .quad 0x25e17fe4d50ac13b + + // 2^248 * 3 * G + + .quad 0x0a289bd71e04f676 + .quad 0x208e1c52d6420f95 + .quad 0x5186d8b034691fab + .quad 0x255751442a9fb351 + .quad 0x915f7ff576f121a7 + .quad 0xc34a32272fcd87e3 + .quad 0xccba2fde4d1be526 + .quad 0x6bba828f8969899b + .quad 0xe2d1bc6690fe3901 + .quad 0x4cb54a18a0997ad5 + .quad 0x971d6914af8460d4 + .quad 0x559d504f7f6b7be4 + + // 2^248 * 4 * G + + .quad 0xa7738378b3eb54d5 + .quad 0x1d69d366a5553c7c + .quad 0x0a26cf62f92800ba + .quad 0x01ab12d5807e3217 + .quad 
0x9c4891e7f6d266fd + .quad 0x0744a19b0307781b + .quad 0x88388f1d6061e23b + .quad 0x123ea6a3354bd50e + .quad 0x118d189041e32d96 + .quad 0xb9ede3c2d8315848 + .quad 0x1eab4271d83245d9 + .quad 0x4a3961e2c918a154 + + // 2^248 * 5 * G + + .quad 0x71dc3be0f8e6bba0 + .quad 0xd6cef8347effe30a + .quad 0xa992425fe13a476a + .quad 0x2cd6bce3fb1db763 + .quad 0x0327d644f3233f1e + .quad 0x499a260e34fcf016 + .quad 0x83b5a716f2dab979 + .quad 0x68aceead9bd4111f + .quad 0x38b4c90ef3d7c210 + .quad 0x308e6e24b7ad040c + .quad 0x3860d9f1b7e73e23 + .quad 0x595760d5b508f597 + + // 2^248 * 6 * G + + .quad 0x6129bfe104aa6397 + .quad 0x8f960008a4a7fccb + .quad 0x3f8bc0897d909458 + .quad 0x709fa43edcb291a9 + .quad 0x882acbebfd022790 + .quad 0x89af3305c4115760 + .quad 0x65f492e37d3473f4 + .quad 0x2cb2c5df54515a2b + .quad 0xeb0a5d8c63fd2aca + .quad 0xd22bc1662e694eff + .quad 0x2723f36ef8cbb03a + .quad 0x70f029ecf0c8131f + + // 2^248 * 7 * G + + .quad 0x461307b32eed3e33 + .quad 0xae042f33a45581e7 + .quad 0xc94449d3195f0366 + .quad 0x0b7d5d8a6c314858 + .quad 0x2a6aafaa5e10b0b9 + .quad 0x78f0a370ef041aa9 + .quad 0x773efb77aa3ad61f + .quad 0x44eca5a2a74bd9e1 + .quad 0x25d448327b95d543 + .quad 0x70d38300a3340f1d + .quad 0xde1c531c60e1c52b + .quad 0x272224512c7de9e4 + + // 2^248 * 8 * G + + .quad 0x1abc92af49c5342e + .quad 0xffeed811b2e6fad0 + .quad 0xefa28c8dfcc84e29 + .quad 0x11b5df18a44cc543 + .quad 0xbf7bbb8a42a975fc + .quad 0x8c5c397796ada358 + .quad 0xe27fc76fcdedaa48 + .quad 0x19735fd7f6bc20a6 + .quad 0xe3ab90d042c84266 + .quad 0xeb848e0f7f19547e + .quad 0x2503a1d065a497b9 + .quad 0x0fef911191df895f + + // 2^252 * 1 * G + + .quad 0xb1507ca1ab1c6eb9 + .quad 0xbd448f3e16b687b3 + .quad 0x3455fb7f2c7a91ab + .quad 0x7579229e2f2adec1 + .quad 0x6ab5dcb85b1c16b7 + .quad 0x94c0fce83c7b27a5 + .quad 0xa4b11c1a735517be + .quad 0x499238d0ba0eafaa + .quad 0xecf46e527aba8b57 + .quad 0x15a08c478bd1647b + .quad 0x7af1c6a65f706fef + .quad 0x6345fa78f03a30d5 + + // 2^252 * 2 * G + + .quad 0xdf02f95f1015e7a1 + .quad 0x790ec41da9b40263 + .quad 0x4d3a0ea133ea1107 + .quad 0x54f70be7e33af8c9 + .quad 0x93d3cbe9bdd8f0a4 + .quad 0xdb152c1bfd177302 + .quad 0x7dbddc6d7f17a875 + .quad 0x3e1a71cc8f426efe + .quad 0xc83ca3e390babd62 + .quad 0x80ede3670291c833 + .quad 0xc88038ccd37900c4 + .quad 0x2c5fc0231ec31fa1 + + // 2^252 * 3 * G + + .quad 0xfeba911717038b4f + .quad 0xe5123721c9deef81 + .quad 0x1c97e4e75d0d8834 + .quad 0x68afae7a23dc3bc6 + .quad 0xc422e4d102456e65 + .quad 0x87414ac1cad47b91 + .quad 0x1592e2bba2b6ffdd + .quad 0x75d9d2bff5c2100f + .quad 0x5bd9b4763626e81c + .quad 0x89966936bca02edd + .quad 0x0a41193d61f077b3 + .quad 0x3097a24200ce5471 + + // 2^252 * 4 * G + + .quad 0x57427734c7f8b84c + .quad 0xf141a13e01b270e9 + .quad 0x02d1adfeb4e564a6 + .quad 0x4bb23d92ce83bd48 + .quad 0xa162e7246695c486 + .quad 0x131d633435a89607 + .quad 0x30521561a0d12a37 + .quad 0x56704bada6afb363 + .quad 0xaf6c4aa752f912b9 + .quad 0x5e665f6cd86770c8 + .quad 0x4c35ac83a3c8cd58 + .quad 0x2b7a29c010a58a7e + + // 2^252 * 5 * G + + .quad 0xc4007f77d0c1cec3 + .quad 0x8d1020b6bac492f8 + .quad 0x32ec29d57e69daaf + .quad 0x599408759d95fce0 + .quad 0x33810a23bf00086e + .quad 0xafce925ee736ff7c + .quad 0x3d60e670e24922d4 + .quad 0x11ce9e714f96061b + .quad 0x219ef713d815bac1 + .quad 0xf141465d485be25c + .quad 0x6d5447cc4e513c51 + .quad 0x174926be5ef44393 + + // 2^252 * 6 * G + + .quad 0xb5deb2f9fc5bd5bb + .quad 0x92daa72ae1d810e1 + .quad 0xafc4cfdcb72a1c59 + .quad 0x497d78813fc22a24 + .quad 0x3ef5d41593ea022e + .quad 0x5cbcc1a20ed0eed6 + .quad 0x8fd24ecf07382c8c + .quad 
0x6fa42ead06d8e1ad + .quad 0xe276824a1f73371f + .quad 0x7f7cf01c4f5b6736 + .quad 0x7e201fe304fa46e7 + .quad 0x785a36a357808c96 + + // 2^252 * 7 * G + + .quad 0x825fbdfd63014d2b + .quad 0xc852369c6ca7578b + .quad 0x5b2fcd285c0b5df0 + .quad 0x12ab214c58048c8f + .quad 0x070442985d517bc3 + .quad 0x6acd56c7ae653678 + .quad 0x00a27983985a7763 + .quad 0x5167effae512662b + .quad 0xbd4ea9e10f53c4b6 + .quad 0x1673dc5f8ac91a14 + .quad 0xa8f81a4e2acc1aba + .quad 0x33a92a7924332a25 + + // 2^252 * 8 * G + + .quad 0x9dd1f49927996c02 + .quad 0x0cb3b058e04d1752 + .quad 0x1f7e88967fd02c3e + .quad 0x2f964268cb8b3eb1 + .quad 0x7ba95ba0218f2ada + .quad 0xcff42287330fb9ca + .quad 0xdada496d56c6d907 + .quad 0x5380c296f4beee54 + .quad 0x9d4f270466898d0a + .quad 0x3d0987990aff3f7a + .quad 0xd09ef36267daba45 + .quad 0x7761455e7b1c669c + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/curve25519/curve25519_x25519.S b/x86_att/curve25519/curve25519_x25519.S index d103ec911c..14293cbc2d 100644 --- a/x86_att/curve25519/curve25519_x25519.S +++ b/x86_att/curve25519/curve25519_x25519.S @@ -8,6 +8,13 @@ // extern void curve25519_x25519 // (uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]) // +// The function has a second prototype considering the arguments as arrays +// of bytes rather than 64-bit words. The underlying code is the same, since +// the x86 platform is little-endian. +// +// extern void curve25519_x25519_byte +// (uint8_t res[static 32],uint8_t scalar[static 32],uint8_t point[static 32]) +// // Given a scalar n and the X coordinate of an input point P = (X,Y) on // curve25519 (Y can live in any extension field of characteristic 2^255-19), // this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the @@ -23,6 +30,8 @@ S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519) S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519_byte) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519_byte) .text // Size of individual field elements @@ -584,6 +593,7 @@ movq %rax, 24+P0 S2N_BN_SYMBOL(curve25519_x25519): +S2N_BN_SYMBOL(curve25519_x25519_byte): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/curve25519/curve25519_x25519_alt.S b/x86_att/curve25519/curve25519_x25519_alt.S index 1d4ab64bc4..981f6e7417 100644 --- a/x86_att/curve25519/curve25519_x25519_alt.S +++ b/x86_att/curve25519/curve25519_x25519_alt.S @@ -8,6 +8,13 @@ // extern void curve25519_x25519_alt // (uint64_t res[static 4],uint64_t scalar[static 4],uint64_t point[static 4]) // +// The function has a second prototype considering the arguments as arrays +// of bytes rather than 64-bit words. The underlying code is the same, since +// the x86 platform is little-endian. 
+// +// extern void curve25519_x25519_byte_alt +// (uint8_t res[static 32],uint8_t scalar[static 32],uint8_t point[static 32]) +// // Given a scalar n and the X coordinate of an input point P = (X,Y) on // curve25519 (Y can live in any extension field of characteristic 2^255-19), // this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the @@ -23,6 +30,8 @@ S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519_alt) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519_byte_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519_byte_alt) .text // Size of individual field elements @@ -745,6 +754,7 @@ movq %rax, 24+P0 S2N_BN_SYMBOL(curve25519_x25519_alt): +S2N_BN_SYMBOL(curve25519_x25519_byte_alt): #if WINDOWS_ABI pushq %rdi diff --git a/x86_att/curve25519/curve25519_x25519base.S b/x86_att/curve25519/curve25519_x25519base.S index 1f9ee2377c..f7ffa2b838 100644 --- a/x86_att/curve25519/curve25519_x25519base.S +++ b/x86_att/curve25519/curve25519_x25519base.S @@ -6,7 +6,14 @@ // Input scalar[4]; output res[4] // // extern void curve25519_x25519base -// (uint64_t res[static 4],uint64_t scalar[static 4]); +// (uint64_t res[static 4],uint64_t scalar[static 4]) +// +// The function has a second prototype considering the arguments as arrays +// of bytes rather than 64-bit words. The underlying code is the same, since +// the x86 platform is little-endian. +// +// extern void curve25519_x25519base_byte +// (uint8_t res[static 32],uint8_t scalar[static 32]) // // Given a scalar n, returns the X coordinate of n * G where G = (9,...) is // the standard generator. The scalar is first slightly modified/mangled @@ -20,6 +27,8 @@ S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519base) S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519base) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519base_byte) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519base_byte) .text // Size of individual field elements @@ -328,6 +337,7 @@ movq %r11, 0x18+P0 S2N_BN_SYMBOL(curve25519_x25519base): +S2N_BN_SYMBOL(curve25519_x25519base_byte): // In this case the Windows form literally makes a subroutine call. // This avoids hassle arising from keeping code and data together. diff --git a/x86_att/curve25519/curve25519_x25519base_alt.S b/x86_att/curve25519/curve25519_x25519base_alt.S index 0027f47f90..c90bd1bc9f 100644 --- a/x86_att/curve25519/curve25519_x25519base_alt.S +++ b/x86_att/curve25519/curve25519_x25519base_alt.S @@ -8,6 +8,13 @@ // extern void curve25519_x25519base_alt // (uint64_t res[static 4],uint64_t scalar[static 4]); // +// The function has a second prototype considering the arguments as arrays +// of bytes rather than 64-bit words. The underlying code is the same, since +// the x86 platform is little-endian. +// +// extern void curve25519_x25519base_byte_alt +// (uint8_t res[static 32],uint8_t scalar[static 32]) +// // Given a scalar n, returns the X coordinate of n * G where G = (9,...) is // the standard generator. The scalar is first slightly modified/mangled // as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748). 
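As an illustration of the little-endian word/byte equivalence claimed in the comments above, the following hypothetical caller (not part of the patch; the prototypes are the ones documented above, while the test values and the comparison harness are invented for this sketch, and it assumes linking against the s2n-bignum objects on a little-endian x86-64 target) exercises the word-based and byte-based entry points on the same input and checks that they agree:

#include <stdint.h>
#include <string.h>

extern void curve25519_x25519(uint64_t res[static 4],
                              uint64_t scalar[static 4],
                              uint64_t point[static 4]);
extern void curve25519_x25519_byte(uint8_t res[static 32],
                                   uint8_t scalar[static 32],
                                   uint8_t point[static 32]);

int main(void) {
    /* Arbitrary test input: a scalar and the standard base point X = 9,
       both given as little-endian byte arrays. */
    uint8_t scalar[32] = {9}, point[32] = {9}, r_byte[32];
    uint64_t s_word[4], p_word[4], r_word[4];

    /* On a little-endian target the 32-byte array and the 4-word
       uint64_t array have the identical in-memory representation. */
    memcpy(s_word, scalar, 32);
    memcpy(p_word, point, 32);

    curve25519_x25519_byte(r_byte, scalar, point);
    curve25519_x25519(r_word, s_word, p_word);

    /* The two prototypes name the same entry point, so the results
       are expected to be byte-for-byte equal. */
    return memcmp(r_byte, r_word, 32) != 0;
}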
@@ -20,6 +27,8 @@ S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519base_alt) S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519base_alt) + S2N_BN_SYM_VISIBILITY_DIRECTIVE(curve25519_x25519base_byte_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(curve25519_x25519base_byte_alt) .text // Size of individual field elements @@ -404,6 +413,7 @@ movq %r11, 0x18+P0 S2N_BN_SYMBOL(curve25519_x25519base_alt): +S2N_BN_SYMBOL(curve25519_x25519base_byte_alt): // In this case the Windows form literally makes a subroutine call. // This avoids hassle arising from keeping code and data together. From 68870814c955e04c5618e72facbd4eec36169aaa Mon Sep 17 00:00:00 2001 From: Torben Hansen <50673096+torben-hansen@users.noreply.github.com> Date: Wed, 12 Apr 2023 12:26:27 -0700 Subject: [PATCH 32/42] Per file namespace for symbolic labels s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/da2b90c7fc45b77c639528cea7898575c73f6f39 --- arm/curve25519/curve25519_x25519.S | 84 +++++++-------- arm/curve25519/curve25519_x25519_alt.S | 84 +++++++-------- arm/curve25519/curve25519_x25519_byte.S | 84 +++++++-------- arm/curve25519/curve25519_x25519_byte_alt.S | 84 +++++++-------- arm/curve25519/curve25519_x25519base.S | 100 +++++++++--------- arm/curve25519/curve25519_x25519base_alt.S | 100 +++++++++--------- arm/curve25519/curve25519_x25519base_byte.S | 100 +++++++++--------- .../curve25519_x25519base_byte_alt.S | 100 +++++++++--------- x86_att/curve25519/curve25519_x25519.S | 72 ++++++------- x86_att/curve25519/curve25519_x25519_alt.S | 72 ++++++------- x86_att/curve25519/curve25519_x25519base.S | 92 ++++++++-------- .../curve25519/curve25519_x25519base_alt.S | 92 ++++++++-------- 12 files changed, 532 insertions(+), 532 deletions(-) diff --git a/arm/curve25519/curve25519_x25519.S b/arm/curve25519/curve25519_x25519.S index fadc512553..d66884d5d4 100644 --- a/arm/curve25519/curve25519_x25519.S +++ b/arm/curve25519/curve25519_x25519.S @@ -732,7 +732,7 @@ S2N_BN_SYMBOL(curve25519_x25519): mov i, #253 -scalarloop: +curve25519_x25519_scalarloop: // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn @@ -804,7 +804,7 @@ scalarloop: sub i, i, #1 cmp i, #3 - bcs scalarloop + bcs curve25519_x25519_scalarloop // Multiplex directly into (xn,zn) then do three pure doubling steps; // this accounts for the implicit zeroing of the three lowest bits @@ -874,7 +874,7 @@ scalarloop: add x21, x4, x10 add x22, x21, x10 mov x10, xzr -copyloop: +curve25519_x25519_copyloop: ldr x11, [x2, x10, lsl #3] ldr x12, [x3, x10, lsl #3] str x11, [x21, x10, lsl #3] @@ -883,7 +883,7 @@ copyloop: str xzr, [x1, x10, lsl #3] add x10, x10, #0x1 cmp x10, x0 - b.cc copyloop + b.cc curve25519_x25519_copyloop ldr x11, [x4] sub x12, x11, #0x1 str x12, [x4] @@ -900,7 +900,7 @@ copyloop: madd x20, x12, x20, x20 madd x20, x11, x20, x20 lsl x2, x0, #7 -outerloop: +curve25519_x25519_outerloop: add x10, x2, #0x3f lsr x5, x10, #6 cmp x5, x0 @@ -911,7 +911,7 @@ outerloop: mov x16, xzr mov x19, xzr mov x10, xzr -toploop: +curve25519_x25519_toploop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] orr x17, x11, x12 @@ -925,7 +925,7 @@ toploop: csetm x19, ne add x10, x10, #0x1 cmp x10, x5 - b.cc toploop + b.cc curve25519_x25519_toploop orr x11, x13, x14 clz x12, x11 negs x17, x12 @@ -945,7 +945,7 @@ toploop: mov x9, #0x1 mov x10, #0x3a tst x15, #0x1 -innerloop: +curve25519_x25519_innerloop: csel x11, x14, xzr, ne csel x12, x16, xzr, ne csel x17, x8, xzr, ne @@ -967,13 +967,13 @@ innerloop: add x8, x8, x8 add x9, x9, x9 sub x10, x10, #0x1 - cbnz x10, innerloop 
+ cbnz x10, curve25519_x25519_innerloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -congloop: +curve25519_x25519_congloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x6, x11 @@ -1000,7 +1000,7 @@ congloop: adc x14, x14, x15 add x10, x10, #0x1 cmp x10, x0 - b.cc congloop + b.cc curve25519_x25519_congloop extr x13, x13, x17, #58 extr x14, x14, x19, #58 ldr x11, [x4] @@ -1011,8 +1011,8 @@ congloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, wmontend -wmontloop: + cbz x11, curve25519_x25519_wmontend +curve25519_x25519_wmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] mul x15, x17, x11 @@ -1024,24 +1024,24 @@ wmontloop: str x12, [x4, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wmontloop -wmontend: + cbnz x11, curve25519_x25519_wmontloop +curve25519_x25519_wmontend: adcs x16, x16, x13 adc x13, xzr, xzr sub x15, x10, #0x1 str x16, [x4, x15, lsl #3] negs x10, xzr -wcmploop: +curve25519_x25519_wcmploop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcmploop + cbnz x11, curve25519_x25519_wcmploop sbcs xzr, x13, xzr csetm x13, cs negs x10, xzr -wcorrloop: +curve25519_x25519_wcorrloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x13 @@ -1049,7 +1049,7 @@ wcorrloop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcorrloop + cbnz x11, curve25519_x25519_wcorrloop ldr x11, [x1] mul x17, x11, x20 ldr x12, [x3] @@ -1058,8 +1058,8 @@ wcorrloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, zmontend -zmontloop: + cbz x11, curve25519_x25519_zmontend +curve25519_x25519_zmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x17, x11 @@ -1071,24 +1071,24 @@ zmontloop: str x12, [x1, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zmontloop -zmontend: + cbnz x11, curve25519_x25519_zmontloop +curve25519_x25519_zmontend: adcs x16, x16, x14 adc x14, xzr, xzr sub x15, x10, #0x1 str x16, [x1, x15, lsl #3] negs x10, xzr -zcmploop: +curve25519_x25519_zcmploop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcmploop + cbnz x11, curve25519_x25519_zcmploop sbcs xzr, x14, xzr csetm x14, cs negs x10, xzr -zcorrloop: +curve25519_x25519_zcorrloop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x14 @@ -1096,13 +1096,13 @@ zcorrloop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcorrloop + cbnz x11, curve25519_x25519_zcorrloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -crossloop: +curve25519_x25519_crossloop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] mul x15, x6, x11 @@ -1129,13 +1129,13 @@ crossloop: csetm x19, cc add x10, x10, #0x1 cmp x10, x5 - b.cc crossloop + b.cc curve25519_x25519_crossloop cmn x17, x17 ldr x15, [x21] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip1 -negloop1: + cbz x6, curve25519_x25519_negskip1 +curve25519_x25519_negloop1: add x11, x10, #0x8 ldr x12, [x21, x11] extr x15, x12, x15, #58 @@ -1145,8 +1145,8 @@ negloop1: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop1 -negskip1: + cbnz x6, curve25519_x25519_negloop1 +curve25519_x25519_negskip1: extr x15, x13, x15, #58 eor x15, x15, x17 adcs x15, x15, xzr @@ -1155,8 +1155,8 @@ negskip1: ldr x15, [x22] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip2 -negloop2: + cbz x6, curve25519_x25519_negskip2 
+curve25519_x25519_negloop2: add x11, x10, #0x8 ldr x12, [x22, x11] extr x15, x12, x15, #58 @@ -1166,15 +1166,15 @@ negloop2: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop2 -negskip2: + cbnz x6, curve25519_x25519_negloop2 +curve25519_x25519_negskip2: extr x15, x14, x15, #58 eor x15, x15, x19 adcs x15, x15, xzr str x15, [x22, x10] mov x10, xzr cmn x17, x17 -wfliploop: +curve25519_x25519_wfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] and x11, x11, x17 @@ -1183,11 +1183,11 @@ wfliploop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wfliploop + cbnz x11, curve25519_x25519_wfliploop mvn x19, x19 mov x10, xzr cmn x19, x19 -zfliploop: +curve25519_x25519_zfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] and x11, x11, x19 @@ -1196,9 +1196,9 @@ zfliploop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zfliploop + cbnz x11, curve25519_x25519_zfliploop subs x2, x2, #0x3a - b.hi outerloop + b.hi curve25519_x25519_outerloop // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. This avoids building in a diff --git a/arm/curve25519/curve25519_x25519_alt.S b/arm/curve25519/curve25519_x25519_alt.S index 8072c0fe7f..4e9b91b48e 100644 --- a/arm/curve25519/curve25519_x25519_alt.S +++ b/arm/curve25519/curve25519_x25519_alt.S @@ -516,7 +516,7 @@ S2N_BN_SYMBOL(curve25519_x25519_alt): mov i, #253 -scalarloop: +curve25519_x25519_alt_scalarloop: // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn @@ -588,7 +588,7 @@ scalarloop: sub i, i, #1 cmp i, #3 - bcs scalarloop + bcs curve25519_x25519_alt_scalarloop // Multiplex directly into (xn,zn) then do three pure doubling steps; // this accounts for the implicit zeroing of the three lowest bits @@ -658,7 +658,7 @@ scalarloop: add x21, x4, x10 add x22, x21, x10 mov x10, xzr -copyloop: +curve25519_x25519_alt_copyloop: ldr x11, [x2, x10, lsl #3] ldr x12, [x3, x10, lsl #3] str x11, [x21, x10, lsl #3] @@ -667,7 +667,7 @@ copyloop: str xzr, [x1, x10, lsl #3] add x10, x10, #0x1 cmp x10, x0 - b.cc copyloop + b.cc curve25519_x25519_alt_copyloop ldr x11, [x4] sub x12, x11, #0x1 str x12, [x4] @@ -684,7 +684,7 @@ copyloop: madd x20, x12, x20, x20 madd x20, x11, x20, x20 lsl x2, x0, #7 -outerloop: +curve25519_x25519_alt_outerloop: add x10, x2, #0x3f lsr x5, x10, #6 cmp x5, x0 @@ -695,7 +695,7 @@ outerloop: mov x16, xzr mov x19, xzr mov x10, xzr -toploop: +curve25519_x25519_alt_toploop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] orr x17, x11, x12 @@ -709,7 +709,7 @@ toploop: csetm x19, ne add x10, x10, #0x1 cmp x10, x5 - b.cc toploop + b.cc curve25519_x25519_alt_toploop orr x11, x13, x14 clz x12, x11 negs x17, x12 @@ -729,7 +729,7 @@ toploop: mov x9, #0x1 mov x10, #0x3a tst x15, #0x1 -innerloop: +curve25519_x25519_alt_innerloop: csel x11, x14, xzr, ne csel x12, x16, xzr, ne csel x17, x8, xzr, ne @@ -751,13 +751,13 @@ innerloop: add x8, x8, x8 add x9, x9, x9 sub x10, x10, #0x1 - cbnz x10, innerloop + cbnz x10, curve25519_x25519_alt_innerloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -congloop: +curve25519_x25519_alt_congloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x6, x11 @@ -784,7 +784,7 @@ congloop: adc x14, x14, x15 add x10, x10, #0x1 cmp x10, x0 - b.cc congloop + b.cc curve25519_x25519_alt_congloop extr x13, x13, x17, #58 extr x14, x14, x19, #58 ldr x11, [x4] @@ -795,8 +795,8 @@ congloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz 
x11, wmontend -wmontloop: + cbz x11, curve25519_x25519_alt_wmontend +curve25519_x25519_alt_wmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] mul x15, x17, x11 @@ -808,24 +808,24 @@ wmontloop: str x12, [x4, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wmontloop -wmontend: + cbnz x11, curve25519_x25519_alt_wmontloop +curve25519_x25519_alt_wmontend: adcs x16, x16, x13 adc x13, xzr, xzr sub x15, x10, #0x1 str x16, [x4, x15, lsl #3] negs x10, xzr -wcmploop: +curve25519_x25519_alt_wcmploop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcmploop + cbnz x11, curve25519_x25519_alt_wcmploop sbcs xzr, x13, xzr csetm x13, cs negs x10, xzr -wcorrloop: +curve25519_x25519_alt_wcorrloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x13 @@ -833,7 +833,7 @@ wcorrloop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcorrloop + cbnz x11, curve25519_x25519_alt_wcorrloop ldr x11, [x1] mul x17, x11, x20 ldr x12, [x3] @@ -842,8 +842,8 @@ wcorrloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, zmontend -zmontloop: + cbz x11, curve25519_x25519_alt_zmontend +curve25519_x25519_alt_zmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x17, x11 @@ -855,24 +855,24 @@ zmontloop: str x12, [x1, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zmontloop -zmontend: + cbnz x11, curve25519_x25519_alt_zmontloop +curve25519_x25519_alt_zmontend: adcs x16, x16, x14 adc x14, xzr, xzr sub x15, x10, #0x1 str x16, [x1, x15, lsl #3] negs x10, xzr -zcmploop: +curve25519_x25519_alt_zcmploop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcmploop + cbnz x11, curve25519_x25519_alt_zcmploop sbcs xzr, x14, xzr csetm x14, cs negs x10, xzr -zcorrloop: +curve25519_x25519_alt_zcorrloop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x14 @@ -880,13 +880,13 @@ zcorrloop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcorrloop + cbnz x11, curve25519_x25519_alt_zcorrloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -crossloop: +curve25519_x25519_alt_crossloop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] mul x15, x6, x11 @@ -913,13 +913,13 @@ crossloop: csetm x19, cc add x10, x10, #0x1 cmp x10, x5 - b.cc crossloop + b.cc curve25519_x25519_alt_crossloop cmn x17, x17 ldr x15, [x21] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip1 -negloop1: + cbz x6, curve25519_x25519_alt_negskip1 +curve25519_x25519_alt_negloop1: add x11, x10, #0x8 ldr x12, [x21, x11] extr x15, x12, x15, #58 @@ -929,8 +929,8 @@ negloop1: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop1 -negskip1: + cbnz x6, curve25519_x25519_alt_negloop1 +curve25519_x25519_alt_negskip1: extr x15, x13, x15, #58 eor x15, x15, x17 adcs x15, x15, xzr @@ -939,8 +939,8 @@ negskip1: ldr x15, [x22] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip2 -negloop2: + cbz x6, curve25519_x25519_alt_negskip2 +curve25519_x25519_alt_negloop2: add x11, x10, #0x8 ldr x12, [x22, x11] extr x15, x12, x15, #58 @@ -950,15 +950,15 @@ negloop2: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop2 -negskip2: + cbnz x6, curve25519_x25519_alt_negloop2 +curve25519_x25519_alt_negskip2: extr x15, x14, x15, #58 eor x15, x15, x19 adcs x15, x15, xzr str x15, [x22, x10] mov x10, xzr cmn x17, x17 -wfliploop: +curve25519_x25519_alt_wfliploop: ldr x11, 
[x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] and x11, x11, x17 @@ -967,11 +967,11 @@ wfliploop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wfliploop + cbnz x11, curve25519_x25519_alt_wfliploop mvn x19, x19 mov x10, xzr cmn x19, x19 -zfliploop: +curve25519_x25519_alt_zfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] and x11, x11, x19 @@ -980,9 +980,9 @@ zfliploop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zfliploop + cbnz x11, curve25519_x25519_alt_zfliploop subs x2, x2, #0x3a - b.hi outerloop + b.hi curve25519_x25519_alt_outerloop // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. This avoids building in a diff --git a/arm/curve25519/curve25519_x25519_byte.S b/arm/curve25519/curve25519_x25519_byte.S index accdf93ecf..d64eb73ed2 100644 --- a/arm/curve25519/curve25519_x25519_byte.S +++ b/arm/curve25519/curve25519_x25519_byte.S @@ -850,7 +850,7 @@ S2N_BN_SYMBOL(curve25519_x25519_byte): mov i, #253 -scalarloop: +curve25519_x25519_byte_scalarloop: // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn @@ -922,7 +922,7 @@ scalarloop: sub i, i, #1 cmp i, #3 - bcs scalarloop + bcs curve25519_x25519_byte_scalarloop // Multiplex directly into (xn,zn) then do three pure doubling steps; // this accounts for the implicit zeroing of the three lowest bits @@ -992,7 +992,7 @@ scalarloop: add x21, x4, x10 add x22, x21, x10 mov x10, xzr -copyloop: +curve25519_x25519_byte_copyloop: ldr x11, [x2, x10, lsl #3] ldr x12, [x3, x10, lsl #3] str x11, [x21, x10, lsl #3] @@ -1001,7 +1001,7 @@ copyloop: str xzr, [x1, x10, lsl #3] add x10, x10, #0x1 cmp x10, x0 - b.cc copyloop + b.cc curve25519_x25519_byte_copyloop ldr x11, [x4] sub x12, x11, #0x1 str x12, [x4] @@ -1018,7 +1018,7 @@ copyloop: madd x20, x12, x20, x20 madd x20, x11, x20, x20 lsl x2, x0, #7 -outerloop: +curve25519_x25519_byte_outerloop: add x10, x2, #0x3f lsr x5, x10, #6 cmp x5, x0 @@ -1029,7 +1029,7 @@ outerloop: mov x16, xzr mov x19, xzr mov x10, xzr -toploop: +curve25519_x25519_byte_toploop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] orr x17, x11, x12 @@ -1043,7 +1043,7 @@ toploop: csetm x19, ne add x10, x10, #0x1 cmp x10, x5 - b.cc toploop + b.cc curve25519_x25519_byte_toploop orr x11, x13, x14 clz x12, x11 negs x17, x12 @@ -1063,7 +1063,7 @@ toploop: mov x9, #0x1 mov x10, #0x3a tst x15, #0x1 -innerloop: +curve25519_x25519_byte_innerloop: csel x11, x14, xzr, ne csel x12, x16, xzr, ne csel x17, x8, xzr, ne @@ -1085,13 +1085,13 @@ innerloop: add x8, x8, x8 add x9, x9, x9 sub x10, x10, #0x1 - cbnz x10, innerloop + cbnz x10, curve25519_x25519_byte_innerloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -congloop: +curve25519_x25519_byte_congloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x6, x11 @@ -1118,7 +1118,7 @@ congloop: adc x14, x14, x15 add x10, x10, #0x1 cmp x10, x0 - b.cc congloop + b.cc curve25519_x25519_byte_congloop extr x13, x13, x17, #58 extr x14, x14, x19, #58 ldr x11, [x4] @@ -1129,8 +1129,8 @@ congloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, wmontend -wmontloop: + cbz x11, curve25519_x25519_byte_wmontend +curve25519_x25519_byte_wmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] mul x15, x17, x11 @@ -1142,24 +1142,24 @@ wmontloop: str x12, [x4, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wmontloop -wmontend: + cbnz x11, curve25519_x25519_byte_wmontloop +curve25519_x25519_byte_wmontend: 
adcs x16, x16, x13 adc x13, xzr, xzr sub x15, x10, #0x1 str x16, [x4, x15, lsl #3] negs x10, xzr -wcmploop: +curve25519_x25519_byte_wcmploop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcmploop + cbnz x11, curve25519_x25519_byte_wcmploop sbcs xzr, x13, xzr csetm x13, cs negs x10, xzr -wcorrloop: +curve25519_x25519_byte_wcorrloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x13 @@ -1167,7 +1167,7 @@ wcorrloop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcorrloop + cbnz x11, curve25519_x25519_byte_wcorrloop ldr x11, [x1] mul x17, x11, x20 ldr x12, [x3] @@ -1176,8 +1176,8 @@ wcorrloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, zmontend -zmontloop: + cbz x11, curve25519_x25519_byte_zmontend +curve25519_x25519_byte_zmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x17, x11 @@ -1189,24 +1189,24 @@ zmontloop: str x12, [x1, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zmontloop -zmontend: + cbnz x11, curve25519_x25519_byte_zmontloop +curve25519_x25519_byte_zmontend: adcs x16, x16, x14 adc x14, xzr, xzr sub x15, x10, #0x1 str x16, [x1, x15, lsl #3] negs x10, xzr -zcmploop: +curve25519_x25519_byte_zcmploop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcmploop + cbnz x11, curve25519_x25519_byte_zcmploop sbcs xzr, x14, xzr csetm x14, cs negs x10, xzr -zcorrloop: +curve25519_x25519_byte_zcorrloop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x14 @@ -1214,13 +1214,13 @@ zcorrloop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcorrloop + cbnz x11, curve25519_x25519_byte_zcorrloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -crossloop: +curve25519_x25519_byte_crossloop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] mul x15, x6, x11 @@ -1247,13 +1247,13 @@ crossloop: csetm x19, cc add x10, x10, #0x1 cmp x10, x5 - b.cc crossloop + b.cc curve25519_x25519_byte_crossloop cmn x17, x17 ldr x15, [x21] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip1 -negloop1: + cbz x6, curve25519_x25519_byte_negskip1 +curve25519_x25519_byte_negloop1: add x11, x10, #0x8 ldr x12, [x21, x11] extr x15, x12, x15, #58 @@ -1263,8 +1263,8 @@ negloop1: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop1 -negskip1: + cbnz x6, curve25519_x25519_byte_negloop1 +curve25519_x25519_byte_negskip1: extr x15, x13, x15, #58 eor x15, x15, x17 adcs x15, x15, xzr @@ -1273,8 +1273,8 @@ negskip1: ldr x15, [x22] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip2 -negloop2: + cbz x6, curve25519_x25519_byte_negskip2 +curve25519_x25519_byte_negloop2: add x11, x10, #0x8 ldr x12, [x22, x11] extr x15, x12, x15, #58 @@ -1284,15 +1284,15 @@ negloop2: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop2 -negskip2: + cbnz x6, curve25519_x25519_byte_negloop2 +curve25519_x25519_byte_negskip2: extr x15, x14, x15, #58 eor x15, x15, x19 adcs x15, x15, xzr str x15, [x22, x10] mov x10, xzr cmn x17, x17 -wfliploop: +curve25519_x25519_byte_wfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] and x11, x11, x17 @@ -1301,11 +1301,11 @@ wfliploop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wfliploop + cbnz x11, curve25519_x25519_byte_wfliploop mvn x19, x19 mov x10, xzr cmn x19, x19 -zfliploop: +curve25519_x25519_byte_zfliploop: ldr x11, [x3, x10, lsl #3] 
ldr x12, [x1, x10, lsl #3] and x11, x11, x19 @@ -1314,9 +1314,9 @@ zfliploop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zfliploop + cbnz x11, curve25519_x25519_byte_zfliploop subs x2, x2, #0x3a - b.hi outerloop + b.hi curve25519_x25519_byte_outerloop // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. This avoids building in a diff --git a/arm/curve25519/curve25519_x25519_byte_alt.S b/arm/curve25519/curve25519_x25519_byte_alt.S index c291f8d828..7f79cfd803 100644 --- a/arm/curve25519/curve25519_x25519_byte_alt.S +++ b/arm/curve25519/curve25519_x25519_byte_alt.S @@ -634,7 +634,7 @@ S2N_BN_SYMBOL(curve25519_x25519_byte_alt): mov i, #253 -scalarloop: +curve25519_x25519_byte_alt_scalarloop: // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn @@ -706,7 +706,7 @@ scalarloop: sub i, i, #1 cmp i, #3 - bcs scalarloop + bcs curve25519_x25519_byte_alt_scalarloop // Multiplex directly into (xn,zn) then do three pure doubling steps; // this accounts for the implicit zeroing of the three lowest bits @@ -776,7 +776,7 @@ scalarloop: add x21, x4, x10 add x22, x21, x10 mov x10, xzr -copyloop: +curve25519_x25519_byte_alt_copyloop: ldr x11, [x2, x10, lsl #3] ldr x12, [x3, x10, lsl #3] str x11, [x21, x10, lsl #3] @@ -785,7 +785,7 @@ copyloop: str xzr, [x1, x10, lsl #3] add x10, x10, #0x1 cmp x10, x0 - b.cc copyloop + b.cc curve25519_x25519_byte_alt_copyloop ldr x11, [x4] sub x12, x11, #0x1 str x12, [x4] @@ -802,7 +802,7 @@ copyloop: madd x20, x12, x20, x20 madd x20, x11, x20, x20 lsl x2, x0, #7 -outerloop: +curve25519_x25519_byte_alt_outerloop: add x10, x2, #0x3f lsr x5, x10, #6 cmp x5, x0 @@ -813,7 +813,7 @@ outerloop: mov x16, xzr mov x19, xzr mov x10, xzr -toploop: +curve25519_x25519_byte_alt_toploop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] orr x17, x11, x12 @@ -827,7 +827,7 @@ toploop: csetm x19, ne add x10, x10, #0x1 cmp x10, x5 - b.cc toploop + b.cc curve25519_x25519_byte_alt_toploop orr x11, x13, x14 clz x12, x11 negs x17, x12 @@ -847,7 +847,7 @@ toploop: mov x9, #0x1 mov x10, #0x3a tst x15, #0x1 -innerloop: +curve25519_x25519_byte_alt_innerloop: csel x11, x14, xzr, ne csel x12, x16, xzr, ne csel x17, x8, xzr, ne @@ -869,13 +869,13 @@ innerloop: add x8, x8, x8 add x9, x9, x9 sub x10, x10, #0x1 - cbnz x10, innerloop + cbnz x10, curve25519_x25519_byte_alt_innerloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -congloop: +curve25519_x25519_byte_alt_congloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x6, x11 @@ -902,7 +902,7 @@ congloop: adc x14, x14, x15 add x10, x10, #0x1 cmp x10, x0 - b.cc congloop + b.cc curve25519_x25519_byte_alt_congloop extr x13, x13, x17, #58 extr x14, x14, x19, #58 ldr x11, [x4] @@ -913,8 +913,8 @@ congloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, wmontend -wmontloop: + cbz x11, curve25519_x25519_byte_alt_wmontend +curve25519_x25519_byte_alt_wmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] mul x15, x17, x11 @@ -926,24 +926,24 @@ wmontloop: str x12, [x4, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wmontloop -wmontend: + cbnz x11, curve25519_x25519_byte_alt_wmontloop +curve25519_x25519_byte_alt_wmontend: adcs x16, x16, x13 adc x13, xzr, xzr sub x15, x10, #0x1 str x16, [x4, x15, lsl #3] negs x10, xzr -wcmploop: +curve25519_x25519_byte_alt_wcmploop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, 
wcmploop + cbnz x11, curve25519_x25519_byte_alt_wcmploop sbcs xzr, x13, xzr csetm x13, cs negs x10, xzr -wcorrloop: +curve25519_x25519_byte_alt_wcorrloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x13 @@ -951,7 +951,7 @@ wcorrloop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcorrloop + cbnz x11, curve25519_x25519_byte_alt_wcorrloop ldr x11, [x1] mul x17, x11, x20 ldr x12, [x3] @@ -960,8 +960,8 @@ wcorrloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, zmontend -zmontloop: + cbz x11, curve25519_x25519_byte_alt_zmontend +curve25519_x25519_byte_alt_zmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x17, x11 @@ -973,24 +973,24 @@ zmontloop: str x12, [x1, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zmontloop -zmontend: + cbnz x11, curve25519_x25519_byte_alt_zmontloop +curve25519_x25519_byte_alt_zmontend: adcs x16, x16, x14 adc x14, xzr, xzr sub x15, x10, #0x1 str x16, [x1, x15, lsl #3] negs x10, xzr -zcmploop: +curve25519_x25519_byte_alt_zcmploop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcmploop + cbnz x11, curve25519_x25519_byte_alt_zcmploop sbcs xzr, x14, xzr csetm x14, cs negs x10, xzr -zcorrloop: +curve25519_x25519_byte_alt_zcorrloop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x14 @@ -998,13 +998,13 @@ zcorrloop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcorrloop + cbnz x11, curve25519_x25519_byte_alt_zcorrloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -crossloop: +curve25519_x25519_byte_alt_crossloop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] mul x15, x6, x11 @@ -1031,13 +1031,13 @@ crossloop: csetm x19, cc add x10, x10, #0x1 cmp x10, x5 - b.cc crossloop + b.cc curve25519_x25519_byte_alt_crossloop cmn x17, x17 ldr x15, [x21] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip1 -negloop1: + cbz x6, curve25519_x25519_byte_alt_negskip1 +curve25519_x25519_byte_alt_negloop1: add x11, x10, #0x8 ldr x12, [x21, x11] extr x15, x12, x15, #58 @@ -1047,8 +1047,8 @@ negloop1: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop1 -negskip1: + cbnz x6, curve25519_x25519_byte_alt_negloop1 +curve25519_x25519_byte_alt_negskip1: extr x15, x13, x15, #58 eor x15, x15, x17 adcs x15, x15, xzr @@ -1057,8 +1057,8 @@ negskip1: ldr x15, [x22] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip2 -negloop2: + cbz x6, curve25519_x25519_byte_alt_negskip2 +curve25519_x25519_byte_alt_negloop2: add x11, x10, #0x8 ldr x12, [x22, x11] extr x15, x12, x15, #58 @@ -1068,15 +1068,15 @@ negloop2: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop2 -negskip2: + cbnz x6, curve25519_x25519_byte_alt_negloop2 +curve25519_x25519_byte_alt_negskip2: extr x15, x14, x15, #58 eor x15, x15, x19 adcs x15, x15, xzr str x15, [x22, x10] mov x10, xzr cmn x17, x17 -wfliploop: +curve25519_x25519_byte_alt_wfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] and x11, x11, x17 @@ -1085,11 +1085,11 @@ wfliploop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wfliploop + cbnz x11, curve25519_x25519_byte_alt_wfliploop mvn x19, x19 mov x10, xzr cmn x19, x19 -zfliploop: +curve25519_x25519_byte_alt_zfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] and x11, x11, x19 @@ -1098,9 +1098,9 @@ zfliploop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zfliploop + cbnz x11, 
curve25519_x25519_byte_alt_zfliploop subs x2, x2, #0x3a - b.hi outerloop + b.hi curve25519_x25519_byte_alt_outerloop // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. This avoids building in a diff --git a/arm/curve25519/curve25519_x25519base.S b/arm/curve25519/curve25519_x25519base.S index 41d5f5b414..030fa08e24 100644 --- a/arm/curve25519/curve25519_x25519base.S +++ b/arm/curve25519/curve25519_x25519base.S @@ -535,8 +535,8 @@ S2N_BN_SYMBOL(curve25519_x25519base): ldr x0, [scalar] ands xzr, x0, #8 - adr x10, edwards25519_0g - adr x11, edwards25519_8g + adr x10, curve25519_x25519base_edwards25519_0g + adr x11, curve25519_x25519base_edwards25519_8g ldp x0, x1, [x10] ldp x2, x3, [x11] csel x0, x0, x2, eq @@ -592,12 +592,12 @@ S2N_BN_SYMBOL(curve25519_x25519base): // l >= 9 case cannot arise on the last iteration. mov i, 4 - adr tab, edwards25519_gtable + adr tab, curve25519_x25519base_edwards25519_gtable mov bias, xzr // Start of the main loop, repeated 63 times for i = 4, 8, ..., 252 -scalarloop: +curve25519_x25519base_scalarloop: // Look at the next 4-bit field "bf", adding the previous bias as well. // Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, @@ -880,7 +880,7 @@ scalarloop: add i, i, 4 cmp i, 256 - bcc scalarloop + bcc curve25519_x25519base_scalarloop // Now we need to translate from Edwards curve edwards25519 back // to the Montgomery form curve25519. The mapping in the affine @@ -917,7 +917,7 @@ scalarloop: mov x0, 4 add x1, x_3 add x2, z_3 - adr x3, p_25519 + adr x3, curve25519_x25519base_p_25519 add x4, tmpspace // Inline copy of bignum_modinv, identical except for stripping out the @@ -929,7 +929,7 @@ scalarloop: add x21, x4, x10 add x22, x21, x10 mov x10, xzr -copyloop: +curve25519_x25519base_copyloop: ldr x11, [x2, x10, lsl #3] ldr x12, [x3, x10, lsl #3] str x11, [x21, x10, lsl #3] @@ -938,7 +938,7 @@ copyloop: str xzr, [x1, x10, lsl #3] add x10, x10, #0x1 cmp x10, x0 - b.cc copyloop + b.cc curve25519_x25519base_copyloop ldr x11, [x4] sub x12, x11, #0x1 str x12, [x4] @@ -955,7 +955,7 @@ copyloop: madd x20, x12, x20, x20 madd x20, x11, x20, x20 lsl x2, x0, #7 -outerloop: +curve25519_x25519base_outerloop: add x10, x2, #0x3f lsr x5, x10, #6 cmp x5, x0 @@ -966,7 +966,7 @@ outerloop: mov x16, xzr mov x19, xzr mov x10, xzr -toploop: +curve25519_x25519base_toploop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] orr x17, x11, x12 @@ -980,7 +980,7 @@ toploop: csetm x19, ne add x10, x10, #0x1 cmp x10, x5 - b.cc toploop + b.cc curve25519_x25519base_toploop orr x11, x13, x14 clz x12, x11 negs x17, x12 @@ -1000,7 +1000,7 @@ toploop: mov x9, #0x1 mov x10, #0x3a tst x15, #0x1 -innerloop: +curve25519_x25519base_innerloop: csel x11, x14, xzr, ne csel x12, x16, xzr, ne csel x17, x8, xzr, ne @@ -1022,13 +1022,13 @@ innerloop: add x8, x8, x8 add x9, x9, x9 sub x10, x10, #0x1 - cbnz x10, innerloop + cbnz x10, curve25519_x25519base_innerloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -congloop: +curve25519_x25519base_congloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x6, x11 @@ -1055,7 +1055,7 @@ congloop: adc x14, x14, x15 add x10, x10, #0x1 cmp x10, x0 - b.cc congloop + b.cc curve25519_x25519base_congloop extr x13, x13, x17, #58 extr x14, x14, x19, #58 ldr x11, [x4] @@ -1066,8 +1066,8 @@ congloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, wmontend -wmontloop: + cbz x11, curve25519_x25519base_wmontend 
+curve25519_x25519base_wmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] mul x15, x17, x11 @@ -1079,24 +1079,24 @@ wmontloop: str x12, [x4, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wmontloop -wmontend: + cbnz x11, curve25519_x25519base_wmontloop +curve25519_x25519base_wmontend: adcs x16, x16, x13 adc x13, xzr, xzr sub x15, x10, #0x1 str x16, [x4, x15, lsl #3] negs x10, xzr -wcmploop: +curve25519_x25519base_wcmploop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcmploop + cbnz x11, curve25519_x25519base_wcmploop sbcs xzr, x13, xzr csetm x13, cs negs x10, xzr -wcorrloop: +curve25519_x25519base_wcorrloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x13 @@ -1104,7 +1104,7 @@ wcorrloop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcorrloop + cbnz x11, curve25519_x25519base_wcorrloop ldr x11, [x1] mul x17, x11, x20 ldr x12, [x3] @@ -1113,8 +1113,8 @@ wcorrloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, zmontend -zmontloop: + cbz x11, curve25519_x25519base_zmontend +curve25519_x25519base_zmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x17, x11 @@ -1126,24 +1126,24 @@ zmontloop: str x12, [x1, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zmontloop -zmontend: + cbnz x11, curve25519_x25519base_zmontloop +curve25519_x25519base_zmontend: adcs x16, x16, x14 adc x14, xzr, xzr sub x15, x10, #0x1 str x16, [x1, x15, lsl #3] negs x10, xzr -zcmploop: +curve25519_x25519base_zcmploop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcmploop + cbnz x11, curve25519_x25519base_zcmploop sbcs xzr, x14, xzr csetm x14, cs negs x10, xzr -zcorrloop: +curve25519_x25519base_zcorrloop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x14 @@ -1151,13 +1151,13 @@ zcorrloop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcorrloop + cbnz x11, curve25519_x25519base_zcorrloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -crossloop: +curve25519_x25519base_crossloop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] mul x15, x6, x11 @@ -1184,13 +1184,13 @@ crossloop: csetm x19, cc add x10, x10, #0x1 cmp x10, x5 - b.cc crossloop + b.cc curve25519_x25519base_crossloop cmn x17, x17 ldr x15, [x21] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip1 -negloop1: + cbz x6, curve25519_x25519base_negskip1 +curve25519_x25519base_negloop1: add x11, x10, #0x8 ldr x12, [x21, x11] extr x15, x12, x15, #58 @@ -1200,8 +1200,8 @@ negloop1: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop1 -negskip1: + cbnz x6, curve25519_x25519base_negloop1 +curve25519_x25519base_negskip1: extr x15, x13, x15, #58 eor x15, x15, x17 adcs x15, x15, xzr @@ -1210,8 +1210,8 @@ negskip1: ldr x15, [x22] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip2 -negloop2: + cbz x6, curve25519_x25519base_negskip2 +curve25519_x25519base_negloop2: add x11, x10, #0x8 ldr x12, [x22, x11] extr x15, x12, x15, #58 @@ -1221,15 +1221,15 @@ negloop2: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop2 -negskip2: + cbnz x6, curve25519_x25519base_negloop2 +curve25519_x25519base_negskip2: extr x15, x14, x15, #58 eor x15, x15, x19 adcs x15, x15, xzr str x15, [x22, x10] mov x10, xzr cmn x17, x17 -wfliploop: +curve25519_x25519base_wfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] and 
x11, x11, x17 @@ -1238,11 +1238,11 @@ wfliploop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wfliploop + cbnz x11, curve25519_x25519base_wfliploop mvn x19, x19 mov x10, xzr cmn x19, x19 -zfliploop: +curve25519_x25519base_zfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] and x11, x11, x19 @@ -1251,9 +1251,9 @@ zfliploop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zfliploop + cbnz x11, curve25519_x25519base_zfliploop subs x2, x2, #0x3a - b.hi outerloop + b.hi curve25519_x25519base_outerloop // The final result is (X + T) / (X - T) // This is the only operation in the whole computation that @@ -1281,7 +1281,7 @@ zfliploop: // The modulus p_25519 = 2^255 - 19, for the modular inverse -p_25519: +curve25519_x25519base_p_25519: .quad 0xffffffffffffffed .quad 0xffffffffffffffff .quad 0xffffffffffffffff @@ -1290,7 +1290,7 @@ p_25519: // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. -edwards25519_0g: +curve25519_x25519base_edwards25519_0g: .quad 0x251037f7cf4e861d .quad 0x10ede0fb19fb128f @@ -1307,7 +1307,7 @@ edwards25519_0g: .quad 0x72e302a348492870 .quad 0x1253c19e53dbe1bc -edwards25519_8g: +curve25519_x25519base_edwards25519_8g: .quad 0x331d086e0d9abcaa .quad 0x1e23c96d311a10c9 @@ -1327,7 +1327,7 @@ edwards25519_8g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. -edwards25519_gtable: +curve25519_x25519base_edwards25519_gtable: // 2^4 * 1 * G diff --git a/arm/curve25519/curve25519_x25519base_alt.S b/arm/curve25519/curve25519_x25519base_alt.S index cb8354c824..97d2e9c54f 100644 --- a/arm/curve25519/curve25519_x25519base_alt.S +++ b/arm/curve25519/curve25519_x25519base_alt.S @@ -377,8 +377,8 @@ S2N_BN_SYMBOL(curve25519_x25519base_alt): ldr x0, [scalar] ands xzr, x0, #8 - adr x10, edwards25519_0g - adr x11, edwards25519_8g + adr x10, curve25519_x25519base_alt_edwards25519_0g + adr x11, curve25519_x25519base_alt_edwards25519_8g ldp x0, x1, [x10] ldp x2, x3, [x11] csel x0, x0, x2, eq @@ -434,12 +434,12 @@ S2N_BN_SYMBOL(curve25519_x25519base_alt): // l >= 9 case cannot arise on the last iteration. mov i, 4 - adr tab, edwards25519_gtable + adr tab, curve25519_x25519base_alt_edwards25519_gtable mov bias, xzr // Start of the main loop, repeated 63 times for i = 4, 8, ..., 252 -scalarloop: +curve25519_x25519base_alt_scalarloop: // Look at the next 4-bit field "bf", adding the previous bias as well. // Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, @@ -722,7 +722,7 @@ scalarloop: add i, i, 4 cmp i, 256 - bcc scalarloop + bcc curve25519_x25519base_alt_scalarloop // Now we need to translate from Edwards curve edwards25519 back // to the Montgomery form curve25519. 
The mapping in the affine @@ -759,7 +759,7 @@ scalarloop: mov x0, 4 add x1, x_3 add x2, z_3 - adr x3, p_25519 + adr x3, curve25519_x25519base_alt_p_25519 add x4, tmpspace // Inline copy of bignum_modinv, identical except for stripping out the @@ -771,7 +771,7 @@ scalarloop: add x21, x4, x10 add x22, x21, x10 mov x10, xzr -copyloop: +curve25519_x25519base_alt_copyloop: ldr x11, [x2, x10, lsl #3] ldr x12, [x3, x10, lsl #3] str x11, [x21, x10, lsl #3] @@ -780,7 +780,7 @@ copyloop: str xzr, [x1, x10, lsl #3] add x10, x10, #0x1 cmp x10, x0 - b.cc copyloop + b.cc curve25519_x25519base_alt_copyloop ldr x11, [x4] sub x12, x11, #0x1 str x12, [x4] @@ -797,7 +797,7 @@ copyloop: madd x20, x12, x20, x20 madd x20, x11, x20, x20 lsl x2, x0, #7 -outerloop: +curve25519_x25519base_alt_outerloop: add x10, x2, #0x3f lsr x5, x10, #6 cmp x5, x0 @@ -808,7 +808,7 @@ outerloop: mov x16, xzr mov x19, xzr mov x10, xzr -toploop: +curve25519_x25519base_alt_toploop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] orr x17, x11, x12 @@ -822,7 +822,7 @@ toploop: csetm x19, ne add x10, x10, #0x1 cmp x10, x5 - b.cc toploop + b.cc curve25519_x25519base_alt_toploop orr x11, x13, x14 clz x12, x11 negs x17, x12 @@ -842,7 +842,7 @@ toploop: mov x9, #0x1 mov x10, #0x3a tst x15, #0x1 -innerloop: +curve25519_x25519base_alt_innerloop: csel x11, x14, xzr, ne csel x12, x16, xzr, ne csel x17, x8, xzr, ne @@ -864,13 +864,13 @@ innerloop: add x8, x8, x8 add x9, x9, x9 sub x10, x10, #0x1 - cbnz x10, innerloop + cbnz x10, curve25519_x25519base_alt_innerloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -congloop: +curve25519_x25519base_alt_congloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x6, x11 @@ -897,7 +897,7 @@ congloop: adc x14, x14, x15 add x10, x10, #0x1 cmp x10, x0 - b.cc congloop + b.cc curve25519_x25519base_alt_congloop extr x13, x13, x17, #58 extr x14, x14, x19, #58 ldr x11, [x4] @@ -908,8 +908,8 @@ congloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, wmontend -wmontloop: + cbz x11, curve25519_x25519base_alt_wmontend +curve25519_x25519base_alt_wmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] mul x15, x17, x11 @@ -921,24 +921,24 @@ wmontloop: str x12, [x4, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wmontloop -wmontend: + cbnz x11, curve25519_x25519base_alt_wmontloop +curve25519_x25519base_alt_wmontend: adcs x16, x16, x13 adc x13, xzr, xzr sub x15, x10, #0x1 str x16, [x4, x15, lsl #3] negs x10, xzr -wcmploop: +curve25519_x25519base_alt_wcmploop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcmploop + cbnz x11, curve25519_x25519base_alt_wcmploop sbcs xzr, x13, xzr csetm x13, cs negs x10, xzr -wcorrloop: +curve25519_x25519base_alt_wcorrloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x13 @@ -946,7 +946,7 @@ wcorrloop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcorrloop + cbnz x11, curve25519_x25519base_alt_wcorrloop ldr x11, [x1] mul x17, x11, x20 ldr x12, [x3] @@ -955,8 +955,8 @@ wcorrloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, zmontend -zmontloop: + cbz x11, curve25519_x25519base_alt_zmontend +curve25519_x25519base_alt_zmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x17, x11 @@ -968,24 +968,24 @@ zmontloop: str x12, [x1, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zmontloop -zmontend: + cbnz x11, curve25519_x25519base_alt_zmontloop 
+curve25519_x25519base_alt_zmontend: adcs x16, x16, x14 adc x14, xzr, xzr sub x15, x10, #0x1 str x16, [x1, x15, lsl #3] negs x10, xzr -zcmploop: +curve25519_x25519base_alt_zcmploop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcmploop + cbnz x11, curve25519_x25519base_alt_zcmploop sbcs xzr, x14, xzr csetm x14, cs negs x10, xzr -zcorrloop: +curve25519_x25519base_alt_zcorrloop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x14 @@ -993,13 +993,13 @@ zcorrloop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcorrloop + cbnz x11, curve25519_x25519base_alt_zcorrloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -crossloop: +curve25519_x25519base_alt_crossloop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] mul x15, x6, x11 @@ -1026,13 +1026,13 @@ crossloop: csetm x19, cc add x10, x10, #0x1 cmp x10, x5 - b.cc crossloop + b.cc curve25519_x25519base_alt_crossloop cmn x17, x17 ldr x15, [x21] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip1 -negloop1: + cbz x6, curve25519_x25519base_alt_negskip1 +curve25519_x25519base_alt_negloop1: add x11, x10, #0x8 ldr x12, [x21, x11] extr x15, x12, x15, #58 @@ -1042,8 +1042,8 @@ negloop1: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop1 -negskip1: + cbnz x6, curve25519_x25519base_alt_negloop1 +curve25519_x25519base_alt_negskip1: extr x15, x13, x15, #58 eor x15, x15, x17 adcs x15, x15, xzr @@ -1052,8 +1052,8 @@ negskip1: ldr x15, [x22] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip2 -negloop2: + cbz x6, curve25519_x25519base_alt_negskip2 +curve25519_x25519base_alt_negloop2: add x11, x10, #0x8 ldr x12, [x22, x11] extr x15, x12, x15, #58 @@ -1063,15 +1063,15 @@ negloop2: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop2 -negskip2: + cbnz x6, curve25519_x25519base_alt_negloop2 +curve25519_x25519base_alt_negskip2: extr x15, x14, x15, #58 eor x15, x15, x19 adcs x15, x15, xzr str x15, [x22, x10] mov x10, xzr cmn x17, x17 -wfliploop: +curve25519_x25519base_alt_wfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] and x11, x11, x17 @@ -1080,11 +1080,11 @@ wfliploop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wfliploop + cbnz x11, curve25519_x25519base_alt_wfliploop mvn x19, x19 mov x10, xzr cmn x19, x19 -zfliploop: +curve25519_x25519base_alt_zfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] and x11, x11, x19 @@ -1093,9 +1093,9 @@ zfliploop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zfliploop + cbnz x11, curve25519_x25519base_alt_zfliploop subs x2, x2, #0x3a - b.hi outerloop + b.hi curve25519_x25519base_alt_outerloop // The final result is (X + T) / (X - T) // This is the only operation in the whole computation that @@ -1123,7 +1123,7 @@ zfliploop: // The modulus p_25519 = 2^255 - 19, for the modular inverse -p_25519: +curve25519_x25519base_alt_p_25519: .quad 0xffffffffffffffed .quad 0xffffffffffffffff .quad 0xffffffffffffffff @@ -1132,7 +1132,7 @@ p_25519: // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. 
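The choice between the two precomputed points mentioned just above follows directly from X25519 scalar clamping: bits 0..2 are forced to zero, bit 255 to zero and bit 254 to one, so once the windowed loop has absorbed bits 4..253 the leftover contribution is 2^254 * G plus, when bit 3 of the scalar is set, a further 8 * G. A minimal Python sketch of that bookkeeping (clamp and the variable names are illustrative, not taken from these sources):

    def clamp(k_bytes):
        k = bytearray(k_bytes)
        k[0] &= 248        # clear bits 0..2
        k[31] &= 127       # clear bit 255
        k[31] |= 64        # set bit 254
        return int.from_bytes(k, 'little')

    k = clamp(bytes(range(32)))                    # any 32-byte scalar works here
    base_term = (1 << 254) + (8 if k & 8 else 0)   # covered by the 0g/8g selection
    window_part = k - base_term
    assert window_part % 16 == 0 and window_part < (1 << 254)
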
-edwards25519_0g: +curve25519_x25519base_alt_edwards25519_0g: .quad 0x251037f7cf4e861d .quad 0x10ede0fb19fb128f @@ -1149,7 +1149,7 @@ edwards25519_0g: .quad 0x72e302a348492870 .quad 0x1253c19e53dbe1bc -edwards25519_8g: +curve25519_x25519base_alt_edwards25519_8g: .quad 0x331d086e0d9abcaa .quad 0x1e23c96d311a10c9 @@ -1169,7 +1169,7 @@ edwards25519_8g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. -edwards25519_gtable: +curve25519_x25519base_alt_edwards25519_gtable: // 2^4 * 1 * G diff --git a/arm/curve25519/curve25519_x25519base_byte.S b/arm/curve25519/curve25519_x25519base_byte.S index 651aea49c2..b6d95f58c9 100644 --- a/arm/curve25519/curve25519_x25519base_byte.S +++ b/arm/curve25519/curve25519_x25519base_byte.S @@ -594,8 +594,8 @@ S2N_BN_SYMBOL(curve25519_x25519base_byte): ldr x0, [scalar] ands xzr, x0, #8 - adr x10, edwards25519_0g - adr x11, edwards25519_8g + adr x10, curve25519_x25519base_byte_edwards25519_0g + adr x11, curve25519_x25519base_byte_edwards25519_8g ldp x0, x1, [x10] ldp x2, x3, [x11] csel x0, x0, x2, eq @@ -651,12 +651,12 @@ S2N_BN_SYMBOL(curve25519_x25519base_byte): // l >= 9 case cannot arise on the last iteration. mov i, 4 - adr tab, edwards25519_gtable + adr tab, curve25519_x25519base_byte_edwards25519_gtable mov bias, xzr // Start of the main loop, repeated 63 times for i = 4, 8, ..., 252 -scalarloop: +curve25519_x25519base_byte_scalarloop: // Look at the next 4-bit field "bf", adding the previous bias as well. // Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, @@ -939,7 +939,7 @@ scalarloop: add i, i, 4 cmp i, 256 - bcc scalarloop + bcc curve25519_x25519base_byte_scalarloop // Now we need to translate from Edwards curve edwards25519 back // to the Montgomery form curve25519. 
The mapping in the affine @@ -976,7 +976,7 @@ scalarloop: mov x0, 4 add x1, x_3 add x2, z_3 - adr x3, p_25519 + adr x3, curve25519_x25519base_byte_p_25519 add x4, tmpspace // Inline copy of bignum_modinv, identical except for stripping out the @@ -988,7 +988,7 @@ scalarloop: add x21, x4, x10 add x22, x21, x10 mov x10, xzr -copyloop: +curve25519_x25519base_byte_copyloop: ldr x11, [x2, x10, lsl #3] ldr x12, [x3, x10, lsl #3] str x11, [x21, x10, lsl #3] @@ -997,7 +997,7 @@ copyloop: str xzr, [x1, x10, lsl #3] add x10, x10, #0x1 cmp x10, x0 - b.cc copyloop + b.cc curve25519_x25519base_byte_copyloop ldr x11, [x4] sub x12, x11, #0x1 str x12, [x4] @@ -1014,7 +1014,7 @@ copyloop: madd x20, x12, x20, x20 madd x20, x11, x20, x20 lsl x2, x0, #7 -outerloop: +curve25519_x25519base_byte_outerloop: add x10, x2, #0x3f lsr x5, x10, #6 cmp x5, x0 @@ -1025,7 +1025,7 @@ outerloop: mov x16, xzr mov x19, xzr mov x10, xzr -toploop: +curve25519_x25519base_byte_toploop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] orr x17, x11, x12 @@ -1039,7 +1039,7 @@ toploop: csetm x19, ne add x10, x10, #0x1 cmp x10, x5 - b.cc toploop + b.cc curve25519_x25519base_byte_toploop orr x11, x13, x14 clz x12, x11 negs x17, x12 @@ -1059,7 +1059,7 @@ toploop: mov x9, #0x1 mov x10, #0x3a tst x15, #0x1 -innerloop: +curve25519_x25519base_byte_innerloop: csel x11, x14, xzr, ne csel x12, x16, xzr, ne csel x17, x8, xzr, ne @@ -1081,13 +1081,13 @@ innerloop: add x8, x8, x8 add x9, x9, x9 sub x10, x10, #0x1 - cbnz x10, innerloop + cbnz x10, curve25519_x25519base_byte_innerloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -congloop: +curve25519_x25519base_byte_congloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x6, x11 @@ -1114,7 +1114,7 @@ congloop: adc x14, x14, x15 add x10, x10, #0x1 cmp x10, x0 - b.cc congloop + b.cc curve25519_x25519base_byte_congloop extr x13, x13, x17, #58 extr x14, x14, x19, #58 ldr x11, [x4] @@ -1125,8 +1125,8 @@ congloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, wmontend -wmontloop: + cbz x11, curve25519_x25519base_byte_wmontend +curve25519_x25519base_byte_wmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] mul x15, x17, x11 @@ -1138,24 +1138,24 @@ wmontloop: str x12, [x4, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wmontloop -wmontend: + cbnz x11, curve25519_x25519base_byte_wmontloop +curve25519_x25519base_byte_wmontend: adcs x16, x16, x13 adc x13, xzr, xzr sub x15, x10, #0x1 str x16, [x4, x15, lsl #3] negs x10, xzr -wcmploop: +curve25519_x25519base_byte_wcmploop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcmploop + cbnz x11, curve25519_x25519base_byte_wcmploop sbcs xzr, x13, xzr csetm x13, cs negs x10, xzr -wcorrloop: +curve25519_x25519base_byte_wcorrloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x13 @@ -1163,7 +1163,7 @@ wcorrloop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcorrloop + cbnz x11, curve25519_x25519base_byte_wcorrloop ldr x11, [x1] mul x17, x11, x20 ldr x12, [x3] @@ -1172,8 +1172,8 @@ wcorrloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, zmontend -zmontloop: + cbz x11, curve25519_x25519base_byte_zmontend +curve25519_x25519base_byte_zmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x17, x11 @@ -1185,24 +1185,24 @@ zmontloop: str x12, [x1, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zmontloop -zmontend: + 
cbnz x11, curve25519_x25519base_byte_zmontloop +curve25519_x25519base_byte_zmontend: adcs x16, x16, x14 adc x14, xzr, xzr sub x15, x10, #0x1 str x16, [x1, x15, lsl #3] negs x10, xzr -zcmploop: +curve25519_x25519base_byte_zcmploop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcmploop + cbnz x11, curve25519_x25519base_byte_zcmploop sbcs xzr, x14, xzr csetm x14, cs negs x10, xzr -zcorrloop: +curve25519_x25519base_byte_zcorrloop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x14 @@ -1210,13 +1210,13 @@ zcorrloop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcorrloop + cbnz x11, curve25519_x25519base_byte_zcorrloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -crossloop: +curve25519_x25519base_byte_crossloop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] mul x15, x6, x11 @@ -1243,13 +1243,13 @@ crossloop: csetm x19, cc add x10, x10, #0x1 cmp x10, x5 - b.cc crossloop + b.cc curve25519_x25519base_byte_crossloop cmn x17, x17 ldr x15, [x21] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip1 -negloop1: + cbz x6, curve25519_x25519base_byte_negskip1 +curve25519_x25519base_byte_negloop1: add x11, x10, #0x8 ldr x12, [x21, x11] extr x15, x12, x15, #58 @@ -1259,8 +1259,8 @@ negloop1: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop1 -negskip1: + cbnz x6, curve25519_x25519base_byte_negloop1 +curve25519_x25519base_byte_negskip1: extr x15, x13, x15, #58 eor x15, x15, x17 adcs x15, x15, xzr @@ -1269,8 +1269,8 @@ negskip1: ldr x15, [x22] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip2 -negloop2: + cbz x6, curve25519_x25519base_byte_negskip2 +curve25519_x25519base_byte_negloop2: add x11, x10, #0x8 ldr x12, [x22, x11] extr x15, x12, x15, #58 @@ -1280,15 +1280,15 @@ negloop2: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop2 -negskip2: + cbnz x6, curve25519_x25519base_byte_negloop2 +curve25519_x25519base_byte_negskip2: extr x15, x14, x15, #58 eor x15, x15, x19 adcs x15, x15, xzr str x15, [x22, x10] mov x10, xzr cmn x17, x17 -wfliploop: +curve25519_x25519base_byte_wfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] and x11, x11, x17 @@ -1297,11 +1297,11 @@ wfliploop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wfliploop + cbnz x11, curve25519_x25519base_byte_wfliploop mvn x19, x19 mov x10, xzr cmn x19, x19 -zfliploop: +curve25519_x25519base_byte_zfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] and x11, x11, x19 @@ -1310,9 +1310,9 @@ zfliploop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zfliploop + cbnz x11, curve25519_x25519base_byte_zfliploop subs x2, x2, #0x3a - b.hi outerloop + b.hi curve25519_x25519base_byte_outerloop // The final result is (X + T) / (X - T) // This is the only operation in the whole computation that @@ -1407,7 +1407,7 @@ zfliploop: // The modulus p_25519 = 2^255 - 19, for the modular inverse -p_25519: +curve25519_x25519base_byte_p_25519: .quad 0xffffffffffffffed .quad 0xffffffffffffffff .quad 0xffffffffffffffff @@ -1416,7 +1416,7 @@ p_25519: // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. 
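The .quad limbs shown a few lines above are the little-endian 64-bit digits of the modulus (the diff context cuts off the fourth limb, which is 0x7fffffffffffffff); they can be sanity-checked against 2^255 - 19 in a couple of lines of Python, as a checking aid only:

    limbs = [0xffffffffffffffed, 0xffffffffffffffff,
             0xffffffffffffffff, 0x7fffffffffffffff]
    p = sum(limb << (64 * i) for i, limb in enumerate(limbs))
    assert p == (1 << 255) - 19
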
-edwards25519_0g: +curve25519_x25519base_byte_edwards25519_0g: .quad 0x251037f7cf4e861d .quad 0x10ede0fb19fb128f @@ -1433,7 +1433,7 @@ edwards25519_0g: .quad 0x72e302a348492870 .quad 0x1253c19e53dbe1bc -edwards25519_8g: +curve25519_x25519base_byte_edwards25519_8g: .quad 0x331d086e0d9abcaa .quad 0x1e23c96d311a10c9 @@ -1453,7 +1453,7 @@ edwards25519_8g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. -edwards25519_gtable: +curve25519_x25519base_byte_edwards25519_gtable: // 2^4 * 1 * G diff --git a/arm/curve25519/curve25519_x25519base_byte_alt.S b/arm/curve25519/curve25519_x25519base_byte_alt.S index c3fec7581e..6e61199732 100644 --- a/arm/curve25519/curve25519_x25519base_byte_alt.S +++ b/arm/curve25519/curve25519_x25519base_byte_alt.S @@ -436,8 +436,8 @@ S2N_BN_SYMBOL(curve25519_x25519base_byte_alt): ldr x0, [scalar] ands xzr, x0, #8 - adr x10, edwards25519_0g - adr x11, edwards25519_8g + adr x10, curve25519_x25519base_byte_alt_edwards25519_0g + adr x11, curve25519_x25519base_byte_alt_edwards25519_8g ldp x0, x1, [x10] ldp x2, x3, [x11] csel x0, x0, x2, eq @@ -493,12 +493,12 @@ S2N_BN_SYMBOL(curve25519_x25519base_byte_alt): // l >= 9 case cannot arise on the last iteration. mov i, 4 - adr tab, edwards25519_gtable + adr tab, curve25519_x25519base_byte_alt_edwards25519_gtable mov bias, xzr // Start of the main loop, repeated 63 times for i = 4, 8, ..., 252 -scalarloop: +curve25519_x25519base_byte_alt_scalarloop: // Look at the next 4-bit field "bf", adding the previous bias as well. // Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, @@ -781,7 +781,7 @@ scalarloop: add i, i, 4 cmp i, 256 - bcc scalarloop + bcc curve25519_x25519base_byte_alt_scalarloop // Now we need to translate from Edwards curve edwards25519 back // to the Montgomery form curve25519. 
The mapping in the affine @@ -818,7 +818,7 @@ scalarloop: mov x0, 4 add x1, x_3 add x2, z_3 - adr x3, p_25519 + adr x3, curve25519_x25519base_byte_alt_p_25519 add x4, tmpspace // Inline copy of bignum_modinv, identical except for stripping out the @@ -830,7 +830,7 @@ scalarloop: add x21, x4, x10 add x22, x21, x10 mov x10, xzr -copyloop: +curve25519_x25519base_byte_alt_copyloop: ldr x11, [x2, x10, lsl #3] ldr x12, [x3, x10, lsl #3] str x11, [x21, x10, lsl #3] @@ -839,7 +839,7 @@ copyloop: str xzr, [x1, x10, lsl #3] add x10, x10, #0x1 cmp x10, x0 - b.cc copyloop + b.cc curve25519_x25519base_byte_alt_copyloop ldr x11, [x4] sub x12, x11, #0x1 str x12, [x4] @@ -856,7 +856,7 @@ copyloop: madd x20, x12, x20, x20 madd x20, x11, x20, x20 lsl x2, x0, #7 -outerloop: +curve25519_x25519base_byte_alt_outerloop: add x10, x2, #0x3f lsr x5, x10, #6 cmp x5, x0 @@ -867,7 +867,7 @@ outerloop: mov x16, xzr mov x19, xzr mov x10, xzr -toploop: +curve25519_x25519base_byte_alt_toploop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] orr x17, x11, x12 @@ -881,7 +881,7 @@ toploop: csetm x19, ne add x10, x10, #0x1 cmp x10, x5 - b.cc toploop + b.cc curve25519_x25519base_byte_alt_toploop orr x11, x13, x14 clz x12, x11 negs x17, x12 @@ -901,7 +901,7 @@ toploop: mov x9, #0x1 mov x10, #0x3a tst x15, #0x1 -innerloop: +curve25519_x25519base_byte_alt_innerloop: csel x11, x14, xzr, ne csel x12, x16, xzr, ne csel x17, x8, xzr, ne @@ -923,13 +923,13 @@ innerloop: add x8, x8, x8 add x9, x9, x9 sub x10, x10, #0x1 - cbnz x10, innerloop + cbnz x10, curve25519_x25519base_byte_alt_innerloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -congloop: +curve25519_x25519base_byte_alt_congloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x6, x11 @@ -956,7 +956,7 @@ congloop: adc x14, x14, x15 add x10, x10, #0x1 cmp x10, x0 - b.cc congloop + b.cc curve25519_x25519base_byte_alt_congloop extr x13, x13, x17, #58 extr x14, x14, x19, #58 ldr x11, [x4] @@ -967,8 +967,8 @@ congloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, wmontend -wmontloop: + cbz x11, curve25519_x25519base_byte_alt_wmontend +curve25519_x25519base_byte_alt_wmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] mul x15, x17, x11 @@ -980,24 +980,24 @@ wmontloop: str x12, [x4, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wmontloop -wmontend: + cbnz x11, curve25519_x25519base_byte_alt_wmontloop +curve25519_x25519base_byte_alt_wmontend: adcs x16, x16, x13 adc x13, xzr, xzr sub x15, x10, #0x1 str x16, [x4, x15, lsl #3] negs x10, xzr -wcmploop: +curve25519_x25519base_byte_alt_wcmploop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcmploop + cbnz x11, curve25519_x25519base_byte_alt_wcmploop sbcs xzr, x13, xzr csetm x13, cs negs x10, xzr -wcorrloop: +curve25519_x25519base_byte_alt_wcorrloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x13 @@ -1005,7 +1005,7 @@ wcorrloop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcorrloop + cbnz x11, curve25519_x25519base_byte_alt_wcorrloop ldr x11, [x1] mul x17, x11, x20 ldr x12, [x3] @@ -1014,8 +1014,8 @@ wcorrloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, zmontend -zmontloop: + cbz x11, curve25519_x25519base_byte_alt_zmontend +curve25519_x25519base_byte_alt_zmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x17, x11 @@ -1027,24 +1027,24 @@ zmontloop: str x12, [x1, x15, lsl #3] add x10, 
x10, #0x1 sub x11, x10, x0 - cbnz x11, zmontloop -zmontend: + cbnz x11, curve25519_x25519base_byte_alt_zmontloop +curve25519_x25519base_byte_alt_zmontend: adcs x16, x16, x14 adc x14, xzr, xzr sub x15, x10, #0x1 str x16, [x1, x15, lsl #3] negs x10, xzr -zcmploop: +curve25519_x25519base_byte_alt_zcmploop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcmploop + cbnz x11, curve25519_x25519base_byte_alt_zcmploop sbcs xzr, x14, xzr csetm x14, cs negs x10, xzr -zcorrloop: +curve25519_x25519base_byte_alt_zcorrloop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x14 @@ -1052,13 +1052,13 @@ zcorrloop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcorrloop + cbnz x11, curve25519_x25519base_byte_alt_zcorrloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -crossloop: +curve25519_x25519base_byte_alt_crossloop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] mul x15, x6, x11 @@ -1085,13 +1085,13 @@ crossloop: csetm x19, cc add x10, x10, #0x1 cmp x10, x5 - b.cc crossloop + b.cc curve25519_x25519base_byte_alt_crossloop cmn x17, x17 ldr x15, [x21] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip1 -negloop1: + cbz x6, curve25519_x25519base_byte_alt_negskip1 +curve25519_x25519base_byte_alt_negloop1: add x11, x10, #0x8 ldr x12, [x21, x11] extr x15, x12, x15, #58 @@ -1101,8 +1101,8 @@ negloop1: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop1 -negskip1: + cbnz x6, curve25519_x25519base_byte_alt_negloop1 +curve25519_x25519base_byte_alt_negskip1: extr x15, x13, x15, #58 eor x15, x15, x17 adcs x15, x15, xzr @@ -1111,8 +1111,8 @@ negskip1: ldr x15, [x22] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip2 -negloop2: + cbz x6, curve25519_x25519base_byte_alt_negskip2 +curve25519_x25519base_byte_alt_negloop2: add x11, x10, #0x8 ldr x12, [x22, x11] extr x15, x12, x15, #58 @@ -1122,15 +1122,15 @@ negloop2: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop2 -negskip2: + cbnz x6, curve25519_x25519base_byte_alt_negloop2 +curve25519_x25519base_byte_alt_negskip2: extr x15, x14, x15, #58 eor x15, x15, x19 adcs x15, x15, xzr str x15, [x22, x10] mov x10, xzr cmn x17, x17 -wfliploop: +curve25519_x25519base_byte_alt_wfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] and x11, x11, x17 @@ -1139,11 +1139,11 @@ wfliploop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wfliploop + cbnz x11, curve25519_x25519base_byte_alt_wfliploop mvn x19, x19 mov x10, xzr cmn x19, x19 -zfliploop: +curve25519_x25519base_byte_alt_zfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] and x11, x11, x19 @@ -1152,9 +1152,9 @@ zfliploop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zfliploop + cbnz x11, curve25519_x25519base_byte_alt_zfliploop subs x2, x2, #0x3a - b.hi outerloop + b.hi curve25519_x25519base_byte_alt_outerloop // The final result is (X + T) / (X - T) // This is the only operation in the whole computation that @@ -1248,7 +1248,7 @@ zfliploop: // The modulus p_25519 = 2^255 - 19, for the modular inverse -p_25519: +curve25519_x25519base_byte_alt_p_25519: .quad 0xffffffffffffffed .quad 0xffffffffffffffff .quad 0xffffffffffffffff @@ -1257,7 +1257,7 @@ p_25519: // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. 
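As the comment above notes, (X + T) / (X - T) is the single genuine division in the whole computation; over GF(2^255 - 19) it amounts to one multiplication by a modular inverse, which the assembly obtains from the inlined bignum_modinv. A hedged Python sketch of the same field operation, using Fermat inversion in place of the code's Euclidean-style routine (field_inv and montgomery_x are illustrative names):

    p = (1 << 255) - 19

    def field_inv(a):
        # Fermat inversion a^(p-2) mod p; bignum_modinv computes the same
        # inverse by a Euclidean-style method.
        return pow(a, p - 2, p)

    def montgomery_x(X, T):
        # Affine result (X + T) / (X - T) over GF(2^255 - 19).
        return (X + T) * field_inv((X - T) % p) % p
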
-edwards25519_0g: +curve25519_x25519base_byte_alt_edwards25519_0g: .quad 0x251037f7cf4e861d .quad 0x10ede0fb19fb128f @@ -1274,7 +1274,7 @@ edwards25519_0g: .quad 0x72e302a348492870 .quad 0x1253c19e53dbe1bc -edwards25519_8g: +curve25519_x25519base_byte_alt_edwards25519_8g: .quad 0x331d086e0d9abcaa .quad 0x1e23c96d311a10c9 @@ -1294,7 +1294,7 @@ edwards25519_8g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. -edwards25519_gtable: +curve25519_x25519base_byte_alt_edwards25519_gtable: // 2^4 * 1 * G diff --git a/x86_att/curve25519/curve25519_x25519.S b/x86_att/curve25519/curve25519_x25519.S index 14293cbc2d..2a97ee9407 100644 --- a/x86_att/curve25519/curve25519_x25519.S +++ b/x86_att/curve25519/curve25519_x25519.S @@ -680,7 +680,7 @@ S2N_BN_SYMBOL(curve25519_x25519_byte): movl $253, %eax movq %rax, i -scalarloop: +curve25519_x25519_scalarloop: // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn @@ -750,7 +750,7 @@ scalarloop: subq $1, %rax movq %rax, i cmpq $3, %rax - jnc scalarloop + jnc curve25519_x25519_scalarloop // Multiplex directly into (xn,zn) then do three pure doubling steps; // this accounts for the implicit zeroing of the three lowest bits @@ -823,7 +823,7 @@ scalarloop: leaq (%r10,%rdi,8), %r15 xorq %r11, %r11 xorq %r9, %r9 -copyloop: +curve25519_x25519_copyloop: movq (%rdx,%r9,8), %rax movq (%rcx,%r9,8), %rbx movq %rax, (%r10,%r9,8) @@ -832,7 +832,7 @@ copyloop: movq %r11, (%rsi,%r9,8) incq %r9 cmpq %rdi, %r9 - jb copyloop + jb curve25519_x25519_copyloop movq (%r8), %rax movq %rax, %rbx decq %rbx @@ -864,7 +864,7 @@ copyloop: movq %rdi, %rax shlq $0x7, %rax movq %rax, 0x20(%rsp) -outerloop: +curve25519_x25519_outerloop: movq 0x20(%rsp), %r13 addq $0x3f, %r13 shrq $0x6, %r13 @@ -878,7 +878,7 @@ outerloop: movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 -toploop: +curve25519_x25519_toploop: movq (%r8,%r9,8), %rbx movq (%r15,%r9,8), %rcx movq %r11, %r10 @@ -894,7 +894,7 @@ toploop: sbbq %r11, %r11 incq %r9 cmpq %r13, %r9 - jb toploop + jb curve25519_x25519_toploop movq %r12, %rax orq %rbp, %rax bsrq %rax, %rcx @@ -914,7 +914,7 @@ toploop: movq %r13, 0x10(%rsp) movq %r8, (%rsp) movq %r15, 0x18(%rsp) -innerloop: +curve25519_x25519_innerloop: xorl %eax, %eax xorl %ebx, %ebx xorq %r8, %r8 @@ -944,7 +944,7 @@ innerloop: addq %rcx, %rcx addq %rdx, %rdx decq %r9 - jne innerloop + jne curve25519_x25519_innerloop movq 0x8(%rsp), %rdi movq 0x10(%rsp), %r13 movq (%rsp), %r8 @@ -960,7 +960,7 @@ innerloop: xorq %r10, %r10 xorq %r11, %r11 xorq %r9, %r9 -congloop: +curve25519_x25519_congloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -991,7 +991,7 @@ congloop: movq %rbp, %rsi incq %r9 cmpq %rdi, %r9 - jb congloop + jb curve25519_x25519_congloop shldq $0x6, %r10, %r14 shldq $0x6, %r11, %rsi movq 0x48(%rsp), %r15 @@ -1005,8 +1005,8 @@ congloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je wmontend -wmontloop: + je curve25519_x25519_wmontend +curve25519_x25519_wmontloop: adcq (%r8,%r9,8), %r10 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1017,26 +1017,26 @@ wmontloop: movq %rdx, %r10 incq %r9 decq %rcx - jne wmontloop -wmontend: + jne curve25519_x25519_wmontloop +curve25519_x25519_wmontend: adcq %r14, %r10 movq %r10, -0x8(%r8,%rdi,8) sbbq %r10, %r10 negq %r10 movq %rdi, %rcx xorq %r9, %r9 -wcmploop: +curve25519_x25519_wcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne wcmploop + jne curve25519_x25519_wcmploop sbbq $0x0, %r10 sbbq %r10, %r10 notq %r10 xorq 
%rcx, %rcx xorq %r9, %r9 -wcorrloop: +curve25519_x25519_wcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r10, %rbx @@ -1046,7 +1046,7 @@ wcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb wcorrloop + jb curve25519_x25519_wcorrloop movq 0x40(%rsp), %r8 movq (%r8), %rbx movq 0x28(%rsp), %rbp @@ -1058,8 +1058,8 @@ wcorrloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je zmontend -zmontloop: + je curve25519_x25519_zmontend +curve25519_x25519_zmontloop: adcq (%r8,%r9,8), %r11 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1070,26 +1070,26 @@ zmontloop: movq %rdx, %r11 incq %r9 decq %rcx - jne zmontloop -zmontend: + jne curve25519_x25519_zmontloop +curve25519_x25519_zmontend: adcq %rsi, %r11 movq %r11, -0x8(%r8,%rdi,8) sbbq %r11, %r11 negq %r11 movq %rdi, %rcx xorq %r9, %r9 -zcmploop: +curve25519_x25519_zcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne zcmploop + jne curve25519_x25519_zcmploop sbbq $0x0, %r11 sbbq %r11, %r11 notq %r11 xorq %rcx, %rcx xorq %r9, %r9 -zcorrloop: +curve25519_x25519_zcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r11, %rbx @@ -1099,7 +1099,7 @@ zcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb zcorrloop + jb curve25519_x25519_zcorrloop movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 @@ -1107,7 +1107,7 @@ zcorrloop: xorq %r14, %r14 xorq %rbp, %rbp xorq %rsi, %rsi -crossloop: +curve25519_x25519_crossloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -1138,13 +1138,13 @@ crossloop: movq %r11, %rsi incq %r9 cmpq %r13, %r9 - jb crossloop + jb curve25519_x25519_crossloop xorq %r9, %r9 movq %r12, %r10 movq %rbp, %r11 xorq %r12, %r14 xorq %rbp, %rsi -optnegloop: +curve25519_x25519_optnegloop: movq (%r8,%r9,8), %rax xorq %r12, %rax negq %r10 @@ -1159,11 +1159,11 @@ optnegloop: movq %rax, (%r15,%r9,8) incq %r9 cmpq %r13, %r9 - jb optnegloop + jb curve25519_x25519_optnegloop subq %r10, %r14 subq %r11, %rsi movq %r13, %r9 -shiftloop: +curve25519_x25519_shiftloop: movq -0x8(%r8,%r9,8), %rax movq %rax, %r10 shrdq $0x3a, %r14, %rax @@ -1175,7 +1175,7 @@ shiftloop: movq %rax, -0x8(%r15,%r9,8) movq %r11, %rsi decq %r9 - jne shiftloop + jne curve25519_x25519_shiftloop notq %rbp movq 0x48(%rsp), %rcx movq 0x38(%rsp), %r8 @@ -1183,7 +1183,7 @@ shiftloop: movq %r12, %r10 movq %rbp, %r11 xorq %r9, %r9 -fliploop: +curve25519_x25519_fliploop: movq %rbp, %rdx movq (%rcx,%r9,8), %rax andq %rax, %rdx @@ -1202,9 +1202,9 @@ fliploop: movq %rdx, (%r15,%r9,8) incq %r9 cmpq %rdi, %r9 - jb fliploop + jb curve25519_x25519_fliploop subq $0x3a, 0x20(%rsp) - ja outerloop + ja curve25519_x25519_outerloop // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. 
This avoids building in a diff --git a/x86_att/curve25519/curve25519_x25519_alt.S b/x86_att/curve25519/curve25519_x25519_alt.S index 981f6e7417..241c4505af 100644 --- a/x86_att/curve25519/curve25519_x25519_alt.S +++ b/x86_att/curve25519/curve25519_x25519_alt.S @@ -841,7 +841,7 @@ S2N_BN_SYMBOL(curve25519_x25519_byte_alt): movl $253, %eax movq %rax, i -scalarloop: +curve25519_x25519_alt_scalarloop: // sm = xm + zm; sn = xn + zn; dm = xm - zm; dn = xn - zn @@ -911,7 +911,7 @@ scalarloop: subq $1, %rax movq %rax, i cmpq $3, %rax - jnc scalarloop + jnc curve25519_x25519_alt_scalarloop // Multiplex directly into (xn,zn) then do three pure doubling steps; // this accounts for the implicit zeroing of the three lowest bits @@ -984,7 +984,7 @@ scalarloop: leaq (%r10,%rdi,8), %r15 xorq %r11, %r11 xorq %r9, %r9 -copyloop: +curve25519_x25519_alt_copyloop: movq (%rdx,%r9,8), %rax movq (%rcx,%r9,8), %rbx movq %rax, (%r10,%r9,8) @@ -993,7 +993,7 @@ copyloop: movq %r11, (%rsi,%r9,8) incq %r9 cmpq %rdi, %r9 - jb copyloop + jb curve25519_x25519_alt_copyloop movq (%r8), %rax movq %rax, %rbx decq %rbx @@ -1025,7 +1025,7 @@ copyloop: movq %rdi, %rax shlq $0x7, %rax movq %rax, 0x20(%rsp) -outerloop: +curve25519_x25519_alt_outerloop: movq 0x20(%rsp), %r13 addq $0x3f, %r13 shrq $0x6, %r13 @@ -1039,7 +1039,7 @@ outerloop: movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 -toploop: +curve25519_x25519_alt_toploop: movq (%r8,%r9,8), %rbx movq (%r15,%r9,8), %rcx movq %r11, %r10 @@ -1055,7 +1055,7 @@ toploop: sbbq %r11, %r11 incq %r9 cmpq %r13, %r9 - jb toploop + jb curve25519_x25519_alt_toploop movq %r12, %rax orq %rbp, %rax bsrq %rax, %rcx @@ -1075,7 +1075,7 @@ toploop: movq %r13, 0x10(%rsp) movq %r8, (%rsp) movq %r15, 0x18(%rsp) -innerloop: +curve25519_x25519_alt_innerloop: xorl %eax, %eax xorl %ebx, %ebx xorq %r8, %r8 @@ -1105,7 +1105,7 @@ innerloop: addq %rcx, %rcx addq %rdx, %rdx decq %r9 - jne innerloop + jne curve25519_x25519_alt_innerloop movq 0x8(%rsp), %rdi movq 0x10(%rsp), %r13 movq (%rsp), %r8 @@ -1121,7 +1121,7 @@ innerloop: xorq %r10, %r10 xorq %r11, %r11 xorq %r9, %r9 -congloop: +curve25519_x25519_alt_congloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -1152,7 +1152,7 @@ congloop: movq %rbp, %rsi incq %r9 cmpq %rdi, %r9 - jb congloop + jb curve25519_x25519_alt_congloop shldq $0x6, %r10, %r14 shldq $0x6, %r11, %rsi movq 0x48(%rsp), %r15 @@ -1166,8 +1166,8 @@ congloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je wmontend -wmontloop: + je curve25519_x25519_alt_wmontend +curve25519_x25519_alt_wmontloop: adcq (%r8,%r9,8), %r10 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1178,26 +1178,26 @@ wmontloop: movq %rdx, %r10 incq %r9 decq %rcx - jne wmontloop -wmontend: + jne curve25519_x25519_alt_wmontloop +curve25519_x25519_alt_wmontend: adcq %r14, %r10 movq %r10, -0x8(%r8,%rdi,8) sbbq %r10, %r10 negq %r10 movq %rdi, %rcx xorq %r9, %r9 -wcmploop: +curve25519_x25519_alt_wcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne wcmploop + jne curve25519_x25519_alt_wcmploop sbbq $0x0, %r10 sbbq %r10, %r10 notq %r10 xorq %rcx, %rcx xorq %r9, %r9 -wcorrloop: +curve25519_x25519_alt_wcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r10, %rbx @@ -1207,7 +1207,7 @@ wcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb wcorrloop + jb curve25519_x25519_alt_wcorrloop movq 0x40(%rsp), %r8 movq (%r8), %rbx movq 0x28(%rsp), %rbp @@ -1219,8 +1219,8 @@ wcorrloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je zmontend -zmontloop: + je 
curve25519_x25519_alt_zmontend +curve25519_x25519_alt_zmontloop: adcq (%r8,%r9,8), %r11 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1231,26 +1231,26 @@ zmontloop: movq %rdx, %r11 incq %r9 decq %rcx - jne zmontloop -zmontend: + jne curve25519_x25519_alt_zmontloop +curve25519_x25519_alt_zmontend: adcq %rsi, %r11 movq %r11, -0x8(%r8,%rdi,8) sbbq %r11, %r11 negq %r11 movq %rdi, %rcx xorq %r9, %r9 -zcmploop: +curve25519_x25519_alt_zcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne zcmploop + jne curve25519_x25519_alt_zcmploop sbbq $0x0, %r11 sbbq %r11, %r11 notq %r11 xorq %rcx, %rcx xorq %r9, %r9 -zcorrloop: +curve25519_x25519_alt_zcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r11, %rbx @@ -1260,7 +1260,7 @@ zcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb zcorrloop + jb curve25519_x25519_alt_zcorrloop movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 @@ -1268,7 +1268,7 @@ zcorrloop: xorq %r14, %r14 xorq %rbp, %rbp xorq %rsi, %rsi -crossloop: +curve25519_x25519_alt_crossloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -1299,13 +1299,13 @@ crossloop: movq %r11, %rsi incq %r9 cmpq %r13, %r9 - jb crossloop + jb curve25519_x25519_alt_crossloop xorq %r9, %r9 movq %r12, %r10 movq %rbp, %r11 xorq %r12, %r14 xorq %rbp, %rsi -optnegloop: +curve25519_x25519_alt_optnegloop: movq (%r8,%r9,8), %rax xorq %r12, %rax negq %r10 @@ -1320,11 +1320,11 @@ optnegloop: movq %rax, (%r15,%r9,8) incq %r9 cmpq %r13, %r9 - jb optnegloop + jb curve25519_x25519_alt_optnegloop subq %r10, %r14 subq %r11, %rsi movq %r13, %r9 -shiftloop: +curve25519_x25519_alt_shiftloop: movq -0x8(%r8,%r9,8), %rax movq %rax, %r10 shrdq $0x3a, %r14, %rax @@ -1336,7 +1336,7 @@ shiftloop: movq %rax, -0x8(%r15,%r9,8) movq %r11, %rsi decq %r9 - jne shiftloop + jne curve25519_x25519_alt_shiftloop notq %rbp movq 0x48(%rsp), %rcx movq 0x38(%rsp), %r8 @@ -1344,7 +1344,7 @@ shiftloop: movq %r12, %r10 movq %rbp, %r11 xorq %r9, %r9 -fliploop: +curve25519_x25519_alt_fliploop: movq %rbp, %rdx movq (%rcx,%r9,8), %rax andq %rax, %rdx @@ -1363,9 +1363,9 @@ fliploop: movq %rdx, (%r15,%r9,8) incq %r9 cmpq %rdi, %r9 - jb fliploop + jb curve25519_x25519_alt_fliploop subq $0x3a, 0x20(%rsp) - ja outerloop + ja curve25519_x25519_alt_outerloop // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. This avoids building in a diff --git a/x86_att/curve25519/curve25519_x25519base.S b/x86_att/curve25519/curve25519_x25519base.S index f7ffa2b838..12a5cddd18 100644 --- a/x86_att/curve25519/curve25519_x25519base.S +++ b/x86_att/curve25519/curve25519_x25519base.S @@ -347,12 +347,12 @@ S2N_BN_SYMBOL(curve25519_x25519base_byte): pushq %rsi movq %rcx, %rdi movq %rdx, %rsi - callq curve25519_x25519base_standard + callq curve25519_x25519base_curve25519_x25519base_standard popq %rsi popq %rdi ret -curve25519_x25519base_standard: +curve25519_x25519base_curve25519_x25519base_standard: #endif // Save registers, make room for temps, preserve input arguments. @@ -399,8 +399,8 @@ curve25519_x25519base_standard: movq (%rsp), %rax andq $8, %rax - leaq edwards25519_0g(%rip), %r10 - leaq edwards25519_8g(%rip), %r11 + leaq curve25519_x25519base_edwards25519_0g(%rip), %r10 + leaq curve25519_x25519base_edwards25519_8g(%rip), %r11 movq (%r10), %rax movq (%r11), %rcx @@ -484,13 +484,13 @@ curve25519_x25519base_standard: // l >= 9 case cannot arise on the last iteration. 
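The bf/bias scheme referred to here is ordinary signed-window recoding: a window value of 9..16 is looked up as 16 - bf with the point negated, and the borrowed 16 is repaid by carrying 1 into the next window; the recoding therefore relies on the top window staying at most 8, which clamping guarantees (bit 254 set, bit 255 clear), so the l >= 9 case indeed cannot arise on the last iteration. A short Python sketch of the recoding (recode_windows is an illustrative name; the 63 iterations match the i = 4, 8, ..., 252 loop):

    def recode_windows(k):
        # (table_index, negate) for each 4-bit window at bits 4, 8, ..., 252,
        # carrying the bias into the next window as described above.
        bias = 0
        for shift in range(4, 256, 4):
            bf = ((k >> shift) & 0xF) + bias
            if bf <= 8:
                yield bf, False
                bias = 0
            else:                  # 9..16: look up 16 - bf, negate the point
                yield 16 - bf, True
                bias = 1
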
movq $4, i - leaq edwards25519_gtable(%rip), %rax + leaq curve25519_x25519base_edwards25519_gtable(%rip), %rax movq %rax, tab movq $0, bias // Start of the main loop, repeated 63 times for i = 4, 8, ..., 252 -scalarloop: +curve25519_x25519base_scalarloop: // Look at the next 4-bit field "bf", adding the previous bias as well. // Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, @@ -847,7 +847,7 @@ scalarloop: addq $4, i cmpq $256, i - jc scalarloop + jc curve25519_x25519base_scalarloop // Now we need to translate from Edwards curve edwards25519 back // to the Montgomery form curve25519. The mapping in the affine @@ -884,7 +884,7 @@ scalarloop: movq $4, %rdi leaq 128(%rsp), %rsi leaq 192(%rsp), %rdx - leaq p_25519(%rip), %rcx + leaq curve25519_x25519base_p_25519(%rip), %rcx leaq 256(%rsp), %r8 // Inline copy of bignum_modinv, identical except for stripping out the @@ -902,7 +902,7 @@ scalarloop: leaq (%r10,%rdi,8), %r15 xorq %r11, %r11 xorq %r9, %r9 -copyloop: +curve25519_x25519base_copyloop: movq (%rdx,%r9,8), %rax movq (%rcx,%r9,8), %rbx movq %rax, (%r10,%r9,8) @@ -911,7 +911,7 @@ copyloop: movq %r11, (%rsi,%r9,8) incq %r9 cmpq %rdi, %r9 - jb copyloop + jb curve25519_x25519base_copyloop movq (%r8), %rax movq %rax, %rbx decq %rbx @@ -943,7 +943,7 @@ copyloop: movq %rdi, %rax shlq $0x7, %rax movq %rax, 0x20(%rsp) -outerloop: +curve25519_x25519base_outerloop: movq 0x20(%rsp), %r13 addq $0x3f, %r13 shrq $0x6, %r13 @@ -957,7 +957,7 @@ outerloop: movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 -toploop: +curve25519_x25519base_toploop: movq (%r8,%r9,8), %rbx movq (%r15,%r9,8), %rcx movq %r11, %r10 @@ -973,7 +973,7 @@ toploop: sbbq %r11, %r11 incq %r9 cmpq %r13, %r9 - jb toploop + jb curve25519_x25519base_toploop movq %r12, %rax orq %rbp, %rax bsrq %rax, %rcx @@ -993,7 +993,7 @@ toploop: movq %r13, 0x10(%rsp) movq %r8, (%rsp) movq %r15, 0x18(%rsp) -innerloop: +curve25519_x25519base_innerloop: xorl %eax, %eax xorl %ebx, %ebx xorq %r8, %r8 @@ -1023,7 +1023,7 @@ innerloop: addq %rcx, %rcx addq %rdx, %rdx decq %r9 - jne innerloop + jne curve25519_x25519base_innerloop movq 0x8(%rsp), %rdi movq 0x10(%rsp), %r13 movq (%rsp), %r8 @@ -1039,7 +1039,7 @@ innerloop: xorq %r10, %r10 xorq %r11, %r11 xorq %r9, %r9 -congloop: +curve25519_x25519base_congloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -1070,7 +1070,7 @@ congloop: movq %rbp, %rsi incq %r9 cmpq %rdi, %r9 - jb congloop + jb curve25519_x25519base_congloop shldq $0x6, %r10, %r14 shldq $0x6, %r11, %rsi movq 0x48(%rsp), %r15 @@ -1084,8 +1084,8 @@ congloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je wmontend -wmontloop: + je curve25519_x25519base_wmontend +curve25519_x25519base_wmontloop: adcq (%r8,%r9,8), %r10 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1096,26 +1096,26 @@ wmontloop: movq %rdx, %r10 incq %r9 decq %rcx - jne wmontloop -wmontend: + jne curve25519_x25519base_wmontloop +curve25519_x25519base_wmontend: adcq %r14, %r10 movq %r10, -0x8(%r8,%rdi,8) sbbq %r10, %r10 negq %r10 movq %rdi, %rcx xorq %r9, %r9 -wcmploop: +curve25519_x25519base_wcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne wcmploop + jne curve25519_x25519base_wcmploop sbbq $0x0, %r10 sbbq %r10, %r10 notq %r10 xorq %rcx, %rcx xorq %r9, %r9 -wcorrloop: +curve25519_x25519base_wcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r10, %rbx @@ -1125,7 +1125,7 @@ wcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb wcorrloop + jb curve25519_x25519base_wcorrloop movq 0x40(%rsp), %r8 movq 
(%r8), %rbx movq 0x28(%rsp), %rbp @@ -1137,8 +1137,8 @@ wcorrloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je zmontend -zmontloop: + je curve25519_x25519base_zmontend +curve25519_x25519base_zmontloop: adcq (%r8,%r9,8), %r11 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1149,26 +1149,26 @@ zmontloop: movq %rdx, %r11 incq %r9 decq %rcx - jne zmontloop -zmontend: + jne curve25519_x25519base_zmontloop +curve25519_x25519base_zmontend: adcq %rsi, %r11 movq %r11, -0x8(%r8,%rdi,8) sbbq %r11, %r11 negq %r11 movq %rdi, %rcx xorq %r9, %r9 -zcmploop: +curve25519_x25519base_zcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne zcmploop + jne curve25519_x25519base_zcmploop sbbq $0x0, %r11 sbbq %r11, %r11 notq %r11 xorq %rcx, %rcx xorq %r9, %r9 -zcorrloop: +curve25519_x25519base_zcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r11, %rbx @@ -1178,7 +1178,7 @@ zcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb zcorrloop + jb curve25519_x25519base_zcorrloop movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 @@ -1186,7 +1186,7 @@ zcorrloop: xorq %r14, %r14 xorq %rbp, %rbp xorq %rsi, %rsi -crossloop: +curve25519_x25519base_crossloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -1217,13 +1217,13 @@ crossloop: movq %r11, %rsi incq %r9 cmpq %r13, %r9 - jb crossloop + jb curve25519_x25519base_crossloop xorq %r9, %r9 movq %r12, %r10 movq %rbp, %r11 xorq %r12, %r14 xorq %rbp, %rsi -optnegloop: +curve25519_x25519base_optnegloop: movq (%r8,%r9,8), %rax xorq %r12, %rax negq %r10 @@ -1238,11 +1238,11 @@ optnegloop: movq %rax, (%r15,%r9,8) incq %r9 cmpq %r13, %r9 - jb optnegloop + jb curve25519_x25519base_optnegloop subq %r10, %r14 subq %r11, %rsi movq %r13, %r9 -shiftloop: +curve25519_x25519base_shiftloop: movq -0x8(%r8,%r9,8), %rax movq %rax, %r10 shrdq $0x3a, %r14, %rax @@ -1254,7 +1254,7 @@ shiftloop: movq %rax, -0x8(%r15,%r9,8) movq %r11, %rsi decq %r9 - jne shiftloop + jne curve25519_x25519base_shiftloop notq %rbp movq 0x48(%rsp), %rcx movq 0x38(%rsp), %r8 @@ -1262,7 +1262,7 @@ shiftloop: movq %r12, %r10 movq %rbp, %r11 xorq %r9, %r9 -fliploop: +curve25519_x25519base_fliploop: movq %rbp, %rdx movq (%rcx,%r9,8), %rax andq %rax, %rdx @@ -1281,9 +1281,9 @@ fliploop: movq %rdx, (%r15,%r9,8) incq %r9 cmpq %rdi, %r9 - jb fliploop + jb curve25519_x25519base_fliploop subq $0x3a, 0x20(%rsp) - ja outerloop + ja curve25519_x25519base_outerloop // The final result is (X + T) / (X - T) // This is the only operation in the whole computation that @@ -1315,7 +1315,7 @@ fliploop: // The modulus, for the modular inverse -p_25519: +curve25519_x25519base_p_25519: .quad 0xffffffffffffffed .quad 0xffffffffffffffff .quad 0xffffffffffffffff @@ -1324,7 +1324,7 @@ p_25519: // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with z = 1 assumed and hence left out, so they are (X,Y,T) only. -edwards25519_0g: +curve25519_x25519base_edwards25519_0g: .quad 0x251037f7cf4e861d .quad 0x10ede0fb19fb128f @@ -1341,7 +1341,7 @@ edwards25519_0g: .quad 0x72e302a348492870 .quad 0x1253c19e53dbe1bc -edwards25519_8g: +curve25519_x25519base_edwards25519_8g: .quad 0x331d086e0d9abcaa .quad 0x1e23c96d311a10c9 @@ -1361,7 +1361,7 @@ edwards25519_8g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
-edwards25519_gtable: +curve25519_x25519base_edwards25519_gtable: // 2^4 * 1 * G diff --git a/x86_att/curve25519/curve25519_x25519base_alt.S b/x86_att/curve25519/curve25519_x25519base_alt.S index c90bd1bc9f..8a89b1f597 100644 --- a/x86_att/curve25519/curve25519_x25519base_alt.S +++ b/x86_att/curve25519/curve25519_x25519base_alt.S @@ -423,12 +423,12 @@ S2N_BN_SYMBOL(curve25519_x25519base_byte_alt): pushq %rsi movq %rcx, %rdi movq %rdx, %rsi - callq curve25519_x25519base_alt_standard + callq curve25519_x25519base_alt_curve25519_x25519base_alt_standard popq %rsi popq %rdi ret -curve25519_x25519base_alt_standard: +curve25519_x25519base_alt_curve25519_x25519base_alt_standard: #endif // Save registers, make room for temps, preserve input arguments. @@ -475,8 +475,8 @@ curve25519_x25519base_alt_standard: movq (%rsp), %rax andq $8, %rax - leaq edwards25519_0g(%rip), %r10 - leaq edwards25519_8g(%rip), %r11 + leaq curve25519_x25519base_alt_edwards25519_0g(%rip), %r10 + leaq curve25519_x25519base_alt_edwards25519_8g(%rip), %r11 movq (%r10), %rax movq (%r11), %rcx @@ -560,13 +560,13 @@ curve25519_x25519base_alt_standard: // l >= 9 case cannot arise on the last iteration. movq $4, i - leaq edwards25519_gtable(%rip), %rax + leaq curve25519_x25519base_alt_edwards25519_gtable(%rip), %rax movq %rax, tab movq $0, bias // Start of the main loop, repeated 63 times for i = 4, 8, ..., 252 -scalarloop: +curve25519_x25519base_alt_scalarloop: // Look at the next 4-bit field "bf", adding the previous bias as well. // Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, @@ -923,7 +923,7 @@ scalarloop: addq $4, i cmpq $256, i - jc scalarloop + jc curve25519_x25519base_alt_scalarloop // Now we need to translate from Edwards curve edwards25519 back // to the Montgomery form curve25519. 
The mapping in the affine @@ -958,7 +958,7 @@ scalarloop: movq $4, %rdi leaq 128(%rsp), %rsi leaq 192(%rsp), %rdx - leaq p_25519(%rip), %rcx + leaq curve25519_x25519base_alt_p_25519(%rip), %rcx leaq 256(%rsp), %r8 // Inline copy of bignum_modinv, identical except for stripping out the @@ -976,7 +976,7 @@ scalarloop: leaq (%r10,%rdi,8), %r15 xorq %r11, %r11 xorq %r9, %r9 -copyloop: +curve25519_x25519base_alt_copyloop: movq (%rdx,%r9,8), %rax movq (%rcx,%r9,8), %rbx movq %rax, (%r10,%r9,8) @@ -985,7 +985,7 @@ copyloop: movq %r11, (%rsi,%r9,8) incq %r9 cmpq %rdi, %r9 - jb copyloop + jb curve25519_x25519base_alt_copyloop movq (%r8), %rax movq %rax, %rbx decq %rbx @@ -1017,7 +1017,7 @@ copyloop: movq %rdi, %rax shlq $0x7, %rax movq %rax, 0x20(%rsp) -outerloop: +curve25519_x25519base_alt_outerloop: movq 0x20(%rsp), %r13 addq $0x3f, %r13 shrq $0x6, %r13 @@ -1031,7 +1031,7 @@ outerloop: movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 -toploop: +curve25519_x25519base_alt_toploop: movq (%r8,%r9,8), %rbx movq (%r15,%r9,8), %rcx movq %r11, %r10 @@ -1047,7 +1047,7 @@ toploop: sbbq %r11, %r11 incq %r9 cmpq %r13, %r9 - jb toploop + jb curve25519_x25519base_alt_toploop movq %r12, %rax orq %rbp, %rax bsrq %rax, %rcx @@ -1067,7 +1067,7 @@ toploop: movq %r13, 0x10(%rsp) movq %r8, (%rsp) movq %r15, 0x18(%rsp) -innerloop: +curve25519_x25519base_alt_innerloop: xorl %eax, %eax xorl %ebx, %ebx xorq %r8, %r8 @@ -1097,7 +1097,7 @@ innerloop: addq %rcx, %rcx addq %rdx, %rdx decq %r9 - jne innerloop + jne curve25519_x25519base_alt_innerloop movq 0x8(%rsp), %rdi movq 0x10(%rsp), %r13 movq (%rsp), %r8 @@ -1113,7 +1113,7 @@ innerloop: xorq %r10, %r10 xorq %r11, %r11 xorq %r9, %r9 -congloop: +curve25519_x25519base_alt_congloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -1144,7 +1144,7 @@ congloop: movq %rbp, %rsi incq %r9 cmpq %rdi, %r9 - jb congloop + jb curve25519_x25519base_alt_congloop shldq $0x6, %r10, %r14 shldq $0x6, %r11, %rsi movq 0x48(%rsp), %r15 @@ -1158,8 +1158,8 @@ congloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je wmontend -wmontloop: + je curve25519_x25519base_alt_wmontend +curve25519_x25519base_alt_wmontloop: adcq (%r8,%r9,8), %r10 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1170,26 +1170,26 @@ wmontloop: movq %rdx, %r10 incq %r9 decq %rcx - jne wmontloop -wmontend: + jne curve25519_x25519base_alt_wmontloop +curve25519_x25519base_alt_wmontend: adcq %r14, %r10 movq %r10, -0x8(%r8,%rdi,8) sbbq %r10, %r10 negq %r10 movq %rdi, %rcx xorq %r9, %r9 -wcmploop: +curve25519_x25519base_alt_wcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne wcmploop + jne curve25519_x25519base_alt_wcmploop sbbq $0x0, %r10 sbbq %r10, %r10 notq %r10 xorq %rcx, %rcx xorq %r9, %r9 -wcorrloop: +curve25519_x25519base_alt_wcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r10, %rbx @@ -1199,7 +1199,7 @@ wcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb wcorrloop + jb curve25519_x25519base_alt_wcorrloop movq 0x40(%rsp), %r8 movq (%r8), %rbx movq 0x28(%rsp), %rbp @@ -1211,8 +1211,8 @@ wcorrloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je zmontend -zmontloop: + je curve25519_x25519base_alt_zmontend +curve25519_x25519base_alt_zmontloop: adcq (%r8,%r9,8), %r11 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1223,26 +1223,26 @@ zmontloop: movq %rdx, %r11 incq %r9 decq %rcx - jne zmontloop -zmontend: + jne curve25519_x25519base_alt_zmontloop +curve25519_x25519base_alt_zmontend: adcq %rsi, %r11 movq %r11, -0x8(%r8,%rdi,8) sbbq %r11, %r11 negq %r11 movq %rdi, %rcx 
xorq %r9, %r9 -zcmploop: +curve25519_x25519base_alt_zcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne zcmploop + jne curve25519_x25519base_alt_zcmploop sbbq $0x0, %r11 sbbq %r11, %r11 notq %r11 xorq %rcx, %rcx xorq %r9, %r9 -zcorrloop: +curve25519_x25519base_alt_zcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r11, %rbx @@ -1252,7 +1252,7 @@ zcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb zcorrloop + jb curve25519_x25519base_alt_zcorrloop movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 @@ -1260,7 +1260,7 @@ zcorrloop: xorq %r14, %r14 xorq %rbp, %rbp xorq %rsi, %rsi -crossloop: +curve25519_x25519base_alt_crossloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -1291,13 +1291,13 @@ crossloop: movq %r11, %rsi incq %r9 cmpq %r13, %r9 - jb crossloop + jb curve25519_x25519base_alt_crossloop xorq %r9, %r9 movq %r12, %r10 movq %rbp, %r11 xorq %r12, %r14 xorq %rbp, %rsi -optnegloop: +curve25519_x25519base_alt_optnegloop: movq (%r8,%r9,8), %rax xorq %r12, %rax negq %r10 @@ -1312,11 +1312,11 @@ optnegloop: movq %rax, (%r15,%r9,8) incq %r9 cmpq %r13, %r9 - jb optnegloop + jb curve25519_x25519base_alt_optnegloop subq %r10, %r14 subq %r11, %rsi movq %r13, %r9 -shiftloop: +curve25519_x25519base_alt_shiftloop: movq -0x8(%r8,%r9,8), %rax movq %rax, %r10 shrdq $0x3a, %r14, %rax @@ -1328,7 +1328,7 @@ shiftloop: movq %rax, -0x8(%r15,%r9,8) movq %r11, %rsi decq %r9 - jne shiftloop + jne curve25519_x25519base_alt_shiftloop notq %rbp movq 0x48(%rsp), %rcx movq 0x38(%rsp), %r8 @@ -1336,7 +1336,7 @@ shiftloop: movq %r12, %r10 movq %rbp, %r11 xorq %r9, %r9 -fliploop: +curve25519_x25519base_alt_fliploop: movq %rbp, %rdx movq (%rcx,%r9,8), %rax andq %rax, %rdx @@ -1355,9 +1355,9 @@ fliploop: movq %rdx, (%r15,%r9,8) incq %r9 cmpq %rdi, %r9 - jb fliploop + jb curve25519_x25519base_alt_fliploop subq $0x3a, 0x20(%rsp) - ja outerloop + ja curve25519_x25519base_alt_outerloop // The final result is (X + T) / (X - T) // This is the only operation in the whole computation that @@ -1389,7 +1389,7 @@ fliploop: // The modulus, for the modular inverse -p_25519: +curve25519_x25519base_alt_p_25519: .quad 0xffffffffffffffed .quad 0xffffffffffffffff .quad 0xffffffffffffffff @@ -1398,7 +1398,7 @@ p_25519: // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with z = 1 assumed and hence left out, so they are (X,Y,T) only. -edwards25519_0g: +curve25519_x25519base_alt_edwards25519_0g: .quad 0x251037f7cf4e861d .quad 0x10ede0fb19fb128f @@ -1415,7 +1415,7 @@ edwards25519_0g: .quad 0x72e302a348492870 .quad 0x1253c19e53dbe1bc -edwards25519_8g: +curve25519_x25519base_alt_edwards25519_8g: .quad 0x331d086e0d9abcaa .quad 0x1e23c96d311a10c9 @@ -1435,7 +1435,7 @@ edwards25519_8g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. -edwards25519_gtable: +curve25519_x25519base_alt_edwards25519_gtable: // 2^4 * 1 * G From ee9c3aff2391e64e8e27a36eb99dfe6efe0c73cc Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 21 Apr 2023 08:47:52 -0700 Subject: [PATCH 33/42] Add edwards25519 (Ed25519) basepoint multiplication This is very similar to the core of curve25519_x25519base, but without the post-translation to the curve25519 Montgomery form, and not fixing by assumption any low or high bits of the scalar but accepting an arbitrary 256-bit scalar as input (though it is reduced modulo the basepoint order internally). 
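As a quick illustration of the new interface, here is a hedged usage sketch built only on the prototype declared in the files below; the result is the affine pair (X,Y), four little-endian 64-bit limbs per coordinate:

    #include <stdint.h>
    #include <stdio.h>

    extern void edwards25519_scalarmulbase(uint64_t res[static 8],
                                           uint64_t scalar[static 4]);

    int main(void)
    {
        uint64_t n[4] = {1, 0, 0, 0};  /* n = 1, so the result is B itself */
        uint64_t P[8];                 /* (X,Y), 4 limbs per coordinate */
        edwards25519_scalarmulbase(P, n);
        /* For n = 1 the y coordinate is 4/5 mod 2^255 - 19, whose low
           limb is 0x6666666666666658. */
        printf("Y[0] = 0x%016llx\n", (unsigned long long)P[4]);
        return 0;
    }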
s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/b365f8e01aa5dc5099d809fff839708420040e6a --- arm/curve25519/edwards25519_scalarmulbase.S | 8935 ++++++++++++++++ .../edwards25519_scalarmulbase_alt.S | 8777 ++++++++++++++++ .../curve25519/edwards25519_scalarmulbase.S | 8957 ++++++++++++++++ .../edwards25519_scalarmulbase_alt.S | 9033 +++++++++++++++++ 4 files changed, 35702 insertions(+) create mode 100644 arm/curve25519/edwards25519_scalarmulbase.S create mode 100644 arm/curve25519/edwards25519_scalarmulbase_alt.S create mode 100644 x86_att/curve25519/edwards25519_scalarmulbase.S create mode 100644 x86_att/curve25519/edwards25519_scalarmulbase_alt.S diff --git a/arm/curve25519/edwards25519_scalarmulbase.S b/arm/curve25519/edwards25519_scalarmulbase.S new file mode 100644 index 0000000000..475308050b --- /dev/null +++ b/arm/curve25519/edwards25519_scalarmulbase.S @@ -0,0 +1,8935 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Scalar multiplication for the edwards25519 standard basepoint +// Input scalar[4]; output res[8] +// +// extern void edwards25519_scalarmulbase +// (uint64_t res[static 8],uint64_t scalar[static 4]); +// +// Given a scalar n, returns point (X,Y) = n * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve. +// +// Standard ARM ABI: X0 = res, X1 = scalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_scalarmulbase) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_scalarmulbase) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable home for the input result argument during the whole body + +#define res x23 + +// Other variables that are only needed prior to the modular inverse. + +#define tab x19 + +#define i x20 + +#define bias x21 + +#define bf x22 +#define ix x22 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. + +#define resx res, #(0*NUMSIZE) +#define resy res, #(1*NUMSIZE) + +#define scalar sp, #(0*NUMSIZE) + +#define tabent sp, #(1*NUMSIZE) +#define ymx_2 sp, #(1*NUMSIZE) +#define xpy_2 sp, #(2*NUMSIZE) +#define kxy_2 sp, #(3*NUMSIZE) + +#define acc sp, #(4*NUMSIZE) +#define x_1 sp, #(4*NUMSIZE) +#define y_1 sp, #(5*NUMSIZE) +#define z_1 sp, #(6*NUMSIZE) +#define w_1 sp, #(7*NUMSIZE) +#define x_3 sp, #(4*NUMSIZE) +#define y_3 sp, #(5*NUMSIZE) +#define z_3 sp, #(6*NUMSIZE) +#define w_3 sp, #(7*NUMSIZE) + +#define tmpspace sp, #(8*NUMSIZE) +#define t0 sp, #(8*NUMSIZE) +#define t1 sp, #(9*NUMSIZE) +#define t2 sp, #(10*NUMSIZE) +#define t3 sp, #(11*NUMSIZE) +#define t4 sp, #(12*NUMSIZE) +#define t5 sp, #(13*NUMSIZE) + +// Total size to reserve on the stack + +#define NSPACE (14*NUMSIZE) + +// Load 64-bit immediate into a register + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +// Macro wrapping up the basic field operation bignum_mul_p25519, only +// trivially different from a pure function call to that subroutine. 
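Before the macro body itself, a note on the mathematics it implements: the reduction here (and in the mul_4 variant further below) folds a 512-bit product using 2^256 == 38 and 2^255 == 19 (mod p_25519); the constants #0x26 = 38 and #0x13 = 19 in the code are exactly these. A plain-C sketch of the same folding, ignoring the 32-bit scheduling tricks the assembly uses:

    #include <stdint.h>

    /* Fold an 8-limb (512-bit) product z into 4 limbs modulo
       p = 2^255 - 19. The result is < 2*p, which is the mul_4
       guarantee; mul_p25519 additionally applies a final correction
       to return the canonical value < p. */
    void fold_p25519(uint64_t r[4], const uint64_t z[8])
    {
        unsigned __int128 t;
        uint64_t h[4], c = 0;

        for (int i = 0; i < 4; i++) {        /* h = z_lo + 38 * z_hi */
            t = (unsigned __int128)38 * z[4 + i] + z[i] + c;
            h[i] = (uint64_t)t;
            c = (uint64_t)(t >> 64);
        }
        /* value is now c * 2^256 + h with c < 38; count multiples of
           2^255 and fold each back in as 19 */
        uint64_t top = (c << 1) | (h[3] >> 63);
        h[3] &= 0x7fffffffffffffffULL;

        t = (unsigned __int128)19 * top + h[0];
        r[0] = (uint64_t)t;
        for (int i = 1; i < 4; i++) {
            t = (t >> 64) + h[i];
            r[i] = (uint64_t)t;
        }
    }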
+ +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [P2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ + lsr x12, x12, #32; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ + lsr x13, x13, #32; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ + lsr x14, x14, #32; \ + umaddl x14, w14, w3, x10; \ + mov x10, x4; \ + lsr x0, 
x14, #31; \ + mov x5, #0x13; \ + umaddl x5, w5, w0, x5; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ + mov x3, #0x13; \ + tst x10, #0x8000000000000000; \ + csel x3, x3, xzr, pl; \ + subs x7, x7, x3; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbc x10, x10, xzr; \ + and x10, x10, #0x7fffffffffffffff; \ + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [P2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + 
adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ + lsr x12, x12, #32; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ + lsr x13, x13, #32; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ + lsr x14, x14, #32; \ + umaddl x14, w14, w3, x10; \ + mov x10, x4; \ + lsr x0, x14, #31; \ + mov x5, #0x13; \ + umull x5, w5, w0; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x4, #38; \ + csel x3, x4, xzr, lo; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbc x8, x8, xzr; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16] + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. + +#define add_twice4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x5, x6, [P1+16]; \ + ldp x7, x8, [P2+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +#define double_twice4(P0,P1) \ + ldp x3, x4, [P1]; \ + adds x3, x3, x3; \ + adcs x4, x4, x4; \ + ldp x5, x6, [P1+16]; \ + adcs x5, x5, x5; \ + adcs x6, x6, x6; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +S2N_BN_SYMBOL(edwards25519_scalarmulbase): + +// Save regs and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + sub sp, sp, #NSPACE + +// Move the output pointer to a stable place + + mov res, x0 + +// Copy the input scalar x to its local variable while reducing it +// modulo 2^252 + m where m = 27742317777372353535851937790883648493; +// this is the order of the basepoint so this doesn't change the result. 
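The concrete steps appear in the comments continuing just below; as a rough C model of the whole reduction (the limb constants for m match the movbig immediates in the code; names are illustrative):

    #include <stdint.h>

    /* m = 27742317777372353535851937790883648493, so the basepoint
       order is 2^252 + m. */
    static const uint64_t M_LO = 0x5812631a5cf5d3edULL;
    static const uint64_t M_HI = 0x14def9dea2f79cd6ULL;

    /* Reduce the 4-limb little-endian scalar x modulo 2^252 + m and
       report whether it was negated to land in [0, 2^252). */
    void reduce_scalar(uint64_t x[4], int *negated)
    {
        uint64_t q = x[3] >> 60;           /* q = floor(x / 2^252) <= 15 */
        unsigned __int128 p0 = (unsigned __int128)q * M_LO;
        unsigned __int128 p1 = (unsigned __int128)q * M_HI
                               + (uint64_t)(p0 >> 64);
        uint64_t s[4] = { (uint64_t)p0, (uint64_t)p1,
                          (uint64_t)(p1 >> 64), q << 60 };  /* q*(2^252+m) */

        uint64_t borrow = 0;
        for (int i = 0; i < 4; i++) {      /* x' = x - q * (2^252 + m) */
            unsigned __int128 d = (unsigned __int128)x[i] - s[i] - borrow;
            x[i] = (uint64_t)d;
            borrow = (uint64_t)(d >> 64) & 1;
        }
        *negated = (int)borrow;            /* x' went negative */
        if (borrow) {                      /* two's-complement negate */
            unsigned __int128 cc = 1;
            for (int i = 0; i < 4; i++) {
                cc += (uint64_t)~x[i];
                x[i] = (uint64_t)cc;
                cc >>= 64;
            }
        }
    }

In the assembly the negation flag is not a separate output but is stashed in bit 255 of the stored scalar, which the main loop ignores and the finale uses to negate the x coordinate.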
+// First do q = floor(x/2^252) and x' = x - q * (2^252 + m), which gives +// an initial result -15 * m <= x' < 2^252 + + ldp x10, x11, [x1] + ldp x12, x13, [x1, #16] + + lsr x9, x13, #60 + + movbig(x0,#0x5812,#0x631a,#0x5cf5,#0xd3ed); + movbig(x1,#0x14de,#0xf9de,#0xa2f7,#0x9cd6); + + mul x2, x9, x0 + mul x3, x9, x1 + umulh x4, x9, x0 + umulh x5, x9, x1 + + adds x3, x3, x4 + adc x4, x5, xzr + lsl x5, x9, #60 + + subs x10, x10, x2 + sbcs x11, x11, x3 + sbcs x12, x12, x4 + sbcs x13, x13, x5 + +// If x' < 0 then just directly negate it; this makes sure the +// reduced argument is strictly 0 <= x' < 2^252, but now we need +// to record (done via bit 255 of the reduced scalar, which is +// ignored in the main loop) when we negated so we can flip +// the sign of the eventual point to compensate. + + csetm x9, cc + adds xzr, x9, x9 + eor x10, x10, x9 + adcs x10, x10, xzr + eor x11, x11, x9 + adcs x11, x11, xzr + eor x12, x12, x9 + adcs x12, x12, xzr + eor x13, x13, x9 + adc x13, x13, xzr + + and x9, x9, #0x8000000000000000 + orr x13, x13, x9 + +// And before we store the scalar, test and reset bit 251 to +// initialize the main loop just below. + + stp x10, x11, [scalar] + tst x13, #0x0800000000000000 + bic x13, x13, #0x0800000000000000 + stp x12, x13, [scalar+16] + +// The main part of the computation is in extended-projective coordinates +// (X,Y,Z,T), representing an affine point on the edwards25519 curve +// (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z). +// In comments B means the standard basepoint (x,4/5) = +// (0x216....f25d51a,0x0x6666..666658). +// +// Initialize accumulator "acc" to either 0 or 2^251 * B depending on +// bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle. + + adr x10, edwards25519_0g + adr x11, edwards25519_251g + ldp x0, x1, [x10] + ldp x2, x3, [x11] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc] + + ldp x0, x1, [x10, 1*16] + ldp x2, x3, [x11, 1*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+1*16] + + ldp x0, x1, [x10, 2*16] + ldp x2, x3, [x11, 2*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+2*16] + + ldp x0, x1, [x10, 3*16] + ldp x2, x3, [x11, 3*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+3*16] + + mov x0, #1 + stp x0, xzr, [acc+4*16] + stp xzr, xzr, [acc+5*16] + + ldp x0, x1, [x10, 4*16] + ldp x2, x3, [x11, 4*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+6*16] + + ldp x0, x1, [x10, 5*16] + ldp x2, x3, [x11, 5*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+7*16] + +// The counter "i" tracks the bit position for which the scalar has +// already been absorbed, starting at 0 and going up in chunks of 4. +// +// The pointer "tab" points at the current block of the table for +// multiples (2^i * j) * B at the current bit position i; 1 <= j <= 8. +// +// The bias is always either 0 and 1 and needs to be added to the +// partially processed scalar implicitly. This is used to absorb 4 bits +// of scalar per iteration from 3-bit table indexing by exploiting +// negation: (16 * h + l) * B = (16 * (h + 1) - (16 - l)) * B is used +// when l >= 9. Note that we can't have any bias left over at the +// end because we made sure bit 251 is clear in the reduced scalar. + + mov i, 0 + adr tab, edwards25519_gtable + mov bias, xzr + +// Start of the main loop, repeated 63 times for i = 0, 4, 8, ..., 248 + +scalarloop: + +// Look at the next 4-bit field "bf", adding the previous bias as well. 
+// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, +// setting the bias to 1 for the next iteration in the latter case. + + lsr x0, i, #6 + ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly + lsr x2, x2, i + and x2, x2, #15 + add bf, x2, bias + + cmp bf, 9 + cset bias, cs + + mov x0, 16 + sub x0, x0, bf + cmp bias, xzr + csel ix, x0, bf, ne + +// Perform constant-time lookup in the table to get element number "ix". +// The table entry for the affine point (x,y) is actually a triple +// (y - x,x + y,2 * d * x * y) to precompute parts of the addition. +// Note that "ix" can be 0, so we set up the appropriate identity first. + + mov x0, #1 + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + + cmp ix, #1 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #2 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #3 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #4 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #5 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #6 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel 
x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #7 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #8 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + +// We now have the triple from the table in registers as follows +// +// [x3;x2;x1;x0] = y - x +// [x7;x6;x5;x4] = x + y +// [x11;x10;x9;x8] = 2 * d * x * y +// +// In case bias = 1 we need to negate this. For Edwards curves +// -(x,y) = (-x,y), i.e. we need to negate the x coordinate. +// In this processed encoding, that amounts to swapping the +// first two fields and negating the third. +// +// The optional negation here also pretends bias = 0 whenever +// ix = 0 so that it doesn't need to handle the case of zero +// inputs, since no non-trivial table entries are zero. Note +// that in the zero case the whole negation is trivial, and +// so indeed is the swapping. + + cmp bias, #0 + + csel x12, x0, x4, eq + csel x13, x1, x5, eq + csel x14, x2, x6, eq + csel x15, x3, x7, eq + stp x12, x13, [tabent] + stp x14, x15, [tabent+16] + + csel x12, x0, x4, ne + csel x13, x1, x5, ne + csel x14, x2, x6, ne + csel x15, x3, x7, ne + stp x12, x13, [tabent+32] + stp x14, x15, [tabent+48] + + mov x0, #-19 + subs x0, x0, x8 + mov x2, #-1 + sbcs x1, x2, x9 + sbcs x2, x2, x10 + mov x3, #0x7FFFFFFFFFFFFFFF + sbc x3, x3, x11 + + cmp ix, xzr + ccmp bias, xzr, #4, ne + + csel x0, x0, x8, ne + csel x1, x1, x9, ne + stp x0, x1, [tabent+64] + csel x2, x2, x10, ne + csel x3, x3, x11, ne + stp x2, x3, [tabent+80] + +// Extended-projective and precomputed mixed addition. +// This is effectively the same as calling the standalone +// function edwards25519_pepadd_alt(acc,acc,tabent), but we +// only retain slightly weaker normalization < 2 * p_25519 +// throughout the inner loop, so the computation is +// slightly different, and faster overall. + + double_twice4(t0,z_1) + sub_twice4(t1,y_1,x_1) + add_twice4(t2,y_1,x_1) + mul_4(t3,w_1,kxy_2) + mul_4(t1,t1,ymx_2) + mul_4(t2,t2,xpy_2) + sub_twice4(t4,t0,t3) + add_twice4(t0,t0,t3) + sub_twice4(t5,t2,t1) + add_twice4(t1,t2,t1) + mul_4(z_3,t4,t0) + mul_4(x_3,t5,t4) + mul_4(y_3,t0,t1) + mul_4(w_3,t5,t1) + +// End of the main loop; move on by 4 bits. + + add i, i, 4 + cmp i, 252 + bcc scalarloop + +// Insert the optional negation of the projective X coordinate, and +// so by extension the final affine x coordinate x = X/Z and thus +// the point P = (x,y). We only know X < 2 * p_25519, so we do the +// negation as 2 * p_25519 - X to keep it nonnegative. From this +// point on we don't need any normalization of the coordinates +// except for making sure that they fit in 4 digits. 
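In C terms the selection performed by the instructions just below looks roughly like this (2 * p_25519 = 2^256 - 38, so its low limb is the 0xffffffffffffffda seen in the code; names are illustrative):

    #include <stdint.h>

    /* Conditionally replace X (known only to satisfy X < 2*p_25519)
       with 2*p_25519 - X, selecting branch-free on the negation flag. */
    void opt_negate_x(uint64_t x[4], uint64_t flag /* 0 or 1 */)
    {
        uint64_t mask = (uint64_t)0 - flag;        /* all-ones when set */
        uint64_t n[4];
        unsigned __int128 d =
            (unsigned __int128)0xffffffffffffffdaULL - x[0];
        n[0] = (uint64_t)d;
        for (int i = 1; i < 4; i++) {              /* propagate borrows */
            d = (unsigned __int128)0xffffffffffffffffULL - x[i]
                - ((uint64_t)(d >> 64) & 1);
            n[i] = (uint64_t)d;
        }
        for (int i = 0; i < 4; i++)                /* csel equivalent */
            x[i] = (x[i] & ~mask) | (n[i] & mask);
    }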
+ + ldp x0, x1, [x_3] + ldp x2, x3, [x_3+16] + mov x4, #0xffffffffffffffda + subs x4, x4, x0 + mov x7, #0xffffffffffffffff + sbcs x5, x7, x1 + sbcs x6, x7, x2 + sbc x7, x7, x3 + ldr x10, [scalar+24] + tst x10, #0x8000000000000000 + csel x0, x4, x0, ne + csel x1, x5, x1, ne + csel x2, x6, x2, ne + csel x3, x7, x3, ne + stp x0, x1, [x_3] + stp x2, x3, [x_3+16] + +// Now we need to map out of the extended-projective representation +// (X,Y,Z,W) back to the affine form (x,y) = (X/Z,Y/Z). This means +// first calling the modular inverse to get w_3 = 1/z_3. + + mov x0, 4 + add x1, w_3 + add x2, z_3 + adr x3, p_25519 + add x4, tmpspace + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "arm/generic/bignum_modinv.S". + + lsl x10, x0, #3 + add x21, x4, x10 + add x22, x21, x10 + mov x10, xzr +copyloop: + ldr x11, [x2, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + str x11, [x21, x10, lsl #3] + str x12, [x22, x10, lsl #3] + str x12, [x4, x10, lsl #3] + str xzr, [x1, x10, lsl #3] + add x10, x10, #0x1 + cmp x10, x0 + b.cc copyloop + ldr x11, [x4] + sub x12, x11, #0x1 + str x12, [x4] + lsl x20, x11, #2 + sub x20, x11, x20 + eor x20, x20, #0x2 + mov x12, #0x1 + madd x12, x11, x20, x12 + mul x11, x12, x12 + madd x20, x12, x20, x20 + mul x12, x11, x11 + madd x20, x11, x20, x20 + mul x11, x12, x12 + madd x20, x12, x20, x20 + madd x20, x11, x20, x20 + lsl x2, x0, #7 +outerloop: + add x10, x2, #0x3f + lsr x5, x10, #6 + cmp x5, x0 + csel x5, x0, x5, cs + mov x13, xzr + mov x15, xzr + mov x14, xzr + mov x16, xzr + mov x19, xzr + mov x10, xzr +toploop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + orr x17, x11, x12 + cmp x17, xzr + and x17, x19, x13 + csel x15, x17, x15, ne + and x17, x19, x14 + csel x16, x17, x16, ne + csel x13, x11, x13, ne + csel x14, x12, x14, ne + csetm x19, ne + add x10, x10, #0x1 + cmp x10, x5 + b.cc toploop + orr x11, x13, x14 + clz x12, x11 + negs x17, x12 + lsl x13, x13, x12 + csel x15, x15, xzr, ne + lsl x14, x14, x12 + csel x16, x16, xzr, ne + lsr x15, x15, x17 + lsr x16, x16, x17 + orr x13, x13, x15 + orr x14, x14, x16 + ldr x15, [x21] + ldr x16, [x22] + mov x6, #0x1 + mov x7, xzr + mov x8, xzr + mov x9, #0x1 + mov x10, #0x3a + tst x15, #0x1 +innerloop: + csel x11, x14, xzr, ne + csel x12, x16, xzr, ne + csel x17, x8, xzr, ne + csel x19, x9, xzr, ne + ccmp x13, x14, #0x2, ne + sub x11, x13, x11 + sub x12, x15, x12 + csel x14, x14, x13, cs + cneg x11, x11, cc + csel x16, x16, x15, cs + cneg x15, x12, cc + csel x8, x8, x6, cs + csel x9, x9, x7, cs + tst x12, #0x2 + add x6, x6, x17 + add x7, x7, x19 + lsr x13, x11, #1 + lsr x15, x15, #1 + add x8, x8, x8 + add x9, x9, x9 + sub x10, x10, #0x1 + cbnz x10, innerloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +congloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + adds x15, x15, x16 + extr x17, x15, x17, #58 + str x17, [x4, x10, lsl #3] + mov x17, x15 + umulh x15, x7, x12 + adc x13, x13, x15 + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + adds x15, x15, x16 + extr x19, x15, x19, #58 + str x19, [x1, x10, lsl #3] + mov x19, x15 + umulh x15, x9, x12 + adc x14, x14, x15 + add x10, x10, #0x1 + cmp x10, x0 + b.cc congloop + extr x13, 
x13, x17, #58 + extr x14, x14, x19, #58 + ldr x11, [x4] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, wmontend +wmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x4, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wmontloop +wmontend: + adcs x16, x16, x13 + adc x13, xzr, xzr + sub x15, x10, #0x1 + str x16, [x4, x15, lsl #3] + negs x10, xzr +wcmploop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcmploop + sbcs xzr, x13, xzr + csetm x13, cs + negs x10, xzr +wcorrloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x13 + sbcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcorrloop + ldr x11, [x1] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, zmontend +zmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x1, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zmontloop +zmontend: + adcs x16, x16, x14 + adc x14, xzr, xzr + sub x15, x10, #0x1 + str x16, [x1, x15, lsl #3] + negs x10, xzr +zcmploop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcmploop + sbcs xzr, x14, xzr + csetm x14, cs + negs x10, xzr +zcorrloop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x14 + sbcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcorrloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +crossloop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + subs x15, x15, x16 + str x15, [x21, x10, lsl #3] + umulh x15, x7, x12 + sub x17, x15, x17 + sbcs x13, x13, x17 + csetm x17, cc + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + subs x15, x15, x16 + str x15, [x22, x10, lsl #3] + umulh x15, x9, x12 + sub x19, x15, x19 + sbcs x14, x14, x19 + csetm x19, cc + add x10, x10, #0x1 + cmp x10, x5 + b.cc crossloop + cmn x17, x17 + ldr x15, [x21] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip1 +negloop1: + add x11, x10, #0x8 + ldr x12, [x21, x11] + extr x15, x12, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop1 +negskip1: + extr x15, x13, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + cmn x19, x19 + ldr x15, [x22] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip2 +negloop2: + add x11, x10, #0x8 + ldr x12, [x22, x11] + extr x15, x12, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop2 +negskip2: + extr x15, x14, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x10, xzr + cmn x17, x17 +wfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, 
x10, lsl #3] + and x11, x11, x17 + eor x12, x12, x17 + adcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wfliploop + mvn x19, x19 + mov x10, xzr + cmn x19, x19 +zfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + and x11, x11, x19 + eor x12, x12, x19 + adcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zfliploop + subs x2, x2, #0x3a + b.hi outerloop + +// The final result is x = X * inv(Z), y = Y * inv(Z). +// These are the only operations in the whole computation that +// fully reduce modulo p_25519 since now we want the canonical +// answer as output. + + mul_p25519(resx,x_3,w_3) + mul_p25519(resy,y_3,w_3) + +// Restore stack and registers + + add sp, sp, #NSPACE + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// The modulus p_25519 = 2^255 - 19, for the modular inverse + +p_25519: + .quad 0xffffffffffffffed + .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x7fffffffffffffff + +// 0 * B = 0 and 2^251 * B in extended-projective coordinates +// but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. + +edwards25519_0g: + + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + + .quad 0x0000000000000001 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + +edwards25519_251g: + + .quad 0x525f946d7c7220e7 + .quad 0x4636b0b2f1e35444 + .quad 0x796e9d70e892ae0f + .quad 0x03dec05fa937adb1 + .quad 0x6d1c271cc6375515 + .quad 0x462588c4a4ca4f14 + .quad 0x691129fee55afc39 + .quad 0x15949f784d8472f5 + .quad 0xbd89e510afad0049 + .quad 0x4d1f08c073b9860e + .quad 0x07716e8b2d00af9d + .quad 0x70d685f68f859714 + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
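Concretely, each 96-byte block in the table below is one such triple, and index 0 (the neutral element, (x,y) = (0,1)) is synthesized in registers as (1,1,0) rather than stored. A C view of the layout and of the branch-free selection the csel chains above implement (illustrative names, not part of the patch):

    #include <stdint.h>

    typedef struct {
        uint64_t ymx[4];   /* y - x         */
        uint64_t xpy[4];   /* x + y         */
        uint64_t kxy[4];   /* 2 * d * x * y */
    } gtable_entry;        /* one 96-byte table block */

    /* Select entry ix from tab[0..7], holding 1*P..8*P, with memory
       access independent of ix; ix == 0 yields the neutral (1,1,0). */
    void select_entry(gtable_entry *out, const gtable_entry tab[8],
                      uint64_t ix)
    {
        static const gtable_entry neutral =
            { {1, 0, 0, 0}, {1, 0, 0, 0}, {0, 0, 0, 0} };
        *out = neutral;
        for (uint64_t j = 1; j <= 8; j++) {
            uint64_t mask = (uint64_t)0 - (uint64_t)(ix == j);
            const uint64_t *src = (const uint64_t *)&tab[j - 1];
            uint64_t *dst = (uint64_t *)out;
            for (int k = 0; k < 12; k++)   /* 12 limbs per entry */
                dst[k] = (dst[k] & ~mask) | (src[k] & mask);
        }
    }

Negating an entry, as the signed digits require, just swaps ymx with xpy and negates kxy mod p, since -(x,y) = (-x,y) on an Edwards curve, exactly as the comment in the lookup code notes.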
+ +edwards25519_gtable: + + // 2^0 * 1 * G + + .quad 0x9d103905d740913e + .quad 0xfd399f05d140beb3 + .quad 0xa5c18434688f8a09 + .quad 0x44fd2f9298f81267 + .quad 0x2fbc93c6f58c3b85 + .quad 0xcf932dc6fb8c0e19 + .quad 0x270b4898643d42c2 + .quad 0x07cf9d3a33d4ba65 + .quad 0xabc91205877aaa68 + .quad 0x26d9e823ccaac49e + .quad 0x5a1b7dcbdd43598c + .quad 0x6f117b689f0c65a8 + + // 2^0 * 2 * G + + .quad 0x8a99a56042b4d5a8 + .quad 0x8f2b810c4e60acf6 + .quad 0xe09e236bb16e37aa + .quad 0x6bb595a669c92555 + .quad 0x9224e7fc933c71d7 + .quad 0x9f469d967a0ff5b5 + .quad 0x5aa69a65e1d60702 + .quad 0x590c063fa87d2e2e + .quad 0x43faa8b3a59b7a5f + .quad 0x36c16bdd5d9acf78 + .quad 0x500fa0840b3d6a31 + .quad 0x701af5b13ea50b73 + + // 2^0 * 3 * G + + .quad 0x56611fe8a4fcd265 + .quad 0x3bd353fde5c1ba7d + .quad 0x8131f31a214bd6bd + .quad 0x2ab91587555bda62 + .quad 0xaf25b0a84cee9730 + .quad 0x025a8430e8864b8a + .quad 0xc11b50029f016732 + .quad 0x7a164e1b9a80f8f4 + .quad 0x14ae933f0dd0d889 + .quad 0x589423221c35da62 + .quad 0xd170e5458cf2db4c + .quad 0x5a2826af12b9b4c6 + + // 2^0 * 4 * G + + .quad 0x95fe050a056818bf + .quad 0x327e89715660faa9 + .quad 0xc3e8e3cd06a05073 + .quad 0x27933f4c7445a49a + .quad 0x287351b98efc099f + .quad 0x6765c6f47dfd2538 + .quad 0xca348d3dfb0a9265 + .quad 0x680e910321e58727 + .quad 0x5a13fbe9c476ff09 + .quad 0x6e9e39457b5cc172 + .quad 0x5ddbdcf9102b4494 + .quad 0x7f9d0cbf63553e2b + + // 2^0 * 5 * G + + .quad 0x7f9182c3a447d6ba + .quad 0xd50014d14b2729b7 + .quad 0xe33cf11cb864a087 + .quad 0x154a7e73eb1b55f3 + .quad 0xa212bc4408a5bb33 + .quad 0x8d5048c3c75eed02 + .quad 0xdd1beb0c5abfec44 + .quad 0x2945ccf146e206eb + .quad 0xbcbbdbf1812a8285 + .quad 0x270e0807d0bdd1fc + .quad 0xb41b670b1bbda72d + .quad 0x43aabe696b3bb69a + + // 2^0 * 6 * G + + .quad 0x499806b67b7d8ca4 + .quad 0x575be28427d22739 + .quad 0xbb085ce7204553b9 + .quad 0x38b64c41ae417884 + .quad 0x3a0ceeeb77157131 + .quad 0x9b27158900c8af88 + .quad 0x8065b668da59a736 + .quad 0x51e57bb6a2cc38bd + .quad 0x85ac326702ea4b71 + .quad 0xbe70e00341a1bb01 + .quad 0x53e4a24b083bc144 + .quad 0x10b8e91a9f0d61e3 + + // 2^0 * 7 * G + + .quad 0xba6f2c9aaa3221b1 + .quad 0x6ca021533bba23a7 + .quad 0x9dea764f92192c3a + .quad 0x1d6edd5d2e5317e0 + .quad 0x6b1a5cd0944ea3bf + .quad 0x7470353ab39dc0d2 + .quad 0x71b2528228542e49 + .quad 0x461bea69283c927e + .quad 0xf1836dc801b8b3a2 + .quad 0xb3035f47053ea49a + .quad 0x529c41ba5877adf3 + .quad 0x7a9fbb1c6a0f90a7 + + // 2^0 * 8 * G + + .quad 0xe2a75dedf39234d9 + .quad 0x963d7680e1b558f9 + .quad 0x2c2741ac6e3c23fb + .quad 0x3a9024a1320e01c3 + .quad 0x59b7596604dd3e8f + .quad 0x6cb30377e288702c + .quad 0xb1339c665ed9c323 + .quad 0x0915e76061bce52f + .quad 0xe7c1f5d9c9a2911a + .quad 0xb8a371788bcca7d7 + .quad 0x636412190eb62a32 + .quad 0x26907c5c2ecc4e95 + + // 2^4 * 1 * B + + .quad 0x7ec851ca553e2df3 + .quad 0xa71284cba64878b3 + .quad 0xe6b5e4193288d1e7 + .quad 0x4cf210ec5a9a8883 + .quad 0x322d04a52d9021f6 + .quad 0xb9c19f3375c6bf9c + .quad 0x587a3a4342d20b09 + .quad 0x143b1cf8aa64fe61 + .quad 0x9f867c7d968acaab + .quad 0x5f54258e27092729 + .quad 0xd0a7d34bea180975 + .quad 0x21b546a3374126e1 + + // 2^4 * 2 * B + + .quad 0xa94ff858a2888343 + .quad 0xce0ed4565313ed3c + .quad 0xf55c3dcfb5bf34fa + .quad 0x0a653ca5c9eab371 + .quad 0x490a7a45d185218f + .quad 0x9a15377846049335 + .quad 0x0060ea09cc31e1f6 + .quad 0x7e041577f86ee965 + .quad 0x66b2a496ce5b67f3 + .quad 0xff5492d8bd569796 + .quad 0x503cec294a592cd0 + .quad 0x566943650813acb2 + + // 2^4 * 3 * B + + .quad 0xb818db0c26620798 + .quad 0x5d5c31d9606e354a + 
+        .quad   0x0982fa4f00a8cdc7
+        .quad   0x17e12bcd4653e2d4
+        .quad   0x5672f9eb1dabb69d
+        .quad   0xba70b535afe853fc
+        .quad   0x47ac0f752796d66d
+        .quad   0x32a5351794117275
+        .quad   0xd3a644a6df648437
+        .quad   0x703b6559880fbfdd
+        .quad   0xcb852540ad3a1aa5
+        .quad   0x0900b3f78e4c6468
+
+// 2^4 * 4 * B
+
+        .quad   0x0a851b9f679d651b
+        .quad   0xe108cb61033342f2
+        .quad   0xd601f57fe88b30a3
+        .quad   0x371f3acaed2dd714
+        .quad   0xed280fbec816ad31
+        .quad   0x52d9595bd8e6efe3
+        .quad   0x0fe71772f6c623f5
+        .quad   0x4314030b051e293c
+        .quad   0xd560005efbf0bcad
+        .quad   0x8eb70f2ed1870c5e
+        .quad   0x201f9033d084e6a0
+        .quad   0x4c3a5ae1ce7b6670
+
+// 2^4 * 5 * B
+
+        .quad   0x4138a434dcb8fa95
+        .quad   0x870cf67d6c96840b
+        .quad   0xde388574297be82c
+        .quad   0x7c814db27262a55a
+        .quad   0xbaf875e4c93da0dd
+        .quad   0xb93282a771b9294d
+        .quad   0x80d63fb7f4c6c460
+        .quad   0x6de9c73dea66c181
+        .quad   0x478904d5a04df8f2
+        .quad   0xfafbae4ab10142d3
+        .quad   0xf6c8ac63555d0998
+        .quad   0x5aac4a412f90b104
+
+// 2^4 * 6 * B
+
+        .quad   0xc64f326b3ac92908
+        .quad   0x5551b282e663e1e0
+        .quad   0x476b35f54a1a4b83
+        .quad   0x1b9da3fe189f68c2
+        .quad   0x603a0d0abd7f5134
+        .quad   0x8089c932e1d3ae46
+        .quad   0xdf2591398798bd63
+        .quad   0x1c145cd274ba0235
+        .quad   0x32e8386475f3d743
+        .quad   0x365b8baf6ae5d9ef
+        .quad   0x825238b6385b681e
+        .quad   0x234929c1167d65e1
+
+// 2^4 * 7 * B
+
+        .quad   0x984decaba077ade8
+        .quad   0x383f77ad19eb389d
+        .quad   0xc7ec6b7e2954d794
+        .quad   0x59c77b3aeb7c3a7a
+        .quad   0x48145cc21d099fcf
+        .quad   0x4535c192cc28d7e5
+        .quad   0x80e7c1e548247e01
+        .quad   0x4a5f28743b2973ee
+        .quad   0xd3add725225ccf62
+        .quad   0x911a3381b2152c5d
+        .quad   0xd8b39fad5b08f87d
+        .quad   0x6f05606b4799fe3b
+
+// 2^4 * 8 * B
+
+        .quad   0x9ffe9e92177ba962
+        .quad   0x98aee71d0de5cae1
+        .quad   0x3ff4ae942d831044
+        .quad   0x714de12e58533ac8
+        .quad   0x5b433149f91b6483
+        .quad   0xadb5dc655a2cbf62
+        .quad   0x87fa8412632827b3
+        .quad   0x60895e91ab49f8d8
+        .quad   0xe9ecf2ed0cf86c18
+        .quad   0xb46d06120735dfd4
+        .quad   0xbc9da09804b96be7
+        .quad   0x73e2e62fd96dc26b
+
+// 2^8 * 1 * B
+
+        .quad   0xed5b635449aa515e
+        .quad   0xa865c49f0bc6823a
+        .quad   0x850c1fe95b42d1c4
+        .quad   0x30d76d6f03d315b9
+        .quad   0x2eccdd0e632f9c1d
+        .quad   0x51d0b69676893115
+        .quad   0x52dfb76ba8637a58
+        .quad   0x6dd37d49a00eef39
+        .quad   0x6c4444172106e4c7
+        .quad   0xfb53d680928d7f69
+        .quad   0xb4739ea4694d3f26
+        .quad   0x10c697112e864bb0
+
+// 2^8 * 2 * B
+
+        .quad   0x6493c4277dbe5fde
+        .quad   0x265d4fad19ad7ea2
+        .quad   0x0e00dfc846304590
+        .quad   0x25e61cabed66fe09
+        .quad   0x0ca62aa08358c805
+        .quad   0x6a3d4ae37a204247
+        .quad   0x7464d3a63b11eddc
+        .quad   0x03bf9baf550806ef
+        .quad   0x3f13e128cc586604
+        .quad   0x6f5873ecb459747e
+        .quad   0xa0b63dedcc1268f5
+        .quad   0x566d78634586e22c
+
+// 2^8 * 3 * B
+
+        .quad   0x1637a49f9cc10834
+        .quad   0xbc8e56d5a89bc451
+        .quad   0x1cb5ec0f7f7fd2db
+        .quad   0x33975bca5ecc35d9
+        .quad   0xa1054285c65a2fd0
+        .quad   0x6c64112af31667c3
+        .quad   0x680ae240731aee58
+        .quad   0x14fba5f34793b22a
+        .quad   0x3cd746166985f7d4
+        .quad   0x593e5e84c9c80057
+        .quad   0x2fc3f2b67b61131e
+        .quad   0x14829cea83fc526c
+
+// 2^8 * 4 * B
+
+        .quad   0xff437b8497dd95c2
+        .quad   0x6c744e30aa4eb5a7
+        .quad   0x9e0c5d613c85e88b
+        .quad   0x2fd9c71e5f758173
+        .quad   0x21e70b2f4e71ecb8
+        .quad   0xe656ddb940a477e3
+        .quad   0xbf6556cece1d4f80
+        .quad   0x05fc3bc4535d7b7e
+        .quad   0x24b8b3ae52afdedd
+        .quad   0x3495638ced3b30cf
+        .quad   0x33a4bc83a9be8195
+        .quad   0x373767475c651f04
+
+// 2^8 * 5 * B
+
+        .quad   0x2fba99fd40d1add9
+        .quad   0xb307166f96f4d027
+        .quad   0x4363f05215f03bae
+        .quad   0x1fbea56c3b18f999
+        .quad   0x634095cb14246590
+        .quad   0xef12144016c15535
+        .quad   0x9e38140c8910bc60
+        .quad   0x6bf5905730907c8c
+        .quad   0x0fa778f1e1415b8a
+        .quad   0x06409ff7bac3a77e
+        .quad   0x6f52d7b89aa29a50
+        .quad   0x02521cf67a635a56
+
+// 2^8 * 6 * B
+
+        .quad   0x513fee0b0a9d5294
+        .quad   0x8f98e75c0fdf5a66
+        .quad   0xd4618688bfe107ce
+        .quad   0x3fa00a7e71382ced
+        .quad   0xb1146720772f5ee4
+        .quad   0xe8f894b196079ace
+        .quad   0x4af8224d00ac824a
+        .quad   0x001753d9f7cd6cc4
+        .quad   0x3c69232d963ddb34
+        .quad   0x1dde87dab4973858
+        .quad   0xaad7d1f9a091f285
+        .quad   0x12b5fe2fa048edb6
+
+// 2^8 * 7 * B
+
+        .quad   0x71f0fbc496fce34d
+        .quad   0x73b9826badf35bed
+        .quad   0xd2047261ff28c561
+        .quad   0x749b76f96fb1206f
+        .quad   0xdf2b7c26ad6f1e92
+        .quad   0x4b66d323504b8913
+        .quad   0x8c409dc0751c8bc3
+        .quad   0x6f7e93c20796c7b8
+        .quad   0x1f5af604aea6ae05
+        .quad   0xc12351f1bee49c99
+        .quad   0x61a808b5eeff6b66
+        .quad   0x0fcec10f01e02151
+
+// 2^8 * 8 * B
+
+        .quad   0x644d58a649fe1e44
+        .quad   0x21fcaea231ad777e
+        .quad   0x02441c5a887fd0d2
+        .quad   0x4901aa7183c511f3
+        .quad   0x3df2d29dc4244e45
+        .quad   0x2b020e7493d8de0a
+        .quad   0x6cc8067e820c214d
+        .quad   0x413779166feab90a
+        .quad   0x08b1b7548c1af8f0
+        .quad   0xce0f7a7c246299b4
+        .quad   0xf760b0f91e06d939
+        .quad   0x41bb887b726d1213
+
+// 2^12 * 1 * B
+
+        .quad   0x9267806c567c49d8
+        .quad   0x066d04ccca791e6a
+        .quad   0xa69f5645e3cc394b
+        .quad   0x5c95b686a0788cd2
+        .quad   0x97d980e0aa39f7d2
+        .quad   0x35d0384252c6b51c
+        .quad   0x7d43f49307cd55aa
+        .quad   0x56bd36cfb78ac362
+        .quad   0x2ac519c10d14a954
+        .quad   0xeaf474b494b5fa90
+        .quad   0xe6af8382a9f87a5a
+        .quad   0x0dea6db1879be094
+
+// 2^12 * 2 * B
+
+        .quad   0xaa66bf547344e5ab
+        .quad   0xda1258888f1b4309
+        .quad   0x5e87d2b3fd564b2f
+        .quad   0x5b2c78885483b1dd
+        .quad   0x15baeb74d6a8797a
+        .quad   0x7ef55cf1fac41732
+        .quad   0x29001f5a3c8b05c5
+        .quad   0x0ad7cc8752eaccfb
+        .quad   0x52151362793408cf
+        .quad   0xeb0f170319963d94
+        .quad   0xa833b2fa883d9466
+        .quad   0x093a7fa775003c78
+
+// 2^12 * 3 * B
+
+        .quad   0xe5107de63a16d7be
+        .quad   0xa377ffdc9af332cf
+        .quad   0x70d5bf18440b677f
+        .quad   0x6a252b19a4a31403
+        .quad   0xb8e9604460a91286
+        .quad   0x7f3fd8047778d3de
+        .quad   0x67d01e31bf8a5e2d
+        .quad   0x7b038a06c27b653e
+        .quad   0x9ed919d5d36990f3
+        .quad   0x5213aebbdb4eb9f2
+        .quad   0xc708ea054cb99135
+        .quad   0x58ded57f72260e56
+
+// 2^12 * 4 * B
+
+        .quad   0x78e79dade9413d77
+        .quad   0xf257f9d59729e67d
+        .quad   0x59db910ee37aa7e6
+        .quad   0x6aa11b5bbb9e039c
+        .quad   0xda6d53265b0fd48b
+        .quad   0x8960823193bfa988
+        .quad   0xd78ac93261d57e28
+        .quad   0x79f2942d3a5c8143
+        .quad   0x97da2f25b6c88de9
+        .quad   0x251ba7eaacf20169
+        .quad   0x09b44f87ef4eb4e4
+        .quad   0x7d90ab1bbc6a7da5
+
+// 2^12 * 5 * B
+
+        .quad   0x9acca683a7016bfe
+        .quad   0x90505f4df2c50b6d
+        .quad   0x6b610d5fcce435aa
+        .quad   0x19a10d446198ff96
+        .quad   0x1a07a3f496b3c397
+        .quad   0x11ceaa188f4e2532
+        .quad   0x7d9498d5a7751bf0
+        .quad   0x19ed161f508dd8a0
+        .quad   0x560a2cd687dce6ca
+        .quad   0x7f3568c48664cf4d
+        .quad   0x8741e95222803a38
+        .quad   0x483bdab1595653fc
+
+// 2^12 * 6 * B
+
+        .quad   0xfa780f148734fa49
+        .quad   0x106f0b70360534e0
+        .quad   0x2210776fe3e307bd
+        .quad   0x3286c109dde6a0fe
+        .quad   0xd6cf4d0ab4da80f6
+        .quad   0x82483e45f8307fe0
+        .quad   0x05005269ae6f9da4
+        .quad   0x1c7052909cf7877a
+        .quad   0x32ee7de2874e98d4
+        .quad   0x14c362e9b97e0c60
+        .quad   0x5781dcde6a60a38a
+        .quad   0x217dd5eaaa7aa840
+
+// 2^12 * 7 * B
+
+        .quad   0x9db7c4d0248e1eb0
+        .quad   0xe07697e14d74bf52
+        .quad   0x1e6a9b173c562354
+        .quad   0x7fa7c21f795a4965
+        .quad   0x8bdf1fb9be8c0ec8
+        .quad   0x00bae7f8e30a0282
+        .quad   0x4963991dad6c4f6c
+        .quad   0x07058a6e5df6f60a
+        .quad   0xe9eb02c4db31f67f
+        .quad   0xed25fd8910bcfb2b
+        .quad   0x46c8131f5c5cddb4
+        .quad   0x33b21c13a0cb9bce
+
+// 2^12 * 8 * B
+
+        .quad   0x360692f8087d8e31
+        .quad   0xf4dcc637d27163f7
+        .quad   0x25a4e62065ea5963
+        .quad   0x659bf72e5ac160d9
+        .quad   0x9aafb9b05ee38c5b
+        .quad   0xbf9d2d4e071a13c7
+        .quad   0x8eee6e6de933290a
+        .quad   0x1c3bab17ae109717
+        .quad   0x1c9ab216c7cab7b0
+        .quad   0x7d65d37407bbc3cc
+        .quad   0x52744750504a58d5
+        .quad   0x09f2606b131a2990
+
+// 2^16 * 1 * B
+
+        .quad   0x40e87d44744346be
+        .quad   0x1d48dad415b52b25
+        .quad   0x7c3a8a18a13b603e
+        .quad   0x4eb728c12fcdbdf7
+        .quad   0x7e234c597c6691ae
+        .quad   0x64889d3d0a85b4c8
+        .quad   0xdae2c90c354afae7
+        .quad   0x0a871e070c6a9e1d
+        .quad   0x3301b5994bbc8989
+        .quad   0x736bae3a5bdd4260
+        .quad   0x0d61ade219d59e3c
+        .quad   0x3ee7300f2685d464
+
+// 2^16 * 2 * B
+
+        .quad   0xf5d255e49e7dd6b7
+        .quad   0x8016115c610b1eac
+        .quad   0x3c99975d92e187ca
+        .quad   0x13815762979125c2
+        .quad   0x43fa7947841e7518
+        .quad   0xe5c6fa59639c46d7
+        .quad   0xa1065e1de3052b74
+        .quad   0x7d47c6a2cfb89030
+        .quad   0x3fdad0148ef0d6e0
+        .quad   0x9d3e749a91546f3c
+        .quad   0x71ec621026bb8157
+        .quad   0x148cf58d34c9ec80
+
+// 2^16 * 3 * B
+
+        .quad   0x46a492f67934f027
+        .quad   0x469984bef6840aa9
+        .quad   0x5ca1bc2a89611854
+        .quad   0x3ff2fa1ebd5dbbd4
+        .quad   0xe2572f7d9ae4756d
+        .quad   0x56c345bb88f3487f
+        .quad   0x9fd10b6d6960a88d
+        .quad   0x278febad4eaea1b9
+        .quad   0xb1aa681f8c933966
+        .quad   0x8c21949c20290c98
+        .quad   0x39115291219d3c52
+        .quad   0x4104dd02fe9c677b
+
+// 2^16 * 4 * B
+
+        .quad   0x72b2bf5e1124422a
+        .quad   0xa1fa0c3398a33ab5
+        .quad   0x94cb6101fa52b666
+        .quad   0x2c863b00afaf53d5
+        .quad   0x81214e06db096ab8
+        .quad   0x21a8b6c90ce44f35
+        .quad   0x6524c12a409e2af5
+        .quad   0x0165b5a48efca481
+        .quad   0xf190a474a0846a76
+        .quad   0x12eff984cd2f7cc0
+        .quad   0x695e290658aa2b8f
+        .quad   0x591b67d9bffec8b8
+
+// 2^16 * 5 * B
+
+        .quad   0x312f0d1c80b49bfa
+        .quad   0x5979515eabf3ec8a
+        .quad   0x727033c09ef01c88
+        .quad   0x3de02ec7ca8f7bcb
+        .quad   0x99b9b3719f18b55d
+        .quad   0xe465e5faa18c641e
+        .quad   0x61081136c29f05ed
+        .quad   0x489b4f867030128b
+        .quad   0xd232102d3aeb92ef
+        .quad   0xe16253b46116a861
+        .quad   0x3d7eabe7190baa24
+        .quad   0x49f5fbba496cbebf
+
+// 2^16 * 6 * B
+
+        .quad   0x30949a108a5bcfd4
+        .quad   0xdc40dd70bc6473eb
+        .quad   0x92c294c1307c0d1c
+        .quad   0x5604a86dcbfa6e74
+        .quad   0x155d628c1e9c572e
+        .quad   0x8a4d86acc5884741
+        .quad   0x91a352f6515763eb
+        .quad   0x06a1a6c28867515b
+        .quad   0x7288d1d47c1764b6
+        .quad   0x72541140e0418b51
+        .quad   0x9f031a6018acf6d1
+        .quad   0x20989e89fe2742c6
+
+// 2^16 * 7 * B
+
+        .quad   0x499777fd3a2dcc7f
+        .quad   0x32857c2ca54fd892
+        .quad   0xa279d864d207e3a0
+        .quad   0x0403ed1d0ca67e29
+        .quad   0x1674278b85eaec2e
+        .quad   0x5621dc077acb2bdf
+        .quad   0x640a4c1661cbf45a
+        .quad   0x730b9950f70595d3
+        .quad   0xc94b2d35874ec552
+        .quad   0xc5e6c8cf98246f8d
+        .quad   0xf7cb46fa16c035ce
+        .quad   0x5bd7454308303dcc
+
+// 2^16 * 8 * B
+
+        .quad   0x7f9ad19528b24cc2
+        .quad   0x7f6b54656335c181
+        .quad   0x66b8b66e4fc07236
+        .quad   0x133a78007380ad83
+        .quad   0x85c4932115e7792a
+        .quad   0xc64c89a2bdcdddc9
+        .quad   0x9d1e3da8ada3d762
+        .quad   0x5bb7db123067f82c
+        .quad   0x0961f467c6ca62be
+        .quad   0x04ec21d6211952ee
+        .quad   0x182360779bd54770
+        .quad   0x740dca6d58f0e0d2
+
+// 2^20 * 1 * B
+
+        .quad   0x50b70bf5d3f0af0b
+        .quad   0x4feaf48ae32e71f7
+        .quad   0x60e84ed3a55bbd34
+        .quad   0x00ed489b3f50d1ed
+        .quad   0x3906c72aed261ae5
+        .quad   0x9ab68fd988e100f7
+        .quad   0xf5e9059af3360197
+        .quad   0x0e53dc78bf2b6d47
+        .quad   0xb90829bf7971877a
+        .quad   0x5e4444636d17e631
+        .quad   0x4d05c52e18276893
+        .quad   0x27632d9a5a4a4af5
+
+// 2^20 * 2 * B
+
+        .quad   0xd11ff05154b260ce
+        .quad   0xd86dc38e72f95270
+        .quad   0x601fcd0d267cc138
+        .quad   0x2b67916429e90ccd
+        .quad   0xa98285d187eaffdb
+        .quad   0xa5b4fbbbd8d0a864
+        .quad   0xb658f27f022663f7
+        .quad   0x3bbc2b22d99ce282
+        .quad   0xb917c952583c0a58
+        .quad   0x653ff9b80fe4c6f3
+        .quad   0x9b0da7d7bcdf3c0c
+        .quad   0x43a0eeb6ab54d60e
+
+// 2^20 * 3 * B
+
+        .quad   0x396966a46d4a5487
+        .quad   0xf811a18aac2bb3ba
+        .quad   0x66e4685b5628b26b
+        .quad   0x70a477029d929b92
+        .quad   0x3ac6322357875fe8
+        .quad   0xd9d4f4ecf5fbcb8f
+        .quad   0x8dee8493382bb620
+        .quad   0x50c5eaa14c799fdc
+        .quad   0xdd0edc8bd6f2fb3c
+        .quad   0x54c63aa79cc7b7a0
+        .quad   0xae0b032b2c8d9f1a
+        .quad   0x6f9ce107602967fb
+
+// 2^20 * 4 * B
+
+        .quad   0xad1054b1cde1c22a
+        .quad   0xc4a8e90248eb32df
+        .quad   0x5f3e7b33accdc0ea
+        .quad   0x72364713fc79963e
+        .quad   0x139693063520e0b5
+        .quad   0x437fcf7c88ea03fe
+        .quad   0xf7d4c40bd3c959bc
+        .quad   0x699154d1f893ded9
+        .quad   0x315d5c75b4b27526
+        .quad   0xcccb842d0236daa5
+        .quad   0x22f0c8a3345fee8e
+        .quad   0x73975a617d39dbed
+
+// 2^20 * 5 * B
+
+        .quad   0xe4024df96375da10
+        .quad   0x78d3251a1830c870
+        .quad   0x902b1948658cd91c
+        .quad   0x7e18b10b29b7438a
+        .quad   0x6f37f392f4433e46
+        .quad   0x0e19b9a11f566b18
+        .quad   0x220fb78a1fd1d662
+        .quad   0x362a4258a381c94d
+        .quad   0x9071d9132b6beb2f
+        .quad   0x0f26e9ad28418247
+        .quad   0xeab91ec9bdec925d
+        .quad   0x4be65bc8f48af2de
+
+// 2^20 * 6 * B
+
+        .quad   0x78487feba36e7028
+        .quad   0x5f3f13001dd8ce34
+        .quad   0x934fb12d4b30c489
+        .quad   0x056c244d397f0a2b
+        .quad   0x1d50fba257c26234
+        .quad   0x7bd4823adeb0678b
+        .quad   0xc2b0dc6ea6538af5
+        .quad   0x5665eec6351da73e
+        .quad   0xdb3ee00943bfb210
+        .quad   0x4972018720800ac2
+        .quad   0x26ab5d6173bd8667
+        .quad   0x20b209c2ab204938
+
+// 2^20 * 7 * B
+
+        .quad   0x549e342ac07fb34b
+        .quad   0x02d8220821373d93
+        .quad   0xbc262d70acd1f567
+        .quad   0x7a92c9fdfbcac784
+        .quad   0x1fcca94516bd3289
+        .quad   0x448d65aa41420428
+        .quad   0x59c3b7b216a55d62
+        .quad   0x49992cc64e612cd8
+        .quad   0x65bd1bea70f801de
+        .quad   0x1befb7c0fe49e28a
+        .quad   0xa86306cdb1b2ae4a
+        .quad   0x3b7ac0cd265c2a09
+
+// 2^20 * 8 * B
+
+        .quad   0x822bee438c01bcec
+        .quad   0x530cb525c0fbc73b
+        .quad   0x48519034c1953fe9
+        .quad   0x265cc261e09a0f5b
+        .quad   0xf0d54e4f22ed39a7
+        .quad   0xa2aae91e5608150a
+        .quad   0xf421b2e9eddae875
+        .quad   0x31bc531d6b7de992
+        .quad   0xdf3d134da980f971
+        .quad   0x7a4fb8d1221a22a7
+        .quad   0x3df7d42035aad6d8
+        .quad   0x2a14edcc6a1a125e
+
+// 2^24 * 1 * B
+
+        .quad   0xdf48ee0752cfce4e
+        .quad   0xc3fffaf306ec08b7
+        .quad   0x05710b2ab95459c4
+        .quad   0x161d25fa963ea38d
+        .quad   0x231a8c570478433c
+        .quad   0xb7b5270ec281439d
+        .quad   0xdbaa99eae3d9079f
+        .quad   0x2c03f5256c2b03d9
+        .quad   0x790f18757b53a47d
+        .quad   0x307b0130cf0c5879
+        .quad   0x31903d77257ef7f9
+        .quad   0x699468bdbd96bbaf
+
+// 2^24 * 2 * B
+
+        .quad   0xbd1f2f46f4dafecf
+        .quad   0x7cef0114a47fd6f7
+        .quad   0xd31ffdda4a47b37f
+        .quad   0x525219a473905785
+        .quad   0xd8dd3de66aa91948
+        .quad   0x485064c22fc0d2cc
+        .quad   0x9b48246634fdea2f
+        .quad   0x293e1c4e6c4a2e3a
+        .quad   0x376e134b925112e1
+        .quad   0x703778b5dca15da0
+        .quad   0xb04589af461c3111
+        .quad   0x5b605c447f032823
+
+// 2^24 * 3 * B
+
+        .quad   0xb965805920c47c89
+        .quad   0xe7f0100c923b8fcc
+        .quad   0x0001256502e2ef77
+        .quad   0x24a76dcea8aeb3ee
+        .quad   0x3be9fec6f0e7f04c
+        .quad   0x866a579e75e34962
+        .quad   0x5542ef161e1de61a
+        .quad   0x2f12fef4cc5abdd5
+        .quad   0x0a4522b2dfc0c740
+        .quad   0x10d06e7f40c9a407
+        .quad   0xc6cf144178cff668
+        .quad   0x5e607b2518a43790
+
+// 2^24 * 4 * B
+
+        .quad   0x58b31d8f6cdf1818
+        .quad   0x35cfa74fc36258a2
+        .quad   0xe1b3ff4f66e61d6e
+        .quad   0x5067acab6ccdd5f7
+        .quad   0xa02c431ca596cf14
+        .quad   0xe3c42d40aed3e400
+        .quad   0xd24526802e0f26db
+        .quad   0x201f33139e457068
+        .quad   0xfd527f6b08039d51
+        .quad   0x18b14964017c0006
+        .quad   0xd5220eb02e25a4a8
+        .quad   0x397cba8862460375
+
+// 2^24 * 5 * B
+
+        .quad   0x30c13093f05959b2
+        .quad   0xe23aa18de9a97976
+        .quad   0x222fd491721d5e26
+        .quad   0x2339d320766e6c3a
+        .quad   0x7815c3fbc81379e7
+        .quad   0xa6619420dde12af1
+        .quad   0xffa9c0f885a8fdd5
+        .quad   0x771b4022c1e1c252
+        .quad   0xd87dd986513a2fa7
+        .quad   0xf5ac9b71f9d4cf08
+        .quad   0xd06bc31b1ea283b3
+        .quad   0x331a189219971a76
+
+// 2^24 * 6 * B
+
+        .quad   0xf5166f45fb4f80c6
+        .quad   0x9c36c7de61c775cf
+        .quad   0xe3d4e81b9041d91c
+        .quad   0x31167c6b83bdfe21
+        .quad   0x26512f3a9d7572af
+        .quad   0x5bcbe28868074a9e
+        .quad   0x84edc1c11180f7c4
+        .quad   0x1ac9619ff649a67b
+        .quad   0xf22b3842524b1068
+        .quad   0x5068343bee9ce987
+        .quad   0xfc9d71844a6250c8
+        .quad   0x612436341f08b111
+
+// 2^24 * 7 * B
+
+        .quad   0xd99d41db874e898d
+        .quad   0x09fea5f16c07dc20
+        .quad   0x793d2c67d00f9bbc
+        .quad   0x46ebe2309e5eff40
+        .quad   0x8b6349e31a2d2638
+        .quad   0x9ddfb7009bd3fd35
+        .quad   0x7f8bf1b8a3a06ba4
+        .quad   0x1522aa3178d90445
+        .quad   0x2c382f5369614938
+        .quad   0xdafe409ab72d6d10
+        .quad   0xe8c83391b646f227
+        .quad   0x45fe70f50524306c
+
+// 2^24 * 8 * B
+
+        .quad   0xda4875a6960c0b8c
+        .quad   0x5b68d076ef0e2f20
+        .quad   0x07fb51cf3d0b8fd4
+        .quad   0x428d1623a0e392d4
+        .quad   0x62f24920c8951491
+        .quad   0x05f007c83f630ca2
+        .quad   0x6fbb45d2f5c9d4b8
+        .quad   0x16619f6db57a2245
+        .quad   0x084f4a4401a308fd
+        .quad   0xa82219c376a5caac
+        .quad   0xdeb8de4643d1bc7d
+        .quad   0x1d81592d60bd38c6
+
+// 2^28 * 1 * B
+
+        .quad   0xd833d7beec2a4c38
+        .quad   0x2c9162830acc20ed
+        .quad   0xe93a47aa92df7581
+        .quad   0x702d67a3333c4a81
+        .quad   0x3a4a369a2f89c8a1
+        .quad   0x63137a1d7c8de80d
+        .quad   0xbcac008a78eda015
+        .quad   0x2cb8b3a5b483b03f
+        .quad   0x36e417cbcb1b90a1
+        .quad   0x33b3ddaa7f11794e
+        .quad   0x3f510808885bc607
+        .quad   0x24141dc0e6a8020d
+
+// 2^28 * 2 * B
+
+        .quad   0x59f73c773fefee9d
+        .quad   0xb3f1ef89c1cf989d
+        .quad   0xe35dfb42e02e545f
+        .quad   0x5766120b47a1b47c
+        .quad   0x91925dccbd83157d
+        .quad   0x3ca1205322cc8094
+        .quad   0x28e57f183f90d6e4
+        .quad   0x1a4714cede2e767b
+        .quad   0xdb20ba0fb8b6b7ff
+        .quad   0xb732c3b677511fa1
+        .quad   0xa92b51c099f02d89
+        .quad   0x4f3875ad489ca5f1
+
+// 2^28 * 3 * B
+
+        .quad   0xc7fc762f4932ab22
+        .quad   0x7ac0edf72f4c3c1b
+        .quad   0x5f6b55aa9aa895e8
+        .quad   0x3680274dad0a0081
+        .quad   0x79ed13f6ee73eec0
+        .quad   0xa5c6526d69110bb1
+        .quad   0xe48928c38603860c
+        .quad   0x722a1446fd7059f5
+        .quad   0xd0959fe9a8cf8819
+        .quad   0xd0a995508475a99c
+        .quad   0x6eac173320b09cc5
+        .quad   0x628ecf04331b1095
+
+// 2^28 * 4 * B
+
+        .quad   0x98bcb118a9d0ddbc
+        .quad   0xee449e3408b4802b
+        .quad   0x87089226b8a6b104
+        .quad   0x685f349a45c7915d
+        .quad   0x9b41acf85c74ccf1
+        .quad   0xb673318108265251
+        .quad   0x99c92aed11adb147
+        .quad   0x7a47d70d34ecb40f
+        .quad   0x60a0c4cbcc43a4f5
+        .quad   0x775c66ca3677bea9
+        .quad   0xa17aa1752ff8f5ed
+        .quad   0x11ded9020e01fdc0
+
+// 2^28 * 5 * B
+
+        .quad   0x890e7809caefe704
+        .quad   0x8728296de30e8c6c
+        .quad   0x4c5cd2a392aeb1c9
+        .quad   0x194263d15771531f
+        .quad   0x471f95b03bea93b7
+        .quad   0x0552d7d43313abd3
+        .quad   0xbd9370e2e17e3f7b
+        .quad   0x7b120f1db20e5bec
+        .quad   0x17d2fb3d86502d7a
+        .quad   0xb564d84450a69352
+        .quad   0x7da962c8a60ed75d
+        .quad   0x00d0f85b318736aa
+
+// 2^28 * 6 * B
+
+        .quad   0x978b142e777c84fd
+        .quad   0xf402644705a8c062
+        .quad   0xa67ad51be7e612c7
+        .quad   0x2f7b459698dd6a33
+        .quad   0xa6753c1efd7621c1
+        .quad   0x69c0b4a7445671f5
+        .quad   0x971f527405b23c11
+        .quad   0x387bc74851a8c7cd
+        .quad   0x81894b4d4a52a9a8
+        .quad   0xadd93e12f6b8832f
+        .quad   0x184d8548b61bd638
+        .quad   0x3f1c62dbd6c9f6cd
+
+// 2^28 * 7 * B
+
+        .quad   0x2e8f1f0091910c1f
+        .quad   0xa4df4fe0bff2e12c
+        .quad   0x60c6560aee927438
+        .quad   0x6338283facefc8fa
+        .quad   0x3fad3e40148f693d
+        .quad   0x052656e194eb9a72
+        .quad   0x2f4dcbfd184f4e2f
+        .quad   0x406f8db1c482e18b
+        .quad   0x9e630d2c7f191ee4
+        .quad   0x4fbf8301bc3ff670
+        .quad   0x787d8e4e7afb73c4
+        .quad   0x50d83d5be8f58fa5
+
+// 2^28 * 8 * B
+
+        .quad   0x85683916c11a1897
+        .quad   0x2d69a4efe506d008
+        .quad   0x39af1378f664bd01
+        .quad   0x65942131361517c6
+        .quad   0xc0accf90b4d3b66d
+        .quad   0xa7059de561732e60
+        .quad   0x033d1f7870c6b0ba
+        .quad   0x584161cd26d946e4
+        .quad   0xbbf2b1a072d27ca2
+        .quad   0xbf393c59fbdec704
+        .quad   0xe98dbbcee262b81e
+        .quad   0x02eebd0b3029b589
+
+// 2^32 * 1 * B
+
+        .quad   0x61368756a60dac5f
+        .quad   0x17e02f6aebabdc57
+        .quad   0x7f193f2d4cce0f7d
+        .quad   0x20234a7789ecdcf0
+        .quad   0x8765b69f7b85c5e8
+        .quad   0x6ff0678bd168bab2
+        .quad   0x3a70e77c1d330f9b
+        .quad   0x3a5f6d51b0af8e7c
+        .quad   0x76d20db67178b252
+        .quad   0x071c34f9d51ed160
+        .quad   0xf62a4a20b3e41170
+        .quad   0x7cd682353cffe366
+
+// 2^32 * 2 * B
+
+        .quad   0x0be1a45bd887fab6
+        .quad   0x2a846a32ba403b6e
+        .quad   0xd9921012e96e6000
+        .quad   0x2838c8863bdc0943
+        .quad   0xa665cd6068acf4f3
+        .quad   0x42d92d183cd7e3d3
+        .quad   0x5759389d336025d9
+        .quad   0x3ef0253b2b2cd8ff
+        .quad   0xd16bb0cf4a465030
+        .quad   0xfa496b4115c577ab
+        .quad   0x82cfae8af4ab419d
+        .quad   0x21dcb8a606a82812
+
+// 2^32 * 3 * B
+
+        .quad   0x5c6004468c9d9fc8
+        .quad   0x2540096ed42aa3cb
+        .quad   0x125b4d4c12ee2f9c
+        .quad   0x0bc3d08194a31dab
+        .quad   0x9a8d00fabe7731ba
+        .quad   0x8203607e629e1889
+        .quad   0xb2cc023743f3d97f
+        .quad   0x5d840dbf6c6f678b
+        .quad   0x706e380d309fe18b
+        .quad   0x6eb02da6b9e165c7
+        .quad   0x57bbba997dae20ab
+        .quad   0x3a4276232ac196dd
+
+// 2^32 * 4 * B
+
+        .quad   0x4b42432c8a7084fa
+        .quad   0x898a19e3dfb9e545
+        .quad   0xbe9f00219c58e45d
+        .quad   0x1ff177cea16debd1
+        .quad   0x3bf8c172db447ecb
+        .quad   0x5fcfc41fc6282dbd
+        .quad   0x80acffc075aa15fe
+        .quad   0x0770c9e824e1a9f9
+        .quad   0xcf61d99a45b5b5fd
+        .quad   0x860984e91b3a7924
+        .quad   0xe7300919303e3e89
+        .quad   0x39f264fd41500b1e
+
+// 2^32 * 5 * B
+
+        .quad   0xa7ad3417dbe7e29c
+        .quad   0xbd94376a2b9c139c
+        .quad   0xa0e91b8e93597ba9
+        .quad   0x1712d73468889840
+        .quad   0xd19b4aabfe097be1
+        .quad   0xa46dfce1dfe01929
+        .quad   0xc3c908942ca6f1ff
+        .quad   0x65c621272c35f14e
+        .quad   0xe72b89f8ce3193dd
+        .quad   0x4d103356a125c0bb
+        .quad   0x0419a93d2e1cfe83
+        .quad   0x22f9800ab19ce272
+
+// 2^32 * 6 * B
+
+        .quad   0x605a368a3e9ef8cb
+        .quad   0xe3e9c022a5504715
+        .quad   0x553d48b05f24248f
+        .quad   0x13f416cd647626e5
+        .quad   0x42029fdd9a6efdac
+        .quad   0xb912cebe34a54941
+        .quad   0x640f64b987bdf37b
+        .quad   0x4171a4d38598cab4
+        .quad   0xfa2758aa99c94c8c
+        .quad   0x23006f6fb000b807
+        .quad   0xfbd291ddadda5392
+        .quad   0x508214fa574bd1ab
+
+// 2^32 * 7 * B
+
+        .quad   0xc20269153ed6fe4b
+        .quad   0xa65a6739511d77c4
+        .quad   0xcbde26462c14af94
+        .quad   0x22f960ec6faba74b
+        .quad   0x461a15bb53d003d6
+        .quad   0xb2102888bcf3c965
+        .quad   0x27c576756c683a5a
+        .quad   0x3a7758a4c86cb447
+        .quad   0x548111f693ae5076
+        .quad   0x1dae21df1dfd54a6
+        .quad   0x12248c90f3115e65
+        .quad   0x5d9fd15f8de7f494
+
+// 2^32 * 8 * B
+
+        .quad   0x031408d36d63727f
+        .quad   0x6a379aefd7c7b533
+        .quad   0xa9e18fc5ccaee24b
+        .quad   0x332f35914f8fbed3
+        .quad   0x3f244d2aeed7521e
+        .quad   0x8e3a9028432e9615
+        .quad   0xe164ba772e9c16d4
+        .quad   0x3bc187fa47eb98d8
+        .quad   0x6d470115ea86c20c
+        .quad   0x998ab7cb6c46d125
+        .quad   0xd77832b53a660188
+        .quad   0x450d81ce906fba03
+
+// 2^36 * 1 * B
+
+        .quad   0xf8ae4d2ad8453902
+        .quad   0x7018058ee8db2d1d
+        .quad   0xaab3995fc7d2c11e
+        .quad   0x53b16d2324ccca79
+        .quad   0x23264d66b2cae0b5
+        .quad   0x7dbaed33ebca6576
+        .quad   0x030ebed6f0d24ac8
+        .quad   0x2a887f78f7635510
+        .quad   0x2a23b9e75c012d4f
+        .quad   0x0c974651cae1f2ea
+        .quad   0x2fb63273675d70ca
+        .quad   0x0ba7250b864403f5
+
+// 2^36 * 2 * B
+
+        .quad   0xbb0d18fd029c6421
+        .quad   0xbc2d142189298f02
+        .quad   0x8347f8e68b250e96
+        .quad   0x7b9f2fe8032d71c9
+        .quad   0xdd63589386f86d9c
+        .quad   0x61699176e13a85a4
+        .quad   0x2e5111954eaa7d57
+        .quad   0x32c21b57fb60bdfb
+        .quad   0xd87823cd319e0780
+        .quad   0xefc4cfc1897775c5
+        .quad   0x4854fb129a0ab3f7
+        .quad   0x12c49d417238c371
+
+// 2^36 * 3 * B
+
+        .quad   0x0950b533ffe83769
+        .quad   0x21861c1d8e1d6bd1
+        .quad   0xf022d8381302e510
+        .quad   0x2509200c6391cab4
+        .quad   0x09b3a01783799542
+        .quad   0x626dd08faad5ee3f
+        .quad   0xba00bceeeb70149f
+        .quad   0x1421b246a0a444c9
+        .quad   0x4aa43a8e8c24a7c7
+        .quad   0x04c1f540d8f05ef5
+        .quad   0xadba5e0c0b3eb9dc
+        .quad   0x2ab5504448a49ce3
+
+// 2^36 * 4 * B
+
+        .quad   0x2ed227266f0f5dec
+        .quad   0x9824ee415ed50824
+        .quad   0x807bec7c9468d415
+        .quad   0x7093bae1b521e23f
+        .quad   0xdc07ac631c5d3afa
+        .quad   0x58615171f9df8c6c
+        .quad   0x72a079d89d73e2b0
+        .quad   0x7301f4ceb4eae15d
+        .quad   0x6409e759d6722c41
+        .quad   0xa674e1cf72bf729b
+        .quad   0xbc0a24eb3c21e569
+        .quad   0x390167d24ebacb23
+
+// 2^36 * 5 * B
+
+        .quad   0x27f58e3bba353f1c
+        .quad   0x4c47764dbf6a4361
+        .quad   0xafbbc4e56e562650
+        .quad   0x07db2ee6aae1a45d
+        .quad   0xd7bb054ba2f2120b
+        .quad   0xe2b9ceaeb10589b7
+        .quad   0x3fe8bac8f3c0edbe
+        .quad   0x4cbd40767112cb69
+        .quad   0x0b603cc029c58176
+        .quad   0x5988e3825cb15d61
+        .quad   0x2bb61413dcf0ad8d
+        .quad   0x7b8eec6c74183287
+
+// 2^36 * 6 * B
+
+        .quad   0xe4ca40782cd27cb0
+        .quad   0xdaf9c323fbe967bd
+        .quad   0xb29bd34a8ad41e9e
+        .quad   0x72810497626ede4d
+        .quad   0x32fee570fc386b73
+        .quad   0xda8b0141da3a8cc7
+        .quad   0x975ffd0ac8968359
+        .quad   0x6ee809a1b132a855
+        .quad   0x9444bb31fcfd863a
+        .quad   0x2fe3690a3e4e48c5
+        .quad   0xdc29c867d088fa25
+        .quad   0x13bd1e38d173292e
+
+// 2^36 * 7 * B
+
+        .quad   0xd32b4cd8696149b5
+        .quad   0xe55937d781d8aab7
+        .quad   0x0bcb2127ae122b94
+        .quad   0x41e86fcfb14099b0
+        .quad   0x223fb5cf1dfac521
+        .quad   0x325c25316f554450
+        .quad   0x030b98d7659177ac
+        .quad   0x1ed018b64f88a4bd
+        .quad   0x3630dfa1b802a6b0
+        .quad   0x880f874742ad3bd5
+        .quad   0x0af90d6ceec5a4d4
+        .quad   0x746a247a37cdc5d9
+
+// 2^36 * 8 * B
+
+        .quad   0xd531b8bd2b7b9af6
+        .quad   0x5005093537fc5b51
+        .quad   0x232fcf25c593546d
+        .quad   0x20a365142bb40f49
+        .quad   0x6eccd85278d941ed
+        .quad   0x2254ae83d22f7843
+        .quad   0xc522d02e7bbfcdb7
+        .quad   0x681e3351bff0e4e2
+        .quad   0x8b64b59d83034f45
+        .quad   0x2f8b71f21fa20efb
+        .quad   0x69249495ba6550e4
+        .quad   0x539ef98e45d5472b
+
+// 2^40 * 1 * B
+
+        .quad   0x6e7bb6a1a6205275
+        .quad   0xaa4f21d7413c8e83
+        .quad   0x6f56d155e88f5cb2
+        .quad   0x2de25d4ba6345be1
+        .quad   0xd074d8961cae743f
+        .quad   0xf86d18f5ee1c63ed
+        .quad   0x97bdc55be7f4ed29
+        .quad   0x4cbad279663ab108
+        .quad   0x80d19024a0d71fcd
+        .quad   0xc525c20afb288af8
+        .quad   0xb1a3974b5f3a6419
+        .quad   0x7d7fbcefe2007233
+
+// 2^40 * 2 * B
+
+        .quad   0xfaef1e6a266b2801
+        .quad   0x866c68c4d5739f16
+        .quad   0xf68a2fbc1b03762c
+        .quad   0x5975435e87b75a8d
+        .quad   0xcd7c5dc5f3c29094
+        .quad   0xc781a29a2a9105ab
+        .quad   0x80c61d36421c3058
+        .quad   0x4f9cd196dcd8d4d7
+        .quad   0x199297d86a7b3768
+        .quad   0xd0d058241ad17a63
+        .quad   0xba029cad5c1c0c17
+        .quad   0x7ccdd084387a0307
+
+// 2^40 * 3 * B
+
+        .quad   0xdca6422c6d260417
+        .quad   0xae153d50948240bd
+        .quad   0xa9c0c1b4fb68c677
+        .quad   0x428bd0ed61d0cf53
+        .quad   0x9b0c84186760cc93
+        .quad   0xcdae007a1ab32a99
+        .quad   0xa88dec86620bda18
+        .quad   0x3593ca848190ca44
+        .quad   0x9213189a5e849aa7
+        .quad   0xd4d8c33565d8facd
+        .quad   0x8c52545b53fdbbd1
+        .quad   0x27398308da2d63e6
+
+// 2^40 * 4 * B
+
+        .quad   0x42c38d28435ed413
+        .quad   0xbd50f3603278ccc9
+        .quad   0xbb07ab1a79da03ef
+        .quad   0x269597aebe8c3355
+        .quad   0xb9a10e4c0a702453
+        .quad   0x0fa25866d57d1bde
+        .quad   0xffb9d9b5cd27daf7
+        .quad   0x572c2945492c33fd
+        .quad   0xc77fc745d6cd30be
+        .quad   0xe4dfe8d3e3baaefb
+        .quad   0xa22c8830aa5dda0c
+        .quad   0x7f985498c05bca80
+
+// 2^40 * 5 * B
+
+        .quad   0x3849ce889f0be117
+        .quad   0x8005ad1b7b54a288
+        .quad   0x3da3c39f23fc921c
+        .quad   0x76c2ec470a31f304
+        .quad   0xd35615520fbf6363
+        .quad   0x08045a45cf4dfba6
+        .quad   0xeec24fbc873fa0c2
+        .quad   0x30f2653cd69b12e7
+        .quad   0x8a08c938aac10c85
+        .quad   0x46179b60db276bcb
+        .quad   0xa920c01e0e6fac70
+        .quad   0x2f1273f1596473da
+
+// 2^40 * 6 * B
+
+        .quad   0x4739fc7c8ae01e11
+        .quad   0xfd5274904a6aab9f
+        .quad   0x41d98a8287728f2e
+        .quad   0x5d9e572ad85b69f2
+        .quad   0x30488bd755a70bc0
+        .quad   0x06d6b5a4f1d442e7
+        .quad   0xead1a69ebc596162
+        .quad   0x38ac1997edc5f784
+        .quad   0x0666b517a751b13b
+        .quad   0x747d06867e9b858c
+        .quad   0xacacc011454dde49
+        .quad   0x22dfcd9cbfe9e69c
+
+// 2^40 * 7 * B
+
+        .quad   0x8ddbd2e0c30d0cd9
+        .quad   0xad8e665facbb4333
+        .quad   0x8f6b258c322a961f
+        .quad   0x6b2916c05448c1c7
+        .quad   0x56ec59b4103be0a1
+        .quad   0x2ee3baecd259f969
+        .quad   0x797cb29413f5cd32
+        .quad   0x0fe9877824cde472
+        .quad   0x7edb34d10aba913b
+        .quad   0x4ea3cd822e6dac0e
+        .quad   0x66083dff6578f815
+        .quad   0x4c303f307ff00a17
+
+// 2^40 * 8 * B
+
+        .quad   0xd30a3bd617b28c85
+        .quad   0xc5d377b739773bea
+        .quad   0xc6c6e78c1e6a5cbf
+        .quad   0x0d61b8f78b2ab7c4
+        .quad   0x29fc03580dd94500
+        .quad   0xecd27aa46fbbec93
+        .quad   0x130a155fc2e2a7f8
+        .quad   0x416b151ab706a1d5
+        .quad   0x56a8d7efe9c136b0
+        .quad   0xbd07e5cd58e44b20
+        .quad   0xafe62fda1b57e0ab
+        .quad   0x191a2af74277e8d2
+
+// 2^44 * 1 * B
+
+        .quad   0xd550095bab6f4985
+        .quad   0x04f4cd5b4fbfaf1a
+        .quad   0x9d8e2ed12a0c7540
+        .quad   0x2bc24e04b2212286
+        .quad   0x09d4b60b2fe09a14
+        .quad   0xc384f0afdbb1747e
+        .quad   0x58e2ea8978b5fd6e
+        .quad   0x519ef577b5e09b0a
+        .quad   0x1863d7d91124cca9
+        .quad   0x7ac08145b88a708e
+        .quad   0x2bcd7309857031f5
+        .quad   0x62337a6e8ab8fae5
+
+// 2^44 * 2 * B
+
+        .quad   0x4bcef17f06ffca16
+        .quad   0xde06e1db692ae16a
+        .quad   0x0753702d614f42b0
+        .quad   0x5f6041b45b9212d0
+        .quad   0xd1ab324e1b3a1273
+        .quad   0x18947cf181055340
+        .quad   0x3b5d9567a98c196e
+        .quad   0x7fa00425802e1e68
+        .quad   0x7d531574028c2705
+        .quad   0x80317d69db0d75fe
+        .quad   0x30fface8ef8c8ddd
+        .quad   0x7e9de97bb6c3e998
+
+// 2^44 * 3 * B
+
+        .quad   0x1558967b9e6585a3
+        .quad   0x97c99ce098e98b92
+        .quad   0x10af149b6eb3adad
+        .quad   0x42181fe8f4d38cfa
+        .quad   0xf004be62a24d40dd
+        .quad   0xba0659910452d41f
+        .quad   0x81c45ee162a44234
+        .quad   0x4cb829d8a22266ef
+        .quad   0x1dbcaa8407b86681
+        .quad   0x081f001e8b26753b
+        .quad   0x3cd7ce6a84048e81
+        .quad   0x78af11633f25f22c
+
+// 2^44 * 4 * B
+
+        .quad   0x8416ebd40b50babc
+        .quad   0x1508722628208bee
+        .quad   0xa3148fafb9c1c36d
+        .quad   0x0d07daacd32d7d5d
+        .quad   0x3241c00e7d65318c
+        .quad   0xe6bee5dcd0e86de7
+        .quad   0x118b2dc2fbc08c26
+        .quad   0x680d04a7fc603dc3
+        .quad   0xf9c2414a695aa3eb
+        .quad   0xdaa42c4c05a68f21
+        .quad   0x7c6c23987f93963e
+        .quad   0x210e8cd30c3954e3
+
+// 2^44 * 5 * B
+
+        .quad   0xac4201f210a71c06
+        .quad   0x6a65e0aef3bfb021
+        .quad   0xbc42c35c393632f7
+        .quad   0x56ea8db1865f0742
+        .quad   0x2b50f16137fe6c26
+        .quad   0xe102bcd856e404d8
+        .quad   0x12b0f1414c561f6b
+        .quad   0x51b17bc8d028ec91
+        .quad   0xfff5fb4bcf535119
+        .quad   0xf4989d79df1108a0
+        .quad   0xbdfcea659a3ba325
+        .quad   0x18a11f1174d1a6f2
+
+// 2^44 * 6 * B
+
+        .quad   0x407375ab3f6bba29
+        .quad   0x9ec3b6d8991e482e
+        .quad   0x99c80e82e55f92e9
+        .quad   0x307c13b6fb0c0ae1
+        .quad   0xfbd63cdad27a5f2c
+        .quad   0xf00fc4bc8aa106d7
+        .quad   0x53fb5c1a8e64a430
+        .quad   0x04eaabe50c1a2e85
+        .quad   0x24751021cb8ab5e7
+        .quad   0xfc2344495c5010eb
+        .quad   0x5f1e717b4e5610a1
+        .quad   0x44da5f18c2710cd5
+
+// 2^44 * 7 * B
+
+        .quad   0x033cc55ff1b82eb5
+        .quad   0xb15ae36d411cae52
+        .quad   0xba40b6198ffbacd3
+        .quad   0x768edce1532e861f
+        .quad   0x9156fe6b89d8eacc
+        .quad   0xe6b79451e23126a1
+        .quad   0xbd7463d93944eb4e
+        .quad   0x726373f6767203ae
+        .quad   0xe305ca72eb7ef68a
+        .quad   0x662cf31f70eadb23
+        .quad   0x18f026fdb4c45b68
+        .quad   0x513b5384b5d2ecbd
+
+// 2^44 * 8 * B
+
+        .quad   0x46d46280c729989e
+        .quad   0x4b93fbd05368a5dd
+        .quad   0x63df3f81d1765a89
+        .quad   0x34cebd64b9a0a223
+        .quad   0x5e2702878af34ceb
+        .quad   0x900b0409b946d6ae
+        .quad   0x6512ebf7dabd8512
+        .quad   0x61d9b76988258f81
+        .quad   0xa6c5a71349b7d94b
+        .quad   0xa3f3d15823eb9446
+        .quad   0x0416fbd277484834
+        .quad   0x69d45e6f2c70812f
+
+// 2^48 * 1 * B
+
+        .quad   0xce16f74bc53c1431
+        .quad   0x2b9725ce2072edde
+        .quad   0xb8b9c36fb5b23ee7
+        .quad   0x7e2e0e450b5cc908
+        .quad   0x9fe62b434f460efb
+        .quad   0xded303d4a63607d6
+        .quad   0xf052210eb7a0da24
+        .quad   0x237e7dbe00545b93
+        .quad   0x013575ed6701b430
+        .quad   0x231094e69f0bfd10
+        .quad   0x75320f1583e47f22
+        .quad   0x71afa699b11155e3
+
+// 2^48 * 2 * B
+
+        .quad   0x65ce6f9b3953b61d
+        .quad   0xc65839eaafa141e6
+        .quad   0x0f435ffda9f759fe
+        .quad   0x021142e9c2b1c28e
+        .quad   0xea423c1c473b50d6
+        .quad   0x51e87a1f3b38ef10
+        .quad   0x9b84bf5fb2c9be95
+        .quad   0x00731fbc78f89a1c
+        .quad   0xe430c71848f81880
+        .quad   0xbf960c225ecec119
+        .quad   0xb6dae0836bba15e3
+        .quad   0x4c4d6f3347e15808
+
+// 2^48 * 3 * B
+
+        .quad   0x18f7eccfc17d1fc9
+        .quad   0x6c75f5a651403c14
+        .quad   0xdbde712bf7ee0cdf
+        .quad   0x193fddaaa7e47a22
+        .quad   0x2f0cddfc988f1970
+        .quad   0x6b916227b0b9f51b
+        .quad   0x6ec7b6c4779176be
+        .quad   0x38bf9500a88f9fa8
+        .quad   0x1fd2c93c37e8876f
+        .quad   0xa2f61e5a18d1462c
+        .quad   0x5080f58239241276
+        .quad   0x6a6fb99ebf0d4969
+
+// 2^48 * 4 * B
+
+        .quad   0x6a46c1bb560855eb
+        .quad   0x2416bb38f893f09d
+        .quad   0xd71d11378f71acc1
+        .quad   0x75f76914a31896ea
+        .quad   0xeeb122b5b6e423c6
+        .quad   0x939d7010f286ff8e
+        .quad   0x90a92a831dcf5d8c
+        .quad   0x136fda9f42c5eb10
+        .quad   0xf94cdfb1a305bdd1
+        .quad   0x0f364b9d9ff82c08
+        .quad   0x2a87d8a5c3bb588a
+        .quad   0x022183510be8dcba
+
+// 2^48 * 5 * B
+
+        .quad   0x4af766385ead2d14
+        .quad   0xa08ed880ca7c5830
+        .quad   0x0d13a6e610211e3d
+        .quad   0x6a071ce17b806c03
+        .quad   0x9d5a710143307a7f
+        .quad   0xb063de9ec47da45f
+        .quad   0x22bbfe52be927ad3
+        .quad   0x1387c441fd40426c
+        .quad   0xb5d3c3d187978af8
+        .quad   0x722b5a3d7f0e4413
+        .quad   0x0d7b4848bb477ca0
+        .quad   0x3171b26aaf1edc92
+
+// 2^48 * 6 * B
+
+        .quad   0xa92f319097564ca8
+        .quad   0xff7bb84c2275e119
+        .quad   0x4f55fe37a4875150
+        .quad   0x221fd4873cf0835a
+        .quad   0xa60db7d8b28a47d1
+        .quad   0xa6bf14d61770a4f1
+        .quad   0xd4a1f89353ddbd58
+        .quad   0x6c514a63344243e9
+        .quad   0x2322204f3a156341
+        .quad   0xfb73e0e9ba0a032d
+        .quad   0xfce0dd4c410f030e
+        .quad   0x48daa596fb924aaa
+
+// 2^48 * 7 * B
+
+        .quad   0x6eca8e665ca59cc7
+        .quad   0xa847254b2e38aca0
+        .quad   0x31afc708d21e17ce
+        .quad   0x676dd6fccad84af7
+        .quad   0x14f61d5dc84c9793
+        .quad   0x9941f9e3ef418206
+        .quad   0xcdf5b88f346277ac
+        .quad   0x58c837fa0e8a79a9
+        .quad   0x0cf9688596fc9058
+        .quad   0x1ddcbbf37b56a01b
+        .quad   0xdcc2e77d4935d66a
+        .quad   0x1c4f73f2c6a57f0a
+
+// 2^48 * 8 * B
+
+        .quad   0x0e7a4fbd305fa0bb
+        .quad   0x829d4ce054c663ad
+        .quad   0xf421c3832fe33848
+        .quad   0x795ac80d1bf64c42
+        .quad   0xb36e706efc7c3484
+        .quad   0x73dfc9b4c3c1cf61
+        .quad   0xeb1d79c9781cc7e5
+        .quad   0x70459adb7daf675c
+        .quad   0x1b91db4991b42bb3
+        .quad   0x572696234b02dcca
+        .quad   0x9fdf9ee51f8c78dc
+        .quad   0x5fe162848ce21fd3
+
+// 2^52 * 1 * B
+
+        .quad   0xe2790aae4d077c41
+        .quad   0x8b938270db7469a3
+        .quad   0x6eb632dc8abd16a2
+        .quad   0x720814ecaa064b72
+        .quad   0x315c29c795115389
+        .quad   0xd7e0e507862f74ce
+        .quad   0x0c4a762185927432
+        .quad   0x72de6c984a25a1e4
+        .quad   0xae9ab553bf6aa310
+        .quad   0x050a50a9806d6e1b
+        .quad   0x92bb7403adff5139
+        .quad   0x0394d27645be618b
+
+// 2^52 * 2 * B
+
+        .quad   0x4d572251857eedf4
+        .quad   0xe3724edde19e93c5
+        .quad   0x8a71420e0b797035
+        .quad   0x3b3c833687abe743
+        .quad   0xf5396425b23545a4
+        .quad   0x15a7a27e98fbb296
+        .quad   0xab6c52bc636fdd86
+        .quad   0x79d995a8419334ee
+        .quad   0xcd8a8ea61195dd75
+        .quad   0xa504d8a81dd9a82f
+        .quad   0x540dca81a35879b6
+        .quad   0x60dd16a379c86a8a
+
+// 2^52 * 3 * B
+
+        .quad   0x35a2c8487381e559
+        .quad   0x596ffea6d78082cb
+        .quad   0xcb9771ebdba7b653
+        .quad   0x5a08b5019b4da685
+        .quad   0x3501d6f8153e47b8
+        .quad   0xb7a9675414a2f60c
+        .quad   0x112ee8b6455d9523
+        .quad   0x4e62a3c18112ea8a
+        .quad   0xc8d4ac04516ab786
+        .quad   0x595af3215295b23d
+        .quad   0xd6edd234db0230c1
+        .quad   0x0929efe8825b41cc
+
+// 2^52 * 4 * B
+
+        .quad   0x5f0601d1cbd0f2d3
+        .quad   0x736e412f6132bb7f
+        .quad   0x83604432238dde87
+        .quad   0x1e3a5272f5c0753c
+        .quad   0x8b3172b7ad56651d
+        .quad   0x01581b7a3fabd717
+        .quad   0x2dc94df6424df6e4
+        .quad   0x30376e5d2c29284f
+        .quad   0xd2918da78159a59c
+        .quad   0x6bdc1cd93f0713f3
+        .quad   0x565f7a934acd6590
+        .quad   0x53daacec4cb4c128
+
+// 2^52 * 5 * B
+
+        .quad   0x4ca73bd79cc8a7d6
+        .quad   0x4d4a738f47e9a9b2
+        .quad   0xf4cbf12942f5fe00
+        .quad   0x01a13ff9bdbf0752
+        .quad   0x99852bc3852cfdb0
+        .quad   0x2cc12e9559d6ed0b
+        .quad   0x70f9e2bf9b5ac27b
+        .quad   0x4f3b8c117959ae99
+        .quad   0x55b6c9c82ff26412
+        .quad   0x1ac4a8c91fb667a8
+        .quad   0xd527bfcfeb778bf2
+        .quad   0x303337da7012a3be
+
+// 2^52 * 6 * B
+
+        .quad   0x955422228c1c9d7c
+        .quad   0x01fac1371a9b340f
+        .quad   0x7e8d9177925b48d7
+        .quad   0x53f8ad5661b3e31b
+        .quad   0x976d3ccbfad2fdd1
+        .quad   0xcb88839737a640a8
+        .quad   0x2ff00c1d6734cb25
+        .quad   0x269ff4dc789c2d2b
+        .quad   0x0c003fbdc08d678d
+        .quad   0x4d982fa37ead2b17
+        .quad   0xc07e6bcdb2e582f1
+        .quad   0x296c7291df412a44
+
+// 2^52 * 7 * B
+
+        .quad   0x7903de2b33daf397
+        .quad   0xd0ff0619c9a624b3
+        .quad   0x8a1d252b555b3e18
+        .quad   0x2b6d581c52e0b7c0
+        .quad   0xdfb23205dab8b59e
+        .quad   0x465aeaa0c8092250
+        .quad   0xd133c1189a725d18
+        .quad   0x2327370261f117d1
+        .quad   0x3d0543d3623e7986
+        .quad   0x679414c2c278a354
+        .quad   0xae43f0cc726196f6
+        .quad   0x7836c41f8245eaba
+
+// 2^52 * 8 * B
+
+        .quad   0xe7a254db49e95a81
+        .quad   0x5192d5d008b0ad73
+        .quad   0x4d20e5b1d00afc07
+        .quad   0x5d55f8012cf25f38
+        .quad   0xca651e848011937c
+        .quad   0xc6b0c46e6ef41a28
+        .quad   0xb7021ba75f3f8d52
+        .quad   0x119dff99ead7b9fd
+        .quad   0x43eadfcbf4b31d4d
+        .quad   0xc6503f7411148892
+        .quad   0xfeee68c5060d3b17
+        .quad   0x329293b3dd4a0ac8
+
+// 2^56 * 1 * B
+
+        .quad   0x4e59214fe194961a
+        .quad   0x49be7dc70d71cd4f
+        .quad   0x9300cfd23b50f22d
+        .quad   0x4789d446fc917232
+        .quad   0x2879852d5d7cb208
+        .quad   0xb8dedd70687df2e7
+        .quad   0xdc0bffab21687891
+        .quad   0x2b44c043677daa35
+        .quad   0x1a1c87ab074eb78e
+        .quad   0xfac6d18e99daf467
+        .quad   0x3eacbbcd484f9067
+        .quad   0x60c52eef2bb9a4e4
+
+// 2^56 * 2 * B
+
+        .quad   0x0b5d89bc3bfd8bf1
+        .quad   0xb06b9237c9f3551a
+        .quad   0x0e4c16b0d53028f5
+        .quad   0x10bc9c312ccfcaab
+        .quad   0x702bc5c27cae6d11
+        .quad   0x44c7699b54a48cab
+        .quad   0xefbc4056ba492eb2
+        .quad   0x70d77248d9b6676d
+        .quad   0xaa8ae84b3ec2a05b
+        .quad   0x98699ef4ed1781e0
+        .quad   0x794513e4708e85d1
+        .quad   0x63755bd3a976f413
+
+// 2^56 * 3 * B
+
+        .quad   0xb55fa03e2ad10853
+        .quad   0x356f75909ee63569
+        .quad   0x9ff9f1fdbe69b890
+        .quad   0x0d8cc1c48bc16f84
+        .quad   0x3dc7101897f1acb7
+        .quad   0x5dda7d5ec165bbd8
+        .quad   0x508e5b9c0fa1020f
+        .quad   0x2763751737c52a56
+        .quad   0x029402d36eb419a9
+        .quad   0xf0b44e7e77b460a5
+        .quad   0xcfa86230d43c4956
+        .quad   0x70c2dd8a7ad166e7
+
+// 2^56 * 4 * B
+
+        .quad   0x656194509f6fec0e
+        .quad   0xee2e7ea946c6518d
+        .quad   0x9733c1f367e09b5c
+        .quad   0x2e0fac6363948495
+        .quad   0x91d4967db8ed7e13
+        .quad   0x74252f0ad776817a
+        .quad   0xe40982e00d852564
+        .quad   0x32b8613816a53ce5
+        .quad   0x79e7f7bee448cd64
+        .quad   0x6ac83a67087886d0
+        .quad   0xf89fd4d9a0e4db2e
+        .quad   0x4179215c735a4f41
+
+// 2^56 * 5 * B
+
+        .quad   0x8c7094e7d7dced2a
+        .quad   0x97fb8ac347d39c70
+        .quad   0xe13be033a906d902
+        .quad   0x700344a30cd99d76
+        .quad   0xe4ae33b9286bcd34
+        .quad   0xb7ef7eb6559dd6dc
+        .quad   0x278b141fb3d38e1f
+        .quad   0x31fa85662241c286
+        .quad   0xaf826c422e3622f4
+        .quad   0xc12029879833502d
+        .quad   0x9bc1b7e12b389123
+        .quad   0x24bb2312a9952489
+
+// 2^56 * 6 * B
+
+        .quad   0xb1a8ed1732de67c3
+        .quad   0x3cb49418461b4948
+        .quad   0x8ebd434376cfbcd2
+        .quad   0x0fee3e871e188008
+        .quad   0x41f80c2af5f85c6b
+        .quad   0x687284c304fa6794
+        .quad   0x8945df99a3ba1bad
+        .quad   0x0d1d2af9ffeb5d16
+        .quad   0xa9da8aa132621edf
+        .quad   0x30b822a159226579
+        .quad   0x4004197ba79ac193
+        .quad   0x16acd79718531d76
+
+// 2^56 * 7 * B
+
+        .quad   0x72df72af2d9b1d3d
+        .quad   0x63462a36a432245a
+        .quad   0x3ecea07916b39637
+        .quad   0x123e0ef6b9302309
+        .quad   0xc959c6c57887b6ad
+        .quad   0x94e19ead5f90feba
+        .quad   0x16e24e62a342f504
+        .quad   0x164ed34b18161700
+        .quad   0x487ed94c192fe69a
+        .quad   0x61ae2cea3a911513
+        .quad   0x877bf6d3b9a4de27
+        .quad   0x78da0fc61073f3eb
+
+// 2^56 * 8 * B
+
+        .quad   0x5bf15d28e52bc66a
+        .quad   0x2c47e31870f01a8e
+        .quad   0x2419afbc06c28bdd
+        .quad   0x2d25deeb256b173a
+        .quad   0xa29f80f1680c3a94
+        .quad   0x71f77e151ae9e7e6
+        .quad   0x1100f15848017973
+        .quad   0x054aa4b316b38ddd
+        .quad   0xdfc8468d19267cb8
+        .quad   0x0b28789c66e54daf
+        .quad   0x2aeb1d2a666eec17
+        .quad   0x134610a6ab7da760
+
+// 2^60 * 1 * B
+
+        .quad   0xcaf55ec27c59b23f
+        .quad   0x99aeed3e154d04f2
+        .quad   0x68441d72e14141f4
+        .quad   0x140345133932a0a2
+        .quad   0xd91430e0dc028c3c
+        .quad   0x0eb955a85217c771
+        .quad   0x4b09e1ed2c99a1fa
+        .quad   0x42881af2bd6a743c
+        .quad   0x7bfec69aab5cad3d
+        .quad   0xc23e8cd34cb2cfad
+        .quad   0x685dd14bfb37d6a2
+        .quad   0x0ad6d64415677a18
+
+// 2^60 * 2 * B
+
+        .quad   0x781a439e417becb5
+        .quad   0x4ac5938cd10e0266
+        .quad   0x5da385110692ac24
+        .quad   0x11b065a2ade31233
+        .quad   0x7914892847927e9f
+        .quad   0x33dad6ef370aa877
+        .quad   0x1f8f24fa11122703
+        .quad   0x5265ac2f2adf9592
+        .quad   0x405fdd309afcb346
+        .quad   0xd9723d4428e63f54
+        .quad   0x94c01df05f65aaae
+        .quad   0x43e4dc3ae14c0809
+
+// 2^60 * 3 * B
+
+        .quad   0xbc12c7f1a938a517
+        .quad   0x473028ab3180b2e1
+        .quad   0x3f78571efbcd254a
+        .quad   0x74e534426ff6f90f
+        .quad   0xea6f7ac3adc2c6a3
+        .quad   0xd0e928f6e9717c94
+        .quad   0xe2d379ead645eaf5
+        .quad   0x46dd8785c51ffbbe
+        .quad   0x709801be375c8898
+        .quad   0x4b06dab5e3fd8348
+        .quad   0x75880ced27230714
+        .quad   0x2b09468fdd2f4c42
+
+// 2^60 * 4 * B
+
+        .quad   0x97c749eeb701cb96
+        .quad   0x83f438d4b6a369c3
+        .quad   0x62962b8b9a402cd9
+        .quad   0x6976c7509888df7b
+        .quad   0x5b97946582ffa02a
+        .quad   0xda096a51fea8f549
+        .quad   0xa06351375f77af9b
+        .quad   0x1bcfde61201d1e76
+        .quad   0x4a4a5490246a59a2
+        .quad   0xd63ebddee87fdd90
+        .quad   0xd9437c670d2371fa
+        .quad   0x69e87308d30f8ed6
+
+// 2^60 * 5 * B
+
+        .quad   0x435a8bb15656beb0
+        .quad   0xf8fac9ba4f4d5bca
+        .quad   0xb9b278c41548c075
+        .quad   0x3eb0ef76e892b622
+        .quad   0x0f80bf028bc80303
+        .quad   0x6aae16b37a18cefb
+        .quad   0xdd47ea47d72cd6a3
+        .quad   0x61943588f4ed39aa
+        .quad   0xd26e5c3e91039f85
+        .quad   0xc0e9e77df6f33aa9
+        .quad   0xe8968c5570066a93
+        .quad   0x3c34d1881faaaddd
+
+// 2^60 * 6 * B
+
+        .quad   0x3f9d2b5ea09f9ec0
+        .quad   0x1dab3b6fb623a890
+        .quad   0xa09ba3ea72d926c4
+        .quad   0x374193513fd8b36d
+        .quad   0xbd5b0b8f2fffe0d9
+        .quad   0x6aa254103ed24fb9
+        .quad   0x2ac7d7bcb26821c4
+        .quad   0x605b394b60dca36a
+        .quad   0xb4e856e45a9d1ed2
+        .quad   0xefe848766c97a9a2
+        .quad   0xb104cf641e5eee7d
+        .quad   0x2f50b81c88a71c8f
+
+// 2^60 * 7 * B
+
+        .quad   0x31723c61fc6811bb
+        .quad   0x9cb450486211800f
+        .quad   0x768933d347995753
+        .quad   0x3491a53502752fcd
+        .quad   0x2b552ca0a7da522a
+        .quad   0x3230b336449b0250
+        .quad   0xf2c4c5bca4b99fb9
+        .quad   0x7b2c674958074a22
+        .quad   0xd55165883ed28cdf
+        .quad   0x12d84fd2d362de39
+        .quad   0x0a874ad3e3378e4f
+        .quad   0x000d2b1f7c763e74
+
+// 2^60 * 8 * B
+
+        .quad   0x3d420811d06d4a67
+        .quad   0xbefc048590e0ffe3
+        .quad   0xf870c6b7bd487bde
+        .quad   0x6e2a7316319afa28
+        .quad   0x9624778c3e94a8ab
+        .quad   0x0ad6f3cee9a78bec
+        .quad   0x948ac7810d743c4f
+        .quad   0x76627935aaecfccc
+        .quad   0x56a8ac24d6d59a9f
+        .quad   0xc8db753e3096f006
+        .quad   0x477f41e68f4c5299
+        .quad   0x588d851cf6c86114
+
+// 2^64 * 1 * B
+
+        .quad   0x51138ec78df6b0fe
+        .quad   0x5397da89e575f51b
+        .quad   0x09207a1d717af1b9
+        .quad   0x2102fdba2b20d650
+        .quad   0xcd2a65e777d1f515
+        .quad   0x548991878faa60f1
+        .quad   0xb1b73bbcdabc06e5
+        .quad   0x654878cba97cc9fb
+        .quad   0x969ee405055ce6a1
+        .quad   0x36bca7681251ad29
+        .quad   0x3a1af517aa7da415
+        .quad   0x0ad725db29ecb2ba
+
+// 2^64 * 2 * B
+
+        .quad   0xdc4267b1834e2457
+        .quad   0xb67544b570ce1bc5
+        .quad   0x1af07a0bf7d15ed7
+        .quad   0x4aefcffb71a03650
+        .quad   0xfec7bc0c9b056f85
+        .quad   0x537d5268e7f5ffd7
+        .quad   0x77afc6624312aefa
+        .quad   0x4f675f5302399fd9
+        .quad   0xc32d36360415171e
+        .quad   0xcd2bef118998483b
+        .quad   0x870a6eadd0945110
+        .quad   0x0bccbb72a2a86561
+
+// 2^64 * 3 * B
+
+        .quad   0x185e962feab1a9c8
+        .quad   0x86e7e63565147dcd
+        .quad   0xb092e031bb5b6df2
+        .quad   0x4024f0ab59d6b73e
+        .quad   0x186d5e4c50fe1296
+        .quad   0xe0397b82fee89f7e
+        .quad   0x3bc7f6c5507031b0
+        .quad   0x6678fd69108f37c2
+        .quad   0x1586fa31636863c2
+        .quad   0x07f68c48572d33f2
+        .quad   0x4f73cc9f789eaefc
+        .quad   0x2d42e2108ead4701
+
+// 2^64 * 4 * B
+
+        .quad   0x97f5131594dfd29b
+        .quad   0x6155985d313f4c6a
+        .quad   0xeba13f0708455010
+        .quad   0x676b2608b8d2d322
+        .quad   0x21717b0d0f537593
+        .quad   0x914e690b131e064c
+        .quad   0x1bb687ae752ae09f
+        .quad   0x420bf3a79b423c6e
+        .quad   0x8138ba651c5b2b47
+        .quad   0x8671b6ec311b1b80
+        .quad   0x7bff0cb1bc3135b0
+        .quad   0x745d2ffa9c0cf1e0
+
+// 2^64 * 5 * B
+
+        .quad   0xbf525a1e2bc9c8bd
+        .quad   0xea5b260826479d81
+        .quad   0xd511c70edf0155db
+        .quad   0x1ae23ceb960cf5d0
+        .quad   0x6036df5721d34e6a
+        .quad   0xb1db8827997bb3d0
+        .quad   0xd3c209c3c8756afa
+        .quad   0x06e15be54c1dc839
+        .quad   0x5b725d871932994a
+        .quad   0x32351cb5ceb1dab0
+        .quad   0x7dc41549dab7ca05
+        .quad   0x58ded861278ec1f7
+
+// 2^64 * 6 * B
+
+        .quad   0xd8173793f266c55c
+        .quad   0xc8c976c5cc454e49
+        .quad   0x5ce382f8bc26c3a8
+        .quad   0x2ff39de85485f6f9
+        .quad   0x2dfb5ba8b6c2c9a8
+        .quad   0x48eeef8ef52c598c
+        .quad   0x33809107f12d1573
+        .quad   0x08ba696b531d5bd8
+        .quad   0x77ed3eeec3efc57a
+        .quad   0x04e05517d4ff4811
+        .quad   0xea3d7a3ff1a671cb
+        .quad   0x120633b4947cfe54
+
+// 2^64 * 7 * B
+
+        .quad   0x0b94987891610042
+        .quad   0x4ee7b13cecebfae8
+        .quad   0x70be739594f0a4c0
+        .quad   0x35d30a99b4d59185
+        .quad   0x82bd31474912100a
+        .quad   0xde237b6d7e6fbe06
+        .quad   0xe11e761911ea79c6
+        .quad   0x07433be3cb393bde
+        .quad   0xff7944c05ce997f4
+        .quad   0x575d3de4b05c51a3
+        .quad   0x583381fd5a76847c
+        .quad   0x2d873ede7af6da9f
+
+// 2^64 * 8 * B
+
+        .quad   0x157a316443373409
+        .quad   0xfab8b7eef4aa81d9
+        .quad   0xb093fee6f5a64806
+        .quad   0x2e773654707fa7b6
+        .quad   0xaa6202e14e5df981
+        .quad   0xa20d59175015e1f5
+        .quad   0x18a275d3bae21d6c
+        .quad   0x0543618a01600253
+        .quad   0x0deabdf4974c23c1
+        .quad   0xaa6f0a259dce4693
+        .quad   0x04202cb8a29aba2c
+        .quad   0x4b1443362d07960d
+
+// 2^68 * 1 * B
+
+        .quad   0x47b837f753242cec
+        .quad   0x256dc48cc04212f2
+        .quad   0xe222fbfbe1d928c5
+        .quad   0x48ea295bad8a2c07
+        .quad   0x299b1c3f57c5715e
+        .quad   0x96cb929e6b686d90
+        .quad   0x3004806447235ab3
+        .quad   0x2c435c24a44d9fe1
+        .quad   0x0607c97c80f8833f
+        .quad   0x0e851578ca25ec5b
+        .quad   0x54f7450b161ebb6f
+        .quad   0x7bcb4792a0def80e
+
+// 2^68 * 2 * B
+
+        .quad   0x8487e3d02bc73659
+        .quad   0x4baf8445059979df
+        .quad   0xd17c975adcad6fbf
+        .quad   0x57369f0bdefc96b6
+        .quad   0x1cecd0a0045224c2
+        .quad   0x757f1b1b69e53952
+        .quad   0x775b7a925289f681
+        .quad   0x1b6cc62016736148
+        .quad   0xf1a9990175638698
+        .quad   0x353dd1beeeaa60d3
+        .quad   0x849471334c9ba488
+        .quad   0x63fa6e6843ade311
+
+// 2^68 * 3 * B
+
+        .quad   0xd15c20536597c168
+        .quad   0x9f73740098d28789
+        .quad   0x18aee7f13257ba1f
+        .quad   0x3418bfda07346f14
+        .quad   0x2195becdd24b5eb7
+        .quad   0x5e41f18cc0cd44f9
+        .quad   0xdf28074441ca9ede
+        .quad   0x07073b98f35b7d67
+        .quad   0xd03c676c4ce530d4
+        .quad   0x0b64c0473b5df9f4
+        .quad   0x065cef8b19b3a31e
+        .quad   0x3084d661533102c9
+
+// 2^68 * 4 * B
+
+        .quad   0xe1f6b79ebf8469ad
+        .quad   0x15801004e2663135
+        .quad   0x9a498330af74181b
+        .quad   0x3ba2504f049b673c
+        .quad   0x9a6ce876760321fd
+        .quad   0x7fe2b5109eb63ad8
+        .quad   0x00e7d4ae8ac80592
+        .quad   0x73d86b7abb6f723a
+        .quad   0x0b52b5606dba5ab6
+        .quad   0xa9134f0fbbb1edab
+        .quad   0x30a9520d9b04a635
+        .quad   0x6813b8f37973e5db
+
+// 2^68 * 5 * B
+
+        .quad   0x9854b054334127c1
+        .quad   0x105d047882fbff25
+        .quad   0xdb49f7f944186f4f
+        .quad   0x1768e838bed0b900
+        .quad   0xf194ca56f3157e29
+        .quad   0x136d35705ef528a5
+        .quad   0xdd4cef778b0599bc
+        .quad   0x7d5472af24f833ed
+        .quad   0xd0ef874daf33da47
+        .quad   0x00d3be5db6e339f9
+        .quad   0x3f2a8a2f9c9ceece
+        .quad   0x5d1aeb792352435a
+
+// 2^68 * 6 * B
+
+        .quad   0xf59e6bb319cd63ca
+        .quad   0x670c159221d06839
+        .quad   0xb06d565b2150cab6
+        .quad   0x20fb199d104f12a3
+        .quad   0x12c7bfaeb61ba775
+        .quad   0xb84e621fe263bffd
+        .quad   0x0b47a5c35c840dcf
+        .quad   0x7e83be0bccaf8634
+        .quad   0x61943dee6d99c120
+        .quad   0x86101f2e460b9fe0
+        .quad   0x6bb2f1518ee8598d
+        .quad   0x76b76289fcc475cc
+
+// 2^68 * 7 * B
+
+        .quad   0x791b4cc1756286fa
+        .quad   0xdbced317d74a157c
+        .quad   0x7e732421ea72bde6
+        .quad   0x01fe18491131c8e9
+        .quad   0x4245f1a1522ec0b3
+        .quad   0x558785b22a75656d
+        .quad   0x1d485a2548a1b3c0
+        .quad   0x60959eccd58fe09f
+        .quad   0x3ebfeb7ba8ed7a09
+        .quad   0x49fdc2bbe502789c
+        .quad   0x44ebce5d3c119428
+        .quad   0x35e1eb55be947f4a
+
+// 2^68 * 8 * B
+
+        .quad   0xdbdae701c5738dd3
+        .quad   0xf9c6f635b26f1bee
+        .quad   0x61e96a8042f15ef4
+        .quad   0x3aa1d11faf60a4d8
+        .quad   0x14fd6dfa726ccc74
+        .quad   0x3b084cfe2f53b965
+        .quad   0xf33ae4f552a2c8b4
+        .quad   0x59aab07a0d40166a
+        .quad   0x77bcec4c925eac25
+        .quad   0x1848718460137738
+        .quad   0x5b374337fea9f451
+        .quad   0x1865e78ec8e6aa46
+
+// 2^72 * 1 * B
+
+        .quad   0xccc4b7c7b66e1f7a
+        .quad   0x44157e25f50c2f7e
+        .quad   0x3ef06dfc713eaf1c
+        .quad   0x582f446752da63f7
+        .quad   0x967c54e91c529ccb
+        .quad   0x30f6269264c635fb
+        .quad   0x2747aff478121965
+        .quad   0x17038418eaf66f5c
+        .quad   0xc6317bd320324ce4
+        .quad   0xa81042e8a4488bc4
+        .quad   0xb21ef18b4e5a1364
+        .quad   0x0c2a1c4bcda28dc9
+
+// 2^72 * 2 * B
+
+        .quad   0xd24dc7d06f1f0447
+        .quad   0xb2269e3edb87c059
+        .quad   0xd15b0272fbb2d28f
+        .quad   0x7c558bd1c6f64877
+        .quad   0xedc4814869bd6945
+        .quad   0x0d6d907dbe1c8d22
+        .quad   0xc63bd212d55cc5ab
+        .quad   0x5a6a9b30a314dc83
+        .quad   0xd0ec1524d396463d
+        .quad   0x12bb628ac35a24f0
+        .quad   0xa50c3a791cbc5fa4
+        .quad   0x0404a5ca0afbafc3
+
+// 2^72 * 3 * B
+
+        .quad   0x8c1f40070aa743d6
+        .quad   0xccbad0cb5b265ee8
+        .quad   0x574b046b668fd2de
+        .quad   0x46395bfdcadd9633
+        .quad   0x62bc9e1b2a416fd1
+        .quad   0xb5c6f728e350598b
+        .quad   0x04343fd83d5d6967
+        .quad   0x39527516e7f8ee98
+        .quad   0x117fdb2d1a5d9a9c
+        .quad   0x9c7745bcd1005c2a
+        .quad   0xefd4bef154d56fea
+        .quad   0x76579a29e822d016
+
+// 2^72 * 4 * B
+
+        .quad   0x45b68e7e49c02a17
+        .quad   0x23cd51a2bca9a37f
+        .quad   0x3ed65f11ec224c1b
+        .quad   0x43a384dc9e05bdb1
+        .quad   0x333cb51352b434f2
+        .quad   0xd832284993de80e1
+        .quad   0xb5512887750d35ce
+        .quad   0x02c514bb2a2777c1
+        .quad   0x684bd5da8bf1b645
+        .quad   0xfb8bd37ef6b54b53
+        .quad   0x313916d7a9b0d253
+        .quad   0x1160920961548059
+
+// 2^72 * 5 * B
+
+        .quad   0xb44d166929dacfaa
+        .quad   0xda529f4c8413598f
+        .quad   0xe9ef63ca453d5559
+        .quad   0x351e125bc5698e0b
+        .quad   0x7a385616369b4dcd
+        .quad   0x75c02ca7655c3563
+        .quad   0x7dc21bf9d4f18021
+        .quad   0x2f637d7491e6e042
+        .quad   0xd4b49b461af67bbe
+        .quad   0xd603037ac8ab8961
+        .quad   0x71dee19ff9a699fb
+        .quad   0x7f182d06e7ce2a9a
+
+// 2^72 * 6 * B
+
+        .quad   0x7a7c8e64ab0168ec
+        .quad   0xcb5a4a5515edc543
+        .quad   0x095519d347cd0eda
+        .quad   0x67d4ac8c343e93b0
+        .quad   0x09454b728e217522
+        .quad   0xaa58e8f4d484b8d8
+        .quad   0xd358254d7f46903c
+        .quad   0x44acc043241c5217
+        .quad   0x1c7d6bbb4f7a5777
+        .quad   0x8b35fed4918313e1
+        .quad   0x4adca1c6c96b4684
+        .quad   0x556d1c8312ad71bd
+
+// 2^72 * 7 * B
+
+        .quad   0x17ef40e30c8d3982
+        .quad   0x31f7073e15a3fa34
+        .quad   0x4f21f3cb0773646e
+        .quad   0x746c6c6d1d824eff
+        .quad   0x81f06756b11be821
+        .quad   0x0faff82310a3f3dd
+        .quad   0xf8b2d0556a99465d
+        .quad   0x097abe38cc8c7f05
+        .quad   0x0c49c9877ea52da4
+        .quad   0x4c4369559bdc1d43
+        .quad   0x022c3809f7ccebd2
+        .quad   0x577e14a34bee84bd
+
+// 2^72 * 8 * B
+
+        .quad   0xf0e268ac61a73b0a
+        .quad   0xf2fafa103791a5f5
+        .quad   0xc1e13e826b6d00e9
+        .quad   0x60fa7ee96fd78f42
+        .quad   0x94fecebebd4dd72b
+        .quad   0xf46a4fda060f2211
+        .quad   0x124a5977c0c8d1ff
+        .quad   0x705304b8fb009295
+        .quad   0xb63d1d354d296ec6
+        .quad   0xf3c3053e5fad31d8
+        .quad   0x670b958cb4bd42ec
+        .quad   0x21398e0ca16353fd
+
+// 2^76 * 1 * B
+
+        .quad   0x216ab2ca8da7d2ef
+        .quad   0x366ad9dd99f42827
+        .quad   0xae64b9004fdd3c75
+        .quad   0x403a395b53909e62
+        .quad   0x86c5fc16861b7e9a
+        .quad   0xf6a330476a27c451
+        .quad   0x01667267a1e93597
+        .quad   0x05ffb9cd6082dfeb
+        .quad   0xa617fa9ff53f6139
+        .quad   0x60f2b5e513e66cb6
+        .quad   0xd7a8beefb3448aa4
+        .quad   0x7a2932856f5ea192
+
+// 2^76 * 2 * B
+
+        .quad   0x0b39d761b02de888
+        .quad   0x5f550e7ed2414e1f
+        .quad   0xa6bfa45822e1a940
+        .quad   0x050a2f7dfd447b99
+        .quad   0xb89c444879639302
+        .quad   0x4ae4f19350c67f2c
+        .quad   0xf0b35da8c81af9c6
+        .quad   0x39d0003546871017
+        .quad   0x437c3b33a650db77
+        .quad   0x6bafe81dbac52bb2
+        .quad   0xfe99402d2db7d318
+        .quad   0x2b5b7eec372ba6ce
+
+// 2^76 * 3 * B
+
+        .quad   0xb3bc4bbd83f50eef
+        .quad   0x508f0c998c927866
+        .quad   0x43e76587c8b7e66e
+        .quad   0x0f7655a3a47f98d9
+        .quad   0xa694404d613ac8f4
+        .quad   0x500c3c2bfa97e72c
+        .quad   0x874104d21fcec210
+        .quad   0x1b205fb38604a8ee
+        .quad   0x55ecad37d24b133c
+        .quad   0x441e147d6038c90b
+        .quad   0x656683a1d62c6fee
+        .quad   0x0157d5dc87e0ecae
+
+// 2^76 * 4 * B
+
+        .quad   0xf2a7af510354c13d
+        .quad   0xd7a0b145aa372b60
+        .quad   0x2869b96a05a3d470
+        .quad   0x6528e42d82460173
+        .quad   0x95265514d71eb524
+        .quad   0xe603d8815df14593
+        .quad   0x147cdf410d4de6b7
+        .quad   0x5293b1730437c850
+        .quad   0x23d0e0814bccf226
+        .quad   0x92c745cd8196fb93
+        .quad   0x8b61796c59541e5b
+        .quad   0x40a44df0c021f978
+
+// 2^76 * 5 * B
+
+        .quad   0xdaa869894f20ea6a
+        .quad   0xea14a3d14c620618
+        .quad   0x6001fccb090bf8be
+        .quad   0x35f4e822947e9cf0
+        .quad   0x86c96e514bc5d095
+        .quad   0xf20d4098fca6804a
+        .quad   0x27363d89c826ea5d
+        .quad   0x39ca36565719cacf
+        .quad   0x97506f2f6f87b75c
+        .quad   0xc624aea0034ae070
+        .quad   0x1ec856e3aad34dd6
+        .quad   0x055b0be0e440e58f
+
+// 2^76 * 6 * B
+
+        .quad   0x6469a17d89735d12
+        .quad   0xdb6f27d5e662b9f1
+        .quad   0x9fcba3286a395681
+        .quad   0x363b8004d269af25
+        .quad   0x4d12a04b6ea33da2
+        .quad   0x57cf4c15e36126dd
+        .quad   0x90ec9675ee44d967
+        .quad   0x64ca348d2a985aac
+        .quad   0x99588e19e4c4912d
+        .quad   0xefcc3b4e1ca5ce6b
+        .quad   0x4522ea60fa5b98d5
+        .quad   0x7064bbab1de4a819
+
+// 2^76 * 7 * B
+
+        .quad   0xb919e1515a770641
+        .quad   0xa9a2e2c74e7f8039
+        .quad   0x7527250b3df23109
+        .quad   0x756a7330ac27b78b
+        .quad   0xa290c06142542129
+        .quad   0xf2e2c2aebe8d5b90
+        .quad   0xcf2458db76abfe1b
+        .quad   0x02157ade83d626bf
+        .quad   0x3e46972a1b9a038b
+        .quad   0x2e4ee66a7ee03fb4
+        .quad   0x81a248776edbb4ca
+        .quad   0x1a944ee88ecd0563
+
+// 2^76 * 8 * B
+
+        .quad   0xd5a91d1151039372
+        .quad   0x2ed377b799ca26de
+        .quad   0xa17202acfd366b6b
+        .quad   0x0730291bd6901995
+        .quad   0xbb40a859182362d6
+        .quad   0xb99f55778a4d1abb
+        .quad   0x8d18b427758559f6
+        .quad   0x26c20fe74d26235a
+        .quad   0x648d1d9fe9cc22f5
+        .quad   0x66bc561928dd577c
+        .quad   0x47d3ed21652439d1
+        .quad   0x49d271acedaf8b49
+
+// 2^80 * 1 * B
+
+        .quad   0x89f5058a382b33f3
+        .quad   0x5ae2ba0bad48c0b4
+        .quad   0x8f93b503a53db36e
+        .quad   0x5aa3ed9d95a232e6
+        .quad   0x2798aaf9b4b75601
+        .quad   0x5eac72135c8dad72
+        .quad   0xd2ceaa6161b7a023
+        .quad   0x1bbfb284e98f7d4e
+        .quad   0x656777e9c7d96561
+        .quad   0xcb2b125472c78036
+        .quad   0x65053299d9506eee
+        .quad   0x4a07e14e5e8957cc
+
+// 2^80 * 2 * B
+
+        .quad   0x4ee412cb980df999
+        .quad   0xa315d76f3c6ec771
+        .quad   0xbba5edde925c77fd
+        .quad   0x3f0bac391d313402
+        .quad   0x240b58cdc477a49b
+        .quad   0xfd38dade6447f017
+        .quad   0x19928d32a7c86aad
+        .quad   0x50af7aed84afa081
+        .quad   0x6e4fde0115f65be5
+        .quad   0x29982621216109b2
+        .quad   0x780205810badd6d9
+        .quad   0x1921a316baebd006
+
+// 2^80 * 3 * B
+
+        .quad   0x89422f7edfb870fc
+        .quad   0x2c296beb4f76b3bd
+        .quad   0x0738f1d436c24df7
+        .quad   0x6458df41e273aeb0
+        .quad   0xd75aad9ad9f3c18b
+        .quad   0x566a0eef60b1c19c
+        .quad   0x3e9a0bac255c0ed9
+        .quad   0x7b049deca062c7f5
+        .quad   0xdccbe37a35444483
+        .quad   0x758879330fedbe93
+        .quad   0x786004c312c5dd87
+        .quad   0x6093dccbc2950e64
+
+// 2^80 * 4 * B
+
+        .quad   0x1ff39a8585e0706d
+        .quad   0x36d0a5d8b3e73933
+        .quad   0x43b9f2e1718f453b
+        .quad   0x57d1ea084827a97c
+        .quad   0x6bdeeebe6084034b
+        .quad   0x3199c2b6780fb854
+        .quad   0x973376abb62d0695
+        .quad   0x6e3180c98b647d90
+        .quad   0xee7ab6e7a128b071
+        .quad   0xa4c1596d93a88baa
+        .quad   0xf7b4de82b2216130
+        .quad   0x363e999ddd97bd18
+
+// 2^80 * 5 * B
+
+        .quad   0x96a843c135ee1fc4
+        .quad   0x976eb35508e4c8cf
+        .quad   0xb42f6801b58cd330
+        .quad   0x48ee9b78693a052b
+        .quad   0x2f1848dce24baec6
+        .quad   0x769b7255babcaf60
+        .quad   0x90cb3c6e3cefe931
+        .quad   0x231f979bc6f9b355
+        .quad   0x5c31de4bcc2af3c6
+        .quad   0xb04bb030fe208d1f
+        .quad   0xb78d7009c14fb466
+        .quad   0x079bfa9b08792413
+
+// 2^80 * 6 * B
+
+        .quad   0xe3903a51da300df4
+        .quad   0x843964233da95ab0
+        .quad   0xed3cf12d0b356480
+        .quad   0x038c77f684817194
+        .quad   0xf3c9ed80a2d54245
+        .quad   0x0aa08b7877f63952
+        .quad   0xd76dac63d1085475
+        .quad   0x1ef4fb159470636b
+        .quad   0x854e5ee65b167bec
+        .quad   0x59590a4296d0cdc2
+        .quad   0x72b2df3498102199
+        .quad   0x575ee92a4a0bff56
+
+// 2^80 * 7 * B
+
+        .quad   0xd4c080908a182fcf
+        .quad   0x30e170c299489dbd
+        .quad   0x05babd5752f733de
+        .quad   0x43d4e7112cd3fd00
+        .quad   0x5d46bc450aa4d801
+        .quad   0xc3af1227a533b9d8
+        .quad   0x389e3b262b8906c2
+        .quad   0x200a1e7e382f581b
+        .quad   0x518db967eaf93ac5
+        .quad   0x71bc989b056652c0
+        .quad   0xfe2b85d9567197f5
+        .quad   0x050eca52651e4e38
+
+// 2^80 * 8 * B
+
+        .quad   0xc3431ade453f0c9c
+        .quad   0xe9f5045eff703b9b
+        .quad   0xfcd97ac9ed847b3d
+        .quad   0x4b0ee6c21c58f4c6
+        .quad   0x97ac397660e668ea
+        .quad   0x9b19bbfe153ab497
+        .quad   0x4cb179b534eca79f
+        .quad   0x6151c09fa131ae57
+        .quad   0x3af55c0dfdf05d96
+        .quad   0xdd262ee02ab4ee7a
+        .quad   0x11b2bb8712171709
+        .quad   0x1fef24fa800f030b
+
+// 2^84 * 1 * B
+
+        .quad   0xb496123a6b6c6609
+        .quad   0xa750fe8580ab5938
+        .quad   0xf471bf39b7c27a5f
+        .quad   0x507903ce77ac193c
+        .quad   0xff91a66a90166220
+        .quad   0xf22552ae5bf1e009
+        .quad   0x7dff85d87f90df7c
+        .quad   0x4f620ffe0c736fb9
+        .quad   0x62f90d65dfde3e34
+        .quad   0xcf28c592b9fa5fad
+        .quad   0x99c86ef9c6164510
+        .quad   0x25d448044a256c84
+
+// 2^84 * 2 * B
+
+        .quad   0xbd68230ec7e9b16f
+        .quad   0x0eb1b9c1c1c5795d
+        .quad   0x7943c8c495b6b1ff
+        .quad   0x2f9faf620bbacf5e
+        .quad   0x2c7c4415c9022b55
+        .quad   0x56a0d241812eb1fe
+        .quad   0xf02ea1c9d7b65e0d
+        .quad   0x4180512fd5323b26
+        .quad   0xa4ff3e698a48a5db
+        .quad   0xba6a3806bd95403b
+        .quad   0x9f7ce1af47d5b65d
+        .quad   0x15e087e55939d2fb
+
+// 2^84 * 3 * B
+
+        .quad   0x12207543745c1496
+        .quad   0xdaff3cfdda38610c
+        .quad   0xe4e797272c71c34f
+        .quad   0x39c07b1934bdede9
+        .quad   0x8894186efb963f38
+        .quad   0x48a00e80dc639bd5
+        .quad   0xa4e8092be96c1c99
+        .quad   0x5a097d54ca573661
+        .quad   0x2d45892b17c9e755
+        .quad   0xd033fd7289308df8
+        .quad   0x6c2fe9d9525b8bd9
+        .quad   0x2edbecf1c11cc079
+
+// 2^84 * 4 * B
+
+        .quad   0x1616a4e3c715a0d2
+        .quad   0x53623cb0f8341d4d
+        .quad   0x96ef5329c7e899cb
+        .quad   0x3d4e8dbba668baa6
+        .quad   0xee0f0fddd087a25f
+        .quad   0x9c7531555c3e34ee
+        .quad   0x660c572e8fab3ab5
+        .quad   0x0854fc44544cd3b2
+        .quad   0x61eba0c555edad19
+        .quad   0x24b533fef0a83de6
+        .quad   0x3b77042883baa5f8
+        .quad   0x678f82b898a47e8d
+
+// 2^84 * 5 * B
+
+        .quad   0xb1491d0bd6900c54
+        .quad   0x3539722c9d132636
+        .quad   0x4db928920b362bc9
+        .quad   0x4d7cd1fea68b69df
+        .quad   0x1e09d94057775696
+        .quad   0xeed1265c3cd951db
+        .quad   0xfa9dac2b20bce16f
+        .quad   0x0f7f76e0e8d089f4
+        .quad   0x36d9ebc5d485b00c
+        .quad   0xa2596492e4adb365
+        .quad   0xc1659480c2119ccd
+        .quad   0x45306349186e0d5f
+
+// 2^84 * 6 * B
+
+        .quad   0x94ddd0c1a6cdff1d
+        .quad   0x55f6f115e84213ae
+        .quad   0x6c935f85992fcf6a
+        .quad   0x067ee0f54a37f16f
+        .quad   0x96a414ec2b072491
+        .quad   0x1bb2218127a7b65b
+        .quad   0x6d2849596e8a4af0
+        .quad   0x65f3b08ccd27765f
+        .quad   0xecb29fff199801f7
+        .quad   0x9d361d1fa2a0f72f
+        .quad   0x25f11d2375fd2f49
+        .quad   0x124cefe80fe10fe2
+
+// 2^84 * 7 * B
+
+        .quad   0x4c126cf9d18df255
+        .quad   0xc1d471e9147a63b6
+        .quad   0x2c6d3c73f3c93b5f
+        .quad   0x6be3a6a2e3ff86a2
+        .quad   0x1518e85b31b16489
+        .quad   0x8faadcb7db710bfb
+        .quad   0x39b0bdf4a14ae239
+        .quad   0x05f4cbea503d20c1
+        .quad   0xce040e9ec04145bc
+        .quad   0xc71ff4e208f6834c
+        .quad   0xbd546e8dab8847a3
+        .quad   0x64666aa0a4d2aba5
+
+// 2^84 * 8 * B
+
+        .quad   0x6841435a7c06d912
+        .quad   0xca123c21bb3f830b
+        .quad   0xd4b37b27b1cbe278
+        .quad   0x1d753b84c76f5046
+        .quad   0xb0c53bf73337e94c
+        .quad   0x7cb5697e11e14f15
+        .quad   0x4b84abac1930c750
+        .quad   0x28dd4abfe0640468
+        .quad   0x7dc0b64c44cb9f44
+        .quad   0x18a3e1ace3925dbf
+        .quad   0x7a3034862d0457c4
+        .quad   0x4c498bf78a0c892e
+
+// 2^88 * 1 * B
+
+        .quad   0x37d653fb1aa73196
+        .quad   0x0f9495303fd76418
+        .quad   0xad200b09fb3a17b2
+        .quad   0x544d49292fc8613e
+        .quad   0x22d2aff530976b86
+        .quad   0x8d90b806c2d24604
+        .quad   0xdca1896c4de5bae5
+        .quad   0x28005fe6c8340c17
+        .quad   0x6aefba9f34528688
+        .quad   0x5c1bff9425107da1
+        .quad   0xf75bbbcd66d94b36
+        .quad   0x72e472930f316dfa
+
+// 2^88 * 2 * B
+
+        .quad   0x2695208c9781084f
+        .quad   0xb1502a0b23450ee1
+        .quad   0xfd9daea603efde02
+        .quad   0x5a9d2e8c2733a34c
+        .quad   0x07f3f635d32a7627
+        .quad   0x7aaa4d865f6566f0
+        .quad   0x3c85e79728d04450
+        .quad   0x1fee7f000fe06438
+        .quad   0x765305da03dbf7e5
+        .quad   0xa4daf2491434cdbd
+        .quad   0x7b4ad5cdd24a88ec
+        .quad   0x00f94051ee040543
+
+// 2^88 * 3 * B
+
+        .quad   0x8d356b23c3d330b2
+        .quad   0xf21c8b9bb0471b06
+        .quad   0xb36c316c6e42b83c
+        .quad   0x07d79c7e8beab10d
+        .quad   0xd7ef93bb07af9753
+        .quad   0x583ed0cf3db766a7
+        .quad   0xce6998bf6e0b1ec5
+        .quad   0x47b7ffd25dd40452
+        .quad   0x87fbfb9cbc08dd12
+        .quad   0x8a066b3ae1eec29b
+        .quad   0x0d57242bdb1fc1bf
+        .quad   0x1c3520a35ea64bb6
+
+// 2^88 * 4 * B
+
+        .quad   0x80d253a6bccba34a
+        .quad   0x3e61c3a13838219b
+        .quad   0x90c3b6019882e396
+        .quad   0x1c3d05775d0ee66f
+        .quad   0xcda86f40216bc059
+        .quad   0x1fbb231d12bcd87e
+        .quad   0xb4956a9e17c70990
+        .quad   0x38750c3b66d12e55
+        .quad   0x692ef1409422e51a
+        .quad   0xcbc0c73c2b5df671
+        .quad   0x21014fe7744ce029
+        .quad   0x0621e2c7d330487c
+
+// 2^88 * 5 * B
+
+        .quad   0xaf9860cc8259838d
+        .quad   0x90ea48c1c69f9adc
+        .quad   0x6526483765581e30
+        .quad   0x0007d6097bd3a5bc
+        .quad   0xb7ae1796b0dbf0f3
+        .quad   0x54dfafb9e17ce196
+        .quad   0x25923071e9aaa3b4
+        .quad   0x5d8e589ca1002e9d
+        .quad   0xc0bf1d950842a94b
+        .quad   0xb2d3c363588f2e3e
+        .quad   0x0a961438bb51e2ef
+        .quad   0x1583d7783c1cbf86
+
+// 2^88 * 6 * B
+
+        .quad   0xeceea2ef5da27ae1
+        .quad   0x597c3a1455670174
+        .quad   0xc9a62a126609167a
+        .quad   0x252a5f2e81ed8f70
+        .quad   0x90034704cc9d28c7
+        .quad   0x1d1b679ef72cc58f
+        .quad   0x16e12b5fbe5b8726
+        .quad   0x4958064e83c5580a
+        .quad   0x0d2894265066e80d
+        .quad   0xfcc3f785307c8c6b
+        .quad   0x1b53da780c1112fd
+        .quad   0x079c170bd843b388
+
+// 2^88 * 7 * B
+
+        .quad   0x0506ece464fa6fff
+        .quad   0xbee3431e6205e523
+        .quad   0x3579422451b8ea42
+        .quad   0x6dec05e34ac9fb00
+        .quad   0xcdd6cd50c0d5d056
+        .quad   0x9af7686dbb03573b
+        .quad   0x3ca6723ff3c3ef48
+        .quad   0x6768c0d7317b8acc
+        .quad   0x94b625e5f155c1b3
+        .quad   0x417bf3a7997b7b91
+        .quad   0xc22cbddc6d6b2600
+        .quad   0x51445e14ddcd52f4
+
+// 2^88 * 8 * B
+
+        .quad   0x57502b4b3b144951
+        .quad   0x8e67ff6b444bbcb3
+        .quad   0xb8bd6927166385db
+        .quad   0x13186f31e39295c8
+        .quad   0x893147ab2bbea455
+        .quad   0x8c53a24f92079129
+        .quad   0x4b49f948be30f7a7
+        .quad   0x12e990086e4fd43d
+        .quad   0xf10c96b37fdfbb2e
+        .quad   0x9f9a935e121ceaf9
+        .quad   0xdf1136c43a5b983f
+        .quad   0x77b2e3f05d3e99af
+
+// 2^92 * 1 * B
+
+        .quad   0xfd0d75879cf12657
+        .quad   0xe82fef94e53a0e29
+        .quad   0xcc34a7f05bbb4be7
+        .quad   0x0b251172a50c38a2
+        .quad   0x9532f48fcc5cd29b
+        .quad   0x2ba851bea3ce3671
+        .quad   0x32dacaa051122941
+        .quad   0x478d99d9350004f2
+        .quad   0x1d5ad94890bb02c0
+        .quad   0x50e208b10ec25115
+        .quad   0xa26a22894ef21702
+        .quad   0x4dc923343b524805
+
+// 2^92 * 2 * B
+
+        .quad   0xe3828c400f8086b6
+        .quad   0x3f77e6f7979f0dc8
+        .quad   0x7ef6de304df42cb4
+        .quad   0x5265797cb6abd784
+        .quad   0x3ad3e3ebf36c4975
+        .quad   0xd75d25a537862125
+        .quad   0xe873943da025a516
+        .quad   0x6bbc7cb4c411c847
+        .quad   0x3c6f9cd1d4a50d56
+        .quad   0xb6244077c6feab7e
+        .quad   0x6ff9bf483580972e
+        .quad   0x00375883b332acfb
+
+// 2^92 * 3 * B
+
+        .quad   0x0001b2cd28cb0940
+        .quad   0x63fb51a06f1c24c9
+        .quad   0xb5ad8691dcd5ca31
+        .quad   0x67238dbd8c450660
+        .quad   0xc98bec856c75c99c
+        .quad   0xe44184c000e33cf4
+        .quad   0x0a676b9bba907634
+        .quad   0x669e2cb571f379d7
+        .quad   0xcb116b73a49bd308
+        .quad   0x025aad6b2392729e
+        .quad   0xb4793efa3f55d9b1
+        .quad   0x72a1056140678bb9
+
+// 2^92 * 4 * B
+
+        .quad   0xa2b6812b1cc9249d
+        .quad   0x62866eee21211f58
+        .quad   0x2cb5c5b85df10ece
+        .quad   0x03a6b259e263ae00
+        .quad   0x0d8d2909e2e505b6
+        .quad   0x98ca78abc0291230
+        .quad   0x77ef5569a9b12327
+        .quad   0x7c77897b81439b47
+        .quad   0xf1c1b5e2de331cb5
+        .quad   0x5a9f5d8e15fca420
+        .quad   0x9fa438f17bd932b1
+        .quad   0x2a381bf01c6146e7
+
+// 2^92 * 5 * B
+
+        .quad   0xac9b9879cfc811c1
+        .quad   0x8b7d29813756e567
+        .quad   0x50da4e607c70edfc
+        .quad   0x5dbca62f884400b6
+        .quad   0xf7c0be32b534166f
+        .quad   0x27e6ca6419cf70d4
+        .quad   0x934df7d7a957a759
+        .quad   0x5701461dabdec2aa
+        .quad   0x2c6747402c915c25
+        .quad   0x1bdcd1a80b0d340a
+        .quad   0x5e5601bd07b43f5f
+        .quad   0x2555b4e05539a242
+
+// 2^92 * 6 * B
+
+        .quad   0x6fc09f5266ddd216
+        .quad   0xdce560a7c8e37048
+        .quad   0xec65939da2df62fd
+        .quad   0x7a869ae7e52ed192
+        .quad   0x78409b1d87e463d4
+        .quad   0xad4da95acdfb639d
+        .quad   0xec28773755259b9c
+        .quad   0x69c806e9c31230ab
+        .quad   0x7b48f57414bb3f22
+        .quad   0x68c7cee4aedccc88
+        .quad   0xed2f936179ed80be
+        .quad   0x25d70b885f77bc4b
+
+// 2^92 * 7 * B
+
+        .quad   0x4151c3d9762bf4de
+        .quad   0x083f435f2745d82b
+        .quad   0x29775a2e0d23ddd5
+        .quad   0x138e3a6269a5db24
+        .quad   0x98459d29bb1ae4d4
+        .quad   0x56b9c4c739f954ec
+        .quad   0x832743f6c29b4b3e
+        .quad   0x21ea8e2798b6878a
+        .quad   0x87bef4b46a5a7b9c
+        .quad   0xd2299d1b5fc1d062
+        .quad   0x82409818dd321648
+        .quad   0x5c5abeb1e5a2e03d
+
+// 2^92 * 8 * B
+
+        .quad   0x14722af4b73c2ddb
+        .quad   0xbc470c5f5a05060d
+        .quad   0x00943eac2581b02e
+        .quad   0x0e434b3b1f499c8f
+        .quad   0x02cde6de1306a233
+        .quad   0x7b5a52a2116f8ec7
+        .quad   0xe1c681f4c1163b5b
+        .quad   0x241d350660d32643
+        .quad   0x6be4404d0ebc52c7
+        .quad   0xae46233bb1a791f5
+        .quad   0x2aec170ed25db42b
+        .quad   0x1d8dfd966645d694
+
+// 2^96 * 1 * B
+
+        .quad   0x296fa9c59c2ec4de
+        .quad   0xbc8b61bf4f84f3cb
+        .quad   0x1c7706d917a8f908
+        .quad   0x63b795fc7ad3255d
+        .quad   0xd598639c12ddb0a4
+        .quad   0xa5d19f30c024866b
+        .quad   0xd17c2f0358fce460
+        .quad   0x07a195152e095e8a
+        .quad   0xa8368f02389e5fc8
+        .quad   0x90433b02cf8de43b
+        .quad   0xafa1fd5dc5412643
+        .quad   0x3e8fe83d032f0137
+
+// 2^96 * 2 * B
+
+        .quad   0x2f8b15b90570a294
+        .quad   0x94f2427067084549
+        .quad   0xde1c5ae161bbfd84
+        .quad   0x75ba3b797fac4007
+        .quad   0x08704c8de8efd13c
+        .quad   0xdfc51a8e33e03731
+        .quad   0xa59d5da51260cde3
+        .quad   0x22d60899a6258c86
+        .quad   0x6239dbc070cdd196
+        .quad   0x60fe8a8b6c7d8a9a
+        .quad   0xb38847bceb401260
+        .quad   0x0904d07b87779e5e
+
+// 2^96 * 3 * B
+
+        .quad   0xb4ce1fd4ddba919c
+        .quad   0xcf31db3ec74c8daa
+        .quad   0x2c63cc63ad86cc51
+        .quad   0x43e2143fbc1dde07
+        .quad   0xf4322d6648f940b9
+        .quad   0x06952f0cbd2d0c39
+        .quad   0x167697ada081f931
+        .quad   0x6240aacebaf72a6c
+        .quad   0xf834749c5ba295a0
+        .quad   0xd6947c5bca37d25a
+        .quad   0x66f13ba7e7c9316a
+        .quad   0x56bdaf238db40cac
+
+// 2^96 * 4 * B
+
+        .quad   0x362ab9e3f53533eb
+        .quad   0x338568d56eb93d40
+        .quad   0x9e0e14521d5a5572
+        .quad   0x1d24a86d83741318
+        .quad   0x1310d36cc19d3bb2
+        .quad   0x062a6bb7622386b9
+        .quad   0x7c9b8591d7a14f5c
+        .quad   0x03aa31507e1e5754
+        .quad   0xf4ec7648ffd4ce1f
+        .quad   0xe045eaf054ac8c1c
+        .quad   0x88d225821d09357c
+        .quad   0x43b261dc9aeb4859
+
+// 2^96 * 5 * B
+
+        .quad   0xe55b1e1988bb79bb
+        .quad   0xa09ed07dc17a359d
+        .quad   0xb02c2ee2603dea33
+        .quad   0x326055cf5b276bc2
+        .quad   0x19513d8b6c951364
+        .quad   0x94fe7126000bf47b
+        .quad   0x028d10ddd54f9567
+        .quad   0x02b4d5e242940964
+        .quad   0xb4a155cb28d18df2
+        .quad   0xeacc4646186ce508
+        .quad   0xc49cf4936c824389
+        .quad   0x27a6c809ae5d3410
+
+// 2^96 * 6 * B
+
+        .quad   0x8ba6ebcd1f0db188
+        .quad   0x37d3d73a675a5be8
+        .quad   0xf22edfa315f5585a
+        .quad   0x2cb67174ff60a17e
+        .quad   0xcd2c270ac43d6954
+        .quad   0xdd4a3e576a66cab2
+        .quad   0x79fa592469d7036c
+        .quad   0x221503603d8c2599
+        .quad   0x59eecdf9390be1d0
+        .quad   0xa9422044728ce3f1
+        .quad   0x82891c667a94f0f4
+        .quad   0x7b1df4b73890f436
+
+// 2^96 * 7 * B
+
+        .quad   0xe492f2e0b3b2a224
+        .quad   0x7c6c9e062b551160
+        .quad   0x15eb8fe20d7f7b0e
+        .quad   0x61fcef2658fc5992
+        .quad   0x5f2e221807f8f58c
+        .quad   0xe3555c9fd49409d4
+        .quad   0xb2aaa88d1fb6a630
+        .quad   0x68698245d352e03d
+        .quad   0xdbb15d852a18187a
+        .quad   0xf3e4aad386ddacd7
+        .quad   0x44bae2810ff6c482
+        .quad   0x46cf4c473daf01cf
+
+// 2^96 * 8 * B
+
+        .quad   0x426525ed9ec4e5f9
+        .quad   0x0e5eda0116903303
+        .quad   0x72b1a7f2cbe5cadc
+        .quad   0x29387bcd14eb5f40
+        .quad   0x213c6ea7f1498140
+        .quad   0x7c1e7ef8392b4854
+        .quad   0x2488c38c5629ceba
+        .quad   0x1065aae50d8cc5bb
+        .quad   0x1c2c4525df200d57
+        .quad   0x5c3b2dd6bfca674a
+        .quad   0x0a07e7b1e1834030
+        .quad   0x69a198e64f1ce716
+
+// 2^100 * 1 * B
+
+        .quad   0x7afcd613efa9d697
+        .quad   0x0cc45aa41c067959
+        .quad   0xa56fe104c1fada96
+        .quad   0x3a73b70472e40365
+        .quad   0x7b26e56b9e2d4734
+        .quad   0xc4c7132b81c61675
+        .quad   0xef5c9525ec9cde7f
+        .quad   0x39c80b16e71743ad
+        .quad   0x0f196e0d1b826c68
+        .quad   0xf71ff0e24960e3db
+        .quad   0x6113167023b7436c
+        .quad   0x0cf0ea5877da7282
+
+// 2^100 * 2 * B
+
+        .quad   0x196c80a4ddd4ccbd
+        .quad   0x22e6f55d95f2dd9d
+        .quad   0xc75e33c740d6c71b
+        .quad   0x7bb51279cb3c042f
+        .quad   0xe332ced43ba6945a
+        .quad   0xde0b1361e881c05d
+        .quad   0x1ad40f095e67ed3b
+        .quad   0x5da8acdab8c63d5d
+        .quad   0xc4b6664a3a70159f
+        .quad   0x76194f0f0a904e14
+        .quad   0xa5614c39a4096c13
+        .quad   0x6cd0ff50979feced
+
+// 2^100 * 3 * B
+
+        .quad   0xc0e067e78f4428ac
+        .quad   0x14835ab0a61135e3
+        .quad   0xf21d14f338062935
+        .quad   0x6390a4c8df04849c
+        .quad   0x7fecfabdb04ba18e
+        .quad   0xd0fc7bfc3bddbcf7
+        .quad   0xa41d486e057a131c
+        .quad   0x641a4391f2223a61
+        .quad   0xc5c6b95aa606a8db
+        .quad   0x914b7f9eb06825f1
+        .quad   0x2a731f6b44fc9eff
+        .quad   0x30ddf38562705cfc
+
+// 2^100 * 4 * B
+
+        .quad   0x4e3dcbdad1bff7f9
+        .quad   0xc9118e8220645717
+        .quad   0xbacccebc0f189d56
+        .quad   0x1b4822e9d4467668
+        .quad   0x33bef2bd68bcd52c
+        .quad   0xc649dbb069482ef2
+        .quad   0xb5b6ee0c41cb1aee
+        .quad   0x5c294d270212a7e5
+        .quad   0xab360a7f25563781
+        .quad   0x2512228a480f7958
+        .quad   0xc75d05276114b4e3
+        .quad   0x222d9625d976fe2a
+
+// 2^100 * 5 * B
+
+        .quad   0x1c717f85b372ace1
+        .quad   0x81930e694638bf18
+        .quad   0x239cad056bc08b58
+        .quad   0x0b34271c87f8fff4
+        .quad   0x0f94be7e0a344f85
+        .quad   0xeb2faa8c87f22c38
+        .quad   0x9ce1e75e4ee16f0f
+        .quad   0x43e64e5418a08dea
+        .quad   0x8155e2521a35ce63
+        .quad   0xbe100d4df912028e
+        .quad   0xbff80bf8a57ddcec
+        .quad   0x57342dc96d6bc6e4
+
+// 2^100 * 6 * B
+
+        .quad   0xefeef065c8ce5998
+        .quad   0xbf029510b5cbeaa2
+        .quad   0x8c64a10620b7c458
+        .quad   0x35134fb231c24855
+        .quad   0xf3c3bcb71e707bf6
+        .quad   0x351d9b8c7291a762
+        .quad   0x00502e6edad69a33
+        .quad   0x522f521f1ec8807f
+        .quad   0x272c1f46f9a3902b
+        .quad   0xc91ba3b799657bcc
+        .quad   0xae614b304f8a1c0e
+        .quad   0x7afcaad70b99017b
+
+// 2^100 * 7 * B
+
+        .quad   0xc25ded54a4b8be41
+        .quad   0x902d13e11bb0e2dd
+        .quad   0x41f43233cde82ab2
+        .quad   0x1085faa5c3aae7cb
+        .quad   0xa88141ecef842b6b
+        .quad   0x55e7b14797abe6c5
+        .quad   0x8c748f9703784ffe
+        .quad   0x5b50a1f7afcd00b7
+        .quad   0x9b840f66f1361315
+        .quad   0x18462242701003e9
+        .quad   0x65ed45fae4a25080
+        .quad   0x0a2862393fda7320
+
+// 2^100 * 8 * B
+
+        .quad   0x46ab13c8347cbc9d
+        .quad   0x3849e8d499c12383
+        .quad   0x4cea314087d64ac9
+        .quad   0x1f354134b1a29ee7
+        .quad   0x960e737b6ecb9d17
+        .quad   0xfaf24948d67ceae1
+        .quad   0x37e7a9b4d55e1b89
+        .quad   0x5cb7173cb46c59eb
+        .quad   0x4a89e68b82b7abf0
+        .quad   0xf41cd9279ba6b7b9
+        .quad   0x16e6c210e18d876f
+        .quad   0x7cacdb0f7f1b09c6
+
+// 2^104 * 1 * B
+
+        .quad   0x9062b2e0d91a78bc
+        .quad   0x47c9889cc8509667
+        .quad   0x9df54a66405070b8
+        .quad   0x7369e6a92493a1bf
+        .quad   0xe1014434dcc5caed
+        .quad   0x47ed5d963c84fb33
+        .quad   0x70019576ed86a0e7
+        .quad   0x25b2697bd267f9e4
+        .quad   0x9d673ffb13986864
+        .quad   0x3ca5fbd9415dc7b8
+        .quad   0xe04ecc3bdf273b5e
+        .quad   0x1420683db54e4cd2
+
+// 2^104 * 2 * B
+
+        .quad   0xb478bd1e249dd197
+        .quad   0x620c35005e58c102
+        .quad   0xfb02d32fccbaac5c
+        .quad   0x60b63bebf508a72d
+        .quad   0x34eebb6fc1cc5ad0
+        .quad   0x6a1b0ce99646ac8b
+        .quad   0xd3b0da49a66bde53
+        .quad   0x31e83b4161d081c1
+        .quad   0x97e8c7129e062b4f
+        .quad   0x49e48f4f29320ad8
+        .quad   0x5bece14b6f18683f
+        .quad   0x55cf1eb62d550317
+
+// 2^104 * 3 * B
+
+        .quad   0x5879101065c23d58
+        .quad   0x8b9d086d5094819c
+        .quad   0xe2402fa912c55fa7
+        .quad   0x669a6564570891d4
+        .quad   0x3076b5e37df58c52
+        .quad   0xd73ab9dde799cc36
+        .quad   0xbd831ce34913ee20
+        .quad   0x1a56fbaa62ba0133
+        .quad   0x943e6b505c9dc9ec
+        .quad   0x302557bba77c371a
+        .quad   0x9873ae5641347651
+        .quad   0x13c4836799c58a5c
+
+// 2^104 * 4 * B
+
+        .quad   0x423a5d465ab3e1b9
+        .quad   0xfc13c187c7f13f61
+        .quad   0x19f83664ecb5b9b6
+        .quad   0x66f80c93a637b607
+        .quad   0xc4dcfb6a5d8bd080
+        .quad   0xdeebc4ec571a4842
+        .quad   0xd4b2e883b8e55365
+        .quad   0x50bdc87dc8e5b827
+        .quad   0x606d37836edfe111
+        .quad   0x32353e15f011abd9
+        .quad   0x64b03ac325b73b96
+        .quad   0x1dd56444725fd5ae
+
+// 2^104 * 5 * B
+
+        .quad   0x8fa47ff83362127d
+        .quad   0xbc9f6ac471cd7c15
+        .quad   0x6e71454349220c8b
+        .quad   0x0e645912219f732e
+        .quad   0xc297e60008bac89a
+        .quad   0x7d4cea11eae1c3e0
+        .quad   0xf3e38be19fe7977c
+        .quad   0x3a3a450f63a305cd
+        .quad   0x078f2f31d8394627
+        .quad   0x389d3183de94a510
+        .quad   0xd1e36c6d17996f80
+        .quad   0x318c8d9393a9a87b
+
+// 2^104 * 6 * B
+
+        .quad   0xf2745d032afffe19
+        .quad   0x0c9f3c497f24db66
+        .quad   0xbc98d3e3ba8598ef
+        .quad   0x224c7c679a1d5314
+        .quad   0x5d669e29ab1dd398
+        .quad   0xfc921658342d9e3b
+        .quad   0x55851dfdf35973cd
+        .quad   0x509a41c325950af6
+        .quad   0xbdc06edca6f925e9
+        .quad   0x793ef3f4641b1f33
+        .quad   0x82ec12809d833e89
+        .quad   0x05bff02328a11389
+
+// 2^104 * 7 * B
+
+        .quad   0x3632137023cae00b
+        .quad   0x544acf0ad1accf59
+        .quad   0x96741049d21a1c88
+        .quad   0x780b8cc3fa2a44a7
+        .quad   0x6881a0dd0dc512e4
+        .quad   0x4fe70dc844a5fafe
+        .quad   0x1f748e6b8f4a5240
+        .quad   0x576277cdee01a3ea
+        .quad   0x1ef38abc234f305f
+        .quad   0x9a577fbd1405de08
+        .quad   0x5e82a51434e62a0d
+        .quad   0x5ff418726271b7a1
+
+// 2^104 * 8 * B
+
+        .quad   0x398e080c1789db9d
+        .quad   0xa7602025f3e778f5
+        .quad   0xfa98894c06bd035d
+        .quad   0x106a03dc25a966be
+        .quad   0xe5db47e813b69540
+        .quad   0xf35d2a3b432610e1
+        .quad   0xac1f26e938781276
+        .quad   0x29d4db8ca0a0cb69
+        .quad   0xd9ad0aaf333353d0
+        .quad   0x38669da5acd309e5
+        .quad   0x3c57658ac888f7f0
+        .quad   0x4ab38a51052cbefa
+
+// 2^108 * 1 * B
+
+        .quad   0xdfdacbee4324c0e9
+        .quad   0x054442883f955bb7
+        .quad   0xdef7aaa8ea31609f
+        .quad   0x68aee70642287cff
+        .quad   0xf68fe2e8809de054
+        .quad   0xe3bc096a9c82bad1
+        .quad   0x076353d40aadbf45
+        .quad   0x7b9b1fb5dea1959e
+        .quad   0xf01cc8f17471cc0c
+        .quad   0x95242e37579082bb
+        .quad   0x27776093d3e46b5f
+        .quad   0x2d13d55a28bd85fb
+
+// 2^108 * 2 * B
+
+        .quad   0xfac5d2065b35b8da
+        .quad   0xa8da8a9a85624bb7
+        .quad   0xccd2ca913d21cd0f
+        .quad   0x6b8341ee8bf90d58
+        .quad   0xbf019cce7aee7a52
+        .quad   0xa8ded2b6e454ead3
+        .quad   0x3c619f0b87a8bb19
+        .quad   0x3619b5d7560916d8
+        .quad   0x3579f26b0282c4b2
+        .quad   0x64d592f24fafefae
+        .quad   0xb7cded7b28c8c7c0
+        .quad   0x6a927b6b7173a8d7
+
+// 2^108 * 3 * B
+
+        .quad   0x1f6db24f986e4656
+        .quad   0x1021c02ed1e9105b
+        .quad   0xf8ff3fff2cc0a375
+        .quad   0x1d2a6bf8c6c82592
+        .quad   0x8d7040863ece88eb
+        .quad   0xf0e307a980eec08c
+        .quad   0xac2250610d788fda
+        .quad   0x056d92a43a0d478d
+        .quad   0x1b05a196fc3da5a1
+        .quad   0x77d7a8c243b59ed0
+        .quad   0x06da3d6297d17918
+        .quad   0x66fbb494f12353f7
+
+// 2^108 * 4 * B
+
+        .quad   0x751a50b9d85c0fb8
+        .quad   0xd1afdc258bcf097b
+        .quad   0x2f16a6a38309a969
+        .quad   0x14ddff9ee5b00659
+        .quad   0xd6d70996f12309d6
+        .quad   0xdbfb2385e9c3d539
+        .quad   0x46d602b0f7552411
+        .quad   0x270a0b0557843e0c
+        .quad   0x61ff0640a7862bcc
+        .quad   0x81cac09a5f11abfe
+        .quad   0x9047830455d12abb
+        .quad   0x19a4bde1945ae873
+
+// 2^108 * 5 * B
+
+        .quad   0x9b9f26f520a6200a
+        .quad   0x64804443cf13eaf8
+        .quad   0x8a63673f8631edd3
+        .quad   0x72bbbce11ed39dc1
+        .quad   0x40c709dec076c49f
+        .quad   0x657bfaf27f3e53f6
+        .quad   0x40662331eca042c4
+        .quad   0x14b375487eb4df04
+        .quad   0xae853c94ab66dc47
+        .quad   0xeb62343edf762d6e
+        .quad   0xf08e0e186fb2f7d1
+        .quad   0x4f0b1c02700ab37a
+
+// 2^108 * 6 * B
+
+        .quad   0xe1706787d81951fa
+        .quad   0xa10a2c8eb290c77b
+        .quad   0xe7382fa03ed66773
+        .quad   0x0a4d84710bcc4b54
+        .quad   0x79fd21ccc1b2e23f
+        .quad   0x4ae7c281453df52a
+        .quad   0xc8172ec9d151486b
+        .quad   0x68abe9443e0a7534
+        .quad   0xda12c6c407831dcb
+        .quad   0x0da230d74d5c510d
+        .quad   0x4ab1531e6bd404e1
+        .quad   0x4106b166bcf440ef
+
+// 2^108 * 7 * B
+
+        .quad   0x02e57a421cd23668
+        .quad   0x4ad9fb5d0eaef6fd
+        .quad   0x954e6727b1244480
+        .quad   0x7f792f9d2699f331
+        .quad   0xa485ccd539e4ecf2
+        .quad   0x5aa3f3ad0555bab5
+        .quad   0x145e3439937df82d
+        .quad   0x1238b51e1214283f
+        .quad   0x0b886b925fd4d924
+        .quad   0x60906f7a3626a80d
+        .quad   0xecd367b4b98abd12
+        .quad   0x2876beb1def344cf
+
+// 2^108 * 8 * B
+
+        .quad   0xdc84e93563144691
+        .quad   0x632fe8a0d61f23f4
+        .quad   0x4caa800612a9a8d5
+        .quad   0x48f9dbfa0e9918d3
+        .quad   0xd594b3333a8a85f8
+        .quad   0x4ea37689e78d7d58
+        .quad   0x73bf9f455e8e351f
+        .quad   0x5507d7d2bc41ebb4
+        .quad   0x1ceb2903299572fc
+        .quad   0x7c8ccaa29502d0ee
+        .quad   0x91bfa43411cce67b
+        .quad   0x5784481964a831e7
+
+// 2^112 * 1 * B
+
+        .quad   0xda7c2b256768d593
+        .quad   0x98c1c0574422ca13
+        .quad   0xf1a80bd5ca0ace1d
+        .quad   0x29cdd1adc088a690
+        .quad   0xd6cfd1ef5fddc09c
+        .quad   0xe82b3efdf7575dce
+        .quad   0x25d56b5d201634c2
+        .quad   0x3041c6bb04ed2b9b
+        .quad   0x0ff2f2f9d956e148
+        .quad   0xade797759f356b2e
+        .quad   0x1a4698bb5f6c025c
+        .quad   0x104bbd6814049a7b
+
+// 2^112 * 2 * B
+
+        .quad   0x51f0fd3168f1ed67
+        .quad   0x2c811dcdd86f3bc2
+        .quad   0x44dc5c4304d2f2de
+        .quad   0x5be8cc57092a7149
+        .quad
0xa95d9a5fd67ff163 + .quad 0xe92be69d4cc75681 + .quad 0xb7f8024cde20f257 + .quad 0x204f2a20fb072df5 + .quad 0xc8143b3d30ebb079 + .quad 0x7589155abd652e30 + .quad 0x653c3c318f6d5c31 + .quad 0x2570fb17c279161f + + // 2^112 * 3 * B + + .quad 0x3efa367f2cb61575 + .quad 0xf5f96f761cd6026c + .quad 0xe8c7142a65b52562 + .quad 0x3dcb65ea53030acd + .quad 0x192ea9550bb8245a + .quad 0xc8e6fba88f9050d1 + .quad 0x7986ea2d88a4c935 + .quad 0x241c5f91de018668 + .quad 0x28d8172940de6caa + .quad 0x8fbf2cf022d9733a + .quad 0x16d7fcdd235b01d1 + .quad 0x08420edd5fcdf0e5 + + // 2^112 * 4 * B + + .quad 0xcdff20ab8362fa4a + .quad 0x57e118d4e21a3e6e + .quad 0xe3179617fc39e62b + .quad 0x0d9a53efbc1769fd + .quad 0x0358c34e04f410ce + .quad 0xb6135b5a276e0685 + .quad 0x5d9670c7ebb91521 + .quad 0x04d654f321db889c + .quad 0x5e7dc116ddbdb5d5 + .quad 0x2954deb68da5dd2d + .quad 0x1cb608173334a292 + .quad 0x4a7a4f2618991ad7 + + // 2^112 * 5 * B + + .quad 0xf4a718025fb15f95 + .quad 0x3df65f346b5c1b8f + .quad 0xcdfcf08500e01112 + .quad 0x11b50c4cddd31848 + .quad 0x24c3b291af372a4b + .quad 0x93da8270718147f2 + .quad 0xdd84856486899ef2 + .quad 0x4a96314223e0ee33 + .quad 0xa6e8274408a4ffd6 + .quad 0x738e177e9c1576d9 + .quad 0x773348b63d02b3f2 + .quad 0x4f4bce4dce6bcc51 + + // 2^112 * 6 * B + + .quad 0xa71fce5ae2242584 + .quad 0x26ea725692f58a9e + .quad 0xd21a09d71cea3cf4 + .quad 0x73fcdd14b71c01e6 + .quad 0x30e2616ec49d0b6f + .quad 0xe456718fcaec2317 + .quad 0x48eb409bf26b4fa6 + .quad 0x3042cee561595f37 + .quad 0x427e7079449bac41 + .quad 0x855ae36dbce2310a + .quad 0x4cae76215f841a7c + .quad 0x389e740c9a9ce1d6 + + // 2^112 * 7 * B + + .quad 0x64fcb3ae34dcb9ce + .quad 0x97500323e348d0ad + .quad 0x45b3f07d62c6381b + .quad 0x61545379465a6788 + .quad 0xc9bd78f6570eac28 + .quad 0xe55b0b3227919ce1 + .quad 0x65fc3eaba19b91ed + .quad 0x25c425e5d6263690 + .quad 0x3f3e06a6f1d7de6e + .quad 0x3ef976278e062308 + .quad 0x8c14f6264e8a6c77 + .quad 0x6539a08915484759 + + // 2^112 * 8 * B + + .quad 0xe9d21f74c3d2f773 + .quad 0xc150544125c46845 + .quad 0x624e5ce8f9b99e33 + .quad 0x11c5e4aac5cd186c + .quad 0xddc4dbd414bb4a19 + .quad 0x19b2bc3c98424f8e + .quad 0x48a89fd736ca7169 + .quad 0x0f65320ef019bd90 + .quad 0xd486d1b1cafde0c6 + .quad 0x4f3fe6e3163b5181 + .quad 0x59a8af0dfaf2939a + .quad 0x4cabc7bdec33072a + + // 2^116 * 1 * B + + .quad 0x16faa8fb532f7428 + .quad 0xdbd42ea046a4e272 + .quad 0x5337653b8b9ea480 + .quad 0x4065947223973f03 + .quad 0xf7c0a19c1a54a044 + .quad 0x4a1c5e2477bd9fbb + .quad 0xa6e3ca115af22972 + .quad 0x1819bb953f2e9e0d + .quad 0x498fbb795e042e84 + .quad 0x7d0dd89a7698b714 + .quad 0x8bfb0ba427fe6295 + .quad 0x36ba82e721200524 + + // 2^116 * 2 * B + + .quad 0xd60ecbb74245ec41 + .quad 0xfd9be89e34348716 + .quad 0xc9240afee42284de + .quad 0x4472f648d0531db4 + .quad 0xc8d69d0a57274ed5 + .quad 0x45ba803260804b17 + .quad 0xdf3cda102255dfac + .quad 0x77d221232709b339 + .quad 0x498a6d7064ad94d8 + .quad 0xa5b5c8fd9af62263 + .quad 0x8ca8ed0545c141f4 + .quad 0x2c63bec3662d358c + + // 2^116 * 3 * B + + .quad 0x7fe60d8bea787955 + .quad 0xb9dc117eb5f401b7 + .quad 0x91c7c09a19355cce + .quad 0x22692ef59442bedf + .quad 0x9a518b3a8586f8bf + .quad 0x9ee71af6cbb196f0 + .quad 0xaa0625e6a2385cf2 + .quad 0x1deb2176ddd7c8d1 + .quad 0x8563d19a2066cf6c + .quad 0x401bfd8c4dcc7cd7 + .quad 0xd976a6becd0d8f62 + .quad 0x67cfd773a278b05e + + // 2^116 * 4 * B + + .quad 0x8dec31faef3ee475 + .quad 0x99dbff8a9e22fd92 + .quad 0x512d11594e26cab1 + .quad 0x0cde561eec4310b9 + .quad 0x2d5fa9855a4e586a + .quad 0x65f8f7a449beab7e + .quad 0xaa074dddf21d33d3 + .quad 
0x185cba721bcb9dee + .quad 0x93869da3f4e3cb41 + .quad 0xbf0392f540f7977e + .quad 0x026204fcd0463b83 + .quad 0x3ec91a769eec6eed + + // 2^116 * 5 * B + + .quad 0x1e9df75bf78166ad + .quad 0x4dfda838eb0cd7af + .quad 0xba002ed8c1eaf988 + .quad 0x13fedb3e11f33cfc + .quad 0x0fad2fb7b0a3402f + .quad 0x46615ecbfb69f4a8 + .quad 0xf745bcc8c5f8eaa6 + .quad 0x7a5fa8794a94e896 + .quad 0x52958faa13cd67a1 + .quad 0x965ee0818bdbb517 + .quad 0x16e58daa2e8845b3 + .quad 0x357d397d5499da8f + + // 2^116 * 6 * B + + .quad 0x1ebfa05fb0bace6c + .quad 0xc934620c1caf9a1e + .quad 0xcc771cc41d82b61a + .quad 0x2d94a16aa5f74fec + .quad 0x481dacb4194bfbf8 + .quad 0x4d77e3f1bae58299 + .quad 0x1ef4612e7d1372a0 + .quad 0x3a8d867e70ff69e1 + .quad 0x6f58cd5d55aff958 + .quad 0xba3eaa5c75567721 + .quad 0x75c123999165227d + .quad 0x69be1343c2f2b35e + + // 2^116 * 7 * B + + .quad 0x0e091d5ee197c92a + .quad 0x4f51019f2945119f + .quad 0x143679b9f034e99c + .quad 0x7d88112e4d24c696 + .quad 0x82bbbdac684b8de3 + .quad 0xa2f4c7d03fca0718 + .quad 0x337f92fbe096aaa8 + .quad 0x200d4d8c63587376 + .quad 0x208aed4b4893b32b + .quad 0x3efbf23ebe59b964 + .quad 0xd762deb0dba5e507 + .quad 0x69607bd681bd9d94 + + // 2^116 * 8 * B + + .quad 0xf6be021068de1ce1 + .quad 0xe8d518e70edcbc1f + .quad 0xe3effdd01b5505a5 + .quad 0x35f63353d3ec3fd0 + .quad 0x3b7f3bd49323a902 + .quad 0x7c21b5566b2c6e53 + .quad 0xe5ba8ff53a7852a7 + .quad 0x28bc77a5838ece00 + .quad 0x63ba78a8e25d8036 + .quad 0x63651e0094333490 + .quad 0x48d82f20288ce532 + .quad 0x3a31abfa36b57524 + + // 2^120 * 1 * B + + .quad 0x239e9624089c0a2e + .quad 0xc748c4c03afe4738 + .quad 0x17dbed2a764fa12a + .quad 0x639b93f0321c8582 + .quad 0xc08f788f3f78d289 + .quad 0xfe30a72ca1404d9f + .quad 0xf2778bfccf65cc9d + .quad 0x7ee498165acb2021 + .quad 0x7bd508e39111a1c3 + .quad 0x2b2b90d480907489 + .quad 0xe7d2aec2ae72fd19 + .quad 0x0edf493c85b602a6 + + // 2^120 * 2 * B + + .quad 0xaecc8158599b5a68 + .quad 0xea574f0febade20e + .quad 0x4fe41d7422b67f07 + .quad 0x403b92e3019d4fb4 + .quad 0x6767c4d284764113 + .quad 0xa090403ff7f5f835 + .quad 0x1c8fcffacae6bede + .quad 0x04c00c54d1dfa369 + .quad 0x4dc22f818b465cf8 + .quad 0x71a0f35a1480eff8 + .quad 0xaee8bfad04c7d657 + .quad 0x355bb12ab26176f4 + + // 2^120 * 3 * B + + .quad 0xa71e64cc7493bbf4 + .quad 0xe5bd84d9eca3b0c3 + .quad 0x0a6bc50cfa05e785 + .quad 0x0f9b8132182ec312 + .quad 0xa301dac75a8c7318 + .quad 0xed90039db3ceaa11 + .quad 0x6f077cbf3bae3f2d + .quad 0x7518eaf8e052ad8e + .quad 0xa48859c41b7f6c32 + .quad 0x0f2d60bcf4383298 + .quad 0x1815a929c9b1d1d9 + .quad 0x47c3871bbb1755c4 + + // 2^120 * 4 * B + + .quad 0x5144539771ec4f48 + .quad 0xf805b17dc98c5d6e + .quad 0xf762c11a47c3c66b + .quad 0x00b89b85764699dc + .quad 0xfbe65d50c85066b0 + .quad 0x62ecc4b0b3a299b0 + .quad 0xe53754ea441ae8e0 + .quad 0x08fea02ce8d48d5f + .quad 0x824ddd7668deead0 + .quad 0xc86445204b685d23 + .quad 0xb514cfcd5d89d665 + .quad 0x473829a74f75d537 + + // 2^120 * 5 * B + + .quad 0x82d2da754679c418 + .quad 0xe63bd7d8b2618df0 + .quad 0x355eef24ac47eb0a + .quad 0x2078684c4833c6b4 + .quad 0x23d9533aad3902c9 + .quad 0x64c2ddceef03588f + .quad 0x15257390cfe12fb4 + .quad 0x6c668b4d44e4d390 + .quad 0x3b48cf217a78820c + .quad 0xf76a0ab281273e97 + .quad 0xa96c65a78c8eed7b + .quad 0x7411a6054f8a433f + + // 2^120 * 6 * B + + .quad 0x4d659d32b99dc86d + .quad 0x044cdc75603af115 + .quad 0xb34c712cdcc2e488 + .quad 0x7c136574fb8134ff + .quad 0x579ae53d18b175b4 + .quad 0x68713159f392a102 + .quad 0x8455ecba1eef35f5 + .quad 0x1ec9a872458c398f + .quad 0xb8e6a4d400a2509b + .quad 0x9b81d7020bc882b4 + .quad 
0x57e7cc9bf1957561 + .quad 0x3add88a5c7cd6460 + + // 2^120 * 7 * B + + .quad 0xab895770b635dcf2 + .quad 0x02dfef6cf66c1fbc + .quad 0x85530268beb6d187 + .quad 0x249929fccc879e74 + .quad 0x85c298d459393046 + .quad 0x8f7e35985ff659ec + .quad 0x1d2ca22af2f66e3a + .quad 0x61ba1131a406a720 + .quad 0xa3d0a0f116959029 + .quad 0x023b6b6cba7ebd89 + .quad 0x7bf15a3e26783307 + .quad 0x5620310cbbd8ece7 + + // 2^120 * 8 * B + + .quad 0x528993434934d643 + .quad 0xb9dbf806a51222f5 + .quad 0x8f6d878fc3f41c22 + .quad 0x37676a2a4d9d9730 + .quad 0x6646b5f477e285d6 + .quad 0x40e8ff676c8f6193 + .quad 0xa6ec7311abb594dd + .quad 0x7ec846f3658cec4d + .quad 0x9b5e8f3f1da22ec7 + .quad 0x130f1d776c01cd13 + .quad 0x214c8fcfa2989fb8 + .quad 0x6daaf723399b9dd5 + + // 2^124 * 1 * B + + .quad 0x591e4a5610628564 + .quad 0x2a4bb87ca8b4df34 + .quad 0xde2a2572e7a38e43 + .quad 0x3cbdabd9fee5046e + .quad 0x81aebbdd2cd13070 + .quad 0x962e4325f85a0e9e + .quad 0xde9391aacadffecb + .quad 0x53177fda52c230e6 + .quad 0xa7bc970650b9de79 + .quad 0x3d12a7fbc301b59b + .quad 0x02652e68d36ae38c + .quad 0x79d739835a6199dc + + // 2^124 * 2 * B + + .quad 0xd9354df64131c1bd + .quad 0x758094a186ec5822 + .quad 0x4464ee12e459f3c2 + .quad 0x6c11fce4cb133282 + .quad 0x21c9d9920d591737 + .quad 0x9bea41d2e9b46cd6 + .quad 0xe20e84200d89bfca + .quad 0x79d99f946eae5ff8 + .quad 0xf17b483568673205 + .quad 0x387deae83caad96c + .quad 0x61b471fd56ffe386 + .quad 0x31741195b745a599 + + // 2^124 * 3 * B + + .quad 0xe8d10190b77a360b + .quad 0x99b983209995e702 + .quad 0xbd4fdff8fa0247aa + .quad 0x2772e344e0d36a87 + .quad 0x17f8ba683b02a047 + .quad 0x50212096feefb6c8 + .quad 0x70139be21556cbe2 + .quad 0x203e44a11d98915b + .quad 0xd6863eba37b9e39f + .quad 0x105bc169723b5a23 + .quad 0x104f6459a65c0762 + .quad 0x567951295b4d38d4 + + // 2^124 * 4 * B + + .quad 0x535fd60613037524 + .quad 0xe210adf6b0fbc26a + .quad 0xac8d0a9b23e990ae + .quad 0x47204d08d72fdbf9 + .quad 0x07242eb30d4b497f + .quad 0x1ef96306b9bccc87 + .quad 0x37950934d8116f45 + .quad 0x05468d6201405b04 + .quad 0x00f565a9f93267de + .quad 0xcecfd78dc0d58e8a + .quad 0xa215e2dcf318e28e + .quad 0x4599ee919b633352 + + // 2^124 * 5 * B + + .quad 0xd3c220ca70e0e76b + .quad 0xb12bea58ea9f3094 + .quad 0x294ddec8c3271282 + .quad 0x0c3539e1a1d1d028 + .quad 0xac746d6b861ae579 + .quad 0x31ab0650f6aea9dc + .quad 0x241d661140256d4c + .quad 0x2f485e853d21a5de + .quad 0x329744839c0833f3 + .quad 0x6fe6257fd2abc484 + .quad 0x5327d1814b358817 + .quad 0x65712585893fe9bc + + // 2^124 * 6 * B + + .quad 0x9c102fb732a61161 + .quad 0xe48e10dd34d520a8 + .quad 0x365c63546f9a9176 + .quad 0x32f6fe4c046f6006 + .quad 0x81c29f1bd708ee3f + .quad 0xddcb5a05ae6407d0 + .quad 0x97aec1d7d2a3eba7 + .quad 0x1590521a91d50831 + .quad 0x40a3a11ec7910acc + .quad 0x9013dff8f16d27ae + .quad 0x1a9720d8abb195d4 + .quad 0x1bb9fe452ea98463 + + // 2^124 * 7 * B + + .quad 0xe9d1d950b3d54f9e + .quad 0x2d5f9cbee00d33c1 + .quad 0x51c2c656a04fc6ac + .quad 0x65c091ee3c1cbcc9 + .quad 0xcf5e6c95cc36747c + .quad 0x294201536b0bc30d + .quad 0x453ac67cee797af0 + .quad 0x5eae6ab32a8bb3c9 + .quad 0x7083661114f118ea + .quad 0x2b37b87b94349cad + .quad 0x7273f51cb4e99f40 + .quad 0x78a2a95823d75698 + + // 2^124 * 8 * B + + .quad 0xa2b072e95c8c2ace + .quad 0x69cffc96651e9c4b + .quad 0x44328ef842e7b42b + .quad 0x5dd996c122aadeb3 + .quad 0xb4f23c425ef83207 + .quad 0xabf894d3c9a934b5 + .quad 0xd0708c1339fd87f7 + .quad 0x1876789117166130 + .quad 0x925b5ef0670c507c + .quad 0x819bc842b93c33bf + .quad 0x10792e9a70dd003f + .quad 0x59ad4b7a6e28dc74 + + // 2^128 * 1 * B + + .quad 
0x5f3a7562eb3dbe47 + .quad 0xf7ea38548ebda0b8 + .quad 0x00c3e53145747299 + .quad 0x1304e9e71627d551 + .quad 0x583b04bfacad8ea2 + .quad 0x29b743e8148be884 + .quad 0x2b1e583b0810c5db + .quad 0x2b5449e58eb3bbaa + .quad 0x789814d26adc9cfe + .quad 0x3c1bab3f8b48dd0b + .quad 0xda0fe1fff979c60a + .quad 0x4468de2d7c2dd693 + + // 2^128 * 2 * B + + .quad 0x51bb355e9419469e + .quad 0x33e6dc4c23ddc754 + .quad 0x93a5b6d6447f9962 + .quad 0x6cce7c6ffb44bd63 + .quad 0x4b9ad8c6f86307ce + .quad 0x21113531435d0c28 + .quad 0xd4a866c5657a772c + .quad 0x5da6427e63247352 + .quad 0x1a94c688deac22ca + .quad 0xb9066ef7bbae1ff8 + .quad 0x88ad8c388d59580f + .quad 0x58f29abfe79f2ca8 + + // 2^128 * 3 * B + + .quad 0xe90ecfab8de73e68 + .quad 0x54036f9f377e76a5 + .quad 0xf0495b0bbe015982 + .quad 0x577629c4a7f41e36 + .quad 0x4b5a64bf710ecdf6 + .quad 0xb14ce538462c293c + .quad 0x3643d056d50b3ab9 + .quad 0x6af93724185b4870 + .quad 0x3220024509c6a888 + .quad 0xd2e036134b558973 + .quad 0x83e236233c33289f + .quad 0x701f25bb0caec18f + + // 2^128 * 4 * B + + .quad 0xc3a8b0f8e4616ced + .quad 0xf700660e9e25a87d + .quad 0x61e3061ff4bca59c + .quad 0x2e0c92bfbdc40be9 + .quad 0x9d18f6d97cbec113 + .quad 0x844a06e674bfdbe4 + .quad 0x20f5b522ac4e60d6 + .quad 0x720a5bc050955e51 + .quad 0x0c3f09439b805a35 + .quad 0xe84e8b376242abfc + .quad 0x691417f35c229346 + .quad 0x0e9b9cbb144ef0ec + + // 2^128 * 5 * B + + .quad 0xfbbad48ffb5720ad + .quad 0xee81916bdbf90d0e + .quad 0xd4813152635543bf + .quad 0x221104eb3f337bd8 + .quad 0x8dee9bd55db1beee + .quad 0xc9c3ab370a723fb9 + .quad 0x44a8f1bf1c68d791 + .quad 0x366d44191cfd3cde + .quad 0x9e3c1743f2bc8c14 + .quad 0x2eda26fcb5856c3b + .quad 0xccb82f0e68a7fb97 + .quad 0x4167a4e6bc593244 + + // 2^128 * 6 * B + + .quad 0x643b9d2876f62700 + .quad 0x5d1d9d400e7668eb + .quad 0x1b4b430321fc0684 + .quad 0x7938bb7e2255246a + .quad 0xc2be2665f8ce8fee + .quad 0xe967ff14e880d62c + .quad 0xf12e6e7e2f364eee + .quad 0x34b33370cb7ed2f6 + .quad 0xcdc591ee8681d6cc + .quad 0xce02109ced85a753 + .quad 0xed7485c158808883 + .quad 0x1176fc6e2dfe65e4 + + // 2^128 * 7 * B + + .quad 0xb4af6cd05b9c619b + .quad 0x2ddfc9f4b2a58480 + .quad 0x3d4fa502ebe94dc4 + .quad 0x08fc3a4c677d5f34 + .quad 0xdb90e28949770eb8 + .quad 0x98fbcc2aacf440a3 + .quad 0x21354ffeded7879b + .quad 0x1f6a3e54f26906b6 + .quad 0x60a4c199d30734ea + .quad 0x40c085b631165cd6 + .quad 0xe2333e23f7598295 + .quad 0x4f2fad0116b900d1 + + // 2^128 * 8 * B + + .quad 0x44beb24194ae4e54 + .quad 0x5f541c511857ef6c + .quad 0xa61e6b2d368d0498 + .quad 0x445484a4972ef7ab + .quad 0x962cd91db73bb638 + .quad 0xe60577aafc129c08 + .quad 0x6f619b39f3b61689 + .quad 0x3451995f2944ee81 + .quad 0x9152fcd09fea7d7c + .quad 0x4a816c94b0935cf6 + .quad 0x258e9aaa47285c40 + .quad 0x10b89ca6042893b7 + + // 2^132 * 1 * B + + .quad 0x9b2a426e3b646025 + .quad 0x32127190385ce4cf + .quad 0xa25cffc2dd6dea45 + .quad 0x06409010bea8de75 + .quad 0xd67cded679d34aa0 + .quad 0xcc0b9ec0cc4db39f + .quad 0xa535a456e35d190f + .quad 0x2e05d9eaf61f6fef + .quad 0xc447901ad61beb59 + .quad 0x661f19bce5dc880a + .quad 0x24685482b7ca6827 + .quad 0x293c778cefe07f26 + + // 2^132 * 2 * B + + .quad 0x86809e7007069096 + .quad 0xaad75b15e4e50189 + .quad 0x07f35715a21a0147 + .quad 0x0487f3f112815d5e + .quad 0x16c795d6a11ff200 + .quad 0xcb70d0e2b15815c9 + .quad 0x89f293209b5395b5 + .quad 0x50b8c2d031e47b4f + .quad 0x48350c08068a4962 + .quad 0x6ffdd05351092c9a + .quad 0x17af4f4aaf6fc8dd + .quad 0x4b0553b53cdba58b + + // 2^132 * 3 * B + + .quad 0x9c65fcbe1b32ff79 + .quad 0xeb75ea9f03b50f9b + .quad 0xfced2a6c6c07e606 + .quad 
0x35106cd551717908 + .quad 0xbf05211b27c152d4 + .quad 0x5ec26849bd1af639 + .quad 0x5e0b2caa8e6fab98 + .quad 0x054c8bdd50bd0840 + .quad 0x38a0b12f1dcf073d + .quad 0x4b60a8a3b7f6a276 + .quad 0xfed5ac25d3404f9a + .quad 0x72e82d5e5505c229 + + // 2^132 * 4 * B + + .quad 0x6b0b697ff0d844c8 + .quad 0xbb12f85cd979cb49 + .quad 0xd2a541c6c1da0f1f + .quad 0x7b7c242958ce7211 + .quad 0x00d9cdfd69771d02 + .quad 0x410276cd6cfbf17e + .quad 0x4c45306c1cb12ec7 + .quad 0x2857bf1627500861 + .quad 0x9f21903f0101689e + .quad 0xd779dfd3bf861005 + .quad 0xa122ee5f3deb0f1b + .quad 0x510df84b485a00d4 + + // 2^132 * 5 * B + + .quad 0xa54133bb9277a1fa + .quad 0x74ec3b6263991237 + .quad 0x1a3c54dc35d2f15a + .quad 0x2d347144e482ba3a + .quad 0x24b3c887c70ac15e + .quad 0xb0f3a557fb81b732 + .quad 0x9b2cde2fe578cc1b + .quad 0x4cf7ed0703b54f8e + .quad 0x6bd47c6598fbee0f + .quad 0x9e4733e2ab55be2d + .quad 0x1093f624127610c5 + .quad 0x4e05e26ad0a1eaa4 + + // 2^132 * 6 * B + + .quad 0xda9b6b624b531f20 + .quad 0x429a760e77509abb + .quad 0xdbe9f522e823cb80 + .quad 0x618f1856880c8f82 + .quad 0x1833c773e18fe6c0 + .quad 0xe3c4711ad3c87265 + .quad 0x3bfd3c4f0116b283 + .quad 0x1955875eb4cd4db8 + .quad 0x6da6de8f0e399799 + .quad 0x7ad61aa440fda178 + .quad 0xb32cd8105e3563dd + .quad 0x15f6beae2ae340ae + + // 2^132 * 7 * B + + .quad 0x862bcb0c31ec3a62 + .quad 0x810e2b451138f3c2 + .quad 0x788ec4b839dac2a4 + .quad 0x28f76867ae2a9281 + .quad 0xba9a0f7b9245e215 + .quad 0xf368612dd98c0dbb + .quad 0x2e84e4cbf220b020 + .quad 0x6ba92fe962d90eda + .quad 0x3e4df9655884e2aa + .quad 0xbd62fbdbdbd465a5 + .quad 0xd7596caa0de9e524 + .quad 0x6e8042ccb2b1b3d7 + + // 2^132 * 8 * B + + .quad 0xf10d3c29ce28ca6e + .quad 0xbad34540fcb6093d + .quad 0xe7426ed7a2ea2d3f + .quad 0x08af9d4e4ff298b9 + .quad 0x1530653616521f7e + .quad 0x660d06b896203dba + .quad 0x2d3989bc545f0879 + .quad 0x4b5303af78ebd7b0 + .quad 0x72f8a6c3bebcbde8 + .quad 0x4f0fca4adc3a8e89 + .quad 0x6fa9d4e8c7bfdf7a + .quad 0x0dcf2d679b624eb7 + + // 2^136 * 1 * B + + .quad 0x3d5947499718289c + .quad 0x12ebf8c524533f26 + .quad 0x0262bfcb14c3ef15 + .quad 0x20b878d577b7518e + .quad 0x753941be5a45f06e + .quad 0xd07caeed6d9c5f65 + .quad 0x11776b9c72ff51b6 + .quad 0x17d2d1d9ef0d4da9 + .quad 0x27f2af18073f3e6a + .quad 0xfd3fe519d7521069 + .quad 0x22e3b72c3ca60022 + .quad 0x72214f63cc65c6a7 + + // 2^136 * 2 * B + + .quad 0xb4e37f405307a693 + .quad 0xaba714d72f336795 + .quad 0xd6fbd0a773761099 + .quad 0x5fdf48c58171cbc9 + .quad 0x1d9db7b9f43b29c9 + .quad 0xd605824a4f518f75 + .quad 0xf2c072bd312f9dc4 + .quad 0x1f24ac855a1545b0 + .quad 0x24d608328e9505aa + .quad 0x4748c1d10c1420ee + .quad 0xc7ffe45c06fb25a2 + .quad 0x00ba739e2ae395e6 + + // 2^136 * 3 * B + + .quad 0x592e98de5c8790d6 + .quad 0xe5bfb7d345c2a2df + .quad 0x115a3b60f9b49922 + .quad 0x03283a3e67ad78f3 + .quad 0xae4426f5ea88bb26 + .quad 0x360679d984973bfb + .quad 0x5c9f030c26694e50 + .quad 0x72297de7d518d226 + .quad 0x48241dc7be0cb939 + .quad 0x32f19b4d8b633080 + .quad 0xd3dfc90d02289308 + .quad 0x05e1296846271945 + + // 2^136 * 4 * B + + .quad 0xba82eeb32d9c495a + .quad 0xceefc8fcf12bb97c + .quad 0xb02dabae93b5d1e0 + .quad 0x39c00c9c13698d9b + .quad 0xadbfbbc8242c4550 + .quad 0xbcc80cecd03081d9 + .quad 0x843566a6f5c8df92 + .quad 0x78cf25d38258ce4c + .quad 0x15ae6b8e31489d68 + .quad 0xaa851cab9c2bf087 + .quad 0xc9a75a97f04efa05 + .quad 0x006b52076b3ff832 + + // 2^136 * 5 * B + + .quad 0x29e0cfe19d95781c + .quad 0xb681df18966310e2 + .quad 0x57df39d370516b39 + .quad 0x4d57e3443bc76122 + .quad 0xf5cb7e16b9ce082d + .quad 0x3407f14c417abc29 + .quad 
0xd4b36bce2bf4a7ab + .quad 0x7de2e9561a9f75ce + .quad 0xde70d4f4b6a55ecb + .quad 0x4801527f5d85db99 + .quad 0xdbc9c440d3ee9a81 + .quad 0x6b2a90af1a6029ed + + // 2^136 * 6 * B + + .quad 0x6923f4fc9ae61e97 + .quad 0x5735281de03f5fd1 + .quad 0xa764ae43e6edd12d + .quad 0x5fd8f4e9d12d3e4a + .quad 0x77ebf3245bb2d80a + .quad 0xd8301b472fb9079b + .quad 0xc647e6f24cee7333 + .quad 0x465812c8276c2109 + .quad 0x4d43beb22a1062d9 + .quad 0x7065fb753831dc16 + .quad 0x180d4a7bde2968d7 + .quad 0x05b32c2b1cb16790 + + // 2^136 * 7 * B + + .quad 0xc8c05eccd24da8fd + .quad 0xa1cf1aac05dfef83 + .quad 0xdbbeeff27df9cd61 + .quad 0x3b5556a37b471e99 + .quad 0xf7fca42c7ad58195 + .quad 0x3214286e4333f3cc + .quad 0xb6c29d0d340b979d + .quad 0x31771a48567307e1 + .quad 0x32b0c524e14dd482 + .quad 0xedb351541a2ba4b6 + .quad 0xa3d16048282b5af3 + .quad 0x4fc079d27a7336eb + + // 2^136 * 8 * B + + .quad 0x51c938b089bf2f7f + .quad 0x2497bd6502dfe9a7 + .quad 0xffffc09c7880e453 + .quad 0x124567cecaf98e92 + .quad 0xdc348b440c86c50d + .quad 0x1337cbc9cc94e651 + .quad 0x6422f74d643e3cb9 + .quad 0x241170c2bae3cd08 + .quad 0x3ff9ab860ac473b4 + .quad 0xf0911dee0113e435 + .quad 0x4ae75060ebc6c4af + .quad 0x3f8612966c87000d + + // 2^140 * 1 * B + + .quad 0x0c9c5303f7957be4 + .quad 0xa3c31a20e085c145 + .quad 0xb0721d71d0850050 + .quad 0x0aba390eab0bf2da + .quad 0x529fdffe638c7bf3 + .quad 0xdf2b9e60388b4995 + .quad 0xe027b34f1bad0249 + .quad 0x7bc92fc9b9fa74ed + .quad 0x9f97ef2e801ad9f9 + .quad 0x83697d5479afda3a + .quad 0xe906b3ffbd596b50 + .quad 0x02672b37dd3fb8e0 + + // 2^140 * 2 * B + + .quad 0x48b2ca8b260885e4 + .quad 0xa4286bec82b34c1c + .quad 0x937e1a2617f58f74 + .quad 0x741d1fcbab2ca2a5 + .quad 0xee9ba729398ca7f5 + .quad 0xeb9ca6257a4849db + .quad 0x29eb29ce7ec544e1 + .quad 0x232ca21ef736e2c8 + .quad 0xbf61423d253fcb17 + .quad 0x08803ceafa39eb14 + .quad 0xf18602df9851c7af + .quad 0x0400f3a049e3414b + + // 2^140 * 3 * B + + .quad 0xabce0476ba61c55b + .quad 0x36a3d6d7c4d39716 + .quad 0x6eb259d5e8d82d09 + .quad 0x0c9176e984d756fb + .quad 0x2efba412a06e7b06 + .quad 0x146785452c8d2560 + .quad 0xdf9713ebd67a91c7 + .quad 0x32830ac7157eadf3 + .quad 0x0e782a7ab73769e8 + .quad 0x04a05d7875b18e2c + .quad 0x29525226ebcceae1 + .quad 0x0d794f8383eba820 + + // 2^140 * 4 * B + + .quad 0xff35f5cb9e1516f4 + .quad 0xee805bcf648aae45 + .quad 0xf0d73c2bb93a9ef3 + .quad 0x097b0bf22092a6c2 + .quad 0x7be44ce7a7a2e1ac + .quad 0x411fd93efad1b8b7 + .quad 0x1734a1d70d5f7c9b + .quad 0x0d6592233127db16 + .quad 0xc48bab1521a9d733 + .quad 0xa6c2eaead61abb25 + .quad 0x625c6c1cc6cb4305 + .quad 0x7fc90fea93eb3a67 + + // 2^140 * 5 * B + + .quad 0x0408f1fe1f5c5926 + .quad 0x1a8f2f5e3b258bf4 + .quad 0x40a951a2fdc71669 + .quad 0x6598ee93c98b577e + .quad 0xc527deb59c7cb23d + .quad 0x955391695328404e + .quad 0xd64392817ccf2c7a + .quad 0x6ce97dabf7d8fa11 + .quad 0x25b5a8e50ef7c48f + .quad 0xeb6034116f2ce532 + .quad 0xc5e75173e53de537 + .quad 0x73119fa08c12bb03 + + // 2^140 * 6 * B + + .quad 0xed30129453f1a4cb + .quad 0xbce621c9c8f53787 + .quad 0xfacb2b1338bee7b9 + .quad 0x3025798a9ea8428c + .quad 0x7845b94d21f4774d + .quad 0xbf62f16c7897b727 + .quad 0x671857c03c56522b + .quad 0x3cd6a85295621212 + .quad 0x3fecde923aeca999 + .quad 0xbdaa5b0062e8c12f + .quad 0x67b99dfc96988ade + .quad 0x3f52c02852661036 + + // 2^140 * 7 * B + + .quad 0xffeaa48e2a1351c6 + .quad 0x28624754fa7f53d7 + .quad 0x0b5ba9e57582ddf1 + .quad 0x60c0104ba696ac59 + .quad 0x9258bf99eec416c6 + .quad 0xac8a5017a9d2f671 + .quad 0x629549ab16dea4ab + .quad 0x05d0e85c99091569 + .quad 0x051de020de9cbe97 + .quad 
0xfa07fc56b50bcf74 + .quad 0x378cec9f0f11df65 + .quad 0x36853c69ab96de4d + + // 2^140 * 8 * B + + .quad 0x36d9b8de78f39b2d + .quad 0x7f42ed71a847b9ec + .quad 0x241cd1d679bd3fde + .quad 0x6a704fec92fbce6b + .quad 0x4433c0b0fac5e7be + .quad 0x724bae854c08dcbe + .quad 0xf1f24cc446978f9b + .quad 0x4a0aff6d62825fc8 + .quad 0xe917fb9e61095301 + .quad 0xc102df9402a092f8 + .quad 0xbf09e2f5fa66190b + .quad 0x681109bee0dcfe37 + + // 2^144 * 1 * B + + .quad 0x559a0cc9782a0dde + .quad 0x551dcdb2ea718385 + .quad 0x7f62865b31ef238c + .quad 0x504aa7767973613d + .quad 0x9c18fcfa36048d13 + .quad 0x29159db373899ddd + .quad 0xdc9f350b9f92d0aa + .quad 0x26f57eee878a19d4 + .quad 0x0cab2cd55687efb1 + .quad 0x5180d162247af17b + .quad 0x85c15a344f5a2467 + .quad 0x4041943d9dba3069 + + // 2^144 * 2 * B + + .quad 0xc3c0eeba43ebcc96 + .quad 0x8d749c9c26ea9caf + .quad 0xd9fa95ee1c77ccc6 + .quad 0x1420a1d97684340f + .quad 0x4b217743a26caadd + .quad 0x47a6b424648ab7ce + .quad 0xcb1d4f7a03fbc9e3 + .quad 0x12d931429800d019 + .quad 0x00c67799d337594f + .quad 0x5e3c5140b23aa47b + .quad 0x44182854e35ff395 + .quad 0x1b4f92314359a012 + + // 2^144 * 3 * B + + .quad 0x3e5c109d89150951 + .quad 0x39cefa912de9696a + .quad 0x20eae43f975f3020 + .quad 0x239b572a7f132dae + .quad 0x33cf3030a49866b1 + .quad 0x251f73d2215f4859 + .quad 0xab82aa4051def4f6 + .quad 0x5ff191d56f9a23f6 + .quad 0x819ed433ac2d9068 + .quad 0x2883ab795fc98523 + .quad 0xef4572805593eb3d + .quad 0x020c526a758f36cb + + // 2^144 * 4 * B + + .quad 0x779834f89ed8dbbc + .quad 0xc8f2aaf9dc7ca46c + .quad 0xa9524cdca3e1b074 + .quad 0x02aacc4615313877 + .quad 0xe931ef59f042cc89 + .quad 0x2c589c9d8e124bb6 + .quad 0xadc8e18aaec75997 + .quad 0x452cfe0a5602c50c + .quad 0x86a0f7a0647877df + .quad 0xbbc464270e607c9f + .quad 0xab17ea25f1fb11c9 + .quad 0x4cfb7d7b304b877b + + // 2^144 * 5 * B + + .quad 0x72b43d6cb89b75fe + .quad 0x54c694d99c6adc80 + .quad 0xb8c3aa373ee34c9f + .quad 0x14b4622b39075364 + .quad 0xe28699c29789ef12 + .quad 0x2b6ecd71df57190d + .quad 0xc343c857ecc970d0 + .quad 0x5b1d4cbc434d3ac5 + .quad 0xb6fb2615cc0a9f26 + .quad 0x3a4f0e2bb88dcce5 + .quad 0x1301498b3369a705 + .quad 0x2f98f71258592dd1 + + // 2^144 * 6 * B + + .quad 0x0c94a74cb50f9e56 + .quad 0x5b1ff4a98e8e1320 + .quad 0x9a2acc2182300f67 + .quad 0x3a6ae249d806aaf9 + .quad 0x2e12ae444f54a701 + .quad 0xfcfe3ef0a9cbd7de + .quad 0xcebf890d75835de0 + .quad 0x1d8062e9e7614554 + .quad 0x657ada85a9907c5a + .quad 0x1a0ea8b591b90f62 + .quad 0x8d0e1dfbdf34b4e9 + .quad 0x298b8ce8aef25ff3 + + // 2^144 * 7 * B + + .quad 0x2a927953eff70cb2 + .quad 0x4b89c92a79157076 + .quad 0x9418457a30a7cf6a + .quad 0x34b8a8404d5ce485 + .quad 0x837a72ea0a2165de + .quad 0x3fab07b40bcf79f6 + .quad 0x521636c77738ae70 + .quad 0x6ba6271803a7d7dc + .quad 0xc26eecb583693335 + .quad 0xd5a813df63b5fefd + .quad 0xa293aa9aa4b22573 + .quad 0x71d62bdd465e1c6a + + // 2^144 * 8 * B + + .quad 0x6533cc28d378df80 + .quad 0xf6db43790a0fa4b4 + .quad 0xe3645ff9f701da5a + .quad 0x74d5f317f3172ba4 + .quad 0xcd2db5dab1f75ef5 + .quad 0xd77f95cf16b065f5 + .quad 0x14571fea3f49f085 + .quad 0x1c333621262b2b3d + .quad 0xa86fe55467d9ca81 + .quad 0x398b7c752b298c37 + .quad 0xda6d0892e3ac623b + .quad 0x4aebcc4547e9d98c + + // 2^148 * 1 * B + + .quad 0x53175a7205d21a77 + .quad 0xb0c04422d3b934d4 + .quad 0xadd9f24bdd5deadc + .quad 0x074f46e69f10ff8c + .quad 0x0de9b204a059a445 + .quad 0xe15cb4aa4b17ad0f + .quad 0xe1bbec521f79c557 + .quad 0x2633f1b9d071081b + .quad 0xc1fb4177018b9910 + .quad 0xa6ea20dc6c0fe140 + .quad 0xd661f3e74354c6ff + .quad 0x5ecb72e6f1a3407a + + // 
2^148 * 2 * B + + .quad 0xa515a31b2259fb4e + .quad 0x0960f3972bcac52f + .quad 0xedb52fec8d3454cb + .quad 0x382e2720c476c019 + .quad 0xfeeae106e8e86997 + .quad 0x9863337f98d09383 + .quad 0x9470480eaa06ebef + .quad 0x038b6898d4c5c2d0 + .quad 0xf391c51d8ace50a6 + .quad 0x3142d0b9ae2d2948 + .quad 0xdb4d5a1a7f24ca80 + .quad 0x21aeba8b59250ea8 + + // 2^148 * 3 * B + + .quad 0x24f13b34cf405530 + .quad 0x3c44ea4a43088af7 + .quad 0x5dd5c5170006a482 + .quad 0x118eb8f8890b086d + .quad 0x53853600f0087f23 + .quad 0x4c461879da7d5784 + .quad 0x6af303deb41f6860 + .quad 0x0a3c16c5c27c18ed + .quad 0x17e49c17cc947f3d + .quad 0xccc6eda6aac1d27b + .quad 0xdf6092ceb0f08e56 + .quad 0x4909b3e22c67c36b + + // 2^148 * 4 * B + + .quad 0x9c9c85ea63fe2e89 + .quad 0xbe1baf910e9412ec + .quad 0x8f7baa8a86fbfe7b + .quad 0x0fb17f9fef968b6c + .quad 0x59a16676706ff64e + .quad 0x10b953dd0d86a53d + .quad 0x5848e1e6ce5c0b96 + .quad 0x2d8b78e712780c68 + .quad 0x79d5c62eafc3902b + .quad 0x773a215289e80728 + .quad 0xc38ae640e10120b9 + .quad 0x09ae23717b2b1a6d + + // 2^148 * 5 * B + + .quad 0xbb6a192a4e4d083c + .quad 0x34ace0630029e192 + .quad 0x98245a59aafabaeb + .quad 0x6d9c8a9ada97faac + .quad 0x10ab8fa1ad32b1d0 + .quad 0xe9aced1be2778b24 + .quad 0xa8856bc0373de90f + .quad 0x66f35ddddda53996 + .quad 0xd27d9afb24997323 + .quad 0x1bb7e07ef6f01d2e + .quad 0x2ba7472df52ecc7f + .quad 0x03019b4f646f9dc8 + + // 2^148 * 6 * B + + .quad 0x04a186b5565345cd + .quad 0xeee76610bcc4116a + .quad 0x689c73b478fb2a45 + .quad 0x387dcbff65697512 + .quad 0xaf09b214e6b3dc6b + .quad 0x3f7573b5ad7d2f65 + .quad 0xd019d988100a23b0 + .quad 0x392b63a58b5c35f7 + .quad 0x4093addc9c07c205 + .quad 0xc565be15f532c37e + .quad 0x63dbecfd1583402a + .quad 0x61722b4aef2e032e + + // 2^148 * 7 * B + + .quad 0x0012aafeecbd47af + .quad 0x55a266fb1cd46309 + .quad 0xf203eb680967c72c + .quad 0x39633944ca3c1429 + .quad 0xd6b07a5581cb0e3c + .quad 0x290ff006d9444969 + .quad 0x08680b6a16dcda1f + .quad 0x5568d2b75a06de59 + .quad 0x8d0cb88c1b37cfe1 + .quad 0x05b6a5a3053818f3 + .quad 0xf2e9bc04b787d959 + .quad 0x6beba1249add7f64 + + // 2^148 * 8 * B + + .quad 0x1d06005ca5b1b143 + .quad 0x6d4c6bb87fd1cda2 + .quad 0x6ef5967653fcffe7 + .quad 0x097c29e8c1ce1ea5 + .quad 0x5c3cecb943f5a53b + .quad 0x9cc9a61d06c08df2 + .quad 0xcfba639a85895447 + .quad 0x5a845ae80df09fd5 + .quad 0x4ce97dbe5deb94ca + .quad 0x38d0a4388c709c48 + .quad 0xc43eced4a169d097 + .quad 0x0a1249fff7e587c3 + + // 2^152 * 1 * B + + .quad 0x12f0071b276d01c9 + .quad 0xe7b8bac586c48c70 + .quad 0x5308129b71d6fba9 + .quad 0x5d88fbf95a3db792 + .quad 0x0b408d9e7354b610 + .quad 0x806b32535ba85b6e + .quad 0xdbe63a034a58a207 + .quad 0x173bd9ddc9a1df2c + .quad 0x2b500f1efe5872df + .quad 0x58d6582ed43918c1 + .quad 0xe6ed278ec9673ae0 + .quad 0x06e1cd13b19ea319 + + // 2^152 * 2 * B + + .quad 0x40d0ad516f166f23 + .quad 0x118e32931fab6abe + .quad 0x3fe35e14a04d088e + .quad 0x3080603526e16266 + .quad 0x472baf629e5b0353 + .quad 0x3baa0b90278d0447 + .quad 0x0c785f469643bf27 + .quad 0x7f3a6a1a8d837b13 + .quad 0xf7e644395d3d800b + .quad 0x95a8d555c901edf6 + .quad 0x68cd7830592c6339 + .quad 0x30d0fded2e51307e + + // 2^152 * 3 * B + + .quad 0xe0594d1af21233b3 + .quad 0x1bdbe78ef0cc4d9c + .quad 0x6965187f8f499a77 + .quad 0x0a9214202c099868 + .quad 0x9cb4971e68b84750 + .quad 0xa09572296664bbcf + .quad 0x5c8de72672fa412b + .quad 0x4615084351c589d9 + .quad 0xbc9019c0aeb9a02e + .quad 0x55c7110d16034cae + .quad 0x0e6df501659932ec + .quad 0x3bca0d2895ca5dfe + + // 2^152 * 4 * B + + .quad 0x40f031bc3c5d62a4 + .quad 0x19fc8b3ecff07a60 + .quad 
0x98183da2130fb545 + .quad 0x5631deddae8f13cd + .quad 0x9c688eb69ecc01bf + .quad 0xf0bc83ada644896f + .quad 0xca2d955f5f7a9fe2 + .quad 0x4ea8b4038df28241 + .quad 0x2aed460af1cad202 + .quad 0x46305305a48cee83 + .quad 0x9121774549f11a5f + .quad 0x24ce0930542ca463 + + // 2^152 * 5 * B + + .quad 0x1fe890f5fd06c106 + .quad 0xb5c468355d8810f2 + .quad 0x827808fe6e8caf3e + .quad 0x41d4e3c28a06d74b + .quad 0x3fcfa155fdf30b85 + .quad 0xd2f7168e36372ea4 + .quad 0xb2e064de6492f844 + .quad 0x549928a7324f4280 + .quad 0xf26e32a763ee1a2e + .quad 0xae91e4b7d25ffdea + .quad 0xbc3bd33bd17f4d69 + .quad 0x491b66dec0dcff6a + + // 2^152 * 6 * B + + .quad 0x98f5b13dc7ea32a7 + .quad 0xe3d5f8cc7e16db98 + .quad 0xac0abf52cbf8d947 + .quad 0x08f338d0c85ee4ac + .quad 0x75f04a8ed0da64a1 + .quad 0xed222caf67e2284b + .quad 0x8234a3791f7b7ba4 + .quad 0x4cf6b8b0b7018b67 + .quad 0xc383a821991a73bd + .quad 0xab27bc01df320c7a + .quad 0xc13d331b84777063 + .quad 0x530d4a82eb078a99 + + // 2^152 * 7 * B + + .quad 0x004c3630e1f94825 + .quad 0x7e2d78268cab535a + .quad 0xc7482323cc84ff8b + .quad 0x65ea753f101770b9 + .quad 0x6d6973456c9abf9e + .quad 0x257fb2fc4900a880 + .quad 0x2bacf412c8cfb850 + .quad 0x0db3e7e00cbfbd5b + .quad 0x3d66fc3ee2096363 + .quad 0x81d62c7f61b5cb6b + .quad 0x0fbe044213443b1a + .quad 0x02a4ec1921e1a1db + + // 2^152 * 8 * B + + .quad 0x5ce6259a3b24b8a2 + .quad 0xb8577acc45afa0b8 + .quad 0xcccbe6e88ba07037 + .quad 0x3d143c51127809bf + .quad 0xf5c86162f1cf795f + .quad 0x118c861926ee57f2 + .quad 0x172124851c063578 + .quad 0x36d12b5dec067fcf + .quad 0x126d279179154557 + .quad 0xd5e48f5cfc783a0a + .quad 0x36bdb6e8df179bac + .quad 0x2ef517885ba82859 + + // 2^156 * 1 * B + + .quad 0x88bd438cd11e0d4a + .quad 0x30cb610d43ccf308 + .quad 0xe09a0e3791937bcc + .quad 0x4559135b25b1720c + .quad 0x1ea436837c6da1e9 + .quad 0xf9c189af1fb9bdbe + .quad 0x303001fcce5dd155 + .quad 0x28a7c99ebc57be52 + .quad 0xb8fd9399e8d19e9d + .quad 0x908191cb962423ff + .quad 0xb2b948d747c742a3 + .quad 0x37f33226d7fb44c4 + + // 2^156 * 2 * B + + .quad 0x0dae8767b55f6e08 + .quad 0x4a43b3b35b203a02 + .quad 0xe3725a6e80af8c79 + .quad 0x0f7a7fd1705fa7a3 + .quad 0x33912553c821b11d + .quad 0x66ed42c241e301df + .quad 0x066fcc11104222fd + .quad 0x307a3b41c192168f + .quad 0x8eeb5d076eb55ce0 + .quad 0x2fc536bfaa0d925a + .quad 0xbe81830fdcb6c6e8 + .quad 0x556c7045827baf52 + + // 2^156 * 3 * B + + .quad 0x8e2b517302e9d8b7 + .quad 0xe3e52269248714e8 + .quad 0xbd4fbd774ca960b5 + .quad 0x6f4b4199c5ecada9 + .quad 0xb94b90022bf44406 + .quad 0xabd4237eff90b534 + .quad 0x7600a960faf86d3a + .quad 0x2f45abdac2322ee3 + .quad 0x61af4912c8ef8a6a + .quad 0xe58fa4fe43fb6e5e + .quad 0xb5afcc5d6fd427cf + .quad 0x6a5393281e1e11eb + + // 2^156 * 4 * B + + .quad 0xf3da5139a5d1ee89 + .quad 0x8145457cff936988 + .quad 0x3f622fed00e188c4 + .quad 0x0f513815db8b5a3d + .quad 0x0fff04fe149443cf + .quad 0x53cac6d9865cddd7 + .quad 0x31385b03531ed1b7 + .quad 0x5846a27cacd1039d + .quad 0x4ff5cdac1eb08717 + .quad 0x67e8b29590f2e9bc + .quad 0x44093b5e237afa99 + .quad 0x0d414bed8708b8b2 + + // 2^156 * 5 * B + + .quad 0xcfb68265fd0e75f6 + .quad 0xe45b3e28bb90e707 + .quad 0x7242a8de9ff92c7a + .quad 0x685b3201933202dd + .quad 0x81886a92294ac9e8 + .quad 0x23162b45d55547be + .quad 0x94cfbc4403715983 + .quad 0x50eb8fdb134bc401 + .quad 0xc0b73ec6d6b330cd + .quad 0x84e44807132faff1 + .quad 0x732b7352c4a5dee1 + .quad 0x5d7c7cf1aa7cd2d2 + + // 2^156 * 6 * B + + .quad 0xaf3b46bf7a4aafa2 + .quad 0xb78705ec4d40d411 + .quad 0x114f0c6aca7c15e3 + .quad 0x3f364faaa9489d4d + .quad 0x33d1013e9b73a562 + .quad 
0x925cef5748ec26e1 + .quad 0xa7fce614dd468058 + .quad 0x78b0fad41e9aa438 + .quad 0xbf56a431ed05b488 + .quad 0xa533e66c9c495c7e + .quad 0xe8652baf87f3651a + .quad 0x0241800059d66c33 + + // 2^156 * 7 * B + + .quad 0xceb077fea37a5be4 + .quad 0xdb642f02e5a5eeb7 + .quad 0xc2e6d0c5471270b8 + .quad 0x4771b65538e4529c + .quad 0x28350c7dcf38ea01 + .quad 0x7c6cdbc0b2917ab6 + .quad 0xace7cfbe857082f7 + .quad 0x4d2845aba2d9a1e0 + .quad 0xbb537fe0447070de + .quad 0xcba744436dd557df + .quad 0xd3b5a3473600dbcb + .quad 0x4aeabbe6f9ffd7f8 + + // 2^156 * 8 * B + + .quad 0x4630119e40d8f78c + .quad 0xa01a9bc53c710e11 + .quad 0x486d2b258910dd79 + .quad 0x1e6c47b3db0324e5 + .quad 0x6a2134bcc4a9c8f2 + .quad 0xfbf8fd1c8ace2e37 + .quad 0x000ae3049911a0ba + .quad 0x046e3a616bc89b9e + .quad 0x14e65442f03906be + .quad 0x4a019d54e362be2a + .quad 0x68ccdfec8dc230c7 + .quad 0x7cfb7e3faf6b861c + + // 2^160 * 1 * B + + .quad 0x4637974e8c58aedc + .quad 0xb9ef22fbabf041a4 + .quad 0xe185d956e980718a + .quad 0x2f1b78fab143a8a6 + .quad 0x96eebffb305b2f51 + .quad 0xd3f938ad889596b8 + .quad 0xf0f52dc746d5dd25 + .quad 0x57968290bb3a0095 + .quad 0xf71ab8430a20e101 + .quad 0xf393658d24f0ec47 + .quad 0xcf7509a86ee2eed1 + .quad 0x7dc43e35dc2aa3e1 + + // 2^160 * 2 * B + + .quad 0x85966665887dd9c3 + .quad 0xc90f9b314bb05355 + .quad 0xc6e08df8ef2079b1 + .quad 0x7ef72016758cc12f + .quad 0x5a782a5c273e9718 + .quad 0x3576c6995e4efd94 + .quad 0x0f2ed8051f237d3e + .quad 0x044fb81d82d50a99 + .quad 0xc1df18c5a907e3d9 + .quad 0x57b3371dce4c6359 + .quad 0xca704534b201bb49 + .quad 0x7f79823f9c30dd2e + + // 2^160 * 3 * B + + .quad 0x8334d239a3b513e8 + .quad 0xc13670d4b91fa8d8 + .quad 0x12b54136f590bd33 + .quad 0x0a4e0373d784d9b4 + .quad 0x6a9c1ff068f587ba + .quad 0x0827894e0050c8de + .quad 0x3cbf99557ded5be7 + .quad 0x64a9b0431c06d6f0 + .quad 0x2eb3d6a15b7d2919 + .quad 0xb0b4f6a0d53a8235 + .quad 0x7156ce4389a45d47 + .quad 0x071a7d0ace18346c + + // 2^160 * 4 * B + + .quad 0xd3072daac887ba0b + .quad 0x01262905bfa562ee + .quad 0xcf543002c0ef768b + .quad 0x2c3bcc7146ea7e9c + .quad 0xcc0c355220e14431 + .quad 0x0d65950709b15141 + .quad 0x9af5621b209d5f36 + .quad 0x7c69bcf7617755d3 + .quad 0x07f0d7eb04e8295f + .quad 0x10db18252f50f37d + .quad 0xe951a9a3171798d7 + .quad 0x6f5a9a7322aca51d + + // 2^160 * 5 * B + + .quad 0x8ba1000c2f41c6c5 + .quad 0xc49f79c10cfefb9b + .quad 0x4efa47703cc51c9f + .quad 0x494e21a2e147afca + .quad 0xe729d4eba3d944be + .quad 0x8d9e09408078af9e + .quad 0x4525567a47869c03 + .quad 0x02ab9680ee8d3b24 + .quad 0xefa48a85dde50d9a + .quad 0x219a224e0fb9a249 + .quad 0xfa091f1dd91ef6d9 + .quad 0x6b5d76cbea46bb34 + + // 2^160 * 6 * B + + .quad 0x8857556cec0cd994 + .quad 0x6472dc6f5cd01dba + .quad 0xaf0169148f42b477 + .quad 0x0ae333f685277354 + .quad 0xe0f941171e782522 + .quad 0xf1e6ae74036936d3 + .quad 0x408b3ea2d0fcc746 + .quad 0x16fb869c03dd313e + .quad 0x288e199733b60962 + .quad 0x24fc72b4d8abe133 + .quad 0x4811f7ed0991d03e + .quad 0x3f81e38b8f70d075 + + // 2^160 * 7 * B + + .quad 0x7f910fcc7ed9affe + .quad 0x545cb8a12465874b + .quad 0xa8397ed24b0c4704 + .quad 0x50510fc104f50993 + .quad 0x0adb7f355f17c824 + .quad 0x74b923c3d74299a4 + .quad 0xd57c3e8bcbf8eaf7 + .quad 0x0ad3e2d34cdedc3d + .quad 0x6f0c0fc5336e249d + .quad 0x745ede19c331cfd9 + .quad 0xf2d6fd0009eefe1c + .quad 0x127c158bf0fa1ebe + + // 2^160 * 8 * B + + .quad 0xf6197c422e9879a2 + .quad 0xa44addd452ca3647 + .quad 0x9b413fc14b4eaccb + .quad 0x354ef87d07ef4f68 + .quad 0xdea28fc4ae51b974 + .quad 0x1d9973d3744dfe96 + .quad 0x6240680b873848a8 + .quad 0x4ed82479d167df95 + .quad 
0xfee3b52260c5d975 + .quad 0x50352efceb41b0b8 + .quad 0x8808ac30a9f6653c + .quad 0x302d92d20539236d + + // 2^164 * 1 * B + + .quad 0x4c59023fcb3efb7c + .quad 0x6c2fcb99c63c2a94 + .quad 0xba4190e2c3c7e084 + .quad 0x0e545daea51874d9 + .quad 0x957b8b8b0df53c30 + .quad 0x2a1c770a8e60f098 + .quad 0xbbc7a670345796de + .quad 0x22a48f9a90c99bc9 + .quad 0x6b7dc0dc8d3fac58 + .quad 0x5497cd6ce6e42bfd + .quad 0x542f7d1bf400d305 + .quad 0x4159f47f048d9136 + + // 2^164 * 2 * B + + .quad 0x20ad660839e31e32 + .quad 0xf81e1bd58405be50 + .quad 0xf8064056f4dabc69 + .quad 0x14d23dd4ce71b975 + .quad 0x748515a8bbd24839 + .quad 0x77128347afb02b55 + .quad 0x50ba2ac649a2a17f + .quad 0x060525513ad730f1 + .quad 0xf2398e098aa27f82 + .quad 0x6d7982bb89a1b024 + .quad 0xfa694084214dd24c + .quad 0x71ab966fa32301c3 + + // 2^164 * 3 * B + + .quad 0x2dcbd8e34ded02fc + .quad 0x1151f3ec596f22aa + .quad 0xbca255434e0328da + .quad 0x35768fbe92411b22 + .quad 0xb1088a0702809955 + .quad 0x43b273ea0b43c391 + .quad 0xca9b67aefe0686ed + .quad 0x605eecbf8335f4ed + .quad 0x83200a656c340431 + .quad 0x9fcd71678ee59c2f + .quad 0x75d4613f71300f8a + .quad 0x7a912faf60f542f9 + + // 2^164 * 4 * B + + .quad 0xb204585e5edc1a43 + .quad 0x9f0e16ee5897c73c + .quad 0x5b82c0ae4e70483c + .quad 0x624a170e2bddf9be + .quad 0x253f4f8dfa2d5597 + .quad 0x25e49c405477130c + .quad 0x00c052e5996b1102 + .quad 0x33cb966e33bb6c4a + .quad 0x597028047f116909 + .quad 0x828ac41c1e564467 + .quad 0x70417dbde6217387 + .quad 0x721627aefbac4384 + + // 2^164 * 5 * B + + .quad 0x97d03bc38736add5 + .quad 0x2f1422afc532b130 + .quad 0x3aa68a057101bbc4 + .quad 0x4c946cf7e74f9fa7 + .quad 0xfd3097bc410b2f22 + .quad 0xf1a05da7b5cfa844 + .quad 0x61289a1def57ca74 + .quad 0x245ea199bb821902 + .quad 0xaedca66978d477f8 + .quad 0x1898ba3c29117fe1 + .quad 0xcf73f983720cbd58 + .quad 0x67da12e6b8b56351 + + // 2^164 * 6 * B + + .quad 0x7067e187b4bd6e07 + .quad 0x6e8f0203c7d1fe74 + .quad 0x93c6aa2f38c85a30 + .quad 0x76297d1f3d75a78a + .quad 0x2b7ef3d38ec8308c + .quad 0x828fd7ec71eb94ab + .quad 0x807c3b36c5062abd + .quad 0x0cb64cb831a94141 + .quad 0x3030fc33534c6378 + .quad 0xb9635c5ce541e861 + .quad 0x15d9a9bed9b2c728 + .quad 0x49233ea3f3775dcb + + // 2^164 * 7 * B + + .quad 0x629398fa8dbffc3a + .quad 0xe12fe52dd54db455 + .quad 0xf3be11dfdaf25295 + .quad 0x628b140dce5e7b51 + .quad 0x7b3985fe1c9f249b + .quad 0x4fd6b2d5a1233293 + .quad 0xceb345941adf4d62 + .quad 0x6987ff6f542de50c + .quad 0x47e241428f83753c + .quad 0x6317bebc866af997 + .quad 0xdabb5b433d1a9829 + .quad 0x074d8d245287fb2d + + // 2^164 * 8 * B + + .quad 0x8337d9cd440bfc31 + .quad 0x729d2ca1af318fd7 + .quad 0xa040a4a4772c2070 + .quad 0x46002ef03a7349be + .quad 0x481875c6c0e31488 + .quad 0x219429b2e22034b4 + .quad 0x7223c98a31283b65 + .quad 0x3420d60b342277f9 + .quad 0xfaa23adeaffe65f7 + .quad 0x78261ed45be0764c + .quad 0x441c0a1e2f164403 + .quad 0x5aea8e567a87d395 + + // 2^168 * 1 * B + + .quad 0x7813c1a2bca4283d + .quad 0xed62f091a1863dd9 + .quad 0xaec7bcb8c268fa86 + .quad 0x10e5d3b76f1cae4c + .quad 0x2dbc6fb6e4e0f177 + .quad 0x04e1bf29a4bd6a93 + .quad 0x5e1966d4787af6e8 + .quad 0x0edc5f5eb426d060 + .quad 0x5453bfd653da8e67 + .quad 0xe9dc1eec24a9f641 + .quad 0xbf87263b03578a23 + .quad 0x45b46c51361cba72 + + // 2^168 * 2 * B + + .quad 0xa9402abf314f7fa1 + .quad 0xe257f1dc8e8cf450 + .quad 0x1dbbd54b23a8be84 + .quad 0x2177bfa36dcb713b + .quad 0xce9d4ddd8a7fe3e4 + .quad 0xab13645676620e30 + .quad 0x4b594f7bb30e9958 + .quad 0x5c1c0aef321229df + .quad 0x37081bbcfa79db8f + .quad 0x6048811ec25f59b3 + .quad 0x087a76659c832487 + .quad 
0x4ae619387d8ab5bb + + // 2^168 * 3 * B + + .quad 0x8ddbf6aa5344a32e + .quad 0x7d88eab4b41b4078 + .quad 0x5eb0eb974a130d60 + .quad 0x1a00d91b17bf3e03 + .quad 0x61117e44985bfb83 + .quad 0xfce0462a71963136 + .quad 0x83ac3448d425904b + .quad 0x75685abe5ba43d64 + .quad 0x6e960933eb61f2b2 + .quad 0x543d0fa8c9ff4952 + .quad 0xdf7275107af66569 + .quad 0x135529b623b0e6aa + + // 2^168 * 4 * B + + .quad 0x18f0dbd7add1d518 + .quad 0x979f7888cfc11f11 + .quad 0x8732e1f07114759b + .quad 0x79b5b81a65ca3a01 + .quad 0xf5c716bce22e83fe + .quad 0xb42beb19e80985c1 + .quad 0xec9da63714254aae + .quad 0x5972ea051590a613 + .quad 0x0fd4ac20dc8f7811 + .quad 0x9a9ad294ac4d4fa8 + .quad 0xc01b2d64b3360434 + .quad 0x4f7e9c95905f3bdb + + // 2^168 * 5 * B + + .quad 0x62674bbc5781302e + .quad 0xd8520f3989addc0f + .quad 0x8c2999ae53fbd9c6 + .quad 0x31993ad92e638e4c + .quad 0x71c8443d355299fe + .quad 0x8bcd3b1cdbebead7 + .quad 0x8092499ef1a49466 + .quad 0x1942eec4a144adc8 + .quad 0x7dac5319ae234992 + .quad 0x2c1b3d910cea3e92 + .quad 0x553ce494253c1122 + .quad 0x2a0a65314ef9ca75 + + // 2^168 * 6 * B + + .quad 0x2db7937ff7f927c2 + .quad 0xdb741f0617d0a635 + .quad 0x5982f3a21155af76 + .quad 0x4cf6e218647c2ded + .quad 0xcf361acd3c1c793a + .quad 0x2f9ebcac5a35bc3b + .quad 0x60e860e9a8cda6ab + .quad 0x055dc39b6dea1a13 + .quad 0xb119227cc28d5bb6 + .quad 0x07e24ebc774dffab + .quad 0xa83c78cee4a32c89 + .quad 0x121a307710aa24b6 + + // 2^168 * 7 * B + + .quad 0xe4db5d5e9f034a97 + .quad 0xe153fc093034bc2d + .quad 0x460546919551d3b1 + .quad 0x333fc76c7a40e52d + .quad 0xd659713ec77483c9 + .quad 0x88bfe077b82b96af + .quad 0x289e28231097bcd3 + .quad 0x527bb94a6ced3a9b + .quad 0x563d992a995b482e + .quad 0x3405d07c6e383801 + .quad 0x485035de2f64d8e5 + .quad 0x6b89069b20a7a9f7 + + // 2^168 * 8 * B + + .quad 0x812aa0416270220d + .quad 0x995a89faf9245b4e + .quad 0xffadc4ce5072ef05 + .quad 0x23bc2103aa73eb73 + .quad 0x4082fa8cb5c7db77 + .quad 0x068686f8c734c155 + .quad 0x29e6c8d9f6e7a57e + .quad 0x0473d308a7639bcf + .quad 0xcaee792603589e05 + .quad 0x2b4b421246dcc492 + .quad 0x02a1ef74e601a94f + .quad 0x102f73bfde04341a + + // 2^172 * 1 * B + + .quad 0xb5a2d50c7ec20d3e + .quad 0xc64bdd6ea0c97263 + .quad 0x56e89052c1ff734d + .quad 0x4929c6f72b2ffaba + .quad 0x358ecba293a36247 + .quad 0xaf8f9862b268fd65 + .quad 0x412f7e9968a01c89 + .quad 0x5786f312cd754524 + .quad 0x337788ffca14032c + .quad 0xf3921028447f1ee3 + .quad 0x8b14071f231bccad + .quad 0x4c817b4bf2344783 + + // 2^172 * 2 * B + + .quad 0x0ff853852871b96e + .quad 0xe13e9fab60c3f1bb + .quad 0xeefd595325344402 + .quad 0x0a37c37075b7744b + .quad 0x413ba057a40b4484 + .quad 0xba4c2e1a4f5f6a43 + .quad 0x614ba0a5aee1d61c + .quad 0x78a1531a8b05dc53 + .quad 0x6cbdf1703ad0562b + .quad 0x8ecf4830c92521a3 + .quad 0xdaebd303fd8424e7 + .quad 0x72ad82a42e5ec56f + + // 2^172 * 3 * B + + .quad 0x3f9e8e35bafb65f6 + .quad 0x39d69ec8f27293a1 + .quad 0x6cb8cd958cf6a3d0 + .quad 0x1734778173adae6d + .quad 0xc368939167024bc3 + .quad 0x8e69d16d49502fda + .quad 0xfcf2ec3ce45f4b29 + .quad 0x065f669ea3b4cbc4 + .quad 0x8a00aec75532db4d + .quad 0xb869a4e443e31bb1 + .quad 0x4a0f8552d3a7f515 + .quad 0x19adeb7c303d7c08 + + // 2^172 * 4 * B + + .quad 0xc720cb6153ead9a3 + .quad 0x55b2c97f512b636e + .quad 0xb1e35b5fd40290b1 + .quad 0x2fd9ccf13b530ee2 + .quad 0x9d05ba7d43c31794 + .quad 0x2470c8ff93322526 + .quad 0x8323dec816197438 + .quad 0x2852709881569b53 + .quad 0x07bd475b47f796b8 + .quad 0xd2c7b013542c8f54 + .quad 0x2dbd23f43b24f87e + .quad 0x6551afd77b0901d6 + + // 2^172 * 5 * B + + .quad 0x4546baaf54aac27f + .quad 
0xf6f66fecb2a45a28 + .quad 0x582d1b5b562bcfe8 + .quad 0x44b123f3920f785f + .quad 0x68a24ce3a1d5c9ac + .quad 0xbb77a33d10ff6461 + .quad 0x0f86ce4425d3166e + .quad 0x56507c0950b9623b + .quad 0x1206f0b7d1713e63 + .quad 0x353fe3d915bafc74 + .quad 0x194ceb970ad9d94d + .quad 0x62fadd7cf9d03ad3 + + // 2^172 * 6 * B + + .quad 0xc6b5967b5598a074 + .quad 0x5efe91ce8e493e25 + .quad 0xd4b72c4549280888 + .quad 0x20ef1149a26740c2 + .quad 0x3cd7bc61e7ce4594 + .quad 0xcd6b35a9b7dd267e + .quad 0xa080abc84366ef27 + .quad 0x6ec7c46f59c79711 + .quad 0x2f07ad636f09a8a2 + .quad 0x8697e6ce24205e7d + .quad 0xc0aefc05ee35a139 + .quad 0x15e80958b5f9d897 + + // 2^172 * 7 * B + + .quad 0x25a5ef7d0c3e235b + .quad 0x6c39c17fbe134ee7 + .quad 0xc774e1342dc5c327 + .quad 0x021354b892021f39 + .quad 0x4dd1ed355bb061c4 + .quad 0x42dc0cef941c0700 + .quad 0x61305dc1fd86340e + .quad 0x56b2cc930e55a443 + .quad 0x1df79da6a6bfc5a2 + .quad 0x02f3a2749fde4369 + .quad 0xb323d9f2cda390a7 + .quad 0x7be0847b8774d363 + + // 2^172 * 8 * B + + .quad 0x8c99cc5a8b3f55c3 + .quad 0x0611d7253fded2a0 + .quad 0xed2995ff36b70a36 + .quad 0x1f699a54d78a2619 + .quad 0x1466f5af5307fa11 + .quad 0x817fcc7ded6c0af2 + .quad 0x0a6de44ec3a4a3fb + .quad 0x74071475bc927d0b + .quad 0xe77292f373e7ea8a + .quad 0x296537d2cb045a31 + .quad 0x1bd0653ed3274fde + .quad 0x2f9a2c4476bd2966 + + // 2^176 * 1 * B + + .quad 0xeb18b9ab7f5745c6 + .quad 0x023a8aee5787c690 + .quad 0xb72712da2df7afa9 + .quad 0x36597d25ea5c013d + .quad 0xa2b4dae0b5511c9a + .quad 0x7ac860292bffff06 + .quad 0x981f375df5504234 + .quad 0x3f6bd725da4ea12d + .quad 0x734d8d7b106058ac + .quad 0xd940579e6fc6905f + .quad 0x6466f8f99202932d + .quad 0x7b7ecc19da60d6d0 + + // 2^176 * 2 * B + + .quad 0x78c2373c695c690d + .quad 0xdd252e660642906e + .quad 0x951d44444ae12bd2 + .quad 0x4235ad7601743956 + .quad 0x6dae4a51a77cfa9b + .quad 0x82263654e7a38650 + .quad 0x09bbffcd8f2d82db + .quad 0x03bedc661bf5caba + .quad 0x6258cb0d078975f5 + .quad 0x492942549189f298 + .quad 0xa0cab423e2e36ee4 + .quad 0x0e7ce2b0cdf066a1 + + // 2^176 * 3 * B + + .quad 0xc494643ac48c85a3 + .quad 0xfd361df43c6139ad + .quad 0x09db17dd3ae94d48 + .quad 0x666e0a5d8fb4674a + .quad 0xfea6fedfd94b70f9 + .quad 0xf130c051c1fcba2d + .quad 0x4882d47e7f2fab89 + .quad 0x615256138aeceeb5 + .quad 0x2abbf64e4870cb0d + .quad 0xcd65bcf0aa458b6b + .quad 0x9abe4eba75e8985d + .quad 0x7f0bc810d514dee4 + + // 2^176 * 4 * B + + .quad 0xb9006ba426f4136f + .quad 0x8d67369e57e03035 + .quad 0xcbc8dfd94f463c28 + .quad 0x0d1f8dbcf8eedbf5 + .quad 0x83ac9dad737213a0 + .quad 0x9ff6f8ba2ef72e98 + .quad 0x311e2edd43ec6957 + .quad 0x1d3a907ddec5ab75 + .quad 0xba1693313ed081dc + .quad 0x29329fad851b3480 + .quad 0x0128013c030321cb + .quad 0x00011b44a31bfde3 + + // 2^176 * 5 * B + + .quad 0x3fdfa06c3fc66c0c + .quad 0x5d40e38e4dd60dd2 + .quad 0x7ae38b38268e4d71 + .quad 0x3ac48d916e8357e1 + .quad 0x16561f696a0aa75c + .quad 0xc1bf725c5852bd6a + .quad 0x11a8dd7f9a7966ad + .quad 0x63d988a2d2851026 + .quad 0x00120753afbd232e + .quad 0xe92bceb8fdd8f683 + .quad 0xf81669b384e72b91 + .quad 0x33fad52b2368a066 + + // 2^176 * 6 * B + + .quad 0x540649c6c5e41e16 + .quad 0x0af86430333f7735 + .quad 0xb2acfcd2f305e746 + .quad 0x16c0f429a256dca7 + .quad 0x8d2cc8d0c422cfe8 + .quad 0x072b4f7b05a13acb + .quad 0xa3feb6e6ecf6a56f + .quad 0x3cc355ccb90a71e2 + .quad 0xe9b69443903e9131 + .quad 0xb8a494cb7a5637ce + .quad 0xc87cd1a4baba9244 + .quad 0x631eaf426bae7568 + + // 2^176 * 7 * B + + .quad 0xb3e90410da66fe9f + .quad 0x85dd4b526c16e5a6 + .quad 0xbc3d97611ef9bf83 + .quad 0x5599648b1ea919b5 + .quad 
0x47d975b9a3700de8 + .quad 0x7280c5fbe2f80552 + .quad 0x53658f2732e45de1 + .quad 0x431f2c7f665f80b5 + .quad 0xd6026344858f7b19 + .quad 0x14ab352fa1ea514a + .quad 0x8900441a2090a9d7 + .quad 0x7b04715f91253b26 + + // 2^176 * 8 * B + + .quad 0x83edbd28acf6ae43 + .quad 0x86357c8b7d5c7ab4 + .quad 0xc0404769b7eb2c44 + .quad 0x59b37bf5c2f6583f + .quad 0xb376c280c4e6bac6 + .quad 0x970ed3dd6d1d9b0b + .quad 0xb09a9558450bf944 + .quad 0x48d0acfa57cde223 + .quad 0xb60f26e47dabe671 + .quad 0xf1d1a197622f3a37 + .quad 0x4208ce7ee9960394 + .quad 0x16234191336d3bdb + + // 2^180 * 1 * B + + .quad 0xf19aeac733a63aef + .quad 0x2c7fba5d4442454e + .quad 0x5da87aa04795e441 + .quad 0x413051e1a4e0b0f5 + .quad 0x852dd1fd3d578bbe + .quad 0x2b65ce72c3286108 + .quad 0x658c07f4eace2273 + .quad 0x0933f804ec38ab40 + .quad 0xa7ab69798d496476 + .quad 0x8121aadefcb5abc8 + .quad 0xa5dc12ef7b539472 + .quad 0x07fd47065e45351a + + // 2^180 * 2 * B + + .quad 0xc8583c3d258d2bcd + .quad 0x17029a4daf60b73f + .quad 0xfa0fc9d6416a3781 + .quad 0x1c1e5fba38b3fb23 + .quad 0x304211559ae8e7c3 + .quad 0xf281b229944882a5 + .quad 0x8a13ac2e378250e4 + .quad 0x014afa0954ba48f4 + .quad 0xcb3197001bb3666c + .quad 0x330060524bffecb9 + .quad 0x293711991a88233c + .quad 0x291884363d4ed364 + + // 2^180 * 3 * B + + .quad 0x033c6805dc4babfa + .quad 0x2c15bf5e5596ecc1 + .quad 0x1bc70624b59b1d3b + .quad 0x3ede9850a19f0ec5 + .quad 0xfb9d37c3bc1ab6eb + .quad 0x02be14534d57a240 + .quad 0xf4d73415f8a5e1f6 + .quad 0x5964f4300ccc8188 + .quad 0xe44a23152d096800 + .quad 0x5c08c55970866996 + .quad 0xdf2db60a46affb6e + .quad 0x579155c1f856fd89 + + // 2^180 * 4 * B + + .quad 0x96324edd12e0c9ef + .quad 0x468b878df2420297 + .quad 0x199a3776a4f573be + .quad 0x1e7fbcf18e91e92a + .quad 0xb5f16b630817e7a6 + .quad 0x808c69233c351026 + .quad 0x324a983b54cef201 + .quad 0x53c092084a485345 + .quad 0xd2d41481f1cbafbf + .quad 0x231d2db6716174e5 + .quad 0x0b7d7656e2a55c98 + .quad 0x3e955cd82aa495f6 + + // 2^180 * 5 * B + + .quad 0xe48f535e3ed15433 + .quad 0xd075692a0d7270a3 + .quad 0x40fbd21daade6387 + .quad 0x14264887cf4495f5 + .quad 0xab39f3ef61bb3a3f + .quad 0x8eb400652eb9193e + .quad 0xb5de6ecc38c11f74 + .quad 0x654d7e9626f3c49f + .quad 0xe564cfdd5c7d2ceb + .quad 0x82eeafded737ccb9 + .quad 0x6107db62d1f9b0ab + .quad 0x0b6baac3b4358dbb + + // 2^180 * 6 * B + + .quad 0x7ae62bcb8622fe98 + .quad 0x47762256ceb891af + .quad 0x1a5a92bcf2e406b4 + .quad 0x7d29401784e41501 + .quad 0x204abad63700a93b + .quad 0xbe0023d3da779373 + .quad 0xd85f0346633ab709 + .quad 0x00496dc490820412 + .quad 0x1c74b88dc27e6360 + .quad 0x074854268d14850c + .quad 0xa145fb7b3e0dcb30 + .quad 0x10843f1b43803b23 + + // 2^180 * 7 * B + + .quad 0xc5f90455376276dd + .quad 0xce59158dd7645cd9 + .quad 0x92f65d511d366b39 + .quad 0x11574b6e526996c4 + .quad 0xd56f672de324689b + .quad 0xd1da8aedb394a981 + .quad 0xdd7b58fe9168cfed + .quad 0x7ce246cd4d56c1e8 + .quad 0xb8f4308e7f80be53 + .quad 0x5f3cb8cb34a9d397 + .quad 0x18a961bd33cc2b2c + .quad 0x710045fb3a9af671 + + // 2^180 * 8 * B + + .quad 0x73f93d36101b95eb + .quad 0xfaef33794f6f4486 + .quad 0x5651735f8f15e562 + .quad 0x7fa3f19058b40da1 + .quad 0xa03fc862059d699e + .quad 0x2370cfa19a619e69 + .quad 0xc4fe3b122f823deb + .quad 0x1d1b056fa7f0844e + .quad 0x1bc64631e56bf61f + .quad 0xd379ab106e5382a3 + .quad 0x4d58c57e0540168d + .quad 0x566256628442d8e4 + + // 2^184 * 1 * B + + .quad 0xb9e499def6267ff6 + .quad 0x7772ca7b742c0843 + .quad 0x23a0153fe9a4f2b1 + .quad 0x2cdfdfecd5d05006 + .quad 0xdd499cd61ff38640 + .quad 0x29cd9bc3063625a0 + .quad 0x51e2d8023dd73dc3 + .quad 
0x4a25707a203b9231 + .quad 0x2ab7668a53f6ed6a + .quad 0x304242581dd170a1 + .quad 0x4000144c3ae20161 + .quad 0x5721896d248e49fc + + // 2^184 * 2 * B + + .quad 0x0b6e5517fd181bae + .quad 0x9022629f2bb963b4 + .quad 0x5509bce932064625 + .quad 0x578edd74f63c13da + .quad 0x285d5091a1d0da4e + .quad 0x4baa6fa7b5fe3e08 + .quad 0x63e5177ce19393b3 + .quad 0x03c935afc4b030fd + .quad 0x997276c6492b0c3d + .quad 0x47ccc2c4dfe205fc + .quad 0xdcd29b84dd623a3c + .quad 0x3ec2ab590288c7a2 + + // 2^184 * 3 * B + + .quad 0xa1a0d27be4d87bb9 + .quad 0xa98b4deb61391aed + .quad 0x99a0ddd073cb9b83 + .quad 0x2dd5c25a200fcace + .quad 0xa7213a09ae32d1cb + .quad 0x0f2b87df40f5c2d5 + .quad 0x0baea4c6e81eab29 + .quad 0x0e1bf66c6adbac5e + .quad 0xe2abd5e9792c887e + .quad 0x1a020018cb926d5d + .quad 0xbfba69cdbaae5f1e + .quad 0x730548b35ae88f5f + + // 2^184 * 4 * B + + .quad 0xc43551a3cba8b8ee + .quad 0x65a26f1db2115f16 + .quad 0x760f4f52ab8c3850 + .quad 0x3043443b411db8ca + .quad 0x805b094ba1d6e334 + .quad 0xbf3ef17709353f19 + .quad 0x423f06cb0622702b + .quad 0x585a2277d87845dd + .quad 0xa18a5f8233d48962 + .quad 0x6698c4b5ec78257f + .quad 0xa78e6fa5373e41ff + .quad 0x7656278950ef981f + + // 2^184 * 5 * B + + .quad 0x38c3cf59d51fc8c0 + .quad 0x9bedd2fd0506b6f2 + .quad 0x26bf109fab570e8f + .quad 0x3f4160a8c1b846a6 + .quad 0xe17073a3ea86cf9d + .quad 0x3a8cfbb707155fdc + .quad 0x4853e7fc31838a8e + .quad 0x28bbf484b613f616 + .quad 0xf2612f5c6f136c7c + .quad 0xafead107f6dd11be + .quad 0x527e9ad213de6f33 + .quad 0x1e79cb358188f75d + + // 2^184 * 6 * B + + .quad 0x013436c3eef7e3f1 + .quad 0x828b6a7ffe9e10f8 + .quad 0x7ff908e5bcf9defc + .quad 0x65d7951b3a3b3831 + .quad 0x77e953d8f5e08181 + .quad 0x84a50c44299dded9 + .quad 0xdc6c2d0c864525e5 + .quad 0x478ab52d39d1f2f4 + .quad 0x66a6a4d39252d159 + .quad 0xe5dde1bc871ac807 + .quad 0xb82c6b40a6c1c96f + .quad 0x16d87a411a212214 + + // 2^184 * 7 * B + + .quad 0xb3bd7e5a42066215 + .quad 0x879be3cd0c5a24c1 + .quad 0x57c05db1d6f994b7 + .quad 0x28f87c8165f38ca6 + .quad 0xfba4d5e2d54e0583 + .quad 0xe21fafd72ebd99fa + .quad 0x497ac2736ee9778f + .quad 0x1f990b577a5a6dde + .quad 0xa3344ead1be8f7d6 + .quad 0x7d1e50ebacea798f + .quad 0x77c6569e520de052 + .quad 0x45882fe1534d6d3e + + // 2^184 * 8 * B + + .quad 0x6669345d757983d6 + .quad 0x62b6ed1117aa11a6 + .quad 0x7ddd1857985e128f + .quad 0x688fe5b8f626f6dd + .quad 0xd8ac9929943c6fe4 + .quad 0xb5f9f161a38392a2 + .quad 0x2699db13bec89af3 + .quad 0x7dcf843ce405f074 + .quad 0x6c90d6484a4732c0 + .quad 0xd52143fdca563299 + .quad 0xb3be28c3915dc6e1 + .quad 0x6739687e7327191b + + // 2^188 * 1 * B + + .quad 0x9f65c5ea200814cf + .quad 0x840536e169a31740 + .quad 0x8b0ed13925c8b4ad + .quad 0x0080dbafe936361d + .quad 0x8ce5aad0c9cb971f + .quad 0x1156aaa99fd54a29 + .quad 0x41f7247015af9b78 + .quad 0x1fe8cca8420f49aa + .quad 0x72a1848f3c0cc82a + .quad 0x38c560c2877c9e54 + .quad 0x5004e228ce554140 + .quad 0x042418a103429d71 + + // 2^188 * 2 * B + + .quad 0x899dea51abf3ff5f + .quad 0x9b93a8672fc2d8ba + .quad 0x2c38cb97be6ebd5c + .quad 0x114d578497263b5d + .quad 0x58e84c6f20816247 + .quad 0x8db2b2b6e36fd793 + .quad 0x977182561d484d85 + .quad 0x0822024f8632abd7 + .quad 0xb301bb7c6b1beca3 + .quad 0x55393f6dc6eb1375 + .quad 0x910d281097b6e4eb + .quad 0x1ad4548d9d479ea3 + + // 2^188 * 3 * B + + .quad 0xcd5a7da0389a48fd + .quad 0xb38fa4aa9a78371e + .quad 0xc6d9761b2cdb8e6c + .quad 0x35cf51dbc97e1443 + .quad 0xa06fe66d0fe9fed3 + .quad 0xa8733a401c587909 + .quad 0x30d14d800df98953 + .quad 0x41ce5876c7b30258 + .quad 0x59ac3bc5d670c022 + .quad 0xeae67c109b119406 + .quad 
0x9798bdf0b3782fda + .quad 0x651e3201fd074092 + + // 2^188 * 4 * B + + .quad 0xd63d8483ef30c5cf + .quad 0x4cd4b4962361cc0c + .quad 0xee90e500a48426ac + .quad 0x0af51d7d18c14eeb + .quad 0xa57ba4a01efcae9e + .quad 0x769f4beedc308a94 + .quad 0xd1f10eeb3603cb2e + .quad 0x4099ce5e7e441278 + .quad 0x1ac98e4f8a5121e9 + .quad 0x7dae9544dbfa2fe0 + .quad 0x8320aa0dd6430df9 + .quad 0x667282652c4a2fb5 + + // 2^188 * 5 * B + + .quad 0x874621f4d86bc9ab + .quad 0xb54c7bbe56fe6fea + .quad 0x077a24257fadc22c + .quad 0x1ab53be419b90d39 + .quad 0xada8b6e02946db23 + .quad 0x1c0ce51a7b253ab7 + .quad 0x8448c85a66dd485b + .quad 0x7f1fc025d0675adf + .quad 0xd8ee1b18319ea6aa + .quad 0x004d88083a21f0da + .quad 0x3bd6aa1d883a4f4b + .quad 0x4db9a3a6dfd9fd14 + + // 2^188 * 6 * B + + .quad 0x8ce7b23bb99c0755 + .quad 0x35c5d6edc4f50f7a + .quad 0x7e1e2ed2ed9b50c3 + .quad 0x36305f16e8934da1 + .quad 0xd95b00bbcbb77c68 + .quad 0xddbc846a91f17849 + .quad 0x7cf700aebe28d9b3 + .quad 0x5ce1285c85d31f3e + .quad 0x31b6972d98b0bde8 + .quad 0x7d920706aca6de5b + .quad 0xe67310f8908a659f + .quad 0x50fac2a6efdf0235 + + // 2^188 * 7 * B + + .quad 0xf3d3a9f35b880f5a + .quad 0xedec050cdb03e7c2 + .quad 0xa896981ff9f0b1a2 + .quad 0x49a4ae2bac5e34a4 + .quad 0x295b1c86f6f449bc + .quad 0x51b2e84a1f0ab4dd + .quad 0xc001cb30aa8e551d + .quad 0x6a28d35944f43662 + .quad 0x28bb12ee04a740e0 + .quad 0x14313bbd9bce8174 + .quad 0x72f5b5e4e8c10c40 + .quad 0x7cbfb19936adcd5b + + // 2^188 * 8 * B + + .quad 0xa311ddc26b89792d + .quad 0x1b30b4c6da512664 + .quad 0x0ca77b4ccf150859 + .quad 0x1de443df1b009408 + .quad 0x8e793a7acc36e6e0 + .quad 0xf9fab7a37d586eed + .quad 0x3a4f9692bae1f4e4 + .quad 0x1c14b03eff5f447e + .quad 0x19647bd114a85291 + .quad 0x57b76cb21034d3af + .quad 0x6329db440f9d6dfa + .quad 0x5ef43e586a571493 + + // 2^192 * 1 * B + + .quad 0xef782014385675a6 + .quad 0xa2649f30aafda9e8 + .quad 0x4cd1eb505cdfa8cb + .quad 0x46115aba1d4dc0b3 + .quad 0xa66dcc9dc80c1ac0 + .quad 0x97a05cf41b38a436 + .quad 0xa7ebf3be95dbd7c6 + .quad 0x7da0b8f68d7e7dab + .quad 0xd40f1953c3b5da76 + .quad 0x1dac6f7321119e9b + .quad 0x03cc6021feb25960 + .quad 0x5a5f887e83674b4b + + // 2^192 * 2 * B + + .quad 0x8f6301cf70a13d11 + .quad 0xcfceb815350dd0c4 + .quad 0xf70297d4a4bca47e + .quad 0x3669b656e44d1434 + .quad 0x9e9628d3a0a643b9 + .quad 0xb5c3cb00e6c32064 + .quad 0x9b5302897c2dec32 + .quad 0x43e37ae2d5d1c70c + .quad 0x387e3f06eda6e133 + .quad 0x67301d5199a13ac0 + .quad 0xbd5ad8f836263811 + .quad 0x6a21e6cd4fd5e9be + + // 2^192 * 3 * B + + .quad 0xf1c6170a3046e65f + .quad 0x58712a2a00d23524 + .quad 0x69dbbd3c8c82b755 + .quad 0x586bf9f1a195ff57 + .quad 0xef4129126699b2e3 + .quad 0x71d30847708d1301 + .quad 0x325432d01182b0bd + .quad 0x45371b07001e8b36 + .quad 0xa6db088d5ef8790b + .quad 0x5278f0dc610937e5 + .quad 0xac0349d261a16eb8 + .quad 0x0eafb03790e52179 + + // 2^192 * 4 * B + + .quad 0x960555c13748042f + .quad 0x219a41e6820baa11 + .quad 0x1c81f73873486d0c + .quad 0x309acc675a02c661 + .quad 0x5140805e0f75ae1d + .quad 0xec02fbe32662cc30 + .quad 0x2cebdf1eea92396d + .quad 0x44ae3344c5435bb3 + .quad 0x9cf289b9bba543ee + .quad 0xf3760e9d5ac97142 + .quad 0x1d82e5c64f9360aa + .quad 0x62d5221b7f94678f + + // 2^192 * 5 * B + + .quad 0x524c299c18d0936d + .quad 0xc86bb56c8a0c1a0c + .quad 0xa375052edb4a8631 + .quad 0x5c0efde4bc754562 + .quad 0x7585d4263af77a3c + .quad 0xdfae7b11fee9144d + .quad 0xa506708059f7193d + .quad 0x14f29a5383922037 + .quad 0xdf717edc25b2d7f5 + .quad 0x21f970db99b53040 + .quad 0xda9234b7c3ed4c62 + .quad 0x5e72365c7bee093e + + // 2^192 * 6 * B + + .quad 
0x575bfc074571217f + .quad 0x3779675d0694d95b + .quad 0x9a0a37bbf4191e33 + .quad 0x77f1104c47b4eabc + .quad 0x7d9339062f08b33e + .quad 0x5b9659e5df9f32be + .quad 0xacff3dad1f9ebdfd + .quad 0x70b20555cb7349b7 + .quad 0xbe5113c555112c4c + .quad 0x6688423a9a881fcd + .quad 0x446677855e503b47 + .quad 0x0e34398f4a06404a + + // 2^192 * 7 * B + + .quad 0xb67d22d93ecebde8 + .quad 0x09b3e84127822f07 + .quad 0x743fa61fb05b6d8d + .quad 0x5e5405368a362372 + .quad 0x18930b093e4b1928 + .quad 0x7de3e10e73f3f640 + .quad 0xf43217da73395d6f + .quad 0x6f8aded6ca379c3e + .quad 0xe340123dfdb7b29a + .quad 0x487b97e1a21ab291 + .quad 0xf9967d02fde6949e + .quad 0x780de72ec8d3de97 + + // 2^192 * 8 * B + + .quad 0x0ae28545089ae7bc + .quad 0x388ddecf1c7f4d06 + .quad 0x38ac15510a4811b8 + .quad 0x0eb28bf671928ce4 + .quad 0x671feaf300f42772 + .quad 0x8f72eb2a2a8c41aa + .quad 0x29a17fd797373292 + .quad 0x1defc6ad32b587a6 + .quad 0xaf5bbe1aef5195a7 + .quad 0x148c1277917b15ed + .quad 0x2991f7fb7ae5da2e + .quad 0x467d201bf8dd2867 + + // 2^196 * 1 * B + + .quad 0x7906ee72f7bd2e6b + .quad 0x05d270d6109abf4e + .quad 0x8d5cfe45b941a8a4 + .quad 0x44c218671c974287 + .quad 0x745f9d56296bc318 + .quad 0x993580d4d8152e65 + .quad 0xb0e5b13f5839e9ce + .quad 0x51fc2b28d43921c0 + .quad 0x1b8fd11795e2a98c + .quad 0x1c4e5ee12b6b6291 + .quad 0x5b30e7107424b572 + .quad 0x6e6b9de84c4f4ac6 + + // 2^196 * 2 * B + + .quad 0xdff25fce4b1de151 + .quad 0xd841c0c7e11c4025 + .quad 0x2554b3c854749c87 + .quad 0x2d292459908e0df9 + .quad 0x6b7c5f10f80cb088 + .quad 0x736b54dc56e42151 + .quad 0xc2b620a5c6ef99c4 + .quad 0x5f4c802cc3a06f42 + .quad 0x9b65c8f17d0752da + .quad 0x881ce338c77ee800 + .quad 0xc3b514f05b62f9e3 + .quad 0x66ed5dd5bec10d48 + + // 2^196 * 3 * B + + .quad 0x7d38a1c20bb2089d + .quad 0x808334e196ccd412 + .quad 0xc4a70b8c6c97d313 + .quad 0x2eacf8bc03007f20 + .quad 0xf0adf3c9cbca047d + .quad 0x81c3b2cbf4552f6b + .quad 0xcfda112d44735f93 + .quad 0x1f23a0c77e20048c + .quad 0xf235467be5bc1570 + .quad 0x03d2d9020dbab38c + .quad 0x27529aa2fcf9e09e + .quad 0x0840bef29d34bc50 + + // 2^196 * 4 * B + + .quad 0x796dfb35dc10b287 + .quad 0x27176bcd5c7ff29d + .quad 0x7f3d43e8c7b24905 + .quad 0x0304f5a191c54276 + .quad 0xcd54e06b7f37e4eb + .quad 0x8cc15f87f5e96cca + .quad 0xb8248bb0d3597dce + .quad 0x246affa06074400c + .quad 0x37d88e68fbe45321 + .quad 0x86097548c0d75032 + .quad 0x4e9b13ef894a0d35 + .quad 0x25a83cac5753d325 + + // 2^196 * 5 * B + + .quad 0x10222f48eed8165e + .quad 0x623fc1234b8bcf3a + .quad 0x1e145c09c221e8f0 + .quad 0x7ccfa59fca782630 + .quad 0x9f0f66293952b6e2 + .quad 0x33db5e0e0934267b + .quad 0xff45252bd609fedc + .quad 0x06be10f5c506e0c9 + .quad 0x1a9615a9b62a345f + .quad 0x22050c564a52fecc + .quad 0xa7a2788528bc0dfe + .quad 0x5e82770a1a1ee71d + + // 2^196 * 6 * B + + .quad 0x35425183ad896a5c + .quad 0xe8673afbe78d52f6 + .quad 0x2c66f25f92a35f64 + .quad 0x09d04f3b3b86b102 + .quad 0xe802e80a42339c74 + .quad 0x34175166a7fffae5 + .quad 0x34865d1f1c408cae + .quad 0x2cca982c605bc5ee + .quad 0xfd2d5d35197dbe6e + .quad 0x207c2eea8be4ffa3 + .quad 0x2613d8db325ae918 + .quad 0x7a325d1727741d3e + + // 2^196 * 7 * B + + .quad 0xd036b9bbd16dfde2 + .quad 0xa2055757c497a829 + .quad 0x8e6cc966a7f12667 + .quad 0x4d3b1a791239c180 + .quad 0xecd27d017e2a076a + .quad 0xd788689f1636495e + .quad 0x52a61af0919233e5 + .quad 0x2a479df17bb1ae64 + .quad 0x9e5eee8e33db2710 + .quad 0x189854ded6c43ca5 + .quad 0xa41c22c592718138 + .quad 0x27ad5538a43a5e9b + + // 2^196 * 8 * B + + .quad 0x2746dd4b15350d61 + .quad 0xd03fcbc8ee9521b7 + .quad 0xe86e365a138672ca + .quad 
0x510e987f7e7d89e2 + .quad 0xcb5a7d638e47077c + .quad 0x8db7536120a1c059 + .quad 0x549e1e4d8bedfdcc + .quad 0x080153b7503b179d + .quad 0xdda69d930a3ed3e3 + .quad 0x3d386ef1cd60a722 + .quad 0xc817ad58bdaa4ee6 + .quad 0x23be8d554fe7372a + + // 2^200 * 1 * B + + .quad 0x95fe919a74ef4fad + .quad 0x3a827becf6a308a2 + .quad 0x964e01d309a47b01 + .quad 0x71c43c4f5ba3c797 + .quad 0xbc1ef4bd567ae7a9 + .quad 0x3f624cb2d64498bd + .quad 0xe41064d22c1f4ec8 + .quad 0x2ef9c5a5ba384001 + .quad 0xb6fd6df6fa9e74cd + .quad 0xf18278bce4af267a + .quad 0x8255b3d0f1ef990e + .quad 0x5a758ca390c5f293 + + // 2^200 * 2 * B + + .quad 0xa2b72710d9462495 + .quad 0x3aa8c6d2d57d5003 + .quad 0xe3d400bfa0b487ca + .quad 0x2dbae244b3eb72ec + .quad 0x8ce0918b1d61dc94 + .quad 0x8ded36469a813066 + .quad 0xd4e6a829afe8aad3 + .quad 0x0a738027f639d43f + .quad 0x980f4a2f57ffe1cc + .quad 0x00670d0de1839843 + .quad 0x105c3f4a49fb15fd + .quad 0x2698ca635126a69c + + // 2^200 * 3 * B + + .quad 0xe765318832b0ba78 + .quad 0x381831f7925cff8b + .quad 0x08a81b91a0291fcc + .quad 0x1fb43dcc49caeb07 + .quad 0x2e3d702f5e3dd90e + .quad 0x9e3f0918e4d25386 + .quad 0x5e773ef6024da96a + .quad 0x3c004b0c4afa3332 + .quad 0x9aa946ac06f4b82b + .quad 0x1ca284a5a806c4f3 + .quad 0x3ed3265fc6cd4787 + .quad 0x6b43fd01cd1fd217 + + // 2^200 * 4 * B + + .quad 0xc7a75d4b4697c544 + .quad 0x15fdf848df0fffbf + .quad 0x2868b9ebaa46785a + .quad 0x5a68d7105b52f714 + .quad 0xb5c742583e760ef3 + .quad 0x75dc52b9ee0ab990 + .quad 0xbf1427c2072b923f + .quad 0x73420b2d6ff0d9f0 + .quad 0xaf2cf6cb9e851e06 + .quad 0x8f593913c62238c4 + .quad 0xda8ab89699fbf373 + .quad 0x3db5632fea34bc9e + + // 2^200 * 5 * B + + .quad 0xf46eee2bf75dd9d8 + .quad 0x0d17b1f6396759a5 + .quad 0x1bf2d131499e7273 + .quad 0x04321adf49d75f13 + .quad 0x2e4990b1829825d5 + .quad 0xedeaeb873e9a8991 + .quad 0xeef03d394c704af8 + .quad 0x59197ea495df2b0e + .quad 0x04e16019e4e55aae + .quad 0xe77b437a7e2f92e9 + .quad 0xc7ce2dc16f159aa4 + .quad 0x45eafdc1f4d70cc0 + + // 2^200 * 6 * B + + .quad 0x698401858045d72b + .quad 0x4c22faa2cf2f0651 + .quad 0x941a36656b222dc6 + .quad 0x5a5eebc80362dade + .quad 0xb60e4624cfccb1ed + .quad 0x59dbc292bd5c0395 + .quad 0x31a09d1ddc0481c9 + .quad 0x3f73ceea5d56d940 + .quad 0xb7a7bfd10a4e8dc6 + .quad 0xbe57007e44c9b339 + .quad 0x60c1207f1557aefa + .quad 0x26058891266218db + + // 2^200 * 7 * B + + .quad 0x59f704a68360ff04 + .quad 0xc3d93fde7661e6f4 + .quad 0x831b2a7312873551 + .quad 0x54ad0c2e4e615d57 + .quad 0x4c818e3cc676e542 + .quad 0x5e422c9303ceccad + .quad 0xec07cccab4129f08 + .quad 0x0dedfa10b24443b8 + .quad 0xee3b67d5b82b522a + .quad 0x36f163469fa5c1eb + .quad 0xa5b4d2f26ec19fd3 + .quad 0x62ecb2baa77a9408 + + // 2^200 * 8 * B + + .quad 0xe5ed795261152b3d + .quad 0x4962357d0eddd7d1 + .quad 0x7482c8d0b96b4c71 + .quad 0x2e59f919a966d8be + .quad 0x92072836afb62874 + .quad 0x5fcd5e8579e104a5 + .quad 0x5aad01adc630a14a + .quad 0x61913d5075663f98 + .quad 0x0dc62d361a3231da + .quad 0xfa47583294200270 + .quad 0x02d801513f9594ce + .quad 0x3ddbc2a131c05d5c + + // 2^204 * 1 * B + + .quad 0x3f50a50a4ffb81ef + .quad 0xb1e035093bf420bf + .quad 0x9baa8e1cc6aa2cd0 + .quad 0x32239861fa237a40 + .quad 0xfb735ac2004a35d1 + .quad 0x31de0f433a6607c3 + .quad 0x7b8591bfc528d599 + .quad 0x55be9a25f5bb050c + .quad 0x0d005acd33db3dbf + .quad 0x0111b37c80ac35e2 + .quad 0x4892d66c6f88ebeb + .quad 0x770eadb16508fbcd + + // 2^204 * 2 * B + + .quad 0x8451f9e05e4e89dd + .quad 0xc06302ffbc793937 + .quad 0x5d22749556a6495c + .quad 0x09a6755ca05603fb + .quad 0xf1d3b681a05071b9 + .quad 0x2207659a3592ff3a + .quad 
0x5f0169297881e40e + .quad 0x16bedd0e86ba374e + .quad 0x5ecccc4f2c2737b5 + .quad 0x43b79e0c2dccb703 + .quad 0x33e008bc4ec43df3 + .quad 0x06c1b840f07566c0 + + // 2^204 * 3 * B + + .quad 0x7688a5c6a388f877 + .quad 0x02a96c14deb2b6ac + .quad 0x64c9f3431b8c2af8 + .quad 0x3628435554a1eed6 + .quad 0x69ee9e7f9b02805c + .quad 0xcbff828a547d1640 + .quad 0x3d93a869b2430968 + .quad 0x46b7b8cd3fe26972 + .quad 0xe9812086fe7eebe0 + .quad 0x4cba6be72f515437 + .quad 0x1d04168b516efae9 + .quad 0x5ea1391043982cb9 + + // 2^204 * 4 * B + + .quad 0x49125c9cf4702ee1 + .quad 0x4520b71f8b25b32d + .quad 0x33193026501fef7e + .quad 0x656d8997c8d2eb2b + .quad 0x6f2b3be4d5d3b002 + .quad 0xafec33d96a09c880 + .quad 0x035f73a4a8bcc4cc + .quad 0x22c5b9284662198b + .quad 0xcb58c8fe433d8939 + .quad 0x89a0cb2e6a8d7e50 + .quad 0x79ca955309fbbe5a + .quad 0x0c626616cd7fc106 + + // 2^204 * 5 * B + + .quad 0x1ffeb80a4879b61f + .quad 0x6396726e4ada21ed + .quad 0x33c7b093368025ba + .quad 0x471aa0c6f3c31788 + .quad 0x8fdfc379fbf454b1 + .quad 0x45a5a970f1a4b771 + .quad 0xac921ef7bad35915 + .quad 0x42d088dca81c2192 + .quad 0x8fda0f37a0165199 + .quad 0x0adadb77c8a0e343 + .quad 0x20fbfdfcc875e820 + .quad 0x1cf2bea80c2206e7 + + // 2^204 * 6 * B + + .quad 0xc2ddf1deb36202ac + .quad 0x92a5fe09d2e27aa5 + .quad 0x7d1648f6fc09f1d3 + .quad 0x74c2cc0513bc4959 + .quad 0x982d6e1a02c0412f + .quad 0x90fa4c83db58e8fe + .quad 0x01c2f5bcdcb18bc0 + .quad 0x686e0c90216abc66 + .quad 0x1fadbadba54395a7 + .quad 0xb41a02a0ae0da66a + .quad 0xbf19f598bba37c07 + .quad 0x6a12b8acde48430d + + // 2^204 * 7 * B + + .quad 0xf8daea1f39d495d9 + .quad 0x592c190e525f1dfc + .quad 0xdb8cbd04c9991d1b + .quad 0x11f7fda3d88f0cb7 + .quad 0x793bdd801aaeeb5f + .quad 0x00a2a0aac1518871 + .quad 0xe8a373a31f2136b4 + .quad 0x48aab888fc91ef19 + .quad 0x041f7e925830f40e + .quad 0x002d6ca979661c06 + .quad 0x86dc9ff92b046a2e + .quad 0x760360928b0493d1 + + // 2^204 * 8 * B + + .quad 0x21bb41c6120cf9c6 + .quad 0xeab2aa12decda59b + .quad 0xc1a72d020aa48b34 + .quad 0x215d4d27e87d3b68 + .quad 0xb43108e5695a0b05 + .quad 0x6cb00ee8ad37a38b + .quad 0x5edad6eea3537381 + .quad 0x3f2602d4b6dc3224 + .quad 0xc8b247b65bcaf19c + .quad 0x49779dc3b1b2c652 + .quad 0x89a180bbd5ece2e2 + .quad 0x13f098a3cec8e039 + + // 2^208 * 1 * B + + .quad 0x9adc0ff9ce5ec54b + .quad 0x039c2a6b8c2f130d + .quad 0x028007c7f0f89515 + .quad 0x78968314ac04b36b + .quad 0xf3aa57a22796bb14 + .quad 0x883abab79b07da21 + .quad 0xe54be21831a0391c + .quad 0x5ee7fb38d83205f9 + .quad 0x538dfdcb41446a8e + .quad 0xa5acfda9434937f9 + .quad 0x46af908d263c8c78 + .quad 0x61d0633c9bca0d09 + + // 2^208 * 2 * B + + .quad 0x63744935ffdb2566 + .quad 0xc5bd6b89780b68bb + .quad 0x6f1b3280553eec03 + .quad 0x6e965fd847aed7f5 + .quad 0xada328bcf8fc73df + .quad 0xee84695da6f037fc + .quad 0x637fb4db38c2a909 + .quad 0x5b23ac2df8067bdc + .quad 0x9ad2b953ee80527b + .quad 0xe88f19aafade6d8d + .quad 0x0e711704150e82cf + .quad 0x79b9bbb9dd95dedc + + // 2^208 * 3 * B + + .quad 0xebb355406a3126c2 + .quad 0xd26383a868c8c393 + .quad 0x6c0c6429e5b97a82 + .quad 0x5065f158c9fd2147 + .quad 0xd1997dae8e9f7374 + .quad 0xa032a2f8cfbb0816 + .quad 0xcd6cba126d445f0a + .quad 0x1ba811460accb834 + .quad 0x708169fb0c429954 + .quad 0xe14600acd76ecf67 + .quad 0x2eaab98a70e645ba + .quad 0x3981f39e58a4faf2 + + // 2^208 * 4 * B + + .quad 0x18fb8a7559230a93 + .quad 0x1d168f6960e6f45d + .quad 0x3a85a94514a93cb5 + .quad 0x38dc083705acd0fd + .quad 0xc845dfa56de66fde + .quad 0xe152a5002c40483a + .quad 0xe9d2e163c7b4f632 + .quad 0x30f4452edcbc1b65 + .quad 0x856d2782c5759740 + .quad 
0xfa134569f99cbecc + .quad 0x8844fc73c0ea4e71 + .quad 0x632d9a1a593f2469 + + // 2^208 * 5 * B + + .quad 0xf6bb6b15b807cba6 + .quad 0x1823c7dfbc54f0d7 + .quad 0xbb1d97036e29670b + .quad 0x0b24f48847ed4a57 + .quad 0xbf09fd11ed0c84a7 + .quad 0x63f071810d9f693a + .quad 0x21908c2d57cf8779 + .quad 0x3a5a7df28af64ba2 + .quad 0xdcdad4be511beac7 + .quad 0xa4538075ed26ccf2 + .quad 0xe19cff9f005f9a65 + .quad 0x34fcf74475481f63 + + // 2^208 * 6 * B + + .quad 0xc197e04c789767ca + .quad 0xb8714dcb38d9467d + .quad 0x55de888283f95fa8 + .quad 0x3d3bdc164dfa63f7 + .quad 0xa5bb1dab78cfaa98 + .quad 0x5ceda267190b72f2 + .quad 0x9309c9110a92608e + .quad 0x0119a3042fb374b0 + .quad 0x67a2d89ce8c2177d + .quad 0x669da5f66895d0c1 + .quad 0xf56598e5b282a2b0 + .quad 0x56c088f1ede20a73 + + // 2^208 * 7 * B + + .quad 0x336d3d1110a86e17 + .quad 0xd7f388320b75b2fa + .quad 0xf915337625072988 + .quad 0x09674c6b99108b87 + .quad 0x581b5fac24f38f02 + .quad 0xa90be9febae30cbd + .quad 0x9a2169028acf92f0 + .quad 0x038b7ea48359038f + .quad 0x9f4ef82199316ff8 + .quad 0x2f49d282eaa78d4f + .quad 0x0971a5ab5aef3174 + .quad 0x6e5e31025969eb65 + + // 2^208 * 8 * B + + .quad 0xb16c62f587e593fb + .quad 0x4999eddeca5d3e71 + .quad 0xb491c1e014cc3e6d + .quad 0x08f5114789a8dba8 + .quad 0x3304fb0e63066222 + .quad 0xfb35068987acba3f + .quad 0xbd1924778c1061a3 + .quad 0x3058ad43d1838620 + .quad 0x323c0ffde57663d0 + .quad 0x05c3df38a22ea610 + .quad 0xbdc78abdac994f9a + .quad 0x26549fa4efe3dc99 + + // 2^212 * 1 * B + + .quad 0x738b38d787ce8f89 + .quad 0xb62658e24179a88d + .quad 0x30738c9cf151316d + .quad 0x49128c7f727275c9 + .quad 0x04dbbc17f75396b9 + .quad 0x69e6a2d7d2f86746 + .quad 0xc6409d99f53eabc6 + .quad 0x606175f6332e25d2 + .quad 0x4021370ef540e7dd + .quad 0x0910d6f5a1f1d0a5 + .quad 0x4634aacd5b06b807 + .quad 0x6a39e6356944f235 + + // 2^212 * 2 * B + + .quad 0x96cd5640df90f3e7 + .quad 0x6c3a760edbfa25ea + .quad 0x24f3ef0959e33cc4 + .quad 0x42889e7e530d2e58 + .quad 0x1da1965774049e9d + .quad 0xfbcd6ea198fe352b + .quad 0xb1cbcd50cc5236a6 + .quad 0x1f5ec83d3f9846e2 + .quad 0x8efb23c3328ccb75 + .quad 0xaf42a207dd876ee9 + .quad 0x20fbdadc5dfae796 + .quad 0x241e246b06bf9f51 + + // 2^212 * 3 * B + + .quad 0x29e68e57ad6e98f6 + .quad 0x4c9260c80b462065 + .quad 0x3f00862ea51ebb4b + .quad 0x5bc2c77fb38d9097 + .quad 0x7eaafc9a6280bbb8 + .quad 0x22a70f12f403d809 + .quad 0x31ce40bb1bfc8d20 + .quad 0x2bc65635e8bd53ee + .quad 0xe8d5dc9fa96bad93 + .quad 0xe58fb17dde1947dc + .quad 0x681532ea65185fa3 + .quad 0x1fdd6c3b034a7830 + + // 2^212 * 4 * B + + .quad 0x0a64e28c55dc18fe + .quad 0xe3df9e993399ebdd + .quad 0x79ac432370e2e652 + .quad 0x35ff7fc33ae4cc0e + .quad 0x9c13a6a52dd8f7a9 + .quad 0x2dbb1f8c3efdcabf + .quad 0x961e32405e08f7b5 + .quad 0x48c8a121bbe6c9e5 + .quad 0xfc415a7c59646445 + .quad 0xd224b2d7c128b615 + .quad 0x6035c9c905fbb912 + .quad 0x42d7a91274429fab + + // 2^212 * 5 * B + + .quad 0x4e6213e3eaf72ed3 + .quad 0x6794981a43acd4e7 + .quad 0xff547cde6eb508cb + .quad 0x6fed19dd10fcb532 + .quad 0xa9a48947933da5bc + .quad 0x4a58920ec2e979ec + .quad 0x96d8800013e5ac4c + .quad 0x453692d74b48b147 + .quad 0xdd775d99a8559c6f + .quad 0xf42a2140df003e24 + .quad 0x5223e229da928a66 + .quad 0x063f46ba6d38f22c + + // 2^212 * 6 * B + + .quad 0xd2d242895f536694 + .quad 0xca33a2c542939b2c + .quad 0x986fada6c7ddb95c + .quad 0x5a152c042f712d5d + .quad 0x39843cb737346921 + .quad 0xa747fb0738c89447 + .quad 0xcb8d8031a245307e + .quad 0x67810f8e6d82f068 + .quad 0x3eeb8fbcd2287db4 + .quad 0x72c7d3a301a03e93 + .quad 0x5473e88cbd98265a + .quad 0x7324aa515921b403 + + // 
2^212 * 7 * B + + .quad 0x857942f46c3cbe8e + .quad 0xa1d364b14730c046 + .quad 0x1c8ed914d23c41bf + .quad 0x0838e161eef6d5d2 + .quad 0xad23f6dae82354cb + .quad 0x6962502ab6571a6d + .quad 0x9b651636e38e37d1 + .quad 0x5cac5005d1a3312f + .quad 0x8cc154cce9e39904 + .quad 0x5b3a040b84de6846 + .quad 0xc4d8a61cb1be5d6e + .quad 0x40fb897bd8861f02 + + // 2^212 * 8 * B + + .quad 0x84c5aa9062de37a1 + .quad 0x421da5000d1d96e1 + .quad 0x788286306a9242d9 + .quad 0x3c5e464a690d10da + .quad 0xe57ed8475ab10761 + .quad 0x71435e206fd13746 + .quad 0x342f824ecd025632 + .quad 0x4b16281ea8791e7b + .quad 0xd1c101d50b813381 + .quad 0xdee60f1176ee6828 + .quad 0x0cb68893383f6409 + .quad 0x6183c565f6ff484a + + // 2^216 * 1 * B + + .quad 0x741d5a461e6bf9d6 + .quad 0x2305b3fc7777a581 + .quad 0xd45574a26474d3d9 + .quad 0x1926e1dc6401e0ff + .quad 0xdb468549af3f666e + .quad 0xd77fcf04f14a0ea5 + .quad 0x3df23ff7a4ba0c47 + .quad 0x3a10dfe132ce3c85 + .quad 0xe07f4e8aea17cea0 + .quad 0x2fd515463a1fc1fd + .quad 0x175322fd31f2c0f1 + .quad 0x1fa1d01d861e5d15 + + // 2^216 * 2 * B + + .quad 0xcc8055947d599832 + .quad 0x1e4656da37f15520 + .quad 0x99f6f7744e059320 + .quad 0x773563bc6a75cf33 + .quad 0x38dcac00d1df94ab + .quad 0x2e712bddd1080de9 + .quad 0x7f13e93efdd5e262 + .quad 0x73fced18ee9a01e5 + .quad 0x06b1e90863139cb3 + .quad 0xa493da67c5a03ecd + .quad 0x8d77cec8ad638932 + .quad 0x1f426b701b864f44 + + // 2^216 * 3 * B + + .quad 0xefc9264c41911c01 + .quad 0xf1a3b7b817a22c25 + .quad 0x5875da6bf30f1447 + .quad 0x4e1af5271d31b090 + .quad 0xf17e35c891a12552 + .quad 0xb76b8153575e9c76 + .quad 0xfa83406f0d9b723e + .quad 0x0b76bb1b3fa7e438 + .quad 0x08b8c1f97f92939b + .quad 0xbe6771cbd444ab6e + .quad 0x22e5646399bb8017 + .quad 0x7b6dd61eb772a955 + + // 2^216 * 4 * B + + .quad 0xb7adc1e850f33d92 + .quad 0x7998fa4f608cd5cf + .quad 0xad962dbd8dfc5bdb + .quad 0x703e9bceaf1d2f4f + .quad 0x5730abf9ab01d2c7 + .quad 0x16fb76dc40143b18 + .quad 0x866cbe65a0cbb281 + .quad 0x53fa9b659bff6afe + .quad 0x6c14c8e994885455 + .quad 0x843a5d6665aed4e5 + .quad 0x181bb73ebcd65af1 + .quad 0x398d93e5c4c61f50 + + // 2^216 * 5 * B + + .quad 0x1c4bd16733e248f3 + .quad 0xbd9e128715bf0a5f + .quad 0xd43f8cf0a10b0376 + .quad 0x53b09b5ddf191b13 + .quad 0xc3877c60d2e7e3f2 + .quad 0x3b34aaa030828bb1 + .quad 0x283e26e7739ef138 + .quad 0x699c9c9002c30577 + .quad 0xf306a7235946f1cc + .quad 0x921718b5cce5d97d + .quad 0x28cdd24781b4e975 + .quad 0x51caf30c6fcdd907 + + // 2^216 * 6 * B + + .quad 0xa60ba7427674e00a + .quad 0x630e8570a17a7bf3 + .quad 0x3758563dcf3324cc + .quad 0x5504aa292383fdaa + .quad 0x737af99a18ac54c7 + .quad 0x903378dcc51cb30f + .quad 0x2b89bc334ce10cc7 + .quad 0x12ae29c189f8e99a + .quad 0xa99ec0cb1f0d01cf + .quad 0x0dd1efcc3a34f7ae + .quad 0x55ca7521d09c4e22 + .quad 0x5fd14fe958eba5ea + + // 2^216 * 7 * B + + .quad 0xb5dc2ddf2845ab2c + .quad 0x069491b10a7fe993 + .quad 0x4daaf3d64002e346 + .quad 0x093ff26e586474d1 + .quad 0x3c42fe5ebf93cb8e + .quad 0xbedfa85136d4565f + .quad 0xe0f0859e884220e8 + .quad 0x7dd73f960725d128 + .quad 0xb10d24fe68059829 + .quad 0x75730672dbaf23e5 + .quad 0x1367253ab457ac29 + .quad 0x2f59bcbc86b470a4 + + // 2^216 * 8 * B + + .quad 0x83847d429917135f + .quad 0xad1b911f567d03d7 + .quad 0x7e7748d9be77aad1 + .quad 0x5458b42e2e51af4a + .quad 0x7041d560b691c301 + .quad 0x85201b3fadd7e71e + .quad 0x16c2e16311335585 + .quad 0x2aa55e3d010828b1 + .quad 0xed5192e60c07444f + .quad 0x42c54e2d74421d10 + .quad 0x352b4c82fdb5c864 + .quad 0x13e9004a8a768664 + + // 2^220 * 1 * B + + .quad 0xcbb5b5556c032bff + .quad 0xdf7191b729297a3a + .quad 
0xc1ff7326aded81bb + .quad 0x71ade8bb68be03f5 + .quad 0x1e6284c5806b467c + .quad 0xc5f6997be75d607b + .quad 0x8b67d958b378d262 + .quad 0x3d88d66a81cd8b70 + .quad 0x8b767a93204ed789 + .quad 0x762fcacb9fa0ae2a + .quad 0x771febcc6dce4887 + .quad 0x343062158ff05fb3 + + // 2^220 * 2 * B + + .quad 0xe05da1a7e1f5bf49 + .quad 0x26457d6dd4736092 + .quad 0x77dcb07773cc32f6 + .quad 0x0a5d94969cdd5fcd + .quad 0xfce219072a7b31b4 + .quad 0x4d7adc75aa578016 + .quad 0x0ec276a687479324 + .quad 0x6d6d9d5d1fda4beb + .quad 0x22b1a58ae9b08183 + .quad 0xfd95d071c15c388b + .quad 0xa9812376850a0517 + .quad 0x33384cbabb7f335e + + // 2^220 * 3 * B + + .quad 0x3c6fa2680ca2c7b5 + .quad 0x1b5082046fb64fda + .quad 0xeb53349c5431d6de + .quad 0x5278b38f6b879c89 + .quad 0x33bc627a26218b8d + .quad 0xea80b21fc7a80c61 + .quad 0x9458b12b173e9ee6 + .quad 0x076247be0e2f3059 + .quad 0x52e105f61416375a + .quad 0xec97af3685abeba4 + .quad 0x26e6b50623a67c36 + .quad 0x5cf0e856f3d4fb01 + + // 2^220 * 4 * B + + .quad 0xf6c968731ae8cab4 + .quad 0x5e20741ecb4f92c5 + .quad 0x2da53be58ccdbc3e + .quad 0x2dddfea269970df7 + .quad 0xbeaece313db342a8 + .quad 0xcba3635b842db7ee + .quad 0xe88c6620817f13ef + .quad 0x1b9438aa4e76d5c6 + .quad 0x8a50777e166f031a + .quad 0x067b39f10fb7a328 + .quad 0x1925c9a6010fbd76 + .quad 0x6df9b575cc740905 + + // 2^220 * 5 * B + + .quad 0x42c1192927f6bdcf + .quad 0x8f91917a403d61ca + .quad 0xdc1c5a668b9e1f61 + .quad 0x1596047804ec0f8d + .quad 0xecdfc35b48cade41 + .quad 0x6a88471fb2328270 + .quad 0x740a4a2440a01b6a + .quad 0x471e5796003b5f29 + .quad 0xda96bbb3aced37ac + .quad 0x7a2423b5e9208cea + .quad 0x24cc5c3038aebae2 + .quad 0x50c356afdc5dae2f + + // 2^220 * 6 * B + + .quad 0x09dcbf4341c30318 + .quad 0xeeba061183181dce + .quad 0xc179c0cedc1e29a1 + .quad 0x1dbf7b89073f35b0 + .quad 0xcfed9cdf1b31b964 + .quad 0xf486a9858ca51af3 + .quad 0x14897265ea8c1f84 + .quad 0x784a53dd932acc00 + .quad 0x2d99f9df14fc4920 + .quad 0x76ccb60cc4499fe5 + .quad 0xa4132cbbe5cf0003 + .quad 0x3f93d82354f000ea + + // 2^220 * 7 * B + + .quad 0x8183e7689e04ce85 + .quad 0x678fb71e04465341 + .quad 0xad92058f6688edac + .quad 0x5da350d3532b099a + .quad 0xeaac12d179e14978 + .quad 0xff923ff3bbebff5e + .quad 0x4af663e40663ce27 + .quad 0x0fd381a811a5f5ff + .quad 0xf256aceca436df54 + .quad 0x108b6168ae69d6e8 + .quad 0x20d986cb6b5d036c + .quad 0x655957b9fee2af50 + + // 2^220 * 8 * B + + .quad 0xaea8b07fa902030f + .quad 0xf88c766af463d143 + .quad 0x15b083663c787a60 + .quad 0x08eab1148267a4a8 + .quad 0xbdc1409bd002d0ac + .quad 0x66660245b5ccd9a6 + .quad 0x82317dc4fade85ec + .quad 0x02fe934b6ad7df0d + .quad 0xef5cf100cfb7ea74 + .quad 0x22897633a1cb42ac + .quad 0xd4ce0c54cef285e2 + .quad 0x30408c048a146a55 + + // 2^224 * 1 * B + + .quad 0x739d8845832fcedb + .quad 0xfa38d6c9ae6bf863 + .quad 0x32bc0dcab74ffef7 + .quad 0x73937e8814bce45e + .quad 0xbb2e00c9193b877f + .quad 0xece3a890e0dc506b + .quad 0xecf3b7c036de649f + .quad 0x5f46040898de9e1a + .quad 0xb9037116297bf48d + .quad 0xa9d13b22d4f06834 + .quad 0xe19715574696bdc6 + .quad 0x2cf8a4e891d5e835 + + // 2^224 * 2 * B + + .quad 0x6d93fd8707110f67 + .quad 0xdd4c09d37c38b549 + .quad 0x7cb16a4cc2736a86 + .quad 0x2049bd6e58252a09 + .quad 0x2cb5487e17d06ba2 + .quad 0x24d2381c3950196b + .quad 0xd7659c8185978a30 + .quad 0x7a6f7f2891d6a4f6 + .quad 0x7d09fd8d6a9aef49 + .quad 0xf0ee60be5b3db90b + .quad 0x4c21b52c519ebfd4 + .quad 0x6011aadfc545941d + + // 2^224 * 3 * B + + .quad 0x5f67926dcf95f83c + .quad 0x7c7e856171289071 + .quad 0xd6a1e7f3998f7a5b + .quad 0x6fc5cc1b0b62f9e0 + .quad 0x63ded0c802cbf890 + .quad 
0xfbd098ca0dff6aaa + .quad 0x624d0afdb9b6ed99 + .quad 0x69ce18b779340b1e + .quad 0xd1ef5528b29879cb + .quad 0xdd1aae3cd47e9092 + .quad 0x127e0442189f2352 + .quad 0x15596b3ae57101f1 + + // 2^224 * 4 * B + + .quad 0x462739d23f9179a2 + .quad 0xff83123197d6ddcf + .quad 0x1307deb553f2148a + .quad 0x0d2237687b5f4dda + .quad 0x09ff31167e5124ca + .quad 0x0be4158bd9c745df + .quad 0x292b7d227ef556e5 + .quad 0x3aa4e241afb6d138 + .quad 0x2cc138bf2a3305f5 + .quad 0x48583f8fa2e926c3 + .quad 0x083ab1a25549d2eb + .quad 0x32fcaa6e4687a36c + + // 2^224 * 5 * B + + .quad 0x7bc56e8dc57d9af5 + .quad 0x3e0bd2ed9df0bdf2 + .quad 0xaac014de22efe4a3 + .quad 0x4627e9cefebd6a5c + .quad 0x3207a4732787ccdf + .quad 0x17e31908f213e3f8 + .quad 0xd5b2ecd7f60d964e + .quad 0x746f6336c2600be9 + .quad 0x3f4af345ab6c971c + .quad 0xe288eb729943731f + .quad 0x33596a8a0344186d + .quad 0x7b4917007ed66293 + + // 2^224 * 6 * B + + .quad 0x2d85fb5cab84b064 + .quad 0x497810d289f3bc14 + .quad 0x476adc447b15ce0c + .quad 0x122ba376f844fd7b + .quad 0x54341b28dd53a2dd + .quad 0xaa17905bdf42fc3f + .quad 0x0ff592d94dd2f8f4 + .quad 0x1d03620fe08cd37d + .quad 0xc20232cda2b4e554 + .quad 0x9ed0fd42115d187f + .quad 0x2eabb4be7dd479d9 + .quad 0x02c70bf52b68ec4c + + // 2^224 * 7 * B + + .quad 0xa287ec4b5d0b2fbb + .quad 0x415c5790074882ca + .quad 0xe044a61ec1d0815c + .quad 0x26334f0a409ef5e0 + .quad 0xace532bf458d72e1 + .quad 0x5be768e07cb73cb5 + .quad 0x56cf7d94ee8bbde7 + .quad 0x6b0697e3feb43a03 + .quad 0xb6c8f04adf62a3c0 + .quad 0x3ef000ef076da45d + .quad 0x9c9cb95849f0d2a9 + .quad 0x1cc37f43441b2fae + + // 2^224 * 8 * B + + .quad 0x508f565a5cc7324f + .quad 0xd061c4c0e506a922 + .quad 0xfb18abdb5c45ac19 + .quad 0x6c6809c10380314a + .quad 0xd76656f1c9ceaeb9 + .quad 0x1c5b15f818e5656a + .quad 0x26e72832844c2334 + .quad 0x3a346f772f196838 + .quad 0xd2d55112e2da6ac8 + .quad 0xe9bd0331b1e851ed + .quad 0x960746dd8ec67262 + .quad 0x05911b9f6ef7c5d0 + + // 2^228 * 1 * B + + .quad 0xe9dcd756b637ff2d + .quad 0xec4c348fc987f0c4 + .quad 0xced59285f3fbc7b7 + .quad 0x3305354793e1ea87 + .quad 0x01c18980c5fe9f94 + .quad 0xcd656769716fd5c8 + .quad 0x816045c3d195a086 + .quad 0x6e2b7f3266cc7982 + .quad 0xcc802468f7c3568f + .quad 0x9de9ba8219974cb3 + .quad 0xabb7229cb5b81360 + .quad 0x44e2017a6fbeba62 + + // 2^228 * 2 * B + + .quad 0xc4c2a74354dab774 + .quad 0x8e5d4c3c4eaf031a + .quad 0xb76c23d242838f17 + .quad 0x749a098f68dce4ea + .quad 0x87f82cf3b6ca6ecd + .quad 0x580f893e18f4a0c2 + .quad 0x058930072604e557 + .quad 0x6cab6ac256d19c1d + .quad 0xdcdfe0a02cc1de60 + .quad 0x032665ff51c5575b + .quad 0x2c0c32f1073abeeb + .quad 0x6a882014cd7b8606 + + // 2^228 * 3 * B + + .quad 0xa52a92fea4747fb5 + .quad 0xdc12a4491fa5ab89 + .quad 0xd82da94bb847a4ce + .quad 0x4d77edce9512cc4e + .quad 0xd111d17caf4feb6e + .quad 0x050bba42b33aa4a3 + .quad 0x17514c3ceeb46c30 + .quad 0x54bedb8b1bc27d75 + .quad 0x77c8e14577e2189c + .quad 0xa3e46f6aff99c445 + .quad 0x3144dfc86d335343 + .quad 0x3a96559e7c4216a9 + + // 2^228 * 4 * B + + .quad 0x12550d37f42ad2ee + .quad 0x8b78e00498a1fbf5 + .quad 0x5d53078233894cb2 + .quad 0x02c84e4e3e498d0c + .quad 0x4493896880baaa52 + .quad 0x4c98afc4f285940e + .quad 0xef4aa79ba45448b6 + .quad 0x5278c510a57aae7f + .quad 0xa54dd074294c0b94 + .quad 0xf55d46b8df18ffb6 + .quad 0xf06fecc58dae8366 + .quad 0x588657668190d165 + + // 2^228 * 5 * B + + .quad 0xd47712311aef7117 + .quad 0x50343101229e92c7 + .quad 0x7a95e1849d159b97 + .quad 0x2449959b8b5d29c9 + .quad 0xbf5834f03de25cc3 + .quad 0xb887c8aed6815496 + .quad 0x5105221a9481e892 + .quad 0x6760ed19f7723f93 + .quad 
0x669ba3b7ac35e160 + .quad 0x2eccf73fba842056 + .quad 0x1aec1f17c0804f07 + .quad 0x0d96bc031856f4e7 + + // 2^228 * 6 * B + + .quad 0x3318be7775c52d82 + .quad 0x4cb764b554d0aab9 + .quad 0xabcf3d27cc773d91 + .quad 0x3bf4d1848123288a + .quad 0xb1d534b0cc7505e1 + .quad 0x32cd003416c35288 + .quad 0xcb36a5800762c29d + .quad 0x5bfe69b9237a0bf8 + .quad 0x183eab7e78a151ab + .quad 0xbbe990c999093763 + .quad 0xff717d6e4ac7e335 + .quad 0x4c5cddb325f39f88 + + // 2^228 * 7 * B + + .quad 0xc0f6b74d6190a6eb + .quad 0x20ea81a42db8f4e4 + .quad 0xa8bd6f7d97315760 + .quad 0x33b1d60262ac7c21 + .quad 0x57750967e7a9f902 + .quad 0x2c37fdfc4f5b467e + .quad 0xb261663a3177ba46 + .quad 0x3a375e78dc2d532b + .quad 0x8141e72f2d4dddea + .quad 0xe6eafe9862c607c8 + .quad 0x23c28458573cafd0 + .quad 0x46b9476f4ff97346 + + // 2^228 * 8 * B + + .quad 0x0c1ffea44f901e5c + .quad 0x2b0b6fb72184b782 + .quad 0xe587ff910114db88 + .quad 0x37130f364785a142 + .quad 0x1215505c0d58359f + .quad 0x2a2013c7fc28c46b + .quad 0x24a0a1af89ea664e + .quad 0x4400b638a1130e1f + .quad 0x3a01b76496ed19c3 + .quad 0x31e00ab0ed327230 + .quad 0x520a885783ca15b1 + .quad 0x06aab9875accbec7 + + // 2^232 * 1 * B + + .quad 0xc1339983f5df0ebb + .quad 0xc0f3758f512c4cac + .quad 0x2cf1130a0bb398e1 + .quad 0x6b3cecf9aa270c62 + .quad 0x5349acf3512eeaef + .quad 0x20c141d31cc1cb49 + .quad 0x24180c07a99a688d + .quad 0x555ef9d1c64b2d17 + .quad 0x36a770ba3b73bd08 + .quad 0x624aef08a3afbf0c + .quad 0x5737ff98b40946f2 + .quad 0x675f4de13381749d + + // 2^232 * 2 * B + + .quad 0x0e2c52036b1782fc + .quad 0x64816c816cad83b4 + .quad 0xd0dcbdd96964073e + .quad 0x13d99df70164c520 + .quad 0xa12ff6d93bdab31d + .quad 0x0725d80f9d652dfe + .quad 0x019c4ff39abe9487 + .quad 0x60f450b882cd3c43 + .quad 0x014b5ec321e5c0ca + .quad 0x4fcb69c9d719bfa2 + .quad 0x4e5f1c18750023a0 + .quad 0x1c06de9e55edac80 + + // 2^232 * 3 * B + + .quad 0x990f7ad6a33ec4e2 + .quad 0x6608f938be2ee08e + .quad 0x9ca143c563284515 + .quad 0x4cf38a1fec2db60d + .quad 0xffd52b40ff6d69aa + .quad 0x34530b18dc4049bb + .quad 0x5e4a5c2fa34d9897 + .quad 0x78096f8e7d32ba2d + .quad 0xa0aaaa650dfa5ce7 + .quad 0xf9c49e2a48b5478c + .quad 0x4f09cc7d7003725b + .quad 0x373cad3a26091abe + + // 2^232 * 4 * B + + .quad 0xb294634d82c9f57c + .quad 0x1fcbfde124934536 + .quad 0x9e9c4db3418cdb5a + .quad 0x0040f3d9454419fc + .quad 0xf1bea8fb89ddbbad + .quad 0x3bcb2cbc61aeaecb + .quad 0x8f58a7bb1f9b8d9d + .quad 0x21547eda5112a686 + .quad 0xdefde939fd5986d3 + .quad 0xf4272c89510a380c + .quad 0xb72ba407bb3119b9 + .quad 0x63550a334a254df4 + + // 2^232 * 5 * B + + .quad 0x6507d6edb569cf37 + .quad 0x178429b00ca52ee1 + .quad 0xea7c0090eb6bd65d + .quad 0x3eea62c7daf78f51 + .quad 0x9bba584572547b49 + .quad 0xf305c6fae2c408e0 + .quad 0x60e8fa69c734f18d + .quad 0x39a92bafaa7d767a + .quad 0x9d24c713e693274e + .quad 0x5f63857768dbd375 + .quad 0x70525560eb8ab39a + .quad 0x68436a0665c9c4cd + + // 2^232 * 6 * B + + .quad 0xbc0235e8202f3f27 + .quad 0xc75c00e264f975b0 + .quad 0x91a4e9d5a38c2416 + .quad 0x17b6e7f68ab789f9 + .quad 0x1e56d317e820107c + .quad 0xc5266844840ae965 + .quad 0xc1e0a1c6320ffc7a + .quad 0x5373669c91611472 + .quad 0x5d2814ab9a0e5257 + .quad 0x908f2084c9cab3fc + .quad 0xafcaf5885b2d1eca + .quad 0x1cb4b5a678f87d11 + + // 2^232 * 7 * B + + .quad 0xb664c06b394afc6c + .quad 0x0c88de2498da5fb1 + .quad 0x4f8d03164bcad834 + .quad 0x330bca78de7434a2 + .quad 0x6b74aa62a2a007e7 + .quad 0xf311e0b0f071c7b1 + .quad 0x5707e438000be223 + .quad 0x2dc0fd2d82ef6eac + .quad 0x982eff841119744e + .quad 0xf9695e962b074724 + .quad 0xc58ac14fbfc953fb + .quad 
0x3c31be1b369f1cf5 + + // 2^232 * 8 * B + + .quad 0xb0f4864d08948aee + .quad 0x07dc19ee91ba1c6f + .quad 0x7975cdaea6aca158 + .quad 0x330b61134262d4bb + .quad 0xc168bc93f9cb4272 + .quad 0xaeb8711fc7cedb98 + .quad 0x7f0e52aa34ac8d7a + .quad 0x41cec1097e7d55bb + .quad 0xf79619d7a26d808a + .quad 0xbb1fd49e1d9e156d + .quad 0x73d7c36cdba1df27 + .quad 0x26b44cd91f28777d + + // 2^236 * 1 * B + + .quad 0x300a9035393aa6d8 + .quad 0x2b501131a12bb1cd + .quad 0x7b1ff677f093c222 + .quad 0x4309c1f8cab82bad + .quad 0xaf44842db0285f37 + .quad 0x8753189047efc8df + .quad 0x9574e091f820979a + .quad 0x0e378d6069615579 + .quad 0xd9fa917183075a55 + .quad 0x4bdb5ad26b009fdc + .quad 0x7829ad2cd63def0e + .quad 0x078fc54975fd3877 + + // 2^236 * 2 * B + + .quad 0x87dfbd1428878f2d + .quad 0x134636dd1e9421a1 + .quad 0x4f17c951257341a3 + .quad 0x5df98d4bad296cb8 + .quad 0xe2004b5bb833a98a + .quad 0x44775dec2d4c3330 + .quad 0x3aa244067eace913 + .quad 0x272630e3d58e00a9 + .quad 0xf3678fd0ecc90b54 + .quad 0xf001459b12043599 + .quad 0x26725fbc3758b89b + .quad 0x4325e4aa73a719ae + + // 2^236 * 3 * B + + .quad 0x657dc6ef433c3493 + .quad 0x65375e9f80dbf8c3 + .quad 0x47fd2d465b372dae + .quad 0x4966ab79796e7947 + .quad 0xed24629acf69f59d + .quad 0x2a4a1ccedd5abbf4 + .quad 0x3535ca1f56b2d67b + .quad 0x5d8c68d043b1b42d + .quad 0xee332d4de3b42b0a + .quad 0xd84e5a2b16a4601c + .quad 0x78243877078ba3e4 + .quad 0x77ed1eb4184ee437 + + // 2^236 * 4 * B + + .quad 0xbfd4e13f201839a0 + .quad 0xaeefffe23e3df161 + .quad 0xb65b04f06b5d1fe3 + .quad 0x52e085fb2b62fbc0 + .quad 0x185d43f89e92ed1a + .quad 0xb04a1eeafe4719c6 + .quad 0x499fbe88a6f03f4f + .quad 0x5d8b0d2f3c859bdd + .quad 0x124079eaa54cf2ba + .quad 0xd72465eb001b26e7 + .quad 0x6843bcfdc97af7fd + .quad 0x0524b42b55eacd02 + + // 2^236 * 5 * B + + .quad 0xfd0d5dbee45447b0 + .quad 0x6cec351a092005ee + .quad 0x99a47844567579cb + .quad 0x59d242a216e7fa45 + .quad 0xbc18dcad9b829eac + .quad 0x23ae7d28b5f579d0 + .quad 0xc346122a69384233 + .quad 0x1a6110b2e7d4ac89 + .quad 0x4f833f6ae66997ac + .quad 0x6849762a361839a4 + .quad 0x6985dec1970ab525 + .quad 0x53045e89dcb1f546 + + // 2^236 * 6 * B + + .quad 0xcb8bb346d75353db + .quad 0xfcfcb24bae511e22 + .quad 0xcba48d40d50ae6ef + .quad 0x26e3bae5f4f7cb5d + .quad 0x84da3cde8d45fe12 + .quad 0xbd42c218e444e2d2 + .quad 0xa85196781f7e3598 + .quad 0x7642c93f5616e2b2 + .quad 0x2323daa74595f8e4 + .quad 0xde688c8b857abeb4 + .quad 0x3fc48e961c59326e + .quad 0x0b2e73ca15c9b8ba + + // 2^236 * 7 * B + + .quad 0xd6bb4428c17f5026 + .quad 0x9eb27223fb5a9ca7 + .quad 0xe37ba5031919c644 + .quad 0x21ce380db59a6602 + .quad 0x0e3fbfaf79c03a55 + .quad 0x3077af054cbb5acf + .quad 0xd5c55245db3de39f + .quad 0x015e68c1476a4af7 + .quad 0xc1d5285220066a38 + .quad 0x95603e523570aef3 + .quad 0x832659a7226b8a4d + .quad 0x5dd689091f8eedc9 + + // 2^236 * 8 * B + + .quad 0xcbac84debfd3c856 + .quad 0x1624c348b35ff244 + .quad 0xb7f88dca5d9cad07 + .quad 0x3b0e574da2c2ebe8 + .quad 0x1d022591a5313084 + .quad 0xca2d4aaed6270872 + .quad 0x86a12b852f0bfd20 + .quad 0x56e6c439ad7da748 + .quad 0xc704ff4942bdbae6 + .quad 0x5e21ade2b2de1f79 + .quad 0xe95db3f35652fad8 + .quad 0x0822b5378f08ebc1 + + // 2^240 * 1 * B + + .quad 0x51f048478f387475 + .quad 0xb25dbcf49cbecb3c + .quad 0x9aab1244d99f2055 + .quad 0x2c709e6c1c10a5d6 + .quad 0xe1b7f29362730383 + .quad 0x4b5279ffebca8a2c + .quad 0xdafc778abfd41314 + .quad 0x7deb10149c72610f + .quad 0xcb62af6a8766ee7a + .quad 0x66cbec045553cd0e + .quad 0x588001380f0be4b5 + .quad 0x08e68e9ff62ce2ea + + // 2^240 * 2 * B + + .quad 0x34ad500a4bc130ad + .quad 
0x8d38db493d0bd49c + .quad 0xa25c3d98500a89be + .quad 0x2f1f3f87eeba3b09 + .quad 0x2f2d09d50ab8f2f9 + .quad 0xacb9218dc55923df + .quad 0x4a8f342673766cb9 + .quad 0x4cb13bd738f719f5 + .quad 0xf7848c75e515b64a + .quad 0xa59501badb4a9038 + .quad 0xc20d313f3f751b50 + .quad 0x19a1e353c0ae2ee8 + + // 2^240 * 3 * B + + .quad 0x7d1c7560bafa05c3 + .quad 0xb3e1a0a0c6e55e61 + .quad 0xe3529718c0d66473 + .quad 0x41546b11c20c3486 + .quad 0xb42172cdd596bdbd + .quad 0x93e0454398eefc40 + .quad 0x9fb15347b44109b5 + .quad 0x736bd3990266ae34 + .quad 0x85532d509334b3b4 + .quad 0x46fd114b60816573 + .quad 0xcc5f5f30425c8375 + .quad 0x412295a2b87fab5c + + // 2^240 * 4 * B + + .quad 0x19c99b88f57ed6e9 + .quad 0x5393cb266df8c825 + .quad 0x5cee3213b30ad273 + .quad 0x14e153ebb52d2e34 + .quad 0x2e655261e293eac6 + .quad 0x845a92032133acdb + .quad 0x460975cb7900996b + .quad 0x0760bb8d195add80 + .quad 0x413e1a17cde6818a + .quad 0x57156da9ed69a084 + .quad 0x2cbf268f46caccb1 + .quad 0x6b34be9bc33ac5f2 + + // 2^240 * 5 * B + + .quad 0xf3df2f643a78c0b2 + .quad 0x4c3e971ef22e027c + .quad 0xec7d1c5e49c1b5a3 + .quad 0x2012c18f0922dd2d + .quad 0x11fc69656571f2d3 + .quad 0xc6c9e845530e737a + .quad 0xe33ae7a2d4fe5035 + .quad 0x01b9c7b62e6dd30b + .quad 0x880b55e55ac89d29 + .quad 0x1483241f45a0a763 + .quad 0x3d36efdfc2e76c1f + .quad 0x08af5b784e4bade8 + + // 2^240 * 6 * B + + .quad 0x283499dc881f2533 + .quad 0x9d0525da779323b6 + .quad 0x897addfb673441f4 + .quad 0x32b79d71163a168d + .quad 0xe27314d289cc2c4b + .quad 0x4be4bd11a287178d + .quad 0x18d528d6fa3364ce + .quad 0x6423c1d5afd9826e + .quad 0xcc85f8d9edfcb36a + .quad 0x22bcc28f3746e5f9 + .quad 0xe49de338f9e5d3cd + .quad 0x480a5efbc13e2dcc + + // 2^240 * 7 * B + + .quad 0x0b51e70b01622071 + .quad 0x06b505cf8b1dafc5 + .quad 0x2c6bb061ef5aabcd + .quad 0x47aa27600cb7bf31 + .quad 0xb6614ce442ce221f + .quad 0x6e199dcc4c053928 + .quad 0x663fb4a4dc1cbe03 + .quad 0x24b31d47691c8e06 + .quad 0x2a541eedc015f8c3 + .quad 0x11a4fe7e7c693f7c + .quad 0xf0af66134ea278d6 + .quad 0x545b585d14dda094 + + // 2^240 * 8 * B + + .quad 0x67bf275ea0d43a0f + .quad 0xade68e34089beebe + .quad 0x4289134cd479e72e + .quad 0x0f62f9c332ba5454 + .quad 0x6204e4d0e3b321e1 + .quad 0x3baa637a28ff1e95 + .quad 0x0b0ccffd5b99bd9e + .quad 0x4d22dc3e64c8d071 + .quad 0xfcb46589d63b5f39 + .quad 0x5cae6a3f57cbcf61 + .quad 0xfebac2d2953afa05 + .quad 0x1c0fa01a36371436 + + // 2^244 * 1 * B + + .quad 0xe7547449bc7cd692 + .quad 0x0f9abeaae6f73ddf + .quad 0x4af01ca700837e29 + .quad 0x63ab1b5d3f1bc183 + .quad 0xc11ee5e854c53fae + .quad 0x6a0b06c12b4f3ff4 + .quad 0x33540f80e0b67a72 + .quad 0x15f18fc3cd07e3ef + .quad 0x32750763b028f48c + .quad 0x06020740556a065f + .quad 0xd53bd812c3495b58 + .quad 0x08706c9b865f508d + + // 2^244 * 2 * B + + .quad 0xf37ca2ab3d343dff + .quad 0x1a8c6a2d80abc617 + .quad 0x8e49e035d4ccffca + .quad 0x48b46beebaa1d1b9 + .quad 0xcc991b4138b41246 + .quad 0x243b9c526f9ac26b + .quad 0xb9ef494db7cbabbd + .quad 0x5fba433dd082ed00 + .quad 0x9c49e355c9941ad0 + .quad 0xb9734ade74498f84 + .quad 0x41c3fed066663e5c + .quad 0x0ecfedf8e8e710b3 + + // 2^244 * 3 * B + + .quad 0x76430f9f9cd470d9 + .quad 0xb62acc9ba42f6008 + .quad 0x1898297c59adad5e + .quad 0x7789dd2db78c5080 + .quad 0x744f7463e9403762 + .quad 0xf79a8dee8dfcc9c9 + .quad 0x163a649655e4cde3 + .quad 0x3b61788db284f435 + .quad 0xb22228190d6ef6b2 + .quad 0xa94a66b246ce4bfa + .quad 0x46c1a77a4f0b6cc7 + .quad 0x4236ccffeb7338cf + + // 2^244 * 4 * B + + .quad 0x8497404d0d55e274 + .quad 0x6c6663d9c4ad2b53 + .quad 0xec2fb0d9ada95734 + .quad 0x2617e120cdb8f73c + .quad 
0x3bd82dbfda777df6 + .quad 0x71b177cc0b98369e + .quad 0x1d0e8463850c3699 + .quad 0x5a71945b48e2d1f1 + .quad 0x6f203dd5405b4b42 + .quad 0x327ec60410b24509 + .quad 0x9c347230ac2a8846 + .quad 0x77de29fc11ffeb6a + + // 2^244 * 5 * B + + .quad 0xb0ac57c983b778a8 + .quad 0x53cdcca9d7fe912c + .quad 0x61c2b854ff1f59dc + .quad 0x3a1a2cf0f0de7dac + .quad 0x835e138fecced2ca + .quad 0x8c9eaf13ea963b9a + .quad 0xc95fbfc0b2160ea6 + .quad 0x575e66f3ad877892 + .quad 0x99803a27c88fcb3a + .quad 0x345a6789275ec0b0 + .quad 0x459789d0ff6c2be5 + .quad 0x62f882651e70a8b2 + + // 2^244 * 6 * B + + .quad 0x085ae2c759ff1be4 + .quad 0x149145c93b0e40b7 + .quad 0xc467e7fa7ff27379 + .quad 0x4eeecf0ad5c73a95 + .quad 0x6d822986698a19e0 + .quad 0xdc9821e174d78a71 + .quad 0x41a85f31f6cb1f47 + .quad 0x352721c2bcda9c51 + .quad 0x48329952213fc985 + .quad 0x1087cf0d368a1746 + .quad 0x8e5261b166c15aa5 + .quad 0x2d5b2d842ed24c21 + + // 2^244 * 7 * B + + .quad 0x02cfebd9ebd3ded1 + .quad 0xd45b217739021974 + .quad 0x7576f813fe30a1b7 + .quad 0x5691b6f9a34ef6c2 + .quad 0x5eb7d13d196ac533 + .quad 0x377234ecdb80be2b + .quad 0xe144cffc7cf5ae24 + .quad 0x5226bcf9c441acec + .quad 0x79ee6c7223e5b547 + .quad 0x6f5f50768330d679 + .quad 0xed73e1e96d8adce9 + .quad 0x27c3da1e1d8ccc03 + + // 2^244 * 8 * B + + .quad 0x7eb9efb23fe24c74 + .quad 0x3e50f49f1651be01 + .quad 0x3ea732dc21858dea + .quad 0x17377bd75bb810f9 + .quad 0x28302e71630ef9f6 + .quad 0xc2d4a2032b64cee0 + .quad 0x090820304b6292be + .quad 0x5fca747aa82adf18 + .quad 0x232a03c35c258ea5 + .quad 0x86f23a2c6bcb0cf1 + .quad 0x3dad8d0d2e442166 + .quad 0x04a8933cab76862b + + // 2^248 * 1 * B + + .quad 0xd2c604b622943dff + .quad 0xbc8cbece44cfb3a0 + .quad 0x5d254ff397808678 + .quad 0x0fa3614f3b1ca6bf + .quad 0x69082b0e8c936a50 + .quad 0xf9c9a035c1dac5b6 + .quad 0x6fb73e54c4dfb634 + .quad 0x4005419b1d2bc140 + .quad 0xa003febdb9be82f0 + .quad 0x2089c1af3a44ac90 + .quad 0xf8499f911954fa8e + .quad 0x1fba218aef40ab42 + + // 2^248 * 2 * B + + .quad 0xab549448fac8f53e + .quad 0x81f6e89a7ba63741 + .quad 0x74fd6c7d6c2b5e01 + .quad 0x392e3acaa8c86e42 + .quad 0x4f3e57043e7b0194 + .quad 0xa81d3eee08daaf7f + .quad 0xc839c6ab99dcdef1 + .quad 0x6c535d13ff7761d5 + .quad 0x4cbd34e93e8a35af + .quad 0x2e0781445887e816 + .quad 0x19319c76f29ab0ab + .quad 0x25e17fe4d50ac13b + + // 2^248 * 3 * B + + .quad 0x0a289bd71e04f676 + .quad 0x208e1c52d6420f95 + .quad 0x5186d8b034691fab + .quad 0x255751442a9fb351 + .quad 0x915f7ff576f121a7 + .quad 0xc34a32272fcd87e3 + .quad 0xccba2fde4d1be526 + .quad 0x6bba828f8969899b + .quad 0xe2d1bc6690fe3901 + .quad 0x4cb54a18a0997ad5 + .quad 0x971d6914af8460d4 + .quad 0x559d504f7f6b7be4 + + // 2^248 * 4 * B + + .quad 0xa7738378b3eb54d5 + .quad 0x1d69d366a5553c7c + .quad 0x0a26cf62f92800ba + .quad 0x01ab12d5807e3217 + .quad 0x9c4891e7f6d266fd + .quad 0x0744a19b0307781b + .quad 0x88388f1d6061e23b + .quad 0x123ea6a3354bd50e + .quad 0x118d189041e32d96 + .quad 0xb9ede3c2d8315848 + .quad 0x1eab4271d83245d9 + .quad 0x4a3961e2c918a154 + + // 2^248 * 5 * B + + .quad 0x71dc3be0f8e6bba0 + .quad 0xd6cef8347effe30a + .quad 0xa992425fe13a476a + .quad 0x2cd6bce3fb1db763 + .quad 0x0327d644f3233f1e + .quad 0x499a260e34fcf016 + .quad 0x83b5a716f2dab979 + .quad 0x68aceead9bd4111f + .quad 0x38b4c90ef3d7c210 + .quad 0x308e6e24b7ad040c + .quad 0x3860d9f1b7e73e23 + .quad 0x595760d5b508f597 + + // 2^248 * 6 * B + + .quad 0x6129bfe104aa6397 + .quad 0x8f960008a4a7fccb + .quad 0x3f8bc0897d909458 + .quad 0x709fa43edcb291a9 + .quad 0x882acbebfd022790 + .quad 0x89af3305c4115760 + .quad 0x65f492e37d3473f4 + .quad 
0x2cb2c5df54515a2b + .quad 0xeb0a5d8c63fd2aca + .quad 0xd22bc1662e694eff + .quad 0x2723f36ef8cbb03a + .quad 0x70f029ecf0c8131f + + // 2^248 * 7 * B + + .quad 0x461307b32eed3e33 + .quad 0xae042f33a45581e7 + .quad 0xc94449d3195f0366 + .quad 0x0b7d5d8a6c314858 + .quad 0x2a6aafaa5e10b0b9 + .quad 0x78f0a370ef041aa9 + .quad 0x773efb77aa3ad61f + .quad 0x44eca5a2a74bd9e1 + .quad 0x25d448327b95d543 + .quad 0x70d38300a3340f1d + .quad 0xde1c531c60e1c52b + .quad 0x272224512c7de9e4 + + // 2^248 * 8 * B + + .quad 0x1abc92af49c5342e + .quad 0xffeed811b2e6fad0 + .quad 0xefa28c8dfcc84e29 + .quad 0x11b5df18a44cc543 + .quad 0xbf7bbb8a42a975fc + .quad 0x8c5c397796ada358 + .quad 0xe27fc76fcdedaa48 + .quad 0x19735fd7f6bc20a6 + .quad 0xe3ab90d042c84266 + .quad 0xeb848e0f7f19547e + .quad 0x2503a1d065a497b9 + .quad 0x0fef911191df895f + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/curve25519/edwards25519_scalarmulbase_alt.S b/arm/curve25519/edwards25519_scalarmulbase_alt.S new file mode 100644 index 0000000000..f5a197861a --- /dev/null +++ b/arm/curve25519/edwards25519_scalarmulbase_alt.S @@ -0,0 +1,8777 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Scalar multiplication for the edwards25519 standard basepoint +// Input scalar[4]; output res[8] +// +// extern void edwards25519_scalarmulbase_alt +// (uint64_t res[static 8],uint64_t scalar[static 4]); +// +// Given a scalar n, returns point (X,Y) = n * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve. +// +// Standard ARM ABI: X0 = res, X1 = scalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_scalarmulbase_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_scalarmulbase_alt) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable home for the input result argument during the whole body + +#define res x23 + +// Other variables that are only needed prior to the modular inverse. + +#define tab x19 + +#define i x20 + +#define bias x21 + +#define bf x22 +#define ix x22 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. + +#define resx res, #(0*NUMSIZE) +#define resy res, #(1*NUMSIZE) + +#define scalar sp, #(0*NUMSIZE) + +#define tabent sp, #(1*NUMSIZE) +#define ymx_2 sp, #(1*NUMSIZE) +#define xpy_2 sp, #(2*NUMSIZE) +#define kxy_2 sp, #(3*NUMSIZE) + +#define acc sp, #(4*NUMSIZE) +#define x_1 sp, #(4*NUMSIZE) +#define y_1 sp, #(5*NUMSIZE) +#define z_1 sp, #(6*NUMSIZE) +#define w_1 sp, #(7*NUMSIZE) +#define x_3 sp, #(4*NUMSIZE) +#define y_3 sp, #(5*NUMSIZE) +#define z_3 sp, #(6*NUMSIZE) +#define w_3 sp, #(7*NUMSIZE) + +#define tmpspace sp, #(8*NUMSIZE) +#define t0 sp, #(8*NUMSIZE) +#define t1 sp, #(9*NUMSIZE) +#define t2 sp, #(10*NUMSIZE) +#define t3 sp, #(11*NUMSIZE) +#define t4 sp, #(12*NUMSIZE) +#define t5 sp, #(13*NUMSIZE) + +// Total size to reserve on the stack + +#define NSPACE (14*NUMSIZE) + +// Load 64-bit immediate into a register + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +// Macro wrapping up the basic field operation bignum_mul_p25519_alt, only +// trivially different from a pure function call to that subroutine. 
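+// As a reference model (an illustrative sketch, not the verified code
+// itself), the macro computes z = (x * y) mod p_25519 on 4-limb
+// little-endian bignums; in Python:
+//
+//     P = (1 << 255) - 19
+//     def num(a):                      # limbs -> integer
+//         return sum(d << (64 * i) for i, d in enumerate(a))
+//     def limbs(n):                    # integer -> 4 limbs
+//         return [(n >> (64 * i)) & (2**64 - 1) for i in range(4)]
+//     def mul_p25519(x, y):
+//         return limbs(num(x) * num(y) % P)
+//
+// The assembly reaches the same result without a division: since
+// 2^256 == 38 (mod p_25519), the high half of the 512-bit product is
+// folded back in with a multiply by 38, and a small quotient estimate
+// from the top bits then selects the final subtraction of p_25519.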
+ +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [P2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [P1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #0x26; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, cs; \ + adds x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + orr x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #0x13; \ + madd x11, x7, x8, x7; \ + adds x12, x12, x11; \ + adcs x13, x13, x9; \ + adcs x14, x14, x3; \ + adcs x15, x15, xzr; \ + csel x7, x7, xzr, cc; \ + subs x12, x12, x7; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbc x15, x15, xzr; \ + and x15, x15, #0x7fffffffffffffff; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. 
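+// In the same Python model (again just a sketch), the weaker contract is:
+//
+//     def mul_4(x, y):                 # some r == x*y (mod P), r < 2*P
+//         v = num(x) * num(y)
+//         v = (v & (2**256 - 1)) + 38 * (v >> 256)  # 2^256 == 38 (mod P)
+//         v = (v & (2**255 - 1)) + 19 * (v >> 255)  # 2^255 == 19 (mod P)
+//         return limbs(v)
+//
+// After the first fold v < 39 * 2^256, so the second quotient v >> 255
+// is at most 77 and the result stays below 2^255 + 19*77 < 2 * p_25519.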
+ +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [P2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [P1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #0x26; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, cs; \ + adds x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + bic x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #0x13; \ + mul x11, x7, x8; \ + adds x12, x12, x11; \ + adcs x13, x13, x9; \ + adcs x14, x14, x3; \ + adc x15, x15, xzr; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x4, #38; \ + csel x3, x4, xzr, lo; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbc x8, x8, xzr; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16] + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. 
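+// Concretely, with the same sketch conventions as above:
+//
+//     def add_twice4(x, y):            # assumes num(x) + num(y) < 2^257 - 38
+//         s = num(x) + num(y)
+//         if s >= 2**256:              # carry out of four digits:
+//             s -= 2**256 - 38         # subtract 2 * p_25519 once
+//         return limbs(s)
+//
+// The assembly instead drops the carry and adds back 38, which is the
+// same subtraction of 2 * p_25519 = 2^256 - 38 performed modulo 2^256.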
+ +#define add_twice4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x5, x6, [P1+16]; \ + ldp x7, x8, [P2+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +#define double_twice4(P0,P1) \ + ldp x3, x4, [P1]; \ + adds x3, x3, x3; \ + adcs x4, x4, x4; \ + ldp x5, x6, [P1+16]; \ + adcs x5, x5, x5; \ + adcs x6, x6, x6; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +S2N_BN_SYMBOL(edwards25519_scalarmulbase_alt): + +// Save regs and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + sub sp, sp, #NSPACE + +// Move the output pointer to a stable place + + mov res, x0 + +// Copy the input scalar x to its local variable while reducing it +// modulo 2^252 + m where m = 27742317777372353535851937790883648493; +// this is the order of the basepoint so this doesn't change the result. +// First do q = floor(x/2^252) and x' = x - q * (2^252 + m), which gives +// an initial result -15 * m <= x' < 2^252 + + ldp x10, x11, [x1] + ldp x12, x13, [x1, #16] + + lsr x9, x13, #60 + + movbig(x0,#0x5812,#0x631a,#0x5cf5,#0xd3ed); + movbig(x1,#0x14de,#0xf9de,#0xa2f7,#0x9cd6); + + mul x2, x9, x0 + mul x3, x9, x1 + umulh x4, x9, x0 + umulh x5, x9, x1 + + adds x3, x3, x4 + adc x4, x5, xzr + lsl x5, x9, #60 + + subs x10, x10, x2 + sbcs x11, x11, x3 + sbcs x12, x12, x4 + sbcs x13, x13, x5 + +// If x' < 0 then just directly negate it; this makes sure the +// reduced argument is strictly 0 <= x' < 2^252, but now we need +// to record (done via bit 255 of the reduced scalar, which is +// ignored in the main loop) when we negated so we can flip +// the sign of the eventual point to compensate. + + csetm x9, cc + adds xzr, x9, x9 + eor x10, x10, x9 + adcs x10, x10, xzr + eor x11, x11, x9 + adcs x11, x11, xzr + eor x12, x12, x9 + adcs x12, x12, xzr + eor x13, x13, x9 + adc x13, x13, xzr + + and x9, x9, #0x8000000000000000 + orr x13, x13, x9 + +// And before we store the scalar, test and reset bit 251 to +// initialize the main loop just below. + + stp x10, x11, [scalar] + tst x13, #0x0800000000000000 + bic x13, x13, #0x0800000000000000 + stp x12, x13, [scalar+16] + +// The main part of the computation is in extended-projective coordinates +// (X,Y,Z,T), representing an affine point on the edwards25519 curve +// (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z). +// In comments B means the standard basepoint (x,4/5) = +// (0x216....f25d51a,0x0x6666..666658). +// +// Initialize accumulator "acc" to either 0 or 2^251 * B depending on +// bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle. 
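+// In the same illustrative Python model, the coordinate invariant means
+// a point can be read back to affine form by one field inversion:
+//
+//     def to_affine(X, Y, Z, T):       # P is prime, so Fermat gives 1/Z
+//         zi = pow(Z, P - 2, P)
+//         return (X * zi % P, Y * zi % P)
+//
+// Both candidate initial values are stored as precomputed constants
+// (edwards25519_0g and edwards25519_251g below) and chosen with csel,
+// so the selection involves no data-dependent branch.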
+ + adr x10, edwards25519_0g + adr x11, edwards25519_251g + ldp x0, x1, [x10] + ldp x2, x3, [x11] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc] + + ldp x0, x1, [x10, 1*16] + ldp x2, x3, [x11, 1*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+1*16] + + ldp x0, x1, [x10, 2*16] + ldp x2, x3, [x11, 2*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+2*16] + + ldp x0, x1, [x10, 3*16] + ldp x2, x3, [x11, 3*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+3*16] + + mov x0, #1 + stp x0, xzr, [acc+4*16] + stp xzr, xzr, [acc+5*16] + + ldp x0, x1, [x10, 4*16] + ldp x2, x3, [x11, 4*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+6*16] + + ldp x0, x1, [x10, 5*16] + ldp x2, x3, [x11, 5*16] + csel x0, x0, x2, eq + csel x1, x1, x3, eq + stp x0, x1, [acc+7*16] + +// The counter "i" tracks the bit position for which the scalar has +// already been absorbed, starting at 0 and going up in chunks of 4. +// +// The pointer "tab" points at the current block of the table for +// multiples (2^i * j) * B at the current bit position i; 1 <= j <= 8. +// +// The bias is always either 0 and 1 and needs to be added to the +// partially processed scalar implicitly. This is used to absorb 4 bits +// of scalar per iteration from 3-bit table indexing by exploiting +// negation: (16 * h + l) * B = (16 * (h + 1) - (16 - l)) * B is used +// when l >= 9. Note that we can't have any bias left over at the +// end because we made sure bit 251 is clear in the reduced scalar. + + mov i, 0 + adr tab, edwards25519_gtable + mov bias, xzr + +// Start of the main loop, repeated 63 times for i = 0, 4, 8, ..., 248 + +scalarloop: + +// Look at the next 4-bit field "bf", adding the previous bias as well. +// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, +// setting the bias to 1 for the next iteration in the latter case. + + lsr x0, i, #6 + ldr x2, [sp, x0, lsl #3] // Exploiting scalar = sp exactly + lsr x2, x2, i + and x2, x2, #15 + add bf, x2, bias + + cmp bf, 9 + cset bias, cs + + mov x0, 16 + sub x0, x0, bf + cmp bias, xzr + csel ix, x0, bf, ne + +// Perform constant-time lookup in the table to get element number "ix". +// The table entry for the affine point (x,y) is actually a triple +// (y - x,x + y,2 * d * x * y) to precompute parts of the addition. +// Note that "ix" can be 0, so we set up the appropriate identity first. 
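+// A branch-free sketch of the selection (illustration only): all 8 table
+// entries are read and conditionally kept, so the memory access pattern
+// is independent of the secret index.
+//
+//     def lookup(tab, ix):             # entries are (y - x, x + y, 2*d*x*y)
+//         e = (1, 1, 0)                # identity (x,y) = (0,1): y-x = 1,
+//         for j in range(1, 9):        # x+y = 1 and 2*d*x*y = 0
+//             if ix == j:              # a csel in the assembly, not a branch
+//                 e = tab[j - 1]
+//         return e
+//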
+ + mov x0, #1 + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + + cmp ix, #1 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #2 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #3 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #4 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #5 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #6 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #7 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + + cmp ix, #8 + ldp x12, x13, [tab] + csel x0, x0, x12, ne + csel x1, x1, x13, 
ne + ldp x12, x13, [tab, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [tab, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [tab, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [tab, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [tab, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add tab, tab, #96 + +// We now have the triple from the table in registers as follows +// +// [x3;x2;x1;x0] = y - x +// [x7;x6;x5;x4] = x + y +// [x11;x10;x9;x8] = 2 * d * x * y +// +// In case bias = 1 we need to negate this. For Edwards curves +// -(x,y) = (-x,y), i.e. we need to negate the x coordinate. +// In this processed encoding, that amounts to swapping the +// first two fields and negating the third. +// +// The optional negation here also pretends bias = 0 whenever +// ix = 0 so that it doesn't need to handle the case of zero +// inputs, since no non-trivial table entries are zero. Note +// that in the zero case the whole negation is trivial, and +// so indeed is the swapping. + + cmp bias, #0 + + csel x12, x0, x4, eq + csel x13, x1, x5, eq + csel x14, x2, x6, eq + csel x15, x3, x7, eq + stp x12, x13, [tabent] + stp x14, x15, [tabent+16] + + csel x12, x0, x4, ne + csel x13, x1, x5, ne + csel x14, x2, x6, ne + csel x15, x3, x7, ne + stp x12, x13, [tabent+32] + stp x14, x15, [tabent+48] + + mov x0, #-19 + subs x0, x0, x8 + mov x2, #-1 + sbcs x1, x2, x9 + sbcs x2, x2, x10 + mov x3, #0x7FFFFFFFFFFFFFFF + sbc x3, x3, x11 + + cmp ix, xzr + ccmp bias, xzr, #4, ne + + csel x0, x0, x8, ne + csel x1, x1, x9, ne + stp x0, x1, [tabent+64] + csel x2, x2, x10, ne + csel x3, x3, x11, ne + stp x2, x3, [tabent+80] + +// Extended-projective and precomputed mixed addition. +// This is effectively the same as calling the standalone +// function edwards25519_pepadd_alt(acc,acc,tabent), but we +// only retain slightly weaker normalization < 2 * p_25519 +// throughout the inner loop, so the computation is +// slightly different, and faster overall. + + double_twice4(t0,z_1) + sub_twice4(t1,y_1,x_1) + add_twice4(t2,y_1,x_1) + mul_4(t3,w_1,kxy_2) + mul_4(t1,t1,ymx_2) + mul_4(t2,t2,xpy_2) + sub_twice4(t4,t0,t3) + add_twice4(t0,t0,t3) + sub_twice4(t5,t2,t1) + add_twice4(t1,t2,t1) + mul_4(z_3,t4,t0) + mul_4(x_3,t5,t4) + mul_4(y_3,t0,t1) + mul_4(w_3,t5,t1) + +// End of the main loop; move on by 4 bits. + + add i, i, 4 + cmp i, 252 + bcc scalarloop + +// Insert the optional negation of the projective X coordinate, and +// so by extension the final affine x coordinate x = X/Z and thus +// the point P = (x,y). We only know X < 2 * p_25519, so we do the +// negation as 2 * p_25519 - X to keep it nonnegative. From this +// point on we don't need any normalization of the coordinates +// except for making sure that they fit in 4 digits. + + ldp x0, x1, [x_3] + ldp x2, x3, [x_3+16] + mov x4, #0xffffffffffffffda + subs x4, x4, x0 + mov x7, #0xffffffffffffffff + sbcs x5, x7, x1 + sbcs x6, x7, x2 + sbc x7, x7, x3 + ldr x10, [scalar+24] + tst x10, #0x8000000000000000 + csel x0, x4, x0, ne + csel x1, x5, x1, ne + csel x2, x6, x2, ne + csel x3, x7, x3, ne + stp x0, x1, [x_3] + stp x2, x3, [x_3+16] + +// Now we need to map out of the extended-projective representation +// (X,Y,Z,W) back to the affine form (x,y) = (X/Z,Y/Z). This means +// first calling the modular inverse to get w_3 = 1/z_3. 
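Functionally, this inversion, together with the two mul_p25519 calls at the very end, computes x = X/Z and y = Y/Z mod p_25519. A small Python model of that step (illustrative; the inlined bignum_modinv uses an iterative almost-inverse algorithm, but since p is prime a Fermat inverse yields the same value):

    p = 2**255 - 19

    def to_affine(X, Y, Z):
        w = pow(Z, p - 2, p)      # w = Z^-1 mod p, the role of w_3 here
        return (X * w) % p, (Y * w) % p

    # Any projective scaling of a pair maps back to the same affine pair
    assert to_affine(7 * 2, 7 * 3, 7) == (2, 3)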
+ + mov x0, 4 + add x1, w_3 + add x2, z_3 + adr x3, p_25519 + add x4, tmpspace + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "arm/generic/bignum_modinv.S". + + lsl x10, x0, #3 + add x21, x4, x10 + add x22, x21, x10 + mov x10, xzr +copyloop: + ldr x11, [x2, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + str x11, [x21, x10, lsl #3] + str x12, [x22, x10, lsl #3] + str x12, [x4, x10, lsl #3] + str xzr, [x1, x10, lsl #3] + add x10, x10, #0x1 + cmp x10, x0 + b.cc copyloop + ldr x11, [x4] + sub x12, x11, #0x1 + str x12, [x4] + lsl x20, x11, #2 + sub x20, x11, x20 + eor x20, x20, #0x2 + mov x12, #0x1 + madd x12, x11, x20, x12 + mul x11, x12, x12 + madd x20, x12, x20, x20 + mul x12, x11, x11 + madd x20, x11, x20, x20 + mul x11, x12, x12 + madd x20, x12, x20, x20 + madd x20, x11, x20, x20 + lsl x2, x0, #7 +outerloop: + add x10, x2, #0x3f + lsr x5, x10, #6 + cmp x5, x0 + csel x5, x0, x5, cs + mov x13, xzr + mov x15, xzr + mov x14, xzr + mov x16, xzr + mov x19, xzr + mov x10, xzr +toploop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + orr x17, x11, x12 + cmp x17, xzr + and x17, x19, x13 + csel x15, x17, x15, ne + and x17, x19, x14 + csel x16, x17, x16, ne + csel x13, x11, x13, ne + csel x14, x12, x14, ne + csetm x19, ne + add x10, x10, #0x1 + cmp x10, x5 + b.cc toploop + orr x11, x13, x14 + clz x12, x11 + negs x17, x12 + lsl x13, x13, x12 + csel x15, x15, xzr, ne + lsl x14, x14, x12 + csel x16, x16, xzr, ne + lsr x15, x15, x17 + lsr x16, x16, x17 + orr x13, x13, x15 + orr x14, x14, x16 + ldr x15, [x21] + ldr x16, [x22] + mov x6, #0x1 + mov x7, xzr + mov x8, xzr + mov x9, #0x1 + mov x10, #0x3a + tst x15, #0x1 +innerloop: + csel x11, x14, xzr, ne + csel x12, x16, xzr, ne + csel x17, x8, xzr, ne + csel x19, x9, xzr, ne + ccmp x13, x14, #0x2, ne + sub x11, x13, x11 + sub x12, x15, x12 + csel x14, x14, x13, cs + cneg x11, x11, cc + csel x16, x16, x15, cs + cneg x15, x12, cc + csel x8, x8, x6, cs + csel x9, x9, x7, cs + tst x12, #0x2 + add x6, x6, x17 + add x7, x7, x19 + lsr x13, x11, #1 + lsr x15, x15, #1 + add x8, x8, x8 + add x9, x9, x9 + sub x10, x10, #0x1 + cbnz x10, innerloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +congloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + adds x15, x15, x16 + extr x17, x15, x17, #58 + str x17, [x4, x10, lsl #3] + mov x17, x15 + umulh x15, x7, x12 + adc x13, x13, x15 + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + adds x15, x15, x16 + extr x19, x15, x19, #58 + str x19, [x1, x10, lsl #3] + mov x19, x15 + umulh x15, x9, x12 + adc x14, x14, x15 + add x10, x10, #0x1 + cmp x10, x0 + b.cc congloop + extr x13, x13, x17, #58 + extr x14, x14, x19, #58 + ldr x11, [x4] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, wmontend +wmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x4, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wmontloop +wmontend: + adcs x16, x16, x13 + adc x13, xzr, xzr + sub x15, 
x10, #0x1 + str x16, [x4, x15, lsl #3] + negs x10, xzr +wcmploop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcmploop + sbcs xzr, x13, xzr + csetm x13, cs + negs x10, xzr +wcorrloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x13 + sbcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wcorrloop + ldr x11, [x1] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, zmontend +zmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x1, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zmontloop +zmontend: + adcs x16, x16, x14 + adc x14, xzr, xzr + sub x15, x10, #0x1 + str x16, [x1, x15, lsl #3] + negs x10, xzr +zcmploop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcmploop + sbcs xzr, x14, xzr + csetm x14, cs + negs x10, xzr +zcorrloop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x14 + sbcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zcorrloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +crossloop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + subs x15, x15, x16 + str x15, [x21, x10, lsl #3] + umulh x15, x7, x12 + sub x17, x15, x17 + sbcs x13, x13, x17 + csetm x17, cc + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + subs x15, x15, x16 + str x15, [x22, x10, lsl #3] + umulh x15, x9, x12 + sub x19, x15, x19 + sbcs x14, x14, x19 + csetm x19, cc + add x10, x10, #0x1 + cmp x10, x5 + b.cc crossloop + cmn x17, x17 + ldr x15, [x21] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip1 +negloop1: + add x11, x10, #0x8 + ldr x12, [x21, x11] + extr x15, x12, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop1 +negskip1: + extr x15, x13, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + cmn x19, x19 + ldr x15, [x22] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, negskip2 +negloop2: + add x11, x10, #0x8 + ldr x12, [x22, x11] + extr x15, x12, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, negloop2 +negskip2: + extr x15, x14, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x10, xzr + cmn x17, x17 +wfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + and x11, x11, x17 + eor x12, x12, x17 + adcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, wfliploop + mvn x19, x19 + mov x10, xzr + cmn x19, x19 +zfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + and x11, x11, x19 + eor x12, x12, x19 + adcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, zfliploop + subs x2, x2, #0x3a + b.hi outerloop + +// The final result is x = X * inv(Z), y = Y * inv(Z). 
+// These are the only operations in the whole computation that +// fully reduce modulo p_25519 since now we want the canonical +// answer as output. + + mul_p25519(resx,x_3,w_3) + mul_p25519(resy,y_3,w_3) + +// Restore stack and registers + + add sp, sp, #NSPACE + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// The modulus p_25519 = 2^255 - 19, for the modular inverse + +p_25519: + .quad 0xffffffffffffffed + .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x7fffffffffffffff + +// 0 * B = 0 and 2^251 * B in extended-projective coordinates +// but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. + +edwards25519_0g: + + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + + .quad 0x0000000000000001 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + +edwards25519_251g: + + .quad 0x525f946d7c7220e7 + .quad 0x4636b0b2f1e35444 + .quad 0x796e9d70e892ae0f + .quad 0x03dec05fa937adb1 + .quad 0x6d1c271cc6375515 + .quad 0x462588c4a4ca4f14 + .quad 0x691129fee55afc39 + .quad 0x15949f784d8472f5 + .quad 0xbd89e510afad0049 + .quad 0x4d1f08c073b9860e + .quad 0x07716e8b2d00af9d + .quad 0x70d685f68f859714 + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
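The triple format can be checked independently against the first entry of the table below, which is 2^0 * 1 times the generator, i.e. the basepoint B itself. A Python sketch using the standard ed25519 basepoint constants (illustrative, not part of the patch):

    p = 2**255 - 19
    d = (-121665 * pow(121666, p - 2, p)) % p    # Edwards curve constant
    Bx = 15112221349535400772501151409588531511454012693041857206046113283949847762202
    By = 46316835694926478169428394003475163141307993866256225615783033603165251855960

    def quads(n):   # four little-endian 64-bit digits, as in the .quad lines
        return [(n >> (64 * i)) & (2**64 - 1) for i in range(4)]

    assert quads((By - Bx) % p) == [0x9d103905d740913e, 0xfd399f05d140beb3,
                                    0xa5c18434688f8a09, 0x44fd2f9298f81267]
    assert quads((Bx + By) % p) == [0x2fbc93c6f58c3b85, 0xcf932dc6fb8c0e19,
                                    0x270b4898643d42c2, 0x07cf9d3a33d4ba65]
    assert quads(2 * d * Bx * By % p) == [0xabc91205877aaa68, 0x26d9e823ccaac49e,
                                          0x5a1b7dcbdd43598c, 0x6f117b689f0c65a8]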
+ +edwards25519_gtable: + + // 2^0 * 1 * G + + .quad 0x9d103905d740913e + .quad 0xfd399f05d140beb3 + .quad 0xa5c18434688f8a09 + .quad 0x44fd2f9298f81267 + .quad 0x2fbc93c6f58c3b85 + .quad 0xcf932dc6fb8c0e19 + .quad 0x270b4898643d42c2 + .quad 0x07cf9d3a33d4ba65 + .quad 0xabc91205877aaa68 + .quad 0x26d9e823ccaac49e + .quad 0x5a1b7dcbdd43598c + .quad 0x6f117b689f0c65a8 + + // 2^0 * 2 * G + + .quad 0x8a99a56042b4d5a8 + .quad 0x8f2b810c4e60acf6 + .quad 0xe09e236bb16e37aa + .quad 0x6bb595a669c92555 + .quad 0x9224e7fc933c71d7 + .quad 0x9f469d967a0ff5b5 + .quad 0x5aa69a65e1d60702 + .quad 0x590c063fa87d2e2e + .quad 0x43faa8b3a59b7a5f + .quad 0x36c16bdd5d9acf78 + .quad 0x500fa0840b3d6a31 + .quad 0x701af5b13ea50b73 + + // 2^0 * 3 * G + + .quad 0x56611fe8a4fcd265 + .quad 0x3bd353fde5c1ba7d + .quad 0x8131f31a214bd6bd + .quad 0x2ab91587555bda62 + .quad 0xaf25b0a84cee9730 + .quad 0x025a8430e8864b8a + .quad 0xc11b50029f016732 + .quad 0x7a164e1b9a80f8f4 + .quad 0x14ae933f0dd0d889 + .quad 0x589423221c35da62 + .quad 0xd170e5458cf2db4c + .quad 0x5a2826af12b9b4c6 + + // 2^0 * 4 * G + + .quad 0x95fe050a056818bf + .quad 0x327e89715660faa9 + .quad 0xc3e8e3cd06a05073 + .quad 0x27933f4c7445a49a + .quad 0x287351b98efc099f + .quad 0x6765c6f47dfd2538 + .quad 0xca348d3dfb0a9265 + .quad 0x680e910321e58727 + .quad 0x5a13fbe9c476ff09 + .quad 0x6e9e39457b5cc172 + .quad 0x5ddbdcf9102b4494 + .quad 0x7f9d0cbf63553e2b + + // 2^0 * 5 * G + + .quad 0x7f9182c3a447d6ba + .quad 0xd50014d14b2729b7 + .quad 0xe33cf11cb864a087 + .quad 0x154a7e73eb1b55f3 + .quad 0xa212bc4408a5bb33 + .quad 0x8d5048c3c75eed02 + .quad 0xdd1beb0c5abfec44 + .quad 0x2945ccf146e206eb + .quad 0xbcbbdbf1812a8285 + .quad 0x270e0807d0bdd1fc + .quad 0xb41b670b1bbda72d + .quad 0x43aabe696b3bb69a + + // 2^0 * 6 * G + + .quad 0x499806b67b7d8ca4 + .quad 0x575be28427d22739 + .quad 0xbb085ce7204553b9 + .quad 0x38b64c41ae417884 + .quad 0x3a0ceeeb77157131 + .quad 0x9b27158900c8af88 + .quad 0x8065b668da59a736 + .quad 0x51e57bb6a2cc38bd + .quad 0x85ac326702ea4b71 + .quad 0xbe70e00341a1bb01 + .quad 0x53e4a24b083bc144 + .quad 0x10b8e91a9f0d61e3 + + // 2^0 * 7 * G + + .quad 0xba6f2c9aaa3221b1 + .quad 0x6ca021533bba23a7 + .quad 0x9dea764f92192c3a + .quad 0x1d6edd5d2e5317e0 + .quad 0x6b1a5cd0944ea3bf + .quad 0x7470353ab39dc0d2 + .quad 0x71b2528228542e49 + .quad 0x461bea69283c927e + .quad 0xf1836dc801b8b3a2 + .quad 0xb3035f47053ea49a + .quad 0x529c41ba5877adf3 + .quad 0x7a9fbb1c6a0f90a7 + + // 2^0 * 8 * G + + .quad 0xe2a75dedf39234d9 + .quad 0x963d7680e1b558f9 + .quad 0x2c2741ac6e3c23fb + .quad 0x3a9024a1320e01c3 + .quad 0x59b7596604dd3e8f + .quad 0x6cb30377e288702c + .quad 0xb1339c665ed9c323 + .quad 0x0915e76061bce52f + .quad 0xe7c1f5d9c9a2911a + .quad 0xb8a371788bcca7d7 + .quad 0x636412190eb62a32 + .quad 0x26907c5c2ecc4e95 + + // 2^4 * 1 * B + + .quad 0x7ec851ca553e2df3 + .quad 0xa71284cba64878b3 + .quad 0xe6b5e4193288d1e7 + .quad 0x4cf210ec5a9a8883 + .quad 0x322d04a52d9021f6 + .quad 0xb9c19f3375c6bf9c + .quad 0x587a3a4342d20b09 + .quad 0x143b1cf8aa64fe61 + .quad 0x9f867c7d968acaab + .quad 0x5f54258e27092729 + .quad 0xd0a7d34bea180975 + .quad 0x21b546a3374126e1 + + // 2^4 * 2 * B + + .quad 0xa94ff858a2888343 + .quad 0xce0ed4565313ed3c + .quad 0xf55c3dcfb5bf34fa + .quad 0x0a653ca5c9eab371 + .quad 0x490a7a45d185218f + .quad 0x9a15377846049335 + .quad 0x0060ea09cc31e1f6 + .quad 0x7e041577f86ee965 + .quad 0x66b2a496ce5b67f3 + .quad 0xff5492d8bd569796 + .quad 0x503cec294a592cd0 + .quad 0x566943650813acb2 + + // 2^4 * 3 * B + + .quad 0xb818db0c26620798 + .quad 0x5d5c31d9606e354a + 
.quad 0x0982fa4f00a8cdc7 + .quad 0x17e12bcd4653e2d4 + .quad 0x5672f9eb1dabb69d + .quad 0xba70b535afe853fc + .quad 0x47ac0f752796d66d + .quad 0x32a5351794117275 + .quad 0xd3a644a6df648437 + .quad 0x703b6559880fbfdd + .quad 0xcb852540ad3a1aa5 + .quad 0x0900b3f78e4c6468 + + // 2^4 * 4 * B + + .quad 0x0a851b9f679d651b + .quad 0xe108cb61033342f2 + .quad 0xd601f57fe88b30a3 + .quad 0x371f3acaed2dd714 + .quad 0xed280fbec816ad31 + .quad 0x52d9595bd8e6efe3 + .quad 0x0fe71772f6c623f5 + .quad 0x4314030b051e293c + .quad 0xd560005efbf0bcad + .quad 0x8eb70f2ed1870c5e + .quad 0x201f9033d084e6a0 + .quad 0x4c3a5ae1ce7b6670 + + // 2^4 * 5 * B + + .quad 0x4138a434dcb8fa95 + .quad 0x870cf67d6c96840b + .quad 0xde388574297be82c + .quad 0x7c814db27262a55a + .quad 0xbaf875e4c93da0dd + .quad 0xb93282a771b9294d + .quad 0x80d63fb7f4c6c460 + .quad 0x6de9c73dea66c181 + .quad 0x478904d5a04df8f2 + .quad 0xfafbae4ab10142d3 + .quad 0xf6c8ac63555d0998 + .quad 0x5aac4a412f90b104 + + // 2^4 * 6 * B + + .quad 0xc64f326b3ac92908 + .quad 0x5551b282e663e1e0 + .quad 0x476b35f54a1a4b83 + .quad 0x1b9da3fe189f68c2 + .quad 0x603a0d0abd7f5134 + .quad 0x8089c932e1d3ae46 + .quad 0xdf2591398798bd63 + .quad 0x1c145cd274ba0235 + .quad 0x32e8386475f3d743 + .quad 0x365b8baf6ae5d9ef + .quad 0x825238b6385b681e + .quad 0x234929c1167d65e1 + + // 2^4 * 7 * B + + .quad 0x984decaba077ade8 + .quad 0x383f77ad19eb389d + .quad 0xc7ec6b7e2954d794 + .quad 0x59c77b3aeb7c3a7a + .quad 0x48145cc21d099fcf + .quad 0x4535c192cc28d7e5 + .quad 0x80e7c1e548247e01 + .quad 0x4a5f28743b2973ee + .quad 0xd3add725225ccf62 + .quad 0x911a3381b2152c5d + .quad 0xd8b39fad5b08f87d + .quad 0x6f05606b4799fe3b + + // 2^4 * 8 * B + + .quad 0x9ffe9e92177ba962 + .quad 0x98aee71d0de5cae1 + .quad 0x3ff4ae942d831044 + .quad 0x714de12e58533ac8 + .quad 0x5b433149f91b6483 + .quad 0xadb5dc655a2cbf62 + .quad 0x87fa8412632827b3 + .quad 0x60895e91ab49f8d8 + .quad 0xe9ecf2ed0cf86c18 + .quad 0xb46d06120735dfd4 + .quad 0xbc9da09804b96be7 + .quad 0x73e2e62fd96dc26b + + // 2^8 * 1 * B + + .quad 0xed5b635449aa515e + .quad 0xa865c49f0bc6823a + .quad 0x850c1fe95b42d1c4 + .quad 0x30d76d6f03d315b9 + .quad 0x2eccdd0e632f9c1d + .quad 0x51d0b69676893115 + .quad 0x52dfb76ba8637a58 + .quad 0x6dd37d49a00eef39 + .quad 0x6c4444172106e4c7 + .quad 0xfb53d680928d7f69 + .quad 0xb4739ea4694d3f26 + .quad 0x10c697112e864bb0 + + // 2^8 * 2 * B + + .quad 0x6493c4277dbe5fde + .quad 0x265d4fad19ad7ea2 + .quad 0x0e00dfc846304590 + .quad 0x25e61cabed66fe09 + .quad 0x0ca62aa08358c805 + .quad 0x6a3d4ae37a204247 + .quad 0x7464d3a63b11eddc + .quad 0x03bf9baf550806ef + .quad 0x3f13e128cc586604 + .quad 0x6f5873ecb459747e + .quad 0xa0b63dedcc1268f5 + .quad 0x566d78634586e22c + + // 2^8 * 3 * B + + .quad 0x1637a49f9cc10834 + .quad 0xbc8e56d5a89bc451 + .quad 0x1cb5ec0f7f7fd2db + .quad 0x33975bca5ecc35d9 + .quad 0xa1054285c65a2fd0 + .quad 0x6c64112af31667c3 + .quad 0x680ae240731aee58 + .quad 0x14fba5f34793b22a + .quad 0x3cd746166985f7d4 + .quad 0x593e5e84c9c80057 + .quad 0x2fc3f2b67b61131e + .quad 0x14829cea83fc526c + + // 2^8 * 4 * B + + .quad 0xff437b8497dd95c2 + .quad 0x6c744e30aa4eb5a7 + .quad 0x9e0c5d613c85e88b + .quad 0x2fd9c71e5f758173 + .quad 0x21e70b2f4e71ecb8 + .quad 0xe656ddb940a477e3 + .quad 0xbf6556cece1d4f80 + .quad 0x05fc3bc4535d7b7e + .quad 0x24b8b3ae52afdedd + .quad 0x3495638ced3b30cf + .quad 0x33a4bc83a9be8195 + .quad 0x373767475c651f04 + + // 2^8 * 5 * B + + .quad 0x2fba99fd40d1add9 + .quad 0xb307166f96f4d027 + .quad 0x4363f05215f03bae + .quad 0x1fbea56c3b18f999 + .quad 0x634095cb14246590 + .quad 
0xef12144016c15535 + .quad 0x9e38140c8910bc60 + .quad 0x6bf5905730907c8c + .quad 0x0fa778f1e1415b8a + .quad 0x06409ff7bac3a77e + .quad 0x6f52d7b89aa29a50 + .quad 0x02521cf67a635a56 + + // 2^8 * 6 * B + + .quad 0x513fee0b0a9d5294 + .quad 0x8f98e75c0fdf5a66 + .quad 0xd4618688bfe107ce + .quad 0x3fa00a7e71382ced + .quad 0xb1146720772f5ee4 + .quad 0xe8f894b196079ace + .quad 0x4af8224d00ac824a + .quad 0x001753d9f7cd6cc4 + .quad 0x3c69232d963ddb34 + .quad 0x1dde87dab4973858 + .quad 0xaad7d1f9a091f285 + .quad 0x12b5fe2fa048edb6 + + // 2^8 * 7 * B + + .quad 0x71f0fbc496fce34d + .quad 0x73b9826badf35bed + .quad 0xd2047261ff28c561 + .quad 0x749b76f96fb1206f + .quad 0xdf2b7c26ad6f1e92 + .quad 0x4b66d323504b8913 + .quad 0x8c409dc0751c8bc3 + .quad 0x6f7e93c20796c7b8 + .quad 0x1f5af604aea6ae05 + .quad 0xc12351f1bee49c99 + .quad 0x61a808b5eeff6b66 + .quad 0x0fcec10f01e02151 + + // 2^8 * 8 * B + + .quad 0x644d58a649fe1e44 + .quad 0x21fcaea231ad777e + .quad 0x02441c5a887fd0d2 + .quad 0x4901aa7183c511f3 + .quad 0x3df2d29dc4244e45 + .quad 0x2b020e7493d8de0a + .quad 0x6cc8067e820c214d + .quad 0x413779166feab90a + .quad 0x08b1b7548c1af8f0 + .quad 0xce0f7a7c246299b4 + .quad 0xf760b0f91e06d939 + .quad 0x41bb887b726d1213 + + // 2^12 * 1 * B + + .quad 0x9267806c567c49d8 + .quad 0x066d04ccca791e6a + .quad 0xa69f5645e3cc394b + .quad 0x5c95b686a0788cd2 + .quad 0x97d980e0aa39f7d2 + .quad 0x35d0384252c6b51c + .quad 0x7d43f49307cd55aa + .quad 0x56bd36cfb78ac362 + .quad 0x2ac519c10d14a954 + .quad 0xeaf474b494b5fa90 + .quad 0xe6af8382a9f87a5a + .quad 0x0dea6db1879be094 + + // 2^12 * 2 * B + + .quad 0xaa66bf547344e5ab + .quad 0xda1258888f1b4309 + .quad 0x5e87d2b3fd564b2f + .quad 0x5b2c78885483b1dd + .quad 0x15baeb74d6a8797a + .quad 0x7ef55cf1fac41732 + .quad 0x29001f5a3c8b05c5 + .quad 0x0ad7cc8752eaccfb + .quad 0x52151362793408cf + .quad 0xeb0f170319963d94 + .quad 0xa833b2fa883d9466 + .quad 0x093a7fa775003c78 + + // 2^12 * 3 * B + + .quad 0xe5107de63a16d7be + .quad 0xa377ffdc9af332cf + .quad 0x70d5bf18440b677f + .quad 0x6a252b19a4a31403 + .quad 0xb8e9604460a91286 + .quad 0x7f3fd8047778d3de + .quad 0x67d01e31bf8a5e2d + .quad 0x7b038a06c27b653e + .quad 0x9ed919d5d36990f3 + .quad 0x5213aebbdb4eb9f2 + .quad 0xc708ea054cb99135 + .quad 0x58ded57f72260e56 + + // 2^12 * 4 * B + + .quad 0x78e79dade9413d77 + .quad 0xf257f9d59729e67d + .quad 0x59db910ee37aa7e6 + .quad 0x6aa11b5bbb9e039c + .quad 0xda6d53265b0fd48b + .quad 0x8960823193bfa988 + .quad 0xd78ac93261d57e28 + .quad 0x79f2942d3a5c8143 + .quad 0x97da2f25b6c88de9 + .quad 0x251ba7eaacf20169 + .quad 0x09b44f87ef4eb4e4 + .quad 0x7d90ab1bbc6a7da5 + + // 2^12 * 5 * B + + .quad 0x9acca683a7016bfe + .quad 0x90505f4df2c50b6d + .quad 0x6b610d5fcce435aa + .quad 0x19a10d446198ff96 + .quad 0x1a07a3f496b3c397 + .quad 0x11ceaa188f4e2532 + .quad 0x7d9498d5a7751bf0 + .quad 0x19ed161f508dd8a0 + .quad 0x560a2cd687dce6ca + .quad 0x7f3568c48664cf4d + .quad 0x8741e95222803a38 + .quad 0x483bdab1595653fc + + // 2^12 * 6 * B + + .quad 0xfa780f148734fa49 + .quad 0x106f0b70360534e0 + .quad 0x2210776fe3e307bd + .quad 0x3286c109dde6a0fe + .quad 0xd6cf4d0ab4da80f6 + .quad 0x82483e45f8307fe0 + .quad 0x05005269ae6f9da4 + .quad 0x1c7052909cf7877a + .quad 0x32ee7de2874e98d4 + .quad 0x14c362e9b97e0c60 + .quad 0x5781dcde6a60a38a + .quad 0x217dd5eaaa7aa840 + + // 2^12 * 7 * B + + .quad 0x9db7c4d0248e1eb0 + .quad 0xe07697e14d74bf52 + .quad 0x1e6a9b173c562354 + .quad 0x7fa7c21f795a4965 + .quad 0x8bdf1fb9be8c0ec8 + .quad 0x00bae7f8e30a0282 + .quad 0x4963991dad6c4f6c + .quad 0x07058a6e5df6f60a + .quad 
0xe9eb02c4db31f67f + .quad 0xed25fd8910bcfb2b + .quad 0x46c8131f5c5cddb4 + .quad 0x33b21c13a0cb9bce + + // 2^12 * 8 * B + + .quad 0x360692f8087d8e31 + .quad 0xf4dcc637d27163f7 + .quad 0x25a4e62065ea5963 + .quad 0x659bf72e5ac160d9 + .quad 0x9aafb9b05ee38c5b + .quad 0xbf9d2d4e071a13c7 + .quad 0x8eee6e6de933290a + .quad 0x1c3bab17ae109717 + .quad 0x1c9ab216c7cab7b0 + .quad 0x7d65d37407bbc3cc + .quad 0x52744750504a58d5 + .quad 0x09f2606b131a2990 + + // 2^16 * 1 * B + + .quad 0x40e87d44744346be + .quad 0x1d48dad415b52b25 + .quad 0x7c3a8a18a13b603e + .quad 0x4eb728c12fcdbdf7 + .quad 0x7e234c597c6691ae + .quad 0x64889d3d0a85b4c8 + .quad 0xdae2c90c354afae7 + .quad 0x0a871e070c6a9e1d + .quad 0x3301b5994bbc8989 + .quad 0x736bae3a5bdd4260 + .quad 0x0d61ade219d59e3c + .quad 0x3ee7300f2685d464 + + // 2^16 * 2 * B + + .quad 0xf5d255e49e7dd6b7 + .quad 0x8016115c610b1eac + .quad 0x3c99975d92e187ca + .quad 0x13815762979125c2 + .quad 0x43fa7947841e7518 + .quad 0xe5c6fa59639c46d7 + .quad 0xa1065e1de3052b74 + .quad 0x7d47c6a2cfb89030 + .quad 0x3fdad0148ef0d6e0 + .quad 0x9d3e749a91546f3c + .quad 0x71ec621026bb8157 + .quad 0x148cf58d34c9ec80 + + // 2^16 * 3 * B + + .quad 0x46a492f67934f027 + .quad 0x469984bef6840aa9 + .quad 0x5ca1bc2a89611854 + .quad 0x3ff2fa1ebd5dbbd4 + .quad 0xe2572f7d9ae4756d + .quad 0x56c345bb88f3487f + .quad 0x9fd10b6d6960a88d + .quad 0x278febad4eaea1b9 + .quad 0xb1aa681f8c933966 + .quad 0x8c21949c20290c98 + .quad 0x39115291219d3c52 + .quad 0x4104dd02fe9c677b + + // 2^16 * 4 * B + + .quad 0x72b2bf5e1124422a + .quad 0xa1fa0c3398a33ab5 + .quad 0x94cb6101fa52b666 + .quad 0x2c863b00afaf53d5 + .quad 0x81214e06db096ab8 + .quad 0x21a8b6c90ce44f35 + .quad 0x6524c12a409e2af5 + .quad 0x0165b5a48efca481 + .quad 0xf190a474a0846a76 + .quad 0x12eff984cd2f7cc0 + .quad 0x695e290658aa2b8f + .quad 0x591b67d9bffec8b8 + + // 2^16 * 5 * B + + .quad 0x312f0d1c80b49bfa + .quad 0x5979515eabf3ec8a + .quad 0x727033c09ef01c88 + .quad 0x3de02ec7ca8f7bcb + .quad 0x99b9b3719f18b55d + .quad 0xe465e5faa18c641e + .quad 0x61081136c29f05ed + .quad 0x489b4f867030128b + .quad 0xd232102d3aeb92ef + .quad 0xe16253b46116a861 + .quad 0x3d7eabe7190baa24 + .quad 0x49f5fbba496cbebf + + // 2^16 * 6 * B + + .quad 0x30949a108a5bcfd4 + .quad 0xdc40dd70bc6473eb + .quad 0x92c294c1307c0d1c + .quad 0x5604a86dcbfa6e74 + .quad 0x155d628c1e9c572e + .quad 0x8a4d86acc5884741 + .quad 0x91a352f6515763eb + .quad 0x06a1a6c28867515b + .quad 0x7288d1d47c1764b6 + .quad 0x72541140e0418b51 + .quad 0x9f031a6018acf6d1 + .quad 0x20989e89fe2742c6 + + // 2^16 * 7 * B + + .quad 0x499777fd3a2dcc7f + .quad 0x32857c2ca54fd892 + .quad 0xa279d864d207e3a0 + .quad 0x0403ed1d0ca67e29 + .quad 0x1674278b85eaec2e + .quad 0x5621dc077acb2bdf + .quad 0x640a4c1661cbf45a + .quad 0x730b9950f70595d3 + .quad 0xc94b2d35874ec552 + .quad 0xc5e6c8cf98246f8d + .quad 0xf7cb46fa16c035ce + .quad 0x5bd7454308303dcc + + // 2^16 * 8 * B + + .quad 0x7f9ad19528b24cc2 + .quad 0x7f6b54656335c181 + .quad 0x66b8b66e4fc07236 + .quad 0x133a78007380ad83 + .quad 0x85c4932115e7792a + .quad 0xc64c89a2bdcdddc9 + .quad 0x9d1e3da8ada3d762 + .quad 0x5bb7db123067f82c + .quad 0x0961f467c6ca62be + .quad 0x04ec21d6211952ee + .quad 0x182360779bd54770 + .quad 0x740dca6d58f0e0d2 + + // 2^20 * 1 * B + + .quad 0x50b70bf5d3f0af0b + .quad 0x4feaf48ae32e71f7 + .quad 0x60e84ed3a55bbd34 + .quad 0x00ed489b3f50d1ed + .quad 0x3906c72aed261ae5 + .quad 0x9ab68fd988e100f7 + .quad 0xf5e9059af3360197 + .quad 0x0e53dc78bf2b6d47 + .quad 0xb90829bf7971877a + .quad 0x5e4444636d17e631 + .quad 0x4d05c52e18276893 + .quad 
0x27632d9a5a4a4af5 + + // 2^20 * 2 * B + + .quad 0xd11ff05154b260ce + .quad 0xd86dc38e72f95270 + .quad 0x601fcd0d267cc138 + .quad 0x2b67916429e90ccd + .quad 0xa98285d187eaffdb + .quad 0xa5b4fbbbd8d0a864 + .quad 0xb658f27f022663f7 + .quad 0x3bbc2b22d99ce282 + .quad 0xb917c952583c0a58 + .quad 0x653ff9b80fe4c6f3 + .quad 0x9b0da7d7bcdf3c0c + .quad 0x43a0eeb6ab54d60e + + // 2^20 * 3 * B + + .quad 0x396966a46d4a5487 + .quad 0xf811a18aac2bb3ba + .quad 0x66e4685b5628b26b + .quad 0x70a477029d929b92 + .quad 0x3ac6322357875fe8 + .quad 0xd9d4f4ecf5fbcb8f + .quad 0x8dee8493382bb620 + .quad 0x50c5eaa14c799fdc + .quad 0xdd0edc8bd6f2fb3c + .quad 0x54c63aa79cc7b7a0 + .quad 0xae0b032b2c8d9f1a + .quad 0x6f9ce107602967fb + + // 2^20 * 4 * B + + .quad 0xad1054b1cde1c22a + .quad 0xc4a8e90248eb32df + .quad 0x5f3e7b33accdc0ea + .quad 0x72364713fc79963e + .quad 0x139693063520e0b5 + .quad 0x437fcf7c88ea03fe + .quad 0xf7d4c40bd3c959bc + .quad 0x699154d1f893ded9 + .quad 0x315d5c75b4b27526 + .quad 0xcccb842d0236daa5 + .quad 0x22f0c8a3345fee8e + .quad 0x73975a617d39dbed + + // 2^20 * 5 * B + + .quad 0xe4024df96375da10 + .quad 0x78d3251a1830c870 + .quad 0x902b1948658cd91c + .quad 0x7e18b10b29b7438a + .quad 0x6f37f392f4433e46 + .quad 0x0e19b9a11f566b18 + .quad 0x220fb78a1fd1d662 + .quad 0x362a4258a381c94d + .quad 0x9071d9132b6beb2f + .quad 0x0f26e9ad28418247 + .quad 0xeab91ec9bdec925d + .quad 0x4be65bc8f48af2de + + // 2^20 * 6 * B + + .quad 0x78487feba36e7028 + .quad 0x5f3f13001dd8ce34 + .quad 0x934fb12d4b30c489 + .quad 0x056c244d397f0a2b + .quad 0x1d50fba257c26234 + .quad 0x7bd4823adeb0678b + .quad 0xc2b0dc6ea6538af5 + .quad 0x5665eec6351da73e + .quad 0xdb3ee00943bfb210 + .quad 0x4972018720800ac2 + .quad 0x26ab5d6173bd8667 + .quad 0x20b209c2ab204938 + + // 2^20 * 7 * B + + .quad 0x549e342ac07fb34b + .quad 0x02d8220821373d93 + .quad 0xbc262d70acd1f567 + .quad 0x7a92c9fdfbcac784 + .quad 0x1fcca94516bd3289 + .quad 0x448d65aa41420428 + .quad 0x59c3b7b216a55d62 + .quad 0x49992cc64e612cd8 + .quad 0x65bd1bea70f801de + .quad 0x1befb7c0fe49e28a + .quad 0xa86306cdb1b2ae4a + .quad 0x3b7ac0cd265c2a09 + + // 2^20 * 8 * B + + .quad 0x822bee438c01bcec + .quad 0x530cb525c0fbc73b + .quad 0x48519034c1953fe9 + .quad 0x265cc261e09a0f5b + .quad 0xf0d54e4f22ed39a7 + .quad 0xa2aae91e5608150a + .quad 0xf421b2e9eddae875 + .quad 0x31bc531d6b7de992 + .quad 0xdf3d134da980f971 + .quad 0x7a4fb8d1221a22a7 + .quad 0x3df7d42035aad6d8 + .quad 0x2a14edcc6a1a125e + + // 2^24 * 1 * B + + .quad 0xdf48ee0752cfce4e + .quad 0xc3fffaf306ec08b7 + .quad 0x05710b2ab95459c4 + .quad 0x161d25fa963ea38d + .quad 0x231a8c570478433c + .quad 0xb7b5270ec281439d + .quad 0xdbaa99eae3d9079f + .quad 0x2c03f5256c2b03d9 + .quad 0x790f18757b53a47d + .quad 0x307b0130cf0c5879 + .quad 0x31903d77257ef7f9 + .quad 0x699468bdbd96bbaf + + // 2^24 * 2 * B + + .quad 0xbd1f2f46f4dafecf + .quad 0x7cef0114a47fd6f7 + .quad 0xd31ffdda4a47b37f + .quad 0x525219a473905785 + .quad 0xd8dd3de66aa91948 + .quad 0x485064c22fc0d2cc + .quad 0x9b48246634fdea2f + .quad 0x293e1c4e6c4a2e3a + .quad 0x376e134b925112e1 + .quad 0x703778b5dca15da0 + .quad 0xb04589af461c3111 + .quad 0x5b605c447f032823 + + // 2^24 * 3 * B + + .quad 0xb965805920c47c89 + .quad 0xe7f0100c923b8fcc + .quad 0x0001256502e2ef77 + .quad 0x24a76dcea8aeb3ee + .quad 0x3be9fec6f0e7f04c + .quad 0x866a579e75e34962 + .quad 0x5542ef161e1de61a + .quad 0x2f12fef4cc5abdd5 + .quad 0x0a4522b2dfc0c740 + .quad 0x10d06e7f40c9a407 + .quad 0xc6cf144178cff668 + .quad 0x5e607b2518a43790 + + // 2^24 * 4 * B + + .quad 0x58b31d8f6cdf1818 + .quad 0x35cfa74fc36258a2 
+ .quad 0xe1b3ff4f66e61d6e + .quad 0x5067acab6ccdd5f7 + .quad 0xa02c431ca596cf14 + .quad 0xe3c42d40aed3e400 + .quad 0xd24526802e0f26db + .quad 0x201f33139e457068 + .quad 0xfd527f6b08039d51 + .quad 0x18b14964017c0006 + .quad 0xd5220eb02e25a4a8 + .quad 0x397cba8862460375 + + // 2^24 * 5 * B + + .quad 0x30c13093f05959b2 + .quad 0xe23aa18de9a97976 + .quad 0x222fd491721d5e26 + .quad 0x2339d320766e6c3a + .quad 0x7815c3fbc81379e7 + .quad 0xa6619420dde12af1 + .quad 0xffa9c0f885a8fdd5 + .quad 0x771b4022c1e1c252 + .quad 0xd87dd986513a2fa7 + .quad 0xf5ac9b71f9d4cf08 + .quad 0xd06bc31b1ea283b3 + .quad 0x331a189219971a76 + + // 2^24 * 6 * B + + .quad 0xf5166f45fb4f80c6 + .quad 0x9c36c7de61c775cf + .quad 0xe3d4e81b9041d91c + .quad 0x31167c6b83bdfe21 + .quad 0x26512f3a9d7572af + .quad 0x5bcbe28868074a9e + .quad 0x84edc1c11180f7c4 + .quad 0x1ac9619ff649a67b + .quad 0xf22b3842524b1068 + .quad 0x5068343bee9ce987 + .quad 0xfc9d71844a6250c8 + .quad 0x612436341f08b111 + + // 2^24 * 7 * B + + .quad 0xd99d41db874e898d + .quad 0x09fea5f16c07dc20 + .quad 0x793d2c67d00f9bbc + .quad 0x46ebe2309e5eff40 + .quad 0x8b6349e31a2d2638 + .quad 0x9ddfb7009bd3fd35 + .quad 0x7f8bf1b8a3a06ba4 + .quad 0x1522aa3178d90445 + .quad 0x2c382f5369614938 + .quad 0xdafe409ab72d6d10 + .quad 0xe8c83391b646f227 + .quad 0x45fe70f50524306c + + // 2^24 * 8 * B + + .quad 0xda4875a6960c0b8c + .quad 0x5b68d076ef0e2f20 + .quad 0x07fb51cf3d0b8fd4 + .quad 0x428d1623a0e392d4 + .quad 0x62f24920c8951491 + .quad 0x05f007c83f630ca2 + .quad 0x6fbb45d2f5c9d4b8 + .quad 0x16619f6db57a2245 + .quad 0x084f4a4401a308fd + .quad 0xa82219c376a5caac + .quad 0xdeb8de4643d1bc7d + .quad 0x1d81592d60bd38c6 + + // 2^28 * 1 * B + + .quad 0xd833d7beec2a4c38 + .quad 0x2c9162830acc20ed + .quad 0xe93a47aa92df7581 + .quad 0x702d67a3333c4a81 + .quad 0x3a4a369a2f89c8a1 + .quad 0x63137a1d7c8de80d + .quad 0xbcac008a78eda015 + .quad 0x2cb8b3a5b483b03f + .quad 0x36e417cbcb1b90a1 + .quad 0x33b3ddaa7f11794e + .quad 0x3f510808885bc607 + .quad 0x24141dc0e6a8020d + + // 2^28 * 2 * B + + .quad 0x59f73c773fefee9d + .quad 0xb3f1ef89c1cf989d + .quad 0xe35dfb42e02e545f + .quad 0x5766120b47a1b47c + .quad 0x91925dccbd83157d + .quad 0x3ca1205322cc8094 + .quad 0x28e57f183f90d6e4 + .quad 0x1a4714cede2e767b + .quad 0xdb20ba0fb8b6b7ff + .quad 0xb732c3b677511fa1 + .quad 0xa92b51c099f02d89 + .quad 0x4f3875ad489ca5f1 + + // 2^28 * 3 * B + + .quad 0xc7fc762f4932ab22 + .quad 0x7ac0edf72f4c3c1b + .quad 0x5f6b55aa9aa895e8 + .quad 0x3680274dad0a0081 + .quad 0x79ed13f6ee73eec0 + .quad 0xa5c6526d69110bb1 + .quad 0xe48928c38603860c + .quad 0x722a1446fd7059f5 + .quad 0xd0959fe9a8cf8819 + .quad 0xd0a995508475a99c + .quad 0x6eac173320b09cc5 + .quad 0x628ecf04331b1095 + + // 2^28 * 4 * B + + .quad 0x98bcb118a9d0ddbc + .quad 0xee449e3408b4802b + .quad 0x87089226b8a6b104 + .quad 0x685f349a45c7915d + .quad 0x9b41acf85c74ccf1 + .quad 0xb673318108265251 + .quad 0x99c92aed11adb147 + .quad 0x7a47d70d34ecb40f + .quad 0x60a0c4cbcc43a4f5 + .quad 0x775c66ca3677bea9 + .quad 0xa17aa1752ff8f5ed + .quad 0x11ded9020e01fdc0 + + // 2^28 * 5 * B + + .quad 0x890e7809caefe704 + .quad 0x8728296de30e8c6c + .quad 0x4c5cd2a392aeb1c9 + .quad 0x194263d15771531f + .quad 0x471f95b03bea93b7 + .quad 0x0552d7d43313abd3 + .quad 0xbd9370e2e17e3f7b + .quad 0x7b120f1db20e5bec + .quad 0x17d2fb3d86502d7a + .quad 0xb564d84450a69352 + .quad 0x7da962c8a60ed75d + .quad 0x00d0f85b318736aa + + // 2^28 * 6 * B + + .quad 0x978b142e777c84fd + .quad 0xf402644705a8c062 + .quad 0xa67ad51be7e612c7 + .quad 0x2f7b459698dd6a33 + .quad 0xa6753c1efd7621c1 + .quad 
0x69c0b4a7445671f5 + .quad 0x971f527405b23c11 + .quad 0x387bc74851a8c7cd + .quad 0x81894b4d4a52a9a8 + .quad 0xadd93e12f6b8832f + .quad 0x184d8548b61bd638 + .quad 0x3f1c62dbd6c9f6cd + + // 2^28 * 7 * B + + .quad 0x2e8f1f0091910c1f + .quad 0xa4df4fe0bff2e12c + .quad 0x60c6560aee927438 + .quad 0x6338283facefc8fa + .quad 0x3fad3e40148f693d + .quad 0x052656e194eb9a72 + .quad 0x2f4dcbfd184f4e2f + .quad 0x406f8db1c482e18b + .quad 0x9e630d2c7f191ee4 + .quad 0x4fbf8301bc3ff670 + .quad 0x787d8e4e7afb73c4 + .quad 0x50d83d5be8f58fa5 + + // 2^28 * 8 * B + + .quad 0x85683916c11a1897 + .quad 0x2d69a4efe506d008 + .quad 0x39af1378f664bd01 + .quad 0x65942131361517c6 + .quad 0xc0accf90b4d3b66d + .quad 0xa7059de561732e60 + .quad 0x033d1f7870c6b0ba + .quad 0x584161cd26d946e4 + .quad 0xbbf2b1a072d27ca2 + .quad 0xbf393c59fbdec704 + .quad 0xe98dbbcee262b81e + .quad 0x02eebd0b3029b589 + + // 2^32 * 1 * B + + .quad 0x61368756a60dac5f + .quad 0x17e02f6aebabdc57 + .quad 0x7f193f2d4cce0f7d + .quad 0x20234a7789ecdcf0 + .quad 0x8765b69f7b85c5e8 + .quad 0x6ff0678bd168bab2 + .quad 0x3a70e77c1d330f9b + .quad 0x3a5f6d51b0af8e7c + .quad 0x76d20db67178b252 + .quad 0x071c34f9d51ed160 + .quad 0xf62a4a20b3e41170 + .quad 0x7cd682353cffe366 + + // 2^32 * 2 * B + + .quad 0x0be1a45bd887fab6 + .quad 0x2a846a32ba403b6e + .quad 0xd9921012e96e6000 + .quad 0x2838c8863bdc0943 + .quad 0xa665cd6068acf4f3 + .quad 0x42d92d183cd7e3d3 + .quad 0x5759389d336025d9 + .quad 0x3ef0253b2b2cd8ff + .quad 0xd16bb0cf4a465030 + .quad 0xfa496b4115c577ab + .quad 0x82cfae8af4ab419d + .quad 0x21dcb8a606a82812 + + // 2^32 * 3 * B + + .quad 0x5c6004468c9d9fc8 + .quad 0x2540096ed42aa3cb + .quad 0x125b4d4c12ee2f9c + .quad 0x0bc3d08194a31dab + .quad 0x9a8d00fabe7731ba + .quad 0x8203607e629e1889 + .quad 0xb2cc023743f3d97f + .quad 0x5d840dbf6c6f678b + .quad 0x706e380d309fe18b + .quad 0x6eb02da6b9e165c7 + .quad 0x57bbba997dae20ab + .quad 0x3a4276232ac196dd + + // 2^32 * 4 * B + + .quad 0x4b42432c8a7084fa + .quad 0x898a19e3dfb9e545 + .quad 0xbe9f00219c58e45d + .quad 0x1ff177cea16debd1 + .quad 0x3bf8c172db447ecb + .quad 0x5fcfc41fc6282dbd + .quad 0x80acffc075aa15fe + .quad 0x0770c9e824e1a9f9 + .quad 0xcf61d99a45b5b5fd + .quad 0x860984e91b3a7924 + .quad 0xe7300919303e3e89 + .quad 0x39f264fd41500b1e + + // 2^32 * 5 * B + + .quad 0xa7ad3417dbe7e29c + .quad 0xbd94376a2b9c139c + .quad 0xa0e91b8e93597ba9 + .quad 0x1712d73468889840 + .quad 0xd19b4aabfe097be1 + .quad 0xa46dfce1dfe01929 + .quad 0xc3c908942ca6f1ff + .quad 0x65c621272c35f14e + .quad 0xe72b89f8ce3193dd + .quad 0x4d103356a125c0bb + .quad 0x0419a93d2e1cfe83 + .quad 0x22f9800ab19ce272 + + // 2^32 * 6 * B + + .quad 0x605a368a3e9ef8cb + .quad 0xe3e9c022a5504715 + .quad 0x553d48b05f24248f + .quad 0x13f416cd647626e5 + .quad 0x42029fdd9a6efdac + .quad 0xb912cebe34a54941 + .quad 0x640f64b987bdf37b + .quad 0x4171a4d38598cab4 + .quad 0xfa2758aa99c94c8c + .quad 0x23006f6fb000b807 + .quad 0xfbd291ddadda5392 + .quad 0x508214fa574bd1ab + + // 2^32 * 7 * B + + .quad 0xc20269153ed6fe4b + .quad 0xa65a6739511d77c4 + .quad 0xcbde26462c14af94 + .quad 0x22f960ec6faba74b + .quad 0x461a15bb53d003d6 + .quad 0xb2102888bcf3c965 + .quad 0x27c576756c683a5a + .quad 0x3a7758a4c86cb447 + .quad 0x548111f693ae5076 + .quad 0x1dae21df1dfd54a6 + .quad 0x12248c90f3115e65 + .quad 0x5d9fd15f8de7f494 + + // 2^32 * 8 * B + + .quad 0x031408d36d63727f + .quad 0x6a379aefd7c7b533 + .quad 0xa9e18fc5ccaee24b + .quad 0x332f35914f8fbed3 + .quad 0x3f244d2aeed7521e + .quad 0x8e3a9028432e9615 + .quad 0xe164ba772e9c16d4 + .quad 0x3bc187fa47eb98d8 + .quad 
0x6d470115ea86c20c + .quad 0x998ab7cb6c46d125 + .quad 0xd77832b53a660188 + .quad 0x450d81ce906fba03 + + // 2^36 * 1 * B + + .quad 0xf8ae4d2ad8453902 + .quad 0x7018058ee8db2d1d + .quad 0xaab3995fc7d2c11e + .quad 0x53b16d2324ccca79 + .quad 0x23264d66b2cae0b5 + .quad 0x7dbaed33ebca6576 + .quad 0x030ebed6f0d24ac8 + .quad 0x2a887f78f7635510 + .quad 0x2a23b9e75c012d4f + .quad 0x0c974651cae1f2ea + .quad 0x2fb63273675d70ca + .quad 0x0ba7250b864403f5 + + // 2^36 * 2 * B + + .quad 0xbb0d18fd029c6421 + .quad 0xbc2d142189298f02 + .quad 0x8347f8e68b250e96 + .quad 0x7b9f2fe8032d71c9 + .quad 0xdd63589386f86d9c + .quad 0x61699176e13a85a4 + .quad 0x2e5111954eaa7d57 + .quad 0x32c21b57fb60bdfb + .quad 0xd87823cd319e0780 + .quad 0xefc4cfc1897775c5 + .quad 0x4854fb129a0ab3f7 + .quad 0x12c49d417238c371 + + // 2^36 * 3 * B + + .quad 0x0950b533ffe83769 + .quad 0x21861c1d8e1d6bd1 + .quad 0xf022d8381302e510 + .quad 0x2509200c6391cab4 + .quad 0x09b3a01783799542 + .quad 0x626dd08faad5ee3f + .quad 0xba00bceeeb70149f + .quad 0x1421b246a0a444c9 + .quad 0x4aa43a8e8c24a7c7 + .quad 0x04c1f540d8f05ef5 + .quad 0xadba5e0c0b3eb9dc + .quad 0x2ab5504448a49ce3 + + // 2^36 * 4 * B + + .quad 0x2ed227266f0f5dec + .quad 0x9824ee415ed50824 + .quad 0x807bec7c9468d415 + .quad 0x7093bae1b521e23f + .quad 0xdc07ac631c5d3afa + .quad 0x58615171f9df8c6c + .quad 0x72a079d89d73e2b0 + .quad 0x7301f4ceb4eae15d + .quad 0x6409e759d6722c41 + .quad 0xa674e1cf72bf729b + .quad 0xbc0a24eb3c21e569 + .quad 0x390167d24ebacb23 + + // 2^36 * 5 * B + + .quad 0x27f58e3bba353f1c + .quad 0x4c47764dbf6a4361 + .quad 0xafbbc4e56e562650 + .quad 0x07db2ee6aae1a45d + .quad 0xd7bb054ba2f2120b + .quad 0xe2b9ceaeb10589b7 + .quad 0x3fe8bac8f3c0edbe + .quad 0x4cbd40767112cb69 + .quad 0x0b603cc029c58176 + .quad 0x5988e3825cb15d61 + .quad 0x2bb61413dcf0ad8d + .quad 0x7b8eec6c74183287 + + // 2^36 * 6 * B + + .quad 0xe4ca40782cd27cb0 + .quad 0xdaf9c323fbe967bd + .quad 0xb29bd34a8ad41e9e + .quad 0x72810497626ede4d + .quad 0x32fee570fc386b73 + .quad 0xda8b0141da3a8cc7 + .quad 0x975ffd0ac8968359 + .quad 0x6ee809a1b132a855 + .quad 0x9444bb31fcfd863a + .quad 0x2fe3690a3e4e48c5 + .quad 0xdc29c867d088fa25 + .quad 0x13bd1e38d173292e + + // 2^36 * 7 * B + + .quad 0xd32b4cd8696149b5 + .quad 0xe55937d781d8aab7 + .quad 0x0bcb2127ae122b94 + .quad 0x41e86fcfb14099b0 + .quad 0x223fb5cf1dfac521 + .quad 0x325c25316f554450 + .quad 0x030b98d7659177ac + .quad 0x1ed018b64f88a4bd + .quad 0x3630dfa1b802a6b0 + .quad 0x880f874742ad3bd5 + .quad 0x0af90d6ceec5a4d4 + .quad 0x746a247a37cdc5d9 + + // 2^36 * 8 * B + + .quad 0xd531b8bd2b7b9af6 + .quad 0x5005093537fc5b51 + .quad 0x232fcf25c593546d + .quad 0x20a365142bb40f49 + .quad 0x6eccd85278d941ed + .quad 0x2254ae83d22f7843 + .quad 0xc522d02e7bbfcdb7 + .quad 0x681e3351bff0e4e2 + .quad 0x8b64b59d83034f45 + .quad 0x2f8b71f21fa20efb + .quad 0x69249495ba6550e4 + .quad 0x539ef98e45d5472b + + // 2^40 * 1 * B + + .quad 0x6e7bb6a1a6205275 + .quad 0xaa4f21d7413c8e83 + .quad 0x6f56d155e88f5cb2 + .quad 0x2de25d4ba6345be1 + .quad 0xd074d8961cae743f + .quad 0xf86d18f5ee1c63ed + .quad 0x97bdc55be7f4ed29 + .quad 0x4cbad279663ab108 + .quad 0x80d19024a0d71fcd + .quad 0xc525c20afb288af8 + .quad 0xb1a3974b5f3a6419 + .quad 0x7d7fbcefe2007233 + + // 2^40 * 2 * B + + .quad 0xfaef1e6a266b2801 + .quad 0x866c68c4d5739f16 + .quad 0xf68a2fbc1b03762c + .quad 0x5975435e87b75a8d + .quad 0xcd7c5dc5f3c29094 + .quad 0xc781a29a2a9105ab + .quad 0x80c61d36421c3058 + .quad 0x4f9cd196dcd8d4d7 + .quad 0x199297d86a7b3768 + .quad 0xd0d058241ad17a63 + .quad 0xba029cad5c1c0c17 + .quad 
0x7ccdd084387a0307 + + // 2^40 * 3 * B + + .quad 0xdca6422c6d260417 + .quad 0xae153d50948240bd + .quad 0xa9c0c1b4fb68c677 + .quad 0x428bd0ed61d0cf53 + .quad 0x9b0c84186760cc93 + .quad 0xcdae007a1ab32a99 + .quad 0xa88dec86620bda18 + .quad 0x3593ca848190ca44 + .quad 0x9213189a5e849aa7 + .quad 0xd4d8c33565d8facd + .quad 0x8c52545b53fdbbd1 + .quad 0x27398308da2d63e6 + + // 2^40 * 4 * B + + .quad 0x42c38d28435ed413 + .quad 0xbd50f3603278ccc9 + .quad 0xbb07ab1a79da03ef + .quad 0x269597aebe8c3355 + .quad 0xb9a10e4c0a702453 + .quad 0x0fa25866d57d1bde + .quad 0xffb9d9b5cd27daf7 + .quad 0x572c2945492c33fd + .quad 0xc77fc745d6cd30be + .quad 0xe4dfe8d3e3baaefb + .quad 0xa22c8830aa5dda0c + .quad 0x7f985498c05bca80 + + // 2^40 * 5 * B + + .quad 0x3849ce889f0be117 + .quad 0x8005ad1b7b54a288 + .quad 0x3da3c39f23fc921c + .quad 0x76c2ec470a31f304 + .quad 0xd35615520fbf6363 + .quad 0x08045a45cf4dfba6 + .quad 0xeec24fbc873fa0c2 + .quad 0x30f2653cd69b12e7 + .quad 0x8a08c938aac10c85 + .quad 0x46179b60db276bcb + .quad 0xa920c01e0e6fac70 + .quad 0x2f1273f1596473da + + // 2^40 * 6 * B + + .quad 0x4739fc7c8ae01e11 + .quad 0xfd5274904a6aab9f + .quad 0x41d98a8287728f2e + .quad 0x5d9e572ad85b69f2 + .quad 0x30488bd755a70bc0 + .quad 0x06d6b5a4f1d442e7 + .quad 0xead1a69ebc596162 + .quad 0x38ac1997edc5f784 + .quad 0x0666b517a751b13b + .quad 0x747d06867e9b858c + .quad 0xacacc011454dde49 + .quad 0x22dfcd9cbfe9e69c + + // 2^40 * 7 * B + + .quad 0x8ddbd2e0c30d0cd9 + .quad 0xad8e665facbb4333 + .quad 0x8f6b258c322a961f + .quad 0x6b2916c05448c1c7 + .quad 0x56ec59b4103be0a1 + .quad 0x2ee3baecd259f969 + .quad 0x797cb29413f5cd32 + .quad 0x0fe9877824cde472 + .quad 0x7edb34d10aba913b + .quad 0x4ea3cd822e6dac0e + .quad 0x66083dff6578f815 + .quad 0x4c303f307ff00a17 + + // 2^40 * 8 * B + + .quad 0xd30a3bd617b28c85 + .quad 0xc5d377b739773bea + .quad 0xc6c6e78c1e6a5cbf + .quad 0x0d61b8f78b2ab7c4 + .quad 0x29fc03580dd94500 + .quad 0xecd27aa46fbbec93 + .quad 0x130a155fc2e2a7f8 + .quad 0x416b151ab706a1d5 + .quad 0x56a8d7efe9c136b0 + .quad 0xbd07e5cd58e44b20 + .quad 0xafe62fda1b57e0ab + .quad 0x191a2af74277e8d2 + + // 2^44 * 1 * B + + .quad 0xd550095bab6f4985 + .quad 0x04f4cd5b4fbfaf1a + .quad 0x9d8e2ed12a0c7540 + .quad 0x2bc24e04b2212286 + .quad 0x09d4b60b2fe09a14 + .quad 0xc384f0afdbb1747e + .quad 0x58e2ea8978b5fd6e + .quad 0x519ef577b5e09b0a + .quad 0x1863d7d91124cca9 + .quad 0x7ac08145b88a708e + .quad 0x2bcd7309857031f5 + .quad 0x62337a6e8ab8fae5 + + // 2^44 * 2 * B + + .quad 0x4bcef17f06ffca16 + .quad 0xde06e1db692ae16a + .quad 0x0753702d614f42b0 + .quad 0x5f6041b45b9212d0 + .quad 0xd1ab324e1b3a1273 + .quad 0x18947cf181055340 + .quad 0x3b5d9567a98c196e + .quad 0x7fa00425802e1e68 + .quad 0x7d531574028c2705 + .quad 0x80317d69db0d75fe + .quad 0x30fface8ef8c8ddd + .quad 0x7e9de97bb6c3e998 + + // 2^44 * 3 * B + + .quad 0x1558967b9e6585a3 + .quad 0x97c99ce098e98b92 + .quad 0x10af149b6eb3adad + .quad 0x42181fe8f4d38cfa + .quad 0xf004be62a24d40dd + .quad 0xba0659910452d41f + .quad 0x81c45ee162a44234 + .quad 0x4cb829d8a22266ef + .quad 0x1dbcaa8407b86681 + .quad 0x081f001e8b26753b + .quad 0x3cd7ce6a84048e81 + .quad 0x78af11633f25f22c + + // 2^44 * 4 * B + + .quad 0x8416ebd40b50babc + .quad 0x1508722628208bee + .quad 0xa3148fafb9c1c36d + .quad 0x0d07daacd32d7d5d + .quad 0x3241c00e7d65318c + .quad 0xe6bee5dcd0e86de7 + .quad 0x118b2dc2fbc08c26 + .quad 0x680d04a7fc603dc3 + .quad 0xf9c2414a695aa3eb + .quad 0xdaa42c4c05a68f21 + .quad 0x7c6c23987f93963e + .quad 0x210e8cd30c3954e3 + + // 2^44 * 5 * B + + .quad 0xac4201f210a71c06 + .quad 0x6a65e0aef3bfb021 
+ .quad 0xbc42c35c393632f7 + .quad 0x56ea8db1865f0742 + .quad 0x2b50f16137fe6c26 + .quad 0xe102bcd856e404d8 + .quad 0x12b0f1414c561f6b + .quad 0x51b17bc8d028ec91 + .quad 0xfff5fb4bcf535119 + .quad 0xf4989d79df1108a0 + .quad 0xbdfcea659a3ba325 + .quad 0x18a11f1174d1a6f2 + + // 2^44 * 6 * B + + .quad 0x407375ab3f6bba29 + .quad 0x9ec3b6d8991e482e + .quad 0x99c80e82e55f92e9 + .quad 0x307c13b6fb0c0ae1 + .quad 0xfbd63cdad27a5f2c + .quad 0xf00fc4bc8aa106d7 + .quad 0x53fb5c1a8e64a430 + .quad 0x04eaabe50c1a2e85 + .quad 0x24751021cb8ab5e7 + .quad 0xfc2344495c5010eb + .quad 0x5f1e717b4e5610a1 + .quad 0x44da5f18c2710cd5 + + // 2^44 * 7 * B + + .quad 0x033cc55ff1b82eb5 + .quad 0xb15ae36d411cae52 + .quad 0xba40b6198ffbacd3 + .quad 0x768edce1532e861f + .quad 0x9156fe6b89d8eacc + .quad 0xe6b79451e23126a1 + .quad 0xbd7463d93944eb4e + .quad 0x726373f6767203ae + .quad 0xe305ca72eb7ef68a + .quad 0x662cf31f70eadb23 + .quad 0x18f026fdb4c45b68 + .quad 0x513b5384b5d2ecbd + + // 2^44 * 8 * B + + .quad 0x46d46280c729989e + .quad 0x4b93fbd05368a5dd + .quad 0x63df3f81d1765a89 + .quad 0x34cebd64b9a0a223 + .quad 0x5e2702878af34ceb + .quad 0x900b0409b946d6ae + .quad 0x6512ebf7dabd8512 + .quad 0x61d9b76988258f81 + .quad 0xa6c5a71349b7d94b + .quad 0xa3f3d15823eb9446 + .quad 0x0416fbd277484834 + .quad 0x69d45e6f2c70812f + + // 2^48 * 1 * B + + .quad 0xce16f74bc53c1431 + .quad 0x2b9725ce2072edde + .quad 0xb8b9c36fb5b23ee7 + .quad 0x7e2e0e450b5cc908 + .quad 0x9fe62b434f460efb + .quad 0xded303d4a63607d6 + .quad 0xf052210eb7a0da24 + .quad 0x237e7dbe00545b93 + .quad 0x013575ed6701b430 + .quad 0x231094e69f0bfd10 + .quad 0x75320f1583e47f22 + .quad 0x71afa699b11155e3 + + // 2^48 * 2 * B + + .quad 0x65ce6f9b3953b61d + .quad 0xc65839eaafa141e6 + .quad 0x0f435ffda9f759fe + .quad 0x021142e9c2b1c28e + .quad 0xea423c1c473b50d6 + .quad 0x51e87a1f3b38ef10 + .quad 0x9b84bf5fb2c9be95 + .quad 0x00731fbc78f89a1c + .quad 0xe430c71848f81880 + .quad 0xbf960c225ecec119 + .quad 0xb6dae0836bba15e3 + .quad 0x4c4d6f3347e15808 + + // 2^48 * 3 * B + + .quad 0x18f7eccfc17d1fc9 + .quad 0x6c75f5a651403c14 + .quad 0xdbde712bf7ee0cdf + .quad 0x193fddaaa7e47a22 + .quad 0x2f0cddfc988f1970 + .quad 0x6b916227b0b9f51b + .quad 0x6ec7b6c4779176be + .quad 0x38bf9500a88f9fa8 + .quad 0x1fd2c93c37e8876f + .quad 0xa2f61e5a18d1462c + .quad 0x5080f58239241276 + .quad 0x6a6fb99ebf0d4969 + + // 2^48 * 4 * B + + .quad 0x6a46c1bb560855eb + .quad 0x2416bb38f893f09d + .quad 0xd71d11378f71acc1 + .quad 0x75f76914a31896ea + .quad 0xeeb122b5b6e423c6 + .quad 0x939d7010f286ff8e + .quad 0x90a92a831dcf5d8c + .quad 0x136fda9f42c5eb10 + .quad 0xf94cdfb1a305bdd1 + .quad 0x0f364b9d9ff82c08 + .quad 0x2a87d8a5c3bb588a + .quad 0x022183510be8dcba + + // 2^48 * 5 * B + + .quad 0x4af766385ead2d14 + .quad 0xa08ed880ca7c5830 + .quad 0x0d13a6e610211e3d + .quad 0x6a071ce17b806c03 + .quad 0x9d5a710143307a7f + .quad 0xb063de9ec47da45f + .quad 0x22bbfe52be927ad3 + .quad 0x1387c441fd40426c + .quad 0xb5d3c3d187978af8 + .quad 0x722b5a3d7f0e4413 + .quad 0x0d7b4848bb477ca0 + .quad 0x3171b26aaf1edc92 + + // 2^48 * 6 * B + + .quad 0xa92f319097564ca8 + .quad 0xff7bb84c2275e119 + .quad 0x4f55fe37a4875150 + .quad 0x221fd4873cf0835a + .quad 0xa60db7d8b28a47d1 + .quad 0xa6bf14d61770a4f1 + .quad 0xd4a1f89353ddbd58 + .quad 0x6c514a63344243e9 + .quad 0x2322204f3a156341 + .quad 0xfb73e0e9ba0a032d + .quad 0xfce0dd4c410f030e + .quad 0x48daa596fb924aaa + + // 2^48 * 7 * B + + .quad 0x6eca8e665ca59cc7 + .quad 0xa847254b2e38aca0 + .quad 0x31afc708d21e17ce + .quad 0x676dd6fccad84af7 + .quad 0x14f61d5dc84c9793 + .quad 
0x9941f9e3ef418206 + .quad 0xcdf5b88f346277ac + .quad 0x58c837fa0e8a79a9 + .quad 0x0cf9688596fc9058 + .quad 0x1ddcbbf37b56a01b + .quad 0xdcc2e77d4935d66a + .quad 0x1c4f73f2c6a57f0a + + // 2^48 * 8 * B + + .quad 0x0e7a4fbd305fa0bb + .quad 0x829d4ce054c663ad + .quad 0xf421c3832fe33848 + .quad 0x795ac80d1bf64c42 + .quad 0xb36e706efc7c3484 + .quad 0x73dfc9b4c3c1cf61 + .quad 0xeb1d79c9781cc7e5 + .quad 0x70459adb7daf675c + .quad 0x1b91db4991b42bb3 + .quad 0x572696234b02dcca + .quad 0x9fdf9ee51f8c78dc + .quad 0x5fe162848ce21fd3 + + // 2^52 * 1 * B + + .quad 0xe2790aae4d077c41 + .quad 0x8b938270db7469a3 + .quad 0x6eb632dc8abd16a2 + .quad 0x720814ecaa064b72 + .quad 0x315c29c795115389 + .quad 0xd7e0e507862f74ce + .quad 0x0c4a762185927432 + .quad 0x72de6c984a25a1e4 + .quad 0xae9ab553bf6aa310 + .quad 0x050a50a9806d6e1b + .quad 0x92bb7403adff5139 + .quad 0x0394d27645be618b + + // 2^52 * 2 * B + + .quad 0x4d572251857eedf4 + .quad 0xe3724edde19e93c5 + .quad 0x8a71420e0b797035 + .quad 0x3b3c833687abe743 + .quad 0xf5396425b23545a4 + .quad 0x15a7a27e98fbb296 + .quad 0xab6c52bc636fdd86 + .quad 0x79d995a8419334ee + .quad 0xcd8a8ea61195dd75 + .quad 0xa504d8a81dd9a82f + .quad 0x540dca81a35879b6 + .quad 0x60dd16a379c86a8a + + // 2^52 * 3 * B + + .quad 0x35a2c8487381e559 + .quad 0x596ffea6d78082cb + .quad 0xcb9771ebdba7b653 + .quad 0x5a08b5019b4da685 + .quad 0x3501d6f8153e47b8 + .quad 0xb7a9675414a2f60c + .quad 0x112ee8b6455d9523 + .quad 0x4e62a3c18112ea8a + .quad 0xc8d4ac04516ab786 + .quad 0x595af3215295b23d + .quad 0xd6edd234db0230c1 + .quad 0x0929efe8825b41cc + + // 2^52 * 4 * B + + .quad 0x5f0601d1cbd0f2d3 + .quad 0x736e412f6132bb7f + .quad 0x83604432238dde87 + .quad 0x1e3a5272f5c0753c + .quad 0x8b3172b7ad56651d + .quad 0x01581b7a3fabd717 + .quad 0x2dc94df6424df6e4 + .quad 0x30376e5d2c29284f + .quad 0xd2918da78159a59c + .quad 0x6bdc1cd93f0713f3 + .quad 0x565f7a934acd6590 + .quad 0x53daacec4cb4c128 + + // 2^52 * 5 * B + + .quad 0x4ca73bd79cc8a7d6 + .quad 0x4d4a738f47e9a9b2 + .quad 0xf4cbf12942f5fe00 + .quad 0x01a13ff9bdbf0752 + .quad 0x99852bc3852cfdb0 + .quad 0x2cc12e9559d6ed0b + .quad 0x70f9e2bf9b5ac27b + .quad 0x4f3b8c117959ae99 + .quad 0x55b6c9c82ff26412 + .quad 0x1ac4a8c91fb667a8 + .quad 0xd527bfcfeb778bf2 + .quad 0x303337da7012a3be + + // 2^52 * 6 * B + + .quad 0x955422228c1c9d7c + .quad 0x01fac1371a9b340f + .quad 0x7e8d9177925b48d7 + .quad 0x53f8ad5661b3e31b + .quad 0x976d3ccbfad2fdd1 + .quad 0xcb88839737a640a8 + .quad 0x2ff00c1d6734cb25 + .quad 0x269ff4dc789c2d2b + .quad 0x0c003fbdc08d678d + .quad 0x4d982fa37ead2b17 + .quad 0xc07e6bcdb2e582f1 + .quad 0x296c7291df412a44 + + // 2^52 * 7 * B + + .quad 0x7903de2b33daf397 + .quad 0xd0ff0619c9a624b3 + .quad 0x8a1d252b555b3e18 + .quad 0x2b6d581c52e0b7c0 + .quad 0xdfb23205dab8b59e + .quad 0x465aeaa0c8092250 + .quad 0xd133c1189a725d18 + .quad 0x2327370261f117d1 + .quad 0x3d0543d3623e7986 + .quad 0x679414c2c278a354 + .quad 0xae43f0cc726196f6 + .quad 0x7836c41f8245eaba + + // 2^52 * 8 * B + + .quad 0xe7a254db49e95a81 + .quad 0x5192d5d008b0ad73 + .quad 0x4d20e5b1d00afc07 + .quad 0x5d55f8012cf25f38 + .quad 0xca651e848011937c + .quad 0xc6b0c46e6ef41a28 + .quad 0xb7021ba75f3f8d52 + .quad 0x119dff99ead7b9fd + .quad 0x43eadfcbf4b31d4d + .quad 0xc6503f7411148892 + .quad 0xfeee68c5060d3b17 + .quad 0x329293b3dd4a0ac8 + + // 2^56 * 1 * B + + .quad 0x4e59214fe194961a + .quad 0x49be7dc70d71cd4f + .quad 0x9300cfd23b50f22d + .quad 0x4789d446fc917232 + .quad 0x2879852d5d7cb208 + .quad 0xb8dedd70687df2e7 + .quad 0xdc0bffab21687891 + .quad 0x2b44c043677daa35 + .quad 
0x1a1c87ab074eb78e + .quad 0xfac6d18e99daf467 + .quad 0x3eacbbcd484f9067 + .quad 0x60c52eef2bb9a4e4 + + // 2^56 * 2 * B + + .quad 0x0b5d89bc3bfd8bf1 + .quad 0xb06b9237c9f3551a + .quad 0x0e4c16b0d53028f5 + .quad 0x10bc9c312ccfcaab + .quad 0x702bc5c27cae6d11 + .quad 0x44c7699b54a48cab + .quad 0xefbc4056ba492eb2 + .quad 0x70d77248d9b6676d + .quad 0xaa8ae84b3ec2a05b + .quad 0x98699ef4ed1781e0 + .quad 0x794513e4708e85d1 + .quad 0x63755bd3a976f413 + + // 2^56 * 3 * B + + .quad 0xb55fa03e2ad10853 + .quad 0x356f75909ee63569 + .quad 0x9ff9f1fdbe69b890 + .quad 0x0d8cc1c48bc16f84 + .quad 0x3dc7101897f1acb7 + .quad 0x5dda7d5ec165bbd8 + .quad 0x508e5b9c0fa1020f + .quad 0x2763751737c52a56 + .quad 0x029402d36eb419a9 + .quad 0xf0b44e7e77b460a5 + .quad 0xcfa86230d43c4956 + .quad 0x70c2dd8a7ad166e7 + + // 2^56 * 4 * B + + .quad 0x656194509f6fec0e + .quad 0xee2e7ea946c6518d + .quad 0x9733c1f367e09b5c + .quad 0x2e0fac6363948495 + .quad 0x91d4967db8ed7e13 + .quad 0x74252f0ad776817a + .quad 0xe40982e00d852564 + .quad 0x32b8613816a53ce5 + .quad 0x79e7f7bee448cd64 + .quad 0x6ac83a67087886d0 + .quad 0xf89fd4d9a0e4db2e + .quad 0x4179215c735a4f41 + + // 2^56 * 5 * B + + .quad 0x8c7094e7d7dced2a + .quad 0x97fb8ac347d39c70 + .quad 0xe13be033a906d902 + .quad 0x700344a30cd99d76 + .quad 0xe4ae33b9286bcd34 + .quad 0xb7ef7eb6559dd6dc + .quad 0x278b141fb3d38e1f + .quad 0x31fa85662241c286 + .quad 0xaf826c422e3622f4 + .quad 0xc12029879833502d + .quad 0x9bc1b7e12b389123 + .quad 0x24bb2312a9952489 + + // 2^56 * 6 * B + + .quad 0xb1a8ed1732de67c3 + .quad 0x3cb49418461b4948 + .quad 0x8ebd434376cfbcd2 + .quad 0x0fee3e871e188008 + .quad 0x41f80c2af5f85c6b + .quad 0x687284c304fa6794 + .quad 0x8945df99a3ba1bad + .quad 0x0d1d2af9ffeb5d16 + .quad 0xa9da8aa132621edf + .quad 0x30b822a159226579 + .quad 0x4004197ba79ac193 + .quad 0x16acd79718531d76 + + // 2^56 * 7 * B + + .quad 0x72df72af2d9b1d3d + .quad 0x63462a36a432245a + .quad 0x3ecea07916b39637 + .quad 0x123e0ef6b9302309 + .quad 0xc959c6c57887b6ad + .quad 0x94e19ead5f90feba + .quad 0x16e24e62a342f504 + .quad 0x164ed34b18161700 + .quad 0x487ed94c192fe69a + .quad 0x61ae2cea3a911513 + .quad 0x877bf6d3b9a4de27 + .quad 0x78da0fc61073f3eb + + // 2^56 * 8 * B + + .quad 0x5bf15d28e52bc66a + .quad 0x2c47e31870f01a8e + .quad 0x2419afbc06c28bdd + .quad 0x2d25deeb256b173a + .quad 0xa29f80f1680c3a94 + .quad 0x71f77e151ae9e7e6 + .quad 0x1100f15848017973 + .quad 0x054aa4b316b38ddd + .quad 0xdfc8468d19267cb8 + .quad 0x0b28789c66e54daf + .quad 0x2aeb1d2a666eec17 + .quad 0x134610a6ab7da760 + + // 2^60 * 1 * B + + .quad 0xcaf55ec27c59b23f + .quad 0x99aeed3e154d04f2 + .quad 0x68441d72e14141f4 + .quad 0x140345133932a0a2 + .quad 0xd91430e0dc028c3c + .quad 0x0eb955a85217c771 + .quad 0x4b09e1ed2c99a1fa + .quad 0x42881af2bd6a743c + .quad 0x7bfec69aab5cad3d + .quad 0xc23e8cd34cb2cfad + .quad 0x685dd14bfb37d6a2 + .quad 0x0ad6d64415677a18 + + // 2^60 * 2 * B + + .quad 0x781a439e417becb5 + .quad 0x4ac5938cd10e0266 + .quad 0x5da385110692ac24 + .quad 0x11b065a2ade31233 + .quad 0x7914892847927e9f + .quad 0x33dad6ef370aa877 + .quad 0x1f8f24fa11122703 + .quad 0x5265ac2f2adf9592 + .quad 0x405fdd309afcb346 + .quad 0xd9723d4428e63f54 + .quad 0x94c01df05f65aaae + .quad 0x43e4dc3ae14c0809 + + // 2^60 * 3 * B + + .quad 0xbc12c7f1a938a517 + .quad 0x473028ab3180b2e1 + .quad 0x3f78571efbcd254a + .quad 0x74e534426ff6f90f + .quad 0xea6f7ac3adc2c6a3 + .quad 0xd0e928f6e9717c94 + .quad 0xe2d379ead645eaf5 + .quad 0x46dd8785c51ffbbe + .quad 0x709801be375c8898 + .quad 0x4b06dab5e3fd8348 + .quad 0x75880ced27230714 + .quad 
0x2b09468fdd2f4c42 + + // 2^60 * 4 * B + + .quad 0x97c749eeb701cb96 + .quad 0x83f438d4b6a369c3 + .quad 0x62962b8b9a402cd9 + .quad 0x6976c7509888df7b + .quad 0x5b97946582ffa02a + .quad 0xda096a51fea8f549 + .quad 0xa06351375f77af9b + .quad 0x1bcfde61201d1e76 + .quad 0x4a4a5490246a59a2 + .quad 0xd63ebddee87fdd90 + .quad 0xd9437c670d2371fa + .quad 0x69e87308d30f8ed6 + + // 2^60 * 5 * B + + .quad 0x435a8bb15656beb0 + .quad 0xf8fac9ba4f4d5bca + .quad 0xb9b278c41548c075 + .quad 0x3eb0ef76e892b622 + .quad 0x0f80bf028bc80303 + .quad 0x6aae16b37a18cefb + .quad 0xdd47ea47d72cd6a3 + .quad 0x61943588f4ed39aa + .quad 0xd26e5c3e91039f85 + .quad 0xc0e9e77df6f33aa9 + .quad 0xe8968c5570066a93 + .quad 0x3c34d1881faaaddd + + // 2^60 * 6 * B + + .quad 0x3f9d2b5ea09f9ec0 + .quad 0x1dab3b6fb623a890 + .quad 0xa09ba3ea72d926c4 + .quad 0x374193513fd8b36d + .quad 0xbd5b0b8f2fffe0d9 + .quad 0x6aa254103ed24fb9 + .quad 0x2ac7d7bcb26821c4 + .quad 0x605b394b60dca36a + .quad 0xb4e856e45a9d1ed2 + .quad 0xefe848766c97a9a2 + .quad 0xb104cf641e5eee7d + .quad 0x2f50b81c88a71c8f + + // 2^60 * 7 * B + + .quad 0x31723c61fc6811bb + .quad 0x9cb450486211800f + .quad 0x768933d347995753 + .quad 0x3491a53502752fcd + .quad 0x2b552ca0a7da522a + .quad 0x3230b336449b0250 + .quad 0xf2c4c5bca4b99fb9 + .quad 0x7b2c674958074a22 + .quad 0xd55165883ed28cdf + .quad 0x12d84fd2d362de39 + .quad 0x0a874ad3e3378e4f + .quad 0x000d2b1f7c763e74 + + // 2^60 * 8 * B + + .quad 0x3d420811d06d4a67 + .quad 0xbefc048590e0ffe3 + .quad 0xf870c6b7bd487bde + .quad 0x6e2a7316319afa28 + .quad 0x9624778c3e94a8ab + .quad 0x0ad6f3cee9a78bec + .quad 0x948ac7810d743c4f + .quad 0x76627935aaecfccc + .quad 0x56a8ac24d6d59a9f + .quad 0xc8db753e3096f006 + .quad 0x477f41e68f4c5299 + .quad 0x588d851cf6c86114 + + // 2^64 * 1 * B + + .quad 0x51138ec78df6b0fe + .quad 0x5397da89e575f51b + .quad 0x09207a1d717af1b9 + .quad 0x2102fdba2b20d650 + .quad 0xcd2a65e777d1f515 + .quad 0x548991878faa60f1 + .quad 0xb1b73bbcdabc06e5 + .quad 0x654878cba97cc9fb + .quad 0x969ee405055ce6a1 + .quad 0x36bca7681251ad29 + .quad 0x3a1af517aa7da415 + .quad 0x0ad725db29ecb2ba + + // 2^64 * 2 * B + + .quad 0xdc4267b1834e2457 + .quad 0xb67544b570ce1bc5 + .quad 0x1af07a0bf7d15ed7 + .quad 0x4aefcffb71a03650 + .quad 0xfec7bc0c9b056f85 + .quad 0x537d5268e7f5ffd7 + .quad 0x77afc6624312aefa + .quad 0x4f675f5302399fd9 + .quad 0xc32d36360415171e + .quad 0xcd2bef118998483b + .quad 0x870a6eadd0945110 + .quad 0x0bccbb72a2a86561 + + // 2^64 * 3 * B + + .quad 0x185e962feab1a9c8 + .quad 0x86e7e63565147dcd + .quad 0xb092e031bb5b6df2 + .quad 0x4024f0ab59d6b73e + .quad 0x186d5e4c50fe1296 + .quad 0xe0397b82fee89f7e + .quad 0x3bc7f6c5507031b0 + .quad 0x6678fd69108f37c2 + .quad 0x1586fa31636863c2 + .quad 0x07f68c48572d33f2 + .quad 0x4f73cc9f789eaefc + .quad 0x2d42e2108ead4701 + + // 2^64 * 4 * B + + .quad 0x97f5131594dfd29b + .quad 0x6155985d313f4c6a + .quad 0xeba13f0708455010 + .quad 0x676b2608b8d2d322 + .quad 0x21717b0d0f537593 + .quad 0x914e690b131e064c + .quad 0x1bb687ae752ae09f + .quad 0x420bf3a79b423c6e + .quad 0x8138ba651c5b2b47 + .quad 0x8671b6ec311b1b80 + .quad 0x7bff0cb1bc3135b0 + .quad 0x745d2ffa9c0cf1e0 + + // 2^64 * 5 * B + + .quad 0xbf525a1e2bc9c8bd + .quad 0xea5b260826479d81 + .quad 0xd511c70edf0155db + .quad 0x1ae23ceb960cf5d0 + .quad 0x6036df5721d34e6a + .quad 0xb1db8827997bb3d0 + .quad 0xd3c209c3c8756afa + .quad 0x06e15be54c1dc839 + .quad 0x5b725d871932994a + .quad 0x32351cb5ceb1dab0 + .quad 0x7dc41549dab7ca05 + .quad 0x58ded861278ec1f7 + + // 2^64 * 6 * B + + .quad 0xd8173793f266c55c + .quad 0xc8c976c5cc454e49 
+ .quad 0x5ce382f8bc26c3a8 + .quad 0x2ff39de85485f6f9 + .quad 0x2dfb5ba8b6c2c9a8 + .quad 0x48eeef8ef52c598c + .quad 0x33809107f12d1573 + .quad 0x08ba696b531d5bd8 + .quad 0x77ed3eeec3efc57a + .quad 0x04e05517d4ff4811 + .quad 0xea3d7a3ff1a671cb + .quad 0x120633b4947cfe54 + + // 2^64 * 7 * B + + .quad 0x0b94987891610042 + .quad 0x4ee7b13cecebfae8 + .quad 0x70be739594f0a4c0 + .quad 0x35d30a99b4d59185 + .quad 0x82bd31474912100a + .quad 0xde237b6d7e6fbe06 + .quad 0xe11e761911ea79c6 + .quad 0x07433be3cb393bde + .quad 0xff7944c05ce997f4 + .quad 0x575d3de4b05c51a3 + .quad 0x583381fd5a76847c + .quad 0x2d873ede7af6da9f + + // 2^64 * 8 * B + + .quad 0x157a316443373409 + .quad 0xfab8b7eef4aa81d9 + .quad 0xb093fee6f5a64806 + .quad 0x2e773654707fa7b6 + .quad 0xaa6202e14e5df981 + .quad 0xa20d59175015e1f5 + .quad 0x18a275d3bae21d6c + .quad 0x0543618a01600253 + .quad 0x0deabdf4974c23c1 + .quad 0xaa6f0a259dce4693 + .quad 0x04202cb8a29aba2c + .quad 0x4b1443362d07960d + + // 2^68 * 1 * B + + .quad 0x47b837f753242cec + .quad 0x256dc48cc04212f2 + .quad 0xe222fbfbe1d928c5 + .quad 0x48ea295bad8a2c07 + .quad 0x299b1c3f57c5715e + .quad 0x96cb929e6b686d90 + .quad 0x3004806447235ab3 + .quad 0x2c435c24a44d9fe1 + .quad 0x0607c97c80f8833f + .quad 0x0e851578ca25ec5b + .quad 0x54f7450b161ebb6f + .quad 0x7bcb4792a0def80e + + // 2^68 * 2 * B + + .quad 0x8487e3d02bc73659 + .quad 0x4baf8445059979df + .quad 0xd17c975adcad6fbf + .quad 0x57369f0bdefc96b6 + .quad 0x1cecd0a0045224c2 + .quad 0x757f1b1b69e53952 + .quad 0x775b7a925289f681 + .quad 0x1b6cc62016736148 + .quad 0xf1a9990175638698 + .quad 0x353dd1beeeaa60d3 + .quad 0x849471334c9ba488 + .quad 0x63fa6e6843ade311 + + // 2^68 * 3 * B + + .quad 0xd15c20536597c168 + .quad 0x9f73740098d28789 + .quad 0x18aee7f13257ba1f + .quad 0x3418bfda07346f14 + .quad 0x2195becdd24b5eb7 + .quad 0x5e41f18cc0cd44f9 + .quad 0xdf28074441ca9ede + .quad 0x07073b98f35b7d67 + .quad 0xd03c676c4ce530d4 + .quad 0x0b64c0473b5df9f4 + .quad 0x065cef8b19b3a31e + .quad 0x3084d661533102c9 + + // 2^68 * 4 * B + + .quad 0xe1f6b79ebf8469ad + .quad 0x15801004e2663135 + .quad 0x9a498330af74181b + .quad 0x3ba2504f049b673c + .quad 0x9a6ce876760321fd + .quad 0x7fe2b5109eb63ad8 + .quad 0x00e7d4ae8ac80592 + .quad 0x73d86b7abb6f723a + .quad 0x0b52b5606dba5ab6 + .quad 0xa9134f0fbbb1edab + .quad 0x30a9520d9b04a635 + .quad 0x6813b8f37973e5db + + // 2^68 * 5 * B + + .quad 0x9854b054334127c1 + .quad 0x105d047882fbff25 + .quad 0xdb49f7f944186f4f + .quad 0x1768e838bed0b900 + .quad 0xf194ca56f3157e29 + .quad 0x136d35705ef528a5 + .quad 0xdd4cef778b0599bc + .quad 0x7d5472af24f833ed + .quad 0xd0ef874daf33da47 + .quad 0x00d3be5db6e339f9 + .quad 0x3f2a8a2f9c9ceece + .quad 0x5d1aeb792352435a + + // 2^68 * 6 * B + + .quad 0xf59e6bb319cd63ca + .quad 0x670c159221d06839 + .quad 0xb06d565b2150cab6 + .quad 0x20fb199d104f12a3 + .quad 0x12c7bfaeb61ba775 + .quad 0xb84e621fe263bffd + .quad 0x0b47a5c35c840dcf + .quad 0x7e83be0bccaf8634 + .quad 0x61943dee6d99c120 + .quad 0x86101f2e460b9fe0 + .quad 0x6bb2f1518ee8598d + .quad 0x76b76289fcc475cc + + // 2^68 * 7 * B + + .quad 0x791b4cc1756286fa + .quad 0xdbced317d74a157c + .quad 0x7e732421ea72bde6 + .quad 0x01fe18491131c8e9 + .quad 0x4245f1a1522ec0b3 + .quad 0x558785b22a75656d + .quad 0x1d485a2548a1b3c0 + .quad 0x60959eccd58fe09f + .quad 0x3ebfeb7ba8ed7a09 + .quad 0x49fdc2bbe502789c + .quad 0x44ebce5d3c119428 + .quad 0x35e1eb55be947f4a + + // 2^68 * 8 * B + + .quad 0xdbdae701c5738dd3 + .quad 0xf9c6f635b26f1bee + .quad 0x61e96a8042f15ef4 + .quad 0x3aa1d11faf60a4d8 + .quad 0x14fd6dfa726ccc74 + .quad 
0x3b084cfe2f53b965 + .quad 0xf33ae4f552a2c8b4 + .quad 0x59aab07a0d40166a + .quad 0x77bcec4c925eac25 + .quad 0x1848718460137738 + .quad 0x5b374337fea9f451 + .quad 0x1865e78ec8e6aa46 + + // 2^72 * 1 * B + + .quad 0xccc4b7c7b66e1f7a + .quad 0x44157e25f50c2f7e + .quad 0x3ef06dfc713eaf1c + .quad 0x582f446752da63f7 + .quad 0x967c54e91c529ccb + .quad 0x30f6269264c635fb + .quad 0x2747aff478121965 + .quad 0x17038418eaf66f5c + .quad 0xc6317bd320324ce4 + .quad 0xa81042e8a4488bc4 + .quad 0xb21ef18b4e5a1364 + .quad 0x0c2a1c4bcda28dc9 + + // 2^72 * 2 * B + + .quad 0xd24dc7d06f1f0447 + .quad 0xb2269e3edb87c059 + .quad 0xd15b0272fbb2d28f + .quad 0x7c558bd1c6f64877 + .quad 0xedc4814869bd6945 + .quad 0x0d6d907dbe1c8d22 + .quad 0xc63bd212d55cc5ab + .quad 0x5a6a9b30a314dc83 + .quad 0xd0ec1524d396463d + .quad 0x12bb628ac35a24f0 + .quad 0xa50c3a791cbc5fa4 + .quad 0x0404a5ca0afbafc3 + + // 2^72 * 3 * B + + .quad 0x8c1f40070aa743d6 + .quad 0xccbad0cb5b265ee8 + .quad 0x574b046b668fd2de + .quad 0x46395bfdcadd9633 + .quad 0x62bc9e1b2a416fd1 + .quad 0xb5c6f728e350598b + .quad 0x04343fd83d5d6967 + .quad 0x39527516e7f8ee98 + .quad 0x117fdb2d1a5d9a9c + .quad 0x9c7745bcd1005c2a + .quad 0xefd4bef154d56fea + .quad 0x76579a29e822d016 + + // 2^72 * 4 * B + + .quad 0x45b68e7e49c02a17 + .quad 0x23cd51a2bca9a37f + .quad 0x3ed65f11ec224c1b + .quad 0x43a384dc9e05bdb1 + .quad 0x333cb51352b434f2 + .quad 0xd832284993de80e1 + .quad 0xb5512887750d35ce + .quad 0x02c514bb2a2777c1 + .quad 0x684bd5da8bf1b645 + .quad 0xfb8bd37ef6b54b53 + .quad 0x313916d7a9b0d253 + .quad 0x1160920961548059 + + // 2^72 * 5 * B + + .quad 0xb44d166929dacfaa + .quad 0xda529f4c8413598f + .quad 0xe9ef63ca453d5559 + .quad 0x351e125bc5698e0b + .quad 0x7a385616369b4dcd + .quad 0x75c02ca7655c3563 + .quad 0x7dc21bf9d4f18021 + .quad 0x2f637d7491e6e042 + .quad 0xd4b49b461af67bbe + .quad 0xd603037ac8ab8961 + .quad 0x71dee19ff9a699fb + .quad 0x7f182d06e7ce2a9a + + // 2^72 * 6 * B + + .quad 0x7a7c8e64ab0168ec + .quad 0xcb5a4a5515edc543 + .quad 0x095519d347cd0eda + .quad 0x67d4ac8c343e93b0 + .quad 0x09454b728e217522 + .quad 0xaa58e8f4d484b8d8 + .quad 0xd358254d7f46903c + .quad 0x44acc043241c5217 + .quad 0x1c7d6bbb4f7a5777 + .quad 0x8b35fed4918313e1 + .quad 0x4adca1c6c96b4684 + .quad 0x556d1c8312ad71bd + + // 2^72 * 7 * B + + .quad 0x17ef40e30c8d3982 + .quad 0x31f7073e15a3fa34 + .quad 0x4f21f3cb0773646e + .quad 0x746c6c6d1d824eff + .quad 0x81f06756b11be821 + .quad 0x0faff82310a3f3dd + .quad 0xf8b2d0556a99465d + .quad 0x097abe38cc8c7f05 + .quad 0x0c49c9877ea52da4 + .quad 0x4c4369559bdc1d43 + .quad 0x022c3809f7ccebd2 + .quad 0x577e14a34bee84bd + + // 2^72 * 8 * B + + .quad 0xf0e268ac61a73b0a + .quad 0xf2fafa103791a5f5 + .quad 0xc1e13e826b6d00e9 + .quad 0x60fa7ee96fd78f42 + .quad 0x94fecebebd4dd72b + .quad 0xf46a4fda060f2211 + .quad 0x124a5977c0c8d1ff + .quad 0x705304b8fb009295 + .quad 0xb63d1d354d296ec6 + .quad 0xf3c3053e5fad31d8 + .quad 0x670b958cb4bd42ec + .quad 0x21398e0ca16353fd + + // 2^76 * 1 * B + + .quad 0x216ab2ca8da7d2ef + .quad 0x366ad9dd99f42827 + .quad 0xae64b9004fdd3c75 + .quad 0x403a395b53909e62 + .quad 0x86c5fc16861b7e9a + .quad 0xf6a330476a27c451 + .quad 0x01667267a1e93597 + .quad 0x05ffb9cd6082dfeb + .quad 0xa617fa9ff53f6139 + .quad 0x60f2b5e513e66cb6 + .quad 0xd7a8beefb3448aa4 + .quad 0x7a2932856f5ea192 + + // 2^76 * 2 * B + + .quad 0x0b39d761b02de888 + .quad 0x5f550e7ed2414e1f + .quad 0xa6bfa45822e1a940 + .quad 0x050a2f7dfd447b99 + .quad 0xb89c444879639302 + .quad 0x4ae4f19350c67f2c + .quad 0xf0b35da8c81af9c6 + .quad 0x39d0003546871017 + .quad 
0x437c3b33a650db77 + .quad 0x6bafe81dbac52bb2 + .quad 0xfe99402d2db7d318 + .quad 0x2b5b7eec372ba6ce + + // 2^76 * 3 * B + + .quad 0xb3bc4bbd83f50eef + .quad 0x508f0c998c927866 + .quad 0x43e76587c8b7e66e + .quad 0x0f7655a3a47f98d9 + .quad 0xa694404d613ac8f4 + .quad 0x500c3c2bfa97e72c + .quad 0x874104d21fcec210 + .quad 0x1b205fb38604a8ee + .quad 0x55ecad37d24b133c + .quad 0x441e147d6038c90b + .quad 0x656683a1d62c6fee + .quad 0x0157d5dc87e0ecae + + // 2^76 * 4 * B + + .quad 0xf2a7af510354c13d + .quad 0xd7a0b145aa372b60 + .quad 0x2869b96a05a3d470 + .quad 0x6528e42d82460173 + .quad 0x95265514d71eb524 + .quad 0xe603d8815df14593 + .quad 0x147cdf410d4de6b7 + .quad 0x5293b1730437c850 + .quad 0x23d0e0814bccf226 + .quad 0x92c745cd8196fb93 + .quad 0x8b61796c59541e5b + .quad 0x40a44df0c021f978 + + // 2^76 * 5 * B + + .quad 0xdaa869894f20ea6a + .quad 0xea14a3d14c620618 + .quad 0x6001fccb090bf8be + .quad 0x35f4e822947e9cf0 + .quad 0x86c96e514bc5d095 + .quad 0xf20d4098fca6804a + .quad 0x27363d89c826ea5d + .quad 0x39ca36565719cacf + .quad 0x97506f2f6f87b75c + .quad 0xc624aea0034ae070 + .quad 0x1ec856e3aad34dd6 + .quad 0x055b0be0e440e58f + + // 2^76 * 6 * B + + .quad 0x6469a17d89735d12 + .quad 0xdb6f27d5e662b9f1 + .quad 0x9fcba3286a395681 + .quad 0x363b8004d269af25 + .quad 0x4d12a04b6ea33da2 + .quad 0x57cf4c15e36126dd + .quad 0x90ec9675ee44d967 + .quad 0x64ca348d2a985aac + .quad 0x99588e19e4c4912d + .quad 0xefcc3b4e1ca5ce6b + .quad 0x4522ea60fa5b98d5 + .quad 0x7064bbab1de4a819 + + // 2^76 * 7 * B + + .quad 0xb919e1515a770641 + .quad 0xa9a2e2c74e7f8039 + .quad 0x7527250b3df23109 + .quad 0x756a7330ac27b78b + .quad 0xa290c06142542129 + .quad 0xf2e2c2aebe8d5b90 + .quad 0xcf2458db76abfe1b + .quad 0x02157ade83d626bf + .quad 0x3e46972a1b9a038b + .quad 0x2e4ee66a7ee03fb4 + .quad 0x81a248776edbb4ca + .quad 0x1a944ee88ecd0563 + + // 2^76 * 8 * B + + .quad 0xd5a91d1151039372 + .quad 0x2ed377b799ca26de + .quad 0xa17202acfd366b6b + .quad 0x0730291bd6901995 + .quad 0xbb40a859182362d6 + .quad 0xb99f55778a4d1abb + .quad 0x8d18b427758559f6 + .quad 0x26c20fe74d26235a + .quad 0x648d1d9fe9cc22f5 + .quad 0x66bc561928dd577c + .quad 0x47d3ed21652439d1 + .quad 0x49d271acedaf8b49 + + // 2^80 * 1 * B + + .quad 0x89f5058a382b33f3 + .quad 0x5ae2ba0bad48c0b4 + .quad 0x8f93b503a53db36e + .quad 0x5aa3ed9d95a232e6 + .quad 0x2798aaf9b4b75601 + .quad 0x5eac72135c8dad72 + .quad 0xd2ceaa6161b7a023 + .quad 0x1bbfb284e98f7d4e + .quad 0x656777e9c7d96561 + .quad 0xcb2b125472c78036 + .quad 0x65053299d9506eee + .quad 0x4a07e14e5e8957cc + + // 2^80 * 2 * B + + .quad 0x4ee412cb980df999 + .quad 0xa315d76f3c6ec771 + .quad 0xbba5edde925c77fd + .quad 0x3f0bac391d313402 + .quad 0x240b58cdc477a49b + .quad 0xfd38dade6447f017 + .quad 0x19928d32a7c86aad + .quad 0x50af7aed84afa081 + .quad 0x6e4fde0115f65be5 + .quad 0x29982621216109b2 + .quad 0x780205810badd6d9 + .quad 0x1921a316baebd006 + + // 2^80 * 3 * B + + .quad 0x89422f7edfb870fc + .quad 0x2c296beb4f76b3bd + .quad 0x0738f1d436c24df7 + .quad 0x6458df41e273aeb0 + .quad 0xd75aad9ad9f3c18b + .quad 0x566a0eef60b1c19c + .quad 0x3e9a0bac255c0ed9 + .quad 0x7b049deca062c7f5 + .quad 0xdccbe37a35444483 + .quad 0x758879330fedbe93 + .quad 0x786004c312c5dd87 + .quad 0x6093dccbc2950e64 + + // 2^80 * 4 * B + + .quad 0x1ff39a8585e0706d + .quad 0x36d0a5d8b3e73933 + .quad 0x43b9f2e1718f453b + .quad 0x57d1ea084827a97c + .quad 0x6bdeeebe6084034b + .quad 0x3199c2b6780fb854 + .quad 0x973376abb62d0695 + .quad 0x6e3180c98b647d90 + .quad 0xee7ab6e7a128b071 + .quad 0xa4c1596d93a88baa + .quad 0xf7b4de82b2216130 + .quad 
0x363e999ddd97bd18 + + // 2^80 * 5 * B + + .quad 0x96a843c135ee1fc4 + .quad 0x976eb35508e4c8cf + .quad 0xb42f6801b58cd330 + .quad 0x48ee9b78693a052b + .quad 0x2f1848dce24baec6 + .quad 0x769b7255babcaf60 + .quad 0x90cb3c6e3cefe931 + .quad 0x231f979bc6f9b355 + .quad 0x5c31de4bcc2af3c6 + .quad 0xb04bb030fe208d1f + .quad 0xb78d7009c14fb466 + .quad 0x079bfa9b08792413 + + // 2^80 * 6 * B + + .quad 0xe3903a51da300df4 + .quad 0x843964233da95ab0 + .quad 0xed3cf12d0b356480 + .quad 0x038c77f684817194 + .quad 0xf3c9ed80a2d54245 + .quad 0x0aa08b7877f63952 + .quad 0xd76dac63d1085475 + .quad 0x1ef4fb159470636b + .quad 0x854e5ee65b167bec + .quad 0x59590a4296d0cdc2 + .quad 0x72b2df3498102199 + .quad 0x575ee92a4a0bff56 + + // 2^80 * 7 * B + + .quad 0xd4c080908a182fcf + .quad 0x30e170c299489dbd + .quad 0x05babd5752f733de + .quad 0x43d4e7112cd3fd00 + .quad 0x5d46bc450aa4d801 + .quad 0xc3af1227a533b9d8 + .quad 0x389e3b262b8906c2 + .quad 0x200a1e7e382f581b + .quad 0x518db967eaf93ac5 + .quad 0x71bc989b056652c0 + .quad 0xfe2b85d9567197f5 + .quad 0x050eca52651e4e38 + + // 2^80 * 8 * B + + .quad 0xc3431ade453f0c9c + .quad 0xe9f5045eff703b9b + .quad 0xfcd97ac9ed847b3d + .quad 0x4b0ee6c21c58f4c6 + .quad 0x97ac397660e668ea + .quad 0x9b19bbfe153ab497 + .quad 0x4cb179b534eca79f + .quad 0x6151c09fa131ae57 + .quad 0x3af55c0dfdf05d96 + .quad 0xdd262ee02ab4ee7a + .quad 0x11b2bb8712171709 + .quad 0x1fef24fa800f030b + + // 2^84 * 1 * B + + .quad 0xb496123a6b6c6609 + .quad 0xa750fe8580ab5938 + .quad 0xf471bf39b7c27a5f + .quad 0x507903ce77ac193c + .quad 0xff91a66a90166220 + .quad 0xf22552ae5bf1e009 + .quad 0x7dff85d87f90df7c + .quad 0x4f620ffe0c736fb9 + .quad 0x62f90d65dfde3e34 + .quad 0xcf28c592b9fa5fad + .quad 0x99c86ef9c6164510 + .quad 0x25d448044a256c84 + + // 2^84 * 2 * B + + .quad 0xbd68230ec7e9b16f + .quad 0x0eb1b9c1c1c5795d + .quad 0x7943c8c495b6b1ff + .quad 0x2f9faf620bbacf5e + .quad 0x2c7c4415c9022b55 + .quad 0x56a0d241812eb1fe + .quad 0xf02ea1c9d7b65e0d + .quad 0x4180512fd5323b26 + .quad 0xa4ff3e698a48a5db + .quad 0xba6a3806bd95403b + .quad 0x9f7ce1af47d5b65d + .quad 0x15e087e55939d2fb + + // 2^84 * 3 * B + + .quad 0x12207543745c1496 + .quad 0xdaff3cfdda38610c + .quad 0xe4e797272c71c34f + .quad 0x39c07b1934bdede9 + .quad 0x8894186efb963f38 + .quad 0x48a00e80dc639bd5 + .quad 0xa4e8092be96c1c99 + .quad 0x5a097d54ca573661 + .quad 0x2d45892b17c9e755 + .quad 0xd033fd7289308df8 + .quad 0x6c2fe9d9525b8bd9 + .quad 0x2edbecf1c11cc079 + + // 2^84 * 4 * B + + .quad 0x1616a4e3c715a0d2 + .quad 0x53623cb0f8341d4d + .quad 0x96ef5329c7e899cb + .quad 0x3d4e8dbba668baa6 + .quad 0xee0f0fddd087a25f + .quad 0x9c7531555c3e34ee + .quad 0x660c572e8fab3ab5 + .quad 0x0854fc44544cd3b2 + .quad 0x61eba0c555edad19 + .quad 0x24b533fef0a83de6 + .quad 0x3b77042883baa5f8 + .quad 0x678f82b898a47e8d + + // 2^84 * 5 * B + + .quad 0xb1491d0bd6900c54 + .quad 0x3539722c9d132636 + .quad 0x4db928920b362bc9 + .quad 0x4d7cd1fea68b69df + .quad 0x1e09d94057775696 + .quad 0xeed1265c3cd951db + .quad 0xfa9dac2b20bce16f + .quad 0x0f7f76e0e8d089f4 + .quad 0x36d9ebc5d485b00c + .quad 0xa2596492e4adb365 + .quad 0xc1659480c2119ccd + .quad 0x45306349186e0d5f + + // 2^84 * 6 * B + + .quad 0x94ddd0c1a6cdff1d + .quad 0x55f6f115e84213ae + .quad 0x6c935f85992fcf6a + .quad 0x067ee0f54a37f16f + .quad 0x96a414ec2b072491 + .quad 0x1bb2218127a7b65b + .quad 0x6d2849596e8a4af0 + .quad 0x65f3b08ccd27765f + .quad 0xecb29fff199801f7 + .quad 0x9d361d1fa2a0f72f + .quad 0x25f11d2375fd2f49 + .quad 0x124cefe80fe10fe2 + + // 2^84 * 7 * B + + .quad 0x4c126cf9d18df255 + .quad 0xc1d471e9147a63b6 
+ .quad 0x2c6d3c73f3c93b5f + .quad 0x6be3a6a2e3ff86a2 + .quad 0x1518e85b31b16489 + .quad 0x8faadcb7db710bfb + .quad 0x39b0bdf4a14ae239 + .quad 0x05f4cbea503d20c1 + .quad 0xce040e9ec04145bc + .quad 0xc71ff4e208f6834c + .quad 0xbd546e8dab8847a3 + .quad 0x64666aa0a4d2aba5 + + // 2^84 * 8 * B + + .quad 0x6841435a7c06d912 + .quad 0xca123c21bb3f830b + .quad 0xd4b37b27b1cbe278 + .quad 0x1d753b84c76f5046 + .quad 0xb0c53bf73337e94c + .quad 0x7cb5697e11e14f15 + .quad 0x4b84abac1930c750 + .quad 0x28dd4abfe0640468 + .quad 0x7dc0b64c44cb9f44 + .quad 0x18a3e1ace3925dbf + .quad 0x7a3034862d0457c4 + .quad 0x4c498bf78a0c892e + + // 2^88 * 1 * B + + .quad 0x37d653fb1aa73196 + .quad 0x0f9495303fd76418 + .quad 0xad200b09fb3a17b2 + .quad 0x544d49292fc8613e + .quad 0x22d2aff530976b86 + .quad 0x8d90b806c2d24604 + .quad 0xdca1896c4de5bae5 + .quad 0x28005fe6c8340c17 + .quad 0x6aefba9f34528688 + .quad 0x5c1bff9425107da1 + .quad 0xf75bbbcd66d94b36 + .quad 0x72e472930f316dfa + + // 2^88 * 2 * B + + .quad 0x2695208c9781084f + .quad 0xb1502a0b23450ee1 + .quad 0xfd9daea603efde02 + .quad 0x5a9d2e8c2733a34c + .quad 0x07f3f635d32a7627 + .quad 0x7aaa4d865f6566f0 + .quad 0x3c85e79728d04450 + .quad 0x1fee7f000fe06438 + .quad 0x765305da03dbf7e5 + .quad 0xa4daf2491434cdbd + .quad 0x7b4ad5cdd24a88ec + .quad 0x00f94051ee040543 + + // 2^88 * 3 * B + + .quad 0x8d356b23c3d330b2 + .quad 0xf21c8b9bb0471b06 + .quad 0xb36c316c6e42b83c + .quad 0x07d79c7e8beab10d + .quad 0xd7ef93bb07af9753 + .quad 0x583ed0cf3db766a7 + .quad 0xce6998bf6e0b1ec5 + .quad 0x47b7ffd25dd40452 + .quad 0x87fbfb9cbc08dd12 + .quad 0x8a066b3ae1eec29b + .quad 0x0d57242bdb1fc1bf + .quad 0x1c3520a35ea64bb6 + + // 2^88 * 4 * B + + .quad 0x80d253a6bccba34a + .quad 0x3e61c3a13838219b + .quad 0x90c3b6019882e396 + .quad 0x1c3d05775d0ee66f + .quad 0xcda86f40216bc059 + .quad 0x1fbb231d12bcd87e + .quad 0xb4956a9e17c70990 + .quad 0x38750c3b66d12e55 + .quad 0x692ef1409422e51a + .quad 0xcbc0c73c2b5df671 + .quad 0x21014fe7744ce029 + .quad 0x0621e2c7d330487c + + // 2^88 * 5 * B + + .quad 0xaf9860cc8259838d + .quad 0x90ea48c1c69f9adc + .quad 0x6526483765581e30 + .quad 0x0007d6097bd3a5bc + .quad 0xb7ae1796b0dbf0f3 + .quad 0x54dfafb9e17ce196 + .quad 0x25923071e9aaa3b4 + .quad 0x5d8e589ca1002e9d + .quad 0xc0bf1d950842a94b + .quad 0xb2d3c363588f2e3e + .quad 0x0a961438bb51e2ef + .quad 0x1583d7783c1cbf86 + + // 2^88 * 6 * B + + .quad 0xeceea2ef5da27ae1 + .quad 0x597c3a1455670174 + .quad 0xc9a62a126609167a + .quad 0x252a5f2e81ed8f70 + .quad 0x90034704cc9d28c7 + .quad 0x1d1b679ef72cc58f + .quad 0x16e12b5fbe5b8726 + .quad 0x4958064e83c5580a + .quad 0x0d2894265066e80d + .quad 0xfcc3f785307c8c6b + .quad 0x1b53da780c1112fd + .quad 0x079c170bd843b388 + + // 2^88 * 7 * B + + .quad 0x0506ece464fa6fff + .quad 0xbee3431e6205e523 + .quad 0x3579422451b8ea42 + .quad 0x6dec05e34ac9fb00 + .quad 0xcdd6cd50c0d5d056 + .quad 0x9af7686dbb03573b + .quad 0x3ca6723ff3c3ef48 + .quad 0x6768c0d7317b8acc + .quad 0x94b625e5f155c1b3 + .quad 0x417bf3a7997b7b91 + .quad 0xc22cbddc6d6b2600 + .quad 0x51445e14ddcd52f4 + + // 2^88 * 8 * B + + .quad 0x57502b4b3b144951 + .quad 0x8e67ff6b444bbcb3 + .quad 0xb8bd6927166385db + .quad 0x13186f31e39295c8 + .quad 0x893147ab2bbea455 + .quad 0x8c53a24f92079129 + .quad 0x4b49f948be30f7a7 + .quad 0x12e990086e4fd43d + .quad 0xf10c96b37fdfbb2e + .quad 0x9f9a935e121ceaf9 + .quad 0xdf1136c43a5b983f + .quad 0x77b2e3f05d3e99af + + // 2^92 * 1 * B + + .quad 0xfd0d75879cf12657 + .quad 0xe82fef94e53a0e29 + .quad 0xcc34a7f05bbb4be7 + .quad 0x0b251172a50c38a2 + .quad 0x9532f48fcc5cd29b + .quad 
0x2ba851bea3ce3671 + .quad 0x32dacaa051122941 + .quad 0x478d99d9350004f2 + .quad 0x1d5ad94890bb02c0 + .quad 0x50e208b10ec25115 + .quad 0xa26a22894ef21702 + .quad 0x4dc923343b524805 + + // 2^92 * 2 * B + + .quad 0xe3828c400f8086b6 + .quad 0x3f77e6f7979f0dc8 + .quad 0x7ef6de304df42cb4 + .quad 0x5265797cb6abd784 + .quad 0x3ad3e3ebf36c4975 + .quad 0xd75d25a537862125 + .quad 0xe873943da025a516 + .quad 0x6bbc7cb4c411c847 + .quad 0x3c6f9cd1d4a50d56 + .quad 0xb6244077c6feab7e + .quad 0x6ff9bf483580972e + .quad 0x00375883b332acfb + + // 2^92 * 3 * B + + .quad 0x0001b2cd28cb0940 + .quad 0x63fb51a06f1c24c9 + .quad 0xb5ad8691dcd5ca31 + .quad 0x67238dbd8c450660 + .quad 0xc98bec856c75c99c + .quad 0xe44184c000e33cf4 + .quad 0x0a676b9bba907634 + .quad 0x669e2cb571f379d7 + .quad 0xcb116b73a49bd308 + .quad 0x025aad6b2392729e + .quad 0xb4793efa3f55d9b1 + .quad 0x72a1056140678bb9 + + // 2^92 * 4 * B + + .quad 0xa2b6812b1cc9249d + .quad 0x62866eee21211f58 + .quad 0x2cb5c5b85df10ece + .quad 0x03a6b259e263ae00 + .quad 0x0d8d2909e2e505b6 + .quad 0x98ca78abc0291230 + .quad 0x77ef5569a9b12327 + .quad 0x7c77897b81439b47 + .quad 0xf1c1b5e2de331cb5 + .quad 0x5a9f5d8e15fca420 + .quad 0x9fa438f17bd932b1 + .quad 0x2a381bf01c6146e7 + + // 2^92 * 5 * B + + .quad 0xac9b9879cfc811c1 + .quad 0x8b7d29813756e567 + .quad 0x50da4e607c70edfc + .quad 0x5dbca62f884400b6 + .quad 0xf7c0be32b534166f + .quad 0x27e6ca6419cf70d4 + .quad 0x934df7d7a957a759 + .quad 0x5701461dabdec2aa + .quad 0x2c6747402c915c25 + .quad 0x1bdcd1a80b0d340a + .quad 0x5e5601bd07b43f5f + .quad 0x2555b4e05539a242 + + // 2^92 * 6 * B + + .quad 0x6fc09f5266ddd216 + .quad 0xdce560a7c8e37048 + .quad 0xec65939da2df62fd + .quad 0x7a869ae7e52ed192 + .quad 0x78409b1d87e463d4 + .quad 0xad4da95acdfb639d + .quad 0xec28773755259b9c + .quad 0x69c806e9c31230ab + .quad 0x7b48f57414bb3f22 + .quad 0x68c7cee4aedccc88 + .quad 0xed2f936179ed80be + .quad 0x25d70b885f77bc4b + + // 2^92 * 7 * B + + .quad 0x4151c3d9762bf4de + .quad 0x083f435f2745d82b + .quad 0x29775a2e0d23ddd5 + .quad 0x138e3a6269a5db24 + .quad 0x98459d29bb1ae4d4 + .quad 0x56b9c4c739f954ec + .quad 0x832743f6c29b4b3e + .quad 0x21ea8e2798b6878a + .quad 0x87bef4b46a5a7b9c + .quad 0xd2299d1b5fc1d062 + .quad 0x82409818dd321648 + .quad 0x5c5abeb1e5a2e03d + + // 2^92 * 8 * B + + .quad 0x14722af4b73c2ddb + .quad 0xbc470c5f5a05060d + .quad 0x00943eac2581b02e + .quad 0x0e434b3b1f499c8f + .quad 0x02cde6de1306a233 + .quad 0x7b5a52a2116f8ec7 + .quad 0xe1c681f4c1163b5b + .quad 0x241d350660d32643 + .quad 0x6be4404d0ebc52c7 + .quad 0xae46233bb1a791f5 + .quad 0x2aec170ed25db42b + .quad 0x1d8dfd966645d694 + + // 2^96 * 1 * B + + .quad 0x296fa9c59c2ec4de + .quad 0xbc8b61bf4f84f3cb + .quad 0x1c7706d917a8f908 + .quad 0x63b795fc7ad3255d + .quad 0xd598639c12ddb0a4 + .quad 0xa5d19f30c024866b + .quad 0xd17c2f0358fce460 + .quad 0x07a195152e095e8a + .quad 0xa8368f02389e5fc8 + .quad 0x90433b02cf8de43b + .quad 0xafa1fd5dc5412643 + .quad 0x3e8fe83d032f0137 + + // 2^96 * 2 * B + + .quad 0x2f8b15b90570a294 + .quad 0x94f2427067084549 + .quad 0xde1c5ae161bbfd84 + .quad 0x75ba3b797fac4007 + .quad 0x08704c8de8efd13c + .quad 0xdfc51a8e33e03731 + .quad 0xa59d5da51260cde3 + .quad 0x22d60899a6258c86 + .quad 0x6239dbc070cdd196 + .quad 0x60fe8a8b6c7d8a9a + .quad 0xb38847bceb401260 + .quad 0x0904d07b87779e5e + + // 2^96 * 3 * B + + .quad 0xb4ce1fd4ddba919c + .quad 0xcf31db3ec74c8daa + .quad 0x2c63cc63ad86cc51 + .quad 0x43e2143fbc1dde07 + .quad 0xf4322d6648f940b9 + .quad 0x06952f0cbd2d0c39 + .quad 0x167697ada081f931 + .quad 0x6240aacebaf72a6c + .quad 
0xf834749c5ba295a0 + .quad 0xd6947c5bca37d25a + .quad 0x66f13ba7e7c9316a + .quad 0x56bdaf238db40cac + + // 2^96 * 4 * B + + .quad 0x362ab9e3f53533eb + .quad 0x338568d56eb93d40 + .quad 0x9e0e14521d5a5572 + .quad 0x1d24a86d83741318 + .quad 0x1310d36cc19d3bb2 + .quad 0x062a6bb7622386b9 + .quad 0x7c9b8591d7a14f5c + .quad 0x03aa31507e1e5754 + .quad 0xf4ec7648ffd4ce1f + .quad 0xe045eaf054ac8c1c + .quad 0x88d225821d09357c + .quad 0x43b261dc9aeb4859 + + // 2^96 * 5 * B + + .quad 0xe55b1e1988bb79bb + .quad 0xa09ed07dc17a359d + .quad 0xb02c2ee2603dea33 + .quad 0x326055cf5b276bc2 + .quad 0x19513d8b6c951364 + .quad 0x94fe7126000bf47b + .quad 0x028d10ddd54f9567 + .quad 0x02b4d5e242940964 + .quad 0xb4a155cb28d18df2 + .quad 0xeacc4646186ce508 + .quad 0xc49cf4936c824389 + .quad 0x27a6c809ae5d3410 + + // 2^96 * 6 * B + + .quad 0x8ba6ebcd1f0db188 + .quad 0x37d3d73a675a5be8 + .quad 0xf22edfa315f5585a + .quad 0x2cb67174ff60a17e + .quad 0xcd2c270ac43d6954 + .quad 0xdd4a3e576a66cab2 + .quad 0x79fa592469d7036c + .quad 0x221503603d8c2599 + .quad 0x59eecdf9390be1d0 + .quad 0xa9422044728ce3f1 + .quad 0x82891c667a94f0f4 + .quad 0x7b1df4b73890f436 + + // 2^96 * 7 * B + + .quad 0xe492f2e0b3b2a224 + .quad 0x7c6c9e062b551160 + .quad 0x15eb8fe20d7f7b0e + .quad 0x61fcef2658fc5992 + .quad 0x5f2e221807f8f58c + .quad 0xe3555c9fd49409d4 + .quad 0xb2aaa88d1fb6a630 + .quad 0x68698245d352e03d + .quad 0xdbb15d852a18187a + .quad 0xf3e4aad386ddacd7 + .quad 0x44bae2810ff6c482 + .quad 0x46cf4c473daf01cf + + // 2^96 * 8 * B + + .quad 0x426525ed9ec4e5f9 + .quad 0x0e5eda0116903303 + .quad 0x72b1a7f2cbe5cadc + .quad 0x29387bcd14eb5f40 + .quad 0x213c6ea7f1498140 + .quad 0x7c1e7ef8392b4854 + .quad 0x2488c38c5629ceba + .quad 0x1065aae50d8cc5bb + .quad 0x1c2c4525df200d57 + .quad 0x5c3b2dd6bfca674a + .quad 0x0a07e7b1e1834030 + .quad 0x69a198e64f1ce716 + + // 2^100 * 1 * B + + .quad 0x7afcd613efa9d697 + .quad 0x0cc45aa41c067959 + .quad 0xa56fe104c1fada96 + .quad 0x3a73b70472e40365 + .quad 0x7b26e56b9e2d4734 + .quad 0xc4c7132b81c61675 + .quad 0xef5c9525ec9cde7f + .quad 0x39c80b16e71743ad + .quad 0x0f196e0d1b826c68 + .quad 0xf71ff0e24960e3db + .quad 0x6113167023b7436c + .quad 0x0cf0ea5877da7282 + + // 2^100 * 2 * B + + .quad 0x196c80a4ddd4ccbd + .quad 0x22e6f55d95f2dd9d + .quad 0xc75e33c740d6c71b + .quad 0x7bb51279cb3c042f + .quad 0xe332ced43ba6945a + .quad 0xde0b1361e881c05d + .quad 0x1ad40f095e67ed3b + .quad 0x5da8acdab8c63d5d + .quad 0xc4b6664a3a70159f + .quad 0x76194f0f0a904e14 + .quad 0xa5614c39a4096c13 + .quad 0x6cd0ff50979feced + + // 2^100 * 3 * B + + .quad 0xc0e067e78f4428ac + .quad 0x14835ab0a61135e3 + .quad 0xf21d14f338062935 + .quad 0x6390a4c8df04849c + .quad 0x7fecfabdb04ba18e + .quad 0xd0fc7bfc3bddbcf7 + .quad 0xa41d486e057a131c + .quad 0x641a4391f2223a61 + .quad 0xc5c6b95aa606a8db + .quad 0x914b7f9eb06825f1 + .quad 0x2a731f6b44fc9eff + .quad 0x30ddf38562705cfc + + // 2^100 * 4 * B + + .quad 0x4e3dcbdad1bff7f9 + .quad 0xc9118e8220645717 + .quad 0xbacccebc0f189d56 + .quad 0x1b4822e9d4467668 + .quad 0x33bef2bd68bcd52c + .quad 0xc649dbb069482ef2 + .quad 0xb5b6ee0c41cb1aee + .quad 0x5c294d270212a7e5 + .quad 0xab360a7f25563781 + .quad 0x2512228a480f7958 + .quad 0xc75d05276114b4e3 + .quad 0x222d9625d976fe2a + + // 2^100 * 5 * B + + .quad 0x1c717f85b372ace1 + .quad 0x81930e694638bf18 + .quad 0x239cad056bc08b58 + .quad 0x0b34271c87f8fff4 + .quad 0x0f94be7e0a344f85 + .quad 0xeb2faa8c87f22c38 + .quad 0x9ce1e75e4ee16f0f + .quad 0x43e64e5418a08dea + .quad 0x8155e2521a35ce63 + .quad 0xbe100d4df912028e + .quad 0xbff80bf8a57ddcec + .quad 
0x57342dc96d6bc6e4 + + // 2^100 * 6 * B + + .quad 0xefeef065c8ce5998 + .quad 0xbf029510b5cbeaa2 + .quad 0x8c64a10620b7c458 + .quad 0x35134fb231c24855 + .quad 0xf3c3bcb71e707bf6 + .quad 0x351d9b8c7291a762 + .quad 0x00502e6edad69a33 + .quad 0x522f521f1ec8807f + .quad 0x272c1f46f9a3902b + .quad 0xc91ba3b799657bcc + .quad 0xae614b304f8a1c0e + .quad 0x7afcaad70b99017b + + // 2^100 * 7 * B + + .quad 0xc25ded54a4b8be41 + .quad 0x902d13e11bb0e2dd + .quad 0x41f43233cde82ab2 + .quad 0x1085faa5c3aae7cb + .quad 0xa88141ecef842b6b + .quad 0x55e7b14797abe6c5 + .quad 0x8c748f9703784ffe + .quad 0x5b50a1f7afcd00b7 + .quad 0x9b840f66f1361315 + .quad 0x18462242701003e9 + .quad 0x65ed45fae4a25080 + .quad 0x0a2862393fda7320 + + // 2^100 * 8 * B + + .quad 0x46ab13c8347cbc9d + .quad 0x3849e8d499c12383 + .quad 0x4cea314087d64ac9 + .quad 0x1f354134b1a29ee7 + .quad 0x960e737b6ecb9d17 + .quad 0xfaf24948d67ceae1 + .quad 0x37e7a9b4d55e1b89 + .quad 0x5cb7173cb46c59eb + .quad 0x4a89e68b82b7abf0 + .quad 0xf41cd9279ba6b7b9 + .quad 0x16e6c210e18d876f + .quad 0x7cacdb0f7f1b09c6 + + // 2^104 * 1 * B + + .quad 0x9062b2e0d91a78bc + .quad 0x47c9889cc8509667 + .quad 0x9df54a66405070b8 + .quad 0x7369e6a92493a1bf + .quad 0xe1014434dcc5caed + .quad 0x47ed5d963c84fb33 + .quad 0x70019576ed86a0e7 + .quad 0x25b2697bd267f9e4 + .quad 0x9d673ffb13986864 + .quad 0x3ca5fbd9415dc7b8 + .quad 0xe04ecc3bdf273b5e + .quad 0x1420683db54e4cd2 + + // 2^104 * 2 * B + + .quad 0xb478bd1e249dd197 + .quad 0x620c35005e58c102 + .quad 0xfb02d32fccbaac5c + .quad 0x60b63bebf508a72d + .quad 0x34eebb6fc1cc5ad0 + .quad 0x6a1b0ce99646ac8b + .quad 0xd3b0da49a66bde53 + .quad 0x31e83b4161d081c1 + .quad 0x97e8c7129e062b4f + .quad 0x49e48f4f29320ad8 + .quad 0x5bece14b6f18683f + .quad 0x55cf1eb62d550317 + + // 2^104 * 3 * B + + .quad 0x5879101065c23d58 + .quad 0x8b9d086d5094819c + .quad 0xe2402fa912c55fa7 + .quad 0x669a6564570891d4 + .quad 0x3076b5e37df58c52 + .quad 0xd73ab9dde799cc36 + .quad 0xbd831ce34913ee20 + .quad 0x1a56fbaa62ba0133 + .quad 0x943e6b505c9dc9ec + .quad 0x302557bba77c371a + .quad 0x9873ae5641347651 + .quad 0x13c4836799c58a5c + + // 2^104 * 4 * B + + .quad 0x423a5d465ab3e1b9 + .quad 0xfc13c187c7f13f61 + .quad 0x19f83664ecb5b9b6 + .quad 0x66f80c93a637b607 + .quad 0xc4dcfb6a5d8bd080 + .quad 0xdeebc4ec571a4842 + .quad 0xd4b2e883b8e55365 + .quad 0x50bdc87dc8e5b827 + .quad 0x606d37836edfe111 + .quad 0x32353e15f011abd9 + .quad 0x64b03ac325b73b96 + .quad 0x1dd56444725fd5ae + + // 2^104 * 5 * B + + .quad 0x8fa47ff83362127d + .quad 0xbc9f6ac471cd7c15 + .quad 0x6e71454349220c8b + .quad 0x0e645912219f732e + .quad 0xc297e60008bac89a + .quad 0x7d4cea11eae1c3e0 + .quad 0xf3e38be19fe7977c + .quad 0x3a3a450f63a305cd + .quad 0x078f2f31d8394627 + .quad 0x389d3183de94a510 + .quad 0xd1e36c6d17996f80 + .quad 0x318c8d9393a9a87b + + // 2^104 * 6 * B + + .quad 0xf2745d032afffe19 + .quad 0x0c9f3c497f24db66 + .quad 0xbc98d3e3ba8598ef + .quad 0x224c7c679a1d5314 + .quad 0x5d669e29ab1dd398 + .quad 0xfc921658342d9e3b + .quad 0x55851dfdf35973cd + .quad 0x509a41c325950af6 + .quad 0xbdc06edca6f925e9 + .quad 0x793ef3f4641b1f33 + .quad 0x82ec12809d833e89 + .quad 0x05bff02328a11389 + + // 2^104 * 7 * B + + .quad 0x3632137023cae00b + .quad 0x544acf0ad1accf59 + .quad 0x96741049d21a1c88 + .quad 0x780b8cc3fa2a44a7 + .quad 0x6881a0dd0dc512e4 + .quad 0x4fe70dc844a5fafe + .quad 0x1f748e6b8f4a5240 + .quad 0x576277cdee01a3ea + .quad 0x1ef38abc234f305f + .quad 0x9a577fbd1405de08 + .quad 0x5e82a51434e62a0d + .quad 0x5ff418726271b7a1 + + // 2^104 * 8 * B + + .quad 0x398e080c1789db9d + .quad 
0xa7602025f3e778f5 + .quad 0xfa98894c06bd035d + .quad 0x106a03dc25a966be + .quad 0xe5db47e813b69540 + .quad 0xf35d2a3b432610e1 + .quad 0xac1f26e938781276 + .quad 0x29d4db8ca0a0cb69 + .quad 0xd9ad0aaf333353d0 + .quad 0x38669da5acd309e5 + .quad 0x3c57658ac888f7f0 + .quad 0x4ab38a51052cbefa + + // 2^108 * 1 * B + + .quad 0xdfdacbee4324c0e9 + .quad 0x054442883f955bb7 + .quad 0xdef7aaa8ea31609f + .quad 0x68aee70642287cff + .quad 0xf68fe2e8809de054 + .quad 0xe3bc096a9c82bad1 + .quad 0x076353d40aadbf45 + .quad 0x7b9b1fb5dea1959e + .quad 0xf01cc8f17471cc0c + .quad 0x95242e37579082bb + .quad 0x27776093d3e46b5f + .quad 0x2d13d55a28bd85fb + + // 2^108 * 2 * B + + .quad 0xfac5d2065b35b8da + .quad 0xa8da8a9a85624bb7 + .quad 0xccd2ca913d21cd0f + .quad 0x6b8341ee8bf90d58 + .quad 0xbf019cce7aee7a52 + .quad 0xa8ded2b6e454ead3 + .quad 0x3c619f0b87a8bb19 + .quad 0x3619b5d7560916d8 + .quad 0x3579f26b0282c4b2 + .quad 0x64d592f24fafefae + .quad 0xb7cded7b28c8c7c0 + .quad 0x6a927b6b7173a8d7 + + // 2^108 * 3 * B + + .quad 0x1f6db24f986e4656 + .quad 0x1021c02ed1e9105b + .quad 0xf8ff3fff2cc0a375 + .quad 0x1d2a6bf8c6c82592 + .quad 0x8d7040863ece88eb + .quad 0xf0e307a980eec08c + .quad 0xac2250610d788fda + .quad 0x056d92a43a0d478d + .quad 0x1b05a196fc3da5a1 + .quad 0x77d7a8c243b59ed0 + .quad 0x06da3d6297d17918 + .quad 0x66fbb494f12353f7 + + // 2^108 * 4 * B + + .quad 0x751a50b9d85c0fb8 + .quad 0xd1afdc258bcf097b + .quad 0x2f16a6a38309a969 + .quad 0x14ddff9ee5b00659 + .quad 0xd6d70996f12309d6 + .quad 0xdbfb2385e9c3d539 + .quad 0x46d602b0f7552411 + .quad 0x270a0b0557843e0c + .quad 0x61ff0640a7862bcc + .quad 0x81cac09a5f11abfe + .quad 0x9047830455d12abb + .quad 0x19a4bde1945ae873 + + // 2^108 * 5 * B + + .quad 0x9b9f26f520a6200a + .quad 0x64804443cf13eaf8 + .quad 0x8a63673f8631edd3 + .quad 0x72bbbce11ed39dc1 + .quad 0x40c709dec076c49f + .quad 0x657bfaf27f3e53f6 + .quad 0x40662331eca042c4 + .quad 0x14b375487eb4df04 + .quad 0xae853c94ab66dc47 + .quad 0xeb62343edf762d6e + .quad 0xf08e0e186fb2f7d1 + .quad 0x4f0b1c02700ab37a + + // 2^108 * 6 * B + + .quad 0xe1706787d81951fa + .quad 0xa10a2c8eb290c77b + .quad 0xe7382fa03ed66773 + .quad 0x0a4d84710bcc4b54 + .quad 0x79fd21ccc1b2e23f + .quad 0x4ae7c281453df52a + .quad 0xc8172ec9d151486b + .quad 0x68abe9443e0a7534 + .quad 0xda12c6c407831dcb + .quad 0x0da230d74d5c510d + .quad 0x4ab1531e6bd404e1 + .quad 0x4106b166bcf440ef + + // 2^108 * 7 * B + + .quad 0x02e57a421cd23668 + .quad 0x4ad9fb5d0eaef6fd + .quad 0x954e6727b1244480 + .quad 0x7f792f9d2699f331 + .quad 0xa485ccd539e4ecf2 + .quad 0x5aa3f3ad0555bab5 + .quad 0x145e3439937df82d + .quad 0x1238b51e1214283f + .quad 0x0b886b925fd4d924 + .quad 0x60906f7a3626a80d + .quad 0xecd367b4b98abd12 + .quad 0x2876beb1def344cf + + // 2^108 * 8 * B + + .quad 0xdc84e93563144691 + .quad 0x632fe8a0d61f23f4 + .quad 0x4caa800612a9a8d5 + .quad 0x48f9dbfa0e9918d3 + .quad 0xd594b3333a8a85f8 + .quad 0x4ea37689e78d7d58 + .quad 0x73bf9f455e8e351f + .quad 0x5507d7d2bc41ebb4 + .quad 0x1ceb2903299572fc + .quad 0x7c8ccaa29502d0ee + .quad 0x91bfa43411cce67b + .quad 0x5784481964a831e7 + + // 2^112 * 1 * B + + .quad 0xda7c2b256768d593 + .quad 0x98c1c0574422ca13 + .quad 0xf1a80bd5ca0ace1d + .quad 0x29cdd1adc088a690 + .quad 0xd6cfd1ef5fddc09c + .quad 0xe82b3efdf7575dce + .quad 0x25d56b5d201634c2 + .quad 0x3041c6bb04ed2b9b + .quad 0x0ff2f2f9d956e148 + .quad 0xade797759f356b2e + .quad 0x1a4698bb5f6c025c + .quad 0x104bbd6814049a7b + + // 2^112 * 2 * B + + .quad 0x51f0fd3168f1ed67 + .quad 0x2c811dcdd86f3bc2 + .quad 0x44dc5c4304d2f2de + .quad 0x5be8cc57092a7149 + .quad 
0xa95d9a5fd67ff163 + .quad 0xe92be69d4cc75681 + .quad 0xb7f8024cde20f257 + .quad 0x204f2a20fb072df5 + .quad 0xc8143b3d30ebb079 + .quad 0x7589155abd652e30 + .quad 0x653c3c318f6d5c31 + .quad 0x2570fb17c279161f + + // 2^112 * 3 * B + + .quad 0x3efa367f2cb61575 + .quad 0xf5f96f761cd6026c + .quad 0xe8c7142a65b52562 + .quad 0x3dcb65ea53030acd + .quad 0x192ea9550bb8245a + .quad 0xc8e6fba88f9050d1 + .quad 0x7986ea2d88a4c935 + .quad 0x241c5f91de018668 + .quad 0x28d8172940de6caa + .quad 0x8fbf2cf022d9733a + .quad 0x16d7fcdd235b01d1 + .quad 0x08420edd5fcdf0e5 + + // 2^112 * 4 * B + + .quad 0xcdff20ab8362fa4a + .quad 0x57e118d4e21a3e6e + .quad 0xe3179617fc39e62b + .quad 0x0d9a53efbc1769fd + .quad 0x0358c34e04f410ce + .quad 0xb6135b5a276e0685 + .quad 0x5d9670c7ebb91521 + .quad 0x04d654f321db889c + .quad 0x5e7dc116ddbdb5d5 + .quad 0x2954deb68da5dd2d + .quad 0x1cb608173334a292 + .quad 0x4a7a4f2618991ad7 + + // 2^112 * 5 * B + + .quad 0xf4a718025fb15f95 + .quad 0x3df65f346b5c1b8f + .quad 0xcdfcf08500e01112 + .quad 0x11b50c4cddd31848 + .quad 0x24c3b291af372a4b + .quad 0x93da8270718147f2 + .quad 0xdd84856486899ef2 + .quad 0x4a96314223e0ee33 + .quad 0xa6e8274408a4ffd6 + .quad 0x738e177e9c1576d9 + .quad 0x773348b63d02b3f2 + .quad 0x4f4bce4dce6bcc51 + + // 2^112 * 6 * B + + .quad 0xa71fce5ae2242584 + .quad 0x26ea725692f58a9e + .quad 0xd21a09d71cea3cf4 + .quad 0x73fcdd14b71c01e6 + .quad 0x30e2616ec49d0b6f + .quad 0xe456718fcaec2317 + .quad 0x48eb409bf26b4fa6 + .quad 0x3042cee561595f37 + .quad 0x427e7079449bac41 + .quad 0x855ae36dbce2310a + .quad 0x4cae76215f841a7c + .quad 0x389e740c9a9ce1d6 + + // 2^112 * 7 * B + + .quad 0x64fcb3ae34dcb9ce + .quad 0x97500323e348d0ad + .quad 0x45b3f07d62c6381b + .quad 0x61545379465a6788 + .quad 0xc9bd78f6570eac28 + .quad 0xe55b0b3227919ce1 + .quad 0x65fc3eaba19b91ed + .quad 0x25c425e5d6263690 + .quad 0x3f3e06a6f1d7de6e + .quad 0x3ef976278e062308 + .quad 0x8c14f6264e8a6c77 + .quad 0x6539a08915484759 + + // 2^112 * 8 * B + + .quad 0xe9d21f74c3d2f773 + .quad 0xc150544125c46845 + .quad 0x624e5ce8f9b99e33 + .quad 0x11c5e4aac5cd186c + .quad 0xddc4dbd414bb4a19 + .quad 0x19b2bc3c98424f8e + .quad 0x48a89fd736ca7169 + .quad 0x0f65320ef019bd90 + .quad 0xd486d1b1cafde0c6 + .quad 0x4f3fe6e3163b5181 + .quad 0x59a8af0dfaf2939a + .quad 0x4cabc7bdec33072a + + // 2^116 * 1 * B + + .quad 0x16faa8fb532f7428 + .quad 0xdbd42ea046a4e272 + .quad 0x5337653b8b9ea480 + .quad 0x4065947223973f03 + .quad 0xf7c0a19c1a54a044 + .quad 0x4a1c5e2477bd9fbb + .quad 0xa6e3ca115af22972 + .quad 0x1819bb953f2e9e0d + .quad 0x498fbb795e042e84 + .quad 0x7d0dd89a7698b714 + .quad 0x8bfb0ba427fe6295 + .quad 0x36ba82e721200524 + + // 2^116 * 2 * B + + .quad 0xd60ecbb74245ec41 + .quad 0xfd9be89e34348716 + .quad 0xc9240afee42284de + .quad 0x4472f648d0531db4 + .quad 0xc8d69d0a57274ed5 + .quad 0x45ba803260804b17 + .quad 0xdf3cda102255dfac + .quad 0x77d221232709b339 + .quad 0x498a6d7064ad94d8 + .quad 0xa5b5c8fd9af62263 + .quad 0x8ca8ed0545c141f4 + .quad 0x2c63bec3662d358c + + // 2^116 * 3 * B + + .quad 0x7fe60d8bea787955 + .quad 0xb9dc117eb5f401b7 + .quad 0x91c7c09a19355cce + .quad 0x22692ef59442bedf + .quad 0x9a518b3a8586f8bf + .quad 0x9ee71af6cbb196f0 + .quad 0xaa0625e6a2385cf2 + .quad 0x1deb2176ddd7c8d1 + .quad 0x8563d19a2066cf6c + .quad 0x401bfd8c4dcc7cd7 + .quad 0xd976a6becd0d8f62 + .quad 0x67cfd773a278b05e + + // 2^116 * 4 * B + + .quad 0x8dec31faef3ee475 + .quad 0x99dbff8a9e22fd92 + .quad 0x512d11594e26cab1 + .quad 0x0cde561eec4310b9 + .quad 0x2d5fa9855a4e586a + .quad 0x65f8f7a449beab7e + .quad 0xaa074dddf21d33d3 + .quad 
0x185cba721bcb9dee + .quad 0x93869da3f4e3cb41 + .quad 0xbf0392f540f7977e + .quad 0x026204fcd0463b83 + .quad 0x3ec91a769eec6eed + + // 2^116 * 5 * B + + .quad 0x1e9df75bf78166ad + .quad 0x4dfda838eb0cd7af + .quad 0xba002ed8c1eaf988 + .quad 0x13fedb3e11f33cfc + .quad 0x0fad2fb7b0a3402f + .quad 0x46615ecbfb69f4a8 + .quad 0xf745bcc8c5f8eaa6 + .quad 0x7a5fa8794a94e896 + .quad 0x52958faa13cd67a1 + .quad 0x965ee0818bdbb517 + .quad 0x16e58daa2e8845b3 + .quad 0x357d397d5499da8f + + // 2^116 * 6 * B + + .quad 0x1ebfa05fb0bace6c + .quad 0xc934620c1caf9a1e + .quad 0xcc771cc41d82b61a + .quad 0x2d94a16aa5f74fec + .quad 0x481dacb4194bfbf8 + .quad 0x4d77e3f1bae58299 + .quad 0x1ef4612e7d1372a0 + .quad 0x3a8d867e70ff69e1 + .quad 0x6f58cd5d55aff958 + .quad 0xba3eaa5c75567721 + .quad 0x75c123999165227d + .quad 0x69be1343c2f2b35e + + // 2^116 * 7 * B + + .quad 0x0e091d5ee197c92a + .quad 0x4f51019f2945119f + .quad 0x143679b9f034e99c + .quad 0x7d88112e4d24c696 + .quad 0x82bbbdac684b8de3 + .quad 0xa2f4c7d03fca0718 + .quad 0x337f92fbe096aaa8 + .quad 0x200d4d8c63587376 + .quad 0x208aed4b4893b32b + .quad 0x3efbf23ebe59b964 + .quad 0xd762deb0dba5e507 + .quad 0x69607bd681bd9d94 + + // 2^116 * 8 * B + + .quad 0xf6be021068de1ce1 + .quad 0xe8d518e70edcbc1f + .quad 0xe3effdd01b5505a5 + .quad 0x35f63353d3ec3fd0 + .quad 0x3b7f3bd49323a902 + .quad 0x7c21b5566b2c6e53 + .quad 0xe5ba8ff53a7852a7 + .quad 0x28bc77a5838ece00 + .quad 0x63ba78a8e25d8036 + .quad 0x63651e0094333490 + .quad 0x48d82f20288ce532 + .quad 0x3a31abfa36b57524 + + // 2^120 * 1 * B + + .quad 0x239e9624089c0a2e + .quad 0xc748c4c03afe4738 + .quad 0x17dbed2a764fa12a + .quad 0x639b93f0321c8582 + .quad 0xc08f788f3f78d289 + .quad 0xfe30a72ca1404d9f + .quad 0xf2778bfccf65cc9d + .quad 0x7ee498165acb2021 + .quad 0x7bd508e39111a1c3 + .quad 0x2b2b90d480907489 + .quad 0xe7d2aec2ae72fd19 + .quad 0x0edf493c85b602a6 + + // 2^120 * 2 * B + + .quad 0xaecc8158599b5a68 + .quad 0xea574f0febade20e + .quad 0x4fe41d7422b67f07 + .quad 0x403b92e3019d4fb4 + .quad 0x6767c4d284764113 + .quad 0xa090403ff7f5f835 + .quad 0x1c8fcffacae6bede + .quad 0x04c00c54d1dfa369 + .quad 0x4dc22f818b465cf8 + .quad 0x71a0f35a1480eff8 + .quad 0xaee8bfad04c7d657 + .quad 0x355bb12ab26176f4 + + // 2^120 * 3 * B + + .quad 0xa71e64cc7493bbf4 + .quad 0xe5bd84d9eca3b0c3 + .quad 0x0a6bc50cfa05e785 + .quad 0x0f9b8132182ec312 + .quad 0xa301dac75a8c7318 + .quad 0xed90039db3ceaa11 + .quad 0x6f077cbf3bae3f2d + .quad 0x7518eaf8e052ad8e + .quad 0xa48859c41b7f6c32 + .quad 0x0f2d60bcf4383298 + .quad 0x1815a929c9b1d1d9 + .quad 0x47c3871bbb1755c4 + + // 2^120 * 4 * B + + .quad 0x5144539771ec4f48 + .quad 0xf805b17dc98c5d6e + .quad 0xf762c11a47c3c66b + .quad 0x00b89b85764699dc + .quad 0xfbe65d50c85066b0 + .quad 0x62ecc4b0b3a299b0 + .quad 0xe53754ea441ae8e0 + .quad 0x08fea02ce8d48d5f + .quad 0x824ddd7668deead0 + .quad 0xc86445204b685d23 + .quad 0xb514cfcd5d89d665 + .quad 0x473829a74f75d537 + + // 2^120 * 5 * B + + .quad 0x82d2da754679c418 + .quad 0xe63bd7d8b2618df0 + .quad 0x355eef24ac47eb0a + .quad 0x2078684c4833c6b4 + .quad 0x23d9533aad3902c9 + .quad 0x64c2ddceef03588f + .quad 0x15257390cfe12fb4 + .quad 0x6c668b4d44e4d390 + .quad 0x3b48cf217a78820c + .quad 0xf76a0ab281273e97 + .quad 0xa96c65a78c8eed7b + .quad 0x7411a6054f8a433f + + // 2^120 * 6 * B + + .quad 0x4d659d32b99dc86d + .quad 0x044cdc75603af115 + .quad 0xb34c712cdcc2e488 + .quad 0x7c136574fb8134ff + .quad 0x579ae53d18b175b4 + .quad 0x68713159f392a102 + .quad 0x8455ecba1eef35f5 + .quad 0x1ec9a872458c398f + .quad 0xb8e6a4d400a2509b + .quad 0x9b81d7020bc882b4 + .quad 
0x57e7cc9bf1957561 + .quad 0x3add88a5c7cd6460 + + // 2^120 * 7 * B + + .quad 0xab895770b635dcf2 + .quad 0x02dfef6cf66c1fbc + .quad 0x85530268beb6d187 + .quad 0x249929fccc879e74 + .quad 0x85c298d459393046 + .quad 0x8f7e35985ff659ec + .quad 0x1d2ca22af2f66e3a + .quad 0x61ba1131a406a720 + .quad 0xa3d0a0f116959029 + .quad 0x023b6b6cba7ebd89 + .quad 0x7bf15a3e26783307 + .quad 0x5620310cbbd8ece7 + + // 2^120 * 8 * B + + .quad 0x528993434934d643 + .quad 0xb9dbf806a51222f5 + .quad 0x8f6d878fc3f41c22 + .quad 0x37676a2a4d9d9730 + .quad 0x6646b5f477e285d6 + .quad 0x40e8ff676c8f6193 + .quad 0xa6ec7311abb594dd + .quad 0x7ec846f3658cec4d + .quad 0x9b5e8f3f1da22ec7 + .quad 0x130f1d776c01cd13 + .quad 0x214c8fcfa2989fb8 + .quad 0x6daaf723399b9dd5 + + // 2^124 * 1 * B + + .quad 0x591e4a5610628564 + .quad 0x2a4bb87ca8b4df34 + .quad 0xde2a2572e7a38e43 + .quad 0x3cbdabd9fee5046e + .quad 0x81aebbdd2cd13070 + .quad 0x962e4325f85a0e9e + .quad 0xde9391aacadffecb + .quad 0x53177fda52c230e6 + .quad 0xa7bc970650b9de79 + .quad 0x3d12a7fbc301b59b + .quad 0x02652e68d36ae38c + .quad 0x79d739835a6199dc + + // 2^124 * 2 * B + + .quad 0xd9354df64131c1bd + .quad 0x758094a186ec5822 + .quad 0x4464ee12e459f3c2 + .quad 0x6c11fce4cb133282 + .quad 0x21c9d9920d591737 + .quad 0x9bea41d2e9b46cd6 + .quad 0xe20e84200d89bfca + .quad 0x79d99f946eae5ff8 + .quad 0xf17b483568673205 + .quad 0x387deae83caad96c + .quad 0x61b471fd56ffe386 + .quad 0x31741195b745a599 + + // 2^124 * 3 * B + + .quad 0xe8d10190b77a360b + .quad 0x99b983209995e702 + .quad 0xbd4fdff8fa0247aa + .quad 0x2772e344e0d36a87 + .quad 0x17f8ba683b02a047 + .quad 0x50212096feefb6c8 + .quad 0x70139be21556cbe2 + .quad 0x203e44a11d98915b + .quad 0xd6863eba37b9e39f + .quad 0x105bc169723b5a23 + .quad 0x104f6459a65c0762 + .quad 0x567951295b4d38d4 + + // 2^124 * 4 * B + + .quad 0x535fd60613037524 + .quad 0xe210adf6b0fbc26a + .quad 0xac8d0a9b23e990ae + .quad 0x47204d08d72fdbf9 + .quad 0x07242eb30d4b497f + .quad 0x1ef96306b9bccc87 + .quad 0x37950934d8116f45 + .quad 0x05468d6201405b04 + .quad 0x00f565a9f93267de + .quad 0xcecfd78dc0d58e8a + .quad 0xa215e2dcf318e28e + .quad 0x4599ee919b633352 + + // 2^124 * 5 * B + + .quad 0xd3c220ca70e0e76b + .quad 0xb12bea58ea9f3094 + .quad 0x294ddec8c3271282 + .quad 0x0c3539e1a1d1d028 + .quad 0xac746d6b861ae579 + .quad 0x31ab0650f6aea9dc + .quad 0x241d661140256d4c + .quad 0x2f485e853d21a5de + .quad 0x329744839c0833f3 + .quad 0x6fe6257fd2abc484 + .quad 0x5327d1814b358817 + .quad 0x65712585893fe9bc + + // 2^124 * 6 * B + + .quad 0x9c102fb732a61161 + .quad 0xe48e10dd34d520a8 + .quad 0x365c63546f9a9176 + .quad 0x32f6fe4c046f6006 + .quad 0x81c29f1bd708ee3f + .quad 0xddcb5a05ae6407d0 + .quad 0x97aec1d7d2a3eba7 + .quad 0x1590521a91d50831 + .quad 0x40a3a11ec7910acc + .quad 0x9013dff8f16d27ae + .quad 0x1a9720d8abb195d4 + .quad 0x1bb9fe452ea98463 + + // 2^124 * 7 * B + + .quad 0xe9d1d950b3d54f9e + .quad 0x2d5f9cbee00d33c1 + .quad 0x51c2c656a04fc6ac + .quad 0x65c091ee3c1cbcc9 + .quad 0xcf5e6c95cc36747c + .quad 0x294201536b0bc30d + .quad 0x453ac67cee797af0 + .quad 0x5eae6ab32a8bb3c9 + .quad 0x7083661114f118ea + .quad 0x2b37b87b94349cad + .quad 0x7273f51cb4e99f40 + .quad 0x78a2a95823d75698 + + // 2^124 * 8 * B + + .quad 0xa2b072e95c8c2ace + .quad 0x69cffc96651e9c4b + .quad 0x44328ef842e7b42b + .quad 0x5dd996c122aadeb3 + .quad 0xb4f23c425ef83207 + .quad 0xabf894d3c9a934b5 + .quad 0xd0708c1339fd87f7 + .quad 0x1876789117166130 + .quad 0x925b5ef0670c507c + .quad 0x819bc842b93c33bf + .quad 0x10792e9a70dd003f + .quad 0x59ad4b7a6e28dc74 + + // 2^128 * 1 * B + + .quad 
0x5f3a7562eb3dbe47 + .quad 0xf7ea38548ebda0b8 + .quad 0x00c3e53145747299 + .quad 0x1304e9e71627d551 + .quad 0x583b04bfacad8ea2 + .quad 0x29b743e8148be884 + .quad 0x2b1e583b0810c5db + .quad 0x2b5449e58eb3bbaa + .quad 0x789814d26adc9cfe + .quad 0x3c1bab3f8b48dd0b + .quad 0xda0fe1fff979c60a + .quad 0x4468de2d7c2dd693 + + // 2^128 * 2 * B + + .quad 0x51bb355e9419469e + .quad 0x33e6dc4c23ddc754 + .quad 0x93a5b6d6447f9962 + .quad 0x6cce7c6ffb44bd63 + .quad 0x4b9ad8c6f86307ce + .quad 0x21113531435d0c28 + .quad 0xd4a866c5657a772c + .quad 0x5da6427e63247352 + .quad 0x1a94c688deac22ca + .quad 0xb9066ef7bbae1ff8 + .quad 0x88ad8c388d59580f + .quad 0x58f29abfe79f2ca8 + + // 2^128 * 3 * B + + .quad 0xe90ecfab8de73e68 + .quad 0x54036f9f377e76a5 + .quad 0xf0495b0bbe015982 + .quad 0x577629c4a7f41e36 + .quad 0x4b5a64bf710ecdf6 + .quad 0xb14ce538462c293c + .quad 0x3643d056d50b3ab9 + .quad 0x6af93724185b4870 + .quad 0x3220024509c6a888 + .quad 0xd2e036134b558973 + .quad 0x83e236233c33289f + .quad 0x701f25bb0caec18f + + // 2^128 * 4 * B + + .quad 0xc3a8b0f8e4616ced + .quad 0xf700660e9e25a87d + .quad 0x61e3061ff4bca59c + .quad 0x2e0c92bfbdc40be9 + .quad 0x9d18f6d97cbec113 + .quad 0x844a06e674bfdbe4 + .quad 0x20f5b522ac4e60d6 + .quad 0x720a5bc050955e51 + .quad 0x0c3f09439b805a35 + .quad 0xe84e8b376242abfc + .quad 0x691417f35c229346 + .quad 0x0e9b9cbb144ef0ec + + // 2^128 * 5 * B + + .quad 0xfbbad48ffb5720ad + .quad 0xee81916bdbf90d0e + .quad 0xd4813152635543bf + .quad 0x221104eb3f337bd8 + .quad 0x8dee9bd55db1beee + .quad 0xc9c3ab370a723fb9 + .quad 0x44a8f1bf1c68d791 + .quad 0x366d44191cfd3cde + .quad 0x9e3c1743f2bc8c14 + .quad 0x2eda26fcb5856c3b + .quad 0xccb82f0e68a7fb97 + .quad 0x4167a4e6bc593244 + + // 2^128 * 6 * B + + .quad 0x643b9d2876f62700 + .quad 0x5d1d9d400e7668eb + .quad 0x1b4b430321fc0684 + .quad 0x7938bb7e2255246a + .quad 0xc2be2665f8ce8fee + .quad 0xe967ff14e880d62c + .quad 0xf12e6e7e2f364eee + .quad 0x34b33370cb7ed2f6 + .quad 0xcdc591ee8681d6cc + .quad 0xce02109ced85a753 + .quad 0xed7485c158808883 + .quad 0x1176fc6e2dfe65e4 + + // 2^128 * 7 * B + + .quad 0xb4af6cd05b9c619b + .quad 0x2ddfc9f4b2a58480 + .quad 0x3d4fa502ebe94dc4 + .quad 0x08fc3a4c677d5f34 + .quad 0xdb90e28949770eb8 + .quad 0x98fbcc2aacf440a3 + .quad 0x21354ffeded7879b + .quad 0x1f6a3e54f26906b6 + .quad 0x60a4c199d30734ea + .quad 0x40c085b631165cd6 + .quad 0xe2333e23f7598295 + .quad 0x4f2fad0116b900d1 + + // 2^128 * 8 * B + + .quad 0x44beb24194ae4e54 + .quad 0x5f541c511857ef6c + .quad 0xa61e6b2d368d0498 + .quad 0x445484a4972ef7ab + .quad 0x962cd91db73bb638 + .quad 0xe60577aafc129c08 + .quad 0x6f619b39f3b61689 + .quad 0x3451995f2944ee81 + .quad 0x9152fcd09fea7d7c + .quad 0x4a816c94b0935cf6 + .quad 0x258e9aaa47285c40 + .quad 0x10b89ca6042893b7 + + // 2^132 * 1 * B + + .quad 0x9b2a426e3b646025 + .quad 0x32127190385ce4cf + .quad 0xa25cffc2dd6dea45 + .quad 0x06409010bea8de75 + .quad 0xd67cded679d34aa0 + .quad 0xcc0b9ec0cc4db39f + .quad 0xa535a456e35d190f + .quad 0x2e05d9eaf61f6fef + .quad 0xc447901ad61beb59 + .quad 0x661f19bce5dc880a + .quad 0x24685482b7ca6827 + .quad 0x293c778cefe07f26 + + // 2^132 * 2 * B + + .quad 0x86809e7007069096 + .quad 0xaad75b15e4e50189 + .quad 0x07f35715a21a0147 + .quad 0x0487f3f112815d5e + .quad 0x16c795d6a11ff200 + .quad 0xcb70d0e2b15815c9 + .quad 0x89f293209b5395b5 + .quad 0x50b8c2d031e47b4f + .quad 0x48350c08068a4962 + .quad 0x6ffdd05351092c9a + .quad 0x17af4f4aaf6fc8dd + .quad 0x4b0553b53cdba58b + + // 2^132 * 3 * B + + .quad 0x9c65fcbe1b32ff79 + .quad 0xeb75ea9f03b50f9b + .quad 0xfced2a6c6c07e606 + .quad 
0x35106cd551717908 + .quad 0xbf05211b27c152d4 + .quad 0x5ec26849bd1af639 + .quad 0x5e0b2caa8e6fab98 + .quad 0x054c8bdd50bd0840 + .quad 0x38a0b12f1dcf073d + .quad 0x4b60a8a3b7f6a276 + .quad 0xfed5ac25d3404f9a + .quad 0x72e82d5e5505c229 + + // 2^132 * 4 * B + + .quad 0x6b0b697ff0d844c8 + .quad 0xbb12f85cd979cb49 + .quad 0xd2a541c6c1da0f1f + .quad 0x7b7c242958ce7211 + .quad 0x00d9cdfd69771d02 + .quad 0x410276cd6cfbf17e + .quad 0x4c45306c1cb12ec7 + .quad 0x2857bf1627500861 + .quad 0x9f21903f0101689e + .quad 0xd779dfd3bf861005 + .quad 0xa122ee5f3deb0f1b + .quad 0x510df84b485a00d4 + + // 2^132 * 5 * B + + .quad 0xa54133bb9277a1fa + .quad 0x74ec3b6263991237 + .quad 0x1a3c54dc35d2f15a + .quad 0x2d347144e482ba3a + .quad 0x24b3c887c70ac15e + .quad 0xb0f3a557fb81b732 + .quad 0x9b2cde2fe578cc1b + .quad 0x4cf7ed0703b54f8e + .quad 0x6bd47c6598fbee0f + .quad 0x9e4733e2ab55be2d + .quad 0x1093f624127610c5 + .quad 0x4e05e26ad0a1eaa4 + + // 2^132 * 6 * B + + .quad 0xda9b6b624b531f20 + .quad 0x429a760e77509abb + .quad 0xdbe9f522e823cb80 + .quad 0x618f1856880c8f82 + .quad 0x1833c773e18fe6c0 + .quad 0xe3c4711ad3c87265 + .quad 0x3bfd3c4f0116b283 + .quad 0x1955875eb4cd4db8 + .quad 0x6da6de8f0e399799 + .quad 0x7ad61aa440fda178 + .quad 0xb32cd8105e3563dd + .quad 0x15f6beae2ae340ae + + // 2^132 * 7 * B + + .quad 0x862bcb0c31ec3a62 + .quad 0x810e2b451138f3c2 + .quad 0x788ec4b839dac2a4 + .quad 0x28f76867ae2a9281 + .quad 0xba9a0f7b9245e215 + .quad 0xf368612dd98c0dbb + .quad 0x2e84e4cbf220b020 + .quad 0x6ba92fe962d90eda + .quad 0x3e4df9655884e2aa + .quad 0xbd62fbdbdbd465a5 + .quad 0xd7596caa0de9e524 + .quad 0x6e8042ccb2b1b3d7 + + // 2^132 * 8 * B + + .quad 0xf10d3c29ce28ca6e + .quad 0xbad34540fcb6093d + .quad 0xe7426ed7a2ea2d3f + .quad 0x08af9d4e4ff298b9 + .quad 0x1530653616521f7e + .quad 0x660d06b896203dba + .quad 0x2d3989bc545f0879 + .quad 0x4b5303af78ebd7b0 + .quad 0x72f8a6c3bebcbde8 + .quad 0x4f0fca4adc3a8e89 + .quad 0x6fa9d4e8c7bfdf7a + .quad 0x0dcf2d679b624eb7 + + // 2^136 * 1 * B + + .quad 0x3d5947499718289c + .quad 0x12ebf8c524533f26 + .quad 0x0262bfcb14c3ef15 + .quad 0x20b878d577b7518e + .quad 0x753941be5a45f06e + .quad 0xd07caeed6d9c5f65 + .quad 0x11776b9c72ff51b6 + .quad 0x17d2d1d9ef0d4da9 + .quad 0x27f2af18073f3e6a + .quad 0xfd3fe519d7521069 + .quad 0x22e3b72c3ca60022 + .quad 0x72214f63cc65c6a7 + + // 2^136 * 2 * B + + .quad 0xb4e37f405307a693 + .quad 0xaba714d72f336795 + .quad 0xd6fbd0a773761099 + .quad 0x5fdf48c58171cbc9 + .quad 0x1d9db7b9f43b29c9 + .quad 0xd605824a4f518f75 + .quad 0xf2c072bd312f9dc4 + .quad 0x1f24ac855a1545b0 + .quad 0x24d608328e9505aa + .quad 0x4748c1d10c1420ee + .quad 0xc7ffe45c06fb25a2 + .quad 0x00ba739e2ae395e6 + + // 2^136 * 3 * B + + .quad 0x592e98de5c8790d6 + .quad 0xe5bfb7d345c2a2df + .quad 0x115a3b60f9b49922 + .quad 0x03283a3e67ad78f3 + .quad 0xae4426f5ea88bb26 + .quad 0x360679d984973bfb + .quad 0x5c9f030c26694e50 + .quad 0x72297de7d518d226 + .quad 0x48241dc7be0cb939 + .quad 0x32f19b4d8b633080 + .quad 0xd3dfc90d02289308 + .quad 0x05e1296846271945 + + // 2^136 * 4 * B + + .quad 0xba82eeb32d9c495a + .quad 0xceefc8fcf12bb97c + .quad 0xb02dabae93b5d1e0 + .quad 0x39c00c9c13698d9b + .quad 0xadbfbbc8242c4550 + .quad 0xbcc80cecd03081d9 + .quad 0x843566a6f5c8df92 + .quad 0x78cf25d38258ce4c + .quad 0x15ae6b8e31489d68 + .quad 0xaa851cab9c2bf087 + .quad 0xc9a75a97f04efa05 + .quad 0x006b52076b3ff832 + + // 2^136 * 5 * B + + .quad 0x29e0cfe19d95781c + .quad 0xb681df18966310e2 + .quad 0x57df39d370516b39 + .quad 0x4d57e3443bc76122 + .quad 0xf5cb7e16b9ce082d + .quad 0x3407f14c417abc29 + .quad 
0xd4b36bce2bf4a7ab + .quad 0x7de2e9561a9f75ce + .quad 0xde70d4f4b6a55ecb + .quad 0x4801527f5d85db99 + .quad 0xdbc9c440d3ee9a81 + .quad 0x6b2a90af1a6029ed + + // 2^136 * 6 * B + + .quad 0x6923f4fc9ae61e97 + .quad 0x5735281de03f5fd1 + .quad 0xa764ae43e6edd12d + .quad 0x5fd8f4e9d12d3e4a + .quad 0x77ebf3245bb2d80a + .quad 0xd8301b472fb9079b + .quad 0xc647e6f24cee7333 + .quad 0x465812c8276c2109 + .quad 0x4d43beb22a1062d9 + .quad 0x7065fb753831dc16 + .quad 0x180d4a7bde2968d7 + .quad 0x05b32c2b1cb16790 + + // 2^136 * 7 * B + + .quad 0xc8c05eccd24da8fd + .quad 0xa1cf1aac05dfef83 + .quad 0xdbbeeff27df9cd61 + .quad 0x3b5556a37b471e99 + .quad 0xf7fca42c7ad58195 + .quad 0x3214286e4333f3cc + .quad 0xb6c29d0d340b979d + .quad 0x31771a48567307e1 + .quad 0x32b0c524e14dd482 + .quad 0xedb351541a2ba4b6 + .quad 0xa3d16048282b5af3 + .quad 0x4fc079d27a7336eb + + // 2^136 * 8 * B + + .quad 0x51c938b089bf2f7f + .quad 0x2497bd6502dfe9a7 + .quad 0xffffc09c7880e453 + .quad 0x124567cecaf98e92 + .quad 0xdc348b440c86c50d + .quad 0x1337cbc9cc94e651 + .quad 0x6422f74d643e3cb9 + .quad 0x241170c2bae3cd08 + .quad 0x3ff9ab860ac473b4 + .quad 0xf0911dee0113e435 + .quad 0x4ae75060ebc6c4af + .quad 0x3f8612966c87000d + + // 2^140 * 1 * B + + .quad 0x0c9c5303f7957be4 + .quad 0xa3c31a20e085c145 + .quad 0xb0721d71d0850050 + .quad 0x0aba390eab0bf2da + .quad 0x529fdffe638c7bf3 + .quad 0xdf2b9e60388b4995 + .quad 0xe027b34f1bad0249 + .quad 0x7bc92fc9b9fa74ed + .quad 0x9f97ef2e801ad9f9 + .quad 0x83697d5479afda3a + .quad 0xe906b3ffbd596b50 + .quad 0x02672b37dd3fb8e0 + + // 2^140 * 2 * B + + .quad 0x48b2ca8b260885e4 + .quad 0xa4286bec82b34c1c + .quad 0x937e1a2617f58f74 + .quad 0x741d1fcbab2ca2a5 + .quad 0xee9ba729398ca7f5 + .quad 0xeb9ca6257a4849db + .quad 0x29eb29ce7ec544e1 + .quad 0x232ca21ef736e2c8 + .quad 0xbf61423d253fcb17 + .quad 0x08803ceafa39eb14 + .quad 0xf18602df9851c7af + .quad 0x0400f3a049e3414b + + // 2^140 * 3 * B + + .quad 0xabce0476ba61c55b + .quad 0x36a3d6d7c4d39716 + .quad 0x6eb259d5e8d82d09 + .quad 0x0c9176e984d756fb + .quad 0x2efba412a06e7b06 + .quad 0x146785452c8d2560 + .quad 0xdf9713ebd67a91c7 + .quad 0x32830ac7157eadf3 + .quad 0x0e782a7ab73769e8 + .quad 0x04a05d7875b18e2c + .quad 0x29525226ebcceae1 + .quad 0x0d794f8383eba820 + + // 2^140 * 4 * B + + .quad 0xff35f5cb9e1516f4 + .quad 0xee805bcf648aae45 + .quad 0xf0d73c2bb93a9ef3 + .quad 0x097b0bf22092a6c2 + .quad 0x7be44ce7a7a2e1ac + .quad 0x411fd93efad1b8b7 + .quad 0x1734a1d70d5f7c9b + .quad 0x0d6592233127db16 + .quad 0xc48bab1521a9d733 + .quad 0xa6c2eaead61abb25 + .quad 0x625c6c1cc6cb4305 + .quad 0x7fc90fea93eb3a67 + + // 2^140 * 5 * B + + .quad 0x0408f1fe1f5c5926 + .quad 0x1a8f2f5e3b258bf4 + .quad 0x40a951a2fdc71669 + .quad 0x6598ee93c98b577e + .quad 0xc527deb59c7cb23d + .quad 0x955391695328404e + .quad 0xd64392817ccf2c7a + .quad 0x6ce97dabf7d8fa11 + .quad 0x25b5a8e50ef7c48f + .quad 0xeb6034116f2ce532 + .quad 0xc5e75173e53de537 + .quad 0x73119fa08c12bb03 + + // 2^140 * 6 * B + + .quad 0xed30129453f1a4cb + .quad 0xbce621c9c8f53787 + .quad 0xfacb2b1338bee7b9 + .quad 0x3025798a9ea8428c + .quad 0x7845b94d21f4774d + .quad 0xbf62f16c7897b727 + .quad 0x671857c03c56522b + .quad 0x3cd6a85295621212 + .quad 0x3fecde923aeca999 + .quad 0xbdaa5b0062e8c12f + .quad 0x67b99dfc96988ade + .quad 0x3f52c02852661036 + + // 2^140 * 7 * B + + .quad 0xffeaa48e2a1351c6 + .quad 0x28624754fa7f53d7 + .quad 0x0b5ba9e57582ddf1 + .quad 0x60c0104ba696ac59 + .quad 0x9258bf99eec416c6 + .quad 0xac8a5017a9d2f671 + .quad 0x629549ab16dea4ab + .quad 0x05d0e85c99091569 + .quad 0x051de020de9cbe97 + .quad 
0xfa07fc56b50bcf74 + .quad 0x378cec9f0f11df65 + .quad 0x36853c69ab96de4d + + // 2^140 * 8 * B + + .quad 0x36d9b8de78f39b2d + .quad 0x7f42ed71a847b9ec + .quad 0x241cd1d679bd3fde + .quad 0x6a704fec92fbce6b + .quad 0x4433c0b0fac5e7be + .quad 0x724bae854c08dcbe + .quad 0xf1f24cc446978f9b + .quad 0x4a0aff6d62825fc8 + .quad 0xe917fb9e61095301 + .quad 0xc102df9402a092f8 + .quad 0xbf09e2f5fa66190b + .quad 0x681109bee0dcfe37 + + // 2^144 * 1 * B + + .quad 0x559a0cc9782a0dde + .quad 0x551dcdb2ea718385 + .quad 0x7f62865b31ef238c + .quad 0x504aa7767973613d + .quad 0x9c18fcfa36048d13 + .quad 0x29159db373899ddd + .quad 0xdc9f350b9f92d0aa + .quad 0x26f57eee878a19d4 + .quad 0x0cab2cd55687efb1 + .quad 0x5180d162247af17b + .quad 0x85c15a344f5a2467 + .quad 0x4041943d9dba3069 + + // 2^144 * 2 * B + + .quad 0xc3c0eeba43ebcc96 + .quad 0x8d749c9c26ea9caf + .quad 0xd9fa95ee1c77ccc6 + .quad 0x1420a1d97684340f + .quad 0x4b217743a26caadd + .quad 0x47a6b424648ab7ce + .quad 0xcb1d4f7a03fbc9e3 + .quad 0x12d931429800d019 + .quad 0x00c67799d337594f + .quad 0x5e3c5140b23aa47b + .quad 0x44182854e35ff395 + .quad 0x1b4f92314359a012 + + // 2^144 * 3 * B + + .quad 0x3e5c109d89150951 + .quad 0x39cefa912de9696a + .quad 0x20eae43f975f3020 + .quad 0x239b572a7f132dae + .quad 0x33cf3030a49866b1 + .quad 0x251f73d2215f4859 + .quad 0xab82aa4051def4f6 + .quad 0x5ff191d56f9a23f6 + .quad 0x819ed433ac2d9068 + .quad 0x2883ab795fc98523 + .quad 0xef4572805593eb3d + .quad 0x020c526a758f36cb + + // 2^144 * 4 * B + + .quad 0x779834f89ed8dbbc + .quad 0xc8f2aaf9dc7ca46c + .quad 0xa9524cdca3e1b074 + .quad 0x02aacc4615313877 + .quad 0xe931ef59f042cc89 + .quad 0x2c589c9d8e124bb6 + .quad 0xadc8e18aaec75997 + .quad 0x452cfe0a5602c50c + .quad 0x86a0f7a0647877df + .quad 0xbbc464270e607c9f + .quad 0xab17ea25f1fb11c9 + .quad 0x4cfb7d7b304b877b + + // 2^144 * 5 * B + + .quad 0x72b43d6cb89b75fe + .quad 0x54c694d99c6adc80 + .quad 0xb8c3aa373ee34c9f + .quad 0x14b4622b39075364 + .quad 0xe28699c29789ef12 + .quad 0x2b6ecd71df57190d + .quad 0xc343c857ecc970d0 + .quad 0x5b1d4cbc434d3ac5 + .quad 0xb6fb2615cc0a9f26 + .quad 0x3a4f0e2bb88dcce5 + .quad 0x1301498b3369a705 + .quad 0x2f98f71258592dd1 + + // 2^144 * 6 * B + + .quad 0x0c94a74cb50f9e56 + .quad 0x5b1ff4a98e8e1320 + .quad 0x9a2acc2182300f67 + .quad 0x3a6ae249d806aaf9 + .quad 0x2e12ae444f54a701 + .quad 0xfcfe3ef0a9cbd7de + .quad 0xcebf890d75835de0 + .quad 0x1d8062e9e7614554 + .quad 0x657ada85a9907c5a + .quad 0x1a0ea8b591b90f62 + .quad 0x8d0e1dfbdf34b4e9 + .quad 0x298b8ce8aef25ff3 + + // 2^144 * 7 * B + + .quad 0x2a927953eff70cb2 + .quad 0x4b89c92a79157076 + .quad 0x9418457a30a7cf6a + .quad 0x34b8a8404d5ce485 + .quad 0x837a72ea0a2165de + .quad 0x3fab07b40bcf79f6 + .quad 0x521636c77738ae70 + .quad 0x6ba6271803a7d7dc + .quad 0xc26eecb583693335 + .quad 0xd5a813df63b5fefd + .quad 0xa293aa9aa4b22573 + .quad 0x71d62bdd465e1c6a + + // 2^144 * 8 * B + + .quad 0x6533cc28d378df80 + .quad 0xf6db43790a0fa4b4 + .quad 0xe3645ff9f701da5a + .quad 0x74d5f317f3172ba4 + .quad 0xcd2db5dab1f75ef5 + .quad 0xd77f95cf16b065f5 + .quad 0x14571fea3f49f085 + .quad 0x1c333621262b2b3d + .quad 0xa86fe55467d9ca81 + .quad 0x398b7c752b298c37 + .quad 0xda6d0892e3ac623b + .quad 0x4aebcc4547e9d98c + + // 2^148 * 1 * B + + .quad 0x53175a7205d21a77 + .quad 0xb0c04422d3b934d4 + .quad 0xadd9f24bdd5deadc + .quad 0x074f46e69f10ff8c + .quad 0x0de9b204a059a445 + .quad 0xe15cb4aa4b17ad0f + .quad 0xe1bbec521f79c557 + .quad 0x2633f1b9d071081b + .quad 0xc1fb4177018b9910 + .quad 0xa6ea20dc6c0fe140 + .quad 0xd661f3e74354c6ff + .quad 0x5ecb72e6f1a3407a + + // 
+// 2^148 * 2 * B
+
+        .quad   0xa515a31b2259fb4e
+        .quad   0x0960f3972bcac52f
+        .quad   0xedb52fec8d3454cb
+        .quad   0x382e2720c476c019
+        .quad   0xfeeae106e8e86997
+        .quad   0x9863337f98d09383
+        .quad   0x9470480eaa06ebef
+        .quad   0x038b6898d4c5c2d0
+        .quad   0xf391c51d8ace50a6
+        .quad   0x3142d0b9ae2d2948
+        .quad   0xdb4d5a1a7f24ca80
+        .quad   0x21aeba8b59250ea8
+
+// 2^148 * 3 * B
+
+        .quad   0x24f13b34cf405530
+        .quad   0x3c44ea4a43088af7
+        .quad   0x5dd5c5170006a482
+        .quad   0x118eb8f8890b086d
+        .quad   0x53853600f0087f23
+        .quad   0x4c461879da7d5784
+        .quad   0x6af303deb41f6860
+        .quad   0x0a3c16c5c27c18ed
+        .quad   0x17e49c17cc947f3d
+        .quad   0xccc6eda6aac1d27b
+        .quad   0xdf6092ceb0f08e56
+        .quad   0x4909b3e22c67c36b
+
+// 2^148 * 4 * B
+
+        .quad   0x9c9c85ea63fe2e89
+        .quad   0xbe1baf910e9412ec
+        .quad   0x8f7baa8a86fbfe7b
+        .quad   0x0fb17f9fef968b6c
+        .quad   0x59a16676706ff64e
+        .quad   0x10b953dd0d86a53d
+        .quad   0x5848e1e6ce5c0b96
+        .quad   0x2d8b78e712780c68
+        .quad   0x79d5c62eafc3902b
+        .quad   0x773a215289e80728
+        .quad   0xc38ae640e10120b9
+        .quad   0x09ae23717b2b1a6d
+
+// 2^148 * 5 * B
+
+        .quad   0xbb6a192a4e4d083c
+        .quad   0x34ace0630029e192
+        .quad   0x98245a59aafabaeb
+        .quad   0x6d9c8a9ada97faac
+        .quad   0x10ab8fa1ad32b1d0
+        .quad   0xe9aced1be2778b24
+        .quad   0xa8856bc0373de90f
+        .quad   0x66f35ddddda53996
+        .quad   0xd27d9afb24997323
+        .quad   0x1bb7e07ef6f01d2e
+        .quad   0x2ba7472df52ecc7f
+        .quad   0x03019b4f646f9dc8
+
+// 2^148 * 6 * B
+
+        .quad   0x04a186b5565345cd
+        .quad   0xeee76610bcc4116a
+        .quad   0x689c73b478fb2a45
+        .quad   0x387dcbff65697512
+        .quad   0xaf09b214e6b3dc6b
+        .quad   0x3f7573b5ad7d2f65
+        .quad   0xd019d988100a23b0
+        .quad   0x392b63a58b5c35f7
+        .quad   0x4093addc9c07c205
+        .quad   0xc565be15f532c37e
+        .quad   0x63dbecfd1583402a
+        .quad   0x61722b4aef2e032e
+
+// 2^148 * 7 * B
+
+        .quad   0x0012aafeecbd47af
+        .quad   0x55a266fb1cd46309
+        .quad   0xf203eb680967c72c
+        .quad   0x39633944ca3c1429
+        .quad   0xd6b07a5581cb0e3c
+        .quad   0x290ff006d9444969
+        .quad   0x08680b6a16dcda1f
+        .quad   0x5568d2b75a06de59
+        .quad   0x8d0cb88c1b37cfe1
+        .quad   0x05b6a5a3053818f3
+        .quad   0xf2e9bc04b787d959
+        .quad   0x6beba1249add7f64
+
+// 2^148 * 8 * B
+
+        .quad   0x1d06005ca5b1b143
+        .quad   0x6d4c6bb87fd1cda2
+        .quad   0x6ef5967653fcffe7
+        .quad   0x097c29e8c1ce1ea5
+        .quad   0x5c3cecb943f5a53b
+        .quad   0x9cc9a61d06c08df2
+        .quad   0xcfba639a85895447
+        .quad   0x5a845ae80df09fd5
+        .quad   0x4ce97dbe5deb94ca
+        .quad   0x38d0a4388c709c48
+        .quad   0xc43eced4a169d097
+        .quad   0x0a1249fff7e587c3
+
+// 2^152 * 1 * B
+
+        .quad   0x12f0071b276d01c9
+        .quad   0xe7b8bac586c48c70
+        .quad   0x5308129b71d6fba9
+        .quad   0x5d88fbf95a3db792
+        .quad   0x0b408d9e7354b610
+        .quad   0x806b32535ba85b6e
+        .quad   0xdbe63a034a58a207
+        .quad   0x173bd9ddc9a1df2c
+        .quad   0x2b500f1efe5872df
+        .quad   0x58d6582ed43918c1
+        .quad   0xe6ed278ec9673ae0
+        .quad   0x06e1cd13b19ea319
+
+// 2^152 * 2 * B
+
+        .quad   0x40d0ad516f166f23
+        .quad   0x118e32931fab6abe
+        .quad   0x3fe35e14a04d088e
+        .quad   0x3080603526e16266
+        .quad   0x472baf629e5b0353
+        .quad   0x3baa0b90278d0447
+        .quad   0x0c785f469643bf27
+        .quad   0x7f3a6a1a8d837b13
+        .quad   0xf7e644395d3d800b
+        .quad   0x95a8d555c901edf6
+        .quad   0x68cd7830592c6339
+        .quad   0x30d0fded2e51307e
+
+// 2^152 * 3 * B
+
+        .quad   0xe0594d1af21233b3
+        .quad   0x1bdbe78ef0cc4d9c
+        .quad   0x6965187f8f499a77
+        .quad   0x0a9214202c099868
+        .quad   0x9cb4971e68b84750
+        .quad   0xa09572296664bbcf
+        .quad   0x5c8de72672fa412b
+        .quad   0x4615084351c589d9
+        .quad   0xbc9019c0aeb9a02e
+        .quad   0x55c7110d16034cae
+        .quad   0x0e6df501659932ec
+        .quad   0x3bca0d2895ca5dfe
+
+// 2^152 * 4 * B
+
+        .quad   0x40f031bc3c5d62a4
+        .quad   0x19fc8b3ecff07a60
+        .quad   0x98183da2130fb545
+        .quad   0x5631deddae8f13cd
+        .quad   0x9c688eb69ecc01bf
+        .quad   0xf0bc83ada644896f
+        .quad   0xca2d955f5f7a9fe2
+        .quad   0x4ea8b4038df28241
+        .quad   0x2aed460af1cad202
+        .quad   0x46305305a48cee83
+        .quad   0x9121774549f11a5f
+        .quad   0x24ce0930542ca463
+
+// 2^152 * 5 * B
+
+        .quad   0x1fe890f5fd06c106
+        .quad   0xb5c468355d8810f2
+        .quad   0x827808fe6e8caf3e
+        .quad   0x41d4e3c28a06d74b
+        .quad   0x3fcfa155fdf30b85
+        .quad   0xd2f7168e36372ea4
+        .quad   0xb2e064de6492f844
+        .quad   0x549928a7324f4280
+        .quad   0xf26e32a763ee1a2e
+        .quad   0xae91e4b7d25ffdea
+        .quad   0xbc3bd33bd17f4d69
+        .quad   0x491b66dec0dcff6a
+
+// 2^152 * 6 * B
+
+        .quad   0x98f5b13dc7ea32a7
+        .quad   0xe3d5f8cc7e16db98
+        .quad   0xac0abf52cbf8d947
+        .quad   0x08f338d0c85ee4ac
+        .quad   0x75f04a8ed0da64a1
+        .quad   0xed222caf67e2284b
+        .quad   0x8234a3791f7b7ba4
+        .quad   0x4cf6b8b0b7018b67
+        .quad   0xc383a821991a73bd
+        .quad   0xab27bc01df320c7a
+        .quad   0xc13d331b84777063
+        .quad   0x530d4a82eb078a99
+
+// 2^152 * 7 * B
+
+        .quad   0x004c3630e1f94825
+        .quad   0x7e2d78268cab535a
+        .quad   0xc7482323cc84ff8b
+        .quad   0x65ea753f101770b9
+        .quad   0x6d6973456c9abf9e
+        .quad   0x257fb2fc4900a880
+        .quad   0x2bacf412c8cfb850
+        .quad   0x0db3e7e00cbfbd5b
+        .quad   0x3d66fc3ee2096363
+        .quad   0x81d62c7f61b5cb6b
+        .quad   0x0fbe044213443b1a
+        .quad   0x02a4ec1921e1a1db
+
+// 2^152 * 8 * B
+
+        .quad   0x5ce6259a3b24b8a2
+        .quad   0xb8577acc45afa0b8
+        .quad   0xcccbe6e88ba07037
+        .quad   0x3d143c51127809bf
+        .quad   0xf5c86162f1cf795f
+        .quad   0x118c861926ee57f2
+        .quad   0x172124851c063578
+        .quad   0x36d12b5dec067fcf
+        .quad   0x126d279179154557
+        .quad   0xd5e48f5cfc783a0a
+        .quad   0x36bdb6e8df179bac
+        .quad   0x2ef517885ba82859
+
+// 2^156 * 1 * B
+
+        .quad   0x88bd438cd11e0d4a
+        .quad   0x30cb610d43ccf308
+        .quad   0xe09a0e3791937bcc
+        .quad   0x4559135b25b1720c
+        .quad   0x1ea436837c6da1e9
+        .quad   0xf9c189af1fb9bdbe
+        .quad   0x303001fcce5dd155
+        .quad   0x28a7c99ebc57be52
+        .quad   0xb8fd9399e8d19e9d
+        .quad   0x908191cb962423ff
+        .quad   0xb2b948d747c742a3
+        .quad   0x37f33226d7fb44c4
+
+// 2^156 * 2 * B
+
+        .quad   0x0dae8767b55f6e08
+        .quad   0x4a43b3b35b203a02
+        .quad   0xe3725a6e80af8c79
+        .quad   0x0f7a7fd1705fa7a3
+        .quad   0x33912553c821b11d
+        .quad   0x66ed42c241e301df
+        .quad   0x066fcc11104222fd
+        .quad   0x307a3b41c192168f
+        .quad   0x8eeb5d076eb55ce0
+        .quad   0x2fc536bfaa0d925a
+        .quad   0xbe81830fdcb6c6e8
+        .quad   0x556c7045827baf52
+
+// 2^156 * 3 * B
+
+        .quad   0x8e2b517302e9d8b7
+        .quad   0xe3e52269248714e8
+        .quad   0xbd4fbd774ca960b5
+        .quad   0x6f4b4199c5ecada9
+        .quad   0xb94b90022bf44406
+        .quad   0xabd4237eff90b534
+        .quad   0x7600a960faf86d3a
+        .quad   0x2f45abdac2322ee3
+        .quad   0x61af4912c8ef8a6a
+        .quad   0xe58fa4fe43fb6e5e
+        .quad   0xb5afcc5d6fd427cf
+        .quad   0x6a5393281e1e11eb
+
+// 2^156 * 4 * B
+
+        .quad   0xf3da5139a5d1ee89
+        .quad   0x8145457cff936988
+        .quad   0x3f622fed00e188c4
+        .quad   0x0f513815db8b5a3d
+        .quad   0x0fff04fe149443cf
+        .quad   0x53cac6d9865cddd7
+        .quad   0x31385b03531ed1b7
+        .quad   0x5846a27cacd1039d
+        .quad   0x4ff5cdac1eb08717
+        .quad   0x67e8b29590f2e9bc
+        .quad   0x44093b5e237afa99
+        .quad   0x0d414bed8708b8b2
+
+// 2^156 * 5 * B
+
+        .quad   0xcfb68265fd0e75f6
+        .quad   0xe45b3e28bb90e707
+        .quad   0x7242a8de9ff92c7a
+        .quad   0x685b3201933202dd
+        .quad   0x81886a92294ac9e8
+        .quad   0x23162b45d55547be
+        .quad   0x94cfbc4403715983
+        .quad   0x50eb8fdb134bc401
+        .quad   0xc0b73ec6d6b330cd
+        .quad   0x84e44807132faff1
+        .quad   0x732b7352c4a5dee1
+        .quad   0x5d7c7cf1aa7cd2d2
+
+// 2^156 * 6 * B
+
+        .quad   0xaf3b46bf7a4aafa2
+        .quad   0xb78705ec4d40d411
+        .quad   0x114f0c6aca7c15e3
+        .quad   0x3f364faaa9489d4d
+        .quad   0x33d1013e9b73a562
+        .quad   0x925cef5748ec26e1
+        .quad   0xa7fce614dd468058
+        .quad   0x78b0fad41e9aa438
+        .quad   0xbf56a431ed05b488
+        .quad   0xa533e66c9c495c7e
+        .quad   0xe8652baf87f3651a
+        .quad   0x0241800059d66c33
+
+// 2^156 * 7 * B
+
+        .quad   0xceb077fea37a5be4
+        .quad   0xdb642f02e5a5eeb7
+        .quad   0xc2e6d0c5471270b8
+        .quad   0x4771b65538e4529c
+        .quad   0x28350c7dcf38ea01
+        .quad   0x7c6cdbc0b2917ab6
+        .quad   0xace7cfbe857082f7
+        .quad   0x4d2845aba2d9a1e0
+        .quad   0xbb537fe0447070de
+        .quad   0xcba744436dd557df
+        .quad   0xd3b5a3473600dbcb
+        .quad   0x4aeabbe6f9ffd7f8
+
+// 2^156 * 8 * B
+
+        .quad   0x4630119e40d8f78c
+        .quad   0xa01a9bc53c710e11
+        .quad   0x486d2b258910dd79
+        .quad   0x1e6c47b3db0324e5
+        .quad   0x6a2134bcc4a9c8f2
+        .quad   0xfbf8fd1c8ace2e37
+        .quad   0x000ae3049911a0ba
+        .quad   0x046e3a616bc89b9e
+        .quad   0x14e65442f03906be
+        .quad   0x4a019d54e362be2a
+        .quad   0x68ccdfec8dc230c7
+        .quad   0x7cfb7e3faf6b861c
+
+// 2^160 * 1 * B
+
+        .quad   0x4637974e8c58aedc
+        .quad   0xb9ef22fbabf041a4
+        .quad   0xe185d956e980718a
+        .quad   0x2f1b78fab143a8a6
+        .quad   0x96eebffb305b2f51
+        .quad   0xd3f938ad889596b8
+        .quad   0xf0f52dc746d5dd25
+        .quad   0x57968290bb3a0095
+        .quad   0xf71ab8430a20e101
+        .quad   0xf393658d24f0ec47
+        .quad   0xcf7509a86ee2eed1
+        .quad   0x7dc43e35dc2aa3e1
+
+// 2^160 * 2 * B
+
+        .quad   0x85966665887dd9c3
+        .quad   0xc90f9b314bb05355
+        .quad   0xc6e08df8ef2079b1
+        .quad   0x7ef72016758cc12f
+        .quad   0x5a782a5c273e9718
+        .quad   0x3576c6995e4efd94
+        .quad   0x0f2ed8051f237d3e
+        .quad   0x044fb81d82d50a99
+        .quad   0xc1df18c5a907e3d9
+        .quad   0x57b3371dce4c6359
+        .quad   0xca704534b201bb49
+        .quad   0x7f79823f9c30dd2e
+
+// 2^160 * 3 * B
+
+        .quad   0x8334d239a3b513e8
+        .quad   0xc13670d4b91fa8d8
+        .quad   0x12b54136f590bd33
+        .quad   0x0a4e0373d784d9b4
+        .quad   0x6a9c1ff068f587ba
+        .quad   0x0827894e0050c8de
+        .quad   0x3cbf99557ded5be7
+        .quad   0x64a9b0431c06d6f0
+        .quad   0x2eb3d6a15b7d2919
+        .quad   0xb0b4f6a0d53a8235
+        .quad   0x7156ce4389a45d47
+        .quad   0x071a7d0ace18346c
+
+// 2^160 * 4 * B
+
+        .quad   0xd3072daac887ba0b
+        .quad   0x01262905bfa562ee
+        .quad   0xcf543002c0ef768b
+        .quad   0x2c3bcc7146ea7e9c
+        .quad   0xcc0c355220e14431
+        .quad   0x0d65950709b15141
+        .quad   0x9af5621b209d5f36
+        .quad   0x7c69bcf7617755d3
+        .quad   0x07f0d7eb04e8295f
+        .quad   0x10db18252f50f37d
+        .quad   0xe951a9a3171798d7
+        .quad   0x6f5a9a7322aca51d
+
+// 2^160 * 5 * B
+
+        .quad   0x8ba1000c2f41c6c5
+        .quad   0xc49f79c10cfefb9b
+        .quad   0x4efa47703cc51c9f
+        .quad   0x494e21a2e147afca
+        .quad   0xe729d4eba3d944be
+        .quad   0x8d9e09408078af9e
+        .quad   0x4525567a47869c03
+        .quad   0x02ab9680ee8d3b24
+        .quad   0xefa48a85dde50d9a
+        .quad   0x219a224e0fb9a249
+        .quad   0xfa091f1dd91ef6d9
+        .quad   0x6b5d76cbea46bb34
+
+// 2^160 * 6 * B
+
+        .quad   0x8857556cec0cd994
+        .quad   0x6472dc6f5cd01dba
+        .quad   0xaf0169148f42b477
+        .quad   0x0ae333f685277354
+        .quad   0xe0f941171e782522
+        .quad   0xf1e6ae74036936d3
+        .quad   0x408b3ea2d0fcc746
+        .quad   0x16fb869c03dd313e
+        .quad   0x288e199733b60962
+        .quad   0x24fc72b4d8abe133
+        .quad   0x4811f7ed0991d03e
+        .quad   0x3f81e38b8f70d075
+
+// 2^160 * 7 * B
+
+        .quad   0x7f910fcc7ed9affe
+        .quad   0x545cb8a12465874b
+        .quad   0xa8397ed24b0c4704
+        .quad   0x50510fc104f50993
+        .quad   0x0adb7f355f17c824
+        .quad   0x74b923c3d74299a4
+        .quad   0xd57c3e8bcbf8eaf7
+        .quad   0x0ad3e2d34cdedc3d
+        .quad   0x6f0c0fc5336e249d
+        .quad   0x745ede19c331cfd9
+        .quad   0xf2d6fd0009eefe1c
+        .quad   0x127c158bf0fa1ebe
+
+// 2^160 * 8 * B
+
+        .quad   0xf6197c422e9879a2
+        .quad   0xa44addd452ca3647
+        .quad   0x9b413fc14b4eaccb
+        .quad   0x354ef87d07ef4f68
+        .quad   0xdea28fc4ae51b974
+        .quad   0x1d9973d3744dfe96
+        .quad   0x6240680b873848a8
+        .quad   0x4ed82479d167df95
+        .quad   0xfee3b52260c5d975
+        .quad   0x50352efceb41b0b8
+        .quad   0x8808ac30a9f6653c
+        .quad   0x302d92d20539236d
+
+// 2^164 * 1 * B
+
+        .quad   0x4c59023fcb3efb7c
+        .quad   0x6c2fcb99c63c2a94
+        .quad   0xba4190e2c3c7e084
+        .quad   0x0e545daea51874d9
+        .quad   0x957b8b8b0df53c30
+        .quad   0x2a1c770a8e60f098
+        .quad   0xbbc7a670345796de
+        .quad   0x22a48f9a90c99bc9
+        .quad   0x6b7dc0dc8d3fac58
+        .quad   0x5497cd6ce6e42bfd
+        .quad   0x542f7d1bf400d305
+        .quad   0x4159f47f048d9136
+
+// 2^164 * 2 * B
+
+        .quad   0x20ad660839e31e32
+        .quad   0xf81e1bd58405be50
+        .quad   0xf8064056f4dabc69
+        .quad   0x14d23dd4ce71b975
+        .quad   0x748515a8bbd24839
+        .quad   0x77128347afb02b55
+        .quad   0x50ba2ac649a2a17f
+        .quad   0x060525513ad730f1
+        .quad   0xf2398e098aa27f82
+        .quad   0x6d7982bb89a1b024
+        .quad   0xfa694084214dd24c
+        .quad   0x71ab966fa32301c3
+
+// 2^164 * 3 * B
+
+        .quad   0x2dcbd8e34ded02fc
+        .quad   0x1151f3ec596f22aa
+        .quad   0xbca255434e0328da
+        .quad   0x35768fbe92411b22
+        .quad   0xb1088a0702809955
+        .quad   0x43b273ea0b43c391
+        .quad   0xca9b67aefe0686ed
+        .quad   0x605eecbf8335f4ed
+        .quad   0x83200a656c340431
+        .quad   0x9fcd71678ee59c2f
+        .quad   0x75d4613f71300f8a
+        .quad   0x7a912faf60f542f9
+
+// 2^164 * 4 * B
+
+        .quad   0xb204585e5edc1a43
+        .quad   0x9f0e16ee5897c73c
+        .quad   0x5b82c0ae4e70483c
+        .quad   0x624a170e2bddf9be
+        .quad   0x253f4f8dfa2d5597
+        .quad   0x25e49c405477130c
+        .quad   0x00c052e5996b1102
+        .quad   0x33cb966e33bb6c4a
+        .quad   0x597028047f116909
+        .quad   0x828ac41c1e564467
+        .quad   0x70417dbde6217387
+        .quad   0x721627aefbac4384
+
+// 2^164 * 5 * B
+
+        .quad   0x97d03bc38736add5
+        .quad   0x2f1422afc532b130
+        .quad   0x3aa68a057101bbc4
+        .quad   0x4c946cf7e74f9fa7
+        .quad   0xfd3097bc410b2f22
+        .quad   0xf1a05da7b5cfa844
+        .quad   0x61289a1def57ca74
+        .quad   0x245ea199bb821902
+        .quad   0xaedca66978d477f8
+        .quad   0x1898ba3c29117fe1
+        .quad   0xcf73f983720cbd58
+        .quad   0x67da12e6b8b56351
+
+// 2^164 * 6 * B
+
+        .quad   0x7067e187b4bd6e07
+        .quad   0x6e8f0203c7d1fe74
+        .quad   0x93c6aa2f38c85a30
+        .quad   0x76297d1f3d75a78a
+        .quad   0x2b7ef3d38ec8308c
+        .quad   0x828fd7ec71eb94ab
+        .quad   0x807c3b36c5062abd
+        .quad   0x0cb64cb831a94141
+        .quad   0x3030fc33534c6378
+        .quad   0xb9635c5ce541e861
+        .quad   0x15d9a9bed9b2c728
+        .quad   0x49233ea3f3775dcb
+
+// 2^164 * 7 * B
+
+        .quad   0x629398fa8dbffc3a
+        .quad   0xe12fe52dd54db455
+        .quad   0xf3be11dfdaf25295
+        .quad   0x628b140dce5e7b51
+        .quad   0x7b3985fe1c9f249b
+        .quad   0x4fd6b2d5a1233293
+        .quad   0xceb345941adf4d62
+        .quad   0x6987ff6f542de50c
+        .quad   0x47e241428f83753c
+        .quad   0x6317bebc866af997
+        .quad   0xdabb5b433d1a9829
+        .quad   0x074d8d245287fb2d
+
+// 2^164 * 8 * B
+
+        .quad   0x8337d9cd440bfc31
+        .quad   0x729d2ca1af318fd7
+        .quad   0xa040a4a4772c2070
+        .quad   0x46002ef03a7349be
+        .quad   0x481875c6c0e31488
+        .quad   0x219429b2e22034b4
+        .quad   0x7223c98a31283b65
+        .quad   0x3420d60b342277f9
+        .quad   0xfaa23adeaffe65f7
+        .quad   0x78261ed45be0764c
+        .quad   0x441c0a1e2f164403
+        .quad   0x5aea8e567a87d395
+
+// 2^168 * 1 * B
+
+        .quad   0x7813c1a2bca4283d
+        .quad   0xed62f091a1863dd9
+        .quad   0xaec7bcb8c268fa86
+        .quad   0x10e5d3b76f1cae4c
+        .quad   0x2dbc6fb6e4e0f177
+        .quad   0x04e1bf29a4bd6a93
+        .quad   0x5e1966d4787af6e8
+        .quad   0x0edc5f5eb426d060
+        .quad   0x5453bfd653da8e67
+        .quad   0xe9dc1eec24a9f641
+        .quad   0xbf87263b03578a23
+        .quad   0x45b46c51361cba72
+
+// 2^168 * 2 * B
+
+        .quad   0xa9402abf314f7fa1
+        .quad   0xe257f1dc8e8cf450
+        .quad   0x1dbbd54b23a8be84
+        .quad   0x2177bfa36dcb713b
+        .quad   0xce9d4ddd8a7fe3e4
+        .quad   0xab13645676620e30
+        .quad   0x4b594f7bb30e9958
+        .quad   0x5c1c0aef321229df
+        .quad   0x37081bbcfa79db8f
+        .quad   0x6048811ec25f59b3
+        .quad   0x087a76659c832487
+        .quad   0x4ae619387d8ab5bb
+
+// 2^168 * 3 * B
+
+        .quad   0x8ddbf6aa5344a32e
+        .quad   0x7d88eab4b41b4078
+        .quad   0x5eb0eb974a130d60
+        .quad   0x1a00d91b17bf3e03
+        .quad   0x61117e44985bfb83
+        .quad   0xfce0462a71963136
+        .quad   0x83ac3448d425904b
+        .quad   0x75685abe5ba43d64
+        .quad   0x6e960933eb61f2b2
+        .quad   0x543d0fa8c9ff4952
+        .quad   0xdf7275107af66569
+        .quad   0x135529b623b0e6aa
+
+// 2^168 * 4 * B
+
+        .quad   0x18f0dbd7add1d518
+        .quad   0x979f7888cfc11f11
+        .quad   0x8732e1f07114759b
+        .quad   0x79b5b81a65ca3a01
+        .quad   0xf5c716bce22e83fe
+        .quad   0xb42beb19e80985c1
+        .quad   0xec9da63714254aae
+        .quad   0x5972ea051590a613
+        .quad   0x0fd4ac20dc8f7811
+        .quad   0x9a9ad294ac4d4fa8
+        .quad   0xc01b2d64b3360434
+        .quad   0x4f7e9c95905f3bdb
+
+// 2^168 * 5 * B
+
+        .quad   0x62674bbc5781302e
+        .quad   0xd8520f3989addc0f
+        .quad   0x8c2999ae53fbd9c6
+        .quad   0x31993ad92e638e4c
+        .quad   0x71c8443d355299fe
+        .quad   0x8bcd3b1cdbebead7
+        .quad   0x8092499ef1a49466
+        .quad   0x1942eec4a144adc8
+        .quad   0x7dac5319ae234992
+        .quad   0x2c1b3d910cea3e92
+        .quad   0x553ce494253c1122
+        .quad   0x2a0a65314ef9ca75
+
+// 2^168 * 6 * B
+
+        .quad   0x2db7937ff7f927c2
+        .quad   0xdb741f0617d0a635
+        .quad   0x5982f3a21155af76
+        .quad   0x4cf6e218647c2ded
+        .quad   0xcf361acd3c1c793a
+        .quad   0x2f9ebcac5a35bc3b
+        .quad   0x60e860e9a8cda6ab
+        .quad   0x055dc39b6dea1a13
+        .quad   0xb119227cc28d5bb6
+        .quad   0x07e24ebc774dffab
+        .quad   0xa83c78cee4a32c89
+        .quad   0x121a307710aa24b6
+
+// 2^168 * 7 * B
+
+        .quad   0xe4db5d5e9f034a97
+        .quad   0xe153fc093034bc2d
+        .quad   0x460546919551d3b1
+        .quad   0x333fc76c7a40e52d
+        .quad   0xd659713ec77483c9
+        .quad   0x88bfe077b82b96af
+        .quad   0x289e28231097bcd3
+        .quad   0x527bb94a6ced3a9b
+        .quad   0x563d992a995b482e
+        .quad   0x3405d07c6e383801
+        .quad   0x485035de2f64d8e5
+        .quad   0x6b89069b20a7a9f7
+
+// 2^168 * 8 * B
+
+        .quad   0x812aa0416270220d
+        .quad   0x995a89faf9245b4e
+        .quad   0xffadc4ce5072ef05
+        .quad   0x23bc2103aa73eb73
+        .quad   0x4082fa8cb5c7db77
+        .quad   0x068686f8c734c155
+        .quad   0x29e6c8d9f6e7a57e
+        .quad   0x0473d308a7639bcf
+        .quad   0xcaee792603589e05
+        .quad   0x2b4b421246dcc492
+        .quad   0x02a1ef74e601a94f
+        .quad   0x102f73bfde04341a
+
+// 2^172 * 1 * B
+
+        .quad   0xb5a2d50c7ec20d3e
+        .quad   0xc64bdd6ea0c97263
+        .quad   0x56e89052c1ff734d
+        .quad   0x4929c6f72b2ffaba
+        .quad   0x358ecba293a36247
+        .quad   0xaf8f9862b268fd65
+        .quad   0x412f7e9968a01c89
+        .quad   0x5786f312cd754524
+        .quad   0x337788ffca14032c
+        .quad   0xf3921028447f1ee3
+        .quad   0x8b14071f231bccad
+        .quad   0x4c817b4bf2344783
+
+// 2^172 * 2 * B
+
+        .quad   0x0ff853852871b96e
+        .quad   0xe13e9fab60c3f1bb
+        .quad   0xeefd595325344402
+        .quad   0x0a37c37075b7744b
+        .quad   0x413ba057a40b4484
+        .quad   0xba4c2e1a4f5f6a43
+        .quad   0x614ba0a5aee1d61c
+        .quad   0x78a1531a8b05dc53
+        .quad   0x6cbdf1703ad0562b
+        .quad   0x8ecf4830c92521a3
+        .quad   0xdaebd303fd8424e7
+        .quad   0x72ad82a42e5ec56f
+
+// 2^172 * 3 * B
+
+        .quad   0x3f9e8e35bafb65f6
+        .quad   0x39d69ec8f27293a1
+        .quad   0x6cb8cd958cf6a3d0
+        .quad   0x1734778173adae6d
+        .quad   0xc368939167024bc3
+        .quad   0x8e69d16d49502fda
+        .quad   0xfcf2ec3ce45f4b29
+        .quad   0x065f669ea3b4cbc4
+        .quad   0x8a00aec75532db4d
+        .quad   0xb869a4e443e31bb1
+        .quad   0x4a0f8552d3a7f515
+        .quad   0x19adeb7c303d7c08
+
+// 2^172 * 4 * B
+
+        .quad   0xc720cb6153ead9a3
+        .quad   0x55b2c97f512b636e
+        .quad   0xb1e35b5fd40290b1
+        .quad   0x2fd9ccf13b530ee2
+        .quad   0x9d05ba7d43c31794
+        .quad   0x2470c8ff93322526
+        .quad   0x8323dec816197438
+        .quad   0x2852709881569b53
+        .quad   0x07bd475b47f796b8
+        .quad   0xd2c7b013542c8f54
+        .quad   0x2dbd23f43b24f87e
+        .quad   0x6551afd77b0901d6
+
+// 2^172 * 5 * B
+
+        .quad   0x4546baaf54aac27f
+        .quad   0xf6f66fecb2a45a28
+        .quad   0x582d1b5b562bcfe8
+        .quad   0x44b123f3920f785f
+        .quad   0x68a24ce3a1d5c9ac
+        .quad   0xbb77a33d10ff6461
+        .quad   0x0f86ce4425d3166e
+        .quad   0x56507c0950b9623b
+        .quad   0x1206f0b7d1713e63
+        .quad   0x353fe3d915bafc74
+        .quad   0x194ceb970ad9d94d
+        .quad   0x62fadd7cf9d03ad3
+
+// 2^172 * 6 * B
+
+        .quad   0xc6b5967b5598a074
+        .quad   0x5efe91ce8e493e25
+        .quad   0xd4b72c4549280888
+        .quad   0x20ef1149a26740c2
+        .quad   0x3cd7bc61e7ce4594
+        .quad   0xcd6b35a9b7dd267e
+        .quad   0xa080abc84366ef27
+        .quad   0x6ec7c46f59c79711
+        .quad   0x2f07ad636f09a8a2
+        .quad   0x8697e6ce24205e7d
+        .quad   0xc0aefc05ee35a139
+        .quad   0x15e80958b5f9d897
+
+// 2^172 * 7 * B
+
+        .quad   0x25a5ef7d0c3e235b
+        .quad   0x6c39c17fbe134ee7
+        .quad   0xc774e1342dc5c327
+        .quad   0x021354b892021f39
+        .quad   0x4dd1ed355bb061c4
+        .quad   0x42dc0cef941c0700
+        .quad   0x61305dc1fd86340e
+        .quad   0x56b2cc930e55a443
+        .quad   0x1df79da6a6bfc5a2
+        .quad   0x02f3a2749fde4369
+        .quad   0xb323d9f2cda390a7
+        .quad   0x7be0847b8774d363
+
+// 2^172 * 8 * B
+
+        .quad   0x8c99cc5a8b3f55c3
+        .quad   0x0611d7253fded2a0
+        .quad   0xed2995ff36b70a36
+        .quad   0x1f699a54d78a2619
+        .quad   0x1466f5af5307fa11
+        .quad   0x817fcc7ded6c0af2
+        .quad   0x0a6de44ec3a4a3fb
+        .quad   0x74071475bc927d0b
+        .quad   0xe77292f373e7ea8a
+        .quad   0x296537d2cb045a31
+        .quad   0x1bd0653ed3274fde
+        .quad   0x2f9a2c4476bd2966
+
+// 2^176 * 1 * B
+
+        .quad   0xeb18b9ab7f5745c6
+        .quad   0x023a8aee5787c690
+        .quad   0xb72712da2df7afa9
+        .quad   0x36597d25ea5c013d
+        .quad   0xa2b4dae0b5511c9a
+        .quad   0x7ac860292bffff06
+        .quad   0x981f375df5504234
+        .quad   0x3f6bd725da4ea12d
+        .quad   0x734d8d7b106058ac
+        .quad   0xd940579e6fc6905f
+        .quad   0x6466f8f99202932d
+        .quad   0x7b7ecc19da60d6d0
+
+// 2^176 * 2 * B
+
+        .quad   0x78c2373c695c690d
+        .quad   0xdd252e660642906e
+        .quad   0x951d44444ae12bd2
+        .quad   0x4235ad7601743956
+        .quad   0x6dae4a51a77cfa9b
+        .quad   0x82263654e7a38650
+        .quad   0x09bbffcd8f2d82db
+        .quad   0x03bedc661bf5caba
+        .quad   0x6258cb0d078975f5
+        .quad   0x492942549189f298
+        .quad   0xa0cab423e2e36ee4
+        .quad   0x0e7ce2b0cdf066a1
+
+// 2^176 * 3 * B
+
+        .quad   0xc494643ac48c85a3
+        .quad   0xfd361df43c6139ad
+        .quad   0x09db17dd3ae94d48
+        .quad   0x666e0a5d8fb4674a
+        .quad   0xfea6fedfd94b70f9
+        .quad   0xf130c051c1fcba2d
+        .quad   0x4882d47e7f2fab89
+        .quad   0x615256138aeceeb5
+        .quad   0x2abbf64e4870cb0d
+        .quad   0xcd65bcf0aa458b6b
+        .quad   0x9abe4eba75e8985d
+        .quad   0x7f0bc810d514dee4
+
+// 2^176 * 4 * B
+
+        .quad   0xb9006ba426f4136f
+        .quad   0x8d67369e57e03035
+        .quad   0xcbc8dfd94f463c28
+        .quad   0x0d1f8dbcf8eedbf5
+        .quad   0x83ac9dad737213a0
+        .quad   0x9ff6f8ba2ef72e98
+        .quad   0x311e2edd43ec6957
+        .quad   0x1d3a907ddec5ab75
+        .quad   0xba1693313ed081dc
+        .quad   0x29329fad851b3480
+        .quad   0x0128013c030321cb
+        .quad   0x00011b44a31bfde3
+
+// 2^176 * 5 * B
+
+        .quad   0x3fdfa06c3fc66c0c
+        .quad   0x5d40e38e4dd60dd2
+        .quad   0x7ae38b38268e4d71
+        .quad   0x3ac48d916e8357e1
+        .quad   0x16561f696a0aa75c
+        .quad   0xc1bf725c5852bd6a
+        .quad   0x11a8dd7f9a7966ad
+        .quad   0x63d988a2d2851026
+        .quad   0x00120753afbd232e
+        .quad   0xe92bceb8fdd8f683
+        .quad   0xf81669b384e72b91
+        .quad   0x33fad52b2368a066
+
+// 2^176 * 6 * B
+
+        .quad   0x540649c6c5e41e16
+        .quad   0x0af86430333f7735
+        .quad   0xb2acfcd2f305e746
+        .quad   0x16c0f429a256dca7
+        .quad   0x8d2cc8d0c422cfe8
+        .quad   0x072b4f7b05a13acb
+        .quad   0xa3feb6e6ecf6a56f
+        .quad   0x3cc355ccb90a71e2
+        .quad   0xe9b69443903e9131
+        .quad   0xb8a494cb7a5637ce
+        .quad   0xc87cd1a4baba9244
+        .quad   0x631eaf426bae7568
+
+// 2^176 * 7 * B
+
+        .quad   0xb3e90410da66fe9f
+        .quad   0x85dd4b526c16e5a6
+        .quad   0xbc3d97611ef9bf83
+        .quad   0x5599648b1ea919b5
+        .quad   0x47d975b9a3700de8
+        .quad   0x7280c5fbe2f80552
+        .quad   0x53658f2732e45de1
+        .quad   0x431f2c7f665f80b5
+        .quad   0xd6026344858f7b19
+        .quad   0x14ab352fa1ea514a
+        .quad   0x8900441a2090a9d7
+        .quad   0x7b04715f91253b26
+
+// 2^176 * 8 * B
+
+        .quad   0x83edbd28acf6ae43
+        .quad   0x86357c8b7d5c7ab4
+        .quad   0xc0404769b7eb2c44
+        .quad   0x59b37bf5c2f6583f
+        .quad   0xb376c280c4e6bac6
+        .quad   0x970ed3dd6d1d9b0b
+        .quad   0xb09a9558450bf944
+        .quad   0x48d0acfa57cde223
+        .quad   0xb60f26e47dabe671
+        .quad   0xf1d1a197622f3a37
+        .quad   0x4208ce7ee9960394
+        .quad   0x16234191336d3bdb
+
+// 2^180 * 1 * B
+
+        .quad   0xf19aeac733a63aef
+        .quad   0x2c7fba5d4442454e
+        .quad   0x5da87aa04795e441
+        .quad   0x413051e1a4e0b0f5
+        .quad   0x852dd1fd3d578bbe
+        .quad   0x2b65ce72c3286108
+        .quad   0x658c07f4eace2273
+        .quad   0x0933f804ec38ab40
+        .quad   0xa7ab69798d496476
+        .quad   0x8121aadefcb5abc8
+        .quad   0xa5dc12ef7b539472
+        .quad   0x07fd47065e45351a
+
+// 2^180 * 2 * B
+
+        .quad   0xc8583c3d258d2bcd
+        .quad   0x17029a4daf60b73f
+        .quad   0xfa0fc9d6416a3781
+        .quad   0x1c1e5fba38b3fb23
+        .quad   0x304211559ae8e7c3
+        .quad   0xf281b229944882a5
+        .quad   0x8a13ac2e378250e4
+        .quad   0x014afa0954ba48f4
+        .quad   0xcb3197001bb3666c
+        .quad   0x330060524bffecb9
+        .quad   0x293711991a88233c
+        .quad   0x291884363d4ed364
+
+// 2^180 * 3 * B
+
+        .quad   0x033c6805dc4babfa
+        .quad   0x2c15bf5e5596ecc1
+        .quad   0x1bc70624b59b1d3b
+        .quad   0x3ede9850a19f0ec5
+        .quad   0xfb9d37c3bc1ab6eb
+        .quad   0x02be14534d57a240
+        .quad   0xf4d73415f8a5e1f6
+        .quad   0x5964f4300ccc8188
+        .quad   0xe44a23152d096800
+        .quad   0x5c08c55970866996
+        .quad   0xdf2db60a46affb6e
+        .quad   0x579155c1f856fd89
+
+// 2^180 * 4 * B
+
+        .quad   0x96324edd12e0c9ef
+        .quad   0x468b878df2420297
+        .quad   0x199a3776a4f573be
+        .quad   0x1e7fbcf18e91e92a
+        .quad   0xb5f16b630817e7a6
+        .quad   0x808c69233c351026
+        .quad   0x324a983b54cef201
+        .quad   0x53c092084a485345
+        .quad   0xd2d41481f1cbafbf
+        .quad   0x231d2db6716174e5
+        .quad   0x0b7d7656e2a55c98
+        .quad   0x3e955cd82aa495f6
+
+// 2^180 * 5 * B
+
+        .quad   0xe48f535e3ed15433
+        .quad   0xd075692a0d7270a3
+        .quad   0x40fbd21daade6387
+        .quad   0x14264887cf4495f5
+        .quad   0xab39f3ef61bb3a3f
+        .quad   0x8eb400652eb9193e
+        .quad   0xb5de6ecc38c11f74
+        .quad   0x654d7e9626f3c49f
+        .quad   0xe564cfdd5c7d2ceb
+        .quad   0x82eeafded737ccb9
+        .quad   0x6107db62d1f9b0ab
+        .quad   0x0b6baac3b4358dbb
+
+// 2^180 * 6 * B
+
+        .quad   0x7ae62bcb8622fe98
+        .quad   0x47762256ceb891af
+        .quad   0x1a5a92bcf2e406b4
+        .quad   0x7d29401784e41501
+        .quad   0x204abad63700a93b
+        .quad   0xbe0023d3da779373
+        .quad   0xd85f0346633ab709
+        .quad   0x00496dc490820412
+        .quad   0x1c74b88dc27e6360
+        .quad   0x074854268d14850c
+        .quad   0xa145fb7b3e0dcb30
+        .quad   0x10843f1b43803b23
+
+// 2^180 * 7 * B
+
+        .quad   0xc5f90455376276dd
+        .quad   0xce59158dd7645cd9
+        .quad   0x92f65d511d366b39
+        .quad   0x11574b6e526996c4
+        .quad   0xd56f672de324689b
+        .quad   0xd1da8aedb394a981
+        .quad   0xdd7b58fe9168cfed
+        .quad   0x7ce246cd4d56c1e8
+        .quad   0xb8f4308e7f80be53
+        .quad   0x5f3cb8cb34a9d397
+        .quad   0x18a961bd33cc2b2c
+        .quad   0x710045fb3a9af671
+
+// 2^180 * 8 * B
+
+        .quad   0x73f93d36101b95eb
+        .quad   0xfaef33794f6f4486
+        .quad   0x5651735f8f15e562
+        .quad   0x7fa3f19058b40da1
+        .quad   0xa03fc862059d699e
+        .quad   0x2370cfa19a619e69
+        .quad   0xc4fe3b122f823deb
+        .quad   0x1d1b056fa7f0844e
+        .quad   0x1bc64631e56bf61f
+        .quad   0xd379ab106e5382a3
+        .quad   0x4d58c57e0540168d
+        .quad   0x566256628442d8e4
+
+// 2^184 * 1 * B
+
+        .quad   0xb9e499def6267ff6
+        .quad   0x7772ca7b742c0843
+        .quad   0x23a0153fe9a4f2b1
+        .quad   0x2cdfdfecd5d05006
+        .quad   0xdd499cd61ff38640
+        .quad   0x29cd9bc3063625a0
+        .quad   0x51e2d8023dd73dc3
+        .quad   0x4a25707a203b9231
+        .quad   0x2ab7668a53f6ed6a
+        .quad   0x304242581dd170a1
+        .quad   0x4000144c3ae20161
+        .quad   0x5721896d248e49fc
+
+// 2^184 * 2 * B
+
+        .quad   0x0b6e5517fd181bae
+        .quad   0x9022629f2bb963b4
+        .quad   0x5509bce932064625
+        .quad   0x578edd74f63c13da
+        .quad   0x285d5091a1d0da4e
+        .quad   0x4baa6fa7b5fe3e08
+        .quad   0x63e5177ce19393b3
+        .quad   0x03c935afc4b030fd
+        .quad   0x997276c6492b0c3d
+        .quad   0x47ccc2c4dfe205fc
+        .quad   0xdcd29b84dd623a3c
+        .quad   0x3ec2ab590288c7a2
+
+// 2^184 * 3 * B
+
+        .quad   0xa1a0d27be4d87bb9
+        .quad   0xa98b4deb61391aed
+        .quad   0x99a0ddd073cb9b83
+        .quad   0x2dd5c25a200fcace
+        .quad   0xa7213a09ae32d1cb
+        .quad   0x0f2b87df40f5c2d5
+        .quad   0x0baea4c6e81eab29
+        .quad   0x0e1bf66c6adbac5e
+        .quad   0xe2abd5e9792c887e
+        .quad   0x1a020018cb926d5d
+        .quad   0xbfba69cdbaae5f1e
+        .quad   0x730548b35ae88f5f
+
+// 2^184 * 4 * B
+
+        .quad   0xc43551a3cba8b8ee
+        .quad   0x65a26f1db2115f16
+        .quad   0x760f4f52ab8c3850
+        .quad   0x3043443b411db8ca
+        .quad   0x805b094ba1d6e334
+        .quad   0xbf3ef17709353f19
+        .quad   0x423f06cb0622702b
+        .quad   0x585a2277d87845dd
+        .quad   0xa18a5f8233d48962
+        .quad   0x6698c4b5ec78257f
+        .quad   0xa78e6fa5373e41ff
+        .quad   0x7656278950ef981f
+
+// 2^184 * 5 * B
+
+        .quad   0x38c3cf59d51fc8c0
+        .quad   0x9bedd2fd0506b6f2
+        .quad   0x26bf109fab570e8f
+        .quad   0x3f4160a8c1b846a6
+        .quad   0xe17073a3ea86cf9d
+        .quad   0x3a8cfbb707155fdc
+        .quad   0x4853e7fc31838a8e
+        .quad   0x28bbf484b613f616
+        .quad   0xf2612f5c6f136c7c
+        .quad   0xafead107f6dd11be
+        .quad   0x527e9ad213de6f33
+        .quad   0x1e79cb358188f75d
+
+// 2^184 * 6 * B
+
+        .quad   0x013436c3eef7e3f1
+        .quad   0x828b6a7ffe9e10f8
+        .quad   0x7ff908e5bcf9defc
+        .quad   0x65d7951b3a3b3831
+        .quad   0x77e953d8f5e08181
+        .quad   0x84a50c44299dded9
+        .quad   0xdc6c2d0c864525e5
+        .quad   0x478ab52d39d1f2f4
+        .quad   0x66a6a4d39252d159
+        .quad   0xe5dde1bc871ac807
+        .quad   0xb82c6b40a6c1c96f
+        .quad   0x16d87a411a212214
+
+// 2^184 * 7 * B
+
+        .quad   0xb3bd7e5a42066215
+        .quad   0x879be3cd0c5a24c1
+        .quad   0x57c05db1d6f994b7
+        .quad   0x28f87c8165f38ca6
+        .quad   0xfba4d5e2d54e0583
+        .quad   0xe21fafd72ebd99fa
+        .quad   0x497ac2736ee9778f
+        .quad   0x1f990b577a5a6dde
+        .quad   0xa3344ead1be8f7d6
+        .quad   0x7d1e50ebacea798f
+        .quad   0x77c6569e520de052
+        .quad   0x45882fe1534d6d3e
+
+// 2^184 * 8 * B
+
+        .quad   0x6669345d757983d6
+        .quad   0x62b6ed1117aa11a6
+        .quad   0x7ddd1857985e128f
+        .quad   0x688fe5b8f626f6dd
+        .quad   0xd8ac9929943c6fe4
+        .quad   0xb5f9f161a38392a2
+        .quad   0x2699db13bec89af3
+        .quad   0x7dcf843ce405f074
+        .quad   0x6c90d6484a4732c0
+        .quad   0xd52143fdca563299
+        .quad   0xb3be28c3915dc6e1
+        .quad   0x6739687e7327191b
+
+// 2^188 * 1 * B
+
+        .quad   0x9f65c5ea200814cf
+        .quad   0x840536e169a31740
+        .quad   0x8b0ed13925c8b4ad
+        .quad   0x0080dbafe936361d
+        .quad   0x8ce5aad0c9cb971f
+        .quad   0x1156aaa99fd54a29
+        .quad   0x41f7247015af9b78
+        .quad   0x1fe8cca8420f49aa
+        .quad   0x72a1848f3c0cc82a
+        .quad   0x38c560c2877c9e54
+        .quad   0x5004e228ce554140
+        .quad   0x042418a103429d71
+
+// 2^188 * 2 * B
+
+        .quad   0x899dea51abf3ff5f
+        .quad   0x9b93a8672fc2d8ba
+        .quad   0x2c38cb97be6ebd5c
+        .quad   0x114d578497263b5d
+        .quad   0x58e84c6f20816247
+        .quad   0x8db2b2b6e36fd793
+        .quad   0x977182561d484d85
+        .quad   0x0822024f8632abd7
+        .quad   0xb301bb7c6b1beca3
+        .quad   0x55393f6dc6eb1375
+        .quad   0x910d281097b6e4eb
+        .quad   0x1ad4548d9d479ea3
+
+// 2^188 * 3 * B
+
+        .quad   0xcd5a7da0389a48fd
+        .quad   0xb38fa4aa9a78371e
+        .quad   0xc6d9761b2cdb8e6c
+        .quad   0x35cf51dbc97e1443
+        .quad   0xa06fe66d0fe9fed3
+        .quad   0xa8733a401c587909
+        .quad   0x30d14d800df98953
+        .quad   0x41ce5876c7b30258
+        .quad   0x59ac3bc5d670c022
+        .quad   0xeae67c109b119406
+        .quad   0x9798bdf0b3782fda
+        .quad   0x651e3201fd074092
+
+// 2^188 * 4 * B
+
+        .quad   0xd63d8483ef30c5cf
+        .quad   0x4cd4b4962361cc0c
+        .quad   0xee90e500a48426ac
+        .quad   0x0af51d7d18c14eeb
+        .quad   0xa57ba4a01efcae9e
+        .quad   0x769f4beedc308a94
+        .quad   0xd1f10eeb3603cb2e
+        .quad   0x4099ce5e7e441278
+        .quad   0x1ac98e4f8a5121e9
+        .quad   0x7dae9544dbfa2fe0
+        .quad   0x8320aa0dd6430df9
+        .quad   0x667282652c4a2fb5
+
+// 2^188 * 5 * B
+
+        .quad   0x874621f4d86bc9ab
+        .quad   0xb54c7bbe56fe6fea
+        .quad   0x077a24257fadc22c
+        .quad   0x1ab53be419b90d39
+        .quad   0xada8b6e02946db23
+        .quad   0x1c0ce51a7b253ab7
+        .quad   0x8448c85a66dd485b
+        .quad   0x7f1fc025d0675adf
+        .quad   0xd8ee1b18319ea6aa
+        .quad   0x004d88083a21f0da
+        .quad   0x3bd6aa1d883a4f4b
+        .quad   0x4db9a3a6dfd9fd14
+
+// 2^188 * 6 * B
+
+        .quad   0x8ce7b23bb99c0755
+        .quad   0x35c5d6edc4f50f7a
+        .quad   0x7e1e2ed2ed9b50c3
+        .quad   0x36305f16e8934da1
+        .quad   0xd95b00bbcbb77c68
+        .quad   0xddbc846a91f17849
+        .quad   0x7cf700aebe28d9b3
+        .quad   0x5ce1285c85d31f3e
+        .quad   0x31b6972d98b0bde8
+        .quad   0x7d920706aca6de5b
+        .quad   0xe67310f8908a659f
+        .quad   0x50fac2a6efdf0235
+
+// 2^188 * 7 * B
+
+        .quad   0xf3d3a9f35b880f5a
+        .quad   0xedec050cdb03e7c2
+        .quad   0xa896981ff9f0b1a2
+        .quad   0x49a4ae2bac5e34a4
+        .quad   0x295b1c86f6f449bc
+        .quad   0x51b2e84a1f0ab4dd
+        .quad   0xc001cb30aa8e551d
+        .quad   0x6a28d35944f43662
+        .quad   0x28bb12ee04a740e0
+        .quad   0x14313bbd9bce8174
+        .quad   0x72f5b5e4e8c10c40
+        .quad   0x7cbfb19936adcd5b
+
+// 2^188 * 8 * B
+
+        .quad   0xa311ddc26b89792d
+        .quad   0x1b30b4c6da512664
+        .quad   0x0ca77b4ccf150859
+        .quad   0x1de443df1b009408
+        .quad   0x8e793a7acc36e6e0
+        .quad   0xf9fab7a37d586eed
+        .quad   0x3a4f9692bae1f4e4
+        .quad   0x1c14b03eff5f447e
+        .quad   0x19647bd114a85291
+        .quad   0x57b76cb21034d3af
+        .quad   0x6329db440f9d6dfa
+        .quad   0x5ef43e586a571493
+
+// 2^192 * 1 * B
+
+        .quad   0xef782014385675a6
+        .quad   0xa2649f30aafda9e8
+        .quad   0x4cd1eb505cdfa8cb
+        .quad   0x46115aba1d4dc0b3
+        .quad   0xa66dcc9dc80c1ac0
+        .quad   0x97a05cf41b38a436
+        .quad   0xa7ebf3be95dbd7c6
+        .quad   0x7da0b8f68d7e7dab
+        .quad   0xd40f1953c3b5da76
+        .quad   0x1dac6f7321119e9b
+        .quad   0x03cc6021feb25960
+        .quad   0x5a5f887e83674b4b
+
+// 2^192 * 2 * B
+
+        .quad   0x8f6301cf70a13d11
+        .quad   0xcfceb815350dd0c4
+        .quad   0xf70297d4a4bca47e
+        .quad   0x3669b656e44d1434
+        .quad   0x9e9628d3a0a643b9
+        .quad   0xb5c3cb00e6c32064
+        .quad   0x9b5302897c2dec32
+        .quad   0x43e37ae2d5d1c70c
+        .quad   0x387e3f06eda6e133
+        .quad   0x67301d5199a13ac0
+        .quad   0xbd5ad8f836263811
+        .quad   0x6a21e6cd4fd5e9be
+
+// 2^192 * 3 * B
+
+        .quad   0xf1c6170a3046e65f
+        .quad   0x58712a2a00d23524
+        .quad   0x69dbbd3c8c82b755
+        .quad   0x586bf9f1a195ff57
+        .quad   0xef4129126699b2e3
+        .quad   0x71d30847708d1301
+        .quad   0x325432d01182b0bd
+        .quad   0x45371b07001e8b36
+        .quad   0xa6db088d5ef8790b
+        .quad   0x5278f0dc610937e5
+        .quad   0xac0349d261a16eb8
+        .quad   0x0eafb03790e52179
+
+// 2^192 * 4 * B
+
+        .quad   0x960555c13748042f
+        .quad   0x219a41e6820baa11
+        .quad   0x1c81f73873486d0c
+        .quad   0x309acc675a02c661
+        .quad   0x5140805e0f75ae1d
+        .quad   0xec02fbe32662cc30
+        .quad   0x2cebdf1eea92396d
+        .quad   0x44ae3344c5435bb3
+        .quad   0x9cf289b9bba543ee
+        .quad   0xf3760e9d5ac97142
+        .quad   0x1d82e5c64f9360aa
+        .quad   0x62d5221b7f94678f
+
+// 2^192 * 5 * B
+
+        .quad   0x524c299c18d0936d
+        .quad   0xc86bb56c8a0c1a0c
+        .quad   0xa375052edb4a8631
+        .quad   0x5c0efde4bc754562
+        .quad   0x7585d4263af77a3c
+        .quad   0xdfae7b11fee9144d
+        .quad   0xa506708059f7193d
+        .quad   0x14f29a5383922037
+        .quad   0xdf717edc25b2d7f5
+        .quad   0x21f970db99b53040
+        .quad   0xda9234b7c3ed4c62
+        .quad   0x5e72365c7bee093e
+
+// 2^192 * 6 * B
+
+        .quad   0x575bfc074571217f
+        .quad   0x3779675d0694d95b
+        .quad   0x9a0a37bbf4191e33
+        .quad   0x77f1104c47b4eabc
+        .quad   0x7d9339062f08b33e
+        .quad   0x5b9659e5df9f32be
+        .quad   0xacff3dad1f9ebdfd
+        .quad   0x70b20555cb7349b7
+        .quad   0xbe5113c555112c4c
+        .quad   0x6688423a9a881fcd
+        .quad   0x446677855e503b47
+        .quad   0x0e34398f4a06404a
+
+// 2^192 * 7 * B
+
+        .quad   0xb67d22d93ecebde8
+        .quad   0x09b3e84127822f07
+        .quad   0x743fa61fb05b6d8d
+        .quad   0x5e5405368a362372
+        .quad   0x18930b093e4b1928
+        .quad   0x7de3e10e73f3f640
+        .quad   0xf43217da73395d6f
+        .quad   0x6f8aded6ca379c3e
+        .quad   0xe340123dfdb7b29a
+        .quad   0x487b97e1a21ab291
+        .quad   0xf9967d02fde6949e
+        .quad   0x780de72ec8d3de97
+
+// 2^192 * 8 * B
+
+        .quad   0x0ae28545089ae7bc
+        .quad   0x388ddecf1c7f4d06
+        .quad   0x38ac15510a4811b8
+        .quad   0x0eb28bf671928ce4
+        .quad   0x671feaf300f42772
+        .quad   0x8f72eb2a2a8c41aa
+        .quad   0x29a17fd797373292
+        .quad   0x1defc6ad32b587a6
+        .quad   0xaf5bbe1aef5195a7
+        .quad   0x148c1277917b15ed
+        .quad   0x2991f7fb7ae5da2e
+        .quad   0x467d201bf8dd2867
+
+// 2^196 * 1 * B
+
+        .quad   0x7906ee72f7bd2e6b
+        .quad   0x05d270d6109abf4e
+        .quad   0x8d5cfe45b941a8a4
+        .quad   0x44c218671c974287
+        .quad   0x745f9d56296bc318
+        .quad   0x993580d4d8152e65
+        .quad   0xb0e5b13f5839e9ce
+        .quad   0x51fc2b28d43921c0
+        .quad   0x1b8fd11795e2a98c
+        .quad   0x1c4e5ee12b6b6291
+        .quad   0x5b30e7107424b572
+        .quad   0x6e6b9de84c4f4ac6
+
+// 2^196 * 2 * B
+
+        .quad   0xdff25fce4b1de151
+        .quad   0xd841c0c7e11c4025
+        .quad   0x2554b3c854749c87
+        .quad   0x2d292459908e0df9
+        .quad   0x6b7c5f10f80cb088
+        .quad   0x736b54dc56e42151
+        .quad   0xc2b620a5c6ef99c4
+        .quad   0x5f4c802cc3a06f42
+        .quad   0x9b65c8f17d0752da
+        .quad   0x881ce338c77ee800
+        .quad   0xc3b514f05b62f9e3
+        .quad   0x66ed5dd5bec10d48
+
+// 2^196 * 3 * B
+
+        .quad   0x7d38a1c20bb2089d
+        .quad   0x808334e196ccd412
+        .quad   0xc4a70b8c6c97d313
+        .quad   0x2eacf8bc03007f20
+        .quad   0xf0adf3c9cbca047d
+        .quad   0x81c3b2cbf4552f6b
+        .quad   0xcfda112d44735f93
+        .quad   0x1f23a0c77e20048c
+        .quad   0xf235467be5bc1570
+        .quad   0x03d2d9020dbab38c
+        .quad   0x27529aa2fcf9e09e
+        .quad   0x0840bef29d34bc50
+
+// 2^196 * 4 * B
+
+        .quad   0x796dfb35dc10b287
+        .quad   0x27176bcd5c7ff29d
+        .quad   0x7f3d43e8c7b24905
+        .quad   0x0304f5a191c54276
+        .quad   0xcd54e06b7f37e4eb
+        .quad   0x8cc15f87f5e96cca
+        .quad   0xb8248bb0d3597dce
+        .quad   0x246affa06074400c
+        .quad   0x37d88e68fbe45321
+        .quad   0x86097548c0d75032
+        .quad   0x4e9b13ef894a0d35
+        .quad   0x25a83cac5753d325
+
+// 2^196 * 5 * B
+
+        .quad   0x10222f48eed8165e
+        .quad   0x623fc1234b8bcf3a
+        .quad   0x1e145c09c221e8f0
+        .quad   0x7ccfa59fca782630
+        .quad   0x9f0f66293952b6e2
+        .quad   0x33db5e0e0934267b
+        .quad   0xff45252bd609fedc
+        .quad   0x06be10f5c506e0c9
+        .quad   0x1a9615a9b62a345f
+        .quad   0x22050c564a52fecc
+        .quad   0xa7a2788528bc0dfe
+        .quad   0x5e82770a1a1ee71d
+
+// 2^196 * 6 * B
+
+        .quad   0x35425183ad896a5c
+        .quad   0xe8673afbe78d52f6
+        .quad   0x2c66f25f92a35f64
+        .quad   0x09d04f3b3b86b102
+        .quad   0xe802e80a42339c74
+        .quad   0x34175166a7fffae5
+        .quad   0x34865d1f1c408cae
+        .quad   0x2cca982c605bc5ee
+        .quad   0xfd2d5d35197dbe6e
+        .quad   0x207c2eea8be4ffa3
+        .quad   0x2613d8db325ae918
+        .quad   0x7a325d1727741d3e
+
+// 2^196 * 7 * B
+
+        .quad   0xd036b9bbd16dfde2
+        .quad   0xa2055757c497a829
+        .quad   0x8e6cc966a7f12667
+        .quad   0x4d3b1a791239c180
+        .quad   0xecd27d017e2a076a
+        .quad   0xd788689f1636495e
+        .quad   0x52a61af0919233e5
+        .quad   0x2a479df17bb1ae64
+        .quad   0x9e5eee8e33db2710
+        .quad   0x189854ded6c43ca5
+        .quad   0xa41c22c592718138
+        .quad   0x27ad5538a43a5e9b
+
+// 2^196 * 8 * B
+
+        .quad   0x2746dd4b15350d61
+        .quad   0xd03fcbc8ee9521b7
+        .quad   0xe86e365a138672ca
+        .quad   0x510e987f7e7d89e2
+        .quad   0xcb5a7d638e47077c
+        .quad   0x8db7536120a1c059
+        .quad   0x549e1e4d8bedfdcc
+        .quad   0x080153b7503b179d
+        .quad   0xdda69d930a3ed3e3
+        .quad   0x3d386ef1cd60a722
+        .quad   0xc817ad58bdaa4ee6
+        .quad   0x23be8d554fe7372a
+
+// 2^200 * 1 * B
+
+        .quad   0x95fe919a74ef4fad
+        .quad   0x3a827becf6a308a2
+        .quad   0x964e01d309a47b01
+        .quad   0x71c43c4f5ba3c797
+        .quad   0xbc1ef4bd567ae7a9
+        .quad   0x3f624cb2d64498bd
+        .quad   0xe41064d22c1f4ec8
+        .quad   0x2ef9c5a5ba384001
+        .quad   0xb6fd6df6fa9e74cd
+        .quad   0xf18278bce4af267a
+        .quad   0x8255b3d0f1ef990e
+        .quad   0x5a758ca390c5f293
+
+// 2^200 * 2 * B
+
+        .quad   0xa2b72710d9462495
+        .quad   0x3aa8c6d2d57d5003
+        .quad   0xe3d400bfa0b487ca
+        .quad   0x2dbae244b3eb72ec
+        .quad   0x8ce0918b1d61dc94
+        .quad   0x8ded36469a813066
+        .quad   0xd4e6a829afe8aad3
+        .quad   0x0a738027f639d43f
+        .quad   0x980f4a2f57ffe1cc
+        .quad   0x00670d0de1839843
+        .quad   0x105c3f4a49fb15fd
+        .quad   0x2698ca635126a69c
+
+// 2^200 * 3 * B
+
+        .quad   0xe765318832b0ba78
+        .quad   0x381831f7925cff8b
+        .quad   0x08a81b91a0291fcc
+        .quad   0x1fb43dcc49caeb07
+        .quad   0x2e3d702f5e3dd90e
+        .quad   0x9e3f0918e4d25386
+        .quad   0x5e773ef6024da96a
+        .quad   0x3c004b0c4afa3332
+        .quad   0x9aa946ac06f4b82b
+        .quad   0x1ca284a5a806c4f3
+        .quad   0x3ed3265fc6cd4787
+        .quad   0x6b43fd01cd1fd217
+
+// 2^200 * 4 * B
+
+        .quad   0xc7a75d4b4697c544
+        .quad   0x15fdf848df0fffbf
+        .quad   0x2868b9ebaa46785a
+        .quad   0x5a68d7105b52f714
+        .quad   0xb5c742583e760ef3
+        .quad   0x75dc52b9ee0ab990
+        .quad   0xbf1427c2072b923f
+        .quad   0x73420b2d6ff0d9f0
+        .quad   0xaf2cf6cb9e851e06
+        .quad   0x8f593913c62238c4
+        .quad   0xda8ab89699fbf373
+        .quad   0x3db5632fea34bc9e
+
+// 2^200 * 5 * B
+
+        .quad   0xf46eee2bf75dd9d8
+        .quad   0x0d17b1f6396759a5
+        .quad   0x1bf2d131499e7273
+        .quad   0x04321adf49d75f13
+        .quad   0x2e4990b1829825d5
+        .quad   0xedeaeb873e9a8991
+        .quad   0xeef03d394c704af8
+        .quad   0x59197ea495df2b0e
+        .quad   0x04e16019e4e55aae
+        .quad   0xe77b437a7e2f92e9
+        .quad   0xc7ce2dc16f159aa4
+        .quad   0x45eafdc1f4d70cc0
+
+// 2^200 * 6 * B
+
+        .quad   0x698401858045d72b
+        .quad   0x4c22faa2cf2f0651
+        .quad   0x941a36656b222dc6
+        .quad   0x5a5eebc80362dade
+        .quad   0xb60e4624cfccb1ed
+        .quad   0x59dbc292bd5c0395
+        .quad   0x31a09d1ddc0481c9
+        .quad   0x3f73ceea5d56d940
+        .quad   0xb7a7bfd10a4e8dc6
+        .quad   0xbe57007e44c9b339
+        .quad   0x60c1207f1557aefa
+        .quad   0x26058891266218db
+
+// 2^200 * 7 * B
+
+        .quad   0x59f704a68360ff04
+        .quad   0xc3d93fde7661e6f4
+        .quad   0x831b2a7312873551
+        .quad   0x54ad0c2e4e615d57
+        .quad   0x4c818e3cc676e542
+        .quad   0x5e422c9303ceccad
+        .quad   0xec07cccab4129f08
+        .quad   0x0dedfa10b24443b8
+        .quad   0xee3b67d5b82b522a
+        .quad   0x36f163469fa5c1eb
+        .quad   0xa5b4d2f26ec19fd3
+        .quad   0x62ecb2baa77a9408
+
+// 2^200 * 8 * B
+
+        .quad   0xe5ed795261152b3d
+        .quad   0x4962357d0eddd7d1
+        .quad   0x7482c8d0b96b4c71
+        .quad   0x2e59f919a966d8be
+        .quad   0x92072836afb62874
+        .quad   0x5fcd5e8579e104a5
+        .quad   0x5aad01adc630a14a
+        .quad   0x61913d5075663f98
+        .quad   0x0dc62d361a3231da
+        .quad   0xfa47583294200270
+        .quad   0x02d801513f9594ce
+        .quad   0x3ddbc2a131c05d5c
+
+// 2^204 * 1 * B
+
+        .quad   0x3f50a50a4ffb81ef
+        .quad   0xb1e035093bf420bf
+        .quad   0x9baa8e1cc6aa2cd0
+        .quad   0x32239861fa237a40
+        .quad   0xfb735ac2004a35d1
+        .quad   0x31de0f433a6607c3
+        .quad   0x7b8591bfc528d599
+        .quad   0x55be9a25f5bb050c
+        .quad   0x0d005acd33db3dbf
+        .quad   0x0111b37c80ac35e2
+        .quad   0x4892d66c6f88ebeb
+        .quad   0x770eadb16508fbcd
+
+// 2^204 * 2 * B
+
+        .quad   0x8451f9e05e4e89dd
+        .quad   0xc06302ffbc793937
+        .quad   0x5d22749556a6495c
+        .quad   0x09a6755ca05603fb
+        .quad   0xf1d3b681a05071b9
+        .quad   0x2207659a3592ff3a
+        .quad   0x5f0169297881e40e
+        .quad   0x16bedd0e86ba374e
+        .quad   0x5ecccc4f2c2737b5
+        .quad   0x43b79e0c2dccb703
+        .quad   0x33e008bc4ec43df3
+        .quad   0x06c1b840f07566c0
+
+// 2^204 * 3 * B
+
+        .quad   0x7688a5c6a388f877
+        .quad   0x02a96c14deb2b6ac
+        .quad   0x64c9f3431b8c2af8
+        .quad   0x3628435554a1eed6
+        .quad   0x69ee9e7f9b02805c
+        .quad   0xcbff828a547d1640
+        .quad   0x3d93a869b2430968
+        .quad   0x46b7b8cd3fe26972
+        .quad   0xe9812086fe7eebe0
+        .quad   0x4cba6be72f515437
+        .quad   0x1d04168b516efae9
+        .quad   0x5ea1391043982cb9
+
+// 2^204 * 4 * B
+
+        .quad   0x49125c9cf4702ee1
+        .quad   0x4520b71f8b25b32d
+        .quad   0x33193026501fef7e
+        .quad   0x656d8997c8d2eb2b
+        .quad   0x6f2b3be4d5d3b002
+        .quad   0xafec33d96a09c880
+        .quad   0x035f73a4a8bcc4cc
+        .quad   0x22c5b9284662198b
+        .quad   0xcb58c8fe433d8939
+        .quad   0x89a0cb2e6a8d7e50
+        .quad   0x79ca955309fbbe5a
+        .quad   0x0c626616cd7fc106
+
+// 2^204 * 5 * B
+
+        .quad   0x1ffeb80a4879b61f
+        .quad   0x6396726e4ada21ed
+        .quad   0x33c7b093368025ba
+        .quad   0x471aa0c6f3c31788
+        .quad   0x8fdfc379fbf454b1
+        .quad   0x45a5a970f1a4b771
+        .quad   0xac921ef7bad35915
+        .quad   0x42d088dca81c2192
+        .quad   0x8fda0f37a0165199
+        .quad   0x0adadb77c8a0e343
+        .quad   0x20fbfdfcc875e820
+        .quad   0x1cf2bea80c2206e7
+
+// 2^204 * 6 * B
+
+        .quad   0xc2ddf1deb36202ac
+        .quad   0x92a5fe09d2e27aa5
+        .quad   0x7d1648f6fc09f1d3
+        .quad   0x74c2cc0513bc4959
+        .quad   0x982d6e1a02c0412f
+        .quad   0x90fa4c83db58e8fe
+        .quad   0x01c2f5bcdcb18bc0
+        .quad   0x686e0c90216abc66
+        .quad   0x1fadbadba54395a7
+        .quad   0xb41a02a0ae0da66a
+        .quad   0xbf19f598bba37c07
+        .quad   0x6a12b8acde48430d
+
+// 2^204 * 7 * B
+
+        .quad   0xf8daea1f39d495d9
+        .quad   0x592c190e525f1dfc
+        .quad   0xdb8cbd04c9991d1b
+        .quad   0x11f7fda3d88f0cb7
+        .quad   0x793bdd801aaeeb5f
+        .quad   0x00a2a0aac1518871
+        .quad   0xe8a373a31f2136b4
+        .quad   0x48aab888fc91ef19
+        .quad   0x041f7e925830f40e
+        .quad   0x002d6ca979661c06
+        .quad   0x86dc9ff92b046a2e
+        .quad   0x760360928b0493d1
+
+// 2^204 * 8 * B
+
+        .quad   0x21bb41c6120cf9c6
+        .quad   0xeab2aa12decda59b
+        .quad   0xc1a72d020aa48b34
+        .quad   0x215d4d27e87d3b68
+        .quad   0xb43108e5695a0b05
+        .quad   0x6cb00ee8ad37a38b
+        .quad   0x5edad6eea3537381
+        .quad   0x3f2602d4b6dc3224
+        .quad   0xc8b247b65bcaf19c
+        .quad   0x49779dc3b1b2c652
+        .quad   0x89a180bbd5ece2e2
+        .quad   0x13f098a3cec8e039
+
+// 2^208 * 1 * B
+
+        .quad   0x9adc0ff9ce5ec54b
+        .quad   0x039c2a6b8c2f130d
+        .quad   0x028007c7f0f89515
+        .quad   0x78968314ac04b36b
+        .quad   0xf3aa57a22796bb14
+        .quad   0x883abab79b07da21
+        .quad   0xe54be21831a0391c
+        .quad   0x5ee7fb38d83205f9
+        .quad   0x538dfdcb41446a8e
+        .quad   0xa5acfda9434937f9
+        .quad   0x46af908d263c8c78
+        .quad   0x61d0633c9bca0d09
+
+// 2^208 * 2 * B
+
+        .quad   0x63744935ffdb2566
+        .quad   0xc5bd6b89780b68bb
+        .quad   0x6f1b3280553eec03
+        .quad   0x6e965fd847aed7f5
+        .quad   0xada328bcf8fc73df
+        .quad   0xee84695da6f037fc
+        .quad   0x637fb4db38c2a909
+        .quad   0x5b23ac2df8067bdc
+        .quad   0x9ad2b953ee80527b
+        .quad   0xe88f19aafade6d8d
+        .quad   0x0e711704150e82cf
+        .quad   0x79b9bbb9dd95dedc
+
+// 2^208 * 3 * B
+
+        .quad   0xebb355406a3126c2
+        .quad   0xd26383a868c8c393
+        .quad   0x6c0c6429e5b97a82
+        .quad   0x5065f158c9fd2147
+        .quad   0xd1997dae8e9f7374
+        .quad   0xa032a2f8cfbb0816
+        .quad   0xcd6cba126d445f0a
+        .quad   0x1ba811460accb834
+        .quad   0x708169fb0c429954
+        .quad   0xe14600acd76ecf67
+        .quad   0x2eaab98a70e645ba
+        .quad   0x3981f39e58a4faf2
+
+// 2^208 * 4 * B
+
+        .quad   0x18fb8a7559230a93
+        .quad   0x1d168f6960e6f45d
+        .quad   0x3a85a94514a93cb5
+        .quad   0x38dc083705acd0fd
+        .quad   0xc845dfa56de66fde
+        .quad   0xe152a5002c40483a
+        .quad   0xe9d2e163c7b4f632
+        .quad   0x30f4452edcbc1b65
+        .quad   0x856d2782c5759740
+        .quad   0xfa134569f99cbecc
+        .quad   0x8844fc73c0ea4e71
+        .quad   0x632d9a1a593f2469
+
+// 2^208 * 5 * B
+
+        .quad   0xf6bb6b15b807cba6
+        .quad   0x1823c7dfbc54f0d7
+        .quad   0xbb1d97036e29670b
+        .quad   0x0b24f48847ed4a57
+        .quad   0xbf09fd11ed0c84a7
+        .quad   0x63f071810d9f693a
+        .quad   0x21908c2d57cf8779
+        .quad   0x3a5a7df28af64ba2
+        .quad   0xdcdad4be511beac7
+        .quad   0xa4538075ed26ccf2
+        .quad   0xe19cff9f005f9a65
+        .quad   0x34fcf74475481f63
+
+// 2^208 * 6 * B
+
+        .quad   0xc197e04c789767ca
+        .quad   0xb8714dcb38d9467d
+        .quad   0x55de888283f95fa8
+        .quad   0x3d3bdc164dfa63f7
+        .quad   0xa5bb1dab78cfaa98
+        .quad   0x5ceda267190b72f2
+        .quad   0x9309c9110a92608e
+        .quad   0x0119a3042fb374b0
+        .quad   0x67a2d89ce8c2177d
+        .quad   0x669da5f66895d0c1
+        .quad   0xf56598e5b282a2b0
+        .quad   0x56c088f1ede20a73
+
+// 2^208 * 7 * B
+
+        .quad   0x336d3d1110a86e17
+        .quad   0xd7f388320b75b2fa
+        .quad   0xf915337625072988
+        .quad   0x09674c6b99108b87
+        .quad   0x581b5fac24f38f02
+        .quad   0xa90be9febae30cbd
+        .quad   0x9a2169028acf92f0
+        .quad   0x038b7ea48359038f
+        .quad   0x9f4ef82199316ff8
+        .quad   0x2f49d282eaa78d4f
+        .quad   0x0971a5ab5aef3174
+        .quad   0x6e5e31025969eb65
+
+// 2^208 * 8 * B
+
+        .quad   0xb16c62f587e593fb
+        .quad   0x4999eddeca5d3e71
+        .quad   0xb491c1e014cc3e6d
+        .quad   0x08f5114789a8dba8
+        .quad   0x3304fb0e63066222
+        .quad   0xfb35068987acba3f
+        .quad   0xbd1924778c1061a3
+        .quad   0x3058ad43d1838620
+        .quad   0x323c0ffde57663d0
+        .quad   0x05c3df38a22ea610
+        .quad   0xbdc78abdac994f9a
+        .quad   0x26549fa4efe3dc99
+
+// 2^212 * 1 * B
+
+        .quad   0x738b38d787ce8f89
+        .quad   0xb62658e24179a88d
+        .quad   0x30738c9cf151316d
+        .quad   0x49128c7f727275c9
+        .quad   0x04dbbc17f75396b9
+        .quad   0x69e6a2d7d2f86746
+        .quad   0xc6409d99f53eabc6
+        .quad   0x606175f6332e25d2
+        .quad   0x4021370ef540e7dd
+        .quad   0x0910d6f5a1f1d0a5
+        .quad   0x4634aacd5b06b807
+        .quad   0x6a39e6356944f235
+
+// 2^212 * 2 * B
+
+        .quad   0x96cd5640df90f3e7
+        .quad   0x6c3a760edbfa25ea
+        .quad   0x24f3ef0959e33cc4
+        .quad   0x42889e7e530d2e58
+        .quad   0x1da1965774049e9d
+        .quad   0xfbcd6ea198fe352b
+        .quad   0xb1cbcd50cc5236a6
+        .quad   0x1f5ec83d3f9846e2
+        .quad   0x8efb23c3328ccb75
+        .quad   0xaf42a207dd876ee9
+        .quad   0x20fbdadc5dfae796
+        .quad   0x241e246b06bf9f51
+
+// 2^212 * 3 * B
+
+        .quad   0x29e68e57ad6e98f6
+        .quad   0x4c9260c80b462065
+        .quad   0x3f00862ea51ebb4b
+        .quad   0x5bc2c77fb38d9097
+        .quad   0x7eaafc9a6280bbb8
+        .quad   0x22a70f12f403d809
+        .quad   0x31ce40bb1bfc8d20
+        .quad   0x2bc65635e8bd53ee
+        .quad   0xe8d5dc9fa96bad93
+        .quad   0xe58fb17dde1947dc
+        .quad   0x681532ea65185fa3
+        .quad   0x1fdd6c3b034a7830
+
+// 2^212 * 4 * B
+
+        .quad   0x0a64e28c55dc18fe
+        .quad   0xe3df9e993399ebdd
+        .quad   0x79ac432370e2e652
+        .quad   0x35ff7fc33ae4cc0e
+        .quad   0x9c13a6a52dd8f7a9
+        .quad   0x2dbb1f8c3efdcabf
+        .quad   0x961e32405e08f7b5
+        .quad   0x48c8a121bbe6c9e5
+        .quad   0xfc415a7c59646445
+        .quad   0xd224b2d7c128b615
+        .quad   0x6035c9c905fbb912
+        .quad   0x42d7a91274429fab
+
+// 2^212 * 5 * B
+
+        .quad   0x4e6213e3eaf72ed3
+        .quad   0x6794981a43acd4e7
+        .quad   0xff547cde6eb508cb
+        .quad   0x6fed19dd10fcb532
+        .quad   0xa9a48947933da5bc
+        .quad   0x4a58920ec2e979ec
+        .quad   0x96d8800013e5ac4c
+        .quad   0x453692d74b48b147
+        .quad   0xdd775d99a8559c6f
+        .quad   0xf42a2140df003e24
+        .quad   0x5223e229da928a66
+        .quad   0x063f46ba6d38f22c
+
+// 2^212 * 6 * B
+
+        .quad   0xd2d242895f536694
+        .quad   0xca33a2c542939b2c
+        .quad   0x986fada6c7ddb95c
+        .quad   0x5a152c042f712d5d
+        .quad   0x39843cb737346921
+        .quad   0xa747fb0738c89447
+        .quad   0xcb8d8031a245307e
+        .quad   0x67810f8e6d82f068
+        .quad   0x3eeb8fbcd2287db4
+        .quad   0x72c7d3a301a03e93
+        .quad   0x5473e88cbd98265a
+        .quad   0x7324aa515921b403
+
+// 2^212 * 7 * B
+
+        .quad   0x857942f46c3cbe8e
+        .quad   0xa1d364b14730c046
+        .quad   0x1c8ed914d23c41bf
+        .quad   0x0838e161eef6d5d2
+        .quad   0xad23f6dae82354cb
+        .quad   0x6962502ab6571a6d
+        .quad   0x9b651636e38e37d1
+        .quad   0x5cac5005d1a3312f
+        .quad   0x8cc154cce9e39904
+        .quad   0x5b3a040b84de6846
+        .quad   0xc4d8a61cb1be5d6e
+        .quad   0x40fb897bd8861f02
+
+// 2^212 * 8 * B
+
+        .quad   0x84c5aa9062de37a1
+        .quad   0x421da5000d1d96e1
+        .quad   0x788286306a9242d9
+        .quad   0x3c5e464a690d10da
+        .quad   0xe57ed8475ab10761
+        .quad   0x71435e206fd13746
+        .quad   0x342f824ecd025632
+        .quad   0x4b16281ea8791e7b
+        .quad   0xd1c101d50b813381
+        .quad   0xdee60f1176ee6828
+        .quad   0x0cb68893383f6409
+        .quad   0x6183c565f6ff484a
+
+// 2^216 * 1 * B
+
+        .quad   0x741d5a461e6bf9d6
+        .quad   0x2305b3fc7777a581
+        .quad   0xd45574a26474d3d9
+        .quad   0x1926e1dc6401e0ff
+        .quad   0xdb468549af3f666e
+        .quad   0xd77fcf04f14a0ea5
+        .quad   0x3df23ff7a4ba0c47
+        .quad   0x3a10dfe132ce3c85
+        .quad   0xe07f4e8aea17cea0
+        .quad   0x2fd515463a1fc1fd
+        .quad   0x175322fd31f2c0f1
+        .quad   0x1fa1d01d861e5d15
+
+// 2^216 * 2 * B
+
+        .quad   0xcc8055947d599832
+        .quad   0x1e4656da37f15520
+        .quad   0x99f6f7744e059320
+        .quad   0x773563bc6a75cf33
+        .quad   0x38dcac00d1df94ab
+        .quad   0x2e712bddd1080de9
+        .quad   0x7f13e93efdd5e262
+        .quad   0x73fced18ee9a01e5
+        .quad   0x06b1e90863139cb3
+        .quad   0xa493da67c5a03ecd
+        .quad   0x8d77cec8ad638932
+        .quad   0x1f426b701b864f44
+
+// 2^216 * 3 * B
+
+        .quad   0xefc9264c41911c01
+        .quad   0xf1a3b7b817a22c25
+        .quad   0x5875da6bf30f1447
+        .quad   0x4e1af5271d31b090
+        .quad   0xf17e35c891a12552
+        .quad   0xb76b8153575e9c76
+        .quad   0xfa83406f0d9b723e
+        .quad   0x0b76bb1b3fa7e438
+        .quad   0x08b8c1f97f92939b
+        .quad   0xbe6771cbd444ab6e
+        .quad   0x22e5646399bb8017
+        .quad   0x7b6dd61eb772a955
+
+// 2^216 * 4 * B
+
+        .quad   0xb7adc1e850f33d92
+        .quad   0x7998fa4f608cd5cf
+        .quad   0xad962dbd8dfc5bdb
+        .quad   0x703e9bceaf1d2f4f
+        .quad   0x5730abf9ab01d2c7
+        .quad   0x16fb76dc40143b18
+        .quad   0x866cbe65a0cbb281
+        .quad   0x53fa9b659bff6afe
+        .quad   0x6c14c8e994885455
+        .quad   0x843a5d6665aed4e5
+        .quad   0x181bb73ebcd65af1
+        .quad   0x398d93e5c4c61f50
+
+// 2^216 * 5 * B
+
+        .quad   0x1c4bd16733e248f3
+        .quad   0xbd9e128715bf0a5f
+        .quad   0xd43f8cf0a10b0376
+        .quad   0x53b09b5ddf191b13
+        .quad   0xc3877c60d2e7e3f2
+        .quad   0x3b34aaa030828bb1
+        .quad   0x283e26e7739ef138
+        .quad   0x699c9c9002c30577
+        .quad   0xf306a7235946f1cc
+        .quad   0x921718b5cce5d97d
+        .quad   0x28cdd24781b4e975
+        .quad   0x51caf30c6fcdd907
+
+// 2^216 * 6 * B
+
+        .quad   0xa60ba7427674e00a
+        .quad   0x630e8570a17a7bf3
+        .quad   0x3758563dcf3324cc
+        .quad   0x5504aa292383fdaa
+        .quad   0x737af99a18ac54c7
+        .quad   0x903378dcc51cb30f
+        .quad   0x2b89bc334ce10cc7
+        .quad   0x12ae29c189f8e99a
+        .quad   0xa99ec0cb1f0d01cf
+        .quad   0x0dd1efcc3a34f7ae
+        .quad   0x55ca7521d09c4e22
+        .quad   0x5fd14fe958eba5ea
+
+// 2^216 * 7 * B
+
+        .quad   0xb5dc2ddf2845ab2c
+        .quad   0x069491b10a7fe993
+        .quad   0x4daaf3d64002e346
+        .quad   0x093ff26e586474d1
+        .quad   0x3c42fe5ebf93cb8e
+        .quad   0xbedfa85136d4565f
+        .quad   0xe0f0859e884220e8
+        .quad   0x7dd73f960725d128
+        .quad   0xb10d24fe68059829
+        .quad   0x75730672dbaf23e5
+        .quad   0x1367253ab457ac29
+        .quad   0x2f59bcbc86b470a4
+
+// 2^216 * 8 * B
+
+        .quad   0x83847d429917135f
+        .quad   0xad1b911f567d03d7
+        .quad   0x7e7748d9be77aad1
+        .quad   0x5458b42e2e51af4a
+        .quad   0x7041d560b691c301
+        .quad   0x85201b3fadd7e71e
+        .quad   0x16c2e16311335585
+        .quad   0x2aa55e3d010828b1
+        .quad   0xed5192e60c07444f
+        .quad   0x42c54e2d74421d10
+        .quad   0x352b4c82fdb5c864
+        .quad   0x13e9004a8a768664
+
+// 2^220 * 1 * B
+
+        .quad   0xcbb5b5556c032bff
+        .quad   0xdf7191b729297a3a
+        .quad   0xc1ff7326aded81bb
+        .quad   0x71ade8bb68be03f5
+        .quad   0x1e6284c5806b467c
+        .quad   0xc5f6997be75d607b
+        .quad   0x8b67d958b378d262
+        .quad   0x3d88d66a81cd8b70
+        .quad   0x8b767a93204ed789
+        .quad   0x762fcacb9fa0ae2a
+        .quad   0x771febcc6dce4887
+        .quad   0x343062158ff05fb3
+
+// 2^220 * 2 * B
+
+        .quad   0xe05da1a7e1f5bf49
+        .quad   0x26457d6dd4736092
+        .quad   0x77dcb07773cc32f6
+        .quad   0x0a5d94969cdd5fcd
+        .quad   0xfce219072a7b31b4
+        .quad   0x4d7adc75aa578016
+        .quad   0x0ec276a687479324
+        .quad   0x6d6d9d5d1fda4beb
+        .quad   0x22b1a58ae9b08183
+        .quad   0xfd95d071c15c388b
+        .quad   0xa9812376850a0517
+        .quad   0x33384cbabb7f335e
+
+// 2^220 * 3 * B
+
+        .quad   0x3c6fa2680ca2c7b5
+        .quad   0x1b5082046fb64fda
+        .quad   0xeb53349c5431d6de
+        .quad   0x5278b38f6b879c89
+        .quad   0x33bc627a26218b8d
+        .quad   0xea80b21fc7a80c61
+        .quad   0x9458b12b173e9ee6
+        .quad   0x076247be0e2f3059
+        .quad   0x52e105f61416375a
+        .quad   0xec97af3685abeba4
+        .quad   0x26e6b50623a67c36
+        .quad   0x5cf0e856f3d4fb01
+
+// 2^220 * 4 * B
+
+        .quad   0xf6c968731ae8cab4
+        .quad   0x5e20741ecb4f92c5
+        .quad   0x2da53be58ccdbc3e
+        .quad   0x2dddfea269970df7
+        .quad   0xbeaece313db342a8
+        .quad   0xcba3635b842db7ee
+        .quad   0xe88c6620817f13ef
+        .quad   0x1b9438aa4e76d5c6
+        .quad   0x8a50777e166f031a
+        .quad   0x067b39f10fb7a328
+        .quad   0x1925c9a6010fbd76
+        .quad   0x6df9b575cc740905
+
+// 2^220 * 5 * B
+
+        .quad   0x42c1192927f6bdcf
+        .quad   0x8f91917a403d61ca
+        .quad   0xdc1c5a668b9e1f61
+        .quad   0x1596047804ec0f8d
+        .quad   0xecdfc35b48cade41
+        .quad   0x6a88471fb2328270
+        .quad   0x740a4a2440a01b6a
+        .quad   0x471e5796003b5f29
+        .quad   0xda96bbb3aced37ac
+        .quad   0x7a2423b5e9208cea
+        .quad   0x24cc5c3038aebae2
+        .quad   0x50c356afdc5dae2f
+
+// 2^220 * 6 * B
+
+        .quad   0x09dcbf4341c30318
+        .quad   0xeeba061183181dce
+        .quad   0xc179c0cedc1e29a1
+        .quad   0x1dbf7b89073f35b0
+        .quad   0xcfed9cdf1b31b964
+        .quad   0xf486a9858ca51af3
+        .quad   0x14897265ea8c1f84
+        .quad   0x784a53dd932acc00
+        .quad   0x2d99f9df14fc4920
+        .quad   0x76ccb60cc4499fe5
+        .quad   0xa4132cbbe5cf0003
+        .quad   0x3f93d82354f000ea
+
+// 2^220 * 7 * B
+
+        .quad   0x8183e7689e04ce85
+        .quad   0x678fb71e04465341
+        .quad   0xad92058f6688edac
+        .quad   0x5da350d3532b099a
+        .quad   0xeaac12d179e14978
+        .quad   0xff923ff3bbebff5e
+        .quad   0x4af663e40663ce27
+        .quad   0x0fd381a811a5f5ff
+        .quad   0xf256aceca436df54
+        .quad   0x108b6168ae69d6e8
+        .quad   0x20d986cb6b5d036c
+        .quad   0x655957b9fee2af50
+
+// 2^220 * 8 * B
+
+        .quad   0xaea8b07fa902030f
+        .quad   0xf88c766af463d143
+        .quad   0x15b083663c787a60
+        .quad   0x08eab1148267a4a8
+        .quad   0xbdc1409bd002d0ac
+        .quad   0x66660245b5ccd9a6
+        .quad   0x82317dc4fade85ec
+        .quad   0x02fe934b6ad7df0d
+        .quad   0xef5cf100cfb7ea74
+        .quad   0x22897633a1cb42ac
+        .quad   0xd4ce0c54cef285e2
+        .quad   0x30408c048a146a55
+
+// 2^224 * 1 * B
+
+        .quad   0x739d8845832fcedb
+        .quad   0xfa38d6c9ae6bf863
+        .quad   0x32bc0dcab74ffef7
+        .quad   0x73937e8814bce45e
+        .quad   0xbb2e00c9193b877f
+        .quad   0xece3a890e0dc506b
+        .quad   0xecf3b7c036de649f
+        .quad   0x5f46040898de9e1a
+        .quad   0xb9037116297bf48d
+        .quad   0xa9d13b22d4f06834
+        .quad   0xe19715574696bdc6
+        .quad   0x2cf8a4e891d5e835
+
+// 2^224 * 2 * B
+
+        .quad   0x6d93fd8707110f67
+        .quad   0xdd4c09d37c38b549
+        .quad   0x7cb16a4cc2736a86
+        .quad   0x2049bd6e58252a09
+        .quad   0x2cb5487e17d06ba2
+        .quad   0x24d2381c3950196b
+        .quad   0xd7659c8185978a30
+        .quad   0x7a6f7f2891d6a4f6
+        .quad   0x7d09fd8d6a9aef49
+        .quad   0xf0ee60be5b3db90b
+        .quad   0x4c21b52c519ebfd4
+        .quad   0x6011aadfc545941d
+
+// 2^224 * 3 * B
+
+        .quad   0x5f67926dcf95f83c
+        .quad   0x7c7e856171289071
+        .quad   0xd6a1e7f3998f7a5b
+        .quad   0x6fc5cc1b0b62f9e0
+        .quad   0x63ded0c802cbf890
+        .quad   0xfbd098ca0dff6aaa
+        .quad   0x624d0afdb9b6ed99
+        .quad   0x69ce18b779340b1e
+        .quad   0xd1ef5528b29879cb
+        .quad   0xdd1aae3cd47e9092
+        .quad   0x127e0442189f2352
+        .quad   0x15596b3ae57101f1
+
+// 2^224 * 4 * B
+
+        .quad   0x462739d23f9179a2
+        .quad   0xff83123197d6ddcf
+        .quad   0x1307deb553f2148a
+        .quad   0x0d2237687b5f4dda
+        .quad   0x09ff31167e5124ca
+        .quad   0x0be4158bd9c745df
+        .quad   0x292b7d227ef556e5
+        .quad   0x3aa4e241afb6d138
+        .quad   0x2cc138bf2a3305f5
+        .quad   0x48583f8fa2e926c3
+        .quad   0x083ab1a25549d2eb
+        .quad   0x32fcaa6e4687a36c
+
+// 2^224 * 5 * B
+
+        .quad   0x7bc56e8dc57d9af5
+        .quad   0x3e0bd2ed9df0bdf2
+        .quad   0xaac014de22efe4a3
+        .quad   0x4627e9cefebd6a5c
+        .quad   0x3207a4732787ccdf
+        .quad   0x17e31908f213e3f8
+        .quad   0xd5b2ecd7f60d964e
+        .quad   0x746f6336c2600be9
+        .quad   0x3f4af345ab6c971c
+        .quad   0xe288eb729943731f
+        .quad   0x33596a8a0344186d
+        .quad   0x7b4917007ed66293
+
+// 2^224 * 6 * B
+
+        .quad   0x2d85fb5cab84b064
+        .quad   0x497810d289f3bc14
+        .quad   0x476adc447b15ce0c
+        .quad   0x122ba376f844fd7b
+        .quad   0x54341b28dd53a2dd
+        .quad   0xaa17905bdf42fc3f
+        .quad   0x0ff592d94dd2f8f4
+        .quad   0x1d03620fe08cd37d
+        .quad   0xc20232cda2b4e554
+        .quad   0x9ed0fd42115d187f
+        .quad   0x2eabb4be7dd479d9
+        .quad   0x02c70bf52b68ec4c
+
+// 2^224 * 7 * B
+
+        .quad   0xa287ec4b5d0b2fbb
+        .quad   0x415c5790074882ca
+        .quad   0xe044a61ec1d0815c
+        .quad   0x26334f0a409ef5e0
+        .quad   0xace532bf458d72e1
+        .quad   0x5be768e07cb73cb5
+        .quad   0x56cf7d94ee8bbde7
+        .quad   0x6b0697e3feb43a03
+        .quad   0xb6c8f04adf62a3c0
+        .quad   0x3ef000ef076da45d
+        .quad   0x9c9cb95849f0d2a9
+        .quad   0x1cc37f43441b2fae
+
+// 2^224 * 8 * B
+
+        .quad   0x508f565a5cc7324f
+        .quad   0xd061c4c0e506a922
+        .quad   0xfb18abdb5c45ac19
+        .quad   0x6c6809c10380314a
+        .quad   0xd76656f1c9ceaeb9
+        .quad   0x1c5b15f818e5656a
+        .quad   0x26e72832844c2334
+        .quad   0x3a346f772f196838
+        .quad   0xd2d55112e2da6ac8
+        .quad   0xe9bd0331b1e851ed
+        .quad   0x960746dd8ec67262
+        .quad   0x05911b9f6ef7c5d0
+
+// 2^228 * 1 * B
+
+        .quad   0xe9dcd756b637ff2d
+        .quad   0xec4c348fc987f0c4
+        .quad   0xced59285f3fbc7b7
+        .quad   0x3305354793e1ea87
+        .quad   0x01c18980c5fe9f94
+        .quad   0xcd656769716fd5c8
+        .quad   0x816045c3d195a086
+        .quad   0x6e2b7f3266cc7982
+        .quad   0xcc802468f7c3568f
+        .quad   0x9de9ba8219974cb3
+        .quad   0xabb7229cb5b81360
+        .quad   0x44e2017a6fbeba62
+
+// 2^228 * 2 * B
+
+        .quad   0xc4c2a74354dab774
+        .quad   0x8e5d4c3c4eaf031a
+        .quad   0xb76c23d242838f17
+        .quad   0x749a098f68dce4ea
+        .quad   0x87f82cf3b6ca6ecd
+        .quad   0x580f893e18f4a0c2
+        .quad   0x058930072604e557
+        .quad   0x6cab6ac256d19c1d
+        .quad   0xdcdfe0a02cc1de60
+        .quad   0x032665ff51c5575b
+        .quad   0x2c0c32f1073abeeb
+        .quad   0x6a882014cd7b8606
+
+// 2^228 * 3 * B
+
+        .quad   0xa52a92fea4747fb5
+        .quad   0xdc12a4491fa5ab89
+        .quad   0xd82da94bb847a4ce
+        .quad   0x4d77edce9512cc4e
+        .quad   0xd111d17caf4feb6e
+        .quad   0x050bba42b33aa4a3
+        .quad   0x17514c3ceeb46c30
+        .quad   0x54bedb8b1bc27d75
+        .quad   0x77c8e14577e2189c
+        .quad   0xa3e46f6aff99c445
+        .quad   0x3144dfc86d335343
+        .quad   0x3a96559e7c4216a9
+
+// 2^228 * 4 * B
+
+        .quad   0x12550d37f42ad2ee
+        .quad   0x8b78e00498a1fbf5
+        .quad   0x5d53078233894cb2
+        .quad   0x02c84e4e3e498d0c
+        .quad   0x4493896880baaa52
+        .quad   0x4c98afc4f285940e
+        .quad   0xef4aa79ba45448b6
+        .quad   0x5278c510a57aae7f
+        .quad   0xa54dd074294c0b94
+        .quad   0xf55d46b8df18ffb6
+        .quad   0xf06fecc58dae8366
+        .quad   0x588657668190d165
+
+// 2^228 * 5 * B
+
+        .quad   0xd47712311aef7117
+        .quad   0x50343101229e92c7
+        .quad   0x7a95e1849d159b97
+        .quad   0x2449959b8b5d29c9
+        .quad   0xbf5834f03de25cc3
+        .quad   0xb887c8aed6815496
+        .quad   0x5105221a9481e892
+        .quad   0x6760ed19f7723f93
+        .quad   0x669ba3b7ac35e160
+        .quad   0x2eccf73fba842056
+        .quad   0x1aec1f17c0804f07
+        .quad   0x0d96bc031856f4e7
+
+// 2^228 * 6 * B
+
+        .quad   0x3318be7775c52d82
+        .quad   0x4cb764b554d0aab9
+        .quad   0xabcf3d27cc773d91
+        .quad   0x3bf4d1848123288a
+        .quad   0xb1d534b0cc7505e1
+        .quad   0x32cd003416c35288
+        .quad   0xcb36a5800762c29d
+        .quad   0x5bfe69b9237a0bf8
+        .quad   0x183eab7e78a151ab
+        .quad   0xbbe990c999093763
+        .quad   0xff717d6e4ac7e335
+        .quad   0x4c5cddb325f39f88
+
+// 2^228 * 7 * B
+
+        .quad   0xc0f6b74d6190a6eb
+        .quad   0x20ea81a42db8f4e4
+        .quad   0xa8bd6f7d97315760
+        .quad   0x33b1d60262ac7c21
+        .quad   0x57750967e7a9f902
+        .quad   0x2c37fdfc4f5b467e
+        .quad   0xb261663a3177ba46
+        .quad   0x3a375e78dc2d532b
+        .quad   0x8141e72f2d4dddea
+        .quad   0xe6eafe9862c607c8
+        .quad   0x23c28458573cafd0
+        .quad   0x46b9476f4ff97346
+
+// 2^228 * 8 * B
+
+        .quad   0x0c1ffea44f901e5c
+        .quad   0x2b0b6fb72184b782
+        .quad   0xe587ff910114db88
+        .quad   0x37130f364785a142
+        .quad   0x1215505c0d58359f
+        .quad   0x2a2013c7fc28c46b
+        .quad   0x24a0a1af89ea664e
+        .quad   0x4400b638a1130e1f
+        .quad   0x3a01b76496ed19c3
+        .quad   0x31e00ab0ed327230
+        .quad   0x520a885783ca15b1
+        .quad   0x06aab9875accbec7
+
+// 2^232 * 1 * B
+
+        .quad   0xc1339983f5df0ebb
+        .quad   0xc0f3758f512c4cac
+        .quad   0x2cf1130a0bb398e1
+        .quad   0x6b3cecf9aa270c62
+        .quad   0x5349acf3512eeaef
+        .quad   0x20c141d31cc1cb49
+        .quad   0x24180c07a99a688d
+        .quad   0x555ef9d1c64b2d17
+        .quad   0x36a770ba3b73bd08
+        .quad   0x624aef08a3afbf0c
+        .quad   0x5737ff98b40946f2
+        .quad   0x675f4de13381749d
+
+// 2^232 * 2 * B
+
+        .quad   0x0e2c52036b1782fc
+        .quad   0x64816c816cad83b4
+        .quad   0xd0dcbdd96964073e
+        .quad   0x13d99df70164c520
+        .quad   0xa12ff6d93bdab31d
+        .quad   0x0725d80f9d652dfe
+        .quad   0x019c4ff39abe9487
+        .quad   0x60f450b882cd3c43
+        .quad   0x014b5ec321e5c0ca
+        .quad   0x4fcb69c9d719bfa2
+        .quad   0x4e5f1c18750023a0
+        .quad   0x1c06de9e55edac80
+
+// 2^232 * 3 * B
+
+        .quad   0x990f7ad6a33ec4e2
+        .quad   0x6608f938be2ee08e
+        .quad   0x9ca143c563284515
+        .quad   0x4cf38a1fec2db60d
+        .quad   0xffd52b40ff6d69aa
+        .quad   0x34530b18dc4049bb
+        .quad   0x5e4a5c2fa34d9897
+        .quad   0x78096f8e7d32ba2d
+        .quad   0xa0aaaa650dfa5ce7
+        .quad   0xf9c49e2a48b5478c
+        .quad   0x4f09cc7d7003725b
+        .quad   0x373cad3a26091abe
+
+// 2^232 * 4 * B
+
+        .quad   0xb294634d82c9f57c
+        .quad   0x1fcbfde124934536
+        .quad   0x9e9c4db3418cdb5a
+        .quad   0x0040f3d9454419fc
+        .quad   0xf1bea8fb89ddbbad
+        .quad   0x3bcb2cbc61aeaecb
+        .quad   0x8f58a7bb1f9b8d9d
+        .quad   0x21547eda5112a686
+        .quad   0xdefde939fd5986d3
+        .quad   0xf4272c89510a380c
+        .quad   0xb72ba407bb3119b9
+        .quad   0x63550a334a254df4
+
+// 2^232 * 5 * B
+
+        .quad   0x6507d6edb569cf37
+        .quad   0x178429b00ca52ee1
+        .quad   0xea7c0090eb6bd65d
+        .quad   0x3eea62c7daf78f51
+        .quad   0x9bba584572547b49
+        .quad   0xf305c6fae2c408e0
+        .quad   0x60e8fa69c734f18d
+        .quad   0x39a92bafaa7d767a
+        .quad   0x9d24c713e693274e
+        .quad   0x5f63857768dbd375
+        .quad   0x70525560eb8ab39a
+        .quad   0x68436a0665c9c4cd
+
+// 2^232 * 6 * B
+
+        .quad   0xbc0235e8202f3f27
+        .quad   0xc75c00e264f975b0
+        .quad   0x91a4e9d5a38c2416
+        .quad   0x17b6e7f68ab789f9
+        .quad   0x1e56d317e820107c
+        .quad   0xc5266844840ae965
+        .quad   0xc1e0a1c6320ffc7a
+        .quad   0x5373669c91611472
+        .quad   0x5d2814ab9a0e5257
+        .quad   0x908f2084c9cab3fc
+        .quad   0xafcaf5885b2d1eca
+        .quad   0x1cb4b5a678f87d11
+
+// 2^232 * 7 * B
+
+        .quad   0xb664c06b394afc6c
+        .quad   0x0c88de2498da5fb1
+        .quad   0x4f8d03164bcad834
+        .quad   0x330bca78de7434a2
+        .quad   0x6b74aa62a2a007e7
+        .quad   0xf311e0b0f071c7b1
+        .quad   0x5707e438000be223
+        .quad   0x2dc0fd2d82ef6eac
+        .quad   0x982eff841119744e
+        .quad   0xf9695e962b074724
+        .quad   0xc58ac14fbfc953fb
0x3c31be1b369f1cf5 + + // 2^232 * 8 * B + + .quad 0xb0f4864d08948aee + .quad 0x07dc19ee91ba1c6f + .quad 0x7975cdaea6aca158 + .quad 0x330b61134262d4bb + .quad 0xc168bc93f9cb4272 + .quad 0xaeb8711fc7cedb98 + .quad 0x7f0e52aa34ac8d7a + .quad 0x41cec1097e7d55bb + .quad 0xf79619d7a26d808a + .quad 0xbb1fd49e1d9e156d + .quad 0x73d7c36cdba1df27 + .quad 0x26b44cd91f28777d + + // 2^236 * 1 * B + + .quad 0x300a9035393aa6d8 + .quad 0x2b501131a12bb1cd + .quad 0x7b1ff677f093c222 + .quad 0x4309c1f8cab82bad + .quad 0xaf44842db0285f37 + .quad 0x8753189047efc8df + .quad 0x9574e091f820979a + .quad 0x0e378d6069615579 + .quad 0xd9fa917183075a55 + .quad 0x4bdb5ad26b009fdc + .quad 0x7829ad2cd63def0e + .quad 0x078fc54975fd3877 + + // 2^236 * 2 * B + + .quad 0x87dfbd1428878f2d + .quad 0x134636dd1e9421a1 + .quad 0x4f17c951257341a3 + .quad 0x5df98d4bad296cb8 + .quad 0xe2004b5bb833a98a + .quad 0x44775dec2d4c3330 + .quad 0x3aa244067eace913 + .quad 0x272630e3d58e00a9 + .quad 0xf3678fd0ecc90b54 + .quad 0xf001459b12043599 + .quad 0x26725fbc3758b89b + .quad 0x4325e4aa73a719ae + + // 2^236 * 3 * B + + .quad 0x657dc6ef433c3493 + .quad 0x65375e9f80dbf8c3 + .quad 0x47fd2d465b372dae + .quad 0x4966ab79796e7947 + .quad 0xed24629acf69f59d + .quad 0x2a4a1ccedd5abbf4 + .quad 0x3535ca1f56b2d67b + .quad 0x5d8c68d043b1b42d + .quad 0xee332d4de3b42b0a + .quad 0xd84e5a2b16a4601c + .quad 0x78243877078ba3e4 + .quad 0x77ed1eb4184ee437 + + // 2^236 * 4 * B + + .quad 0xbfd4e13f201839a0 + .quad 0xaeefffe23e3df161 + .quad 0xb65b04f06b5d1fe3 + .quad 0x52e085fb2b62fbc0 + .quad 0x185d43f89e92ed1a + .quad 0xb04a1eeafe4719c6 + .quad 0x499fbe88a6f03f4f + .quad 0x5d8b0d2f3c859bdd + .quad 0x124079eaa54cf2ba + .quad 0xd72465eb001b26e7 + .quad 0x6843bcfdc97af7fd + .quad 0x0524b42b55eacd02 + + // 2^236 * 5 * B + + .quad 0xfd0d5dbee45447b0 + .quad 0x6cec351a092005ee + .quad 0x99a47844567579cb + .quad 0x59d242a216e7fa45 + .quad 0xbc18dcad9b829eac + .quad 0x23ae7d28b5f579d0 + .quad 0xc346122a69384233 + .quad 0x1a6110b2e7d4ac89 + .quad 0x4f833f6ae66997ac + .quad 0x6849762a361839a4 + .quad 0x6985dec1970ab525 + .quad 0x53045e89dcb1f546 + + // 2^236 * 6 * B + + .quad 0xcb8bb346d75353db + .quad 0xfcfcb24bae511e22 + .quad 0xcba48d40d50ae6ef + .quad 0x26e3bae5f4f7cb5d + .quad 0x84da3cde8d45fe12 + .quad 0xbd42c218e444e2d2 + .quad 0xa85196781f7e3598 + .quad 0x7642c93f5616e2b2 + .quad 0x2323daa74595f8e4 + .quad 0xde688c8b857abeb4 + .quad 0x3fc48e961c59326e + .quad 0x0b2e73ca15c9b8ba + + // 2^236 * 7 * B + + .quad 0xd6bb4428c17f5026 + .quad 0x9eb27223fb5a9ca7 + .quad 0xe37ba5031919c644 + .quad 0x21ce380db59a6602 + .quad 0x0e3fbfaf79c03a55 + .quad 0x3077af054cbb5acf + .quad 0xd5c55245db3de39f + .quad 0x015e68c1476a4af7 + .quad 0xc1d5285220066a38 + .quad 0x95603e523570aef3 + .quad 0x832659a7226b8a4d + .quad 0x5dd689091f8eedc9 + + // 2^236 * 8 * B + + .quad 0xcbac84debfd3c856 + .quad 0x1624c348b35ff244 + .quad 0xb7f88dca5d9cad07 + .quad 0x3b0e574da2c2ebe8 + .quad 0x1d022591a5313084 + .quad 0xca2d4aaed6270872 + .quad 0x86a12b852f0bfd20 + .quad 0x56e6c439ad7da748 + .quad 0xc704ff4942bdbae6 + .quad 0x5e21ade2b2de1f79 + .quad 0xe95db3f35652fad8 + .quad 0x0822b5378f08ebc1 + + // 2^240 * 1 * B + + .quad 0x51f048478f387475 + .quad 0xb25dbcf49cbecb3c + .quad 0x9aab1244d99f2055 + .quad 0x2c709e6c1c10a5d6 + .quad 0xe1b7f29362730383 + .quad 0x4b5279ffebca8a2c + .quad 0xdafc778abfd41314 + .quad 0x7deb10149c72610f + .quad 0xcb62af6a8766ee7a + .quad 0x66cbec045553cd0e + .quad 0x588001380f0be4b5 + .quad 0x08e68e9ff62ce2ea + + // 2^240 * 2 * B + + .quad 0x34ad500a4bc130ad + .quad 
0x8d38db493d0bd49c + .quad 0xa25c3d98500a89be + .quad 0x2f1f3f87eeba3b09 + .quad 0x2f2d09d50ab8f2f9 + .quad 0xacb9218dc55923df + .quad 0x4a8f342673766cb9 + .quad 0x4cb13bd738f719f5 + .quad 0xf7848c75e515b64a + .quad 0xa59501badb4a9038 + .quad 0xc20d313f3f751b50 + .quad 0x19a1e353c0ae2ee8 + + // 2^240 * 3 * B + + .quad 0x7d1c7560bafa05c3 + .quad 0xb3e1a0a0c6e55e61 + .quad 0xe3529718c0d66473 + .quad 0x41546b11c20c3486 + .quad 0xb42172cdd596bdbd + .quad 0x93e0454398eefc40 + .quad 0x9fb15347b44109b5 + .quad 0x736bd3990266ae34 + .quad 0x85532d509334b3b4 + .quad 0x46fd114b60816573 + .quad 0xcc5f5f30425c8375 + .quad 0x412295a2b87fab5c + + // 2^240 * 4 * B + + .quad 0x19c99b88f57ed6e9 + .quad 0x5393cb266df8c825 + .quad 0x5cee3213b30ad273 + .quad 0x14e153ebb52d2e34 + .quad 0x2e655261e293eac6 + .quad 0x845a92032133acdb + .quad 0x460975cb7900996b + .quad 0x0760bb8d195add80 + .quad 0x413e1a17cde6818a + .quad 0x57156da9ed69a084 + .quad 0x2cbf268f46caccb1 + .quad 0x6b34be9bc33ac5f2 + + // 2^240 * 5 * B + + .quad 0xf3df2f643a78c0b2 + .quad 0x4c3e971ef22e027c + .quad 0xec7d1c5e49c1b5a3 + .quad 0x2012c18f0922dd2d + .quad 0x11fc69656571f2d3 + .quad 0xc6c9e845530e737a + .quad 0xe33ae7a2d4fe5035 + .quad 0x01b9c7b62e6dd30b + .quad 0x880b55e55ac89d29 + .quad 0x1483241f45a0a763 + .quad 0x3d36efdfc2e76c1f + .quad 0x08af5b784e4bade8 + + // 2^240 * 6 * B + + .quad 0x283499dc881f2533 + .quad 0x9d0525da779323b6 + .quad 0x897addfb673441f4 + .quad 0x32b79d71163a168d + .quad 0xe27314d289cc2c4b + .quad 0x4be4bd11a287178d + .quad 0x18d528d6fa3364ce + .quad 0x6423c1d5afd9826e + .quad 0xcc85f8d9edfcb36a + .quad 0x22bcc28f3746e5f9 + .quad 0xe49de338f9e5d3cd + .quad 0x480a5efbc13e2dcc + + // 2^240 * 7 * B + + .quad 0x0b51e70b01622071 + .quad 0x06b505cf8b1dafc5 + .quad 0x2c6bb061ef5aabcd + .quad 0x47aa27600cb7bf31 + .quad 0xb6614ce442ce221f + .quad 0x6e199dcc4c053928 + .quad 0x663fb4a4dc1cbe03 + .quad 0x24b31d47691c8e06 + .quad 0x2a541eedc015f8c3 + .quad 0x11a4fe7e7c693f7c + .quad 0xf0af66134ea278d6 + .quad 0x545b585d14dda094 + + // 2^240 * 8 * B + + .quad 0x67bf275ea0d43a0f + .quad 0xade68e34089beebe + .quad 0x4289134cd479e72e + .quad 0x0f62f9c332ba5454 + .quad 0x6204e4d0e3b321e1 + .quad 0x3baa637a28ff1e95 + .quad 0x0b0ccffd5b99bd9e + .quad 0x4d22dc3e64c8d071 + .quad 0xfcb46589d63b5f39 + .quad 0x5cae6a3f57cbcf61 + .quad 0xfebac2d2953afa05 + .quad 0x1c0fa01a36371436 + + // 2^244 * 1 * B + + .quad 0xe7547449bc7cd692 + .quad 0x0f9abeaae6f73ddf + .quad 0x4af01ca700837e29 + .quad 0x63ab1b5d3f1bc183 + .quad 0xc11ee5e854c53fae + .quad 0x6a0b06c12b4f3ff4 + .quad 0x33540f80e0b67a72 + .quad 0x15f18fc3cd07e3ef + .quad 0x32750763b028f48c + .quad 0x06020740556a065f + .quad 0xd53bd812c3495b58 + .quad 0x08706c9b865f508d + + // 2^244 * 2 * B + + .quad 0xf37ca2ab3d343dff + .quad 0x1a8c6a2d80abc617 + .quad 0x8e49e035d4ccffca + .quad 0x48b46beebaa1d1b9 + .quad 0xcc991b4138b41246 + .quad 0x243b9c526f9ac26b + .quad 0xb9ef494db7cbabbd + .quad 0x5fba433dd082ed00 + .quad 0x9c49e355c9941ad0 + .quad 0xb9734ade74498f84 + .quad 0x41c3fed066663e5c + .quad 0x0ecfedf8e8e710b3 + + // 2^244 * 3 * B + + .quad 0x76430f9f9cd470d9 + .quad 0xb62acc9ba42f6008 + .quad 0x1898297c59adad5e + .quad 0x7789dd2db78c5080 + .quad 0x744f7463e9403762 + .quad 0xf79a8dee8dfcc9c9 + .quad 0x163a649655e4cde3 + .quad 0x3b61788db284f435 + .quad 0xb22228190d6ef6b2 + .quad 0xa94a66b246ce4bfa + .quad 0x46c1a77a4f0b6cc7 + .quad 0x4236ccffeb7338cf + + // 2^244 * 4 * B + + .quad 0x8497404d0d55e274 + .quad 0x6c6663d9c4ad2b53 + .quad 0xec2fb0d9ada95734 + .quad 0x2617e120cdb8f73c + .quad 
0x3bd82dbfda777df6 + .quad 0x71b177cc0b98369e + .quad 0x1d0e8463850c3699 + .quad 0x5a71945b48e2d1f1 + .quad 0x6f203dd5405b4b42 + .quad 0x327ec60410b24509 + .quad 0x9c347230ac2a8846 + .quad 0x77de29fc11ffeb6a + + // 2^244 * 5 * B + + .quad 0xb0ac57c983b778a8 + .quad 0x53cdcca9d7fe912c + .quad 0x61c2b854ff1f59dc + .quad 0x3a1a2cf0f0de7dac + .quad 0x835e138fecced2ca + .quad 0x8c9eaf13ea963b9a + .quad 0xc95fbfc0b2160ea6 + .quad 0x575e66f3ad877892 + .quad 0x99803a27c88fcb3a + .quad 0x345a6789275ec0b0 + .quad 0x459789d0ff6c2be5 + .quad 0x62f882651e70a8b2 + + // 2^244 * 6 * B + + .quad 0x085ae2c759ff1be4 + .quad 0x149145c93b0e40b7 + .quad 0xc467e7fa7ff27379 + .quad 0x4eeecf0ad5c73a95 + .quad 0x6d822986698a19e0 + .quad 0xdc9821e174d78a71 + .quad 0x41a85f31f6cb1f47 + .quad 0x352721c2bcda9c51 + .quad 0x48329952213fc985 + .quad 0x1087cf0d368a1746 + .quad 0x8e5261b166c15aa5 + .quad 0x2d5b2d842ed24c21 + + // 2^244 * 7 * B + + .quad 0x02cfebd9ebd3ded1 + .quad 0xd45b217739021974 + .quad 0x7576f813fe30a1b7 + .quad 0x5691b6f9a34ef6c2 + .quad 0x5eb7d13d196ac533 + .quad 0x377234ecdb80be2b + .quad 0xe144cffc7cf5ae24 + .quad 0x5226bcf9c441acec + .quad 0x79ee6c7223e5b547 + .quad 0x6f5f50768330d679 + .quad 0xed73e1e96d8adce9 + .quad 0x27c3da1e1d8ccc03 + + // 2^244 * 8 * B + + .quad 0x7eb9efb23fe24c74 + .quad 0x3e50f49f1651be01 + .quad 0x3ea732dc21858dea + .quad 0x17377bd75bb810f9 + .quad 0x28302e71630ef9f6 + .quad 0xc2d4a2032b64cee0 + .quad 0x090820304b6292be + .quad 0x5fca747aa82adf18 + .quad 0x232a03c35c258ea5 + .quad 0x86f23a2c6bcb0cf1 + .quad 0x3dad8d0d2e442166 + .quad 0x04a8933cab76862b + + // 2^248 * 1 * B + + .quad 0xd2c604b622943dff + .quad 0xbc8cbece44cfb3a0 + .quad 0x5d254ff397808678 + .quad 0x0fa3614f3b1ca6bf + .quad 0x69082b0e8c936a50 + .quad 0xf9c9a035c1dac5b6 + .quad 0x6fb73e54c4dfb634 + .quad 0x4005419b1d2bc140 + .quad 0xa003febdb9be82f0 + .quad 0x2089c1af3a44ac90 + .quad 0xf8499f911954fa8e + .quad 0x1fba218aef40ab42 + + // 2^248 * 2 * B + + .quad 0xab549448fac8f53e + .quad 0x81f6e89a7ba63741 + .quad 0x74fd6c7d6c2b5e01 + .quad 0x392e3acaa8c86e42 + .quad 0x4f3e57043e7b0194 + .quad 0xa81d3eee08daaf7f + .quad 0xc839c6ab99dcdef1 + .quad 0x6c535d13ff7761d5 + .quad 0x4cbd34e93e8a35af + .quad 0x2e0781445887e816 + .quad 0x19319c76f29ab0ab + .quad 0x25e17fe4d50ac13b + + // 2^248 * 3 * B + + .quad 0x0a289bd71e04f676 + .quad 0x208e1c52d6420f95 + .quad 0x5186d8b034691fab + .quad 0x255751442a9fb351 + .quad 0x915f7ff576f121a7 + .quad 0xc34a32272fcd87e3 + .quad 0xccba2fde4d1be526 + .quad 0x6bba828f8969899b + .quad 0xe2d1bc6690fe3901 + .quad 0x4cb54a18a0997ad5 + .quad 0x971d6914af8460d4 + .quad 0x559d504f7f6b7be4 + + // 2^248 * 4 * B + + .quad 0xa7738378b3eb54d5 + .quad 0x1d69d366a5553c7c + .quad 0x0a26cf62f92800ba + .quad 0x01ab12d5807e3217 + .quad 0x9c4891e7f6d266fd + .quad 0x0744a19b0307781b + .quad 0x88388f1d6061e23b + .quad 0x123ea6a3354bd50e + .quad 0x118d189041e32d96 + .quad 0xb9ede3c2d8315848 + .quad 0x1eab4271d83245d9 + .quad 0x4a3961e2c918a154 + + // 2^248 * 5 * B + + .quad 0x71dc3be0f8e6bba0 + .quad 0xd6cef8347effe30a + .quad 0xa992425fe13a476a + .quad 0x2cd6bce3fb1db763 + .quad 0x0327d644f3233f1e + .quad 0x499a260e34fcf016 + .quad 0x83b5a716f2dab979 + .quad 0x68aceead9bd4111f + .quad 0x38b4c90ef3d7c210 + .quad 0x308e6e24b7ad040c + .quad 0x3860d9f1b7e73e23 + .quad 0x595760d5b508f597 + + // 2^248 * 6 * B + + .quad 0x6129bfe104aa6397 + .quad 0x8f960008a4a7fccb + .quad 0x3f8bc0897d909458 + .quad 0x709fa43edcb291a9 + .quad 0x882acbebfd022790 + .quad 0x89af3305c4115760 + .quad 0x65f492e37d3473f4 + .quad 
0x2cb2c5df54515a2b + .quad 0xeb0a5d8c63fd2aca + .quad 0xd22bc1662e694eff + .quad 0x2723f36ef8cbb03a + .quad 0x70f029ecf0c8131f + + // 2^248 * 7 * B + + .quad 0x461307b32eed3e33 + .quad 0xae042f33a45581e7 + .quad 0xc94449d3195f0366 + .quad 0x0b7d5d8a6c314858 + .quad 0x2a6aafaa5e10b0b9 + .quad 0x78f0a370ef041aa9 + .quad 0x773efb77aa3ad61f + .quad 0x44eca5a2a74bd9e1 + .quad 0x25d448327b95d543 + .quad 0x70d38300a3340f1d + .quad 0xde1c531c60e1c52b + .quad 0x272224512c7de9e4 + + // 2^248 * 8 * B + + .quad 0x1abc92af49c5342e + .quad 0xffeed811b2e6fad0 + .quad 0xefa28c8dfcc84e29 + .quad 0x11b5df18a44cc543 + .quad 0xbf7bbb8a42a975fc + .quad 0x8c5c397796ada358 + .quad 0xe27fc76fcdedaa48 + .quad 0x19735fd7f6bc20a6 + .quad 0xe3ab90d042c84266 + .quad 0xeb848e0f7f19547e + .quad 0x2503a1d065a497b9 + .quad 0x0fef911191df895f + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/curve25519/edwards25519_scalarmulbase.S b/x86_att/curve25519/edwards25519_scalarmulbase.S new file mode 100644 index 0000000000..0a3e7f92df --- /dev/null +++ b/x86_att/curve25519/edwards25519_scalarmulbase.S @@ -0,0 +1,8957 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Scalar multiplication for the edwards25519 standard basepoint +// Input scalar[4]; output res[8] +// +// extern void edwards25519_scalarmulbase +// (uint64_t res[static 8],uint64_t scalar[static 4]); +// +// Given a scalar n, returns point (X,Y) = n * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve. +// +// Standard x86-64 ABI: RDI = res, RSI = scalar +// Microsoft x64 ABI: RCX = res, RDX = scalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_scalarmulbase) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_scalarmulbase) + .text + +// Size of individual field elements + +#define NUMSIZE 32 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. +// The result "resx" assumes the "res" pointer has been preloaded into %rbp. + +#define resx (0*NUMSIZE)(%rbp) +#define resy (1*NUMSIZE)(%rbp) + +#define scalar (0*NUMSIZE)(%rsp) + +#define tabent (1*NUMSIZE)(%rsp) +#define ymx_2 (1*NUMSIZE)(%rsp) +#define xpy_2 (2*NUMSIZE)(%rsp) +#define kxy_2 (3*NUMSIZE)(%rsp) + +#define acc (4*NUMSIZE)(%rsp) +#define x_1 (4*NUMSIZE)(%rsp) +#define y_1 (5*NUMSIZE)(%rsp) +#define z_1 (6*NUMSIZE)(%rsp) +#define w_1 (7*NUMSIZE)(%rsp) +#define x_3 (4*NUMSIZE)(%rsp) +#define y_3 (5*NUMSIZE)(%rsp) +#define z_3 (6*NUMSIZE)(%rsp) +#define w_3 (7*NUMSIZE)(%rsp) + +#define tmpspace (8*NUMSIZE)(%rsp) +#define t0 (8*NUMSIZE)(%rsp) +#define t1 (9*NUMSIZE)(%rsp) +#define t2 (10*NUMSIZE)(%rsp) +#define t3 (11*NUMSIZE)(%rsp) +#define t4 (12*NUMSIZE)(%rsp) +#define t5 (13*NUMSIZE)(%rsp) + +// Stable homes for the input result pointer, and other variables + +#define res 14*NUMSIZE(%rsp) + +#define i 14*NUMSIZE+8(%rsp) + +#define bias 14*NUMSIZE+16(%rsp) + +#define bf 14*NUMSIZE+24(%rsp) +#define ix 14*NUMSIZE+24(%rsp) + +#define tab 15*NUMSIZE(%rsp) + +// Total size to reserve on the stack + +#define NSPACE (15*NUMSIZE+8) + +// Macro wrapping up the basic field multiplication, only trivially +// different from a pure function call to bignum_mul_p25519. 
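For orientation, the field multiplication this macro implements can be modeled in C as follows. This is an illustrative sketch only: the function name, the unsigned __int128 type and the loop-based reduction are assumptions of mine rather than a mirror of the macro's carry scheduling, and unlike the assembly it is neither constant-time nor optimized.

#include <stdint.h>

typedef unsigned __int128 u128;

/* Reference model: z := (x * y) mod p_25519, with a canonical result. */
static void mul_p25519_ref(uint64_t z[4],
                           const uint64_t x[4], const uint64_t y[4])
{
    static const uint64_t p[4] = {
        0xffffffffffffffed, 0xffffffffffffffff,
        0xffffffffffffffff, 0x7fffffffffffffff
    };
    uint64_t d[8] = {0};

    /* Schoolbook 4x4 multiply into an 8-digit product. */
    for (int i = 0; i < 4; i++) {
        uint64_t hi = 0;
        for (int j = 0; j < 4; j++) {
            u128 t = (u128)x[i] * y[j] + d[i + j] + hi;
            d[i + j] = (uint64_t)t;
            hi = (uint64_t)(t >> 64);
        }
        d[i + 4] = hi;
    }

    /* Fold the high half using 2^256 == 38 (mod p_25519), repeating
       until the carry dies out (at most a few passes). */
    uint64_t c;
    do {
        c = 0;
        for (int i = 0; i < 4; i++) {
            u128 t = (u128)d[i + 4] * 38 + d[i] + c;
            d[i] = (uint64_t)t;
            c = (uint64_t)(t >> 64);
            d[i + 4] = 0;
        }
        d[4] = c;
    } while (c);

    /* The value is now < 2^256 = 2 * p_25519 + 38, so at most two
       conditional subtractions reach the canonical range. */
    for (int k = 0; k < 2; k++) {
        uint64_t t[4], b = 0;
        for (int i = 0; i < 4; i++) {
            u128 s = (u128)d[i] - p[i] - b;
            t[i] = (uint64_t)s;
            b = (uint64_t)(s >> 64) & 1;
        }
        if (!b)                      /* no borrow: d >= p, keep d - p */
            for (int i = 0; i < 4; i++) d[i] = t[i];
    }
    for (int i = 0; i < 4; i++) z[i] = d[i];
}

A caller would use it as mul_p25519_ref(z, x, y) on little-endian 4-limb values, the same digit order the macro reads through P1 and P2.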
+ +#define mul_p25519(P0,P1,P2) \ + xorl %esi, %esi ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rax, %r10 ; \ + addq %rax, %r9 ; \ + mulxq 0x10+P1, %rax, %r11 ; \ + adcq %rax, %r10 ; \ + mulxq 0x18+P1, %rax, %r12 ; \ + adcq %rax, %r11 ; \ + adcq %rsi, %r12 ; \ + xorl %esi, %esi ; \ + movq 0x8+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rsi, %r13 ; \ + adcxq %rsi, %r13 ; \ + xorl %esi, %esi ; \ + movq 0x10+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rsi, %r14 ; \ + adcxq %rsi, %r14 ; \ + xorl %esi, %esi ; \ + movq 0x18+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rsi, %r15 ; \ + adcxq %rsi, %r15 ; \ + movl $0x26, %edx ; \ + xorl %esi, %esi ; \ + mulxq %r12, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq %r13, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq %r14, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq %r15, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rsi, %r12 ; \ + adcxq %rsi, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + movl $0x13, %edx ; \ + incq %r12; \ + bts $63, %r11 ; \ + mulxq %r12, %rax, %rbx ; \ + addq %rax, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %rsi, %r10 ; \ + adcq %rsi, %r11 ; \ + sbbq %rax, %rax ; \ + notq %rax; \ + andq %rdx, %rax ; \ + subq %rax, %r8 ; \ + sbbq %rsi, %r9 ; \ + sbbq %rsi, %r10 ; \ + sbbq %rsi, %r11 ; \ + btr $63, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. 
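To spell out why that is still safe, the bound can be worked in the notation of these comments (my annotation, not part of the original patch):

// Let v < 2^512 be the double-length product and split it as
// v = 2^256 * H + L with H, L < 2^256. One folding pass with
// 2^256 == 38 (mod p_25519) forms v' = L + 38 * H < 39 * 2^256,
// held in five limbs. Now split v' = 2^255 * h + l with l < 2^255,
// so that h <= 77, and use 2^255 == 19 (mod p_25519):
//
//     v == v' == l + 19 * h (mod p_25519)
//
// The value mul_4 stores is exactly l + 19 * h < 2^255 + 19 * 77,
// comfortably below 2 * p_25519 = 2^256 - 38. mul_p25519 instead
// takes the quotient estimate h + 1 and applies a final correction
// to land in the canonical range [0, p_25519).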
+ +#define mul_4(P0,P1,P2) \ + xorl %ecx, %ecx ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rax, %r10 ; \ + addq %rax, %r9 ; \ + mulxq 0x10+P1, %rax, %r11 ; \ + adcq %rax, %r10 ; \ + mulxq 0x18+P1, %rax, %r12 ; \ + adcq %rax, %r11 ; \ + adcq %rcx, %r12 ; \ + xorl %ecx, %ecx ; \ + movq 0x8+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rcx, %r13 ; \ + adcxq %rcx, %r13 ; \ + xorl %ecx, %ecx ; \ + movq 0x10+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rcx, %r14 ; \ + adcxq %rcx, %r14 ; \ + xorl %ecx, %ecx ; \ + movq 0x18+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rcx, %r15 ; \ + adcxq %rcx, %r15 ; \ + movl $0x26, %edx ; \ + xorl %ecx, %ecx ; \ + mulxq %r12, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq %r13, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq %r14, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq %r15, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + adcxq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + movq P1, %r8 ; \ + xorl %ebx, %ebx ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movl $38, %ecx ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %rax ; \ + sbbq 24+P2, %rax ; \ + cmovncq %rbx, %rcx ; \ + subq %rcx, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbx, %rax ; \ + movq %r8, P0 ; \ + movq %r9, 8+P0 ; \ + movq %r10, 16+P0 ; \ + movq %rax, 24+P0 + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. 
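The addition case may be easiest to see in C; the sketch below (the name and the unsigned __int128 carries are mine) mirrors how a carry out of 2^256 is folded back in as +38, i.e. a subtraction of the double modulus 2^256 - 38.

#include <stdint.h>

typedef unsigned __int128 u128;

/* Sketch of add_twice4: z := x + y, compensating an overflow of 2^256
   by subtracting the double modulus 2 * p_25519 = 2^256 - 38. */
static void add_twice4_ref(uint64_t z[4],
                           const uint64_t x[4], const uint64_t y[4])
{
    uint64_t r[4];
    u128 t = 0;
    for (int i = 0; i < 4; i++) {
        t += (u128)x[i] + y[i];
        r[i] = (uint64_t)t;
        t >>= 64;
    }
    /* Carry out means the sum is >= 2^256: drop the carry and add 38,
       which is the same as subtracting 2^256 - 38. */
    t = t ? 38 : 0;
    for (int i = 0; i < 4; i++) {
        t += r[i];
        z[i] = (uint64_t)t;
        t >>= 64;
    }
    /* Any carry here is discarded, just like the macro's adcq chain;
       this is where the "fits in 4 digits" caveat comes from. */
}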
+ +#define add_twice4(P0,P1,P2) \ + movq P1, %r8 ; \ + xorl %ecx, %ecx ; \ + addq P2, %r8 ; \ + movq 0x8+P1, %r9 ; \ + adcq 0x8+P2, %r9 ; \ + movq 0x10+P1, %r10 ; \ + adcq 0x10+P2, %r10 ; \ + movq 0x18+P1, %r11 ; \ + adcq 0x18+P2, %r11 ; \ + movl $38, %eax ; \ + cmovncq %rcx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +#define double_twice4(P0,P1) \ + movq P1, %r8 ; \ + xorl %ecx, %ecx ; \ + addq %r8, %r8 ; \ + movq 0x8+P1, %r9 ; \ + adcq %r9, %r9 ; \ + movq 0x10+P1, %r10 ; \ + adcq %r10, %r10 ; \ + movq 0x18+P1, %r11 ; \ + adcq %r11, %r11 ; \ + movl $38, %eax ; \ + cmovncq %rcx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +S2N_BN_SYMBOL(edwards25519_scalarmulbase): + +// In this case the Windows form literally makes a subroutine call. +// This avoids hassle arising from keeping code and data together. + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + callq curve25519_x25519base_standard + popq %rsi + popq %rdi + ret + +curve25519_x25519base_standard: +#endif + +// Save registers, make room for temps, preserve input arguments. + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $NSPACE, %rsp + +// Move the output pointer to a stable place + + movq %rdi, res + +// Copy the input scalar x to its local variable while reducing it +// modulo 2^252 + m where m = 27742317777372353535851937790883648493; +// this is the order of the basepoint so this doesn't change the result. +// First do q = floor(x/2^252) and x' = x - q * (2^252 + m), which gives +// an initial result -15 * m <= x' < 2^252 + + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + + movq %r11, %rcx + shrq $60, %rcx + + movq $0x5812631a5cf5d3ed, %rax + mulq %rcx + movq %rax, %r12 + movq %rdx, %r13 + movq $0x14def9dea2f79cd6, %rax + mulq %rcx + addq %rax, %r13 + adcq $0, %rdx + shlq $60, %rcx + + subq %r12, %r8 + sbbq %r13, %r9 + sbbq %rdx, %r10 + sbbq %rcx, %r11 + +// If x' < 0 then just directly negate it; this makes sure the +// reduced argument is strictly 0 <= x' < 2^252, but now we need +// to record (done via bit 255 of the reduced scalar, which is +// ignored in the main loop) when we negated so we can flip +// the end result to compensate. + + sbbq %rax, %rax + + xorq %rax, %r8 + xorq %rax, %r9 + xorq %rax, %r10 + xorq %rax, %r11 + + negq %rax + adcq $0, %r8 + adcq $0, %r9 + adcq $0, %r10 + adcq $0, %r11 + + shlq $63, %rax + orq %rax, %r11 + +// And before we store the scalar, test and reset bit 251 to +// initialize the main loop just below. + + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq %r10, 16(%rsp) + btr $59, %r11 + movq %r11, 24(%rsp) + +// The main part of the computation is in extended-projective coordinates +// (X,Y,Z,T), representing an affine point on the edwards25519 curve +// (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z). +// In comments B means the standard basepoint (x,4/5) = +// (0x216....f25d51a,0x6666..666658). +// +// Initialize accumulator "acc" to either 0 or 2^251 * B depending on +// bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle.
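Before the table-driven part begins, the scalar pre-reduction just performed can be summarized in C. This is a sketch under my own naming, using unsigned __int128 for the carries; it follows the q = floor(x/2^252) computation and conditional negation above, but leaves out the separate bit-251 test done with btr.

#include <stdint.h>

typedef unsigned __int128 u128;

/* Sketch: reduce x modulo 2^252 + m, negating a negative remainder and
   recording the sign in bit 255 of the reduced scalar. */
static void reduce_scalar_ref(uint64_t x[4])
{
    /* Low two limbs of m = 27742317777372353535851937790883648493. */
    const uint64_t m0 = 0x5812631a5cf5d3ed, m1 = 0x14def9dea2f79cd6;
    uint64_t q = x[3] >> 60;            /* q = floor(x / 2^252) */

    /* y = q * (2^252 + m), spread over four limbs. */
    u128 t0 = (u128)q * m0;
    u128 t1 = (u128)q * m1 + (uint64_t)(t0 >> 64);
    uint64_t y[4] = { (uint64_t)t0, (uint64_t)t1,
                      (uint64_t)(t1 >> 64), q << 60 };

    /* x' = x - y, keeping the final borrow. */
    uint64_t b = 0;
    for (int i = 0; i < 4; i++) {
        u128 s = (u128)x[i] - y[i] - b;
        x[i] = (uint64_t)s;
        b = (uint64_t)(s >> 64) & 1;
    }

    /* If x' went negative, negate it and record that in bit 255. */
    uint64_t mask = 0 - b;
    u128 c = b;
    for (int i = 0; i < 4; i++) {
        c += x[i] ^ mask;
        x[i] = (uint64_t)c;
        c >>= 64;
    }
    x[3] |= b << 63;
}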
+ + leaq edwards25519_0g(%rip), %r10 + leaq edwards25519_251g(%rip), %r11 + + movq (%r10), %rax + movq (%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*16(%rsp) + + movq 8*1(%r10), %rax + movq 8*1(%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*17(%rsp) + + movq 8*2(%r10), %rax + movq 8*2(%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*18(%rsp) + + movq 8*3(%r10), %rax + movq 8*3(%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*19(%rsp) + + movq 8*4(%r10), %rax + movq 8*4(%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*20(%rsp) + + movq 8*5(%r10), %rax + movq 8*5(%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*21(%rsp) + + movq 8*6(%r10), %rax + movq 8*6(%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*22(%rsp) + + movq 8*7(%r10), %rax + movq 8*7(%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*23(%rsp) + + movl $1, %eax + movq %rax, 8*24(%rsp) + movl $0, %eax + movq %rax, 8*25(%rsp) + movq %rax, 8*26(%rsp) + movq %rax, 8*27(%rsp) + + movq 8*8(%r10), %rax + movq 8*8(%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*28(%rsp) + + movq 8*9(%r10), %rax + movq 8*9(%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*29(%rsp) + + movq 8*10(%r10), %rax + movq 8*10(%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*30(%rsp) + + movq 8*11(%r10), %rax + movq 8*11(%r11), %rcx + cmovcq %rcx, %rax + movq %rax, 8*31(%rsp) + +// The counter "i" tracks the bit position for which the scalar has +// already been absorbed, starting at 0 and going up in chunks of 4. +// +// The pointer "tab" points at the current block of the table for +// multiples (2^i * j) * B at the current bit position i; 1 <= j <= 8. +// +// The bias is always either 0 or 1 and needs to be added to the +// partially processed scalar implicitly. This is used to absorb 4 bits +// of scalar per iteration from 3-bit table indexing by exploiting +// negation: (16 * h + l) * B = (16 * (h + 1) - (16 - l)) * B is used +// when l >= 9. Note that we can't have any bias left over at the +// end because we made sure bit 251 is clear in the reduced scalar. + + movq $0, i + leaq edwards25519_gtable(%rip), %rax + movq %rax, tab + movq $0, bias + +// Start of the main loop, repeated 63 times for i = 4, 8, ..., 252 + +scalarloop: + +// Look at the next 4-bit field "bf", adding the previous bias as well. +// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, +// setting the bias to 1 for the next iteration in the latter case. + + movq i, %rax + movq %rax, %rcx + shrq $6, %rax + movq (%rsp,%rax,8), %rax // Exploiting scalar = sp exactly + shrq %cl, %rax + andq $15, %rax + addq bias, %rax + movq %rax, bf + + cmpq $9, bf + sbbq %rax, %rax + incq %rax + movq %rax, bias + + movq $16, %rdi + subq bf, %rdi + cmpq $0, bias + cmovzq bf, %rdi + movq %rdi, ix + +// Perform constant-time lookup in the table to get element number "ix". +// The table entry for the affine point (x,y) is actually a triple +// (y - x,x + y,2 * d * x * y) to precompute parts of the addition. +// Note that "ix" can be 0, so we set up the appropriate identity first.
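The selection pattern used below can be modeled compactly in C; this sketch uses my own naming and a mask-based select, whereas the code that follows streams through the entries with cmovz, but the effect is the same: every entry is read regardless of ix.

#include <stdint.h>
#include <string.h>

/* Sketch: pick entry number ix from 8 consecutive 96-byte triples,
   leaving the identity triple when ix == 0. */
static void ct_lookup(uint64_t out[12],
                      const uint64_t table[8][12], uint64_t ix)
{
    /* The identity (0,1) as a (y - x, x + y, 2*d*x*y) triple is
       (1, 1, 0), matching the register setup below. */
    memset(out, 0, 12 * sizeof out[0]);
    out[0] = 1;
    out[4] = 1;
    for (uint64_t j = 1; j <= 8; j++) {
        /* mask is all-ones exactly when j == ix, with no branches. */
        uint64_t d = j ^ ix;
        uint64_t mask = ((d | (0 - d)) >> 63) - 1;
        for (int k = 0; k < 12; k++)
            out[k] |= table[j - 1][k] & mask;
    }
}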
+ + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + movq tab, %rbp + + cmpq $1, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $2, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $3, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $4, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $5, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $6, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $7, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), 
%rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $8, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + + addq $96, %rbp + movq %rbp, tab + +// We now have the triple from the table in registers as follows +// +// [%rdx;%rcx;%rbx;%rax] = y - x +// [%r11;%r10;%r9;%r8] = x + y +// [%r15;%r14;%r13;%r12] = 2 * d * x * y +// +// In case bias = 1 we need to negate this. For Edwards curves +// -(x,y) = (-x,y), i.e. we need to negate the x coordinate. +// In this processed encoding, that amounts to swapping the +// first two fields and negating the third. +// +// The optional negation here also pretends bias = 0 whenever +// ix = 0 so that it doesn't need to handle the case of zero +// inputs, since no non-trivial table entries are zero. Note +// that in the zero case the whole negation is trivial, and +// so indeed is the swapping. + + cmpq $0, bias + + movq %rax, %rsi + cmovnzq %r8, %rsi + cmovnzq %rax, %r8 + movq %rsi, 32(%rsp) + movq %r8, 64(%rsp) + + movq %rbx, %rsi + cmovnzq %r9, %rsi + cmovnzq %rbx, %r9 + movq %rsi, 40(%rsp) + movq %r9, 72(%rsp) + + movq %rcx, %rsi + cmovnzq %r10, %rsi + cmovnzq %rcx, %r10 + movq %rsi, 48(%rsp) + movq %r10, 80(%rsp) + + movq %rdx, %rsi + cmovnzq %r11, %rsi + cmovnzq %rdx, %r11 + movq %rsi, 56(%rsp) + movq %r11, 88(%rsp) + + movq $-19, %rax + movq $-1, %rbx + movq $-1, %rcx + movq $0x7fffffffffffffff, %rdx + subq %r12, %rax + sbbq %r13, %rbx + sbbq %r14, %rcx + sbbq %r15, %rdx + + movq ix, %r8 + movq bias, %r9 + testq %r8, %r8 + cmovzq %r8, %r9 + testq %r9, %r9 + + cmovzq %r12, %rax + cmovzq %r13, %rbx + cmovzq %r14, %rcx + cmovzq %r15, %rdx + movq %rax, 96(%rsp) + movq %rbx, 104(%rsp) + movq %rcx, 112(%rsp) + movq %rdx, 120(%rsp) + +// Extended-projective and precomputed mixed addition. +// This is effectively the same as calling the standalone +// function edwards25519_pepadd(acc,acc,tabent), but we +// only retain slightly weaker normalization < 2 * p_25519 +// throughout the inner loop, so the computation is +// slightly different, and faster overall. + + double_twice4(t0,z_1) + sub_twice4(t1,y_1,x_1) + add_twice4(t2,y_1,x_1) + mul_4(t3,w_1,kxy_2) + mul_4(t1,t1,ymx_2) + mul_4(t2,t2,xpy_2) + sub_twice4(t4,t0,t3) + add_twice4(t0,t0,t3) + sub_twice4(t5,t2,t1) + add_twice4(t1,t2,t1) + mul_4(z_3,t4,t0) + mul_4(x_3,t5,t4) + mul_4(y_3,t0,t1) + mul_4(w_3,t5,t1) + +// End of the main loop; move on by 4 bits. + + addq $4, i + cmpq $252, i + jc scalarloop + +// Insert the optional negation of the projective X coordinate, and +// so by extension the final affine x coordinate x = X/Z and thus +// the point P = (x,y). We only know X < 2 * p_25519, so we do the +// negation as 2 * p_25519 - X to keep it nonnegative. 
From this +// point on we don't need any normalization of the coordinates +// except for making sure that they fit in 4 digits. + + movq 128(%rsp), %r8 + movq 136(%rsp), %r9 + movq 144(%rsp), %r10 + movq 152(%rsp), %r11 + movq $0xffffffffffffffda, %r12 + subq %r8, %r12 + movq $0xffffffffffffffff, %r13 + sbbq %r9, %r13 + movq $0xffffffffffffffff, %r14 + sbbq %r10, %r14 + movq $0xffffffffffffffff, %r15 + sbbq %r11, %r15 + movq 24(%rsp), %rax + btq $63, %rax + cmovcq %r12, %r8 + cmovcq %r13, %r9 + cmovcq %r14, %r10 + cmovcq %r15, %r11 + movq %r8, 128(%rsp) + movq %r9, 136(%rsp) + movq %r10, 144(%rsp) + movq %r11, 152(%rsp) + +// Now we need to map out of the extended-projective representation +// (X,Y,Z,W) back to the affine form (x,y) = (X/Z,Y/Z). This means +// first calling the modular inverse to get w_3 = 1/z_3. + + movq $4, %rdi + leaq 224(%rsp), %rsi + leaq 192(%rsp), %rdx + leaq p_25519(%rip), %rcx + leaq 256(%rsp), %r8 + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "x86/generic/bignum_modinv.S". Note +// that the stack it uses for its own temporaries is 80 bytes so it +// only overwrites local variables that are no longer needed. + + movq %rsi, 0x40(%rsp) + movq %r8, 0x38(%rsp) + movq %rcx, 0x48(%rsp) + leaq (%r8,%rdi,8), %r10 + movq %r10, 0x30(%rsp) + leaq (%r10,%rdi,8), %r15 + xorq %r11, %r11 + xorq %r9, %r9 +copyloop: + movq (%rdx,%r9,8), %rax + movq (%rcx,%r9,8), %rbx + movq %rax, (%r10,%r9,8) + movq %rbx, (%r15,%r9,8) + movq %rbx, (%r8,%r9,8) + movq %r11, (%rsi,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb copyloop + movq (%r8), %rax + movq %rax, %rbx + decq %rbx + movq %rbx, (%r8) + movq %rax, %rbp + movq %rax, %r12 + shlq $0x2, %rbp + subq %rbp, %r12 + xorq $0x2, %r12 + movq %r12, %rbp + imulq %rax, %rbp + movl $0x2, %eax + addq %rbp, %rax + addq $0x1, %rbp + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + movq %r12, 0x28(%rsp) + movq %rdi, %rax + shlq $0x7, %rax + movq %rax, 0x20(%rsp) +outerloop: + movq 0x20(%rsp), %r13 + addq $0x3f, %r13 + shrq $0x6, %r13 + cmpq %rdi, %r13 + cmovaeq %rdi, %r13 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi + xorq %r11, %r11 + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 +toploop: + movq (%r8,%r9,8), %rbx + movq (%r15,%r9,8), %rcx + movq %r11, %r10 + andq %r12, %r10 + andq %rbp, %r11 + movq %rbx, %rax + orq %rcx, %rax + negq %rax + cmovbq %r10, %r14 + cmovbq %r11, %rsi + cmovbq %rbx, %r12 + cmovbq %rcx, %rbp + sbbq %r11, %r11 + incq %r9 + cmpq %r13, %r9 + jb toploop + movq %r12, %rax + orq %rbp, %rax + bsrq %rax, %rcx + xorq $0x3f, %rcx + shldq %cl, %r14, %r12 + shldq %cl, %rsi, %rbp + movq (%r8), %rax + movq %rax, %r14 + movq (%r15), %rax + movq %rax, %rsi + movl $0x1, %r10d + movl $0x0, %r11d + movl $0x0, %ecx + movl $0x1, %edx + movl $0x3a, %r9d + movq %rdi, 0x8(%rsp) + movq %r13, 0x10(%rsp) + movq %r8, (%rsp) + movq %r15, 0x18(%rsp) +innerloop: + xorl %eax, %eax + xorl %ebx, %ebx + xorq %r8, %r8 + xorq %r15, %r15 + btq $0x0, %r14 + cmovbq %rbp, %rax + cmovbq %rsi, %rbx + cmovbq %rcx, %r8 + cmovbq %rdx, %r15 + movq %r14, %r13 + subq %rbx, %r14 + subq %r13, %rbx + movq %r12, %rdi + subq %rax, %rdi + cmovbq %r12, 
%rbp + leaq -0x1(%rdi), %r12 + cmovbq %rbx, %r14 + cmovbq %r13, %rsi + notq %r12 + cmovbq %r10, %rcx + cmovbq %r11, %rdx + cmovaeq %rdi, %r12 + shrq $1, %r14 + addq %r8, %r10 + addq %r15, %r11 + shrq $1, %r12 + addq %rcx, %rcx + addq %rdx, %rdx + decq %r9 + jne innerloop + movq 0x8(%rsp), %rdi + movq 0x10(%rsp), %r13 + movq (%rsp), %r8 + movq 0x18(%rsp), %r15 + movq %r10, (%rsp) + movq %r11, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rdx, 0x18(%rsp) + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + xorq %r14, %r14 + xorq %rsi, %rsi + xorq %r10, %r10 + xorq %r11, %r11 + xorq %r9, %r9 +congloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r12 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %rbp + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq %rdx, %r12 + shrdq $0x3a, %r14, %r10 + movq %r10, (%r8,%r9,8) + movq %r14, %r10 + movq %r12, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3a, %rsi, %r11 + movq %r11, (%r15,%r9,8) + movq %rsi, %r11 + movq %rbp, %rsi + incq %r9 + cmpq %rdi, %r9 + jb congloop + shldq $0x6, %r10, %r14 + shldq $0x6, %r11, %rsi + movq 0x48(%rsp), %r15 + movq (%r8), %rbx + movq 0x28(%rsp), %r12 + imulq %rbx, %r12 + movq (%r15), %rax + mulq %r12 + addq %rbx, %rax + movq %rdx, %r10 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je wmontend +wmontloop: + adcq (%r8,%r9,8), %r10 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %r12 + subq %rbx, %rdx + addq %r10, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r10 + incq %r9 + decq %rcx + jne wmontloop +wmontend: + adcq %r14, %r10 + movq %r10, -0x8(%r8,%rdi,8) + sbbq %r10, %r10 + negq %r10 + movq %rdi, %rcx + xorq %r9, %r9 +wcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne wcmploop + sbbq $0x0, %r10 + sbbq %r10, %r10 + notq %r10 + xorq %rcx, %rcx + xorq %r9, %r9 +wcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r10, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb wcorrloop + movq 0x40(%rsp), %r8 + movq (%r8), %rbx + movq 0x28(%rsp), %rbp + imulq %rbx, %rbp + movq (%r15), %rax + mulq %rbp + addq %rbx, %rax + movq %rdx, %r11 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je zmontend +zmontloop: + adcq (%r8,%r9,8), %r11 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %rbp + subq %rbx, %rdx + addq %r11, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r11 + incq %r9 + decq %rcx + jne zmontloop +zmontend: + adcq %rsi, %r11 + movq %r11, -0x8(%r8,%rdi,8) + sbbq %r11, %r11 + negq %r11 + movq %rdi, %rcx + xorq %r9, %r9 +zcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne zcmploop + sbbq $0x0, %r11 + sbbq %r11, %r11 + notq %r11 + xorq %rcx, %rcx + xorq %r9, %r9 +zcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r11, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb zcorrloop + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi +crossloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %r11 + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + subq %r12, %rdx + subq %rax, %r14 + sbbq 
%rdx, %r10 + sbbq %r12, %r12 + movq %r14, (%r8,%r9,8) + movq %r10, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + subq %rbp, %rdx + subq %rax, %rsi + sbbq %rdx, %r11 + sbbq %rbp, %rbp + movq %rsi, (%r15,%r9,8) + movq %r11, %rsi + incq %r9 + cmpq %r13, %r9 + jb crossloop + xorq %r9, %r9 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r12, %r14 + xorq %rbp, %rsi +optnegloop: + movq (%r8,%r9,8), %rax + xorq %r12, %rax + negq %r10 + adcq $0x0, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rax + xorq %rbp, %rax + negq %r11 + adcq $0x0, %rax + sbbq %r11, %r11 + movq %rax, (%r15,%r9,8) + incq %r9 + cmpq %r13, %r9 + jb optnegloop + subq %r10, %r14 + subq %r11, %rsi + movq %r13, %r9 +shiftloop: + movq -0x8(%r8,%r9,8), %rax + movq %rax, %r10 + shrdq $0x3a, %r14, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %r10, %r14 + movq -0x8(%r15,%r9,8), %rax + movq %rax, %r11 + shrdq $0x3a, %rsi, %rax + movq %rax, -0x8(%r15,%r9,8) + movq %r11, %rsi + decq %r9 + jne shiftloop + notq %rbp + movq 0x48(%rsp), %rcx + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r9, %r9 +fliploop: + movq %rbp, %rdx + movq (%rcx,%r9,8), %rax + andq %rax, %rdx + andq %r12, %rax + movq (%r8,%r9,8), %rbx + xorq %r12, %rbx + negq %r10 + adcq %rbx, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rbx + xorq %rbp, %rbx + negq %r11 + adcq %rbx, %rdx + sbbq %r11, %r11 + movq %rdx, (%r15,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb fliploop + subq $0x3a, 0x20(%rsp) + ja outerloop + +// The final result is x = X * inv(Z), y = Y * inv(Z). +// These are the only operations in the whole computation that +// fully reduce modulo p_25519 since now we want the canonical +// answer as output. + + movq res, %rbp + mul_p25519(resx,x_3,w_3) + mul_p25519(resy,y_3,w_3) + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// The modulus, for the modular inverse + +p_25519: + .quad 0xffffffffffffffed + .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x7fffffffffffffff + +// 0 * B = 0 and 2^251 * B in extended-projective coordinates +// but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. + +edwards25519_0g: + + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + + .quad 0x0000000000000001 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + +edwards25519_251g: + + .quad 0x525f946d7c7220e7 + .quad 0x4636b0b2f1e35444 + .quad 0x796e9d70e892ae0f + .quad 0x03dec05fa937adb1 + .quad 0x6d1c271cc6375515 + .quad 0x462588c4a4ca4f14 + .quad 0x691129fee55afc39 + .quad 0x15949f784d8472f5 + .quad 0xbd89e510afad0049 + .quad 0x4d1f08c073b9860e + .quad 0x07716e8b2d00af9d + .quad 0x70d685f68f859714 + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
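The layout just described amounts to a small piece of index arithmetic, stated here as a hedged C helper (my annotation; the assembly above never computes offsets like this, it simply walks the table with addq $96):

#include <stddef.h>

/* Byte offset of the (2^i * j) * B entry for i = 0, 4, ..., 248 and
   1 <= j <= 8: blocks of eight 96-byte (y-x, x+y, 2*d*x*y) triples,
   one block per 4-bit window position. */
static size_t gtable_offset(unsigned i, unsigned j)
{
    return ((size_t)(i / 4) * 8 + (j - 1)) * 96;
}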
+ +edwards25519_gtable: + + // 2^0 * 1 * G + + .quad 0x9d103905d740913e + .quad 0xfd399f05d140beb3 + .quad 0xa5c18434688f8a09 + .quad 0x44fd2f9298f81267 + .quad 0x2fbc93c6f58c3b85 + .quad 0xcf932dc6fb8c0e19 + .quad 0x270b4898643d42c2 + .quad 0x07cf9d3a33d4ba65 + .quad 0xabc91205877aaa68 + .quad 0x26d9e823ccaac49e + .quad 0x5a1b7dcbdd43598c + .quad 0x6f117b689f0c65a8 + + // 2^0 * 2 * G + + .quad 0x8a99a56042b4d5a8 + .quad 0x8f2b810c4e60acf6 + .quad 0xe09e236bb16e37aa + .quad 0x6bb595a669c92555 + .quad 0x9224e7fc933c71d7 + .quad 0x9f469d967a0ff5b5 + .quad 0x5aa69a65e1d60702 + .quad 0x590c063fa87d2e2e + .quad 0x43faa8b3a59b7a5f + .quad 0x36c16bdd5d9acf78 + .quad 0x500fa0840b3d6a31 + .quad 0x701af5b13ea50b73 + + // 2^0 * 3 * G + + .quad 0x56611fe8a4fcd265 + .quad 0x3bd353fde5c1ba7d + .quad 0x8131f31a214bd6bd + .quad 0x2ab91587555bda62 + .quad 0xaf25b0a84cee9730 + .quad 0x025a8430e8864b8a + .quad 0xc11b50029f016732 + .quad 0x7a164e1b9a80f8f4 + .quad 0x14ae933f0dd0d889 + .quad 0x589423221c35da62 + .quad 0xd170e5458cf2db4c + .quad 0x5a2826af12b9b4c6 + + // 2^0 * 4 * G + + .quad 0x95fe050a056818bf + .quad 0x327e89715660faa9 + .quad 0xc3e8e3cd06a05073 + .quad 0x27933f4c7445a49a + .quad 0x287351b98efc099f + .quad 0x6765c6f47dfd2538 + .quad 0xca348d3dfb0a9265 + .quad 0x680e910321e58727 + .quad 0x5a13fbe9c476ff09 + .quad 0x6e9e39457b5cc172 + .quad 0x5ddbdcf9102b4494 + .quad 0x7f9d0cbf63553e2b + + // 2^0 * 5 * G + + .quad 0x7f9182c3a447d6ba + .quad 0xd50014d14b2729b7 + .quad 0xe33cf11cb864a087 + .quad 0x154a7e73eb1b55f3 + .quad 0xa212bc4408a5bb33 + .quad 0x8d5048c3c75eed02 + .quad 0xdd1beb0c5abfec44 + .quad 0x2945ccf146e206eb + .quad 0xbcbbdbf1812a8285 + .quad 0x270e0807d0bdd1fc + .quad 0xb41b670b1bbda72d + .quad 0x43aabe696b3bb69a + + // 2^0 * 6 * G + + .quad 0x499806b67b7d8ca4 + .quad 0x575be28427d22739 + .quad 0xbb085ce7204553b9 + .quad 0x38b64c41ae417884 + .quad 0x3a0ceeeb77157131 + .quad 0x9b27158900c8af88 + .quad 0x8065b668da59a736 + .quad 0x51e57bb6a2cc38bd + .quad 0x85ac326702ea4b71 + .quad 0xbe70e00341a1bb01 + .quad 0x53e4a24b083bc144 + .quad 0x10b8e91a9f0d61e3 + + // 2^0 * 7 * G + + .quad 0xba6f2c9aaa3221b1 + .quad 0x6ca021533bba23a7 + .quad 0x9dea764f92192c3a + .quad 0x1d6edd5d2e5317e0 + .quad 0x6b1a5cd0944ea3bf + .quad 0x7470353ab39dc0d2 + .quad 0x71b2528228542e49 + .quad 0x461bea69283c927e + .quad 0xf1836dc801b8b3a2 + .quad 0xb3035f47053ea49a + .quad 0x529c41ba5877adf3 + .quad 0x7a9fbb1c6a0f90a7 + + // 2^0 * 8 * G + + .quad 0xe2a75dedf39234d9 + .quad 0x963d7680e1b558f9 + .quad 0x2c2741ac6e3c23fb + .quad 0x3a9024a1320e01c3 + .quad 0x59b7596604dd3e8f + .quad 0x6cb30377e288702c + .quad 0xb1339c665ed9c323 + .quad 0x0915e76061bce52f + .quad 0xe7c1f5d9c9a2911a + .quad 0xb8a371788bcca7d7 + .quad 0x636412190eb62a32 + .quad 0x26907c5c2ecc4e95 + + // 2^4 * 1 * G + + .quad 0x7ec851ca553e2df3 + .quad 0xa71284cba64878b3 + .quad 0xe6b5e4193288d1e7 + .quad 0x4cf210ec5a9a8883 + .quad 0x322d04a52d9021f6 + .quad 0xb9c19f3375c6bf9c + .quad 0x587a3a4342d20b09 + .quad 0x143b1cf8aa64fe61 + .quad 0x9f867c7d968acaab + .quad 0x5f54258e27092729 + .quad 0xd0a7d34bea180975 + .quad 0x21b546a3374126e1 + + // 2^4 * 2 * G + + .quad 0xa94ff858a2888343 + .quad 0xce0ed4565313ed3c + .quad 0xf55c3dcfb5bf34fa + .quad 0x0a653ca5c9eab371 + .quad 0x490a7a45d185218f + .quad 0x9a15377846049335 + .quad 0x0060ea09cc31e1f6 + .quad 0x7e041577f86ee965 + .quad 0x66b2a496ce5b67f3 + .quad 0xff5492d8bd569796 + .quad 0x503cec294a592cd0 + .quad 0x566943650813acb2 + + // 2^4 * 3 * G + + .quad 0xb818db0c26620798 + .quad 0x5d5c31d9606e354a + 
.quad 0x0982fa4f00a8cdc7 + .quad 0x17e12bcd4653e2d4 + .quad 0x5672f9eb1dabb69d + .quad 0xba70b535afe853fc + .quad 0x47ac0f752796d66d + .quad 0x32a5351794117275 + .quad 0xd3a644a6df648437 + .quad 0x703b6559880fbfdd + .quad 0xcb852540ad3a1aa5 + .quad 0x0900b3f78e4c6468 + + // 2^4 * 4 * G + + .quad 0x0a851b9f679d651b + .quad 0xe108cb61033342f2 + .quad 0xd601f57fe88b30a3 + .quad 0x371f3acaed2dd714 + .quad 0xed280fbec816ad31 + .quad 0x52d9595bd8e6efe3 + .quad 0x0fe71772f6c623f5 + .quad 0x4314030b051e293c + .quad 0xd560005efbf0bcad + .quad 0x8eb70f2ed1870c5e + .quad 0x201f9033d084e6a0 + .quad 0x4c3a5ae1ce7b6670 + + // 2^4 * 5 * G + + .quad 0x4138a434dcb8fa95 + .quad 0x870cf67d6c96840b + .quad 0xde388574297be82c + .quad 0x7c814db27262a55a + .quad 0xbaf875e4c93da0dd + .quad 0xb93282a771b9294d + .quad 0x80d63fb7f4c6c460 + .quad 0x6de9c73dea66c181 + .quad 0x478904d5a04df8f2 + .quad 0xfafbae4ab10142d3 + .quad 0xf6c8ac63555d0998 + .quad 0x5aac4a412f90b104 + + // 2^4 * 6 * G + + .quad 0xc64f326b3ac92908 + .quad 0x5551b282e663e1e0 + .quad 0x476b35f54a1a4b83 + .quad 0x1b9da3fe189f68c2 + .quad 0x603a0d0abd7f5134 + .quad 0x8089c932e1d3ae46 + .quad 0xdf2591398798bd63 + .quad 0x1c145cd274ba0235 + .quad 0x32e8386475f3d743 + .quad 0x365b8baf6ae5d9ef + .quad 0x825238b6385b681e + .quad 0x234929c1167d65e1 + + // 2^4 * 7 * G + + .quad 0x984decaba077ade8 + .quad 0x383f77ad19eb389d + .quad 0xc7ec6b7e2954d794 + .quad 0x59c77b3aeb7c3a7a + .quad 0x48145cc21d099fcf + .quad 0x4535c192cc28d7e5 + .quad 0x80e7c1e548247e01 + .quad 0x4a5f28743b2973ee + .quad 0xd3add725225ccf62 + .quad 0x911a3381b2152c5d + .quad 0xd8b39fad5b08f87d + .quad 0x6f05606b4799fe3b + + // 2^4 * 8 * G + + .quad 0x9ffe9e92177ba962 + .quad 0x98aee71d0de5cae1 + .quad 0x3ff4ae942d831044 + .quad 0x714de12e58533ac8 + .quad 0x5b433149f91b6483 + .quad 0xadb5dc655a2cbf62 + .quad 0x87fa8412632827b3 + .quad 0x60895e91ab49f8d8 + .quad 0xe9ecf2ed0cf86c18 + .quad 0xb46d06120735dfd4 + .quad 0xbc9da09804b96be7 + .quad 0x73e2e62fd96dc26b + + // 2^8 * 1 * G + + .quad 0xed5b635449aa515e + .quad 0xa865c49f0bc6823a + .quad 0x850c1fe95b42d1c4 + .quad 0x30d76d6f03d315b9 + .quad 0x2eccdd0e632f9c1d + .quad 0x51d0b69676893115 + .quad 0x52dfb76ba8637a58 + .quad 0x6dd37d49a00eef39 + .quad 0x6c4444172106e4c7 + .quad 0xfb53d680928d7f69 + .quad 0xb4739ea4694d3f26 + .quad 0x10c697112e864bb0 + + // 2^8 * 2 * G + + .quad 0x6493c4277dbe5fde + .quad 0x265d4fad19ad7ea2 + .quad 0x0e00dfc846304590 + .quad 0x25e61cabed66fe09 + .quad 0x0ca62aa08358c805 + .quad 0x6a3d4ae37a204247 + .quad 0x7464d3a63b11eddc + .quad 0x03bf9baf550806ef + .quad 0x3f13e128cc586604 + .quad 0x6f5873ecb459747e + .quad 0xa0b63dedcc1268f5 + .quad 0x566d78634586e22c + + // 2^8 * 3 * G + + .quad 0x1637a49f9cc10834 + .quad 0xbc8e56d5a89bc451 + .quad 0x1cb5ec0f7f7fd2db + .quad 0x33975bca5ecc35d9 + .quad 0xa1054285c65a2fd0 + .quad 0x6c64112af31667c3 + .quad 0x680ae240731aee58 + .quad 0x14fba5f34793b22a + .quad 0x3cd746166985f7d4 + .quad 0x593e5e84c9c80057 + .quad 0x2fc3f2b67b61131e + .quad 0x14829cea83fc526c + + // 2^8 * 4 * G + + .quad 0xff437b8497dd95c2 + .quad 0x6c744e30aa4eb5a7 + .quad 0x9e0c5d613c85e88b + .quad 0x2fd9c71e5f758173 + .quad 0x21e70b2f4e71ecb8 + .quad 0xe656ddb940a477e3 + .quad 0xbf6556cece1d4f80 + .quad 0x05fc3bc4535d7b7e + .quad 0x24b8b3ae52afdedd + .quad 0x3495638ced3b30cf + .quad 0x33a4bc83a9be8195 + .quad 0x373767475c651f04 + + // 2^8 * 5 * G + + .quad 0x2fba99fd40d1add9 + .quad 0xb307166f96f4d027 + .quad 0x4363f05215f03bae + .quad 0x1fbea56c3b18f999 + .quad 0x634095cb14246590 + .quad 
0xef12144016c15535 + .quad 0x9e38140c8910bc60 + .quad 0x6bf5905730907c8c + .quad 0x0fa778f1e1415b8a + .quad 0x06409ff7bac3a77e + .quad 0x6f52d7b89aa29a50 + .quad 0x02521cf67a635a56 + + // 2^8 * 6 * G + + .quad 0x513fee0b0a9d5294 + .quad 0x8f98e75c0fdf5a66 + .quad 0xd4618688bfe107ce + .quad 0x3fa00a7e71382ced + .quad 0xb1146720772f5ee4 + .quad 0xe8f894b196079ace + .quad 0x4af8224d00ac824a + .quad 0x001753d9f7cd6cc4 + .quad 0x3c69232d963ddb34 + .quad 0x1dde87dab4973858 + .quad 0xaad7d1f9a091f285 + .quad 0x12b5fe2fa048edb6 + + // 2^8 * 7 * G + + .quad 0x71f0fbc496fce34d + .quad 0x73b9826badf35bed + .quad 0xd2047261ff28c561 + .quad 0x749b76f96fb1206f + .quad 0xdf2b7c26ad6f1e92 + .quad 0x4b66d323504b8913 + .quad 0x8c409dc0751c8bc3 + .quad 0x6f7e93c20796c7b8 + .quad 0x1f5af604aea6ae05 + .quad 0xc12351f1bee49c99 + .quad 0x61a808b5eeff6b66 + .quad 0x0fcec10f01e02151 + + // 2^8 * 8 * G + + .quad 0x644d58a649fe1e44 + .quad 0x21fcaea231ad777e + .quad 0x02441c5a887fd0d2 + .quad 0x4901aa7183c511f3 + .quad 0x3df2d29dc4244e45 + .quad 0x2b020e7493d8de0a + .quad 0x6cc8067e820c214d + .quad 0x413779166feab90a + .quad 0x08b1b7548c1af8f0 + .quad 0xce0f7a7c246299b4 + .quad 0xf760b0f91e06d939 + .quad 0x41bb887b726d1213 + + // 2^12 * 1 * G + + .quad 0x9267806c567c49d8 + .quad 0x066d04ccca791e6a + .quad 0xa69f5645e3cc394b + .quad 0x5c95b686a0788cd2 + .quad 0x97d980e0aa39f7d2 + .quad 0x35d0384252c6b51c + .quad 0x7d43f49307cd55aa + .quad 0x56bd36cfb78ac362 + .quad 0x2ac519c10d14a954 + .quad 0xeaf474b494b5fa90 + .quad 0xe6af8382a9f87a5a + .quad 0x0dea6db1879be094 + + // 2^12 * 2 * G + + .quad 0xaa66bf547344e5ab + .quad 0xda1258888f1b4309 + .quad 0x5e87d2b3fd564b2f + .quad 0x5b2c78885483b1dd + .quad 0x15baeb74d6a8797a + .quad 0x7ef55cf1fac41732 + .quad 0x29001f5a3c8b05c5 + .quad 0x0ad7cc8752eaccfb + .quad 0x52151362793408cf + .quad 0xeb0f170319963d94 + .quad 0xa833b2fa883d9466 + .quad 0x093a7fa775003c78 + + // 2^12 * 3 * G + + .quad 0xe5107de63a16d7be + .quad 0xa377ffdc9af332cf + .quad 0x70d5bf18440b677f + .quad 0x6a252b19a4a31403 + .quad 0xb8e9604460a91286 + .quad 0x7f3fd8047778d3de + .quad 0x67d01e31bf8a5e2d + .quad 0x7b038a06c27b653e + .quad 0x9ed919d5d36990f3 + .quad 0x5213aebbdb4eb9f2 + .quad 0xc708ea054cb99135 + .quad 0x58ded57f72260e56 + + // 2^12 * 4 * G + + .quad 0x78e79dade9413d77 + .quad 0xf257f9d59729e67d + .quad 0x59db910ee37aa7e6 + .quad 0x6aa11b5bbb9e039c + .quad 0xda6d53265b0fd48b + .quad 0x8960823193bfa988 + .quad 0xd78ac93261d57e28 + .quad 0x79f2942d3a5c8143 + .quad 0x97da2f25b6c88de9 + .quad 0x251ba7eaacf20169 + .quad 0x09b44f87ef4eb4e4 + .quad 0x7d90ab1bbc6a7da5 + + // 2^12 * 5 * G + + .quad 0x9acca683a7016bfe + .quad 0x90505f4df2c50b6d + .quad 0x6b610d5fcce435aa + .quad 0x19a10d446198ff96 + .quad 0x1a07a3f496b3c397 + .quad 0x11ceaa188f4e2532 + .quad 0x7d9498d5a7751bf0 + .quad 0x19ed161f508dd8a0 + .quad 0x560a2cd687dce6ca + .quad 0x7f3568c48664cf4d + .quad 0x8741e95222803a38 + .quad 0x483bdab1595653fc + + // 2^12 * 6 * G + + .quad 0xfa780f148734fa49 + .quad 0x106f0b70360534e0 + .quad 0x2210776fe3e307bd + .quad 0x3286c109dde6a0fe + .quad 0xd6cf4d0ab4da80f6 + .quad 0x82483e45f8307fe0 + .quad 0x05005269ae6f9da4 + .quad 0x1c7052909cf7877a + .quad 0x32ee7de2874e98d4 + .quad 0x14c362e9b97e0c60 + .quad 0x5781dcde6a60a38a + .quad 0x217dd5eaaa7aa840 + + // 2^12 * 7 * G + + .quad 0x9db7c4d0248e1eb0 + .quad 0xe07697e14d74bf52 + .quad 0x1e6a9b173c562354 + .quad 0x7fa7c21f795a4965 + .quad 0x8bdf1fb9be8c0ec8 + .quad 0x00bae7f8e30a0282 + .quad 0x4963991dad6c4f6c + .quad 0x07058a6e5df6f60a + .quad 
0xe9eb02c4db31f67f + .quad 0xed25fd8910bcfb2b + .quad 0x46c8131f5c5cddb4 + .quad 0x33b21c13a0cb9bce + + // 2^12 * 8 * G + + .quad 0x360692f8087d8e31 + .quad 0xf4dcc637d27163f7 + .quad 0x25a4e62065ea5963 + .quad 0x659bf72e5ac160d9 + .quad 0x9aafb9b05ee38c5b + .quad 0xbf9d2d4e071a13c7 + .quad 0x8eee6e6de933290a + .quad 0x1c3bab17ae109717 + .quad 0x1c9ab216c7cab7b0 + .quad 0x7d65d37407bbc3cc + .quad 0x52744750504a58d5 + .quad 0x09f2606b131a2990 + + // 2^16 * 1 * G + + .quad 0x40e87d44744346be + .quad 0x1d48dad415b52b25 + .quad 0x7c3a8a18a13b603e + .quad 0x4eb728c12fcdbdf7 + .quad 0x7e234c597c6691ae + .quad 0x64889d3d0a85b4c8 + .quad 0xdae2c90c354afae7 + .quad 0x0a871e070c6a9e1d + .quad 0x3301b5994bbc8989 + .quad 0x736bae3a5bdd4260 + .quad 0x0d61ade219d59e3c + .quad 0x3ee7300f2685d464 + + // 2^16 * 2 * G + + .quad 0xf5d255e49e7dd6b7 + .quad 0x8016115c610b1eac + .quad 0x3c99975d92e187ca + .quad 0x13815762979125c2 + .quad 0x43fa7947841e7518 + .quad 0xe5c6fa59639c46d7 + .quad 0xa1065e1de3052b74 + .quad 0x7d47c6a2cfb89030 + .quad 0x3fdad0148ef0d6e0 + .quad 0x9d3e749a91546f3c + .quad 0x71ec621026bb8157 + .quad 0x148cf58d34c9ec80 + + // 2^16 * 3 * G + + .quad 0x46a492f67934f027 + .quad 0x469984bef6840aa9 + .quad 0x5ca1bc2a89611854 + .quad 0x3ff2fa1ebd5dbbd4 + .quad 0xe2572f7d9ae4756d + .quad 0x56c345bb88f3487f + .quad 0x9fd10b6d6960a88d + .quad 0x278febad4eaea1b9 + .quad 0xb1aa681f8c933966 + .quad 0x8c21949c20290c98 + .quad 0x39115291219d3c52 + .quad 0x4104dd02fe9c677b + + // 2^16 * 4 * G + + .quad 0x72b2bf5e1124422a + .quad 0xa1fa0c3398a33ab5 + .quad 0x94cb6101fa52b666 + .quad 0x2c863b00afaf53d5 + .quad 0x81214e06db096ab8 + .quad 0x21a8b6c90ce44f35 + .quad 0x6524c12a409e2af5 + .quad 0x0165b5a48efca481 + .quad 0xf190a474a0846a76 + .quad 0x12eff984cd2f7cc0 + .quad 0x695e290658aa2b8f + .quad 0x591b67d9bffec8b8 + + // 2^16 * 5 * G + + .quad 0x312f0d1c80b49bfa + .quad 0x5979515eabf3ec8a + .quad 0x727033c09ef01c88 + .quad 0x3de02ec7ca8f7bcb + .quad 0x99b9b3719f18b55d + .quad 0xe465e5faa18c641e + .quad 0x61081136c29f05ed + .quad 0x489b4f867030128b + .quad 0xd232102d3aeb92ef + .quad 0xe16253b46116a861 + .quad 0x3d7eabe7190baa24 + .quad 0x49f5fbba496cbebf + + // 2^16 * 6 * G + + .quad 0x30949a108a5bcfd4 + .quad 0xdc40dd70bc6473eb + .quad 0x92c294c1307c0d1c + .quad 0x5604a86dcbfa6e74 + .quad 0x155d628c1e9c572e + .quad 0x8a4d86acc5884741 + .quad 0x91a352f6515763eb + .quad 0x06a1a6c28867515b + .quad 0x7288d1d47c1764b6 + .quad 0x72541140e0418b51 + .quad 0x9f031a6018acf6d1 + .quad 0x20989e89fe2742c6 + + // 2^16 * 7 * G + + .quad 0x499777fd3a2dcc7f + .quad 0x32857c2ca54fd892 + .quad 0xa279d864d207e3a0 + .quad 0x0403ed1d0ca67e29 + .quad 0x1674278b85eaec2e + .quad 0x5621dc077acb2bdf + .quad 0x640a4c1661cbf45a + .quad 0x730b9950f70595d3 + .quad 0xc94b2d35874ec552 + .quad 0xc5e6c8cf98246f8d + .quad 0xf7cb46fa16c035ce + .quad 0x5bd7454308303dcc + + // 2^16 * 8 * G + + .quad 0x7f9ad19528b24cc2 + .quad 0x7f6b54656335c181 + .quad 0x66b8b66e4fc07236 + .quad 0x133a78007380ad83 + .quad 0x85c4932115e7792a + .quad 0xc64c89a2bdcdddc9 + .quad 0x9d1e3da8ada3d762 + .quad 0x5bb7db123067f82c + .quad 0x0961f467c6ca62be + .quad 0x04ec21d6211952ee + .quad 0x182360779bd54770 + .quad 0x740dca6d58f0e0d2 + + // 2^20 * 1 * G + + .quad 0x50b70bf5d3f0af0b + .quad 0x4feaf48ae32e71f7 + .quad 0x60e84ed3a55bbd34 + .quad 0x00ed489b3f50d1ed + .quad 0x3906c72aed261ae5 + .quad 0x9ab68fd988e100f7 + .quad 0xf5e9059af3360197 + .quad 0x0e53dc78bf2b6d47 + .quad 0xb90829bf7971877a + .quad 0x5e4444636d17e631 + .quad 0x4d05c52e18276893 + .quad 
0x27632d9a5a4a4af5 + + // 2^20 * 2 * G + + .quad 0xd11ff05154b260ce + .quad 0xd86dc38e72f95270 + .quad 0x601fcd0d267cc138 + .quad 0x2b67916429e90ccd + .quad 0xa98285d187eaffdb + .quad 0xa5b4fbbbd8d0a864 + .quad 0xb658f27f022663f7 + .quad 0x3bbc2b22d99ce282 + .quad 0xb917c952583c0a58 + .quad 0x653ff9b80fe4c6f3 + .quad 0x9b0da7d7bcdf3c0c + .quad 0x43a0eeb6ab54d60e + + // 2^20 * 3 * G + + .quad 0x396966a46d4a5487 + .quad 0xf811a18aac2bb3ba + .quad 0x66e4685b5628b26b + .quad 0x70a477029d929b92 + .quad 0x3ac6322357875fe8 + .quad 0xd9d4f4ecf5fbcb8f + .quad 0x8dee8493382bb620 + .quad 0x50c5eaa14c799fdc + .quad 0xdd0edc8bd6f2fb3c + .quad 0x54c63aa79cc7b7a0 + .quad 0xae0b032b2c8d9f1a + .quad 0x6f9ce107602967fb + + // 2^20 * 4 * G + + .quad 0xad1054b1cde1c22a + .quad 0xc4a8e90248eb32df + .quad 0x5f3e7b33accdc0ea + .quad 0x72364713fc79963e + .quad 0x139693063520e0b5 + .quad 0x437fcf7c88ea03fe + .quad 0xf7d4c40bd3c959bc + .quad 0x699154d1f893ded9 + .quad 0x315d5c75b4b27526 + .quad 0xcccb842d0236daa5 + .quad 0x22f0c8a3345fee8e + .quad 0x73975a617d39dbed + + // 2^20 * 5 * G + + .quad 0xe4024df96375da10 + .quad 0x78d3251a1830c870 + .quad 0x902b1948658cd91c + .quad 0x7e18b10b29b7438a + .quad 0x6f37f392f4433e46 + .quad 0x0e19b9a11f566b18 + .quad 0x220fb78a1fd1d662 + .quad 0x362a4258a381c94d + .quad 0x9071d9132b6beb2f + .quad 0x0f26e9ad28418247 + .quad 0xeab91ec9bdec925d + .quad 0x4be65bc8f48af2de + + // 2^20 * 6 * G + + .quad 0x78487feba36e7028 + .quad 0x5f3f13001dd8ce34 + .quad 0x934fb12d4b30c489 + .quad 0x056c244d397f0a2b + .quad 0x1d50fba257c26234 + .quad 0x7bd4823adeb0678b + .quad 0xc2b0dc6ea6538af5 + .quad 0x5665eec6351da73e + .quad 0xdb3ee00943bfb210 + .quad 0x4972018720800ac2 + .quad 0x26ab5d6173bd8667 + .quad 0x20b209c2ab204938 + + // 2^20 * 7 * G + + .quad 0x549e342ac07fb34b + .quad 0x02d8220821373d93 + .quad 0xbc262d70acd1f567 + .quad 0x7a92c9fdfbcac784 + .quad 0x1fcca94516bd3289 + .quad 0x448d65aa41420428 + .quad 0x59c3b7b216a55d62 + .quad 0x49992cc64e612cd8 + .quad 0x65bd1bea70f801de + .quad 0x1befb7c0fe49e28a + .quad 0xa86306cdb1b2ae4a + .quad 0x3b7ac0cd265c2a09 + + // 2^20 * 8 * G + + .quad 0x822bee438c01bcec + .quad 0x530cb525c0fbc73b + .quad 0x48519034c1953fe9 + .quad 0x265cc261e09a0f5b + .quad 0xf0d54e4f22ed39a7 + .quad 0xa2aae91e5608150a + .quad 0xf421b2e9eddae875 + .quad 0x31bc531d6b7de992 + .quad 0xdf3d134da980f971 + .quad 0x7a4fb8d1221a22a7 + .quad 0x3df7d42035aad6d8 + .quad 0x2a14edcc6a1a125e + + // 2^24 * 1 * G + + .quad 0xdf48ee0752cfce4e + .quad 0xc3fffaf306ec08b7 + .quad 0x05710b2ab95459c4 + .quad 0x161d25fa963ea38d + .quad 0x231a8c570478433c + .quad 0xb7b5270ec281439d + .quad 0xdbaa99eae3d9079f + .quad 0x2c03f5256c2b03d9 + .quad 0x790f18757b53a47d + .quad 0x307b0130cf0c5879 + .quad 0x31903d77257ef7f9 + .quad 0x699468bdbd96bbaf + + // 2^24 * 2 * G + + .quad 0xbd1f2f46f4dafecf + .quad 0x7cef0114a47fd6f7 + .quad 0xd31ffdda4a47b37f + .quad 0x525219a473905785 + .quad 0xd8dd3de66aa91948 + .quad 0x485064c22fc0d2cc + .quad 0x9b48246634fdea2f + .quad 0x293e1c4e6c4a2e3a + .quad 0x376e134b925112e1 + .quad 0x703778b5dca15da0 + .quad 0xb04589af461c3111 + .quad 0x5b605c447f032823 + + // 2^24 * 3 * G + + .quad 0xb965805920c47c89 + .quad 0xe7f0100c923b8fcc + .quad 0x0001256502e2ef77 + .quad 0x24a76dcea8aeb3ee + .quad 0x3be9fec6f0e7f04c + .quad 0x866a579e75e34962 + .quad 0x5542ef161e1de61a + .quad 0x2f12fef4cc5abdd5 + .quad 0x0a4522b2dfc0c740 + .quad 0x10d06e7f40c9a407 + .quad 0xc6cf144178cff668 + .quad 0x5e607b2518a43790 + + // 2^24 * 4 * G + + .quad 0x58b31d8f6cdf1818 + .quad 0x35cfa74fc36258a2 
+ .quad 0xe1b3ff4f66e61d6e + .quad 0x5067acab6ccdd5f7 + .quad 0xa02c431ca596cf14 + .quad 0xe3c42d40aed3e400 + .quad 0xd24526802e0f26db + .quad 0x201f33139e457068 + .quad 0xfd527f6b08039d51 + .quad 0x18b14964017c0006 + .quad 0xd5220eb02e25a4a8 + .quad 0x397cba8862460375 + + // 2^24 * 5 * G + + .quad 0x30c13093f05959b2 + .quad 0xe23aa18de9a97976 + .quad 0x222fd491721d5e26 + .quad 0x2339d320766e6c3a + .quad 0x7815c3fbc81379e7 + .quad 0xa6619420dde12af1 + .quad 0xffa9c0f885a8fdd5 + .quad 0x771b4022c1e1c252 + .quad 0xd87dd986513a2fa7 + .quad 0xf5ac9b71f9d4cf08 + .quad 0xd06bc31b1ea283b3 + .quad 0x331a189219971a76 + + // 2^24 * 6 * G + + .quad 0xf5166f45fb4f80c6 + .quad 0x9c36c7de61c775cf + .quad 0xe3d4e81b9041d91c + .quad 0x31167c6b83bdfe21 + .quad 0x26512f3a9d7572af + .quad 0x5bcbe28868074a9e + .quad 0x84edc1c11180f7c4 + .quad 0x1ac9619ff649a67b + .quad 0xf22b3842524b1068 + .quad 0x5068343bee9ce987 + .quad 0xfc9d71844a6250c8 + .quad 0x612436341f08b111 + + // 2^24 * 7 * G + + .quad 0xd99d41db874e898d + .quad 0x09fea5f16c07dc20 + .quad 0x793d2c67d00f9bbc + .quad 0x46ebe2309e5eff40 + .quad 0x8b6349e31a2d2638 + .quad 0x9ddfb7009bd3fd35 + .quad 0x7f8bf1b8a3a06ba4 + .quad 0x1522aa3178d90445 + .quad 0x2c382f5369614938 + .quad 0xdafe409ab72d6d10 + .quad 0xe8c83391b646f227 + .quad 0x45fe70f50524306c + + // 2^24 * 8 * G + + .quad 0xda4875a6960c0b8c + .quad 0x5b68d076ef0e2f20 + .quad 0x07fb51cf3d0b8fd4 + .quad 0x428d1623a0e392d4 + .quad 0x62f24920c8951491 + .quad 0x05f007c83f630ca2 + .quad 0x6fbb45d2f5c9d4b8 + .quad 0x16619f6db57a2245 + .quad 0x084f4a4401a308fd + .quad 0xa82219c376a5caac + .quad 0xdeb8de4643d1bc7d + .quad 0x1d81592d60bd38c6 + + // 2^28 * 1 * G + + .quad 0xd833d7beec2a4c38 + .quad 0x2c9162830acc20ed + .quad 0xe93a47aa92df7581 + .quad 0x702d67a3333c4a81 + .quad 0x3a4a369a2f89c8a1 + .quad 0x63137a1d7c8de80d + .quad 0xbcac008a78eda015 + .quad 0x2cb8b3a5b483b03f + .quad 0x36e417cbcb1b90a1 + .quad 0x33b3ddaa7f11794e + .quad 0x3f510808885bc607 + .quad 0x24141dc0e6a8020d + + // 2^28 * 2 * G + + .quad 0x59f73c773fefee9d + .quad 0xb3f1ef89c1cf989d + .quad 0xe35dfb42e02e545f + .quad 0x5766120b47a1b47c + .quad 0x91925dccbd83157d + .quad 0x3ca1205322cc8094 + .quad 0x28e57f183f90d6e4 + .quad 0x1a4714cede2e767b + .quad 0xdb20ba0fb8b6b7ff + .quad 0xb732c3b677511fa1 + .quad 0xa92b51c099f02d89 + .quad 0x4f3875ad489ca5f1 + + // 2^28 * 3 * G + + .quad 0xc7fc762f4932ab22 + .quad 0x7ac0edf72f4c3c1b + .quad 0x5f6b55aa9aa895e8 + .quad 0x3680274dad0a0081 + .quad 0x79ed13f6ee73eec0 + .quad 0xa5c6526d69110bb1 + .quad 0xe48928c38603860c + .quad 0x722a1446fd7059f5 + .quad 0xd0959fe9a8cf8819 + .quad 0xd0a995508475a99c + .quad 0x6eac173320b09cc5 + .quad 0x628ecf04331b1095 + + // 2^28 * 4 * G + + .quad 0x98bcb118a9d0ddbc + .quad 0xee449e3408b4802b + .quad 0x87089226b8a6b104 + .quad 0x685f349a45c7915d + .quad 0x9b41acf85c74ccf1 + .quad 0xb673318108265251 + .quad 0x99c92aed11adb147 + .quad 0x7a47d70d34ecb40f + .quad 0x60a0c4cbcc43a4f5 + .quad 0x775c66ca3677bea9 + .quad 0xa17aa1752ff8f5ed + .quad 0x11ded9020e01fdc0 + + // 2^28 * 5 * G + + .quad 0x890e7809caefe704 + .quad 0x8728296de30e8c6c + .quad 0x4c5cd2a392aeb1c9 + .quad 0x194263d15771531f + .quad 0x471f95b03bea93b7 + .quad 0x0552d7d43313abd3 + .quad 0xbd9370e2e17e3f7b + .quad 0x7b120f1db20e5bec + .quad 0x17d2fb3d86502d7a + .quad 0xb564d84450a69352 + .quad 0x7da962c8a60ed75d + .quad 0x00d0f85b318736aa + + // 2^28 * 6 * G + + .quad 0x978b142e777c84fd + .quad 0xf402644705a8c062 + .quad 0xa67ad51be7e612c7 + .quad 0x2f7b459698dd6a33 + .quad 0xa6753c1efd7621c1 + .quad 
0x69c0b4a7445671f5 + .quad 0x971f527405b23c11 + .quad 0x387bc74851a8c7cd + .quad 0x81894b4d4a52a9a8 + .quad 0xadd93e12f6b8832f + .quad 0x184d8548b61bd638 + .quad 0x3f1c62dbd6c9f6cd + + // 2^28 * 7 * G + + .quad 0x2e8f1f0091910c1f + .quad 0xa4df4fe0bff2e12c + .quad 0x60c6560aee927438 + .quad 0x6338283facefc8fa + .quad 0x3fad3e40148f693d + .quad 0x052656e194eb9a72 + .quad 0x2f4dcbfd184f4e2f + .quad 0x406f8db1c482e18b + .quad 0x9e630d2c7f191ee4 + .quad 0x4fbf8301bc3ff670 + .quad 0x787d8e4e7afb73c4 + .quad 0x50d83d5be8f58fa5 + + // 2^28 * 8 * G + + .quad 0x85683916c11a1897 + .quad 0x2d69a4efe506d008 + .quad 0x39af1378f664bd01 + .quad 0x65942131361517c6 + .quad 0xc0accf90b4d3b66d + .quad 0xa7059de561732e60 + .quad 0x033d1f7870c6b0ba + .quad 0x584161cd26d946e4 + .quad 0xbbf2b1a072d27ca2 + .quad 0xbf393c59fbdec704 + .quad 0xe98dbbcee262b81e + .quad 0x02eebd0b3029b589 + + // 2^32 * 1 * G + + .quad 0x61368756a60dac5f + .quad 0x17e02f6aebabdc57 + .quad 0x7f193f2d4cce0f7d + .quad 0x20234a7789ecdcf0 + .quad 0x8765b69f7b85c5e8 + .quad 0x6ff0678bd168bab2 + .quad 0x3a70e77c1d330f9b + .quad 0x3a5f6d51b0af8e7c + .quad 0x76d20db67178b252 + .quad 0x071c34f9d51ed160 + .quad 0xf62a4a20b3e41170 + .quad 0x7cd682353cffe366 + + // 2^32 * 2 * G + + .quad 0x0be1a45bd887fab6 + .quad 0x2a846a32ba403b6e + .quad 0xd9921012e96e6000 + .quad 0x2838c8863bdc0943 + .quad 0xa665cd6068acf4f3 + .quad 0x42d92d183cd7e3d3 + .quad 0x5759389d336025d9 + .quad 0x3ef0253b2b2cd8ff + .quad 0xd16bb0cf4a465030 + .quad 0xfa496b4115c577ab + .quad 0x82cfae8af4ab419d + .quad 0x21dcb8a606a82812 + + // 2^32 * 3 * G + + .quad 0x5c6004468c9d9fc8 + .quad 0x2540096ed42aa3cb + .quad 0x125b4d4c12ee2f9c + .quad 0x0bc3d08194a31dab + .quad 0x9a8d00fabe7731ba + .quad 0x8203607e629e1889 + .quad 0xb2cc023743f3d97f + .quad 0x5d840dbf6c6f678b + .quad 0x706e380d309fe18b + .quad 0x6eb02da6b9e165c7 + .quad 0x57bbba997dae20ab + .quad 0x3a4276232ac196dd + + // 2^32 * 4 * G + + .quad 0x4b42432c8a7084fa + .quad 0x898a19e3dfb9e545 + .quad 0xbe9f00219c58e45d + .quad 0x1ff177cea16debd1 + .quad 0x3bf8c172db447ecb + .quad 0x5fcfc41fc6282dbd + .quad 0x80acffc075aa15fe + .quad 0x0770c9e824e1a9f9 + .quad 0xcf61d99a45b5b5fd + .quad 0x860984e91b3a7924 + .quad 0xe7300919303e3e89 + .quad 0x39f264fd41500b1e + + // 2^32 * 5 * G + + .quad 0xa7ad3417dbe7e29c + .quad 0xbd94376a2b9c139c + .quad 0xa0e91b8e93597ba9 + .quad 0x1712d73468889840 + .quad 0xd19b4aabfe097be1 + .quad 0xa46dfce1dfe01929 + .quad 0xc3c908942ca6f1ff + .quad 0x65c621272c35f14e + .quad 0xe72b89f8ce3193dd + .quad 0x4d103356a125c0bb + .quad 0x0419a93d2e1cfe83 + .quad 0x22f9800ab19ce272 + + // 2^32 * 6 * G + + .quad 0x605a368a3e9ef8cb + .quad 0xe3e9c022a5504715 + .quad 0x553d48b05f24248f + .quad 0x13f416cd647626e5 + .quad 0x42029fdd9a6efdac + .quad 0xb912cebe34a54941 + .quad 0x640f64b987bdf37b + .quad 0x4171a4d38598cab4 + .quad 0xfa2758aa99c94c8c + .quad 0x23006f6fb000b807 + .quad 0xfbd291ddadda5392 + .quad 0x508214fa574bd1ab + + // 2^32 * 7 * G + + .quad 0xc20269153ed6fe4b + .quad 0xa65a6739511d77c4 + .quad 0xcbde26462c14af94 + .quad 0x22f960ec6faba74b + .quad 0x461a15bb53d003d6 + .quad 0xb2102888bcf3c965 + .quad 0x27c576756c683a5a + .quad 0x3a7758a4c86cb447 + .quad 0x548111f693ae5076 + .quad 0x1dae21df1dfd54a6 + .quad 0x12248c90f3115e65 + .quad 0x5d9fd15f8de7f494 + + // 2^32 * 8 * G + + .quad 0x031408d36d63727f + .quad 0x6a379aefd7c7b533 + .quad 0xa9e18fc5ccaee24b + .quad 0x332f35914f8fbed3 + .quad 0x3f244d2aeed7521e + .quad 0x8e3a9028432e9615 + .quad 0xe164ba772e9c16d4 + .quad 0x3bc187fa47eb98d8 + .quad 
0x6d470115ea86c20c + .quad 0x998ab7cb6c46d125 + .quad 0xd77832b53a660188 + .quad 0x450d81ce906fba03 + + // 2^36 * 1 * G + + .quad 0xf8ae4d2ad8453902 + .quad 0x7018058ee8db2d1d + .quad 0xaab3995fc7d2c11e + .quad 0x53b16d2324ccca79 + .quad 0x23264d66b2cae0b5 + .quad 0x7dbaed33ebca6576 + .quad 0x030ebed6f0d24ac8 + .quad 0x2a887f78f7635510 + .quad 0x2a23b9e75c012d4f + .quad 0x0c974651cae1f2ea + .quad 0x2fb63273675d70ca + .quad 0x0ba7250b864403f5 + + // 2^36 * 2 * G + + .quad 0xbb0d18fd029c6421 + .quad 0xbc2d142189298f02 + .quad 0x8347f8e68b250e96 + .quad 0x7b9f2fe8032d71c9 + .quad 0xdd63589386f86d9c + .quad 0x61699176e13a85a4 + .quad 0x2e5111954eaa7d57 + .quad 0x32c21b57fb60bdfb + .quad 0xd87823cd319e0780 + .quad 0xefc4cfc1897775c5 + .quad 0x4854fb129a0ab3f7 + .quad 0x12c49d417238c371 + + // 2^36 * 3 * G + + .quad 0x0950b533ffe83769 + .quad 0x21861c1d8e1d6bd1 + .quad 0xf022d8381302e510 + .quad 0x2509200c6391cab4 + .quad 0x09b3a01783799542 + .quad 0x626dd08faad5ee3f + .quad 0xba00bceeeb70149f + .quad 0x1421b246a0a444c9 + .quad 0x4aa43a8e8c24a7c7 + .quad 0x04c1f540d8f05ef5 + .quad 0xadba5e0c0b3eb9dc + .quad 0x2ab5504448a49ce3 + + // 2^36 * 4 * G + + .quad 0x2ed227266f0f5dec + .quad 0x9824ee415ed50824 + .quad 0x807bec7c9468d415 + .quad 0x7093bae1b521e23f + .quad 0xdc07ac631c5d3afa + .quad 0x58615171f9df8c6c + .quad 0x72a079d89d73e2b0 + .quad 0x7301f4ceb4eae15d + .quad 0x6409e759d6722c41 + .quad 0xa674e1cf72bf729b + .quad 0xbc0a24eb3c21e569 + .quad 0x390167d24ebacb23 + + // 2^36 * 5 * G + + .quad 0x27f58e3bba353f1c + .quad 0x4c47764dbf6a4361 + .quad 0xafbbc4e56e562650 + .quad 0x07db2ee6aae1a45d + .quad 0xd7bb054ba2f2120b + .quad 0xe2b9ceaeb10589b7 + .quad 0x3fe8bac8f3c0edbe + .quad 0x4cbd40767112cb69 + .quad 0x0b603cc029c58176 + .quad 0x5988e3825cb15d61 + .quad 0x2bb61413dcf0ad8d + .quad 0x7b8eec6c74183287 + + // 2^36 * 6 * G + + .quad 0xe4ca40782cd27cb0 + .quad 0xdaf9c323fbe967bd + .quad 0xb29bd34a8ad41e9e + .quad 0x72810497626ede4d + .quad 0x32fee570fc386b73 + .quad 0xda8b0141da3a8cc7 + .quad 0x975ffd0ac8968359 + .quad 0x6ee809a1b132a855 + .quad 0x9444bb31fcfd863a + .quad 0x2fe3690a3e4e48c5 + .quad 0xdc29c867d088fa25 + .quad 0x13bd1e38d173292e + + // 2^36 * 7 * G + + .quad 0xd32b4cd8696149b5 + .quad 0xe55937d781d8aab7 + .quad 0x0bcb2127ae122b94 + .quad 0x41e86fcfb14099b0 + .quad 0x223fb5cf1dfac521 + .quad 0x325c25316f554450 + .quad 0x030b98d7659177ac + .quad 0x1ed018b64f88a4bd + .quad 0x3630dfa1b802a6b0 + .quad 0x880f874742ad3bd5 + .quad 0x0af90d6ceec5a4d4 + .quad 0x746a247a37cdc5d9 + + // 2^36 * 8 * G + + .quad 0xd531b8bd2b7b9af6 + .quad 0x5005093537fc5b51 + .quad 0x232fcf25c593546d + .quad 0x20a365142bb40f49 + .quad 0x6eccd85278d941ed + .quad 0x2254ae83d22f7843 + .quad 0xc522d02e7bbfcdb7 + .quad 0x681e3351bff0e4e2 + .quad 0x8b64b59d83034f45 + .quad 0x2f8b71f21fa20efb + .quad 0x69249495ba6550e4 + .quad 0x539ef98e45d5472b + + // 2^40 * 1 * G + + .quad 0x6e7bb6a1a6205275 + .quad 0xaa4f21d7413c8e83 + .quad 0x6f56d155e88f5cb2 + .quad 0x2de25d4ba6345be1 + .quad 0xd074d8961cae743f + .quad 0xf86d18f5ee1c63ed + .quad 0x97bdc55be7f4ed29 + .quad 0x4cbad279663ab108 + .quad 0x80d19024a0d71fcd + .quad 0xc525c20afb288af8 + .quad 0xb1a3974b5f3a6419 + .quad 0x7d7fbcefe2007233 + + // 2^40 * 2 * G + + .quad 0xfaef1e6a266b2801 + .quad 0x866c68c4d5739f16 + .quad 0xf68a2fbc1b03762c + .quad 0x5975435e87b75a8d + .quad 0xcd7c5dc5f3c29094 + .quad 0xc781a29a2a9105ab + .quad 0x80c61d36421c3058 + .quad 0x4f9cd196dcd8d4d7 + .quad 0x199297d86a7b3768 + .quad 0xd0d058241ad17a63 + .quad 0xba029cad5c1c0c17 + .quad 
0x7ccdd084387a0307 + + // 2^40 * 3 * G + + .quad 0xdca6422c6d260417 + .quad 0xae153d50948240bd + .quad 0xa9c0c1b4fb68c677 + .quad 0x428bd0ed61d0cf53 + .quad 0x9b0c84186760cc93 + .quad 0xcdae007a1ab32a99 + .quad 0xa88dec86620bda18 + .quad 0x3593ca848190ca44 + .quad 0x9213189a5e849aa7 + .quad 0xd4d8c33565d8facd + .quad 0x8c52545b53fdbbd1 + .quad 0x27398308da2d63e6 + + // 2^40 * 4 * G + + .quad 0x42c38d28435ed413 + .quad 0xbd50f3603278ccc9 + .quad 0xbb07ab1a79da03ef + .quad 0x269597aebe8c3355 + .quad 0xb9a10e4c0a702453 + .quad 0x0fa25866d57d1bde + .quad 0xffb9d9b5cd27daf7 + .quad 0x572c2945492c33fd + .quad 0xc77fc745d6cd30be + .quad 0xe4dfe8d3e3baaefb + .quad 0xa22c8830aa5dda0c + .quad 0x7f985498c05bca80 + + // 2^40 * 5 * G + + .quad 0x3849ce889f0be117 + .quad 0x8005ad1b7b54a288 + .quad 0x3da3c39f23fc921c + .quad 0x76c2ec470a31f304 + .quad 0xd35615520fbf6363 + .quad 0x08045a45cf4dfba6 + .quad 0xeec24fbc873fa0c2 + .quad 0x30f2653cd69b12e7 + .quad 0x8a08c938aac10c85 + .quad 0x46179b60db276bcb + .quad 0xa920c01e0e6fac70 + .quad 0x2f1273f1596473da + + // 2^40 * 6 * G + + .quad 0x4739fc7c8ae01e11 + .quad 0xfd5274904a6aab9f + .quad 0x41d98a8287728f2e + .quad 0x5d9e572ad85b69f2 + .quad 0x30488bd755a70bc0 + .quad 0x06d6b5a4f1d442e7 + .quad 0xead1a69ebc596162 + .quad 0x38ac1997edc5f784 + .quad 0x0666b517a751b13b + .quad 0x747d06867e9b858c + .quad 0xacacc011454dde49 + .quad 0x22dfcd9cbfe9e69c + + // 2^40 * 7 * G + + .quad 0x8ddbd2e0c30d0cd9 + .quad 0xad8e665facbb4333 + .quad 0x8f6b258c322a961f + .quad 0x6b2916c05448c1c7 + .quad 0x56ec59b4103be0a1 + .quad 0x2ee3baecd259f969 + .quad 0x797cb29413f5cd32 + .quad 0x0fe9877824cde472 + .quad 0x7edb34d10aba913b + .quad 0x4ea3cd822e6dac0e + .quad 0x66083dff6578f815 + .quad 0x4c303f307ff00a17 + + // 2^40 * 8 * G + + .quad 0xd30a3bd617b28c85 + .quad 0xc5d377b739773bea + .quad 0xc6c6e78c1e6a5cbf + .quad 0x0d61b8f78b2ab7c4 + .quad 0x29fc03580dd94500 + .quad 0xecd27aa46fbbec93 + .quad 0x130a155fc2e2a7f8 + .quad 0x416b151ab706a1d5 + .quad 0x56a8d7efe9c136b0 + .quad 0xbd07e5cd58e44b20 + .quad 0xafe62fda1b57e0ab + .quad 0x191a2af74277e8d2 + + // 2^44 * 1 * G + + .quad 0xd550095bab6f4985 + .quad 0x04f4cd5b4fbfaf1a + .quad 0x9d8e2ed12a0c7540 + .quad 0x2bc24e04b2212286 + .quad 0x09d4b60b2fe09a14 + .quad 0xc384f0afdbb1747e + .quad 0x58e2ea8978b5fd6e + .quad 0x519ef577b5e09b0a + .quad 0x1863d7d91124cca9 + .quad 0x7ac08145b88a708e + .quad 0x2bcd7309857031f5 + .quad 0x62337a6e8ab8fae5 + + // 2^44 * 2 * G + + .quad 0x4bcef17f06ffca16 + .quad 0xde06e1db692ae16a + .quad 0x0753702d614f42b0 + .quad 0x5f6041b45b9212d0 + .quad 0xd1ab324e1b3a1273 + .quad 0x18947cf181055340 + .quad 0x3b5d9567a98c196e + .quad 0x7fa00425802e1e68 + .quad 0x7d531574028c2705 + .quad 0x80317d69db0d75fe + .quad 0x30fface8ef8c8ddd + .quad 0x7e9de97bb6c3e998 + + // 2^44 * 3 * G + + .quad 0x1558967b9e6585a3 + .quad 0x97c99ce098e98b92 + .quad 0x10af149b6eb3adad + .quad 0x42181fe8f4d38cfa + .quad 0xf004be62a24d40dd + .quad 0xba0659910452d41f + .quad 0x81c45ee162a44234 + .quad 0x4cb829d8a22266ef + .quad 0x1dbcaa8407b86681 + .quad 0x081f001e8b26753b + .quad 0x3cd7ce6a84048e81 + .quad 0x78af11633f25f22c + + // 2^44 * 4 * G + + .quad 0x8416ebd40b50babc + .quad 0x1508722628208bee + .quad 0xa3148fafb9c1c36d + .quad 0x0d07daacd32d7d5d + .quad 0x3241c00e7d65318c + .quad 0xe6bee5dcd0e86de7 + .quad 0x118b2dc2fbc08c26 + .quad 0x680d04a7fc603dc3 + .quad 0xf9c2414a695aa3eb + .quad 0xdaa42c4c05a68f21 + .quad 0x7c6c23987f93963e + .quad 0x210e8cd30c3954e3 + + // 2^44 * 5 * G + + .quad 0xac4201f210a71c06 + .quad 0x6a65e0aef3bfb021 
+ .quad 0xbc42c35c393632f7 + .quad 0x56ea8db1865f0742 + .quad 0x2b50f16137fe6c26 + .quad 0xe102bcd856e404d8 + .quad 0x12b0f1414c561f6b + .quad 0x51b17bc8d028ec91 + .quad 0xfff5fb4bcf535119 + .quad 0xf4989d79df1108a0 + .quad 0xbdfcea659a3ba325 + .quad 0x18a11f1174d1a6f2 + + // 2^44 * 6 * G + + .quad 0x407375ab3f6bba29 + .quad 0x9ec3b6d8991e482e + .quad 0x99c80e82e55f92e9 + .quad 0x307c13b6fb0c0ae1 + .quad 0xfbd63cdad27a5f2c + .quad 0xf00fc4bc8aa106d7 + .quad 0x53fb5c1a8e64a430 + .quad 0x04eaabe50c1a2e85 + .quad 0x24751021cb8ab5e7 + .quad 0xfc2344495c5010eb + .quad 0x5f1e717b4e5610a1 + .quad 0x44da5f18c2710cd5 + + // 2^44 * 7 * G + + .quad 0x033cc55ff1b82eb5 + .quad 0xb15ae36d411cae52 + .quad 0xba40b6198ffbacd3 + .quad 0x768edce1532e861f + .quad 0x9156fe6b89d8eacc + .quad 0xe6b79451e23126a1 + .quad 0xbd7463d93944eb4e + .quad 0x726373f6767203ae + .quad 0xe305ca72eb7ef68a + .quad 0x662cf31f70eadb23 + .quad 0x18f026fdb4c45b68 + .quad 0x513b5384b5d2ecbd + + // 2^44 * 8 * G + + .quad 0x46d46280c729989e + .quad 0x4b93fbd05368a5dd + .quad 0x63df3f81d1765a89 + .quad 0x34cebd64b9a0a223 + .quad 0x5e2702878af34ceb + .quad 0x900b0409b946d6ae + .quad 0x6512ebf7dabd8512 + .quad 0x61d9b76988258f81 + .quad 0xa6c5a71349b7d94b + .quad 0xa3f3d15823eb9446 + .quad 0x0416fbd277484834 + .quad 0x69d45e6f2c70812f + + // 2^48 * 1 * G + + .quad 0xce16f74bc53c1431 + .quad 0x2b9725ce2072edde + .quad 0xb8b9c36fb5b23ee7 + .quad 0x7e2e0e450b5cc908 + .quad 0x9fe62b434f460efb + .quad 0xded303d4a63607d6 + .quad 0xf052210eb7a0da24 + .quad 0x237e7dbe00545b93 + .quad 0x013575ed6701b430 + .quad 0x231094e69f0bfd10 + .quad 0x75320f1583e47f22 + .quad 0x71afa699b11155e3 + + // 2^48 * 2 * G + + .quad 0x65ce6f9b3953b61d + .quad 0xc65839eaafa141e6 + .quad 0x0f435ffda9f759fe + .quad 0x021142e9c2b1c28e + .quad 0xea423c1c473b50d6 + .quad 0x51e87a1f3b38ef10 + .quad 0x9b84bf5fb2c9be95 + .quad 0x00731fbc78f89a1c + .quad 0xe430c71848f81880 + .quad 0xbf960c225ecec119 + .quad 0xb6dae0836bba15e3 + .quad 0x4c4d6f3347e15808 + + // 2^48 * 3 * G + + .quad 0x18f7eccfc17d1fc9 + .quad 0x6c75f5a651403c14 + .quad 0xdbde712bf7ee0cdf + .quad 0x193fddaaa7e47a22 + .quad 0x2f0cddfc988f1970 + .quad 0x6b916227b0b9f51b + .quad 0x6ec7b6c4779176be + .quad 0x38bf9500a88f9fa8 + .quad 0x1fd2c93c37e8876f + .quad 0xa2f61e5a18d1462c + .quad 0x5080f58239241276 + .quad 0x6a6fb99ebf0d4969 + + // 2^48 * 4 * G + + .quad 0x6a46c1bb560855eb + .quad 0x2416bb38f893f09d + .quad 0xd71d11378f71acc1 + .quad 0x75f76914a31896ea + .quad 0xeeb122b5b6e423c6 + .quad 0x939d7010f286ff8e + .quad 0x90a92a831dcf5d8c + .quad 0x136fda9f42c5eb10 + .quad 0xf94cdfb1a305bdd1 + .quad 0x0f364b9d9ff82c08 + .quad 0x2a87d8a5c3bb588a + .quad 0x022183510be8dcba + + // 2^48 * 5 * G + + .quad 0x4af766385ead2d14 + .quad 0xa08ed880ca7c5830 + .quad 0x0d13a6e610211e3d + .quad 0x6a071ce17b806c03 + .quad 0x9d5a710143307a7f + .quad 0xb063de9ec47da45f + .quad 0x22bbfe52be927ad3 + .quad 0x1387c441fd40426c + .quad 0xb5d3c3d187978af8 + .quad 0x722b5a3d7f0e4413 + .quad 0x0d7b4848bb477ca0 + .quad 0x3171b26aaf1edc92 + + // 2^48 * 6 * G + + .quad 0xa92f319097564ca8 + .quad 0xff7bb84c2275e119 + .quad 0x4f55fe37a4875150 + .quad 0x221fd4873cf0835a + .quad 0xa60db7d8b28a47d1 + .quad 0xa6bf14d61770a4f1 + .quad 0xd4a1f89353ddbd58 + .quad 0x6c514a63344243e9 + .quad 0x2322204f3a156341 + .quad 0xfb73e0e9ba0a032d + .quad 0xfce0dd4c410f030e + .quad 0x48daa596fb924aaa + + // 2^48 * 7 * G + + .quad 0x6eca8e665ca59cc7 + .quad 0xa847254b2e38aca0 + .quad 0x31afc708d21e17ce + .quad 0x676dd6fccad84af7 + .quad 0x14f61d5dc84c9793 + .quad 
0x9941f9e3ef418206 + .quad 0xcdf5b88f346277ac + .quad 0x58c837fa0e8a79a9 + .quad 0x0cf9688596fc9058 + .quad 0x1ddcbbf37b56a01b + .quad 0xdcc2e77d4935d66a + .quad 0x1c4f73f2c6a57f0a + + // 2^48 * 8 * G + + .quad 0x0e7a4fbd305fa0bb + .quad 0x829d4ce054c663ad + .quad 0xf421c3832fe33848 + .quad 0x795ac80d1bf64c42 + .quad 0xb36e706efc7c3484 + .quad 0x73dfc9b4c3c1cf61 + .quad 0xeb1d79c9781cc7e5 + .quad 0x70459adb7daf675c + .quad 0x1b91db4991b42bb3 + .quad 0x572696234b02dcca + .quad 0x9fdf9ee51f8c78dc + .quad 0x5fe162848ce21fd3 + + // 2^52 * 1 * G + + .quad 0xe2790aae4d077c41 + .quad 0x8b938270db7469a3 + .quad 0x6eb632dc8abd16a2 + .quad 0x720814ecaa064b72 + .quad 0x315c29c795115389 + .quad 0xd7e0e507862f74ce + .quad 0x0c4a762185927432 + .quad 0x72de6c984a25a1e4 + .quad 0xae9ab553bf6aa310 + .quad 0x050a50a9806d6e1b + .quad 0x92bb7403adff5139 + .quad 0x0394d27645be618b + + // 2^52 * 2 * G + + .quad 0x4d572251857eedf4 + .quad 0xe3724edde19e93c5 + .quad 0x8a71420e0b797035 + .quad 0x3b3c833687abe743 + .quad 0xf5396425b23545a4 + .quad 0x15a7a27e98fbb296 + .quad 0xab6c52bc636fdd86 + .quad 0x79d995a8419334ee + .quad 0xcd8a8ea61195dd75 + .quad 0xa504d8a81dd9a82f + .quad 0x540dca81a35879b6 + .quad 0x60dd16a379c86a8a + + // 2^52 * 3 * G + + .quad 0x35a2c8487381e559 + .quad 0x596ffea6d78082cb + .quad 0xcb9771ebdba7b653 + .quad 0x5a08b5019b4da685 + .quad 0x3501d6f8153e47b8 + .quad 0xb7a9675414a2f60c + .quad 0x112ee8b6455d9523 + .quad 0x4e62a3c18112ea8a + .quad 0xc8d4ac04516ab786 + .quad 0x595af3215295b23d + .quad 0xd6edd234db0230c1 + .quad 0x0929efe8825b41cc + + // 2^52 * 4 * G + + .quad 0x5f0601d1cbd0f2d3 + .quad 0x736e412f6132bb7f + .quad 0x83604432238dde87 + .quad 0x1e3a5272f5c0753c + .quad 0x8b3172b7ad56651d + .quad 0x01581b7a3fabd717 + .quad 0x2dc94df6424df6e4 + .quad 0x30376e5d2c29284f + .quad 0xd2918da78159a59c + .quad 0x6bdc1cd93f0713f3 + .quad 0x565f7a934acd6590 + .quad 0x53daacec4cb4c128 + + // 2^52 * 5 * G + + .quad 0x4ca73bd79cc8a7d6 + .quad 0x4d4a738f47e9a9b2 + .quad 0xf4cbf12942f5fe00 + .quad 0x01a13ff9bdbf0752 + .quad 0x99852bc3852cfdb0 + .quad 0x2cc12e9559d6ed0b + .quad 0x70f9e2bf9b5ac27b + .quad 0x4f3b8c117959ae99 + .quad 0x55b6c9c82ff26412 + .quad 0x1ac4a8c91fb667a8 + .quad 0xd527bfcfeb778bf2 + .quad 0x303337da7012a3be + + // 2^52 * 6 * G + + .quad 0x955422228c1c9d7c + .quad 0x01fac1371a9b340f + .quad 0x7e8d9177925b48d7 + .quad 0x53f8ad5661b3e31b + .quad 0x976d3ccbfad2fdd1 + .quad 0xcb88839737a640a8 + .quad 0x2ff00c1d6734cb25 + .quad 0x269ff4dc789c2d2b + .quad 0x0c003fbdc08d678d + .quad 0x4d982fa37ead2b17 + .quad 0xc07e6bcdb2e582f1 + .quad 0x296c7291df412a44 + + // 2^52 * 7 * G + + .quad 0x7903de2b33daf397 + .quad 0xd0ff0619c9a624b3 + .quad 0x8a1d252b555b3e18 + .quad 0x2b6d581c52e0b7c0 + .quad 0xdfb23205dab8b59e + .quad 0x465aeaa0c8092250 + .quad 0xd133c1189a725d18 + .quad 0x2327370261f117d1 + .quad 0x3d0543d3623e7986 + .quad 0x679414c2c278a354 + .quad 0xae43f0cc726196f6 + .quad 0x7836c41f8245eaba + + // 2^52 * 8 * G + + .quad 0xe7a254db49e95a81 + .quad 0x5192d5d008b0ad73 + .quad 0x4d20e5b1d00afc07 + .quad 0x5d55f8012cf25f38 + .quad 0xca651e848011937c + .quad 0xc6b0c46e6ef41a28 + .quad 0xb7021ba75f3f8d52 + .quad 0x119dff99ead7b9fd + .quad 0x43eadfcbf4b31d4d + .quad 0xc6503f7411148892 + .quad 0xfeee68c5060d3b17 + .quad 0x329293b3dd4a0ac8 + + // 2^56 * 1 * G + + .quad 0x4e59214fe194961a + .quad 0x49be7dc70d71cd4f + .quad 0x9300cfd23b50f22d + .quad 0x4789d446fc917232 + .quad 0x2879852d5d7cb208 + .quad 0xb8dedd70687df2e7 + .quad 0xdc0bffab21687891 + .quad 0x2b44c043677daa35 + .quad 
0x1a1c87ab074eb78e + .quad 0xfac6d18e99daf467 + .quad 0x3eacbbcd484f9067 + .quad 0x60c52eef2bb9a4e4 + + // 2^56 * 2 * G + + .quad 0x0b5d89bc3bfd8bf1 + .quad 0xb06b9237c9f3551a + .quad 0x0e4c16b0d53028f5 + .quad 0x10bc9c312ccfcaab + .quad 0x702bc5c27cae6d11 + .quad 0x44c7699b54a48cab + .quad 0xefbc4056ba492eb2 + .quad 0x70d77248d9b6676d + .quad 0xaa8ae84b3ec2a05b + .quad 0x98699ef4ed1781e0 + .quad 0x794513e4708e85d1 + .quad 0x63755bd3a976f413 + + // 2^56 * 3 * G + + .quad 0xb55fa03e2ad10853 + .quad 0x356f75909ee63569 + .quad 0x9ff9f1fdbe69b890 + .quad 0x0d8cc1c48bc16f84 + .quad 0x3dc7101897f1acb7 + .quad 0x5dda7d5ec165bbd8 + .quad 0x508e5b9c0fa1020f + .quad 0x2763751737c52a56 + .quad 0x029402d36eb419a9 + .quad 0xf0b44e7e77b460a5 + .quad 0xcfa86230d43c4956 + .quad 0x70c2dd8a7ad166e7 + + // 2^56 * 4 * G + + .quad 0x656194509f6fec0e + .quad 0xee2e7ea946c6518d + .quad 0x9733c1f367e09b5c + .quad 0x2e0fac6363948495 + .quad 0x91d4967db8ed7e13 + .quad 0x74252f0ad776817a + .quad 0xe40982e00d852564 + .quad 0x32b8613816a53ce5 + .quad 0x79e7f7bee448cd64 + .quad 0x6ac83a67087886d0 + .quad 0xf89fd4d9a0e4db2e + .quad 0x4179215c735a4f41 + + // 2^56 * 5 * G + + .quad 0x8c7094e7d7dced2a + .quad 0x97fb8ac347d39c70 + .quad 0xe13be033a906d902 + .quad 0x700344a30cd99d76 + .quad 0xe4ae33b9286bcd34 + .quad 0xb7ef7eb6559dd6dc + .quad 0x278b141fb3d38e1f + .quad 0x31fa85662241c286 + .quad 0xaf826c422e3622f4 + .quad 0xc12029879833502d + .quad 0x9bc1b7e12b389123 + .quad 0x24bb2312a9952489 + + // 2^56 * 6 * G + + .quad 0xb1a8ed1732de67c3 + .quad 0x3cb49418461b4948 + .quad 0x8ebd434376cfbcd2 + .quad 0x0fee3e871e188008 + .quad 0x41f80c2af5f85c6b + .quad 0x687284c304fa6794 + .quad 0x8945df99a3ba1bad + .quad 0x0d1d2af9ffeb5d16 + .quad 0xa9da8aa132621edf + .quad 0x30b822a159226579 + .quad 0x4004197ba79ac193 + .quad 0x16acd79718531d76 + + // 2^56 * 7 * G + + .quad 0x72df72af2d9b1d3d + .quad 0x63462a36a432245a + .quad 0x3ecea07916b39637 + .quad 0x123e0ef6b9302309 + .quad 0xc959c6c57887b6ad + .quad 0x94e19ead5f90feba + .quad 0x16e24e62a342f504 + .quad 0x164ed34b18161700 + .quad 0x487ed94c192fe69a + .quad 0x61ae2cea3a911513 + .quad 0x877bf6d3b9a4de27 + .quad 0x78da0fc61073f3eb + + // 2^56 * 8 * G + + .quad 0x5bf15d28e52bc66a + .quad 0x2c47e31870f01a8e + .quad 0x2419afbc06c28bdd + .quad 0x2d25deeb256b173a + .quad 0xa29f80f1680c3a94 + .quad 0x71f77e151ae9e7e6 + .quad 0x1100f15848017973 + .quad 0x054aa4b316b38ddd + .quad 0xdfc8468d19267cb8 + .quad 0x0b28789c66e54daf + .quad 0x2aeb1d2a666eec17 + .quad 0x134610a6ab7da760 + + // 2^60 * 1 * G + + .quad 0xcaf55ec27c59b23f + .quad 0x99aeed3e154d04f2 + .quad 0x68441d72e14141f4 + .quad 0x140345133932a0a2 + .quad 0xd91430e0dc028c3c + .quad 0x0eb955a85217c771 + .quad 0x4b09e1ed2c99a1fa + .quad 0x42881af2bd6a743c + .quad 0x7bfec69aab5cad3d + .quad 0xc23e8cd34cb2cfad + .quad 0x685dd14bfb37d6a2 + .quad 0x0ad6d64415677a18 + + // 2^60 * 2 * G + + .quad 0x781a439e417becb5 + .quad 0x4ac5938cd10e0266 + .quad 0x5da385110692ac24 + .quad 0x11b065a2ade31233 + .quad 0x7914892847927e9f + .quad 0x33dad6ef370aa877 + .quad 0x1f8f24fa11122703 + .quad 0x5265ac2f2adf9592 + .quad 0x405fdd309afcb346 + .quad 0xd9723d4428e63f54 + .quad 0x94c01df05f65aaae + .quad 0x43e4dc3ae14c0809 + + // 2^60 * 3 * G + + .quad 0xbc12c7f1a938a517 + .quad 0x473028ab3180b2e1 + .quad 0x3f78571efbcd254a + .quad 0x74e534426ff6f90f + .quad 0xea6f7ac3adc2c6a3 + .quad 0xd0e928f6e9717c94 + .quad 0xe2d379ead645eaf5 + .quad 0x46dd8785c51ffbbe + .quad 0x709801be375c8898 + .quad 0x4b06dab5e3fd8348 + .quad 0x75880ced27230714 + .quad 
0x2b09468fdd2f4c42 + + // 2^60 * 4 * G + + .quad 0x97c749eeb701cb96 + .quad 0x83f438d4b6a369c3 + .quad 0x62962b8b9a402cd9 + .quad 0x6976c7509888df7b + .quad 0x5b97946582ffa02a + .quad 0xda096a51fea8f549 + .quad 0xa06351375f77af9b + .quad 0x1bcfde61201d1e76 + .quad 0x4a4a5490246a59a2 + .quad 0xd63ebddee87fdd90 + .quad 0xd9437c670d2371fa + .quad 0x69e87308d30f8ed6 + + // 2^60 * 5 * G + + .quad 0x435a8bb15656beb0 + .quad 0xf8fac9ba4f4d5bca + .quad 0xb9b278c41548c075 + .quad 0x3eb0ef76e892b622 + .quad 0x0f80bf028bc80303 + .quad 0x6aae16b37a18cefb + .quad 0xdd47ea47d72cd6a3 + .quad 0x61943588f4ed39aa + .quad 0xd26e5c3e91039f85 + .quad 0xc0e9e77df6f33aa9 + .quad 0xe8968c5570066a93 + .quad 0x3c34d1881faaaddd + + // 2^60 * 6 * G + + .quad 0x3f9d2b5ea09f9ec0 + .quad 0x1dab3b6fb623a890 + .quad 0xa09ba3ea72d926c4 + .quad 0x374193513fd8b36d + .quad 0xbd5b0b8f2fffe0d9 + .quad 0x6aa254103ed24fb9 + .quad 0x2ac7d7bcb26821c4 + .quad 0x605b394b60dca36a + .quad 0xb4e856e45a9d1ed2 + .quad 0xefe848766c97a9a2 + .quad 0xb104cf641e5eee7d + .quad 0x2f50b81c88a71c8f + + // 2^60 * 7 * G + + .quad 0x31723c61fc6811bb + .quad 0x9cb450486211800f + .quad 0x768933d347995753 + .quad 0x3491a53502752fcd + .quad 0x2b552ca0a7da522a + .quad 0x3230b336449b0250 + .quad 0xf2c4c5bca4b99fb9 + .quad 0x7b2c674958074a22 + .quad 0xd55165883ed28cdf + .quad 0x12d84fd2d362de39 + .quad 0x0a874ad3e3378e4f + .quad 0x000d2b1f7c763e74 + + // 2^60 * 8 * G + + .quad 0x3d420811d06d4a67 + .quad 0xbefc048590e0ffe3 + .quad 0xf870c6b7bd487bde + .quad 0x6e2a7316319afa28 + .quad 0x9624778c3e94a8ab + .quad 0x0ad6f3cee9a78bec + .quad 0x948ac7810d743c4f + .quad 0x76627935aaecfccc + .quad 0x56a8ac24d6d59a9f + .quad 0xc8db753e3096f006 + .quad 0x477f41e68f4c5299 + .quad 0x588d851cf6c86114 + + // 2^64 * 1 * G + + .quad 0x51138ec78df6b0fe + .quad 0x5397da89e575f51b + .quad 0x09207a1d717af1b9 + .quad 0x2102fdba2b20d650 + .quad 0xcd2a65e777d1f515 + .quad 0x548991878faa60f1 + .quad 0xb1b73bbcdabc06e5 + .quad 0x654878cba97cc9fb + .quad 0x969ee405055ce6a1 + .quad 0x36bca7681251ad29 + .quad 0x3a1af517aa7da415 + .quad 0x0ad725db29ecb2ba + + // 2^64 * 2 * G + + .quad 0xdc4267b1834e2457 + .quad 0xb67544b570ce1bc5 + .quad 0x1af07a0bf7d15ed7 + .quad 0x4aefcffb71a03650 + .quad 0xfec7bc0c9b056f85 + .quad 0x537d5268e7f5ffd7 + .quad 0x77afc6624312aefa + .quad 0x4f675f5302399fd9 + .quad 0xc32d36360415171e + .quad 0xcd2bef118998483b + .quad 0x870a6eadd0945110 + .quad 0x0bccbb72a2a86561 + + // 2^64 * 3 * G + + .quad 0x185e962feab1a9c8 + .quad 0x86e7e63565147dcd + .quad 0xb092e031bb5b6df2 + .quad 0x4024f0ab59d6b73e + .quad 0x186d5e4c50fe1296 + .quad 0xe0397b82fee89f7e + .quad 0x3bc7f6c5507031b0 + .quad 0x6678fd69108f37c2 + .quad 0x1586fa31636863c2 + .quad 0x07f68c48572d33f2 + .quad 0x4f73cc9f789eaefc + .quad 0x2d42e2108ead4701 + + // 2^64 * 4 * G + + .quad 0x97f5131594dfd29b + .quad 0x6155985d313f4c6a + .quad 0xeba13f0708455010 + .quad 0x676b2608b8d2d322 + .quad 0x21717b0d0f537593 + .quad 0x914e690b131e064c + .quad 0x1bb687ae752ae09f + .quad 0x420bf3a79b423c6e + .quad 0x8138ba651c5b2b47 + .quad 0x8671b6ec311b1b80 + .quad 0x7bff0cb1bc3135b0 + .quad 0x745d2ffa9c0cf1e0 + + // 2^64 * 5 * G + + .quad 0xbf525a1e2bc9c8bd + .quad 0xea5b260826479d81 + .quad 0xd511c70edf0155db + .quad 0x1ae23ceb960cf5d0 + .quad 0x6036df5721d34e6a + .quad 0xb1db8827997bb3d0 + .quad 0xd3c209c3c8756afa + .quad 0x06e15be54c1dc839 + .quad 0x5b725d871932994a + .quad 0x32351cb5ceb1dab0 + .quad 0x7dc41549dab7ca05 + .quad 0x58ded861278ec1f7 + + // 2^64 * 6 * G + + .quad 0xd8173793f266c55c + .quad 0xc8c976c5cc454e49 
+ .quad 0x5ce382f8bc26c3a8 + .quad 0x2ff39de85485f6f9 + .quad 0x2dfb5ba8b6c2c9a8 + .quad 0x48eeef8ef52c598c + .quad 0x33809107f12d1573 + .quad 0x08ba696b531d5bd8 + .quad 0x77ed3eeec3efc57a + .quad 0x04e05517d4ff4811 + .quad 0xea3d7a3ff1a671cb + .quad 0x120633b4947cfe54 + + // 2^64 * 7 * G + + .quad 0x0b94987891610042 + .quad 0x4ee7b13cecebfae8 + .quad 0x70be739594f0a4c0 + .quad 0x35d30a99b4d59185 + .quad 0x82bd31474912100a + .quad 0xde237b6d7e6fbe06 + .quad 0xe11e761911ea79c6 + .quad 0x07433be3cb393bde + .quad 0xff7944c05ce997f4 + .quad 0x575d3de4b05c51a3 + .quad 0x583381fd5a76847c + .quad 0x2d873ede7af6da9f + + // 2^64 * 8 * G + + .quad 0x157a316443373409 + .quad 0xfab8b7eef4aa81d9 + .quad 0xb093fee6f5a64806 + .quad 0x2e773654707fa7b6 + .quad 0xaa6202e14e5df981 + .quad 0xa20d59175015e1f5 + .quad 0x18a275d3bae21d6c + .quad 0x0543618a01600253 + .quad 0x0deabdf4974c23c1 + .quad 0xaa6f0a259dce4693 + .quad 0x04202cb8a29aba2c + .quad 0x4b1443362d07960d + + // 2^68 * 1 * G + + .quad 0x47b837f753242cec + .quad 0x256dc48cc04212f2 + .quad 0xe222fbfbe1d928c5 + .quad 0x48ea295bad8a2c07 + .quad 0x299b1c3f57c5715e + .quad 0x96cb929e6b686d90 + .quad 0x3004806447235ab3 + .quad 0x2c435c24a44d9fe1 + .quad 0x0607c97c80f8833f + .quad 0x0e851578ca25ec5b + .quad 0x54f7450b161ebb6f + .quad 0x7bcb4792a0def80e + + // 2^68 * 2 * G + + .quad 0x8487e3d02bc73659 + .quad 0x4baf8445059979df + .quad 0xd17c975adcad6fbf + .quad 0x57369f0bdefc96b6 + .quad 0x1cecd0a0045224c2 + .quad 0x757f1b1b69e53952 + .quad 0x775b7a925289f681 + .quad 0x1b6cc62016736148 + .quad 0xf1a9990175638698 + .quad 0x353dd1beeeaa60d3 + .quad 0x849471334c9ba488 + .quad 0x63fa6e6843ade311 + + // 2^68 * 3 * G + + .quad 0xd15c20536597c168 + .quad 0x9f73740098d28789 + .quad 0x18aee7f13257ba1f + .quad 0x3418bfda07346f14 + .quad 0x2195becdd24b5eb7 + .quad 0x5e41f18cc0cd44f9 + .quad 0xdf28074441ca9ede + .quad 0x07073b98f35b7d67 + .quad 0xd03c676c4ce530d4 + .quad 0x0b64c0473b5df9f4 + .quad 0x065cef8b19b3a31e + .quad 0x3084d661533102c9 + + // 2^68 * 4 * G + + .quad 0xe1f6b79ebf8469ad + .quad 0x15801004e2663135 + .quad 0x9a498330af74181b + .quad 0x3ba2504f049b673c + .quad 0x9a6ce876760321fd + .quad 0x7fe2b5109eb63ad8 + .quad 0x00e7d4ae8ac80592 + .quad 0x73d86b7abb6f723a + .quad 0x0b52b5606dba5ab6 + .quad 0xa9134f0fbbb1edab + .quad 0x30a9520d9b04a635 + .quad 0x6813b8f37973e5db + + // 2^68 * 5 * G + + .quad 0x9854b054334127c1 + .quad 0x105d047882fbff25 + .quad 0xdb49f7f944186f4f + .quad 0x1768e838bed0b900 + .quad 0xf194ca56f3157e29 + .quad 0x136d35705ef528a5 + .quad 0xdd4cef778b0599bc + .quad 0x7d5472af24f833ed + .quad 0xd0ef874daf33da47 + .quad 0x00d3be5db6e339f9 + .quad 0x3f2a8a2f9c9ceece + .quad 0x5d1aeb792352435a + + // 2^68 * 6 * G + + .quad 0xf59e6bb319cd63ca + .quad 0x670c159221d06839 + .quad 0xb06d565b2150cab6 + .quad 0x20fb199d104f12a3 + .quad 0x12c7bfaeb61ba775 + .quad 0xb84e621fe263bffd + .quad 0x0b47a5c35c840dcf + .quad 0x7e83be0bccaf8634 + .quad 0x61943dee6d99c120 + .quad 0x86101f2e460b9fe0 + .quad 0x6bb2f1518ee8598d + .quad 0x76b76289fcc475cc + + // 2^68 * 7 * G + + .quad 0x791b4cc1756286fa + .quad 0xdbced317d74a157c + .quad 0x7e732421ea72bde6 + .quad 0x01fe18491131c8e9 + .quad 0x4245f1a1522ec0b3 + .quad 0x558785b22a75656d + .quad 0x1d485a2548a1b3c0 + .quad 0x60959eccd58fe09f + .quad 0x3ebfeb7ba8ed7a09 + .quad 0x49fdc2bbe502789c + .quad 0x44ebce5d3c119428 + .quad 0x35e1eb55be947f4a + + // 2^68 * 8 * G + + .quad 0xdbdae701c5738dd3 + .quad 0xf9c6f635b26f1bee + .quad 0x61e96a8042f15ef4 + .quad 0x3aa1d11faf60a4d8 + .quad 0x14fd6dfa726ccc74 + .quad 
0x3b084cfe2f53b965 + .quad 0xf33ae4f552a2c8b4 + .quad 0x59aab07a0d40166a + .quad 0x77bcec4c925eac25 + .quad 0x1848718460137738 + .quad 0x5b374337fea9f451 + .quad 0x1865e78ec8e6aa46 + + // 2^72 * 1 * G + + .quad 0xccc4b7c7b66e1f7a + .quad 0x44157e25f50c2f7e + .quad 0x3ef06dfc713eaf1c + .quad 0x582f446752da63f7 + .quad 0x967c54e91c529ccb + .quad 0x30f6269264c635fb + .quad 0x2747aff478121965 + .quad 0x17038418eaf66f5c + .quad 0xc6317bd320324ce4 + .quad 0xa81042e8a4488bc4 + .quad 0xb21ef18b4e5a1364 + .quad 0x0c2a1c4bcda28dc9 + + // 2^72 * 2 * G + + .quad 0xd24dc7d06f1f0447 + .quad 0xb2269e3edb87c059 + .quad 0xd15b0272fbb2d28f + .quad 0x7c558bd1c6f64877 + .quad 0xedc4814869bd6945 + .quad 0x0d6d907dbe1c8d22 + .quad 0xc63bd212d55cc5ab + .quad 0x5a6a9b30a314dc83 + .quad 0xd0ec1524d396463d + .quad 0x12bb628ac35a24f0 + .quad 0xa50c3a791cbc5fa4 + .quad 0x0404a5ca0afbafc3 + + // 2^72 * 3 * G + + .quad 0x8c1f40070aa743d6 + .quad 0xccbad0cb5b265ee8 + .quad 0x574b046b668fd2de + .quad 0x46395bfdcadd9633 + .quad 0x62bc9e1b2a416fd1 + .quad 0xb5c6f728e350598b + .quad 0x04343fd83d5d6967 + .quad 0x39527516e7f8ee98 + .quad 0x117fdb2d1a5d9a9c + .quad 0x9c7745bcd1005c2a + .quad 0xefd4bef154d56fea + .quad 0x76579a29e822d016 + + // 2^72 * 4 * G + + .quad 0x45b68e7e49c02a17 + .quad 0x23cd51a2bca9a37f + .quad 0x3ed65f11ec224c1b + .quad 0x43a384dc9e05bdb1 + .quad 0x333cb51352b434f2 + .quad 0xd832284993de80e1 + .quad 0xb5512887750d35ce + .quad 0x02c514bb2a2777c1 + .quad 0x684bd5da8bf1b645 + .quad 0xfb8bd37ef6b54b53 + .quad 0x313916d7a9b0d253 + .quad 0x1160920961548059 + + // 2^72 * 5 * G + + .quad 0xb44d166929dacfaa + .quad 0xda529f4c8413598f + .quad 0xe9ef63ca453d5559 + .quad 0x351e125bc5698e0b + .quad 0x7a385616369b4dcd + .quad 0x75c02ca7655c3563 + .quad 0x7dc21bf9d4f18021 + .quad 0x2f637d7491e6e042 + .quad 0xd4b49b461af67bbe + .quad 0xd603037ac8ab8961 + .quad 0x71dee19ff9a699fb + .quad 0x7f182d06e7ce2a9a + + // 2^72 * 6 * G + + .quad 0x7a7c8e64ab0168ec + .quad 0xcb5a4a5515edc543 + .quad 0x095519d347cd0eda + .quad 0x67d4ac8c343e93b0 + .quad 0x09454b728e217522 + .quad 0xaa58e8f4d484b8d8 + .quad 0xd358254d7f46903c + .quad 0x44acc043241c5217 + .quad 0x1c7d6bbb4f7a5777 + .quad 0x8b35fed4918313e1 + .quad 0x4adca1c6c96b4684 + .quad 0x556d1c8312ad71bd + + // 2^72 * 7 * G + + .quad 0x17ef40e30c8d3982 + .quad 0x31f7073e15a3fa34 + .quad 0x4f21f3cb0773646e + .quad 0x746c6c6d1d824eff + .quad 0x81f06756b11be821 + .quad 0x0faff82310a3f3dd + .quad 0xf8b2d0556a99465d + .quad 0x097abe38cc8c7f05 + .quad 0x0c49c9877ea52da4 + .quad 0x4c4369559bdc1d43 + .quad 0x022c3809f7ccebd2 + .quad 0x577e14a34bee84bd + + // 2^72 * 8 * G + + .quad 0xf0e268ac61a73b0a + .quad 0xf2fafa103791a5f5 + .quad 0xc1e13e826b6d00e9 + .quad 0x60fa7ee96fd78f42 + .quad 0x94fecebebd4dd72b + .quad 0xf46a4fda060f2211 + .quad 0x124a5977c0c8d1ff + .quad 0x705304b8fb009295 + .quad 0xb63d1d354d296ec6 + .quad 0xf3c3053e5fad31d8 + .quad 0x670b958cb4bd42ec + .quad 0x21398e0ca16353fd + + // 2^76 * 1 * G + + .quad 0x216ab2ca8da7d2ef + .quad 0x366ad9dd99f42827 + .quad 0xae64b9004fdd3c75 + .quad 0x403a395b53909e62 + .quad 0x86c5fc16861b7e9a + .quad 0xf6a330476a27c451 + .quad 0x01667267a1e93597 + .quad 0x05ffb9cd6082dfeb + .quad 0xa617fa9ff53f6139 + .quad 0x60f2b5e513e66cb6 + .quad 0xd7a8beefb3448aa4 + .quad 0x7a2932856f5ea192 + + // 2^76 * 2 * G + + .quad 0x0b39d761b02de888 + .quad 0x5f550e7ed2414e1f + .quad 0xa6bfa45822e1a940 + .quad 0x050a2f7dfd447b99 + .quad 0xb89c444879639302 + .quad 0x4ae4f19350c67f2c + .quad 0xf0b35da8c81af9c6 + .quad 0x39d0003546871017 + .quad 
0x437c3b33a650db77 + .quad 0x6bafe81dbac52bb2 + .quad 0xfe99402d2db7d318 + .quad 0x2b5b7eec372ba6ce + + // 2^76 * 3 * G + + .quad 0xb3bc4bbd83f50eef + .quad 0x508f0c998c927866 + .quad 0x43e76587c8b7e66e + .quad 0x0f7655a3a47f98d9 + .quad 0xa694404d613ac8f4 + .quad 0x500c3c2bfa97e72c + .quad 0x874104d21fcec210 + .quad 0x1b205fb38604a8ee + .quad 0x55ecad37d24b133c + .quad 0x441e147d6038c90b + .quad 0x656683a1d62c6fee + .quad 0x0157d5dc87e0ecae + + // 2^76 * 4 * G + + .quad 0xf2a7af510354c13d + .quad 0xd7a0b145aa372b60 + .quad 0x2869b96a05a3d470 + .quad 0x6528e42d82460173 + .quad 0x95265514d71eb524 + .quad 0xe603d8815df14593 + .quad 0x147cdf410d4de6b7 + .quad 0x5293b1730437c850 + .quad 0x23d0e0814bccf226 + .quad 0x92c745cd8196fb93 + .quad 0x8b61796c59541e5b + .quad 0x40a44df0c021f978 + + // 2^76 * 5 * G + + .quad 0xdaa869894f20ea6a + .quad 0xea14a3d14c620618 + .quad 0x6001fccb090bf8be + .quad 0x35f4e822947e9cf0 + .quad 0x86c96e514bc5d095 + .quad 0xf20d4098fca6804a + .quad 0x27363d89c826ea5d + .quad 0x39ca36565719cacf + .quad 0x97506f2f6f87b75c + .quad 0xc624aea0034ae070 + .quad 0x1ec856e3aad34dd6 + .quad 0x055b0be0e440e58f + + // 2^76 * 6 * G + + .quad 0x6469a17d89735d12 + .quad 0xdb6f27d5e662b9f1 + .quad 0x9fcba3286a395681 + .quad 0x363b8004d269af25 + .quad 0x4d12a04b6ea33da2 + .quad 0x57cf4c15e36126dd + .quad 0x90ec9675ee44d967 + .quad 0x64ca348d2a985aac + .quad 0x99588e19e4c4912d + .quad 0xefcc3b4e1ca5ce6b + .quad 0x4522ea60fa5b98d5 + .quad 0x7064bbab1de4a819 + + // 2^76 * 7 * G + + .quad 0xb919e1515a770641 + .quad 0xa9a2e2c74e7f8039 + .quad 0x7527250b3df23109 + .quad 0x756a7330ac27b78b + .quad 0xa290c06142542129 + .quad 0xf2e2c2aebe8d5b90 + .quad 0xcf2458db76abfe1b + .quad 0x02157ade83d626bf + .quad 0x3e46972a1b9a038b + .quad 0x2e4ee66a7ee03fb4 + .quad 0x81a248776edbb4ca + .quad 0x1a944ee88ecd0563 + + // 2^76 * 8 * G + + .quad 0xd5a91d1151039372 + .quad 0x2ed377b799ca26de + .quad 0xa17202acfd366b6b + .quad 0x0730291bd6901995 + .quad 0xbb40a859182362d6 + .quad 0xb99f55778a4d1abb + .quad 0x8d18b427758559f6 + .quad 0x26c20fe74d26235a + .quad 0x648d1d9fe9cc22f5 + .quad 0x66bc561928dd577c + .quad 0x47d3ed21652439d1 + .quad 0x49d271acedaf8b49 + + // 2^80 * 1 * G + + .quad 0x89f5058a382b33f3 + .quad 0x5ae2ba0bad48c0b4 + .quad 0x8f93b503a53db36e + .quad 0x5aa3ed9d95a232e6 + .quad 0x2798aaf9b4b75601 + .quad 0x5eac72135c8dad72 + .quad 0xd2ceaa6161b7a023 + .quad 0x1bbfb284e98f7d4e + .quad 0x656777e9c7d96561 + .quad 0xcb2b125472c78036 + .quad 0x65053299d9506eee + .quad 0x4a07e14e5e8957cc + + // 2^80 * 2 * G + + .quad 0x4ee412cb980df999 + .quad 0xa315d76f3c6ec771 + .quad 0xbba5edde925c77fd + .quad 0x3f0bac391d313402 + .quad 0x240b58cdc477a49b + .quad 0xfd38dade6447f017 + .quad 0x19928d32a7c86aad + .quad 0x50af7aed84afa081 + .quad 0x6e4fde0115f65be5 + .quad 0x29982621216109b2 + .quad 0x780205810badd6d9 + .quad 0x1921a316baebd006 + + // 2^80 * 3 * G + + .quad 0x89422f7edfb870fc + .quad 0x2c296beb4f76b3bd + .quad 0x0738f1d436c24df7 + .quad 0x6458df41e273aeb0 + .quad 0xd75aad9ad9f3c18b + .quad 0x566a0eef60b1c19c + .quad 0x3e9a0bac255c0ed9 + .quad 0x7b049deca062c7f5 + .quad 0xdccbe37a35444483 + .quad 0x758879330fedbe93 + .quad 0x786004c312c5dd87 + .quad 0x6093dccbc2950e64 + + // 2^80 * 4 * G + + .quad 0x1ff39a8585e0706d + .quad 0x36d0a5d8b3e73933 + .quad 0x43b9f2e1718f453b + .quad 0x57d1ea084827a97c + .quad 0x6bdeeebe6084034b + .quad 0x3199c2b6780fb854 + .quad 0x973376abb62d0695 + .quad 0x6e3180c98b647d90 + .quad 0xee7ab6e7a128b071 + .quad 0xa4c1596d93a88baa + .quad 0xf7b4de82b2216130 + .quad 
0x363e999ddd97bd18 + + // 2^80 * 5 * G + + .quad 0x96a843c135ee1fc4 + .quad 0x976eb35508e4c8cf + .quad 0xb42f6801b58cd330 + .quad 0x48ee9b78693a052b + .quad 0x2f1848dce24baec6 + .quad 0x769b7255babcaf60 + .quad 0x90cb3c6e3cefe931 + .quad 0x231f979bc6f9b355 + .quad 0x5c31de4bcc2af3c6 + .quad 0xb04bb030fe208d1f + .quad 0xb78d7009c14fb466 + .quad 0x079bfa9b08792413 + + // 2^80 * 6 * G + + .quad 0xe3903a51da300df4 + .quad 0x843964233da95ab0 + .quad 0xed3cf12d0b356480 + .quad 0x038c77f684817194 + .quad 0xf3c9ed80a2d54245 + .quad 0x0aa08b7877f63952 + .quad 0xd76dac63d1085475 + .quad 0x1ef4fb159470636b + .quad 0x854e5ee65b167bec + .quad 0x59590a4296d0cdc2 + .quad 0x72b2df3498102199 + .quad 0x575ee92a4a0bff56 + + // 2^80 * 7 * G + + .quad 0xd4c080908a182fcf + .quad 0x30e170c299489dbd + .quad 0x05babd5752f733de + .quad 0x43d4e7112cd3fd00 + .quad 0x5d46bc450aa4d801 + .quad 0xc3af1227a533b9d8 + .quad 0x389e3b262b8906c2 + .quad 0x200a1e7e382f581b + .quad 0x518db967eaf93ac5 + .quad 0x71bc989b056652c0 + .quad 0xfe2b85d9567197f5 + .quad 0x050eca52651e4e38 + + // 2^80 * 8 * G + + .quad 0xc3431ade453f0c9c + .quad 0xe9f5045eff703b9b + .quad 0xfcd97ac9ed847b3d + .quad 0x4b0ee6c21c58f4c6 + .quad 0x97ac397660e668ea + .quad 0x9b19bbfe153ab497 + .quad 0x4cb179b534eca79f + .quad 0x6151c09fa131ae57 + .quad 0x3af55c0dfdf05d96 + .quad 0xdd262ee02ab4ee7a + .quad 0x11b2bb8712171709 + .quad 0x1fef24fa800f030b + + // 2^84 * 1 * G + + .quad 0xb496123a6b6c6609 + .quad 0xa750fe8580ab5938 + .quad 0xf471bf39b7c27a5f + .quad 0x507903ce77ac193c + .quad 0xff91a66a90166220 + .quad 0xf22552ae5bf1e009 + .quad 0x7dff85d87f90df7c + .quad 0x4f620ffe0c736fb9 + .quad 0x62f90d65dfde3e34 + .quad 0xcf28c592b9fa5fad + .quad 0x99c86ef9c6164510 + .quad 0x25d448044a256c84 + + // 2^84 * 2 * G + + .quad 0xbd68230ec7e9b16f + .quad 0x0eb1b9c1c1c5795d + .quad 0x7943c8c495b6b1ff + .quad 0x2f9faf620bbacf5e + .quad 0x2c7c4415c9022b55 + .quad 0x56a0d241812eb1fe + .quad 0xf02ea1c9d7b65e0d + .quad 0x4180512fd5323b26 + .quad 0xa4ff3e698a48a5db + .quad 0xba6a3806bd95403b + .quad 0x9f7ce1af47d5b65d + .quad 0x15e087e55939d2fb + + // 2^84 * 3 * G + + .quad 0x12207543745c1496 + .quad 0xdaff3cfdda38610c + .quad 0xe4e797272c71c34f + .quad 0x39c07b1934bdede9 + .quad 0x8894186efb963f38 + .quad 0x48a00e80dc639bd5 + .quad 0xa4e8092be96c1c99 + .quad 0x5a097d54ca573661 + .quad 0x2d45892b17c9e755 + .quad 0xd033fd7289308df8 + .quad 0x6c2fe9d9525b8bd9 + .quad 0x2edbecf1c11cc079 + + // 2^84 * 4 * G + + .quad 0x1616a4e3c715a0d2 + .quad 0x53623cb0f8341d4d + .quad 0x96ef5329c7e899cb + .quad 0x3d4e8dbba668baa6 + .quad 0xee0f0fddd087a25f + .quad 0x9c7531555c3e34ee + .quad 0x660c572e8fab3ab5 + .quad 0x0854fc44544cd3b2 + .quad 0x61eba0c555edad19 + .quad 0x24b533fef0a83de6 + .quad 0x3b77042883baa5f8 + .quad 0x678f82b898a47e8d + + // 2^84 * 5 * G + + .quad 0xb1491d0bd6900c54 + .quad 0x3539722c9d132636 + .quad 0x4db928920b362bc9 + .quad 0x4d7cd1fea68b69df + .quad 0x1e09d94057775696 + .quad 0xeed1265c3cd951db + .quad 0xfa9dac2b20bce16f + .quad 0x0f7f76e0e8d089f4 + .quad 0x36d9ebc5d485b00c + .quad 0xa2596492e4adb365 + .quad 0xc1659480c2119ccd + .quad 0x45306349186e0d5f + + // 2^84 * 6 * G + + .quad 0x94ddd0c1a6cdff1d + .quad 0x55f6f115e84213ae + .quad 0x6c935f85992fcf6a + .quad 0x067ee0f54a37f16f + .quad 0x96a414ec2b072491 + .quad 0x1bb2218127a7b65b + .quad 0x6d2849596e8a4af0 + .quad 0x65f3b08ccd27765f + .quad 0xecb29fff199801f7 + .quad 0x9d361d1fa2a0f72f + .quad 0x25f11d2375fd2f49 + .quad 0x124cefe80fe10fe2 + + // 2^84 * 7 * G + + .quad 0x4c126cf9d18df255 + .quad 0xc1d471e9147a63b6 
+ .quad 0x2c6d3c73f3c93b5f + .quad 0x6be3a6a2e3ff86a2 + .quad 0x1518e85b31b16489 + .quad 0x8faadcb7db710bfb + .quad 0x39b0bdf4a14ae239 + .quad 0x05f4cbea503d20c1 + .quad 0xce040e9ec04145bc + .quad 0xc71ff4e208f6834c + .quad 0xbd546e8dab8847a3 + .quad 0x64666aa0a4d2aba5 + + // 2^84 * 8 * G + + .quad 0x6841435a7c06d912 + .quad 0xca123c21bb3f830b + .quad 0xd4b37b27b1cbe278 + .quad 0x1d753b84c76f5046 + .quad 0xb0c53bf73337e94c + .quad 0x7cb5697e11e14f15 + .quad 0x4b84abac1930c750 + .quad 0x28dd4abfe0640468 + .quad 0x7dc0b64c44cb9f44 + .quad 0x18a3e1ace3925dbf + .quad 0x7a3034862d0457c4 + .quad 0x4c498bf78a0c892e + + // 2^88 * 1 * G + + .quad 0x37d653fb1aa73196 + .quad 0x0f9495303fd76418 + .quad 0xad200b09fb3a17b2 + .quad 0x544d49292fc8613e + .quad 0x22d2aff530976b86 + .quad 0x8d90b806c2d24604 + .quad 0xdca1896c4de5bae5 + .quad 0x28005fe6c8340c17 + .quad 0x6aefba9f34528688 + .quad 0x5c1bff9425107da1 + .quad 0xf75bbbcd66d94b36 + .quad 0x72e472930f316dfa + + // 2^88 * 2 * G + + .quad 0x2695208c9781084f + .quad 0xb1502a0b23450ee1 + .quad 0xfd9daea603efde02 + .quad 0x5a9d2e8c2733a34c + .quad 0x07f3f635d32a7627 + .quad 0x7aaa4d865f6566f0 + .quad 0x3c85e79728d04450 + .quad 0x1fee7f000fe06438 + .quad 0x765305da03dbf7e5 + .quad 0xa4daf2491434cdbd + .quad 0x7b4ad5cdd24a88ec + .quad 0x00f94051ee040543 + + // 2^88 * 3 * G + + .quad 0x8d356b23c3d330b2 + .quad 0xf21c8b9bb0471b06 + .quad 0xb36c316c6e42b83c + .quad 0x07d79c7e8beab10d + .quad 0xd7ef93bb07af9753 + .quad 0x583ed0cf3db766a7 + .quad 0xce6998bf6e0b1ec5 + .quad 0x47b7ffd25dd40452 + .quad 0x87fbfb9cbc08dd12 + .quad 0x8a066b3ae1eec29b + .quad 0x0d57242bdb1fc1bf + .quad 0x1c3520a35ea64bb6 + + // 2^88 * 4 * G + + .quad 0x80d253a6bccba34a + .quad 0x3e61c3a13838219b + .quad 0x90c3b6019882e396 + .quad 0x1c3d05775d0ee66f + .quad 0xcda86f40216bc059 + .quad 0x1fbb231d12bcd87e + .quad 0xb4956a9e17c70990 + .quad 0x38750c3b66d12e55 + .quad 0x692ef1409422e51a + .quad 0xcbc0c73c2b5df671 + .quad 0x21014fe7744ce029 + .quad 0x0621e2c7d330487c + + // 2^88 * 5 * G + + .quad 0xaf9860cc8259838d + .quad 0x90ea48c1c69f9adc + .quad 0x6526483765581e30 + .quad 0x0007d6097bd3a5bc + .quad 0xb7ae1796b0dbf0f3 + .quad 0x54dfafb9e17ce196 + .quad 0x25923071e9aaa3b4 + .quad 0x5d8e589ca1002e9d + .quad 0xc0bf1d950842a94b + .quad 0xb2d3c363588f2e3e + .quad 0x0a961438bb51e2ef + .quad 0x1583d7783c1cbf86 + + // 2^88 * 6 * G + + .quad 0xeceea2ef5da27ae1 + .quad 0x597c3a1455670174 + .quad 0xc9a62a126609167a + .quad 0x252a5f2e81ed8f70 + .quad 0x90034704cc9d28c7 + .quad 0x1d1b679ef72cc58f + .quad 0x16e12b5fbe5b8726 + .quad 0x4958064e83c5580a + .quad 0x0d2894265066e80d + .quad 0xfcc3f785307c8c6b + .quad 0x1b53da780c1112fd + .quad 0x079c170bd843b388 + + // 2^88 * 7 * G + + .quad 0x0506ece464fa6fff + .quad 0xbee3431e6205e523 + .quad 0x3579422451b8ea42 + .quad 0x6dec05e34ac9fb00 + .quad 0xcdd6cd50c0d5d056 + .quad 0x9af7686dbb03573b + .quad 0x3ca6723ff3c3ef48 + .quad 0x6768c0d7317b8acc + .quad 0x94b625e5f155c1b3 + .quad 0x417bf3a7997b7b91 + .quad 0xc22cbddc6d6b2600 + .quad 0x51445e14ddcd52f4 + + // 2^88 * 8 * G + + .quad 0x57502b4b3b144951 + .quad 0x8e67ff6b444bbcb3 + .quad 0xb8bd6927166385db + .quad 0x13186f31e39295c8 + .quad 0x893147ab2bbea455 + .quad 0x8c53a24f92079129 + .quad 0x4b49f948be30f7a7 + .quad 0x12e990086e4fd43d + .quad 0xf10c96b37fdfbb2e + .quad 0x9f9a935e121ceaf9 + .quad 0xdf1136c43a5b983f + .quad 0x77b2e3f05d3e99af + + // 2^92 * 1 * G + + .quad 0xfd0d75879cf12657 + .quad 0xe82fef94e53a0e29 + .quad 0xcc34a7f05bbb4be7 + .quad 0x0b251172a50c38a2 + .quad 0x9532f48fcc5cd29b + .quad 
0x2ba851bea3ce3671 + .quad 0x32dacaa051122941 + .quad 0x478d99d9350004f2 + .quad 0x1d5ad94890bb02c0 + .quad 0x50e208b10ec25115 + .quad 0xa26a22894ef21702 + .quad 0x4dc923343b524805 + + // 2^92 * 2 * G + + .quad 0xe3828c400f8086b6 + .quad 0x3f77e6f7979f0dc8 + .quad 0x7ef6de304df42cb4 + .quad 0x5265797cb6abd784 + .quad 0x3ad3e3ebf36c4975 + .quad 0xd75d25a537862125 + .quad 0xe873943da025a516 + .quad 0x6bbc7cb4c411c847 + .quad 0x3c6f9cd1d4a50d56 + .quad 0xb6244077c6feab7e + .quad 0x6ff9bf483580972e + .quad 0x00375883b332acfb + + // 2^92 * 3 * G + + .quad 0x0001b2cd28cb0940 + .quad 0x63fb51a06f1c24c9 + .quad 0xb5ad8691dcd5ca31 + .quad 0x67238dbd8c450660 + .quad 0xc98bec856c75c99c + .quad 0xe44184c000e33cf4 + .quad 0x0a676b9bba907634 + .quad 0x669e2cb571f379d7 + .quad 0xcb116b73a49bd308 + .quad 0x025aad6b2392729e + .quad 0xb4793efa3f55d9b1 + .quad 0x72a1056140678bb9 + + // 2^92 * 4 * G + + .quad 0xa2b6812b1cc9249d + .quad 0x62866eee21211f58 + .quad 0x2cb5c5b85df10ece + .quad 0x03a6b259e263ae00 + .quad 0x0d8d2909e2e505b6 + .quad 0x98ca78abc0291230 + .quad 0x77ef5569a9b12327 + .quad 0x7c77897b81439b47 + .quad 0xf1c1b5e2de331cb5 + .quad 0x5a9f5d8e15fca420 + .quad 0x9fa438f17bd932b1 + .quad 0x2a381bf01c6146e7 + + // 2^92 * 5 * G + + .quad 0xac9b9879cfc811c1 + .quad 0x8b7d29813756e567 + .quad 0x50da4e607c70edfc + .quad 0x5dbca62f884400b6 + .quad 0xf7c0be32b534166f + .quad 0x27e6ca6419cf70d4 + .quad 0x934df7d7a957a759 + .quad 0x5701461dabdec2aa + .quad 0x2c6747402c915c25 + .quad 0x1bdcd1a80b0d340a + .quad 0x5e5601bd07b43f5f + .quad 0x2555b4e05539a242 + + // 2^92 * 6 * G + + .quad 0x6fc09f5266ddd216 + .quad 0xdce560a7c8e37048 + .quad 0xec65939da2df62fd + .quad 0x7a869ae7e52ed192 + .quad 0x78409b1d87e463d4 + .quad 0xad4da95acdfb639d + .quad 0xec28773755259b9c + .quad 0x69c806e9c31230ab + .quad 0x7b48f57414bb3f22 + .quad 0x68c7cee4aedccc88 + .quad 0xed2f936179ed80be + .quad 0x25d70b885f77bc4b + + // 2^92 * 7 * G + + .quad 0x4151c3d9762bf4de + .quad 0x083f435f2745d82b + .quad 0x29775a2e0d23ddd5 + .quad 0x138e3a6269a5db24 + .quad 0x98459d29bb1ae4d4 + .quad 0x56b9c4c739f954ec + .quad 0x832743f6c29b4b3e + .quad 0x21ea8e2798b6878a + .quad 0x87bef4b46a5a7b9c + .quad 0xd2299d1b5fc1d062 + .quad 0x82409818dd321648 + .quad 0x5c5abeb1e5a2e03d + + // 2^92 * 8 * G + + .quad 0x14722af4b73c2ddb + .quad 0xbc470c5f5a05060d + .quad 0x00943eac2581b02e + .quad 0x0e434b3b1f499c8f + .quad 0x02cde6de1306a233 + .quad 0x7b5a52a2116f8ec7 + .quad 0xe1c681f4c1163b5b + .quad 0x241d350660d32643 + .quad 0x6be4404d0ebc52c7 + .quad 0xae46233bb1a791f5 + .quad 0x2aec170ed25db42b + .quad 0x1d8dfd966645d694 + + // 2^96 * 1 * G + + .quad 0x296fa9c59c2ec4de + .quad 0xbc8b61bf4f84f3cb + .quad 0x1c7706d917a8f908 + .quad 0x63b795fc7ad3255d + .quad 0xd598639c12ddb0a4 + .quad 0xa5d19f30c024866b + .quad 0xd17c2f0358fce460 + .quad 0x07a195152e095e8a + .quad 0xa8368f02389e5fc8 + .quad 0x90433b02cf8de43b + .quad 0xafa1fd5dc5412643 + .quad 0x3e8fe83d032f0137 + + // 2^96 * 2 * G + + .quad 0x2f8b15b90570a294 + .quad 0x94f2427067084549 + .quad 0xde1c5ae161bbfd84 + .quad 0x75ba3b797fac4007 + .quad 0x08704c8de8efd13c + .quad 0xdfc51a8e33e03731 + .quad 0xa59d5da51260cde3 + .quad 0x22d60899a6258c86 + .quad 0x6239dbc070cdd196 + .quad 0x60fe8a8b6c7d8a9a + .quad 0xb38847bceb401260 + .quad 0x0904d07b87779e5e + + // 2^96 * 3 * G + + .quad 0xb4ce1fd4ddba919c + .quad 0xcf31db3ec74c8daa + .quad 0x2c63cc63ad86cc51 + .quad 0x43e2143fbc1dde07 + .quad 0xf4322d6648f940b9 + .quad 0x06952f0cbd2d0c39 + .quad 0x167697ada081f931 + .quad 0x6240aacebaf72a6c + .quad 
0xf834749c5ba295a0 + .quad 0xd6947c5bca37d25a + .quad 0x66f13ba7e7c9316a + .quad 0x56bdaf238db40cac + + // 2^96 * 4 * G + + .quad 0x362ab9e3f53533eb + .quad 0x338568d56eb93d40 + .quad 0x9e0e14521d5a5572 + .quad 0x1d24a86d83741318 + .quad 0x1310d36cc19d3bb2 + .quad 0x062a6bb7622386b9 + .quad 0x7c9b8591d7a14f5c + .quad 0x03aa31507e1e5754 + .quad 0xf4ec7648ffd4ce1f + .quad 0xe045eaf054ac8c1c + .quad 0x88d225821d09357c + .quad 0x43b261dc9aeb4859 + + // 2^96 * 5 * G + + .quad 0xe55b1e1988bb79bb + .quad 0xa09ed07dc17a359d + .quad 0xb02c2ee2603dea33 + .quad 0x326055cf5b276bc2 + .quad 0x19513d8b6c951364 + .quad 0x94fe7126000bf47b + .quad 0x028d10ddd54f9567 + .quad 0x02b4d5e242940964 + .quad 0xb4a155cb28d18df2 + .quad 0xeacc4646186ce508 + .quad 0xc49cf4936c824389 + .quad 0x27a6c809ae5d3410 + + // 2^96 * 6 * G + + .quad 0x8ba6ebcd1f0db188 + .quad 0x37d3d73a675a5be8 + .quad 0xf22edfa315f5585a + .quad 0x2cb67174ff60a17e + .quad 0xcd2c270ac43d6954 + .quad 0xdd4a3e576a66cab2 + .quad 0x79fa592469d7036c + .quad 0x221503603d8c2599 + .quad 0x59eecdf9390be1d0 + .quad 0xa9422044728ce3f1 + .quad 0x82891c667a94f0f4 + .quad 0x7b1df4b73890f436 + + // 2^96 * 7 * G + + .quad 0xe492f2e0b3b2a224 + .quad 0x7c6c9e062b551160 + .quad 0x15eb8fe20d7f7b0e + .quad 0x61fcef2658fc5992 + .quad 0x5f2e221807f8f58c + .quad 0xe3555c9fd49409d4 + .quad 0xb2aaa88d1fb6a630 + .quad 0x68698245d352e03d + .quad 0xdbb15d852a18187a + .quad 0xf3e4aad386ddacd7 + .quad 0x44bae2810ff6c482 + .quad 0x46cf4c473daf01cf + + // 2^96 * 8 * G + + .quad 0x426525ed9ec4e5f9 + .quad 0x0e5eda0116903303 + .quad 0x72b1a7f2cbe5cadc + .quad 0x29387bcd14eb5f40 + .quad 0x213c6ea7f1498140 + .quad 0x7c1e7ef8392b4854 + .quad 0x2488c38c5629ceba + .quad 0x1065aae50d8cc5bb + .quad 0x1c2c4525df200d57 + .quad 0x5c3b2dd6bfca674a + .quad 0x0a07e7b1e1834030 + .quad 0x69a198e64f1ce716 + + // 2^100 * 1 * G + + .quad 0x7afcd613efa9d697 + .quad 0x0cc45aa41c067959 + .quad 0xa56fe104c1fada96 + .quad 0x3a73b70472e40365 + .quad 0x7b26e56b9e2d4734 + .quad 0xc4c7132b81c61675 + .quad 0xef5c9525ec9cde7f + .quad 0x39c80b16e71743ad + .quad 0x0f196e0d1b826c68 + .quad 0xf71ff0e24960e3db + .quad 0x6113167023b7436c + .quad 0x0cf0ea5877da7282 + + // 2^100 * 2 * G + + .quad 0x196c80a4ddd4ccbd + .quad 0x22e6f55d95f2dd9d + .quad 0xc75e33c740d6c71b + .quad 0x7bb51279cb3c042f + .quad 0xe332ced43ba6945a + .quad 0xde0b1361e881c05d + .quad 0x1ad40f095e67ed3b + .quad 0x5da8acdab8c63d5d + .quad 0xc4b6664a3a70159f + .quad 0x76194f0f0a904e14 + .quad 0xa5614c39a4096c13 + .quad 0x6cd0ff50979feced + + // 2^100 * 3 * G + + .quad 0xc0e067e78f4428ac + .quad 0x14835ab0a61135e3 + .quad 0xf21d14f338062935 + .quad 0x6390a4c8df04849c + .quad 0x7fecfabdb04ba18e + .quad 0xd0fc7bfc3bddbcf7 + .quad 0xa41d486e057a131c + .quad 0x641a4391f2223a61 + .quad 0xc5c6b95aa606a8db + .quad 0x914b7f9eb06825f1 + .quad 0x2a731f6b44fc9eff + .quad 0x30ddf38562705cfc + + // 2^100 * 4 * G + + .quad 0x4e3dcbdad1bff7f9 + .quad 0xc9118e8220645717 + .quad 0xbacccebc0f189d56 + .quad 0x1b4822e9d4467668 + .quad 0x33bef2bd68bcd52c + .quad 0xc649dbb069482ef2 + .quad 0xb5b6ee0c41cb1aee + .quad 0x5c294d270212a7e5 + .quad 0xab360a7f25563781 + .quad 0x2512228a480f7958 + .quad 0xc75d05276114b4e3 + .quad 0x222d9625d976fe2a + + // 2^100 * 5 * G + + .quad 0x1c717f85b372ace1 + .quad 0x81930e694638bf18 + .quad 0x239cad056bc08b58 + .quad 0x0b34271c87f8fff4 + .quad 0x0f94be7e0a344f85 + .quad 0xeb2faa8c87f22c38 + .quad 0x9ce1e75e4ee16f0f + .quad 0x43e64e5418a08dea + .quad 0x8155e2521a35ce63 + .quad 0xbe100d4df912028e + .quad 0xbff80bf8a57ddcec + .quad 
0x57342dc96d6bc6e4 + + // 2^100 * 6 * G + + .quad 0xefeef065c8ce5998 + .quad 0xbf029510b5cbeaa2 + .quad 0x8c64a10620b7c458 + .quad 0x35134fb231c24855 + .quad 0xf3c3bcb71e707bf6 + .quad 0x351d9b8c7291a762 + .quad 0x00502e6edad69a33 + .quad 0x522f521f1ec8807f + .quad 0x272c1f46f9a3902b + .quad 0xc91ba3b799657bcc + .quad 0xae614b304f8a1c0e + .quad 0x7afcaad70b99017b + + // 2^100 * 7 * G + + .quad 0xc25ded54a4b8be41 + .quad 0x902d13e11bb0e2dd + .quad 0x41f43233cde82ab2 + .quad 0x1085faa5c3aae7cb + .quad 0xa88141ecef842b6b + .quad 0x55e7b14797abe6c5 + .quad 0x8c748f9703784ffe + .quad 0x5b50a1f7afcd00b7 + .quad 0x9b840f66f1361315 + .quad 0x18462242701003e9 + .quad 0x65ed45fae4a25080 + .quad 0x0a2862393fda7320 + + // 2^100 * 8 * G + + .quad 0x46ab13c8347cbc9d + .quad 0x3849e8d499c12383 + .quad 0x4cea314087d64ac9 + .quad 0x1f354134b1a29ee7 + .quad 0x960e737b6ecb9d17 + .quad 0xfaf24948d67ceae1 + .quad 0x37e7a9b4d55e1b89 + .quad 0x5cb7173cb46c59eb + .quad 0x4a89e68b82b7abf0 + .quad 0xf41cd9279ba6b7b9 + .quad 0x16e6c210e18d876f + .quad 0x7cacdb0f7f1b09c6 + + // 2^104 * 1 * G + + .quad 0x9062b2e0d91a78bc + .quad 0x47c9889cc8509667 + .quad 0x9df54a66405070b8 + .quad 0x7369e6a92493a1bf + .quad 0xe1014434dcc5caed + .quad 0x47ed5d963c84fb33 + .quad 0x70019576ed86a0e7 + .quad 0x25b2697bd267f9e4 + .quad 0x9d673ffb13986864 + .quad 0x3ca5fbd9415dc7b8 + .quad 0xe04ecc3bdf273b5e + .quad 0x1420683db54e4cd2 + + // 2^104 * 2 * G + + .quad 0xb478bd1e249dd197 + .quad 0x620c35005e58c102 + .quad 0xfb02d32fccbaac5c + .quad 0x60b63bebf508a72d + .quad 0x34eebb6fc1cc5ad0 + .quad 0x6a1b0ce99646ac8b + .quad 0xd3b0da49a66bde53 + .quad 0x31e83b4161d081c1 + .quad 0x97e8c7129e062b4f + .quad 0x49e48f4f29320ad8 + .quad 0x5bece14b6f18683f + .quad 0x55cf1eb62d550317 + + // 2^104 * 3 * G + + .quad 0x5879101065c23d58 + .quad 0x8b9d086d5094819c + .quad 0xe2402fa912c55fa7 + .quad 0x669a6564570891d4 + .quad 0x3076b5e37df58c52 + .quad 0xd73ab9dde799cc36 + .quad 0xbd831ce34913ee20 + .quad 0x1a56fbaa62ba0133 + .quad 0x943e6b505c9dc9ec + .quad 0x302557bba77c371a + .quad 0x9873ae5641347651 + .quad 0x13c4836799c58a5c + + // 2^104 * 4 * G + + .quad 0x423a5d465ab3e1b9 + .quad 0xfc13c187c7f13f61 + .quad 0x19f83664ecb5b9b6 + .quad 0x66f80c93a637b607 + .quad 0xc4dcfb6a5d8bd080 + .quad 0xdeebc4ec571a4842 + .quad 0xd4b2e883b8e55365 + .quad 0x50bdc87dc8e5b827 + .quad 0x606d37836edfe111 + .quad 0x32353e15f011abd9 + .quad 0x64b03ac325b73b96 + .quad 0x1dd56444725fd5ae + + // 2^104 * 5 * G + + .quad 0x8fa47ff83362127d + .quad 0xbc9f6ac471cd7c15 + .quad 0x6e71454349220c8b + .quad 0x0e645912219f732e + .quad 0xc297e60008bac89a + .quad 0x7d4cea11eae1c3e0 + .quad 0xf3e38be19fe7977c + .quad 0x3a3a450f63a305cd + .quad 0x078f2f31d8394627 + .quad 0x389d3183de94a510 + .quad 0xd1e36c6d17996f80 + .quad 0x318c8d9393a9a87b + + // 2^104 * 6 * G + + .quad 0xf2745d032afffe19 + .quad 0x0c9f3c497f24db66 + .quad 0xbc98d3e3ba8598ef + .quad 0x224c7c679a1d5314 + .quad 0x5d669e29ab1dd398 + .quad 0xfc921658342d9e3b + .quad 0x55851dfdf35973cd + .quad 0x509a41c325950af6 + .quad 0xbdc06edca6f925e9 + .quad 0x793ef3f4641b1f33 + .quad 0x82ec12809d833e89 + .quad 0x05bff02328a11389 + + // 2^104 * 7 * G + + .quad 0x3632137023cae00b + .quad 0x544acf0ad1accf59 + .quad 0x96741049d21a1c88 + .quad 0x780b8cc3fa2a44a7 + .quad 0x6881a0dd0dc512e4 + .quad 0x4fe70dc844a5fafe + .quad 0x1f748e6b8f4a5240 + .quad 0x576277cdee01a3ea + .quad 0x1ef38abc234f305f + .quad 0x9a577fbd1405de08 + .quad 0x5e82a51434e62a0d + .quad 0x5ff418726271b7a1 + + // 2^104 * 8 * G + + .quad 0x398e080c1789db9d + .quad 
0xa7602025f3e778f5 + .quad 0xfa98894c06bd035d + .quad 0x106a03dc25a966be + .quad 0xe5db47e813b69540 + .quad 0xf35d2a3b432610e1 + .quad 0xac1f26e938781276 + .quad 0x29d4db8ca0a0cb69 + .quad 0xd9ad0aaf333353d0 + .quad 0x38669da5acd309e5 + .quad 0x3c57658ac888f7f0 + .quad 0x4ab38a51052cbefa + + // 2^108 * 1 * G + + .quad 0xdfdacbee4324c0e9 + .quad 0x054442883f955bb7 + .quad 0xdef7aaa8ea31609f + .quad 0x68aee70642287cff + .quad 0xf68fe2e8809de054 + .quad 0xe3bc096a9c82bad1 + .quad 0x076353d40aadbf45 + .quad 0x7b9b1fb5dea1959e + .quad 0xf01cc8f17471cc0c + .quad 0x95242e37579082bb + .quad 0x27776093d3e46b5f + .quad 0x2d13d55a28bd85fb + + // 2^108 * 2 * G + + .quad 0xfac5d2065b35b8da + .quad 0xa8da8a9a85624bb7 + .quad 0xccd2ca913d21cd0f + .quad 0x6b8341ee8bf90d58 + .quad 0xbf019cce7aee7a52 + .quad 0xa8ded2b6e454ead3 + .quad 0x3c619f0b87a8bb19 + .quad 0x3619b5d7560916d8 + .quad 0x3579f26b0282c4b2 + .quad 0x64d592f24fafefae + .quad 0xb7cded7b28c8c7c0 + .quad 0x6a927b6b7173a8d7 + + // 2^108 * 3 * G + + .quad 0x1f6db24f986e4656 + .quad 0x1021c02ed1e9105b + .quad 0xf8ff3fff2cc0a375 + .quad 0x1d2a6bf8c6c82592 + .quad 0x8d7040863ece88eb + .quad 0xf0e307a980eec08c + .quad 0xac2250610d788fda + .quad 0x056d92a43a0d478d + .quad 0x1b05a196fc3da5a1 + .quad 0x77d7a8c243b59ed0 + .quad 0x06da3d6297d17918 + .quad 0x66fbb494f12353f7 + + // 2^108 * 4 * G + + .quad 0x751a50b9d85c0fb8 + .quad 0xd1afdc258bcf097b + .quad 0x2f16a6a38309a969 + .quad 0x14ddff9ee5b00659 + .quad 0xd6d70996f12309d6 + .quad 0xdbfb2385e9c3d539 + .quad 0x46d602b0f7552411 + .quad 0x270a0b0557843e0c + .quad 0x61ff0640a7862bcc + .quad 0x81cac09a5f11abfe + .quad 0x9047830455d12abb + .quad 0x19a4bde1945ae873 + + // 2^108 * 5 * G + + .quad 0x9b9f26f520a6200a + .quad 0x64804443cf13eaf8 + .quad 0x8a63673f8631edd3 + .quad 0x72bbbce11ed39dc1 + .quad 0x40c709dec076c49f + .quad 0x657bfaf27f3e53f6 + .quad 0x40662331eca042c4 + .quad 0x14b375487eb4df04 + .quad 0xae853c94ab66dc47 + .quad 0xeb62343edf762d6e + .quad 0xf08e0e186fb2f7d1 + .quad 0x4f0b1c02700ab37a + + // 2^108 * 6 * G + + .quad 0xe1706787d81951fa + .quad 0xa10a2c8eb290c77b + .quad 0xe7382fa03ed66773 + .quad 0x0a4d84710bcc4b54 + .quad 0x79fd21ccc1b2e23f + .quad 0x4ae7c281453df52a + .quad 0xc8172ec9d151486b + .quad 0x68abe9443e0a7534 + .quad 0xda12c6c407831dcb + .quad 0x0da230d74d5c510d + .quad 0x4ab1531e6bd404e1 + .quad 0x4106b166bcf440ef + + // 2^108 * 7 * G + + .quad 0x02e57a421cd23668 + .quad 0x4ad9fb5d0eaef6fd + .quad 0x954e6727b1244480 + .quad 0x7f792f9d2699f331 + .quad 0xa485ccd539e4ecf2 + .quad 0x5aa3f3ad0555bab5 + .quad 0x145e3439937df82d + .quad 0x1238b51e1214283f + .quad 0x0b886b925fd4d924 + .quad 0x60906f7a3626a80d + .quad 0xecd367b4b98abd12 + .quad 0x2876beb1def344cf + + // 2^108 * 8 * G + + .quad 0xdc84e93563144691 + .quad 0x632fe8a0d61f23f4 + .quad 0x4caa800612a9a8d5 + .quad 0x48f9dbfa0e9918d3 + .quad 0xd594b3333a8a85f8 + .quad 0x4ea37689e78d7d58 + .quad 0x73bf9f455e8e351f + .quad 0x5507d7d2bc41ebb4 + .quad 0x1ceb2903299572fc + .quad 0x7c8ccaa29502d0ee + .quad 0x91bfa43411cce67b + .quad 0x5784481964a831e7 + + // 2^112 * 1 * G + + .quad 0xda7c2b256768d593 + .quad 0x98c1c0574422ca13 + .quad 0xf1a80bd5ca0ace1d + .quad 0x29cdd1adc088a690 + .quad 0xd6cfd1ef5fddc09c + .quad 0xe82b3efdf7575dce + .quad 0x25d56b5d201634c2 + .quad 0x3041c6bb04ed2b9b + .quad 0x0ff2f2f9d956e148 + .quad 0xade797759f356b2e + .quad 0x1a4698bb5f6c025c + .quad 0x104bbd6814049a7b + + // 2^112 * 2 * G + + .quad 0x51f0fd3168f1ed67 + .quad 0x2c811dcdd86f3bc2 + .quad 0x44dc5c4304d2f2de + .quad 0x5be8cc57092a7149 + .quad 
0xa95d9a5fd67ff163 + .quad 0xe92be69d4cc75681 + .quad 0xb7f8024cde20f257 + .quad 0x204f2a20fb072df5 + .quad 0xc8143b3d30ebb079 + .quad 0x7589155abd652e30 + .quad 0x653c3c318f6d5c31 + .quad 0x2570fb17c279161f + + // 2^112 * 3 * G + + .quad 0x3efa367f2cb61575 + .quad 0xf5f96f761cd6026c + .quad 0xe8c7142a65b52562 + .quad 0x3dcb65ea53030acd + .quad 0x192ea9550bb8245a + .quad 0xc8e6fba88f9050d1 + .quad 0x7986ea2d88a4c935 + .quad 0x241c5f91de018668 + .quad 0x28d8172940de6caa + .quad 0x8fbf2cf022d9733a + .quad 0x16d7fcdd235b01d1 + .quad 0x08420edd5fcdf0e5 + + // 2^112 * 4 * G + + .quad 0xcdff20ab8362fa4a + .quad 0x57e118d4e21a3e6e + .quad 0xe3179617fc39e62b + .quad 0x0d9a53efbc1769fd + .quad 0x0358c34e04f410ce + .quad 0xb6135b5a276e0685 + .quad 0x5d9670c7ebb91521 + .quad 0x04d654f321db889c + .quad 0x5e7dc116ddbdb5d5 + .quad 0x2954deb68da5dd2d + .quad 0x1cb608173334a292 + .quad 0x4a7a4f2618991ad7 + + // 2^112 * 5 * G + + .quad 0xf4a718025fb15f95 + .quad 0x3df65f346b5c1b8f + .quad 0xcdfcf08500e01112 + .quad 0x11b50c4cddd31848 + .quad 0x24c3b291af372a4b + .quad 0x93da8270718147f2 + .quad 0xdd84856486899ef2 + .quad 0x4a96314223e0ee33 + .quad 0xa6e8274408a4ffd6 + .quad 0x738e177e9c1576d9 + .quad 0x773348b63d02b3f2 + .quad 0x4f4bce4dce6bcc51 + + // 2^112 * 6 * G + + .quad 0xa71fce5ae2242584 + .quad 0x26ea725692f58a9e + .quad 0xd21a09d71cea3cf4 + .quad 0x73fcdd14b71c01e6 + .quad 0x30e2616ec49d0b6f + .quad 0xe456718fcaec2317 + .quad 0x48eb409bf26b4fa6 + .quad 0x3042cee561595f37 + .quad 0x427e7079449bac41 + .quad 0x855ae36dbce2310a + .quad 0x4cae76215f841a7c + .quad 0x389e740c9a9ce1d6 + + // 2^112 * 7 * G + + .quad 0x64fcb3ae34dcb9ce + .quad 0x97500323e348d0ad + .quad 0x45b3f07d62c6381b + .quad 0x61545379465a6788 + .quad 0xc9bd78f6570eac28 + .quad 0xe55b0b3227919ce1 + .quad 0x65fc3eaba19b91ed + .quad 0x25c425e5d6263690 + .quad 0x3f3e06a6f1d7de6e + .quad 0x3ef976278e062308 + .quad 0x8c14f6264e8a6c77 + .quad 0x6539a08915484759 + + // 2^112 * 8 * G + + .quad 0xe9d21f74c3d2f773 + .quad 0xc150544125c46845 + .quad 0x624e5ce8f9b99e33 + .quad 0x11c5e4aac5cd186c + .quad 0xddc4dbd414bb4a19 + .quad 0x19b2bc3c98424f8e + .quad 0x48a89fd736ca7169 + .quad 0x0f65320ef019bd90 + .quad 0xd486d1b1cafde0c6 + .quad 0x4f3fe6e3163b5181 + .quad 0x59a8af0dfaf2939a + .quad 0x4cabc7bdec33072a + + // 2^116 * 1 * G + + .quad 0x16faa8fb532f7428 + .quad 0xdbd42ea046a4e272 + .quad 0x5337653b8b9ea480 + .quad 0x4065947223973f03 + .quad 0xf7c0a19c1a54a044 + .quad 0x4a1c5e2477bd9fbb + .quad 0xa6e3ca115af22972 + .quad 0x1819bb953f2e9e0d + .quad 0x498fbb795e042e84 + .quad 0x7d0dd89a7698b714 + .quad 0x8bfb0ba427fe6295 + .quad 0x36ba82e721200524 + + // 2^116 * 2 * G + + .quad 0xd60ecbb74245ec41 + .quad 0xfd9be89e34348716 + .quad 0xc9240afee42284de + .quad 0x4472f648d0531db4 + .quad 0xc8d69d0a57274ed5 + .quad 0x45ba803260804b17 + .quad 0xdf3cda102255dfac + .quad 0x77d221232709b339 + .quad 0x498a6d7064ad94d8 + .quad 0xa5b5c8fd9af62263 + .quad 0x8ca8ed0545c141f4 + .quad 0x2c63bec3662d358c + + // 2^116 * 3 * G + + .quad 0x7fe60d8bea787955 + .quad 0xb9dc117eb5f401b7 + .quad 0x91c7c09a19355cce + .quad 0x22692ef59442bedf + .quad 0x9a518b3a8586f8bf + .quad 0x9ee71af6cbb196f0 + .quad 0xaa0625e6a2385cf2 + .quad 0x1deb2176ddd7c8d1 + .quad 0x8563d19a2066cf6c + .quad 0x401bfd8c4dcc7cd7 + .quad 0xd976a6becd0d8f62 + .quad 0x67cfd773a278b05e + + // 2^116 * 4 * G + + .quad 0x8dec31faef3ee475 + .quad 0x99dbff8a9e22fd92 + .quad 0x512d11594e26cab1 + .quad 0x0cde561eec4310b9 + .quad 0x2d5fa9855a4e586a + .quad 0x65f8f7a449beab7e + .quad 0xaa074dddf21d33d3 + .quad 
0x185cba721bcb9dee + .quad 0x93869da3f4e3cb41 + .quad 0xbf0392f540f7977e + .quad 0x026204fcd0463b83 + .quad 0x3ec91a769eec6eed + + // 2^116 * 5 * G + + .quad 0x1e9df75bf78166ad + .quad 0x4dfda838eb0cd7af + .quad 0xba002ed8c1eaf988 + .quad 0x13fedb3e11f33cfc + .quad 0x0fad2fb7b0a3402f + .quad 0x46615ecbfb69f4a8 + .quad 0xf745bcc8c5f8eaa6 + .quad 0x7a5fa8794a94e896 + .quad 0x52958faa13cd67a1 + .quad 0x965ee0818bdbb517 + .quad 0x16e58daa2e8845b3 + .quad 0x357d397d5499da8f + + // 2^116 * 6 * G + + .quad 0x1ebfa05fb0bace6c + .quad 0xc934620c1caf9a1e + .quad 0xcc771cc41d82b61a + .quad 0x2d94a16aa5f74fec + .quad 0x481dacb4194bfbf8 + .quad 0x4d77e3f1bae58299 + .quad 0x1ef4612e7d1372a0 + .quad 0x3a8d867e70ff69e1 + .quad 0x6f58cd5d55aff958 + .quad 0xba3eaa5c75567721 + .quad 0x75c123999165227d + .quad 0x69be1343c2f2b35e + + // 2^116 * 7 * G + + .quad 0x0e091d5ee197c92a + .quad 0x4f51019f2945119f + .quad 0x143679b9f034e99c + .quad 0x7d88112e4d24c696 + .quad 0x82bbbdac684b8de3 + .quad 0xa2f4c7d03fca0718 + .quad 0x337f92fbe096aaa8 + .quad 0x200d4d8c63587376 + .quad 0x208aed4b4893b32b + .quad 0x3efbf23ebe59b964 + .quad 0xd762deb0dba5e507 + .quad 0x69607bd681bd9d94 + + // 2^116 * 8 * G + + .quad 0xf6be021068de1ce1 + .quad 0xe8d518e70edcbc1f + .quad 0xe3effdd01b5505a5 + .quad 0x35f63353d3ec3fd0 + .quad 0x3b7f3bd49323a902 + .quad 0x7c21b5566b2c6e53 + .quad 0xe5ba8ff53a7852a7 + .quad 0x28bc77a5838ece00 + .quad 0x63ba78a8e25d8036 + .quad 0x63651e0094333490 + .quad 0x48d82f20288ce532 + .quad 0x3a31abfa36b57524 + + // 2^120 * 1 * G + + .quad 0x239e9624089c0a2e + .quad 0xc748c4c03afe4738 + .quad 0x17dbed2a764fa12a + .quad 0x639b93f0321c8582 + .quad 0xc08f788f3f78d289 + .quad 0xfe30a72ca1404d9f + .quad 0xf2778bfccf65cc9d + .quad 0x7ee498165acb2021 + .quad 0x7bd508e39111a1c3 + .quad 0x2b2b90d480907489 + .quad 0xe7d2aec2ae72fd19 + .quad 0x0edf493c85b602a6 + + // 2^120 * 2 * G + + .quad 0xaecc8158599b5a68 + .quad 0xea574f0febade20e + .quad 0x4fe41d7422b67f07 + .quad 0x403b92e3019d4fb4 + .quad 0x6767c4d284764113 + .quad 0xa090403ff7f5f835 + .quad 0x1c8fcffacae6bede + .quad 0x04c00c54d1dfa369 + .quad 0x4dc22f818b465cf8 + .quad 0x71a0f35a1480eff8 + .quad 0xaee8bfad04c7d657 + .quad 0x355bb12ab26176f4 + + // 2^120 * 3 * G + + .quad 0xa71e64cc7493bbf4 + .quad 0xe5bd84d9eca3b0c3 + .quad 0x0a6bc50cfa05e785 + .quad 0x0f9b8132182ec312 + .quad 0xa301dac75a8c7318 + .quad 0xed90039db3ceaa11 + .quad 0x6f077cbf3bae3f2d + .quad 0x7518eaf8e052ad8e + .quad 0xa48859c41b7f6c32 + .quad 0x0f2d60bcf4383298 + .quad 0x1815a929c9b1d1d9 + .quad 0x47c3871bbb1755c4 + + // 2^120 * 4 * G + + .quad 0x5144539771ec4f48 + .quad 0xf805b17dc98c5d6e + .quad 0xf762c11a47c3c66b + .quad 0x00b89b85764699dc + .quad 0xfbe65d50c85066b0 + .quad 0x62ecc4b0b3a299b0 + .quad 0xe53754ea441ae8e0 + .quad 0x08fea02ce8d48d5f + .quad 0x824ddd7668deead0 + .quad 0xc86445204b685d23 + .quad 0xb514cfcd5d89d665 + .quad 0x473829a74f75d537 + + // 2^120 * 5 * G + + .quad 0x82d2da754679c418 + .quad 0xe63bd7d8b2618df0 + .quad 0x355eef24ac47eb0a + .quad 0x2078684c4833c6b4 + .quad 0x23d9533aad3902c9 + .quad 0x64c2ddceef03588f + .quad 0x15257390cfe12fb4 + .quad 0x6c668b4d44e4d390 + .quad 0x3b48cf217a78820c + .quad 0xf76a0ab281273e97 + .quad 0xa96c65a78c8eed7b + .quad 0x7411a6054f8a433f + + // 2^120 * 6 * G + + .quad 0x4d659d32b99dc86d + .quad 0x044cdc75603af115 + .quad 0xb34c712cdcc2e488 + .quad 0x7c136574fb8134ff + .quad 0x579ae53d18b175b4 + .quad 0x68713159f392a102 + .quad 0x8455ecba1eef35f5 + .quad 0x1ec9a872458c398f + .quad 0xb8e6a4d400a2509b + .quad 0x9b81d7020bc882b4 + .quad 
0x57e7cc9bf1957561 + .quad 0x3add88a5c7cd6460 + + // 2^120 * 7 * G + + .quad 0xab895770b635dcf2 + .quad 0x02dfef6cf66c1fbc + .quad 0x85530268beb6d187 + .quad 0x249929fccc879e74 + .quad 0x85c298d459393046 + .quad 0x8f7e35985ff659ec + .quad 0x1d2ca22af2f66e3a + .quad 0x61ba1131a406a720 + .quad 0xa3d0a0f116959029 + .quad 0x023b6b6cba7ebd89 + .quad 0x7bf15a3e26783307 + .quad 0x5620310cbbd8ece7 + + // 2^120 * 8 * G + + .quad 0x528993434934d643 + .quad 0xb9dbf806a51222f5 + .quad 0x8f6d878fc3f41c22 + .quad 0x37676a2a4d9d9730 + .quad 0x6646b5f477e285d6 + .quad 0x40e8ff676c8f6193 + .quad 0xa6ec7311abb594dd + .quad 0x7ec846f3658cec4d + .quad 0x9b5e8f3f1da22ec7 + .quad 0x130f1d776c01cd13 + .quad 0x214c8fcfa2989fb8 + .quad 0x6daaf723399b9dd5 + + // 2^124 * 1 * G + + .quad 0x591e4a5610628564 + .quad 0x2a4bb87ca8b4df34 + .quad 0xde2a2572e7a38e43 + .quad 0x3cbdabd9fee5046e + .quad 0x81aebbdd2cd13070 + .quad 0x962e4325f85a0e9e + .quad 0xde9391aacadffecb + .quad 0x53177fda52c230e6 + .quad 0xa7bc970650b9de79 + .quad 0x3d12a7fbc301b59b + .quad 0x02652e68d36ae38c + .quad 0x79d739835a6199dc + + // 2^124 * 2 * G + + .quad 0xd9354df64131c1bd + .quad 0x758094a186ec5822 + .quad 0x4464ee12e459f3c2 + .quad 0x6c11fce4cb133282 + .quad 0x21c9d9920d591737 + .quad 0x9bea41d2e9b46cd6 + .quad 0xe20e84200d89bfca + .quad 0x79d99f946eae5ff8 + .quad 0xf17b483568673205 + .quad 0x387deae83caad96c + .quad 0x61b471fd56ffe386 + .quad 0x31741195b745a599 + + // 2^124 * 3 * G + + .quad 0xe8d10190b77a360b + .quad 0x99b983209995e702 + .quad 0xbd4fdff8fa0247aa + .quad 0x2772e344e0d36a87 + .quad 0x17f8ba683b02a047 + .quad 0x50212096feefb6c8 + .quad 0x70139be21556cbe2 + .quad 0x203e44a11d98915b + .quad 0xd6863eba37b9e39f + .quad 0x105bc169723b5a23 + .quad 0x104f6459a65c0762 + .quad 0x567951295b4d38d4 + + // 2^124 * 4 * G + + .quad 0x535fd60613037524 + .quad 0xe210adf6b0fbc26a + .quad 0xac8d0a9b23e990ae + .quad 0x47204d08d72fdbf9 + .quad 0x07242eb30d4b497f + .quad 0x1ef96306b9bccc87 + .quad 0x37950934d8116f45 + .quad 0x05468d6201405b04 + .quad 0x00f565a9f93267de + .quad 0xcecfd78dc0d58e8a + .quad 0xa215e2dcf318e28e + .quad 0x4599ee919b633352 + + // 2^124 * 5 * G + + .quad 0xd3c220ca70e0e76b + .quad 0xb12bea58ea9f3094 + .quad 0x294ddec8c3271282 + .quad 0x0c3539e1a1d1d028 + .quad 0xac746d6b861ae579 + .quad 0x31ab0650f6aea9dc + .quad 0x241d661140256d4c + .quad 0x2f485e853d21a5de + .quad 0x329744839c0833f3 + .quad 0x6fe6257fd2abc484 + .quad 0x5327d1814b358817 + .quad 0x65712585893fe9bc + + // 2^124 * 6 * G + + .quad 0x9c102fb732a61161 + .quad 0xe48e10dd34d520a8 + .quad 0x365c63546f9a9176 + .quad 0x32f6fe4c046f6006 + .quad 0x81c29f1bd708ee3f + .quad 0xddcb5a05ae6407d0 + .quad 0x97aec1d7d2a3eba7 + .quad 0x1590521a91d50831 + .quad 0x40a3a11ec7910acc + .quad 0x9013dff8f16d27ae + .quad 0x1a9720d8abb195d4 + .quad 0x1bb9fe452ea98463 + + // 2^124 * 7 * G + + .quad 0xe9d1d950b3d54f9e + .quad 0x2d5f9cbee00d33c1 + .quad 0x51c2c656a04fc6ac + .quad 0x65c091ee3c1cbcc9 + .quad 0xcf5e6c95cc36747c + .quad 0x294201536b0bc30d + .quad 0x453ac67cee797af0 + .quad 0x5eae6ab32a8bb3c9 + .quad 0x7083661114f118ea + .quad 0x2b37b87b94349cad + .quad 0x7273f51cb4e99f40 + .quad 0x78a2a95823d75698 + + // 2^124 * 8 * G + + .quad 0xa2b072e95c8c2ace + .quad 0x69cffc96651e9c4b + .quad 0x44328ef842e7b42b + .quad 0x5dd996c122aadeb3 + .quad 0xb4f23c425ef83207 + .quad 0xabf894d3c9a934b5 + .quad 0xd0708c1339fd87f7 + .quad 0x1876789117166130 + .quad 0x925b5ef0670c507c + .quad 0x819bc842b93c33bf + .quad 0x10792e9a70dd003f + .quad 0x59ad4b7a6e28dc74 + + // 2^128 * 1 * G + + .quad 
0x5f3a7562eb3dbe47 + .quad 0xf7ea38548ebda0b8 + .quad 0x00c3e53145747299 + .quad 0x1304e9e71627d551 + .quad 0x583b04bfacad8ea2 + .quad 0x29b743e8148be884 + .quad 0x2b1e583b0810c5db + .quad 0x2b5449e58eb3bbaa + .quad 0x789814d26adc9cfe + .quad 0x3c1bab3f8b48dd0b + .quad 0xda0fe1fff979c60a + .quad 0x4468de2d7c2dd693 + + // 2^128 * 2 * G + + .quad 0x51bb355e9419469e + .quad 0x33e6dc4c23ddc754 + .quad 0x93a5b6d6447f9962 + .quad 0x6cce7c6ffb44bd63 + .quad 0x4b9ad8c6f86307ce + .quad 0x21113531435d0c28 + .quad 0xd4a866c5657a772c + .quad 0x5da6427e63247352 + .quad 0x1a94c688deac22ca + .quad 0xb9066ef7bbae1ff8 + .quad 0x88ad8c388d59580f + .quad 0x58f29abfe79f2ca8 + + // 2^128 * 3 * G + + .quad 0xe90ecfab8de73e68 + .quad 0x54036f9f377e76a5 + .quad 0xf0495b0bbe015982 + .quad 0x577629c4a7f41e36 + .quad 0x4b5a64bf710ecdf6 + .quad 0xb14ce538462c293c + .quad 0x3643d056d50b3ab9 + .quad 0x6af93724185b4870 + .quad 0x3220024509c6a888 + .quad 0xd2e036134b558973 + .quad 0x83e236233c33289f + .quad 0x701f25bb0caec18f + + // 2^128 * 4 * G + + .quad 0xc3a8b0f8e4616ced + .quad 0xf700660e9e25a87d + .quad 0x61e3061ff4bca59c + .quad 0x2e0c92bfbdc40be9 + .quad 0x9d18f6d97cbec113 + .quad 0x844a06e674bfdbe4 + .quad 0x20f5b522ac4e60d6 + .quad 0x720a5bc050955e51 + .quad 0x0c3f09439b805a35 + .quad 0xe84e8b376242abfc + .quad 0x691417f35c229346 + .quad 0x0e9b9cbb144ef0ec + + // 2^128 * 5 * G + + .quad 0xfbbad48ffb5720ad + .quad 0xee81916bdbf90d0e + .quad 0xd4813152635543bf + .quad 0x221104eb3f337bd8 + .quad 0x8dee9bd55db1beee + .quad 0xc9c3ab370a723fb9 + .quad 0x44a8f1bf1c68d791 + .quad 0x366d44191cfd3cde + .quad 0x9e3c1743f2bc8c14 + .quad 0x2eda26fcb5856c3b + .quad 0xccb82f0e68a7fb97 + .quad 0x4167a4e6bc593244 + + // 2^128 * 6 * G + + .quad 0x643b9d2876f62700 + .quad 0x5d1d9d400e7668eb + .quad 0x1b4b430321fc0684 + .quad 0x7938bb7e2255246a + .quad 0xc2be2665f8ce8fee + .quad 0xe967ff14e880d62c + .quad 0xf12e6e7e2f364eee + .quad 0x34b33370cb7ed2f6 + .quad 0xcdc591ee8681d6cc + .quad 0xce02109ced85a753 + .quad 0xed7485c158808883 + .quad 0x1176fc6e2dfe65e4 + + // 2^128 * 7 * G + + .quad 0xb4af6cd05b9c619b + .quad 0x2ddfc9f4b2a58480 + .quad 0x3d4fa502ebe94dc4 + .quad 0x08fc3a4c677d5f34 + .quad 0xdb90e28949770eb8 + .quad 0x98fbcc2aacf440a3 + .quad 0x21354ffeded7879b + .quad 0x1f6a3e54f26906b6 + .quad 0x60a4c199d30734ea + .quad 0x40c085b631165cd6 + .quad 0xe2333e23f7598295 + .quad 0x4f2fad0116b900d1 + + // 2^128 * 8 * G + + .quad 0x44beb24194ae4e54 + .quad 0x5f541c511857ef6c + .quad 0xa61e6b2d368d0498 + .quad 0x445484a4972ef7ab + .quad 0x962cd91db73bb638 + .quad 0xe60577aafc129c08 + .quad 0x6f619b39f3b61689 + .quad 0x3451995f2944ee81 + .quad 0x9152fcd09fea7d7c + .quad 0x4a816c94b0935cf6 + .quad 0x258e9aaa47285c40 + .quad 0x10b89ca6042893b7 + + // 2^132 * 1 * G + + .quad 0x9b2a426e3b646025 + .quad 0x32127190385ce4cf + .quad 0xa25cffc2dd6dea45 + .quad 0x06409010bea8de75 + .quad 0xd67cded679d34aa0 + .quad 0xcc0b9ec0cc4db39f + .quad 0xa535a456e35d190f + .quad 0x2e05d9eaf61f6fef + .quad 0xc447901ad61beb59 + .quad 0x661f19bce5dc880a + .quad 0x24685482b7ca6827 + .quad 0x293c778cefe07f26 + + // 2^132 * 2 * G + + .quad 0x86809e7007069096 + .quad 0xaad75b15e4e50189 + .quad 0x07f35715a21a0147 + .quad 0x0487f3f112815d5e + .quad 0x16c795d6a11ff200 + .quad 0xcb70d0e2b15815c9 + .quad 0x89f293209b5395b5 + .quad 0x50b8c2d031e47b4f + .quad 0x48350c08068a4962 + .quad 0x6ffdd05351092c9a + .quad 0x17af4f4aaf6fc8dd + .quad 0x4b0553b53cdba58b + + // 2^132 * 3 * G + + .quad 0x9c65fcbe1b32ff79 + .quad 0xeb75ea9f03b50f9b + .quad 0xfced2a6c6c07e606 + .quad 
0x35106cd551717908 + .quad 0xbf05211b27c152d4 + .quad 0x5ec26849bd1af639 + .quad 0x5e0b2caa8e6fab98 + .quad 0x054c8bdd50bd0840 + .quad 0x38a0b12f1dcf073d + .quad 0x4b60a8a3b7f6a276 + .quad 0xfed5ac25d3404f9a + .quad 0x72e82d5e5505c229 + + // 2^132 * 4 * G + + .quad 0x6b0b697ff0d844c8 + .quad 0xbb12f85cd979cb49 + .quad 0xd2a541c6c1da0f1f + .quad 0x7b7c242958ce7211 + .quad 0x00d9cdfd69771d02 + .quad 0x410276cd6cfbf17e + .quad 0x4c45306c1cb12ec7 + .quad 0x2857bf1627500861 + .quad 0x9f21903f0101689e + .quad 0xd779dfd3bf861005 + .quad 0xa122ee5f3deb0f1b + .quad 0x510df84b485a00d4 + + // 2^132 * 5 * G + + .quad 0xa54133bb9277a1fa + .quad 0x74ec3b6263991237 + .quad 0x1a3c54dc35d2f15a + .quad 0x2d347144e482ba3a + .quad 0x24b3c887c70ac15e + .quad 0xb0f3a557fb81b732 + .quad 0x9b2cde2fe578cc1b + .quad 0x4cf7ed0703b54f8e + .quad 0x6bd47c6598fbee0f + .quad 0x9e4733e2ab55be2d + .quad 0x1093f624127610c5 + .quad 0x4e05e26ad0a1eaa4 + + // 2^132 * 6 * G + + .quad 0xda9b6b624b531f20 + .quad 0x429a760e77509abb + .quad 0xdbe9f522e823cb80 + .quad 0x618f1856880c8f82 + .quad 0x1833c773e18fe6c0 + .quad 0xe3c4711ad3c87265 + .quad 0x3bfd3c4f0116b283 + .quad 0x1955875eb4cd4db8 + .quad 0x6da6de8f0e399799 + .quad 0x7ad61aa440fda178 + .quad 0xb32cd8105e3563dd + .quad 0x15f6beae2ae340ae + + // 2^132 * 7 * G + + .quad 0x862bcb0c31ec3a62 + .quad 0x810e2b451138f3c2 + .quad 0x788ec4b839dac2a4 + .quad 0x28f76867ae2a9281 + .quad 0xba9a0f7b9245e215 + .quad 0xf368612dd98c0dbb + .quad 0x2e84e4cbf220b020 + .quad 0x6ba92fe962d90eda + .quad 0x3e4df9655884e2aa + .quad 0xbd62fbdbdbd465a5 + .quad 0xd7596caa0de9e524 + .quad 0x6e8042ccb2b1b3d7 + + // 2^132 * 8 * G + + .quad 0xf10d3c29ce28ca6e + .quad 0xbad34540fcb6093d + .quad 0xe7426ed7a2ea2d3f + .quad 0x08af9d4e4ff298b9 + .quad 0x1530653616521f7e + .quad 0x660d06b896203dba + .quad 0x2d3989bc545f0879 + .quad 0x4b5303af78ebd7b0 + .quad 0x72f8a6c3bebcbde8 + .quad 0x4f0fca4adc3a8e89 + .quad 0x6fa9d4e8c7bfdf7a + .quad 0x0dcf2d679b624eb7 + + // 2^136 * 1 * G + + .quad 0x3d5947499718289c + .quad 0x12ebf8c524533f26 + .quad 0x0262bfcb14c3ef15 + .quad 0x20b878d577b7518e + .quad 0x753941be5a45f06e + .quad 0xd07caeed6d9c5f65 + .quad 0x11776b9c72ff51b6 + .quad 0x17d2d1d9ef0d4da9 + .quad 0x27f2af18073f3e6a + .quad 0xfd3fe519d7521069 + .quad 0x22e3b72c3ca60022 + .quad 0x72214f63cc65c6a7 + + // 2^136 * 2 * G + + .quad 0xb4e37f405307a693 + .quad 0xaba714d72f336795 + .quad 0xd6fbd0a773761099 + .quad 0x5fdf48c58171cbc9 + .quad 0x1d9db7b9f43b29c9 + .quad 0xd605824a4f518f75 + .quad 0xf2c072bd312f9dc4 + .quad 0x1f24ac855a1545b0 + .quad 0x24d608328e9505aa + .quad 0x4748c1d10c1420ee + .quad 0xc7ffe45c06fb25a2 + .quad 0x00ba739e2ae395e6 + + // 2^136 * 3 * G + + .quad 0x592e98de5c8790d6 + .quad 0xe5bfb7d345c2a2df + .quad 0x115a3b60f9b49922 + .quad 0x03283a3e67ad78f3 + .quad 0xae4426f5ea88bb26 + .quad 0x360679d984973bfb + .quad 0x5c9f030c26694e50 + .quad 0x72297de7d518d226 + .quad 0x48241dc7be0cb939 + .quad 0x32f19b4d8b633080 + .quad 0xd3dfc90d02289308 + .quad 0x05e1296846271945 + + // 2^136 * 4 * G + + .quad 0xba82eeb32d9c495a + .quad 0xceefc8fcf12bb97c + .quad 0xb02dabae93b5d1e0 + .quad 0x39c00c9c13698d9b + .quad 0xadbfbbc8242c4550 + .quad 0xbcc80cecd03081d9 + .quad 0x843566a6f5c8df92 + .quad 0x78cf25d38258ce4c + .quad 0x15ae6b8e31489d68 + .quad 0xaa851cab9c2bf087 + .quad 0xc9a75a97f04efa05 + .quad 0x006b52076b3ff832 + + // 2^136 * 5 * G + + .quad 0x29e0cfe19d95781c + .quad 0xb681df18966310e2 + .quad 0x57df39d370516b39 + .quad 0x4d57e3443bc76122 + .quad 0xf5cb7e16b9ce082d + .quad 0x3407f14c417abc29 + .quad 
0xd4b36bce2bf4a7ab + .quad 0x7de2e9561a9f75ce + .quad 0xde70d4f4b6a55ecb + .quad 0x4801527f5d85db99 + .quad 0xdbc9c440d3ee9a81 + .quad 0x6b2a90af1a6029ed + + // 2^136 * 6 * G + + .quad 0x6923f4fc9ae61e97 + .quad 0x5735281de03f5fd1 + .quad 0xa764ae43e6edd12d + .quad 0x5fd8f4e9d12d3e4a + .quad 0x77ebf3245bb2d80a + .quad 0xd8301b472fb9079b + .quad 0xc647e6f24cee7333 + .quad 0x465812c8276c2109 + .quad 0x4d43beb22a1062d9 + .quad 0x7065fb753831dc16 + .quad 0x180d4a7bde2968d7 + .quad 0x05b32c2b1cb16790 + + // 2^136 * 7 * G + + .quad 0xc8c05eccd24da8fd + .quad 0xa1cf1aac05dfef83 + .quad 0xdbbeeff27df9cd61 + .quad 0x3b5556a37b471e99 + .quad 0xf7fca42c7ad58195 + .quad 0x3214286e4333f3cc + .quad 0xb6c29d0d340b979d + .quad 0x31771a48567307e1 + .quad 0x32b0c524e14dd482 + .quad 0xedb351541a2ba4b6 + .quad 0xa3d16048282b5af3 + .quad 0x4fc079d27a7336eb + + // 2^136 * 8 * G + + .quad 0x51c938b089bf2f7f + .quad 0x2497bd6502dfe9a7 + .quad 0xffffc09c7880e453 + .quad 0x124567cecaf98e92 + .quad 0xdc348b440c86c50d + .quad 0x1337cbc9cc94e651 + .quad 0x6422f74d643e3cb9 + .quad 0x241170c2bae3cd08 + .quad 0x3ff9ab860ac473b4 + .quad 0xf0911dee0113e435 + .quad 0x4ae75060ebc6c4af + .quad 0x3f8612966c87000d + + // 2^140 * 1 * G + + .quad 0x0c9c5303f7957be4 + .quad 0xa3c31a20e085c145 + .quad 0xb0721d71d0850050 + .quad 0x0aba390eab0bf2da + .quad 0x529fdffe638c7bf3 + .quad 0xdf2b9e60388b4995 + .quad 0xe027b34f1bad0249 + .quad 0x7bc92fc9b9fa74ed + .quad 0x9f97ef2e801ad9f9 + .quad 0x83697d5479afda3a + .quad 0xe906b3ffbd596b50 + .quad 0x02672b37dd3fb8e0 + + // 2^140 * 2 * G + + .quad 0x48b2ca8b260885e4 + .quad 0xa4286bec82b34c1c + .quad 0x937e1a2617f58f74 + .quad 0x741d1fcbab2ca2a5 + .quad 0xee9ba729398ca7f5 + .quad 0xeb9ca6257a4849db + .quad 0x29eb29ce7ec544e1 + .quad 0x232ca21ef736e2c8 + .quad 0xbf61423d253fcb17 + .quad 0x08803ceafa39eb14 + .quad 0xf18602df9851c7af + .quad 0x0400f3a049e3414b + + // 2^140 * 3 * G + + .quad 0xabce0476ba61c55b + .quad 0x36a3d6d7c4d39716 + .quad 0x6eb259d5e8d82d09 + .quad 0x0c9176e984d756fb + .quad 0x2efba412a06e7b06 + .quad 0x146785452c8d2560 + .quad 0xdf9713ebd67a91c7 + .quad 0x32830ac7157eadf3 + .quad 0x0e782a7ab73769e8 + .quad 0x04a05d7875b18e2c + .quad 0x29525226ebcceae1 + .quad 0x0d794f8383eba820 + + // 2^140 * 4 * G + + .quad 0xff35f5cb9e1516f4 + .quad 0xee805bcf648aae45 + .quad 0xf0d73c2bb93a9ef3 + .quad 0x097b0bf22092a6c2 + .quad 0x7be44ce7a7a2e1ac + .quad 0x411fd93efad1b8b7 + .quad 0x1734a1d70d5f7c9b + .quad 0x0d6592233127db16 + .quad 0xc48bab1521a9d733 + .quad 0xa6c2eaead61abb25 + .quad 0x625c6c1cc6cb4305 + .quad 0x7fc90fea93eb3a67 + + // 2^140 * 5 * G + + .quad 0x0408f1fe1f5c5926 + .quad 0x1a8f2f5e3b258bf4 + .quad 0x40a951a2fdc71669 + .quad 0x6598ee93c98b577e + .quad 0xc527deb59c7cb23d + .quad 0x955391695328404e + .quad 0xd64392817ccf2c7a + .quad 0x6ce97dabf7d8fa11 + .quad 0x25b5a8e50ef7c48f + .quad 0xeb6034116f2ce532 + .quad 0xc5e75173e53de537 + .quad 0x73119fa08c12bb03 + + // 2^140 * 6 * G + + .quad 0xed30129453f1a4cb + .quad 0xbce621c9c8f53787 + .quad 0xfacb2b1338bee7b9 + .quad 0x3025798a9ea8428c + .quad 0x7845b94d21f4774d + .quad 0xbf62f16c7897b727 + .quad 0x671857c03c56522b + .quad 0x3cd6a85295621212 + .quad 0x3fecde923aeca999 + .quad 0xbdaa5b0062e8c12f + .quad 0x67b99dfc96988ade + .quad 0x3f52c02852661036 + + // 2^140 * 7 * G + + .quad 0xffeaa48e2a1351c6 + .quad 0x28624754fa7f53d7 + .quad 0x0b5ba9e57582ddf1 + .quad 0x60c0104ba696ac59 + .quad 0x9258bf99eec416c6 + .quad 0xac8a5017a9d2f671 + .quad 0x629549ab16dea4ab + .quad 0x05d0e85c99091569 + .quad 0x051de020de9cbe97 + .quad 
0xfa07fc56b50bcf74 + .quad 0x378cec9f0f11df65 + .quad 0x36853c69ab96de4d + + // 2^140 * 8 * G + + .quad 0x36d9b8de78f39b2d + .quad 0x7f42ed71a847b9ec + .quad 0x241cd1d679bd3fde + .quad 0x6a704fec92fbce6b + .quad 0x4433c0b0fac5e7be + .quad 0x724bae854c08dcbe + .quad 0xf1f24cc446978f9b + .quad 0x4a0aff6d62825fc8 + .quad 0xe917fb9e61095301 + .quad 0xc102df9402a092f8 + .quad 0xbf09e2f5fa66190b + .quad 0x681109bee0dcfe37 + + // 2^144 * 1 * G + + .quad 0x559a0cc9782a0dde + .quad 0x551dcdb2ea718385 + .quad 0x7f62865b31ef238c + .quad 0x504aa7767973613d + .quad 0x9c18fcfa36048d13 + .quad 0x29159db373899ddd + .quad 0xdc9f350b9f92d0aa + .quad 0x26f57eee878a19d4 + .quad 0x0cab2cd55687efb1 + .quad 0x5180d162247af17b + .quad 0x85c15a344f5a2467 + .quad 0x4041943d9dba3069 + + // 2^144 * 2 * G + + .quad 0xc3c0eeba43ebcc96 + .quad 0x8d749c9c26ea9caf + .quad 0xd9fa95ee1c77ccc6 + .quad 0x1420a1d97684340f + .quad 0x4b217743a26caadd + .quad 0x47a6b424648ab7ce + .quad 0xcb1d4f7a03fbc9e3 + .quad 0x12d931429800d019 + .quad 0x00c67799d337594f + .quad 0x5e3c5140b23aa47b + .quad 0x44182854e35ff395 + .quad 0x1b4f92314359a012 + + // 2^144 * 3 * G + + .quad 0x3e5c109d89150951 + .quad 0x39cefa912de9696a + .quad 0x20eae43f975f3020 + .quad 0x239b572a7f132dae + .quad 0x33cf3030a49866b1 + .quad 0x251f73d2215f4859 + .quad 0xab82aa4051def4f6 + .quad 0x5ff191d56f9a23f6 + .quad 0x819ed433ac2d9068 + .quad 0x2883ab795fc98523 + .quad 0xef4572805593eb3d + .quad 0x020c526a758f36cb + + // 2^144 * 4 * G + + .quad 0x779834f89ed8dbbc + .quad 0xc8f2aaf9dc7ca46c + .quad 0xa9524cdca3e1b074 + .quad 0x02aacc4615313877 + .quad 0xe931ef59f042cc89 + .quad 0x2c589c9d8e124bb6 + .quad 0xadc8e18aaec75997 + .quad 0x452cfe0a5602c50c + .quad 0x86a0f7a0647877df + .quad 0xbbc464270e607c9f + .quad 0xab17ea25f1fb11c9 + .quad 0x4cfb7d7b304b877b + + // 2^144 * 5 * G + + .quad 0x72b43d6cb89b75fe + .quad 0x54c694d99c6adc80 + .quad 0xb8c3aa373ee34c9f + .quad 0x14b4622b39075364 + .quad 0xe28699c29789ef12 + .quad 0x2b6ecd71df57190d + .quad 0xc343c857ecc970d0 + .quad 0x5b1d4cbc434d3ac5 + .quad 0xb6fb2615cc0a9f26 + .quad 0x3a4f0e2bb88dcce5 + .quad 0x1301498b3369a705 + .quad 0x2f98f71258592dd1 + + // 2^144 * 6 * G + + .quad 0x0c94a74cb50f9e56 + .quad 0x5b1ff4a98e8e1320 + .quad 0x9a2acc2182300f67 + .quad 0x3a6ae249d806aaf9 + .quad 0x2e12ae444f54a701 + .quad 0xfcfe3ef0a9cbd7de + .quad 0xcebf890d75835de0 + .quad 0x1d8062e9e7614554 + .quad 0x657ada85a9907c5a + .quad 0x1a0ea8b591b90f62 + .quad 0x8d0e1dfbdf34b4e9 + .quad 0x298b8ce8aef25ff3 + + // 2^144 * 7 * G + + .quad 0x2a927953eff70cb2 + .quad 0x4b89c92a79157076 + .quad 0x9418457a30a7cf6a + .quad 0x34b8a8404d5ce485 + .quad 0x837a72ea0a2165de + .quad 0x3fab07b40bcf79f6 + .quad 0x521636c77738ae70 + .quad 0x6ba6271803a7d7dc + .quad 0xc26eecb583693335 + .quad 0xd5a813df63b5fefd + .quad 0xa293aa9aa4b22573 + .quad 0x71d62bdd465e1c6a + + // 2^144 * 8 * G + + .quad 0x6533cc28d378df80 + .quad 0xf6db43790a0fa4b4 + .quad 0xe3645ff9f701da5a + .quad 0x74d5f317f3172ba4 + .quad 0xcd2db5dab1f75ef5 + .quad 0xd77f95cf16b065f5 + .quad 0x14571fea3f49f085 + .quad 0x1c333621262b2b3d + .quad 0xa86fe55467d9ca81 + .quad 0x398b7c752b298c37 + .quad 0xda6d0892e3ac623b + .quad 0x4aebcc4547e9d98c + + // 2^148 * 1 * G + + .quad 0x53175a7205d21a77 + .quad 0xb0c04422d3b934d4 + .quad 0xadd9f24bdd5deadc + .quad 0x074f46e69f10ff8c + .quad 0x0de9b204a059a445 + .quad 0xe15cb4aa4b17ad0f + .quad 0xe1bbec521f79c557 + .quad 0x2633f1b9d071081b + .quad 0xc1fb4177018b9910 + .quad 0xa6ea20dc6c0fe140 + .quad 0xd661f3e74354c6ff + .quad 0x5ecb72e6f1a3407a + + // 
2^148 * 2 * G + + .quad 0xa515a31b2259fb4e + .quad 0x0960f3972bcac52f + .quad 0xedb52fec8d3454cb + .quad 0x382e2720c476c019 + .quad 0xfeeae106e8e86997 + .quad 0x9863337f98d09383 + .quad 0x9470480eaa06ebef + .quad 0x038b6898d4c5c2d0 + .quad 0xf391c51d8ace50a6 + .quad 0x3142d0b9ae2d2948 + .quad 0xdb4d5a1a7f24ca80 + .quad 0x21aeba8b59250ea8 + + // 2^148 * 3 * G + + .quad 0x24f13b34cf405530 + .quad 0x3c44ea4a43088af7 + .quad 0x5dd5c5170006a482 + .quad 0x118eb8f8890b086d + .quad 0x53853600f0087f23 + .quad 0x4c461879da7d5784 + .quad 0x6af303deb41f6860 + .quad 0x0a3c16c5c27c18ed + .quad 0x17e49c17cc947f3d + .quad 0xccc6eda6aac1d27b + .quad 0xdf6092ceb0f08e56 + .quad 0x4909b3e22c67c36b + + // 2^148 * 4 * G + + .quad 0x9c9c85ea63fe2e89 + .quad 0xbe1baf910e9412ec + .quad 0x8f7baa8a86fbfe7b + .quad 0x0fb17f9fef968b6c + .quad 0x59a16676706ff64e + .quad 0x10b953dd0d86a53d + .quad 0x5848e1e6ce5c0b96 + .quad 0x2d8b78e712780c68 + .quad 0x79d5c62eafc3902b + .quad 0x773a215289e80728 + .quad 0xc38ae640e10120b9 + .quad 0x09ae23717b2b1a6d + + // 2^148 * 5 * G + + .quad 0xbb6a192a4e4d083c + .quad 0x34ace0630029e192 + .quad 0x98245a59aafabaeb + .quad 0x6d9c8a9ada97faac + .quad 0x10ab8fa1ad32b1d0 + .quad 0xe9aced1be2778b24 + .quad 0xa8856bc0373de90f + .quad 0x66f35ddddda53996 + .quad 0xd27d9afb24997323 + .quad 0x1bb7e07ef6f01d2e + .quad 0x2ba7472df52ecc7f + .quad 0x03019b4f646f9dc8 + + // 2^148 * 6 * G + + .quad 0x04a186b5565345cd + .quad 0xeee76610bcc4116a + .quad 0x689c73b478fb2a45 + .quad 0x387dcbff65697512 + .quad 0xaf09b214e6b3dc6b + .quad 0x3f7573b5ad7d2f65 + .quad 0xd019d988100a23b0 + .quad 0x392b63a58b5c35f7 + .quad 0x4093addc9c07c205 + .quad 0xc565be15f532c37e + .quad 0x63dbecfd1583402a + .quad 0x61722b4aef2e032e + + // 2^148 * 7 * G + + .quad 0x0012aafeecbd47af + .quad 0x55a266fb1cd46309 + .quad 0xf203eb680967c72c + .quad 0x39633944ca3c1429 + .quad 0xd6b07a5581cb0e3c + .quad 0x290ff006d9444969 + .quad 0x08680b6a16dcda1f + .quad 0x5568d2b75a06de59 + .quad 0x8d0cb88c1b37cfe1 + .quad 0x05b6a5a3053818f3 + .quad 0xf2e9bc04b787d959 + .quad 0x6beba1249add7f64 + + // 2^148 * 8 * G + + .quad 0x1d06005ca5b1b143 + .quad 0x6d4c6bb87fd1cda2 + .quad 0x6ef5967653fcffe7 + .quad 0x097c29e8c1ce1ea5 + .quad 0x5c3cecb943f5a53b + .quad 0x9cc9a61d06c08df2 + .quad 0xcfba639a85895447 + .quad 0x5a845ae80df09fd5 + .quad 0x4ce97dbe5deb94ca + .quad 0x38d0a4388c709c48 + .quad 0xc43eced4a169d097 + .quad 0x0a1249fff7e587c3 + + // 2^152 * 1 * G + + .quad 0x12f0071b276d01c9 + .quad 0xe7b8bac586c48c70 + .quad 0x5308129b71d6fba9 + .quad 0x5d88fbf95a3db792 + .quad 0x0b408d9e7354b610 + .quad 0x806b32535ba85b6e + .quad 0xdbe63a034a58a207 + .quad 0x173bd9ddc9a1df2c + .quad 0x2b500f1efe5872df + .quad 0x58d6582ed43918c1 + .quad 0xe6ed278ec9673ae0 + .quad 0x06e1cd13b19ea319 + + // 2^152 * 2 * G + + .quad 0x40d0ad516f166f23 + .quad 0x118e32931fab6abe + .quad 0x3fe35e14a04d088e + .quad 0x3080603526e16266 + .quad 0x472baf629e5b0353 + .quad 0x3baa0b90278d0447 + .quad 0x0c785f469643bf27 + .quad 0x7f3a6a1a8d837b13 + .quad 0xf7e644395d3d800b + .quad 0x95a8d555c901edf6 + .quad 0x68cd7830592c6339 + .quad 0x30d0fded2e51307e + + // 2^152 * 3 * G + + .quad 0xe0594d1af21233b3 + .quad 0x1bdbe78ef0cc4d9c + .quad 0x6965187f8f499a77 + .quad 0x0a9214202c099868 + .quad 0x9cb4971e68b84750 + .quad 0xa09572296664bbcf + .quad 0x5c8de72672fa412b + .quad 0x4615084351c589d9 + .quad 0xbc9019c0aeb9a02e + .quad 0x55c7110d16034cae + .quad 0x0e6df501659932ec + .quad 0x3bca0d2895ca5dfe + + // 2^152 * 4 * G + + .quad 0x40f031bc3c5d62a4 + .quad 0x19fc8b3ecff07a60 + .quad 
0x98183da2130fb545 + .quad 0x5631deddae8f13cd + .quad 0x9c688eb69ecc01bf + .quad 0xf0bc83ada644896f + .quad 0xca2d955f5f7a9fe2 + .quad 0x4ea8b4038df28241 + .quad 0x2aed460af1cad202 + .quad 0x46305305a48cee83 + .quad 0x9121774549f11a5f + .quad 0x24ce0930542ca463 + + // 2^152 * 5 * G + + .quad 0x1fe890f5fd06c106 + .quad 0xb5c468355d8810f2 + .quad 0x827808fe6e8caf3e + .quad 0x41d4e3c28a06d74b + .quad 0x3fcfa155fdf30b85 + .quad 0xd2f7168e36372ea4 + .quad 0xb2e064de6492f844 + .quad 0x549928a7324f4280 + .quad 0xf26e32a763ee1a2e + .quad 0xae91e4b7d25ffdea + .quad 0xbc3bd33bd17f4d69 + .quad 0x491b66dec0dcff6a + + // 2^152 * 6 * G + + .quad 0x98f5b13dc7ea32a7 + .quad 0xe3d5f8cc7e16db98 + .quad 0xac0abf52cbf8d947 + .quad 0x08f338d0c85ee4ac + .quad 0x75f04a8ed0da64a1 + .quad 0xed222caf67e2284b + .quad 0x8234a3791f7b7ba4 + .quad 0x4cf6b8b0b7018b67 + .quad 0xc383a821991a73bd + .quad 0xab27bc01df320c7a + .quad 0xc13d331b84777063 + .quad 0x530d4a82eb078a99 + + // 2^152 * 7 * G + + .quad 0x004c3630e1f94825 + .quad 0x7e2d78268cab535a + .quad 0xc7482323cc84ff8b + .quad 0x65ea753f101770b9 + .quad 0x6d6973456c9abf9e + .quad 0x257fb2fc4900a880 + .quad 0x2bacf412c8cfb850 + .quad 0x0db3e7e00cbfbd5b + .quad 0x3d66fc3ee2096363 + .quad 0x81d62c7f61b5cb6b + .quad 0x0fbe044213443b1a + .quad 0x02a4ec1921e1a1db + + // 2^152 * 8 * G + + .quad 0x5ce6259a3b24b8a2 + .quad 0xb8577acc45afa0b8 + .quad 0xcccbe6e88ba07037 + .quad 0x3d143c51127809bf + .quad 0xf5c86162f1cf795f + .quad 0x118c861926ee57f2 + .quad 0x172124851c063578 + .quad 0x36d12b5dec067fcf + .quad 0x126d279179154557 + .quad 0xd5e48f5cfc783a0a + .quad 0x36bdb6e8df179bac + .quad 0x2ef517885ba82859 + + // 2^156 * 1 * G + + .quad 0x88bd438cd11e0d4a + .quad 0x30cb610d43ccf308 + .quad 0xe09a0e3791937bcc + .quad 0x4559135b25b1720c + .quad 0x1ea436837c6da1e9 + .quad 0xf9c189af1fb9bdbe + .quad 0x303001fcce5dd155 + .quad 0x28a7c99ebc57be52 + .quad 0xb8fd9399e8d19e9d + .quad 0x908191cb962423ff + .quad 0xb2b948d747c742a3 + .quad 0x37f33226d7fb44c4 + + // 2^156 * 2 * G + + .quad 0x0dae8767b55f6e08 + .quad 0x4a43b3b35b203a02 + .quad 0xe3725a6e80af8c79 + .quad 0x0f7a7fd1705fa7a3 + .quad 0x33912553c821b11d + .quad 0x66ed42c241e301df + .quad 0x066fcc11104222fd + .quad 0x307a3b41c192168f + .quad 0x8eeb5d076eb55ce0 + .quad 0x2fc536bfaa0d925a + .quad 0xbe81830fdcb6c6e8 + .quad 0x556c7045827baf52 + + // 2^156 * 3 * G + + .quad 0x8e2b517302e9d8b7 + .quad 0xe3e52269248714e8 + .quad 0xbd4fbd774ca960b5 + .quad 0x6f4b4199c5ecada9 + .quad 0xb94b90022bf44406 + .quad 0xabd4237eff90b534 + .quad 0x7600a960faf86d3a + .quad 0x2f45abdac2322ee3 + .quad 0x61af4912c8ef8a6a + .quad 0xe58fa4fe43fb6e5e + .quad 0xb5afcc5d6fd427cf + .quad 0x6a5393281e1e11eb + + // 2^156 * 4 * G + + .quad 0xf3da5139a5d1ee89 + .quad 0x8145457cff936988 + .quad 0x3f622fed00e188c4 + .quad 0x0f513815db8b5a3d + .quad 0x0fff04fe149443cf + .quad 0x53cac6d9865cddd7 + .quad 0x31385b03531ed1b7 + .quad 0x5846a27cacd1039d + .quad 0x4ff5cdac1eb08717 + .quad 0x67e8b29590f2e9bc + .quad 0x44093b5e237afa99 + .quad 0x0d414bed8708b8b2 + + // 2^156 * 5 * G + + .quad 0xcfb68265fd0e75f6 + .quad 0xe45b3e28bb90e707 + .quad 0x7242a8de9ff92c7a + .quad 0x685b3201933202dd + .quad 0x81886a92294ac9e8 + .quad 0x23162b45d55547be + .quad 0x94cfbc4403715983 + .quad 0x50eb8fdb134bc401 + .quad 0xc0b73ec6d6b330cd + .quad 0x84e44807132faff1 + .quad 0x732b7352c4a5dee1 + .quad 0x5d7c7cf1aa7cd2d2 + + // 2^156 * 6 * G + + .quad 0xaf3b46bf7a4aafa2 + .quad 0xb78705ec4d40d411 + .quad 0x114f0c6aca7c15e3 + .quad 0x3f364faaa9489d4d + .quad 0x33d1013e9b73a562 + .quad 
0x925cef5748ec26e1 + .quad 0xa7fce614dd468058 + .quad 0x78b0fad41e9aa438 + .quad 0xbf56a431ed05b488 + .quad 0xa533e66c9c495c7e + .quad 0xe8652baf87f3651a + .quad 0x0241800059d66c33 + + // 2^156 * 7 * G + + .quad 0xceb077fea37a5be4 + .quad 0xdb642f02e5a5eeb7 + .quad 0xc2e6d0c5471270b8 + .quad 0x4771b65538e4529c + .quad 0x28350c7dcf38ea01 + .quad 0x7c6cdbc0b2917ab6 + .quad 0xace7cfbe857082f7 + .quad 0x4d2845aba2d9a1e0 + .quad 0xbb537fe0447070de + .quad 0xcba744436dd557df + .quad 0xd3b5a3473600dbcb + .quad 0x4aeabbe6f9ffd7f8 + + // 2^156 * 8 * G + + .quad 0x4630119e40d8f78c + .quad 0xa01a9bc53c710e11 + .quad 0x486d2b258910dd79 + .quad 0x1e6c47b3db0324e5 + .quad 0x6a2134bcc4a9c8f2 + .quad 0xfbf8fd1c8ace2e37 + .quad 0x000ae3049911a0ba + .quad 0x046e3a616bc89b9e + .quad 0x14e65442f03906be + .quad 0x4a019d54e362be2a + .quad 0x68ccdfec8dc230c7 + .quad 0x7cfb7e3faf6b861c + + // 2^160 * 1 * G + + .quad 0x4637974e8c58aedc + .quad 0xb9ef22fbabf041a4 + .quad 0xe185d956e980718a + .quad 0x2f1b78fab143a8a6 + .quad 0x96eebffb305b2f51 + .quad 0xd3f938ad889596b8 + .quad 0xf0f52dc746d5dd25 + .quad 0x57968290bb3a0095 + .quad 0xf71ab8430a20e101 + .quad 0xf393658d24f0ec47 + .quad 0xcf7509a86ee2eed1 + .quad 0x7dc43e35dc2aa3e1 + + // 2^160 * 2 * G + + .quad 0x85966665887dd9c3 + .quad 0xc90f9b314bb05355 + .quad 0xc6e08df8ef2079b1 + .quad 0x7ef72016758cc12f + .quad 0x5a782a5c273e9718 + .quad 0x3576c6995e4efd94 + .quad 0x0f2ed8051f237d3e + .quad 0x044fb81d82d50a99 + .quad 0xc1df18c5a907e3d9 + .quad 0x57b3371dce4c6359 + .quad 0xca704534b201bb49 + .quad 0x7f79823f9c30dd2e + + // 2^160 * 3 * G + + .quad 0x8334d239a3b513e8 + .quad 0xc13670d4b91fa8d8 + .quad 0x12b54136f590bd33 + .quad 0x0a4e0373d784d9b4 + .quad 0x6a9c1ff068f587ba + .quad 0x0827894e0050c8de + .quad 0x3cbf99557ded5be7 + .quad 0x64a9b0431c06d6f0 + .quad 0x2eb3d6a15b7d2919 + .quad 0xb0b4f6a0d53a8235 + .quad 0x7156ce4389a45d47 + .quad 0x071a7d0ace18346c + + // 2^160 * 4 * G + + .quad 0xd3072daac887ba0b + .quad 0x01262905bfa562ee + .quad 0xcf543002c0ef768b + .quad 0x2c3bcc7146ea7e9c + .quad 0xcc0c355220e14431 + .quad 0x0d65950709b15141 + .quad 0x9af5621b209d5f36 + .quad 0x7c69bcf7617755d3 + .quad 0x07f0d7eb04e8295f + .quad 0x10db18252f50f37d + .quad 0xe951a9a3171798d7 + .quad 0x6f5a9a7322aca51d + + // 2^160 * 5 * G + + .quad 0x8ba1000c2f41c6c5 + .quad 0xc49f79c10cfefb9b + .quad 0x4efa47703cc51c9f + .quad 0x494e21a2e147afca + .quad 0xe729d4eba3d944be + .quad 0x8d9e09408078af9e + .quad 0x4525567a47869c03 + .quad 0x02ab9680ee8d3b24 + .quad 0xefa48a85dde50d9a + .quad 0x219a224e0fb9a249 + .quad 0xfa091f1dd91ef6d9 + .quad 0x6b5d76cbea46bb34 + + // 2^160 * 6 * G + + .quad 0x8857556cec0cd994 + .quad 0x6472dc6f5cd01dba + .quad 0xaf0169148f42b477 + .quad 0x0ae333f685277354 + .quad 0xe0f941171e782522 + .quad 0xf1e6ae74036936d3 + .quad 0x408b3ea2d0fcc746 + .quad 0x16fb869c03dd313e + .quad 0x288e199733b60962 + .quad 0x24fc72b4d8abe133 + .quad 0x4811f7ed0991d03e + .quad 0x3f81e38b8f70d075 + + // 2^160 * 7 * G + + .quad 0x7f910fcc7ed9affe + .quad 0x545cb8a12465874b + .quad 0xa8397ed24b0c4704 + .quad 0x50510fc104f50993 + .quad 0x0adb7f355f17c824 + .quad 0x74b923c3d74299a4 + .quad 0xd57c3e8bcbf8eaf7 + .quad 0x0ad3e2d34cdedc3d + .quad 0x6f0c0fc5336e249d + .quad 0x745ede19c331cfd9 + .quad 0xf2d6fd0009eefe1c + .quad 0x127c158bf0fa1ebe + + // 2^160 * 8 * G + + .quad 0xf6197c422e9879a2 + .quad 0xa44addd452ca3647 + .quad 0x9b413fc14b4eaccb + .quad 0x354ef87d07ef4f68 + .quad 0xdea28fc4ae51b974 + .quad 0x1d9973d3744dfe96 + .quad 0x6240680b873848a8 + .quad 0x4ed82479d167df95 + .quad 
0xfee3b52260c5d975 + .quad 0x50352efceb41b0b8 + .quad 0x8808ac30a9f6653c + .quad 0x302d92d20539236d + + // 2^164 * 1 * G + + .quad 0x4c59023fcb3efb7c + .quad 0x6c2fcb99c63c2a94 + .quad 0xba4190e2c3c7e084 + .quad 0x0e545daea51874d9 + .quad 0x957b8b8b0df53c30 + .quad 0x2a1c770a8e60f098 + .quad 0xbbc7a670345796de + .quad 0x22a48f9a90c99bc9 + .quad 0x6b7dc0dc8d3fac58 + .quad 0x5497cd6ce6e42bfd + .quad 0x542f7d1bf400d305 + .quad 0x4159f47f048d9136 + + // 2^164 * 2 * G + + .quad 0x20ad660839e31e32 + .quad 0xf81e1bd58405be50 + .quad 0xf8064056f4dabc69 + .quad 0x14d23dd4ce71b975 + .quad 0x748515a8bbd24839 + .quad 0x77128347afb02b55 + .quad 0x50ba2ac649a2a17f + .quad 0x060525513ad730f1 + .quad 0xf2398e098aa27f82 + .quad 0x6d7982bb89a1b024 + .quad 0xfa694084214dd24c + .quad 0x71ab966fa32301c3 + + // 2^164 * 3 * G + + .quad 0x2dcbd8e34ded02fc + .quad 0x1151f3ec596f22aa + .quad 0xbca255434e0328da + .quad 0x35768fbe92411b22 + .quad 0xb1088a0702809955 + .quad 0x43b273ea0b43c391 + .quad 0xca9b67aefe0686ed + .quad 0x605eecbf8335f4ed + .quad 0x83200a656c340431 + .quad 0x9fcd71678ee59c2f + .quad 0x75d4613f71300f8a + .quad 0x7a912faf60f542f9 + + // 2^164 * 4 * G + + .quad 0xb204585e5edc1a43 + .quad 0x9f0e16ee5897c73c + .quad 0x5b82c0ae4e70483c + .quad 0x624a170e2bddf9be + .quad 0x253f4f8dfa2d5597 + .quad 0x25e49c405477130c + .quad 0x00c052e5996b1102 + .quad 0x33cb966e33bb6c4a + .quad 0x597028047f116909 + .quad 0x828ac41c1e564467 + .quad 0x70417dbde6217387 + .quad 0x721627aefbac4384 + + // 2^164 * 5 * G + + .quad 0x97d03bc38736add5 + .quad 0x2f1422afc532b130 + .quad 0x3aa68a057101bbc4 + .quad 0x4c946cf7e74f9fa7 + .quad 0xfd3097bc410b2f22 + .quad 0xf1a05da7b5cfa844 + .quad 0x61289a1def57ca74 + .quad 0x245ea199bb821902 + .quad 0xaedca66978d477f8 + .quad 0x1898ba3c29117fe1 + .quad 0xcf73f983720cbd58 + .quad 0x67da12e6b8b56351 + + // 2^164 * 6 * G + + .quad 0x7067e187b4bd6e07 + .quad 0x6e8f0203c7d1fe74 + .quad 0x93c6aa2f38c85a30 + .quad 0x76297d1f3d75a78a + .quad 0x2b7ef3d38ec8308c + .quad 0x828fd7ec71eb94ab + .quad 0x807c3b36c5062abd + .quad 0x0cb64cb831a94141 + .quad 0x3030fc33534c6378 + .quad 0xb9635c5ce541e861 + .quad 0x15d9a9bed9b2c728 + .quad 0x49233ea3f3775dcb + + // 2^164 * 7 * G + + .quad 0x629398fa8dbffc3a + .quad 0xe12fe52dd54db455 + .quad 0xf3be11dfdaf25295 + .quad 0x628b140dce5e7b51 + .quad 0x7b3985fe1c9f249b + .quad 0x4fd6b2d5a1233293 + .quad 0xceb345941adf4d62 + .quad 0x6987ff6f542de50c + .quad 0x47e241428f83753c + .quad 0x6317bebc866af997 + .quad 0xdabb5b433d1a9829 + .quad 0x074d8d245287fb2d + + // 2^164 * 8 * G + + .quad 0x8337d9cd440bfc31 + .quad 0x729d2ca1af318fd7 + .quad 0xa040a4a4772c2070 + .quad 0x46002ef03a7349be + .quad 0x481875c6c0e31488 + .quad 0x219429b2e22034b4 + .quad 0x7223c98a31283b65 + .quad 0x3420d60b342277f9 + .quad 0xfaa23adeaffe65f7 + .quad 0x78261ed45be0764c + .quad 0x441c0a1e2f164403 + .quad 0x5aea8e567a87d395 + + // 2^168 * 1 * G + + .quad 0x7813c1a2bca4283d + .quad 0xed62f091a1863dd9 + .quad 0xaec7bcb8c268fa86 + .quad 0x10e5d3b76f1cae4c + .quad 0x2dbc6fb6e4e0f177 + .quad 0x04e1bf29a4bd6a93 + .quad 0x5e1966d4787af6e8 + .quad 0x0edc5f5eb426d060 + .quad 0x5453bfd653da8e67 + .quad 0xe9dc1eec24a9f641 + .quad 0xbf87263b03578a23 + .quad 0x45b46c51361cba72 + + // 2^168 * 2 * G + + .quad 0xa9402abf314f7fa1 + .quad 0xe257f1dc8e8cf450 + .quad 0x1dbbd54b23a8be84 + .quad 0x2177bfa36dcb713b + .quad 0xce9d4ddd8a7fe3e4 + .quad 0xab13645676620e30 + .quad 0x4b594f7bb30e9958 + .quad 0x5c1c0aef321229df + .quad 0x37081bbcfa79db8f + .quad 0x6048811ec25f59b3 + .quad 0x087a76659c832487 + .quad 
0x4ae619387d8ab5bb + + // 2^168 * 3 * G + + .quad 0x8ddbf6aa5344a32e + .quad 0x7d88eab4b41b4078 + .quad 0x5eb0eb974a130d60 + .quad 0x1a00d91b17bf3e03 + .quad 0x61117e44985bfb83 + .quad 0xfce0462a71963136 + .quad 0x83ac3448d425904b + .quad 0x75685abe5ba43d64 + .quad 0x6e960933eb61f2b2 + .quad 0x543d0fa8c9ff4952 + .quad 0xdf7275107af66569 + .quad 0x135529b623b0e6aa + + // 2^168 * 4 * G + + .quad 0x18f0dbd7add1d518 + .quad 0x979f7888cfc11f11 + .quad 0x8732e1f07114759b + .quad 0x79b5b81a65ca3a01 + .quad 0xf5c716bce22e83fe + .quad 0xb42beb19e80985c1 + .quad 0xec9da63714254aae + .quad 0x5972ea051590a613 + .quad 0x0fd4ac20dc8f7811 + .quad 0x9a9ad294ac4d4fa8 + .quad 0xc01b2d64b3360434 + .quad 0x4f7e9c95905f3bdb + + // 2^168 * 5 * G + + .quad 0x62674bbc5781302e + .quad 0xd8520f3989addc0f + .quad 0x8c2999ae53fbd9c6 + .quad 0x31993ad92e638e4c + .quad 0x71c8443d355299fe + .quad 0x8bcd3b1cdbebead7 + .quad 0x8092499ef1a49466 + .quad 0x1942eec4a144adc8 + .quad 0x7dac5319ae234992 + .quad 0x2c1b3d910cea3e92 + .quad 0x553ce494253c1122 + .quad 0x2a0a65314ef9ca75 + + // 2^168 * 6 * G + + .quad 0x2db7937ff7f927c2 + .quad 0xdb741f0617d0a635 + .quad 0x5982f3a21155af76 + .quad 0x4cf6e218647c2ded + .quad 0xcf361acd3c1c793a + .quad 0x2f9ebcac5a35bc3b + .quad 0x60e860e9a8cda6ab + .quad 0x055dc39b6dea1a13 + .quad 0xb119227cc28d5bb6 + .quad 0x07e24ebc774dffab + .quad 0xa83c78cee4a32c89 + .quad 0x121a307710aa24b6 + + // 2^168 * 7 * G + + .quad 0xe4db5d5e9f034a97 + .quad 0xe153fc093034bc2d + .quad 0x460546919551d3b1 + .quad 0x333fc76c7a40e52d + .quad 0xd659713ec77483c9 + .quad 0x88bfe077b82b96af + .quad 0x289e28231097bcd3 + .quad 0x527bb94a6ced3a9b + .quad 0x563d992a995b482e + .quad 0x3405d07c6e383801 + .quad 0x485035de2f64d8e5 + .quad 0x6b89069b20a7a9f7 + + // 2^168 * 8 * G + + .quad 0x812aa0416270220d + .quad 0x995a89faf9245b4e + .quad 0xffadc4ce5072ef05 + .quad 0x23bc2103aa73eb73 + .quad 0x4082fa8cb5c7db77 + .quad 0x068686f8c734c155 + .quad 0x29e6c8d9f6e7a57e + .quad 0x0473d308a7639bcf + .quad 0xcaee792603589e05 + .quad 0x2b4b421246dcc492 + .quad 0x02a1ef74e601a94f + .quad 0x102f73bfde04341a + + // 2^172 * 1 * G + + .quad 0xb5a2d50c7ec20d3e + .quad 0xc64bdd6ea0c97263 + .quad 0x56e89052c1ff734d + .quad 0x4929c6f72b2ffaba + .quad 0x358ecba293a36247 + .quad 0xaf8f9862b268fd65 + .quad 0x412f7e9968a01c89 + .quad 0x5786f312cd754524 + .quad 0x337788ffca14032c + .quad 0xf3921028447f1ee3 + .quad 0x8b14071f231bccad + .quad 0x4c817b4bf2344783 + + // 2^172 * 2 * G + + .quad 0x0ff853852871b96e + .quad 0xe13e9fab60c3f1bb + .quad 0xeefd595325344402 + .quad 0x0a37c37075b7744b + .quad 0x413ba057a40b4484 + .quad 0xba4c2e1a4f5f6a43 + .quad 0x614ba0a5aee1d61c + .quad 0x78a1531a8b05dc53 + .quad 0x6cbdf1703ad0562b + .quad 0x8ecf4830c92521a3 + .quad 0xdaebd303fd8424e7 + .quad 0x72ad82a42e5ec56f + + // 2^172 * 3 * G + + .quad 0x3f9e8e35bafb65f6 + .quad 0x39d69ec8f27293a1 + .quad 0x6cb8cd958cf6a3d0 + .quad 0x1734778173adae6d + .quad 0xc368939167024bc3 + .quad 0x8e69d16d49502fda + .quad 0xfcf2ec3ce45f4b29 + .quad 0x065f669ea3b4cbc4 + .quad 0x8a00aec75532db4d + .quad 0xb869a4e443e31bb1 + .quad 0x4a0f8552d3a7f515 + .quad 0x19adeb7c303d7c08 + + // 2^172 * 4 * G + + .quad 0xc720cb6153ead9a3 + .quad 0x55b2c97f512b636e + .quad 0xb1e35b5fd40290b1 + .quad 0x2fd9ccf13b530ee2 + .quad 0x9d05ba7d43c31794 + .quad 0x2470c8ff93322526 + .quad 0x8323dec816197438 + .quad 0x2852709881569b53 + .quad 0x07bd475b47f796b8 + .quad 0xd2c7b013542c8f54 + .quad 0x2dbd23f43b24f87e + .quad 0x6551afd77b0901d6 + + // 2^172 * 5 * G + + .quad 0x4546baaf54aac27f + .quad 
0xf6f66fecb2a45a28 + .quad 0x582d1b5b562bcfe8 + .quad 0x44b123f3920f785f + .quad 0x68a24ce3a1d5c9ac + .quad 0xbb77a33d10ff6461 + .quad 0x0f86ce4425d3166e + .quad 0x56507c0950b9623b + .quad 0x1206f0b7d1713e63 + .quad 0x353fe3d915bafc74 + .quad 0x194ceb970ad9d94d + .quad 0x62fadd7cf9d03ad3 + + // 2^172 * 6 * G + + .quad 0xc6b5967b5598a074 + .quad 0x5efe91ce8e493e25 + .quad 0xd4b72c4549280888 + .quad 0x20ef1149a26740c2 + .quad 0x3cd7bc61e7ce4594 + .quad 0xcd6b35a9b7dd267e + .quad 0xa080abc84366ef27 + .quad 0x6ec7c46f59c79711 + .quad 0x2f07ad636f09a8a2 + .quad 0x8697e6ce24205e7d + .quad 0xc0aefc05ee35a139 + .quad 0x15e80958b5f9d897 + + // 2^172 * 7 * G + + .quad 0x25a5ef7d0c3e235b + .quad 0x6c39c17fbe134ee7 + .quad 0xc774e1342dc5c327 + .quad 0x021354b892021f39 + .quad 0x4dd1ed355bb061c4 + .quad 0x42dc0cef941c0700 + .quad 0x61305dc1fd86340e + .quad 0x56b2cc930e55a443 + .quad 0x1df79da6a6bfc5a2 + .quad 0x02f3a2749fde4369 + .quad 0xb323d9f2cda390a7 + .quad 0x7be0847b8774d363 + + // 2^172 * 8 * G + + .quad 0x8c99cc5a8b3f55c3 + .quad 0x0611d7253fded2a0 + .quad 0xed2995ff36b70a36 + .quad 0x1f699a54d78a2619 + .quad 0x1466f5af5307fa11 + .quad 0x817fcc7ded6c0af2 + .quad 0x0a6de44ec3a4a3fb + .quad 0x74071475bc927d0b + .quad 0xe77292f373e7ea8a + .quad 0x296537d2cb045a31 + .quad 0x1bd0653ed3274fde + .quad 0x2f9a2c4476bd2966 + + // 2^176 * 1 * G + + .quad 0xeb18b9ab7f5745c6 + .quad 0x023a8aee5787c690 + .quad 0xb72712da2df7afa9 + .quad 0x36597d25ea5c013d + .quad 0xa2b4dae0b5511c9a + .quad 0x7ac860292bffff06 + .quad 0x981f375df5504234 + .quad 0x3f6bd725da4ea12d + .quad 0x734d8d7b106058ac + .quad 0xd940579e6fc6905f + .quad 0x6466f8f99202932d + .quad 0x7b7ecc19da60d6d0 + + // 2^176 * 2 * G + + .quad 0x78c2373c695c690d + .quad 0xdd252e660642906e + .quad 0x951d44444ae12bd2 + .quad 0x4235ad7601743956 + .quad 0x6dae4a51a77cfa9b + .quad 0x82263654e7a38650 + .quad 0x09bbffcd8f2d82db + .quad 0x03bedc661bf5caba + .quad 0x6258cb0d078975f5 + .quad 0x492942549189f298 + .quad 0xa0cab423e2e36ee4 + .quad 0x0e7ce2b0cdf066a1 + + // 2^176 * 3 * G + + .quad 0xc494643ac48c85a3 + .quad 0xfd361df43c6139ad + .quad 0x09db17dd3ae94d48 + .quad 0x666e0a5d8fb4674a + .quad 0xfea6fedfd94b70f9 + .quad 0xf130c051c1fcba2d + .quad 0x4882d47e7f2fab89 + .quad 0x615256138aeceeb5 + .quad 0x2abbf64e4870cb0d + .quad 0xcd65bcf0aa458b6b + .quad 0x9abe4eba75e8985d + .quad 0x7f0bc810d514dee4 + + // 2^176 * 4 * G + + .quad 0xb9006ba426f4136f + .quad 0x8d67369e57e03035 + .quad 0xcbc8dfd94f463c28 + .quad 0x0d1f8dbcf8eedbf5 + .quad 0x83ac9dad737213a0 + .quad 0x9ff6f8ba2ef72e98 + .quad 0x311e2edd43ec6957 + .quad 0x1d3a907ddec5ab75 + .quad 0xba1693313ed081dc + .quad 0x29329fad851b3480 + .quad 0x0128013c030321cb + .quad 0x00011b44a31bfde3 + + // 2^176 * 5 * G + + .quad 0x3fdfa06c3fc66c0c + .quad 0x5d40e38e4dd60dd2 + .quad 0x7ae38b38268e4d71 + .quad 0x3ac48d916e8357e1 + .quad 0x16561f696a0aa75c + .quad 0xc1bf725c5852bd6a + .quad 0x11a8dd7f9a7966ad + .quad 0x63d988a2d2851026 + .quad 0x00120753afbd232e + .quad 0xe92bceb8fdd8f683 + .quad 0xf81669b384e72b91 + .quad 0x33fad52b2368a066 + + // 2^176 * 6 * G + + .quad 0x540649c6c5e41e16 + .quad 0x0af86430333f7735 + .quad 0xb2acfcd2f305e746 + .quad 0x16c0f429a256dca7 + .quad 0x8d2cc8d0c422cfe8 + .quad 0x072b4f7b05a13acb + .quad 0xa3feb6e6ecf6a56f + .quad 0x3cc355ccb90a71e2 + .quad 0xe9b69443903e9131 + .quad 0xb8a494cb7a5637ce + .quad 0xc87cd1a4baba9244 + .quad 0x631eaf426bae7568 + + // 2^176 * 7 * G + + .quad 0xb3e90410da66fe9f + .quad 0x85dd4b526c16e5a6 + .quad 0xbc3d97611ef9bf83 + .quad 0x5599648b1ea919b5 + .quad 
0x47d975b9a3700de8 + .quad 0x7280c5fbe2f80552 + .quad 0x53658f2732e45de1 + .quad 0x431f2c7f665f80b5 + .quad 0xd6026344858f7b19 + .quad 0x14ab352fa1ea514a + .quad 0x8900441a2090a9d7 + .quad 0x7b04715f91253b26 + + // 2^176 * 8 * G + + .quad 0x83edbd28acf6ae43 + .quad 0x86357c8b7d5c7ab4 + .quad 0xc0404769b7eb2c44 + .quad 0x59b37bf5c2f6583f + .quad 0xb376c280c4e6bac6 + .quad 0x970ed3dd6d1d9b0b + .quad 0xb09a9558450bf944 + .quad 0x48d0acfa57cde223 + .quad 0xb60f26e47dabe671 + .quad 0xf1d1a197622f3a37 + .quad 0x4208ce7ee9960394 + .quad 0x16234191336d3bdb + + // 2^180 * 1 * G + + .quad 0xf19aeac733a63aef + .quad 0x2c7fba5d4442454e + .quad 0x5da87aa04795e441 + .quad 0x413051e1a4e0b0f5 + .quad 0x852dd1fd3d578bbe + .quad 0x2b65ce72c3286108 + .quad 0x658c07f4eace2273 + .quad 0x0933f804ec38ab40 + .quad 0xa7ab69798d496476 + .quad 0x8121aadefcb5abc8 + .quad 0xa5dc12ef7b539472 + .quad 0x07fd47065e45351a + + // 2^180 * 2 * G + + .quad 0xc8583c3d258d2bcd + .quad 0x17029a4daf60b73f + .quad 0xfa0fc9d6416a3781 + .quad 0x1c1e5fba38b3fb23 + .quad 0x304211559ae8e7c3 + .quad 0xf281b229944882a5 + .quad 0x8a13ac2e378250e4 + .quad 0x014afa0954ba48f4 + .quad 0xcb3197001bb3666c + .quad 0x330060524bffecb9 + .quad 0x293711991a88233c + .quad 0x291884363d4ed364 + + // 2^180 * 3 * G + + .quad 0x033c6805dc4babfa + .quad 0x2c15bf5e5596ecc1 + .quad 0x1bc70624b59b1d3b + .quad 0x3ede9850a19f0ec5 + .quad 0xfb9d37c3bc1ab6eb + .quad 0x02be14534d57a240 + .quad 0xf4d73415f8a5e1f6 + .quad 0x5964f4300ccc8188 + .quad 0xe44a23152d096800 + .quad 0x5c08c55970866996 + .quad 0xdf2db60a46affb6e + .quad 0x579155c1f856fd89 + + // 2^180 * 4 * G + + .quad 0x96324edd12e0c9ef + .quad 0x468b878df2420297 + .quad 0x199a3776a4f573be + .quad 0x1e7fbcf18e91e92a + .quad 0xb5f16b630817e7a6 + .quad 0x808c69233c351026 + .quad 0x324a983b54cef201 + .quad 0x53c092084a485345 + .quad 0xd2d41481f1cbafbf + .quad 0x231d2db6716174e5 + .quad 0x0b7d7656e2a55c98 + .quad 0x3e955cd82aa495f6 + + // 2^180 * 5 * G + + .quad 0xe48f535e3ed15433 + .quad 0xd075692a0d7270a3 + .quad 0x40fbd21daade6387 + .quad 0x14264887cf4495f5 + .quad 0xab39f3ef61bb3a3f + .quad 0x8eb400652eb9193e + .quad 0xb5de6ecc38c11f74 + .quad 0x654d7e9626f3c49f + .quad 0xe564cfdd5c7d2ceb + .quad 0x82eeafded737ccb9 + .quad 0x6107db62d1f9b0ab + .quad 0x0b6baac3b4358dbb + + // 2^180 * 6 * G + + .quad 0x7ae62bcb8622fe98 + .quad 0x47762256ceb891af + .quad 0x1a5a92bcf2e406b4 + .quad 0x7d29401784e41501 + .quad 0x204abad63700a93b + .quad 0xbe0023d3da779373 + .quad 0xd85f0346633ab709 + .quad 0x00496dc490820412 + .quad 0x1c74b88dc27e6360 + .quad 0x074854268d14850c + .quad 0xa145fb7b3e0dcb30 + .quad 0x10843f1b43803b23 + + // 2^180 * 7 * G + + .quad 0xc5f90455376276dd + .quad 0xce59158dd7645cd9 + .quad 0x92f65d511d366b39 + .quad 0x11574b6e526996c4 + .quad 0xd56f672de324689b + .quad 0xd1da8aedb394a981 + .quad 0xdd7b58fe9168cfed + .quad 0x7ce246cd4d56c1e8 + .quad 0xb8f4308e7f80be53 + .quad 0x5f3cb8cb34a9d397 + .quad 0x18a961bd33cc2b2c + .quad 0x710045fb3a9af671 + + // 2^180 * 8 * G + + .quad 0x73f93d36101b95eb + .quad 0xfaef33794f6f4486 + .quad 0x5651735f8f15e562 + .quad 0x7fa3f19058b40da1 + .quad 0xa03fc862059d699e + .quad 0x2370cfa19a619e69 + .quad 0xc4fe3b122f823deb + .quad 0x1d1b056fa7f0844e + .quad 0x1bc64631e56bf61f + .quad 0xd379ab106e5382a3 + .quad 0x4d58c57e0540168d + .quad 0x566256628442d8e4 + + // 2^184 * 1 * G + + .quad 0xb9e499def6267ff6 + .quad 0x7772ca7b742c0843 + .quad 0x23a0153fe9a4f2b1 + .quad 0x2cdfdfecd5d05006 + .quad 0xdd499cd61ff38640 + .quad 0x29cd9bc3063625a0 + .quad 0x51e2d8023dd73dc3 + .quad 
0x4a25707a203b9231 + .quad 0x2ab7668a53f6ed6a + .quad 0x304242581dd170a1 + .quad 0x4000144c3ae20161 + .quad 0x5721896d248e49fc + + // 2^184 * 2 * G + + .quad 0x0b6e5517fd181bae + .quad 0x9022629f2bb963b4 + .quad 0x5509bce932064625 + .quad 0x578edd74f63c13da + .quad 0x285d5091a1d0da4e + .quad 0x4baa6fa7b5fe3e08 + .quad 0x63e5177ce19393b3 + .quad 0x03c935afc4b030fd + .quad 0x997276c6492b0c3d + .quad 0x47ccc2c4dfe205fc + .quad 0xdcd29b84dd623a3c + .quad 0x3ec2ab590288c7a2 + + // 2^184 * 3 * G + + .quad 0xa1a0d27be4d87bb9 + .quad 0xa98b4deb61391aed + .quad 0x99a0ddd073cb9b83 + .quad 0x2dd5c25a200fcace + .quad 0xa7213a09ae32d1cb + .quad 0x0f2b87df40f5c2d5 + .quad 0x0baea4c6e81eab29 + .quad 0x0e1bf66c6adbac5e + .quad 0xe2abd5e9792c887e + .quad 0x1a020018cb926d5d + .quad 0xbfba69cdbaae5f1e + .quad 0x730548b35ae88f5f + + // 2^184 * 4 * G + + .quad 0xc43551a3cba8b8ee + .quad 0x65a26f1db2115f16 + .quad 0x760f4f52ab8c3850 + .quad 0x3043443b411db8ca + .quad 0x805b094ba1d6e334 + .quad 0xbf3ef17709353f19 + .quad 0x423f06cb0622702b + .quad 0x585a2277d87845dd + .quad 0xa18a5f8233d48962 + .quad 0x6698c4b5ec78257f + .quad 0xa78e6fa5373e41ff + .quad 0x7656278950ef981f + + // 2^184 * 5 * G + + .quad 0x38c3cf59d51fc8c0 + .quad 0x9bedd2fd0506b6f2 + .quad 0x26bf109fab570e8f + .quad 0x3f4160a8c1b846a6 + .quad 0xe17073a3ea86cf9d + .quad 0x3a8cfbb707155fdc + .quad 0x4853e7fc31838a8e + .quad 0x28bbf484b613f616 + .quad 0xf2612f5c6f136c7c + .quad 0xafead107f6dd11be + .quad 0x527e9ad213de6f33 + .quad 0x1e79cb358188f75d + + // 2^184 * 6 * G + + .quad 0x013436c3eef7e3f1 + .quad 0x828b6a7ffe9e10f8 + .quad 0x7ff908e5bcf9defc + .quad 0x65d7951b3a3b3831 + .quad 0x77e953d8f5e08181 + .quad 0x84a50c44299dded9 + .quad 0xdc6c2d0c864525e5 + .quad 0x478ab52d39d1f2f4 + .quad 0x66a6a4d39252d159 + .quad 0xe5dde1bc871ac807 + .quad 0xb82c6b40a6c1c96f + .quad 0x16d87a411a212214 + + // 2^184 * 7 * G + + .quad 0xb3bd7e5a42066215 + .quad 0x879be3cd0c5a24c1 + .quad 0x57c05db1d6f994b7 + .quad 0x28f87c8165f38ca6 + .quad 0xfba4d5e2d54e0583 + .quad 0xe21fafd72ebd99fa + .quad 0x497ac2736ee9778f + .quad 0x1f990b577a5a6dde + .quad 0xa3344ead1be8f7d6 + .quad 0x7d1e50ebacea798f + .quad 0x77c6569e520de052 + .quad 0x45882fe1534d6d3e + + // 2^184 * 8 * G + + .quad 0x6669345d757983d6 + .quad 0x62b6ed1117aa11a6 + .quad 0x7ddd1857985e128f + .quad 0x688fe5b8f626f6dd + .quad 0xd8ac9929943c6fe4 + .quad 0xb5f9f161a38392a2 + .quad 0x2699db13bec89af3 + .quad 0x7dcf843ce405f074 + .quad 0x6c90d6484a4732c0 + .quad 0xd52143fdca563299 + .quad 0xb3be28c3915dc6e1 + .quad 0x6739687e7327191b + + // 2^188 * 1 * G + + .quad 0x9f65c5ea200814cf + .quad 0x840536e169a31740 + .quad 0x8b0ed13925c8b4ad + .quad 0x0080dbafe936361d + .quad 0x8ce5aad0c9cb971f + .quad 0x1156aaa99fd54a29 + .quad 0x41f7247015af9b78 + .quad 0x1fe8cca8420f49aa + .quad 0x72a1848f3c0cc82a + .quad 0x38c560c2877c9e54 + .quad 0x5004e228ce554140 + .quad 0x042418a103429d71 + + // 2^188 * 2 * G + + .quad 0x899dea51abf3ff5f + .quad 0x9b93a8672fc2d8ba + .quad 0x2c38cb97be6ebd5c + .quad 0x114d578497263b5d + .quad 0x58e84c6f20816247 + .quad 0x8db2b2b6e36fd793 + .quad 0x977182561d484d85 + .quad 0x0822024f8632abd7 + .quad 0xb301bb7c6b1beca3 + .quad 0x55393f6dc6eb1375 + .quad 0x910d281097b6e4eb + .quad 0x1ad4548d9d479ea3 + + // 2^188 * 3 * G + + .quad 0xcd5a7da0389a48fd + .quad 0xb38fa4aa9a78371e + .quad 0xc6d9761b2cdb8e6c + .quad 0x35cf51dbc97e1443 + .quad 0xa06fe66d0fe9fed3 + .quad 0xa8733a401c587909 + .quad 0x30d14d800df98953 + .quad 0x41ce5876c7b30258 + .quad 0x59ac3bc5d670c022 + .quad 0xeae67c109b119406 + .quad 
0x9798bdf0b3782fda + .quad 0x651e3201fd074092 + + // 2^188 * 4 * G + + .quad 0xd63d8483ef30c5cf + .quad 0x4cd4b4962361cc0c + .quad 0xee90e500a48426ac + .quad 0x0af51d7d18c14eeb + .quad 0xa57ba4a01efcae9e + .quad 0x769f4beedc308a94 + .quad 0xd1f10eeb3603cb2e + .quad 0x4099ce5e7e441278 + .quad 0x1ac98e4f8a5121e9 + .quad 0x7dae9544dbfa2fe0 + .quad 0x8320aa0dd6430df9 + .quad 0x667282652c4a2fb5 + + // 2^188 * 5 * G + + .quad 0x874621f4d86bc9ab + .quad 0xb54c7bbe56fe6fea + .quad 0x077a24257fadc22c + .quad 0x1ab53be419b90d39 + .quad 0xada8b6e02946db23 + .quad 0x1c0ce51a7b253ab7 + .quad 0x8448c85a66dd485b + .quad 0x7f1fc025d0675adf + .quad 0xd8ee1b18319ea6aa + .quad 0x004d88083a21f0da + .quad 0x3bd6aa1d883a4f4b + .quad 0x4db9a3a6dfd9fd14 + + // 2^188 * 6 * G + + .quad 0x8ce7b23bb99c0755 + .quad 0x35c5d6edc4f50f7a + .quad 0x7e1e2ed2ed9b50c3 + .quad 0x36305f16e8934da1 + .quad 0xd95b00bbcbb77c68 + .quad 0xddbc846a91f17849 + .quad 0x7cf700aebe28d9b3 + .quad 0x5ce1285c85d31f3e + .quad 0x31b6972d98b0bde8 + .quad 0x7d920706aca6de5b + .quad 0xe67310f8908a659f + .quad 0x50fac2a6efdf0235 + + // 2^188 * 7 * G + + .quad 0xf3d3a9f35b880f5a + .quad 0xedec050cdb03e7c2 + .quad 0xa896981ff9f0b1a2 + .quad 0x49a4ae2bac5e34a4 + .quad 0x295b1c86f6f449bc + .quad 0x51b2e84a1f0ab4dd + .quad 0xc001cb30aa8e551d + .quad 0x6a28d35944f43662 + .quad 0x28bb12ee04a740e0 + .quad 0x14313bbd9bce8174 + .quad 0x72f5b5e4e8c10c40 + .quad 0x7cbfb19936adcd5b + + // 2^188 * 8 * G + + .quad 0xa311ddc26b89792d + .quad 0x1b30b4c6da512664 + .quad 0x0ca77b4ccf150859 + .quad 0x1de443df1b009408 + .quad 0x8e793a7acc36e6e0 + .quad 0xf9fab7a37d586eed + .quad 0x3a4f9692bae1f4e4 + .quad 0x1c14b03eff5f447e + .quad 0x19647bd114a85291 + .quad 0x57b76cb21034d3af + .quad 0x6329db440f9d6dfa + .quad 0x5ef43e586a571493 + + // 2^192 * 1 * G + + .quad 0xef782014385675a6 + .quad 0xa2649f30aafda9e8 + .quad 0x4cd1eb505cdfa8cb + .quad 0x46115aba1d4dc0b3 + .quad 0xa66dcc9dc80c1ac0 + .quad 0x97a05cf41b38a436 + .quad 0xa7ebf3be95dbd7c6 + .quad 0x7da0b8f68d7e7dab + .quad 0xd40f1953c3b5da76 + .quad 0x1dac6f7321119e9b + .quad 0x03cc6021feb25960 + .quad 0x5a5f887e83674b4b + + // 2^192 * 2 * G + + .quad 0x8f6301cf70a13d11 + .quad 0xcfceb815350dd0c4 + .quad 0xf70297d4a4bca47e + .quad 0x3669b656e44d1434 + .quad 0x9e9628d3a0a643b9 + .quad 0xb5c3cb00e6c32064 + .quad 0x9b5302897c2dec32 + .quad 0x43e37ae2d5d1c70c + .quad 0x387e3f06eda6e133 + .quad 0x67301d5199a13ac0 + .quad 0xbd5ad8f836263811 + .quad 0x6a21e6cd4fd5e9be + + // 2^192 * 3 * G + + .quad 0xf1c6170a3046e65f + .quad 0x58712a2a00d23524 + .quad 0x69dbbd3c8c82b755 + .quad 0x586bf9f1a195ff57 + .quad 0xef4129126699b2e3 + .quad 0x71d30847708d1301 + .quad 0x325432d01182b0bd + .quad 0x45371b07001e8b36 + .quad 0xa6db088d5ef8790b + .quad 0x5278f0dc610937e5 + .quad 0xac0349d261a16eb8 + .quad 0x0eafb03790e52179 + + // 2^192 * 4 * G + + .quad 0x960555c13748042f + .quad 0x219a41e6820baa11 + .quad 0x1c81f73873486d0c + .quad 0x309acc675a02c661 + .quad 0x5140805e0f75ae1d + .quad 0xec02fbe32662cc30 + .quad 0x2cebdf1eea92396d + .quad 0x44ae3344c5435bb3 + .quad 0x9cf289b9bba543ee + .quad 0xf3760e9d5ac97142 + .quad 0x1d82e5c64f9360aa + .quad 0x62d5221b7f94678f + + // 2^192 * 5 * G + + .quad 0x524c299c18d0936d + .quad 0xc86bb56c8a0c1a0c + .quad 0xa375052edb4a8631 + .quad 0x5c0efde4bc754562 + .quad 0x7585d4263af77a3c + .quad 0xdfae7b11fee9144d + .quad 0xa506708059f7193d + .quad 0x14f29a5383922037 + .quad 0xdf717edc25b2d7f5 + .quad 0x21f970db99b53040 + .quad 0xda9234b7c3ed4c62 + .quad 0x5e72365c7bee093e + + // 2^192 * 6 * G + + .quad 
0x575bfc074571217f + .quad 0x3779675d0694d95b + .quad 0x9a0a37bbf4191e33 + .quad 0x77f1104c47b4eabc + .quad 0x7d9339062f08b33e + .quad 0x5b9659e5df9f32be + .quad 0xacff3dad1f9ebdfd + .quad 0x70b20555cb7349b7 + .quad 0xbe5113c555112c4c + .quad 0x6688423a9a881fcd + .quad 0x446677855e503b47 + .quad 0x0e34398f4a06404a + + // 2^192 * 7 * G + + .quad 0xb67d22d93ecebde8 + .quad 0x09b3e84127822f07 + .quad 0x743fa61fb05b6d8d + .quad 0x5e5405368a362372 + .quad 0x18930b093e4b1928 + .quad 0x7de3e10e73f3f640 + .quad 0xf43217da73395d6f + .quad 0x6f8aded6ca379c3e + .quad 0xe340123dfdb7b29a + .quad 0x487b97e1a21ab291 + .quad 0xf9967d02fde6949e + .quad 0x780de72ec8d3de97 + + // 2^192 * 8 * G + + .quad 0x0ae28545089ae7bc + .quad 0x388ddecf1c7f4d06 + .quad 0x38ac15510a4811b8 + .quad 0x0eb28bf671928ce4 + .quad 0x671feaf300f42772 + .quad 0x8f72eb2a2a8c41aa + .quad 0x29a17fd797373292 + .quad 0x1defc6ad32b587a6 + .quad 0xaf5bbe1aef5195a7 + .quad 0x148c1277917b15ed + .quad 0x2991f7fb7ae5da2e + .quad 0x467d201bf8dd2867 + + // 2^196 * 1 * G + + .quad 0x7906ee72f7bd2e6b + .quad 0x05d270d6109abf4e + .quad 0x8d5cfe45b941a8a4 + .quad 0x44c218671c974287 + .quad 0x745f9d56296bc318 + .quad 0x993580d4d8152e65 + .quad 0xb0e5b13f5839e9ce + .quad 0x51fc2b28d43921c0 + .quad 0x1b8fd11795e2a98c + .quad 0x1c4e5ee12b6b6291 + .quad 0x5b30e7107424b572 + .quad 0x6e6b9de84c4f4ac6 + + // 2^196 * 2 * G + + .quad 0xdff25fce4b1de151 + .quad 0xd841c0c7e11c4025 + .quad 0x2554b3c854749c87 + .quad 0x2d292459908e0df9 + .quad 0x6b7c5f10f80cb088 + .quad 0x736b54dc56e42151 + .quad 0xc2b620a5c6ef99c4 + .quad 0x5f4c802cc3a06f42 + .quad 0x9b65c8f17d0752da + .quad 0x881ce338c77ee800 + .quad 0xc3b514f05b62f9e3 + .quad 0x66ed5dd5bec10d48 + + // 2^196 * 3 * G + + .quad 0x7d38a1c20bb2089d + .quad 0x808334e196ccd412 + .quad 0xc4a70b8c6c97d313 + .quad 0x2eacf8bc03007f20 + .quad 0xf0adf3c9cbca047d + .quad 0x81c3b2cbf4552f6b + .quad 0xcfda112d44735f93 + .quad 0x1f23a0c77e20048c + .quad 0xf235467be5bc1570 + .quad 0x03d2d9020dbab38c + .quad 0x27529aa2fcf9e09e + .quad 0x0840bef29d34bc50 + + // 2^196 * 4 * G + + .quad 0x796dfb35dc10b287 + .quad 0x27176bcd5c7ff29d + .quad 0x7f3d43e8c7b24905 + .quad 0x0304f5a191c54276 + .quad 0xcd54e06b7f37e4eb + .quad 0x8cc15f87f5e96cca + .quad 0xb8248bb0d3597dce + .quad 0x246affa06074400c + .quad 0x37d88e68fbe45321 + .quad 0x86097548c0d75032 + .quad 0x4e9b13ef894a0d35 + .quad 0x25a83cac5753d325 + + // 2^196 * 5 * G + + .quad 0x10222f48eed8165e + .quad 0x623fc1234b8bcf3a + .quad 0x1e145c09c221e8f0 + .quad 0x7ccfa59fca782630 + .quad 0x9f0f66293952b6e2 + .quad 0x33db5e0e0934267b + .quad 0xff45252bd609fedc + .quad 0x06be10f5c506e0c9 + .quad 0x1a9615a9b62a345f + .quad 0x22050c564a52fecc + .quad 0xa7a2788528bc0dfe + .quad 0x5e82770a1a1ee71d + + // 2^196 * 6 * G + + .quad 0x35425183ad896a5c + .quad 0xe8673afbe78d52f6 + .quad 0x2c66f25f92a35f64 + .quad 0x09d04f3b3b86b102 + .quad 0xe802e80a42339c74 + .quad 0x34175166a7fffae5 + .quad 0x34865d1f1c408cae + .quad 0x2cca982c605bc5ee + .quad 0xfd2d5d35197dbe6e + .quad 0x207c2eea8be4ffa3 + .quad 0x2613d8db325ae918 + .quad 0x7a325d1727741d3e + + // 2^196 * 7 * G + + .quad 0xd036b9bbd16dfde2 + .quad 0xa2055757c497a829 + .quad 0x8e6cc966a7f12667 + .quad 0x4d3b1a791239c180 + .quad 0xecd27d017e2a076a + .quad 0xd788689f1636495e + .quad 0x52a61af0919233e5 + .quad 0x2a479df17bb1ae64 + .quad 0x9e5eee8e33db2710 + .quad 0x189854ded6c43ca5 + .quad 0xa41c22c592718138 + .quad 0x27ad5538a43a5e9b + + // 2^196 * 8 * G + + .quad 0x2746dd4b15350d61 + .quad 0xd03fcbc8ee9521b7 + .quad 0xe86e365a138672ca + .quad 
0x510e987f7e7d89e2 + .quad 0xcb5a7d638e47077c + .quad 0x8db7536120a1c059 + .quad 0x549e1e4d8bedfdcc + .quad 0x080153b7503b179d + .quad 0xdda69d930a3ed3e3 + .quad 0x3d386ef1cd60a722 + .quad 0xc817ad58bdaa4ee6 + .quad 0x23be8d554fe7372a + + // 2^200 * 1 * G + + .quad 0x95fe919a74ef4fad + .quad 0x3a827becf6a308a2 + .quad 0x964e01d309a47b01 + .quad 0x71c43c4f5ba3c797 + .quad 0xbc1ef4bd567ae7a9 + .quad 0x3f624cb2d64498bd + .quad 0xe41064d22c1f4ec8 + .quad 0x2ef9c5a5ba384001 + .quad 0xb6fd6df6fa9e74cd + .quad 0xf18278bce4af267a + .quad 0x8255b3d0f1ef990e + .quad 0x5a758ca390c5f293 + + // 2^200 * 2 * G + + .quad 0xa2b72710d9462495 + .quad 0x3aa8c6d2d57d5003 + .quad 0xe3d400bfa0b487ca + .quad 0x2dbae244b3eb72ec + .quad 0x8ce0918b1d61dc94 + .quad 0x8ded36469a813066 + .quad 0xd4e6a829afe8aad3 + .quad 0x0a738027f639d43f + .quad 0x980f4a2f57ffe1cc + .quad 0x00670d0de1839843 + .quad 0x105c3f4a49fb15fd + .quad 0x2698ca635126a69c + + // 2^200 * 3 * G + + .quad 0xe765318832b0ba78 + .quad 0x381831f7925cff8b + .quad 0x08a81b91a0291fcc + .quad 0x1fb43dcc49caeb07 + .quad 0x2e3d702f5e3dd90e + .quad 0x9e3f0918e4d25386 + .quad 0x5e773ef6024da96a + .quad 0x3c004b0c4afa3332 + .quad 0x9aa946ac06f4b82b + .quad 0x1ca284a5a806c4f3 + .quad 0x3ed3265fc6cd4787 + .quad 0x6b43fd01cd1fd217 + + // 2^200 * 4 * G + + .quad 0xc7a75d4b4697c544 + .quad 0x15fdf848df0fffbf + .quad 0x2868b9ebaa46785a + .quad 0x5a68d7105b52f714 + .quad 0xb5c742583e760ef3 + .quad 0x75dc52b9ee0ab990 + .quad 0xbf1427c2072b923f + .quad 0x73420b2d6ff0d9f0 + .quad 0xaf2cf6cb9e851e06 + .quad 0x8f593913c62238c4 + .quad 0xda8ab89699fbf373 + .quad 0x3db5632fea34bc9e + + // 2^200 * 5 * G + + .quad 0xf46eee2bf75dd9d8 + .quad 0x0d17b1f6396759a5 + .quad 0x1bf2d131499e7273 + .quad 0x04321adf49d75f13 + .quad 0x2e4990b1829825d5 + .quad 0xedeaeb873e9a8991 + .quad 0xeef03d394c704af8 + .quad 0x59197ea495df2b0e + .quad 0x04e16019e4e55aae + .quad 0xe77b437a7e2f92e9 + .quad 0xc7ce2dc16f159aa4 + .quad 0x45eafdc1f4d70cc0 + + // 2^200 * 6 * G + + .quad 0x698401858045d72b + .quad 0x4c22faa2cf2f0651 + .quad 0x941a36656b222dc6 + .quad 0x5a5eebc80362dade + .quad 0xb60e4624cfccb1ed + .quad 0x59dbc292bd5c0395 + .quad 0x31a09d1ddc0481c9 + .quad 0x3f73ceea5d56d940 + .quad 0xb7a7bfd10a4e8dc6 + .quad 0xbe57007e44c9b339 + .quad 0x60c1207f1557aefa + .quad 0x26058891266218db + + // 2^200 * 7 * G + + .quad 0x59f704a68360ff04 + .quad 0xc3d93fde7661e6f4 + .quad 0x831b2a7312873551 + .quad 0x54ad0c2e4e615d57 + .quad 0x4c818e3cc676e542 + .quad 0x5e422c9303ceccad + .quad 0xec07cccab4129f08 + .quad 0x0dedfa10b24443b8 + .quad 0xee3b67d5b82b522a + .quad 0x36f163469fa5c1eb + .quad 0xa5b4d2f26ec19fd3 + .quad 0x62ecb2baa77a9408 + + // 2^200 * 8 * G + + .quad 0xe5ed795261152b3d + .quad 0x4962357d0eddd7d1 + .quad 0x7482c8d0b96b4c71 + .quad 0x2e59f919a966d8be + .quad 0x92072836afb62874 + .quad 0x5fcd5e8579e104a5 + .quad 0x5aad01adc630a14a + .quad 0x61913d5075663f98 + .quad 0x0dc62d361a3231da + .quad 0xfa47583294200270 + .quad 0x02d801513f9594ce + .quad 0x3ddbc2a131c05d5c + + // 2^204 * 1 * G + + .quad 0x3f50a50a4ffb81ef + .quad 0xb1e035093bf420bf + .quad 0x9baa8e1cc6aa2cd0 + .quad 0x32239861fa237a40 + .quad 0xfb735ac2004a35d1 + .quad 0x31de0f433a6607c3 + .quad 0x7b8591bfc528d599 + .quad 0x55be9a25f5bb050c + .quad 0x0d005acd33db3dbf + .quad 0x0111b37c80ac35e2 + .quad 0x4892d66c6f88ebeb + .quad 0x770eadb16508fbcd + + // 2^204 * 2 * G + + .quad 0x8451f9e05e4e89dd + .quad 0xc06302ffbc793937 + .quad 0x5d22749556a6495c + .quad 0x09a6755ca05603fb + .quad 0xf1d3b681a05071b9 + .quad 0x2207659a3592ff3a + .quad 
0x5f0169297881e40e + .quad 0x16bedd0e86ba374e + .quad 0x5ecccc4f2c2737b5 + .quad 0x43b79e0c2dccb703 + .quad 0x33e008bc4ec43df3 + .quad 0x06c1b840f07566c0 + + // 2^204 * 3 * G + + .quad 0x7688a5c6a388f877 + .quad 0x02a96c14deb2b6ac + .quad 0x64c9f3431b8c2af8 + .quad 0x3628435554a1eed6 + .quad 0x69ee9e7f9b02805c + .quad 0xcbff828a547d1640 + .quad 0x3d93a869b2430968 + .quad 0x46b7b8cd3fe26972 + .quad 0xe9812086fe7eebe0 + .quad 0x4cba6be72f515437 + .quad 0x1d04168b516efae9 + .quad 0x5ea1391043982cb9 + + // 2^204 * 4 * G + + .quad 0x49125c9cf4702ee1 + .quad 0x4520b71f8b25b32d + .quad 0x33193026501fef7e + .quad 0x656d8997c8d2eb2b + .quad 0x6f2b3be4d5d3b002 + .quad 0xafec33d96a09c880 + .quad 0x035f73a4a8bcc4cc + .quad 0x22c5b9284662198b + .quad 0xcb58c8fe433d8939 + .quad 0x89a0cb2e6a8d7e50 + .quad 0x79ca955309fbbe5a + .quad 0x0c626616cd7fc106 + + // 2^204 * 5 * G + + .quad 0x1ffeb80a4879b61f + .quad 0x6396726e4ada21ed + .quad 0x33c7b093368025ba + .quad 0x471aa0c6f3c31788 + .quad 0x8fdfc379fbf454b1 + .quad 0x45a5a970f1a4b771 + .quad 0xac921ef7bad35915 + .quad 0x42d088dca81c2192 + .quad 0x8fda0f37a0165199 + .quad 0x0adadb77c8a0e343 + .quad 0x20fbfdfcc875e820 + .quad 0x1cf2bea80c2206e7 + + // 2^204 * 6 * G + + .quad 0xc2ddf1deb36202ac + .quad 0x92a5fe09d2e27aa5 + .quad 0x7d1648f6fc09f1d3 + .quad 0x74c2cc0513bc4959 + .quad 0x982d6e1a02c0412f + .quad 0x90fa4c83db58e8fe + .quad 0x01c2f5bcdcb18bc0 + .quad 0x686e0c90216abc66 + .quad 0x1fadbadba54395a7 + .quad 0xb41a02a0ae0da66a + .quad 0xbf19f598bba37c07 + .quad 0x6a12b8acde48430d + + // 2^204 * 7 * G + + .quad 0xf8daea1f39d495d9 + .quad 0x592c190e525f1dfc + .quad 0xdb8cbd04c9991d1b + .quad 0x11f7fda3d88f0cb7 + .quad 0x793bdd801aaeeb5f + .quad 0x00a2a0aac1518871 + .quad 0xe8a373a31f2136b4 + .quad 0x48aab888fc91ef19 + .quad 0x041f7e925830f40e + .quad 0x002d6ca979661c06 + .quad 0x86dc9ff92b046a2e + .quad 0x760360928b0493d1 + + // 2^204 * 8 * G + + .quad 0x21bb41c6120cf9c6 + .quad 0xeab2aa12decda59b + .quad 0xc1a72d020aa48b34 + .quad 0x215d4d27e87d3b68 + .quad 0xb43108e5695a0b05 + .quad 0x6cb00ee8ad37a38b + .quad 0x5edad6eea3537381 + .quad 0x3f2602d4b6dc3224 + .quad 0xc8b247b65bcaf19c + .quad 0x49779dc3b1b2c652 + .quad 0x89a180bbd5ece2e2 + .quad 0x13f098a3cec8e039 + + // 2^208 * 1 * G + + .quad 0x9adc0ff9ce5ec54b + .quad 0x039c2a6b8c2f130d + .quad 0x028007c7f0f89515 + .quad 0x78968314ac04b36b + .quad 0xf3aa57a22796bb14 + .quad 0x883abab79b07da21 + .quad 0xe54be21831a0391c + .quad 0x5ee7fb38d83205f9 + .quad 0x538dfdcb41446a8e + .quad 0xa5acfda9434937f9 + .quad 0x46af908d263c8c78 + .quad 0x61d0633c9bca0d09 + + // 2^208 * 2 * G + + .quad 0x63744935ffdb2566 + .quad 0xc5bd6b89780b68bb + .quad 0x6f1b3280553eec03 + .quad 0x6e965fd847aed7f5 + .quad 0xada328bcf8fc73df + .quad 0xee84695da6f037fc + .quad 0x637fb4db38c2a909 + .quad 0x5b23ac2df8067bdc + .quad 0x9ad2b953ee80527b + .quad 0xe88f19aafade6d8d + .quad 0x0e711704150e82cf + .quad 0x79b9bbb9dd95dedc + + // 2^208 * 3 * G + + .quad 0xebb355406a3126c2 + .quad 0xd26383a868c8c393 + .quad 0x6c0c6429e5b97a82 + .quad 0x5065f158c9fd2147 + .quad 0xd1997dae8e9f7374 + .quad 0xa032a2f8cfbb0816 + .quad 0xcd6cba126d445f0a + .quad 0x1ba811460accb834 + .quad 0x708169fb0c429954 + .quad 0xe14600acd76ecf67 + .quad 0x2eaab98a70e645ba + .quad 0x3981f39e58a4faf2 + + // 2^208 * 4 * G + + .quad 0x18fb8a7559230a93 + .quad 0x1d168f6960e6f45d + .quad 0x3a85a94514a93cb5 + .quad 0x38dc083705acd0fd + .quad 0xc845dfa56de66fde + .quad 0xe152a5002c40483a + .quad 0xe9d2e163c7b4f632 + .quad 0x30f4452edcbc1b65 + .quad 0x856d2782c5759740 + .quad 
0xfa134569f99cbecc + .quad 0x8844fc73c0ea4e71 + .quad 0x632d9a1a593f2469 + + // 2^208 * 5 * G + + .quad 0xf6bb6b15b807cba6 + .quad 0x1823c7dfbc54f0d7 + .quad 0xbb1d97036e29670b + .quad 0x0b24f48847ed4a57 + .quad 0xbf09fd11ed0c84a7 + .quad 0x63f071810d9f693a + .quad 0x21908c2d57cf8779 + .quad 0x3a5a7df28af64ba2 + .quad 0xdcdad4be511beac7 + .quad 0xa4538075ed26ccf2 + .quad 0xe19cff9f005f9a65 + .quad 0x34fcf74475481f63 + + // 2^208 * 6 * G + + .quad 0xc197e04c789767ca + .quad 0xb8714dcb38d9467d + .quad 0x55de888283f95fa8 + .quad 0x3d3bdc164dfa63f7 + .quad 0xa5bb1dab78cfaa98 + .quad 0x5ceda267190b72f2 + .quad 0x9309c9110a92608e + .quad 0x0119a3042fb374b0 + .quad 0x67a2d89ce8c2177d + .quad 0x669da5f66895d0c1 + .quad 0xf56598e5b282a2b0 + .quad 0x56c088f1ede20a73 + + // 2^208 * 7 * G + + .quad 0x336d3d1110a86e17 + .quad 0xd7f388320b75b2fa + .quad 0xf915337625072988 + .quad 0x09674c6b99108b87 + .quad 0x581b5fac24f38f02 + .quad 0xa90be9febae30cbd + .quad 0x9a2169028acf92f0 + .quad 0x038b7ea48359038f + .quad 0x9f4ef82199316ff8 + .quad 0x2f49d282eaa78d4f + .quad 0x0971a5ab5aef3174 + .quad 0x6e5e31025969eb65 + + // 2^208 * 8 * G + + .quad 0xb16c62f587e593fb + .quad 0x4999eddeca5d3e71 + .quad 0xb491c1e014cc3e6d + .quad 0x08f5114789a8dba8 + .quad 0x3304fb0e63066222 + .quad 0xfb35068987acba3f + .quad 0xbd1924778c1061a3 + .quad 0x3058ad43d1838620 + .quad 0x323c0ffde57663d0 + .quad 0x05c3df38a22ea610 + .quad 0xbdc78abdac994f9a + .quad 0x26549fa4efe3dc99 + + // 2^212 * 1 * G + + .quad 0x738b38d787ce8f89 + .quad 0xb62658e24179a88d + .quad 0x30738c9cf151316d + .quad 0x49128c7f727275c9 + .quad 0x04dbbc17f75396b9 + .quad 0x69e6a2d7d2f86746 + .quad 0xc6409d99f53eabc6 + .quad 0x606175f6332e25d2 + .quad 0x4021370ef540e7dd + .quad 0x0910d6f5a1f1d0a5 + .quad 0x4634aacd5b06b807 + .quad 0x6a39e6356944f235 + + // 2^212 * 2 * G + + .quad 0x96cd5640df90f3e7 + .quad 0x6c3a760edbfa25ea + .quad 0x24f3ef0959e33cc4 + .quad 0x42889e7e530d2e58 + .quad 0x1da1965774049e9d + .quad 0xfbcd6ea198fe352b + .quad 0xb1cbcd50cc5236a6 + .quad 0x1f5ec83d3f9846e2 + .quad 0x8efb23c3328ccb75 + .quad 0xaf42a207dd876ee9 + .quad 0x20fbdadc5dfae796 + .quad 0x241e246b06bf9f51 + + // 2^212 * 3 * G + + .quad 0x29e68e57ad6e98f6 + .quad 0x4c9260c80b462065 + .quad 0x3f00862ea51ebb4b + .quad 0x5bc2c77fb38d9097 + .quad 0x7eaafc9a6280bbb8 + .quad 0x22a70f12f403d809 + .quad 0x31ce40bb1bfc8d20 + .quad 0x2bc65635e8bd53ee + .quad 0xe8d5dc9fa96bad93 + .quad 0xe58fb17dde1947dc + .quad 0x681532ea65185fa3 + .quad 0x1fdd6c3b034a7830 + + // 2^212 * 4 * G + + .quad 0x0a64e28c55dc18fe + .quad 0xe3df9e993399ebdd + .quad 0x79ac432370e2e652 + .quad 0x35ff7fc33ae4cc0e + .quad 0x9c13a6a52dd8f7a9 + .quad 0x2dbb1f8c3efdcabf + .quad 0x961e32405e08f7b5 + .quad 0x48c8a121bbe6c9e5 + .quad 0xfc415a7c59646445 + .quad 0xd224b2d7c128b615 + .quad 0x6035c9c905fbb912 + .quad 0x42d7a91274429fab + + // 2^212 * 5 * G + + .quad 0x4e6213e3eaf72ed3 + .quad 0x6794981a43acd4e7 + .quad 0xff547cde6eb508cb + .quad 0x6fed19dd10fcb532 + .quad 0xa9a48947933da5bc + .quad 0x4a58920ec2e979ec + .quad 0x96d8800013e5ac4c + .quad 0x453692d74b48b147 + .quad 0xdd775d99a8559c6f + .quad 0xf42a2140df003e24 + .quad 0x5223e229da928a66 + .quad 0x063f46ba6d38f22c + + // 2^212 * 6 * G + + .quad 0xd2d242895f536694 + .quad 0xca33a2c542939b2c + .quad 0x986fada6c7ddb95c + .quad 0x5a152c042f712d5d + .quad 0x39843cb737346921 + .quad 0xa747fb0738c89447 + .quad 0xcb8d8031a245307e + .quad 0x67810f8e6d82f068 + .quad 0x3eeb8fbcd2287db4 + .quad 0x72c7d3a301a03e93 + .quad 0x5473e88cbd98265a + .quad 0x7324aa515921b403 + + // 
2^212 * 7 * G + + .quad 0x857942f46c3cbe8e + .quad 0xa1d364b14730c046 + .quad 0x1c8ed914d23c41bf + .quad 0x0838e161eef6d5d2 + .quad 0xad23f6dae82354cb + .quad 0x6962502ab6571a6d + .quad 0x9b651636e38e37d1 + .quad 0x5cac5005d1a3312f + .quad 0x8cc154cce9e39904 + .quad 0x5b3a040b84de6846 + .quad 0xc4d8a61cb1be5d6e + .quad 0x40fb897bd8861f02 + + // 2^212 * 8 * G + + .quad 0x84c5aa9062de37a1 + .quad 0x421da5000d1d96e1 + .quad 0x788286306a9242d9 + .quad 0x3c5e464a690d10da + .quad 0xe57ed8475ab10761 + .quad 0x71435e206fd13746 + .quad 0x342f824ecd025632 + .quad 0x4b16281ea8791e7b + .quad 0xd1c101d50b813381 + .quad 0xdee60f1176ee6828 + .quad 0x0cb68893383f6409 + .quad 0x6183c565f6ff484a + + // 2^216 * 1 * G + + .quad 0x741d5a461e6bf9d6 + .quad 0x2305b3fc7777a581 + .quad 0xd45574a26474d3d9 + .quad 0x1926e1dc6401e0ff + .quad 0xdb468549af3f666e + .quad 0xd77fcf04f14a0ea5 + .quad 0x3df23ff7a4ba0c47 + .quad 0x3a10dfe132ce3c85 + .quad 0xe07f4e8aea17cea0 + .quad 0x2fd515463a1fc1fd + .quad 0x175322fd31f2c0f1 + .quad 0x1fa1d01d861e5d15 + + // 2^216 * 2 * G + + .quad 0xcc8055947d599832 + .quad 0x1e4656da37f15520 + .quad 0x99f6f7744e059320 + .quad 0x773563bc6a75cf33 + .quad 0x38dcac00d1df94ab + .quad 0x2e712bddd1080de9 + .quad 0x7f13e93efdd5e262 + .quad 0x73fced18ee9a01e5 + .quad 0x06b1e90863139cb3 + .quad 0xa493da67c5a03ecd + .quad 0x8d77cec8ad638932 + .quad 0x1f426b701b864f44 + + // 2^216 * 3 * G + + .quad 0xefc9264c41911c01 + .quad 0xf1a3b7b817a22c25 + .quad 0x5875da6bf30f1447 + .quad 0x4e1af5271d31b090 + .quad 0xf17e35c891a12552 + .quad 0xb76b8153575e9c76 + .quad 0xfa83406f0d9b723e + .quad 0x0b76bb1b3fa7e438 + .quad 0x08b8c1f97f92939b + .quad 0xbe6771cbd444ab6e + .quad 0x22e5646399bb8017 + .quad 0x7b6dd61eb772a955 + + // 2^216 * 4 * G + + .quad 0xb7adc1e850f33d92 + .quad 0x7998fa4f608cd5cf + .quad 0xad962dbd8dfc5bdb + .quad 0x703e9bceaf1d2f4f + .quad 0x5730abf9ab01d2c7 + .quad 0x16fb76dc40143b18 + .quad 0x866cbe65a0cbb281 + .quad 0x53fa9b659bff6afe + .quad 0x6c14c8e994885455 + .quad 0x843a5d6665aed4e5 + .quad 0x181bb73ebcd65af1 + .quad 0x398d93e5c4c61f50 + + // 2^216 * 5 * G + + .quad 0x1c4bd16733e248f3 + .quad 0xbd9e128715bf0a5f + .quad 0xd43f8cf0a10b0376 + .quad 0x53b09b5ddf191b13 + .quad 0xc3877c60d2e7e3f2 + .quad 0x3b34aaa030828bb1 + .quad 0x283e26e7739ef138 + .quad 0x699c9c9002c30577 + .quad 0xf306a7235946f1cc + .quad 0x921718b5cce5d97d + .quad 0x28cdd24781b4e975 + .quad 0x51caf30c6fcdd907 + + // 2^216 * 6 * G + + .quad 0xa60ba7427674e00a + .quad 0x630e8570a17a7bf3 + .quad 0x3758563dcf3324cc + .quad 0x5504aa292383fdaa + .quad 0x737af99a18ac54c7 + .quad 0x903378dcc51cb30f + .quad 0x2b89bc334ce10cc7 + .quad 0x12ae29c189f8e99a + .quad 0xa99ec0cb1f0d01cf + .quad 0x0dd1efcc3a34f7ae + .quad 0x55ca7521d09c4e22 + .quad 0x5fd14fe958eba5ea + + // 2^216 * 7 * G + + .quad 0xb5dc2ddf2845ab2c + .quad 0x069491b10a7fe993 + .quad 0x4daaf3d64002e346 + .quad 0x093ff26e586474d1 + .quad 0x3c42fe5ebf93cb8e + .quad 0xbedfa85136d4565f + .quad 0xe0f0859e884220e8 + .quad 0x7dd73f960725d128 + .quad 0xb10d24fe68059829 + .quad 0x75730672dbaf23e5 + .quad 0x1367253ab457ac29 + .quad 0x2f59bcbc86b470a4 + + // 2^216 * 8 * G + + .quad 0x83847d429917135f + .quad 0xad1b911f567d03d7 + .quad 0x7e7748d9be77aad1 + .quad 0x5458b42e2e51af4a + .quad 0x7041d560b691c301 + .quad 0x85201b3fadd7e71e + .quad 0x16c2e16311335585 + .quad 0x2aa55e3d010828b1 + .quad 0xed5192e60c07444f + .quad 0x42c54e2d74421d10 + .quad 0x352b4c82fdb5c864 + .quad 0x13e9004a8a768664 + + // 2^220 * 1 * G + + .quad 0xcbb5b5556c032bff + .quad 0xdf7191b729297a3a + .quad 
0xc1ff7326aded81bb + .quad 0x71ade8bb68be03f5 + .quad 0x1e6284c5806b467c + .quad 0xc5f6997be75d607b + .quad 0x8b67d958b378d262 + .quad 0x3d88d66a81cd8b70 + .quad 0x8b767a93204ed789 + .quad 0x762fcacb9fa0ae2a + .quad 0x771febcc6dce4887 + .quad 0x343062158ff05fb3 + + // 2^220 * 2 * G + + .quad 0xe05da1a7e1f5bf49 + .quad 0x26457d6dd4736092 + .quad 0x77dcb07773cc32f6 + .quad 0x0a5d94969cdd5fcd + .quad 0xfce219072a7b31b4 + .quad 0x4d7adc75aa578016 + .quad 0x0ec276a687479324 + .quad 0x6d6d9d5d1fda4beb + .quad 0x22b1a58ae9b08183 + .quad 0xfd95d071c15c388b + .quad 0xa9812376850a0517 + .quad 0x33384cbabb7f335e + + // 2^220 * 3 * G + + .quad 0x3c6fa2680ca2c7b5 + .quad 0x1b5082046fb64fda + .quad 0xeb53349c5431d6de + .quad 0x5278b38f6b879c89 + .quad 0x33bc627a26218b8d + .quad 0xea80b21fc7a80c61 + .quad 0x9458b12b173e9ee6 + .quad 0x076247be0e2f3059 + .quad 0x52e105f61416375a + .quad 0xec97af3685abeba4 + .quad 0x26e6b50623a67c36 + .quad 0x5cf0e856f3d4fb01 + + // 2^220 * 4 * G + + .quad 0xf6c968731ae8cab4 + .quad 0x5e20741ecb4f92c5 + .quad 0x2da53be58ccdbc3e + .quad 0x2dddfea269970df7 + .quad 0xbeaece313db342a8 + .quad 0xcba3635b842db7ee + .quad 0xe88c6620817f13ef + .quad 0x1b9438aa4e76d5c6 + .quad 0x8a50777e166f031a + .quad 0x067b39f10fb7a328 + .quad 0x1925c9a6010fbd76 + .quad 0x6df9b575cc740905 + + // 2^220 * 5 * G + + .quad 0x42c1192927f6bdcf + .quad 0x8f91917a403d61ca + .quad 0xdc1c5a668b9e1f61 + .quad 0x1596047804ec0f8d + .quad 0xecdfc35b48cade41 + .quad 0x6a88471fb2328270 + .quad 0x740a4a2440a01b6a + .quad 0x471e5796003b5f29 + .quad 0xda96bbb3aced37ac + .quad 0x7a2423b5e9208cea + .quad 0x24cc5c3038aebae2 + .quad 0x50c356afdc5dae2f + + // 2^220 * 6 * G + + .quad 0x09dcbf4341c30318 + .quad 0xeeba061183181dce + .quad 0xc179c0cedc1e29a1 + .quad 0x1dbf7b89073f35b0 + .quad 0xcfed9cdf1b31b964 + .quad 0xf486a9858ca51af3 + .quad 0x14897265ea8c1f84 + .quad 0x784a53dd932acc00 + .quad 0x2d99f9df14fc4920 + .quad 0x76ccb60cc4499fe5 + .quad 0xa4132cbbe5cf0003 + .quad 0x3f93d82354f000ea + + // 2^220 * 7 * G + + .quad 0x8183e7689e04ce85 + .quad 0x678fb71e04465341 + .quad 0xad92058f6688edac + .quad 0x5da350d3532b099a + .quad 0xeaac12d179e14978 + .quad 0xff923ff3bbebff5e + .quad 0x4af663e40663ce27 + .quad 0x0fd381a811a5f5ff + .quad 0xf256aceca436df54 + .quad 0x108b6168ae69d6e8 + .quad 0x20d986cb6b5d036c + .quad 0x655957b9fee2af50 + + // 2^220 * 8 * G + + .quad 0xaea8b07fa902030f + .quad 0xf88c766af463d143 + .quad 0x15b083663c787a60 + .quad 0x08eab1148267a4a8 + .quad 0xbdc1409bd002d0ac + .quad 0x66660245b5ccd9a6 + .quad 0x82317dc4fade85ec + .quad 0x02fe934b6ad7df0d + .quad 0xef5cf100cfb7ea74 + .quad 0x22897633a1cb42ac + .quad 0xd4ce0c54cef285e2 + .quad 0x30408c048a146a55 + + // 2^224 * 1 * G + + .quad 0x739d8845832fcedb + .quad 0xfa38d6c9ae6bf863 + .quad 0x32bc0dcab74ffef7 + .quad 0x73937e8814bce45e + .quad 0xbb2e00c9193b877f + .quad 0xece3a890e0dc506b + .quad 0xecf3b7c036de649f + .quad 0x5f46040898de9e1a + .quad 0xb9037116297bf48d + .quad 0xa9d13b22d4f06834 + .quad 0xe19715574696bdc6 + .quad 0x2cf8a4e891d5e835 + + // 2^224 * 2 * G + + .quad 0x6d93fd8707110f67 + .quad 0xdd4c09d37c38b549 + .quad 0x7cb16a4cc2736a86 + .quad 0x2049bd6e58252a09 + .quad 0x2cb5487e17d06ba2 + .quad 0x24d2381c3950196b + .quad 0xd7659c8185978a30 + .quad 0x7a6f7f2891d6a4f6 + .quad 0x7d09fd8d6a9aef49 + .quad 0xf0ee60be5b3db90b + .quad 0x4c21b52c519ebfd4 + .quad 0x6011aadfc545941d + + // 2^224 * 3 * G + + .quad 0x5f67926dcf95f83c + .quad 0x7c7e856171289071 + .quad 0xd6a1e7f3998f7a5b + .quad 0x6fc5cc1b0b62f9e0 + .quad 0x63ded0c802cbf890 + .quad 
0xfbd098ca0dff6aaa + .quad 0x624d0afdb9b6ed99 + .quad 0x69ce18b779340b1e + .quad 0xd1ef5528b29879cb + .quad 0xdd1aae3cd47e9092 + .quad 0x127e0442189f2352 + .quad 0x15596b3ae57101f1 + + // 2^224 * 4 * G + + .quad 0x462739d23f9179a2 + .quad 0xff83123197d6ddcf + .quad 0x1307deb553f2148a + .quad 0x0d2237687b5f4dda + .quad 0x09ff31167e5124ca + .quad 0x0be4158bd9c745df + .quad 0x292b7d227ef556e5 + .quad 0x3aa4e241afb6d138 + .quad 0x2cc138bf2a3305f5 + .quad 0x48583f8fa2e926c3 + .quad 0x083ab1a25549d2eb + .quad 0x32fcaa6e4687a36c + + // 2^224 * 5 * G + + .quad 0x7bc56e8dc57d9af5 + .quad 0x3e0bd2ed9df0bdf2 + .quad 0xaac014de22efe4a3 + .quad 0x4627e9cefebd6a5c + .quad 0x3207a4732787ccdf + .quad 0x17e31908f213e3f8 + .quad 0xd5b2ecd7f60d964e + .quad 0x746f6336c2600be9 + .quad 0x3f4af345ab6c971c + .quad 0xe288eb729943731f + .quad 0x33596a8a0344186d + .quad 0x7b4917007ed66293 + + // 2^224 * 6 * G + + .quad 0x2d85fb5cab84b064 + .quad 0x497810d289f3bc14 + .quad 0x476adc447b15ce0c + .quad 0x122ba376f844fd7b + .quad 0x54341b28dd53a2dd + .quad 0xaa17905bdf42fc3f + .quad 0x0ff592d94dd2f8f4 + .quad 0x1d03620fe08cd37d + .quad 0xc20232cda2b4e554 + .quad 0x9ed0fd42115d187f + .quad 0x2eabb4be7dd479d9 + .quad 0x02c70bf52b68ec4c + + // 2^224 * 7 * G + + .quad 0xa287ec4b5d0b2fbb + .quad 0x415c5790074882ca + .quad 0xe044a61ec1d0815c + .quad 0x26334f0a409ef5e0 + .quad 0xace532bf458d72e1 + .quad 0x5be768e07cb73cb5 + .quad 0x56cf7d94ee8bbde7 + .quad 0x6b0697e3feb43a03 + .quad 0xb6c8f04adf62a3c0 + .quad 0x3ef000ef076da45d + .quad 0x9c9cb95849f0d2a9 + .quad 0x1cc37f43441b2fae + + // 2^224 * 8 * G + + .quad 0x508f565a5cc7324f + .quad 0xd061c4c0e506a922 + .quad 0xfb18abdb5c45ac19 + .quad 0x6c6809c10380314a + .quad 0xd76656f1c9ceaeb9 + .quad 0x1c5b15f818e5656a + .quad 0x26e72832844c2334 + .quad 0x3a346f772f196838 + .quad 0xd2d55112e2da6ac8 + .quad 0xe9bd0331b1e851ed + .quad 0x960746dd8ec67262 + .quad 0x05911b9f6ef7c5d0 + + // 2^228 * 1 * G + + .quad 0xe9dcd756b637ff2d + .quad 0xec4c348fc987f0c4 + .quad 0xced59285f3fbc7b7 + .quad 0x3305354793e1ea87 + .quad 0x01c18980c5fe9f94 + .quad 0xcd656769716fd5c8 + .quad 0x816045c3d195a086 + .quad 0x6e2b7f3266cc7982 + .quad 0xcc802468f7c3568f + .quad 0x9de9ba8219974cb3 + .quad 0xabb7229cb5b81360 + .quad 0x44e2017a6fbeba62 + + // 2^228 * 2 * G + + .quad 0xc4c2a74354dab774 + .quad 0x8e5d4c3c4eaf031a + .quad 0xb76c23d242838f17 + .quad 0x749a098f68dce4ea + .quad 0x87f82cf3b6ca6ecd + .quad 0x580f893e18f4a0c2 + .quad 0x058930072604e557 + .quad 0x6cab6ac256d19c1d + .quad 0xdcdfe0a02cc1de60 + .quad 0x032665ff51c5575b + .quad 0x2c0c32f1073abeeb + .quad 0x6a882014cd7b8606 + + // 2^228 * 3 * G + + .quad 0xa52a92fea4747fb5 + .quad 0xdc12a4491fa5ab89 + .quad 0xd82da94bb847a4ce + .quad 0x4d77edce9512cc4e + .quad 0xd111d17caf4feb6e + .quad 0x050bba42b33aa4a3 + .quad 0x17514c3ceeb46c30 + .quad 0x54bedb8b1bc27d75 + .quad 0x77c8e14577e2189c + .quad 0xa3e46f6aff99c445 + .quad 0x3144dfc86d335343 + .quad 0x3a96559e7c4216a9 + + // 2^228 * 4 * G + + .quad 0x12550d37f42ad2ee + .quad 0x8b78e00498a1fbf5 + .quad 0x5d53078233894cb2 + .quad 0x02c84e4e3e498d0c + .quad 0x4493896880baaa52 + .quad 0x4c98afc4f285940e + .quad 0xef4aa79ba45448b6 + .quad 0x5278c510a57aae7f + .quad 0xa54dd074294c0b94 + .quad 0xf55d46b8df18ffb6 + .quad 0xf06fecc58dae8366 + .quad 0x588657668190d165 + + // 2^228 * 5 * G + + .quad 0xd47712311aef7117 + .quad 0x50343101229e92c7 + .quad 0x7a95e1849d159b97 + .quad 0x2449959b8b5d29c9 + .quad 0xbf5834f03de25cc3 + .quad 0xb887c8aed6815496 + .quad 0x5105221a9481e892 + .quad 0x6760ed19f7723f93 + .quad 
0x669ba3b7ac35e160 + .quad 0x2eccf73fba842056 + .quad 0x1aec1f17c0804f07 + .quad 0x0d96bc031856f4e7 + + // 2^228 * 6 * G + + .quad 0x3318be7775c52d82 + .quad 0x4cb764b554d0aab9 + .quad 0xabcf3d27cc773d91 + .quad 0x3bf4d1848123288a + .quad 0xb1d534b0cc7505e1 + .quad 0x32cd003416c35288 + .quad 0xcb36a5800762c29d + .quad 0x5bfe69b9237a0bf8 + .quad 0x183eab7e78a151ab + .quad 0xbbe990c999093763 + .quad 0xff717d6e4ac7e335 + .quad 0x4c5cddb325f39f88 + + // 2^228 * 7 * G + + .quad 0xc0f6b74d6190a6eb + .quad 0x20ea81a42db8f4e4 + .quad 0xa8bd6f7d97315760 + .quad 0x33b1d60262ac7c21 + .quad 0x57750967e7a9f902 + .quad 0x2c37fdfc4f5b467e + .quad 0xb261663a3177ba46 + .quad 0x3a375e78dc2d532b + .quad 0x8141e72f2d4dddea + .quad 0xe6eafe9862c607c8 + .quad 0x23c28458573cafd0 + .quad 0x46b9476f4ff97346 + + // 2^228 * 8 * G + + .quad 0x0c1ffea44f901e5c + .quad 0x2b0b6fb72184b782 + .quad 0xe587ff910114db88 + .quad 0x37130f364785a142 + .quad 0x1215505c0d58359f + .quad 0x2a2013c7fc28c46b + .quad 0x24a0a1af89ea664e + .quad 0x4400b638a1130e1f + .quad 0x3a01b76496ed19c3 + .quad 0x31e00ab0ed327230 + .quad 0x520a885783ca15b1 + .quad 0x06aab9875accbec7 + + // 2^232 * 1 * G + + .quad 0xc1339983f5df0ebb + .quad 0xc0f3758f512c4cac + .quad 0x2cf1130a0bb398e1 + .quad 0x6b3cecf9aa270c62 + .quad 0x5349acf3512eeaef + .quad 0x20c141d31cc1cb49 + .quad 0x24180c07a99a688d + .quad 0x555ef9d1c64b2d17 + .quad 0x36a770ba3b73bd08 + .quad 0x624aef08a3afbf0c + .quad 0x5737ff98b40946f2 + .quad 0x675f4de13381749d + + // 2^232 * 2 * G + + .quad 0x0e2c52036b1782fc + .quad 0x64816c816cad83b4 + .quad 0xd0dcbdd96964073e + .quad 0x13d99df70164c520 + .quad 0xa12ff6d93bdab31d + .quad 0x0725d80f9d652dfe + .quad 0x019c4ff39abe9487 + .quad 0x60f450b882cd3c43 + .quad 0x014b5ec321e5c0ca + .quad 0x4fcb69c9d719bfa2 + .quad 0x4e5f1c18750023a0 + .quad 0x1c06de9e55edac80 + + // 2^232 * 3 * G + + .quad 0x990f7ad6a33ec4e2 + .quad 0x6608f938be2ee08e + .quad 0x9ca143c563284515 + .quad 0x4cf38a1fec2db60d + .quad 0xffd52b40ff6d69aa + .quad 0x34530b18dc4049bb + .quad 0x5e4a5c2fa34d9897 + .quad 0x78096f8e7d32ba2d + .quad 0xa0aaaa650dfa5ce7 + .quad 0xf9c49e2a48b5478c + .quad 0x4f09cc7d7003725b + .quad 0x373cad3a26091abe + + // 2^232 * 4 * G + + .quad 0xb294634d82c9f57c + .quad 0x1fcbfde124934536 + .quad 0x9e9c4db3418cdb5a + .quad 0x0040f3d9454419fc + .quad 0xf1bea8fb89ddbbad + .quad 0x3bcb2cbc61aeaecb + .quad 0x8f58a7bb1f9b8d9d + .quad 0x21547eda5112a686 + .quad 0xdefde939fd5986d3 + .quad 0xf4272c89510a380c + .quad 0xb72ba407bb3119b9 + .quad 0x63550a334a254df4 + + // 2^232 * 5 * G + + .quad 0x6507d6edb569cf37 + .quad 0x178429b00ca52ee1 + .quad 0xea7c0090eb6bd65d + .quad 0x3eea62c7daf78f51 + .quad 0x9bba584572547b49 + .quad 0xf305c6fae2c408e0 + .quad 0x60e8fa69c734f18d + .quad 0x39a92bafaa7d767a + .quad 0x9d24c713e693274e + .quad 0x5f63857768dbd375 + .quad 0x70525560eb8ab39a + .quad 0x68436a0665c9c4cd + + // 2^232 * 6 * G + + .quad 0xbc0235e8202f3f27 + .quad 0xc75c00e264f975b0 + .quad 0x91a4e9d5a38c2416 + .quad 0x17b6e7f68ab789f9 + .quad 0x1e56d317e820107c + .quad 0xc5266844840ae965 + .quad 0xc1e0a1c6320ffc7a + .quad 0x5373669c91611472 + .quad 0x5d2814ab9a0e5257 + .quad 0x908f2084c9cab3fc + .quad 0xafcaf5885b2d1eca + .quad 0x1cb4b5a678f87d11 + + // 2^232 * 7 * G + + .quad 0xb664c06b394afc6c + .quad 0x0c88de2498da5fb1 + .quad 0x4f8d03164bcad834 + .quad 0x330bca78de7434a2 + .quad 0x6b74aa62a2a007e7 + .quad 0xf311e0b0f071c7b1 + .quad 0x5707e438000be223 + .quad 0x2dc0fd2d82ef6eac + .quad 0x982eff841119744e + .quad 0xf9695e962b074724 + .quad 0xc58ac14fbfc953fb + .quad 
0x3c31be1b369f1cf5 + + // 2^232 * 8 * G + + .quad 0xb0f4864d08948aee + .quad 0x07dc19ee91ba1c6f + .quad 0x7975cdaea6aca158 + .quad 0x330b61134262d4bb + .quad 0xc168bc93f9cb4272 + .quad 0xaeb8711fc7cedb98 + .quad 0x7f0e52aa34ac8d7a + .quad 0x41cec1097e7d55bb + .quad 0xf79619d7a26d808a + .quad 0xbb1fd49e1d9e156d + .quad 0x73d7c36cdba1df27 + .quad 0x26b44cd91f28777d + + // 2^236 * 1 * G + + .quad 0x300a9035393aa6d8 + .quad 0x2b501131a12bb1cd + .quad 0x7b1ff677f093c222 + .quad 0x4309c1f8cab82bad + .quad 0xaf44842db0285f37 + .quad 0x8753189047efc8df + .quad 0x9574e091f820979a + .quad 0x0e378d6069615579 + .quad 0xd9fa917183075a55 + .quad 0x4bdb5ad26b009fdc + .quad 0x7829ad2cd63def0e + .quad 0x078fc54975fd3877 + + // 2^236 * 2 * G + + .quad 0x87dfbd1428878f2d + .quad 0x134636dd1e9421a1 + .quad 0x4f17c951257341a3 + .quad 0x5df98d4bad296cb8 + .quad 0xe2004b5bb833a98a + .quad 0x44775dec2d4c3330 + .quad 0x3aa244067eace913 + .quad 0x272630e3d58e00a9 + .quad 0xf3678fd0ecc90b54 + .quad 0xf001459b12043599 + .quad 0x26725fbc3758b89b + .quad 0x4325e4aa73a719ae + + // 2^236 * 3 * G + + .quad 0x657dc6ef433c3493 + .quad 0x65375e9f80dbf8c3 + .quad 0x47fd2d465b372dae + .quad 0x4966ab79796e7947 + .quad 0xed24629acf69f59d + .quad 0x2a4a1ccedd5abbf4 + .quad 0x3535ca1f56b2d67b + .quad 0x5d8c68d043b1b42d + .quad 0xee332d4de3b42b0a + .quad 0xd84e5a2b16a4601c + .quad 0x78243877078ba3e4 + .quad 0x77ed1eb4184ee437 + + // 2^236 * 4 * G + + .quad 0xbfd4e13f201839a0 + .quad 0xaeefffe23e3df161 + .quad 0xb65b04f06b5d1fe3 + .quad 0x52e085fb2b62fbc0 + .quad 0x185d43f89e92ed1a + .quad 0xb04a1eeafe4719c6 + .quad 0x499fbe88a6f03f4f + .quad 0x5d8b0d2f3c859bdd + .quad 0x124079eaa54cf2ba + .quad 0xd72465eb001b26e7 + .quad 0x6843bcfdc97af7fd + .quad 0x0524b42b55eacd02 + + // 2^236 * 5 * G + + .quad 0xfd0d5dbee45447b0 + .quad 0x6cec351a092005ee + .quad 0x99a47844567579cb + .quad 0x59d242a216e7fa45 + .quad 0xbc18dcad9b829eac + .quad 0x23ae7d28b5f579d0 + .quad 0xc346122a69384233 + .quad 0x1a6110b2e7d4ac89 + .quad 0x4f833f6ae66997ac + .quad 0x6849762a361839a4 + .quad 0x6985dec1970ab525 + .quad 0x53045e89dcb1f546 + + // 2^236 * 6 * G + + .quad 0xcb8bb346d75353db + .quad 0xfcfcb24bae511e22 + .quad 0xcba48d40d50ae6ef + .quad 0x26e3bae5f4f7cb5d + .quad 0x84da3cde8d45fe12 + .quad 0xbd42c218e444e2d2 + .quad 0xa85196781f7e3598 + .quad 0x7642c93f5616e2b2 + .quad 0x2323daa74595f8e4 + .quad 0xde688c8b857abeb4 + .quad 0x3fc48e961c59326e + .quad 0x0b2e73ca15c9b8ba + + // 2^236 * 7 * G + + .quad 0xd6bb4428c17f5026 + .quad 0x9eb27223fb5a9ca7 + .quad 0xe37ba5031919c644 + .quad 0x21ce380db59a6602 + .quad 0x0e3fbfaf79c03a55 + .quad 0x3077af054cbb5acf + .quad 0xd5c55245db3de39f + .quad 0x015e68c1476a4af7 + .quad 0xc1d5285220066a38 + .quad 0x95603e523570aef3 + .quad 0x832659a7226b8a4d + .quad 0x5dd689091f8eedc9 + + // 2^236 * 8 * G + + .quad 0xcbac84debfd3c856 + .quad 0x1624c348b35ff244 + .quad 0xb7f88dca5d9cad07 + .quad 0x3b0e574da2c2ebe8 + .quad 0x1d022591a5313084 + .quad 0xca2d4aaed6270872 + .quad 0x86a12b852f0bfd20 + .quad 0x56e6c439ad7da748 + .quad 0xc704ff4942bdbae6 + .quad 0x5e21ade2b2de1f79 + .quad 0xe95db3f35652fad8 + .quad 0x0822b5378f08ebc1 + + // 2^240 * 1 * G + + .quad 0x51f048478f387475 + .quad 0xb25dbcf49cbecb3c + .quad 0x9aab1244d99f2055 + .quad 0x2c709e6c1c10a5d6 + .quad 0xe1b7f29362730383 + .quad 0x4b5279ffebca8a2c + .quad 0xdafc778abfd41314 + .quad 0x7deb10149c72610f + .quad 0xcb62af6a8766ee7a + .quad 0x66cbec045553cd0e + .quad 0x588001380f0be4b5 + .quad 0x08e68e9ff62ce2ea + + // 2^240 * 2 * G + + .quad 0x34ad500a4bc130ad + .quad 
0x8d38db493d0bd49c + .quad 0xa25c3d98500a89be + .quad 0x2f1f3f87eeba3b09 + .quad 0x2f2d09d50ab8f2f9 + .quad 0xacb9218dc55923df + .quad 0x4a8f342673766cb9 + .quad 0x4cb13bd738f719f5 + .quad 0xf7848c75e515b64a + .quad 0xa59501badb4a9038 + .quad 0xc20d313f3f751b50 + .quad 0x19a1e353c0ae2ee8 + + // 2^240 * 3 * G + + .quad 0x7d1c7560bafa05c3 + .quad 0xb3e1a0a0c6e55e61 + .quad 0xe3529718c0d66473 + .quad 0x41546b11c20c3486 + .quad 0xb42172cdd596bdbd + .quad 0x93e0454398eefc40 + .quad 0x9fb15347b44109b5 + .quad 0x736bd3990266ae34 + .quad 0x85532d509334b3b4 + .quad 0x46fd114b60816573 + .quad 0xcc5f5f30425c8375 + .quad 0x412295a2b87fab5c + + // 2^240 * 4 * G + + .quad 0x19c99b88f57ed6e9 + .quad 0x5393cb266df8c825 + .quad 0x5cee3213b30ad273 + .quad 0x14e153ebb52d2e34 + .quad 0x2e655261e293eac6 + .quad 0x845a92032133acdb + .quad 0x460975cb7900996b + .quad 0x0760bb8d195add80 + .quad 0x413e1a17cde6818a + .quad 0x57156da9ed69a084 + .quad 0x2cbf268f46caccb1 + .quad 0x6b34be9bc33ac5f2 + + // 2^240 * 5 * G + + .quad 0xf3df2f643a78c0b2 + .quad 0x4c3e971ef22e027c + .quad 0xec7d1c5e49c1b5a3 + .quad 0x2012c18f0922dd2d + .quad 0x11fc69656571f2d3 + .quad 0xc6c9e845530e737a + .quad 0xe33ae7a2d4fe5035 + .quad 0x01b9c7b62e6dd30b + .quad 0x880b55e55ac89d29 + .quad 0x1483241f45a0a763 + .quad 0x3d36efdfc2e76c1f + .quad 0x08af5b784e4bade8 + + // 2^240 * 6 * G + + .quad 0x283499dc881f2533 + .quad 0x9d0525da779323b6 + .quad 0x897addfb673441f4 + .quad 0x32b79d71163a168d + .quad 0xe27314d289cc2c4b + .quad 0x4be4bd11a287178d + .quad 0x18d528d6fa3364ce + .quad 0x6423c1d5afd9826e + .quad 0xcc85f8d9edfcb36a + .quad 0x22bcc28f3746e5f9 + .quad 0xe49de338f9e5d3cd + .quad 0x480a5efbc13e2dcc + + // 2^240 * 7 * G + + .quad 0x0b51e70b01622071 + .quad 0x06b505cf8b1dafc5 + .quad 0x2c6bb061ef5aabcd + .quad 0x47aa27600cb7bf31 + .quad 0xb6614ce442ce221f + .quad 0x6e199dcc4c053928 + .quad 0x663fb4a4dc1cbe03 + .quad 0x24b31d47691c8e06 + .quad 0x2a541eedc015f8c3 + .quad 0x11a4fe7e7c693f7c + .quad 0xf0af66134ea278d6 + .quad 0x545b585d14dda094 + + // 2^240 * 8 * G + + .quad 0x67bf275ea0d43a0f + .quad 0xade68e34089beebe + .quad 0x4289134cd479e72e + .quad 0x0f62f9c332ba5454 + .quad 0x6204e4d0e3b321e1 + .quad 0x3baa637a28ff1e95 + .quad 0x0b0ccffd5b99bd9e + .quad 0x4d22dc3e64c8d071 + .quad 0xfcb46589d63b5f39 + .quad 0x5cae6a3f57cbcf61 + .quad 0xfebac2d2953afa05 + .quad 0x1c0fa01a36371436 + + // 2^244 * 1 * G + + .quad 0xe7547449bc7cd692 + .quad 0x0f9abeaae6f73ddf + .quad 0x4af01ca700837e29 + .quad 0x63ab1b5d3f1bc183 + .quad 0xc11ee5e854c53fae + .quad 0x6a0b06c12b4f3ff4 + .quad 0x33540f80e0b67a72 + .quad 0x15f18fc3cd07e3ef + .quad 0x32750763b028f48c + .quad 0x06020740556a065f + .quad 0xd53bd812c3495b58 + .quad 0x08706c9b865f508d + + // 2^244 * 2 * G + + .quad 0xf37ca2ab3d343dff + .quad 0x1a8c6a2d80abc617 + .quad 0x8e49e035d4ccffca + .quad 0x48b46beebaa1d1b9 + .quad 0xcc991b4138b41246 + .quad 0x243b9c526f9ac26b + .quad 0xb9ef494db7cbabbd + .quad 0x5fba433dd082ed00 + .quad 0x9c49e355c9941ad0 + .quad 0xb9734ade74498f84 + .quad 0x41c3fed066663e5c + .quad 0x0ecfedf8e8e710b3 + + // 2^244 * 3 * G + + .quad 0x76430f9f9cd470d9 + .quad 0xb62acc9ba42f6008 + .quad 0x1898297c59adad5e + .quad 0x7789dd2db78c5080 + .quad 0x744f7463e9403762 + .quad 0xf79a8dee8dfcc9c9 + .quad 0x163a649655e4cde3 + .quad 0x3b61788db284f435 + .quad 0xb22228190d6ef6b2 + .quad 0xa94a66b246ce4bfa + .quad 0x46c1a77a4f0b6cc7 + .quad 0x4236ccffeb7338cf + + // 2^244 * 4 * G + + .quad 0x8497404d0d55e274 + .quad 0x6c6663d9c4ad2b53 + .quad 0xec2fb0d9ada95734 + .quad 0x2617e120cdb8f73c + .quad 
0x3bd82dbfda777df6 + .quad 0x71b177cc0b98369e + .quad 0x1d0e8463850c3699 + .quad 0x5a71945b48e2d1f1 + .quad 0x6f203dd5405b4b42 + .quad 0x327ec60410b24509 + .quad 0x9c347230ac2a8846 + .quad 0x77de29fc11ffeb6a + + // 2^244 * 5 * G + + .quad 0xb0ac57c983b778a8 + .quad 0x53cdcca9d7fe912c + .quad 0x61c2b854ff1f59dc + .quad 0x3a1a2cf0f0de7dac + .quad 0x835e138fecced2ca + .quad 0x8c9eaf13ea963b9a + .quad 0xc95fbfc0b2160ea6 + .quad 0x575e66f3ad877892 + .quad 0x99803a27c88fcb3a + .quad 0x345a6789275ec0b0 + .quad 0x459789d0ff6c2be5 + .quad 0x62f882651e70a8b2 + + // 2^244 * 6 * G + + .quad 0x085ae2c759ff1be4 + .quad 0x149145c93b0e40b7 + .quad 0xc467e7fa7ff27379 + .quad 0x4eeecf0ad5c73a95 + .quad 0x6d822986698a19e0 + .quad 0xdc9821e174d78a71 + .quad 0x41a85f31f6cb1f47 + .quad 0x352721c2bcda9c51 + .quad 0x48329952213fc985 + .quad 0x1087cf0d368a1746 + .quad 0x8e5261b166c15aa5 + .quad 0x2d5b2d842ed24c21 + + // 2^244 * 7 * G + + .quad 0x02cfebd9ebd3ded1 + .quad 0xd45b217739021974 + .quad 0x7576f813fe30a1b7 + .quad 0x5691b6f9a34ef6c2 + .quad 0x5eb7d13d196ac533 + .quad 0x377234ecdb80be2b + .quad 0xe144cffc7cf5ae24 + .quad 0x5226bcf9c441acec + .quad 0x79ee6c7223e5b547 + .quad 0x6f5f50768330d679 + .quad 0xed73e1e96d8adce9 + .quad 0x27c3da1e1d8ccc03 + + // 2^244 * 8 * G + + .quad 0x7eb9efb23fe24c74 + .quad 0x3e50f49f1651be01 + .quad 0x3ea732dc21858dea + .quad 0x17377bd75bb810f9 + .quad 0x28302e71630ef9f6 + .quad 0xc2d4a2032b64cee0 + .quad 0x090820304b6292be + .quad 0x5fca747aa82adf18 + .quad 0x232a03c35c258ea5 + .quad 0x86f23a2c6bcb0cf1 + .quad 0x3dad8d0d2e442166 + .quad 0x04a8933cab76862b + + // 2^248 * 1 * G + + .quad 0xd2c604b622943dff + .quad 0xbc8cbece44cfb3a0 + .quad 0x5d254ff397808678 + .quad 0x0fa3614f3b1ca6bf + .quad 0x69082b0e8c936a50 + .quad 0xf9c9a035c1dac5b6 + .quad 0x6fb73e54c4dfb634 + .quad 0x4005419b1d2bc140 + .quad 0xa003febdb9be82f0 + .quad 0x2089c1af3a44ac90 + .quad 0xf8499f911954fa8e + .quad 0x1fba218aef40ab42 + + // 2^248 * 2 * G + + .quad 0xab549448fac8f53e + .quad 0x81f6e89a7ba63741 + .quad 0x74fd6c7d6c2b5e01 + .quad 0x392e3acaa8c86e42 + .quad 0x4f3e57043e7b0194 + .quad 0xa81d3eee08daaf7f + .quad 0xc839c6ab99dcdef1 + .quad 0x6c535d13ff7761d5 + .quad 0x4cbd34e93e8a35af + .quad 0x2e0781445887e816 + .quad 0x19319c76f29ab0ab + .quad 0x25e17fe4d50ac13b + + // 2^248 * 3 * G + + .quad 0x0a289bd71e04f676 + .quad 0x208e1c52d6420f95 + .quad 0x5186d8b034691fab + .quad 0x255751442a9fb351 + .quad 0x915f7ff576f121a7 + .quad 0xc34a32272fcd87e3 + .quad 0xccba2fde4d1be526 + .quad 0x6bba828f8969899b + .quad 0xe2d1bc6690fe3901 + .quad 0x4cb54a18a0997ad5 + .quad 0x971d6914af8460d4 + .quad 0x559d504f7f6b7be4 + + // 2^248 * 4 * G + + .quad 0xa7738378b3eb54d5 + .quad 0x1d69d366a5553c7c + .quad 0x0a26cf62f92800ba + .quad 0x01ab12d5807e3217 + .quad 0x9c4891e7f6d266fd + .quad 0x0744a19b0307781b + .quad 0x88388f1d6061e23b + .quad 0x123ea6a3354bd50e + .quad 0x118d189041e32d96 + .quad 0xb9ede3c2d8315848 + .quad 0x1eab4271d83245d9 + .quad 0x4a3961e2c918a154 + + // 2^248 * 5 * G + + .quad 0x71dc3be0f8e6bba0 + .quad 0xd6cef8347effe30a + .quad 0xa992425fe13a476a + .quad 0x2cd6bce3fb1db763 + .quad 0x0327d644f3233f1e + .quad 0x499a260e34fcf016 + .quad 0x83b5a716f2dab979 + .quad 0x68aceead9bd4111f + .quad 0x38b4c90ef3d7c210 + .quad 0x308e6e24b7ad040c + .quad 0x3860d9f1b7e73e23 + .quad 0x595760d5b508f597 + + // 2^248 * 6 * G + + .quad 0x6129bfe104aa6397 + .quad 0x8f960008a4a7fccb + .quad 0x3f8bc0897d909458 + .quad 0x709fa43edcb291a9 + .quad 0x882acbebfd022790 + .quad 0x89af3305c4115760 + .quad 0x65f492e37d3473f4 + .quad 
0x2cb2c5df54515a2b + .quad 0xeb0a5d8c63fd2aca + .quad 0xd22bc1662e694eff + .quad 0x2723f36ef8cbb03a + .quad 0x70f029ecf0c8131f + + // 2^248 * 7 * G + + .quad 0x461307b32eed3e33 + .quad 0xae042f33a45581e7 + .quad 0xc94449d3195f0366 + .quad 0x0b7d5d8a6c314858 + .quad 0x2a6aafaa5e10b0b9 + .quad 0x78f0a370ef041aa9 + .quad 0x773efb77aa3ad61f + .quad 0x44eca5a2a74bd9e1 + .quad 0x25d448327b95d543 + .quad 0x70d38300a3340f1d + .quad 0xde1c531c60e1c52b + .quad 0x272224512c7de9e4 + + // 2^248 * 8 * G + + .quad 0x1abc92af49c5342e + .quad 0xffeed811b2e6fad0 + .quad 0xefa28c8dfcc84e29 + .quad 0x11b5df18a44cc543 + .quad 0xbf7bbb8a42a975fc + .quad 0x8c5c397796ada358 + .quad 0xe27fc76fcdedaa48 + .quad 0x19735fd7f6bc20a6 + .quad 0xe3ab90d042c84266 + .quad 0xeb848e0f7f19547e + .quad 0x2503a1d065a497b9 + .quad 0x0fef911191df895f + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/curve25519/edwards25519_scalarmulbase_alt.S b/x86_att/curve25519/edwards25519_scalarmulbase_alt.S new file mode 100644 index 0000000000..bb07e1f207 --- /dev/null +++ b/x86_att/curve25519/edwards25519_scalarmulbase_alt.S @@ -0,0 +1,9033 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Scalar multiplication for the edwards25519 standard basepoint +// Input scalar[4]; output res[8] +// +// extern void edwards25519_scalarmulbase_alt +// (uint64_t res[static 8],uint64_t scalar[static 4]); +// +// Given a scalar n, returns point (X,Y) = n * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve. +// +// Standard x86-64 ABI: RDI = res, RSI = scalar +// Microsoft x64 ABI: RCX = res, RDX = scalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_scalarmulbase_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_scalarmulbase_alt) + .text + +// Size of individual field elements + +#define NUMSIZE 32 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. +// The result "resx" assumes the "res" pointer has been preloaded into %rbp. + +#define resx (0*NUMSIZE)(%rbp) +#define resy (1*NUMSIZE)(%rbp) + +#define scalar (0*NUMSIZE)(%rsp) + +#define tabent (1*NUMSIZE)(%rsp) +#define ymx_2 (1*NUMSIZE)(%rsp) +#define xpy_2 (2*NUMSIZE)(%rsp) +#define kxy_2 (3*NUMSIZE)(%rsp) + +#define acc (4*NUMSIZE)(%rsp) +#define x_1 (4*NUMSIZE)(%rsp) +#define y_1 (5*NUMSIZE)(%rsp) +#define z_1 (6*NUMSIZE)(%rsp) +#define w_1 (7*NUMSIZE)(%rsp) +#define x_3 (4*NUMSIZE)(%rsp) +#define y_3 (5*NUMSIZE)(%rsp) +#define z_3 (6*NUMSIZE)(%rsp) +#define w_3 (7*NUMSIZE)(%rsp) + +#define tmpspace (8*NUMSIZE)(%rsp) +#define t0 (8*NUMSIZE)(%rsp) +#define t1 (9*NUMSIZE)(%rsp) +#define t2 (10*NUMSIZE)(%rsp) +#define t3 (11*NUMSIZE)(%rsp) +#define t4 (12*NUMSIZE)(%rsp) +#define t5 (13*NUMSIZE)(%rsp) + +// Stable homes for the input result pointer, and other variables + +#define res 14*NUMSIZE(%rsp) + +#define i 14*NUMSIZE+8(%rsp) + +#define bias 14*NUMSIZE+16(%rsp) + +#define bf 14*NUMSIZE+24(%rsp) +#define ix 14*NUMSIZE+24(%rsp) + +#define tab 15*NUMSIZE(%rsp) + +// Total size to reserve on the stack + +#define NSPACE (15*NUMSIZE+8) + +// Macro wrapping up the basic field multiplication, only trivially +// different from a pure function call to bignum_mul_p25519_alt. 
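+
+// (Added explanatory note, not part of the original source.) The reduction
+// in this macro exploits 2^256 == 38 (mod p_25519), since p_25519 =
+// 2^255 - 19, so a 512-bit product [h;l] satisfies [h;l] == 38 * h + l
+// (mod p_25519); that is what the mulq instructions against $0x26 = 38
+// below implement, digit by digit with carry chaining. A hedged C sketch
+// of one such folding step (names are illustrative, not s2n-bignum API):
+//
+//     #include <stdint.h>
+//     static uint64_t fold_digit(uint64_t h, uint64_t l, uint64_t *carry)
+//     {   unsigned __int128 t = (unsigned __int128) h * 38 + l;
+//         *carry = (uint64_t) (t >> 64);  // propagated into the next digit
+//         return (uint64_t) t;            // low 64 bits of the folded digit
+//     }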
+ +#define mul_p25519(P0,P1,P2) \ + movq P1, %rax ; \ + mulq P2; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + xorq %r10, %r10 ; \ + xorq %r11, %r11 ; \ + movq P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + movq 0x8+P1, %rax ; \ + mulq P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + xorq %r12, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq %r12, %r12 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + movq 0x10+P1, %rax ; \ + mulq P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorq %r13, %r13 ; \ + movq P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq %r13, %r13 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x18+P1, %rax ; \ + mulq P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + xorq %r14, %r14 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq %r14, %r14 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + xorq %r15, %r15 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq %r15, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r14 ; \ + adcq %rdx, %r15 ; \ + movl $0x26, %esi ; \ + movq %r12, %rax ; \ + mulq %rsi; \ + addq %rax, %r8 ; \ + adcq %rdx, %r9 ; \ + sbbq %rcx, %rcx ; \ + movq %r13, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + sbbq %rcx, %rcx ; \ + movq %r14, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + sbbq %rcx, %rcx ; \ + movq %r15, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + addq %rax, %r11 ; \ + movq %rdx, %r12 ; \ + adcq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + leaq 0x1(%r12), %rax ; \ + movl $0x13, %esi ; \ + bts $63, %r11 ; \ + imulq %rsi, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + sbbq %rax, %rax ; \ + notq %rax; \ + andq %rsi, %rax ; \ + subq %rax, %r8 ; \ + sbbq %rcx, %r9 ; \ + sbbq %rcx, %r10 ; \ + sbbq %rcx, %r11 ; \ + btr $63, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. 
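+
+// (Added note, not from the original source.) Concretely, after the 38-fold
+// the code splits the intermediate value s as s = 2^255 * q + r using
+// shldq/btr, and since 2^255 == 19 (mod p_25519) it just adds 19 * q back
+// into r. mul_4 stops there, accepting any representative below
+// 2 * p_25519, whereas mul_p25519 above goes on to estimate the quotient
+// as q + 1 and conditionally corrects, producing a fully reduced result.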
+ +#define mul_4(P0,P1,P2) \ + movq P1, %rax ; \ + mulq P2; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + xorq %r10, %r10 ; \ + xorq %r11, %r11 ; \ + movq P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + movq 0x8+P1, %rax ; \ + mulq P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + xorq %r12, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq %r12, %r12 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + movq 0x10+P1, %rax ; \ + mulq P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorq %r13, %r13 ; \ + movq P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq %r13, %r13 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x18+P1, %rax ; \ + mulq P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + xorq %r14, %r14 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq %r14, %r14 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + xorq %r15, %r15 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq %r15, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r14 ; \ + adcq %rdx, %r15 ; \ + movl $0x26, %ebx ; \ + movq %r12, %rax ; \ + mulq %rbx; \ + addq %rax, %r8 ; \ + adcq %rdx, %r9 ; \ + sbbq %rcx, %rcx ; \ + movq %r13, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + sbbq %rcx, %rcx ; \ + movq %r14, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + sbbq %rcx, %rcx ; \ + movq %r15, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + addq %rax, %r11 ; \ + movq %rdx, %r12 ; \ + adcq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + movq P1, %r8 ; \ + xorl %ebx, %ebx ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movl $38, %ecx ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %rax ; \ + sbbq 24+P2, %rax ; \ + cmovncq %rbx, %rcx ; \ + subq %rcx, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbx, %rax ; \ + movq %r8, P0 ; \ + movq %r9, 8+P0 ; \ + movq %r10, 16+P0 ; \ + movq %rax, 24+P0 + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. 
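+
+// (Added illustration, not part of the original source.) A hedged C model
+// of the addition idiom below: add the 4-digit numbers, then if the sum
+// carried out of 2^256, add 38 back in, because 2^256 == 38 modulo the
+// double modulus 2^256 - 38. Identifiers here are hypothetical:
+//
+//     #include <stdint.h>
+//     static void add_twice4_model(uint64_t z[4], const uint64_t x[4],
+//                                  const uint64_t y[4])
+//     {   unsigned __int128 c = 0;
+//         for (int i = 0; i < 4; i++)
+//         {   c += (unsigned __int128) x[i] + y[i];
+//             z[i] = (uint64_t) c; c >>= 64;
+//         }
+//         uint64_t adj = c ? 38 : 0;      // cmovncq selects 0 on no carry
+//         for (int i = 0; i < 4; i++)
+//         {   c = (unsigned __int128) z[i] + adj;
+//             z[i] = (uint64_t) c; adj = (uint64_t) (c >> 64);
+//         }
+//     }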
+
+#define add_twice4(P0,P1,P2) \
+        movq P1, %r8 ; \
+        xorl %ecx, %ecx ; \
+        addq P2, %r8 ; \
+        movq 0x8+P1, %r9 ; \
+        adcq 0x8+P2, %r9 ; \
+        movq 0x10+P1, %r10 ; \
+        adcq 0x10+P2, %r10 ; \
+        movq 0x18+P1, %r11 ; \
+        adcq 0x18+P2, %r11 ; \
+        movl $38, %eax ; \
+        cmovncq %rcx, %rax ; \
+        addq %rax, %r8 ; \
+        adcq %rcx, %r9 ; \
+        adcq %rcx, %r10 ; \
+        adcq %rcx, %r11 ; \
+        movq %r8, P0 ; \
+        movq %r9, 0x8+P0 ; \
+        movq %r10, 0x10+P0 ; \
+        movq %r11, 0x18+P0
+
+#define double_twice4(P0,P1) \
+        movq P1, %r8 ; \
+        xorl %ecx, %ecx ; \
+        addq %r8, %r8 ; \
+        movq 0x8+P1, %r9 ; \
+        adcq %r9, %r9 ; \
+        movq 0x10+P1, %r10 ; \
+        adcq %r10, %r10 ; \
+        movq 0x18+P1, %r11 ; \
+        adcq %r11, %r11 ; \
+        movl $38, %eax ; \
+        cmovncq %rcx, %rax ; \
+        addq %rax, %r8 ; \
+        adcq %rcx, %r9 ; \
+        adcq %rcx, %r10 ; \
+        adcq %rcx, %r11 ; \
+        movq %r8, P0 ; \
+        movq %r9, 0x8+P0 ; \
+        movq %r10, 0x10+P0 ; \
+        movq %r11, 0x18+P0
+
+S2N_BN_SYMBOL(edwards25519_scalarmulbase_alt):
+
+// In this case the Windows form literally makes a subroutine call.
+// This avoids hassle arising from keeping code and data together.
+
+#if WINDOWS_ABI
+        pushq %rdi
+        pushq %rsi
+        movq %rcx, %rdi
+        movq %rdx, %rsi
+        callq curve25519_x25519base_standard
+        popq %rsi
+        popq %rdi
+        ret
+
+curve25519_x25519base_standard:
+#endif
+
+// Save registers, make room for temps, preserve input arguments.
+
+        pushq %rbx
+        pushq %rbp
+        pushq %r12
+        pushq %r13
+        pushq %r14
+        pushq %r15
+        subq $NSPACE, %rsp
+
+// Move the output pointer to a stable place
+
+        movq %rdi, res
+
+// Copy the input scalar x to its local variable while reducing it
+// modulo 2^252 + m where m = 27742317777372353535851937790883648493;
+// this is the order of the basepoint so this doesn't change the result.
+// First do q = floor(x/2^252) and x' = x - q * (2^252 + m), which gives
+// an initial result -15 * m <= x' < 2^252
+
+        movq (%rsi), %r8
+        movq 8(%rsi), %r9
+        movq 16(%rsi), %r10
+        movq 24(%rsi), %r11
+
+        movq %r11, %rcx
+        shrq $60, %rcx
+
+        movq $0x5812631a5cf5d3ed, %rax
+        mulq %rcx
+        movq %rax, %r12
+        movq %rdx, %r13
+        movq $0x14def9dea2f79cd6, %rax
+        mulq %rcx
+        addq %rax, %r13
+        adcq $0, %rdx
+        shlq $60, %rcx
+
+        subq %r12, %r8
+        sbbq %r13, %r9
+        sbbq %rdx, %r10
+        sbbq %rcx, %r11
+
+// If x' < 0 then just directly negate it; this makes sure the
+// reduced argument is strictly 0 <= x' < 2^252, but now we need
+// to record (done via bit 255 of the reduced scalar, which is
+// ignored in the main loop) when we negated so we can flip
+// the end result to compensate.
+
+        sbbq %rax, %rax
+
+        xorq %rax, %r8
+        xorq %rax, %r9
+        xorq %rax, %r10
+        xorq %rax, %r11
+
+        negq %rax
+        adcq $0, %r8
+        adcq $0, %r9
+        adcq $0, %r10
+        adcq $0, %r11
+
+        shlq $63, %rax
+        orq %rax, %r11
+
+// And before we store the scalar, test and reset bit 251 to
+// initialize the main loop just below.
+
+        movq %r8, (%rsp)
+        movq %r9, 8(%rsp)
+        movq %r10, 16(%rsp)
+        btr $59, %r11
+        movq %r11, 24(%rsp)
+
+// The main part of the computation is in extended-projective coordinates
+// (X,Y,Z,T), representing an affine point on the edwards25519 curve
+// (x,y) via x = X/Z, y = Y/Z and x * y = T/Z (so X * Y = T * Z).
+// In comments B means the standard basepoint (x,4/5) =
+// (0x216....f25d51a,0x6666..666658).
+//
+// Initialize accumulator "acc" to either 0 or 2^251 * B depending on
+// bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle.
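+
+// (Added note, not in the original source.) The btr $59 above left bit 251
+// of the reduced scalar in the carry flag, and the leaq/movq instructions
+// below do not modify flags, so each cmovcq selects the corresponding digit
+// of 2^251 * B over that of the zero point on the secret bit in constant
+// time, with no data-dependent branch.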
+
+        leaq edwards25519_0g(%rip), %r10
+        leaq edwards25519_251g(%rip), %r11
+
+        movq (%r10), %rax
+        movq (%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*16(%rsp)
+
+        movq 8*1(%r10), %rax
+        movq 8*1(%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*17(%rsp)
+
+        movq 8*2(%r10), %rax
+        movq 8*2(%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*18(%rsp)
+
+        movq 8*3(%r10), %rax
+        movq 8*3(%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*19(%rsp)
+
+        movq 8*4(%r10), %rax
+        movq 8*4(%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*20(%rsp)
+
+        movq 8*5(%r10), %rax
+        movq 8*5(%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*21(%rsp)
+
+        movq 8*6(%r10), %rax
+        movq 8*6(%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*22(%rsp)
+
+        movq 8*7(%r10), %rax
+        movq 8*7(%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*23(%rsp)
+
+        movl $1, %eax
+        movq %rax, 8*24(%rsp)
+        movl $0, %eax
+        movq %rax, 8*25(%rsp)
+        movq %rax, 8*26(%rsp)
+        movq %rax, 8*27(%rsp)
+
+        movq 8*8(%r10), %rax
+        movq 8*8(%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*28(%rsp)
+
+        movq 8*9(%r10), %rax
+        movq 8*9(%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*29(%rsp)
+
+        movq 8*10(%r10), %rax
+        movq 8*10(%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*30(%rsp)
+
+        movq 8*11(%r10), %rax
+        movq 8*11(%r11), %rcx
+        cmovcq %rcx, %rax
+        movq %rax, 8*31(%rsp)
+
+// The counter "i" tracks the bit position for which the scalar has
+// already been absorbed, starting at 0 and going up in chunks of 4.
+//
+// The pointer "tab" points at the current block of the table for
+// multiples (2^i * j) * B at the current bit position i; 1 <= j <= 8.
+//
+// The bias is always either 0 or 1 and needs to be added to the
+// partially processed scalar implicitly. This is used to absorb 4 bits
+// of scalar per iteration from 3-bit table indexing by exploiting
+// negation: (16 * h + l) * B = (16 * (h + 1) - (16 - l)) * B is used
+// when l >= 9. Note that we can't have any bias left over at the
+// end because we made sure bit 251 is clear in the reduced scalar.
+
+        movq $0, i
+        leaq edwards25519_gtable(%rip), %rax
+        movq %rax, tab
+        movq $0, bias
+
+// Start of the main loop, repeated 63 times for i = 4, 8, ..., 252
+
+scalarloop:
+
+// Look at the next 4-bit field "bf", adding the previous bias as well.
+// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9,
+// setting the bias to 1 for the next iteration in the latter case.
+
+        movq i, %rax
+        movq %rax, %rcx
+        shrq $6, %rax
+        movq (%rsp,%rax,8), %rax // Exploiting scalar = sp exactly
+        shrq %cl, %rax
+        andq $15, %rax
+        addq bias, %rax
+        movq %rax, bf
+
+        cmpq $9, bf
+        sbbq %rax, %rax
+        incq %rax
+        movq %rax, bias
+
+        movq $16, %rdi
+        subq bf, %rdi
+        cmpq $0, bias
+        cmovzq bf, %rdi
+        movq %rdi, ix
+
+// Perform constant-time lookup in the table to get element number "ix".
+// The table entry for the affine point (x,y) is actually a triple
+// (y - x,x + y,2 * d * x * y) to precompute parts of the addition.
+// Note that "ix" can be 0, so we set up the appropriate identity first.
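+
+// (Added illustration, not from the original source.) A minimal C sketch of
+// the branch-free selection that the unrolled code below performs; the
+// identifiers are hypothetical, not s2n-bignum API:
+//
+//     #include <stdint.h>
+//     static void select_entry(uint64_t ent[12], const uint64_t *tab,
+//                              uint64_t ix)  // ix in 0..8
+//     {   static const uint64_t id[12] =     // identity triple (1,1,0)
+//             {1,0,0,0, 1,0,0,0, 0,0,0,0};
+//         for (int k = 0; k < 12; k++) ent[k] = id[k];
+//         for (uint64_t j = 1; j <= 8; j++)
+//             for (int k = 0; k < 12; k++)   // intended to map to cmovzq
+//                 ent[k] = (j == ix) ? tab[12*(j-1)+k] : ent[k];
+//     }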
+ + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + movq tab, %rbp + + cmpq $1, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $2, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $3, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $4, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $5, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $6, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $7, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), 
%rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $8, ix + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + + addq $96, %rbp + movq %rbp, tab + +// We now have the triple from the table in registers as follows +// +// [%rdx;%rcx;%rbx;%rax] = y - x +// [%r11;%r10;%r9;%r8] = x + y +// [%r15;%r14;%r13;%r12] = 2 * d * x * y +// +// In case bias = 1 we need to negate this. For Edwards curves +// -(x,y) = (-x,y), i.e. we need to negate the x coordinate. +// In this processed encoding, that amounts to swapping the +// first two fields and negating the third. +// +// The optional negation here also pretends bias = 0 whenever +// ix = 0 so that it doesn't need to handle the case of zero +// inputs, since no non-trivial table entries are zero. Note +// that in the zero case the whole negation is trivial, and +// so indeed is the swapping. + + cmpq $0, bias + + movq %rax, %rsi + cmovnzq %r8, %rsi + cmovnzq %rax, %r8 + movq %rsi, 32(%rsp) + movq %r8, 64(%rsp) + + movq %rbx, %rsi + cmovnzq %r9, %rsi + cmovnzq %rbx, %r9 + movq %rsi, 40(%rsp) + movq %r9, 72(%rsp) + + movq %rcx, %rsi + cmovnzq %r10, %rsi + cmovnzq %rcx, %r10 + movq %rsi, 48(%rsp) + movq %r10, 80(%rsp) + + movq %rdx, %rsi + cmovnzq %r11, %rsi + cmovnzq %rdx, %r11 + movq %rsi, 56(%rsp) + movq %r11, 88(%rsp) + + movq $-19, %rax + movq $-1, %rbx + movq $-1, %rcx + movq $0x7fffffffffffffff, %rdx + subq %r12, %rax + sbbq %r13, %rbx + sbbq %r14, %rcx + sbbq %r15, %rdx + + movq ix, %r8 + movq bias, %r9 + testq %r8, %r8 + cmovzq %r8, %r9 + testq %r9, %r9 + + cmovzq %r12, %rax + cmovzq %r13, %rbx + cmovzq %r14, %rcx + cmovzq %r15, %rdx + movq %rax, 96(%rsp) + movq %rbx, 104(%rsp) + movq %rcx, 112(%rsp) + movq %rdx, 120(%rsp) + +// Extended-projective and precomputed mixed addition. +// This is effectively the same as calling the standalone +// function edwards25519_pepadd(acc,acc,tabent), but we +// only retain slightly weaker normalization < 2 * p_25519 +// throughout the inner loop, so the computation is +// slightly different, and faster overall. + + double_twice4(t0,z_1) + sub_twice4(t1,y_1,x_1) + add_twice4(t2,y_1,x_1) + mul_4(t3,w_1,kxy_2) + mul_4(t1,t1,ymx_2) + mul_4(t2,t2,xpy_2) + sub_twice4(t4,t0,t3) + add_twice4(t0,t0,t3) + sub_twice4(t5,t2,t1) + add_twice4(t1,t2,t1) + mul_4(z_3,t4,t0) + mul_4(x_3,t5,t4) + mul_4(y_3,t0,t1) + mul_4(w_3,t5,t1) + +// End of the main loop; move on by 4 bits. + + addq $4, i + cmpq $252, i + jc scalarloop + +// Insert the optional negation of the projective X coordinate, and +// so by extension the final affine x coordinate x = X/Z and thus +// the point P = (x,y). We only know X < 2 * p_25519, so we do the +// negation as 2 * p_25519 - X to keep it nonnegative. 
From this +// point on we don't need any normalization of the coordinates +// except for making sure that they fit in 4 digits. + + movq 128(%rsp), %r8 + movq 136(%rsp), %r9 + movq 144(%rsp), %r10 + movq 152(%rsp), %r11 + movq $0xffffffffffffffda, %r12 + subq %r8, %r12 + movq $0xffffffffffffffff, %r13 + sbbq %r9, %r13 + movq $0xffffffffffffffff, %r14 + sbbq %r10, %r14 + movq $0xffffffffffffffff, %r15 + sbbq %r11, %r15 + movq 24(%rsp), %rax + btq $63, %rax + cmovcq %r12, %r8 + cmovcq %r13, %r9 + cmovcq %r14, %r10 + cmovcq %r15, %r11 + movq %r8, 128(%rsp) + movq %r9, 136(%rsp) + movq %r10, 144(%rsp) + movq %r11, 152(%rsp) + +// Now we need to map out of the extended-projective representation +// (X,Y,Z,W) back to the affine form (x,y) = (X/Z,Y/Z). This means +// first calling the modular inverse to get w_3 = 1/z_3. + + movq $4, %rdi + leaq 224(%rsp), %rsi + leaq 192(%rsp), %rdx + leaq p_25519(%rip), %rcx + leaq 256(%rsp), %r8 + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "x86/generic/bignum_modinv.S". Note +// that the stack it uses for its own temporaries is 80 bytes so it +// only overwrites local variables that are no longer needed. + + movq %rsi, 0x40(%rsp) + movq %r8, 0x38(%rsp) + movq %rcx, 0x48(%rsp) + leaq (%r8,%rdi,8), %r10 + movq %r10, 0x30(%rsp) + leaq (%r10,%rdi,8), %r15 + xorq %r11, %r11 + xorq %r9, %r9 +copyloop: + movq (%rdx,%r9,8), %rax + movq (%rcx,%r9,8), %rbx + movq %rax, (%r10,%r9,8) + movq %rbx, (%r15,%r9,8) + movq %rbx, (%r8,%r9,8) + movq %r11, (%rsi,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb copyloop + movq (%r8), %rax + movq %rax, %rbx + decq %rbx + movq %rbx, (%r8) + movq %rax, %rbp + movq %rax, %r12 + shlq $0x2, %rbp + subq %rbp, %r12 + xorq $0x2, %r12 + movq %r12, %rbp + imulq %rax, %rbp + movl $0x2, %eax + addq %rbp, %rax + addq $0x1, %rbp + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + movq %r12, 0x28(%rsp) + movq %rdi, %rax + shlq $0x7, %rax + movq %rax, 0x20(%rsp) +outerloop: + movq 0x20(%rsp), %r13 + addq $0x3f, %r13 + shrq $0x6, %r13 + cmpq %rdi, %r13 + cmovaeq %rdi, %r13 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi + xorq %r11, %r11 + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 +toploop: + movq (%r8,%r9,8), %rbx + movq (%r15,%r9,8), %rcx + movq %r11, %r10 + andq %r12, %r10 + andq %rbp, %r11 + movq %rbx, %rax + orq %rcx, %rax + negq %rax + cmovbq %r10, %r14 + cmovbq %r11, %rsi + cmovbq %rbx, %r12 + cmovbq %rcx, %rbp + sbbq %r11, %r11 + incq %r9 + cmpq %r13, %r9 + jb toploop + movq %r12, %rax + orq %rbp, %rax + bsrq %rax, %rcx + xorq $0x3f, %rcx + shldq %cl, %r14, %r12 + shldq %cl, %rsi, %rbp + movq (%r8), %rax + movq %rax, %r14 + movq (%r15), %rax + movq %rax, %rsi + movl $0x1, %r10d + movl $0x0, %r11d + movl $0x0, %ecx + movl $0x1, %edx + movl $0x3a, %r9d + movq %rdi, 0x8(%rsp) + movq %r13, 0x10(%rsp) + movq %r8, (%rsp) + movq %r15, 0x18(%rsp) +innerloop: + xorl %eax, %eax + xorl %ebx, %ebx + xorq %r8, %r8 + xorq %r15, %r15 + btq $0x0, %r14 + cmovbq %rbp, %rax + cmovbq %rsi, %rbx + cmovbq %rcx, %r8 + cmovbq %rdx, %r15 + movq %r14, %r13 + subq %rbx, %r14 + subq %r13, %rbx + movq %r12, %rdi + subq %rax, %rdi + cmovbq %r12, 
%rbp + leaq -0x1(%rdi), %r12 + cmovbq %rbx, %r14 + cmovbq %r13, %rsi + notq %r12 + cmovbq %r10, %rcx + cmovbq %r11, %rdx + cmovaeq %rdi, %r12 + shrq $1, %r14 + addq %r8, %r10 + addq %r15, %r11 + shrq $1, %r12 + addq %rcx, %rcx + addq %rdx, %rdx + decq %r9 + jne innerloop + movq 0x8(%rsp), %rdi + movq 0x10(%rsp), %r13 + movq (%rsp), %r8 + movq 0x18(%rsp), %r15 + movq %r10, (%rsp) + movq %r11, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rdx, 0x18(%rsp) + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + xorq %r14, %r14 + xorq %rsi, %rsi + xorq %r10, %r10 + xorq %r11, %r11 + xorq %r9, %r9 +congloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r12 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %rbp + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq %rdx, %r12 + shrdq $0x3a, %r14, %r10 + movq %r10, (%r8,%r9,8) + movq %r14, %r10 + movq %r12, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3a, %rsi, %r11 + movq %r11, (%r15,%r9,8) + movq %rsi, %r11 + movq %rbp, %rsi + incq %r9 + cmpq %rdi, %r9 + jb congloop + shldq $0x6, %r10, %r14 + shldq $0x6, %r11, %rsi + movq 0x48(%rsp), %r15 + movq (%r8), %rbx + movq 0x28(%rsp), %r12 + imulq %rbx, %r12 + movq (%r15), %rax + mulq %r12 + addq %rbx, %rax + movq %rdx, %r10 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je wmontend +wmontloop: + adcq (%r8,%r9,8), %r10 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %r12 + subq %rbx, %rdx + addq %r10, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r10 + incq %r9 + decq %rcx + jne wmontloop +wmontend: + adcq %r14, %r10 + movq %r10, -0x8(%r8,%rdi,8) + sbbq %r10, %r10 + negq %r10 + movq %rdi, %rcx + xorq %r9, %r9 +wcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne wcmploop + sbbq $0x0, %r10 + sbbq %r10, %r10 + notq %r10 + xorq %rcx, %rcx + xorq %r9, %r9 +wcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r10, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb wcorrloop + movq 0x40(%rsp), %r8 + movq (%r8), %rbx + movq 0x28(%rsp), %rbp + imulq %rbx, %rbp + movq (%r15), %rax + mulq %rbp + addq %rbx, %rax + movq %rdx, %r11 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je zmontend +zmontloop: + adcq (%r8,%r9,8), %r11 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %rbp + subq %rbx, %rdx + addq %r11, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r11 + incq %r9 + decq %rcx + jne zmontloop +zmontend: + adcq %rsi, %r11 + movq %r11, -0x8(%r8,%rdi,8) + sbbq %r11, %r11 + negq %r11 + movq %rdi, %rcx + xorq %r9, %r9 +zcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne zcmploop + sbbq $0x0, %r11 + sbbq %r11, %r11 + notq %r11 + xorq %rcx, %rcx + xorq %r9, %r9 +zcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r11, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb zcorrloop + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi +crossloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %r11 + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + subq %r12, %rdx + subq %rax, %r14 + sbbq 
%rdx, %r10 + sbbq %r12, %r12 + movq %r14, (%r8,%r9,8) + movq %r10, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + subq %rbp, %rdx + subq %rax, %rsi + sbbq %rdx, %r11 + sbbq %rbp, %rbp + movq %rsi, (%r15,%r9,8) + movq %r11, %rsi + incq %r9 + cmpq %r13, %r9 + jb crossloop + xorq %r9, %r9 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r12, %r14 + xorq %rbp, %rsi +optnegloop: + movq (%r8,%r9,8), %rax + xorq %r12, %rax + negq %r10 + adcq $0x0, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rax + xorq %rbp, %rax + negq %r11 + adcq $0x0, %rax + sbbq %r11, %r11 + movq %rax, (%r15,%r9,8) + incq %r9 + cmpq %r13, %r9 + jb optnegloop + subq %r10, %r14 + subq %r11, %rsi + movq %r13, %r9 +shiftloop: + movq -0x8(%r8,%r9,8), %rax + movq %rax, %r10 + shrdq $0x3a, %r14, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %r10, %r14 + movq -0x8(%r15,%r9,8), %rax + movq %rax, %r11 + shrdq $0x3a, %rsi, %rax + movq %rax, -0x8(%r15,%r9,8) + movq %r11, %rsi + decq %r9 + jne shiftloop + notq %rbp + movq 0x48(%rsp), %rcx + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r9, %r9 +fliploop: + movq %rbp, %rdx + movq (%rcx,%r9,8), %rax + andq %rax, %rdx + andq %r12, %rax + movq (%r8,%r9,8), %rbx + xorq %r12, %rbx + negq %r10 + adcq %rbx, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rbx + xorq %rbp, %rbx + negq %r11 + adcq %rbx, %rdx + sbbq %r11, %r11 + movq %rdx, (%r15,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb fliploop + subq $0x3a, 0x20(%rsp) + ja outerloop + +// The final result is x = X * inv(Z), y = Y * inv(Z). +// These are the only operations in the whole computation that +// fully reduce modulo p_25519 since now we want the canonical +// answer as output. + + movq res, %rbp + mul_p25519(resx,x_3,w_3) + mul_p25519(resy,y_3,w_3) + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// The modulus, for the modular inverse + +p_25519: + .quad 0xffffffffffffffed + .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x7fffffffffffffff + +// 0 * B = 0 and 2^251 * B in extended-projective coordinates +// but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. + +edwards25519_0g: + + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + + .quad 0x0000000000000001 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .quad 0x0000000000000000 + +edwards25519_251g: + + .quad 0x525f946d7c7220e7 + .quad 0x4636b0b2f1e35444 + .quad 0x796e9d70e892ae0f + .quad 0x03dec05fa937adb1 + .quad 0x6d1c271cc6375515 + .quad 0x462588c4a4ca4f14 + .quad 0x691129fee55afc39 + .quad 0x15949f784d8472f5 + .quad 0xbd89e510afad0049 + .quad 0x4d1f08c073b9860e + .quad 0x07716e8b2d00af9d + .quad 0x70d685f68f859714 + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
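[As a cross-check on why the table stores (y - x, x + y, 2*d*x*y) rather
than raw affine coordinates: with the accumulator in extended-projective
form (X1:Y1:Z1:T1) and the table point affine (so Z2 = 1 implicitly), the
double_twice4/sub_twice4/add_twice4/mul_4 sequence in the main loop above
computes what I believe are the standard extended-coordinate mixed-addition
formulas, up to the weaker < 2 * p_25519 normalization kept in the loop:

    \begin{aligned}
    A &= (Y_1 - X_1)(y_2 - x_2), & B &= (Y_1 + X_1)(x_2 + y_2),\\
    C &= T_1 \cdot (2 d x_2 y_2), & D &= 2 Z_1,\\
    X_3 &= (B - A)(D - C), & Y_3 &= (D + C)(B + A),\\
    Z_3 &= (D - C)(D + C), & T_3 &= (B - A)(B + A).
    \end{aligned}

Each table entry thus contributes exactly the three per-point factors
needed for A, B and C, and no multiplication by Z2 ever appears.]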
+ +edwards25519_gtable: + + // 2^0 * 1 * G + + .quad 0x9d103905d740913e + .quad 0xfd399f05d140beb3 + .quad 0xa5c18434688f8a09 + .quad 0x44fd2f9298f81267 + .quad 0x2fbc93c6f58c3b85 + .quad 0xcf932dc6fb8c0e19 + .quad 0x270b4898643d42c2 + .quad 0x07cf9d3a33d4ba65 + .quad 0xabc91205877aaa68 + .quad 0x26d9e823ccaac49e + .quad 0x5a1b7dcbdd43598c + .quad 0x6f117b689f0c65a8 + + // 2^0 * 2 * G + + .quad 0x8a99a56042b4d5a8 + .quad 0x8f2b810c4e60acf6 + .quad 0xe09e236bb16e37aa + .quad 0x6bb595a669c92555 + .quad 0x9224e7fc933c71d7 + .quad 0x9f469d967a0ff5b5 + .quad 0x5aa69a65e1d60702 + .quad 0x590c063fa87d2e2e + .quad 0x43faa8b3a59b7a5f + .quad 0x36c16bdd5d9acf78 + .quad 0x500fa0840b3d6a31 + .quad 0x701af5b13ea50b73 + + // 2^0 * 3 * G + + .quad 0x56611fe8a4fcd265 + .quad 0x3bd353fde5c1ba7d + .quad 0x8131f31a214bd6bd + .quad 0x2ab91587555bda62 + .quad 0xaf25b0a84cee9730 + .quad 0x025a8430e8864b8a + .quad 0xc11b50029f016732 + .quad 0x7a164e1b9a80f8f4 + .quad 0x14ae933f0dd0d889 + .quad 0x589423221c35da62 + .quad 0xd170e5458cf2db4c + .quad 0x5a2826af12b9b4c6 + + // 2^0 * 4 * G + + .quad 0x95fe050a056818bf + .quad 0x327e89715660faa9 + .quad 0xc3e8e3cd06a05073 + .quad 0x27933f4c7445a49a + .quad 0x287351b98efc099f + .quad 0x6765c6f47dfd2538 + .quad 0xca348d3dfb0a9265 + .quad 0x680e910321e58727 + .quad 0x5a13fbe9c476ff09 + .quad 0x6e9e39457b5cc172 + .quad 0x5ddbdcf9102b4494 + .quad 0x7f9d0cbf63553e2b + + // 2^0 * 5 * G + + .quad 0x7f9182c3a447d6ba + .quad 0xd50014d14b2729b7 + .quad 0xe33cf11cb864a087 + .quad 0x154a7e73eb1b55f3 + .quad 0xa212bc4408a5bb33 + .quad 0x8d5048c3c75eed02 + .quad 0xdd1beb0c5abfec44 + .quad 0x2945ccf146e206eb + .quad 0xbcbbdbf1812a8285 + .quad 0x270e0807d0bdd1fc + .quad 0xb41b670b1bbda72d + .quad 0x43aabe696b3bb69a + + // 2^0 * 6 * G + + .quad 0x499806b67b7d8ca4 + .quad 0x575be28427d22739 + .quad 0xbb085ce7204553b9 + .quad 0x38b64c41ae417884 + .quad 0x3a0ceeeb77157131 + .quad 0x9b27158900c8af88 + .quad 0x8065b668da59a736 + .quad 0x51e57bb6a2cc38bd + .quad 0x85ac326702ea4b71 + .quad 0xbe70e00341a1bb01 + .quad 0x53e4a24b083bc144 + .quad 0x10b8e91a9f0d61e3 + + // 2^0 * 7 * G + + .quad 0xba6f2c9aaa3221b1 + .quad 0x6ca021533bba23a7 + .quad 0x9dea764f92192c3a + .quad 0x1d6edd5d2e5317e0 + .quad 0x6b1a5cd0944ea3bf + .quad 0x7470353ab39dc0d2 + .quad 0x71b2528228542e49 + .quad 0x461bea69283c927e + .quad 0xf1836dc801b8b3a2 + .quad 0xb3035f47053ea49a + .quad 0x529c41ba5877adf3 + .quad 0x7a9fbb1c6a0f90a7 + + // 2^0 * 8 * G + + .quad 0xe2a75dedf39234d9 + .quad 0x963d7680e1b558f9 + .quad 0x2c2741ac6e3c23fb + .quad 0x3a9024a1320e01c3 + .quad 0x59b7596604dd3e8f + .quad 0x6cb30377e288702c + .quad 0xb1339c665ed9c323 + .quad 0x0915e76061bce52f + .quad 0xe7c1f5d9c9a2911a + .quad 0xb8a371788bcca7d7 + .quad 0x636412190eb62a32 + .quad 0x26907c5c2ecc4e95 + + // 2^4 * 1 * G + + .quad 0x7ec851ca553e2df3 + .quad 0xa71284cba64878b3 + .quad 0xe6b5e4193288d1e7 + .quad 0x4cf210ec5a9a8883 + .quad 0x322d04a52d9021f6 + .quad 0xb9c19f3375c6bf9c + .quad 0x587a3a4342d20b09 + .quad 0x143b1cf8aa64fe61 + .quad 0x9f867c7d968acaab + .quad 0x5f54258e27092729 + .quad 0xd0a7d34bea180975 + .quad 0x21b546a3374126e1 + + // 2^4 * 2 * G + + .quad 0xa94ff858a2888343 + .quad 0xce0ed4565313ed3c + .quad 0xf55c3dcfb5bf34fa + .quad 0x0a653ca5c9eab371 + .quad 0x490a7a45d185218f + .quad 0x9a15377846049335 + .quad 0x0060ea09cc31e1f6 + .quad 0x7e041577f86ee965 + .quad 0x66b2a496ce5b67f3 + .quad 0xff5492d8bd569796 + .quad 0x503cec294a592cd0 + .quad 0x566943650813acb2 + + // 2^4 * 3 * G + + .quad 0xb818db0c26620798 + .quad 0x5d5c31d9606e354a + 
.quad 0x0982fa4f00a8cdc7 + .quad 0x17e12bcd4653e2d4 + .quad 0x5672f9eb1dabb69d + .quad 0xba70b535afe853fc + .quad 0x47ac0f752796d66d + .quad 0x32a5351794117275 + .quad 0xd3a644a6df648437 + .quad 0x703b6559880fbfdd + .quad 0xcb852540ad3a1aa5 + .quad 0x0900b3f78e4c6468 + + // 2^4 * 4 * G + + .quad 0x0a851b9f679d651b + .quad 0xe108cb61033342f2 + .quad 0xd601f57fe88b30a3 + .quad 0x371f3acaed2dd714 + .quad 0xed280fbec816ad31 + .quad 0x52d9595bd8e6efe3 + .quad 0x0fe71772f6c623f5 + .quad 0x4314030b051e293c + .quad 0xd560005efbf0bcad + .quad 0x8eb70f2ed1870c5e + .quad 0x201f9033d084e6a0 + .quad 0x4c3a5ae1ce7b6670 + + // 2^4 * 5 * G + + .quad 0x4138a434dcb8fa95 + .quad 0x870cf67d6c96840b + .quad 0xde388574297be82c + .quad 0x7c814db27262a55a + .quad 0xbaf875e4c93da0dd + .quad 0xb93282a771b9294d + .quad 0x80d63fb7f4c6c460 + .quad 0x6de9c73dea66c181 + .quad 0x478904d5a04df8f2 + .quad 0xfafbae4ab10142d3 + .quad 0xf6c8ac63555d0998 + .quad 0x5aac4a412f90b104 + + // 2^4 * 6 * G + + .quad 0xc64f326b3ac92908 + .quad 0x5551b282e663e1e0 + .quad 0x476b35f54a1a4b83 + .quad 0x1b9da3fe189f68c2 + .quad 0x603a0d0abd7f5134 + .quad 0x8089c932e1d3ae46 + .quad 0xdf2591398798bd63 + .quad 0x1c145cd274ba0235 + .quad 0x32e8386475f3d743 + .quad 0x365b8baf6ae5d9ef + .quad 0x825238b6385b681e + .quad 0x234929c1167d65e1 + + // 2^4 * 7 * G + + .quad 0x984decaba077ade8 + .quad 0x383f77ad19eb389d + .quad 0xc7ec6b7e2954d794 + .quad 0x59c77b3aeb7c3a7a + .quad 0x48145cc21d099fcf + .quad 0x4535c192cc28d7e5 + .quad 0x80e7c1e548247e01 + .quad 0x4a5f28743b2973ee + .quad 0xd3add725225ccf62 + .quad 0x911a3381b2152c5d + .quad 0xd8b39fad5b08f87d + .quad 0x6f05606b4799fe3b + + // 2^4 * 8 * G + + .quad 0x9ffe9e92177ba962 + .quad 0x98aee71d0de5cae1 + .quad 0x3ff4ae942d831044 + .quad 0x714de12e58533ac8 + .quad 0x5b433149f91b6483 + .quad 0xadb5dc655a2cbf62 + .quad 0x87fa8412632827b3 + .quad 0x60895e91ab49f8d8 + .quad 0xe9ecf2ed0cf86c18 + .quad 0xb46d06120735dfd4 + .quad 0xbc9da09804b96be7 + .quad 0x73e2e62fd96dc26b + + // 2^8 * 1 * G + + .quad 0xed5b635449aa515e + .quad 0xa865c49f0bc6823a + .quad 0x850c1fe95b42d1c4 + .quad 0x30d76d6f03d315b9 + .quad 0x2eccdd0e632f9c1d + .quad 0x51d0b69676893115 + .quad 0x52dfb76ba8637a58 + .quad 0x6dd37d49a00eef39 + .quad 0x6c4444172106e4c7 + .quad 0xfb53d680928d7f69 + .quad 0xb4739ea4694d3f26 + .quad 0x10c697112e864bb0 + + // 2^8 * 2 * G + + .quad 0x6493c4277dbe5fde + .quad 0x265d4fad19ad7ea2 + .quad 0x0e00dfc846304590 + .quad 0x25e61cabed66fe09 + .quad 0x0ca62aa08358c805 + .quad 0x6a3d4ae37a204247 + .quad 0x7464d3a63b11eddc + .quad 0x03bf9baf550806ef + .quad 0x3f13e128cc586604 + .quad 0x6f5873ecb459747e + .quad 0xa0b63dedcc1268f5 + .quad 0x566d78634586e22c + + // 2^8 * 3 * G + + .quad 0x1637a49f9cc10834 + .quad 0xbc8e56d5a89bc451 + .quad 0x1cb5ec0f7f7fd2db + .quad 0x33975bca5ecc35d9 + .quad 0xa1054285c65a2fd0 + .quad 0x6c64112af31667c3 + .quad 0x680ae240731aee58 + .quad 0x14fba5f34793b22a + .quad 0x3cd746166985f7d4 + .quad 0x593e5e84c9c80057 + .quad 0x2fc3f2b67b61131e + .quad 0x14829cea83fc526c + + // 2^8 * 4 * G + + .quad 0xff437b8497dd95c2 + .quad 0x6c744e30aa4eb5a7 + .quad 0x9e0c5d613c85e88b + .quad 0x2fd9c71e5f758173 + .quad 0x21e70b2f4e71ecb8 + .quad 0xe656ddb940a477e3 + .quad 0xbf6556cece1d4f80 + .quad 0x05fc3bc4535d7b7e + .quad 0x24b8b3ae52afdedd + .quad 0x3495638ced3b30cf + .quad 0x33a4bc83a9be8195 + .quad 0x373767475c651f04 + + // 2^8 * 5 * G + + .quad 0x2fba99fd40d1add9 + .quad 0xb307166f96f4d027 + .quad 0x4363f05215f03bae + .quad 0x1fbea56c3b18f999 + .quad 0x634095cb14246590 + .quad 
0xef12144016c15535 + .quad 0x9e38140c8910bc60 + .quad 0x6bf5905730907c8c + .quad 0x0fa778f1e1415b8a + .quad 0x06409ff7bac3a77e + .quad 0x6f52d7b89aa29a50 + .quad 0x02521cf67a635a56 + + // 2^8 * 6 * G + + .quad 0x513fee0b0a9d5294 + .quad 0x8f98e75c0fdf5a66 + .quad 0xd4618688bfe107ce + .quad 0x3fa00a7e71382ced + .quad 0xb1146720772f5ee4 + .quad 0xe8f894b196079ace + .quad 0x4af8224d00ac824a + .quad 0x001753d9f7cd6cc4 + .quad 0x3c69232d963ddb34 + .quad 0x1dde87dab4973858 + .quad 0xaad7d1f9a091f285 + .quad 0x12b5fe2fa048edb6 + + // 2^8 * 7 * G + + .quad 0x71f0fbc496fce34d + .quad 0x73b9826badf35bed + .quad 0xd2047261ff28c561 + .quad 0x749b76f96fb1206f + .quad 0xdf2b7c26ad6f1e92 + .quad 0x4b66d323504b8913 + .quad 0x8c409dc0751c8bc3 + .quad 0x6f7e93c20796c7b8 + .quad 0x1f5af604aea6ae05 + .quad 0xc12351f1bee49c99 + .quad 0x61a808b5eeff6b66 + .quad 0x0fcec10f01e02151 + + // 2^8 * 8 * G + + .quad 0x644d58a649fe1e44 + .quad 0x21fcaea231ad777e + .quad 0x02441c5a887fd0d2 + .quad 0x4901aa7183c511f3 + .quad 0x3df2d29dc4244e45 + .quad 0x2b020e7493d8de0a + .quad 0x6cc8067e820c214d + .quad 0x413779166feab90a + .quad 0x08b1b7548c1af8f0 + .quad 0xce0f7a7c246299b4 + .quad 0xf760b0f91e06d939 + .quad 0x41bb887b726d1213 + + // 2^12 * 1 * G + + .quad 0x9267806c567c49d8 + .quad 0x066d04ccca791e6a + .quad 0xa69f5645e3cc394b + .quad 0x5c95b686a0788cd2 + .quad 0x97d980e0aa39f7d2 + .quad 0x35d0384252c6b51c + .quad 0x7d43f49307cd55aa + .quad 0x56bd36cfb78ac362 + .quad 0x2ac519c10d14a954 + .quad 0xeaf474b494b5fa90 + .quad 0xe6af8382a9f87a5a + .quad 0x0dea6db1879be094 + + // 2^12 * 2 * G + + .quad 0xaa66bf547344e5ab + .quad 0xda1258888f1b4309 + .quad 0x5e87d2b3fd564b2f + .quad 0x5b2c78885483b1dd + .quad 0x15baeb74d6a8797a + .quad 0x7ef55cf1fac41732 + .quad 0x29001f5a3c8b05c5 + .quad 0x0ad7cc8752eaccfb + .quad 0x52151362793408cf + .quad 0xeb0f170319963d94 + .quad 0xa833b2fa883d9466 + .quad 0x093a7fa775003c78 + + // 2^12 * 3 * G + + .quad 0xe5107de63a16d7be + .quad 0xa377ffdc9af332cf + .quad 0x70d5bf18440b677f + .quad 0x6a252b19a4a31403 + .quad 0xb8e9604460a91286 + .quad 0x7f3fd8047778d3de + .quad 0x67d01e31bf8a5e2d + .quad 0x7b038a06c27b653e + .quad 0x9ed919d5d36990f3 + .quad 0x5213aebbdb4eb9f2 + .quad 0xc708ea054cb99135 + .quad 0x58ded57f72260e56 + + // 2^12 * 4 * G + + .quad 0x78e79dade9413d77 + .quad 0xf257f9d59729e67d + .quad 0x59db910ee37aa7e6 + .quad 0x6aa11b5bbb9e039c + .quad 0xda6d53265b0fd48b + .quad 0x8960823193bfa988 + .quad 0xd78ac93261d57e28 + .quad 0x79f2942d3a5c8143 + .quad 0x97da2f25b6c88de9 + .quad 0x251ba7eaacf20169 + .quad 0x09b44f87ef4eb4e4 + .quad 0x7d90ab1bbc6a7da5 + + // 2^12 * 5 * G + + .quad 0x9acca683a7016bfe + .quad 0x90505f4df2c50b6d + .quad 0x6b610d5fcce435aa + .quad 0x19a10d446198ff96 + .quad 0x1a07a3f496b3c397 + .quad 0x11ceaa188f4e2532 + .quad 0x7d9498d5a7751bf0 + .quad 0x19ed161f508dd8a0 + .quad 0x560a2cd687dce6ca + .quad 0x7f3568c48664cf4d + .quad 0x8741e95222803a38 + .quad 0x483bdab1595653fc + + // 2^12 * 6 * G + + .quad 0xfa780f148734fa49 + .quad 0x106f0b70360534e0 + .quad 0x2210776fe3e307bd + .quad 0x3286c109dde6a0fe + .quad 0xd6cf4d0ab4da80f6 + .quad 0x82483e45f8307fe0 + .quad 0x05005269ae6f9da4 + .quad 0x1c7052909cf7877a + .quad 0x32ee7de2874e98d4 + .quad 0x14c362e9b97e0c60 + .quad 0x5781dcde6a60a38a + .quad 0x217dd5eaaa7aa840 + + // 2^12 * 7 * G + + .quad 0x9db7c4d0248e1eb0 + .quad 0xe07697e14d74bf52 + .quad 0x1e6a9b173c562354 + .quad 0x7fa7c21f795a4965 + .quad 0x8bdf1fb9be8c0ec8 + .quad 0x00bae7f8e30a0282 + .quad 0x4963991dad6c4f6c + .quad 0x07058a6e5df6f60a + .quad 
0xe9eb02c4db31f67f + .quad 0xed25fd8910bcfb2b + .quad 0x46c8131f5c5cddb4 + .quad 0x33b21c13a0cb9bce + + // 2^12 * 8 * G + + .quad 0x360692f8087d8e31 + .quad 0xf4dcc637d27163f7 + .quad 0x25a4e62065ea5963 + .quad 0x659bf72e5ac160d9 + .quad 0x9aafb9b05ee38c5b + .quad 0xbf9d2d4e071a13c7 + .quad 0x8eee6e6de933290a + .quad 0x1c3bab17ae109717 + .quad 0x1c9ab216c7cab7b0 + .quad 0x7d65d37407bbc3cc + .quad 0x52744750504a58d5 + .quad 0x09f2606b131a2990 + + // 2^16 * 1 * G + + .quad 0x40e87d44744346be + .quad 0x1d48dad415b52b25 + .quad 0x7c3a8a18a13b603e + .quad 0x4eb728c12fcdbdf7 + .quad 0x7e234c597c6691ae + .quad 0x64889d3d0a85b4c8 + .quad 0xdae2c90c354afae7 + .quad 0x0a871e070c6a9e1d + .quad 0x3301b5994bbc8989 + .quad 0x736bae3a5bdd4260 + .quad 0x0d61ade219d59e3c + .quad 0x3ee7300f2685d464 + + // 2^16 * 2 * G + + .quad 0xf5d255e49e7dd6b7 + .quad 0x8016115c610b1eac + .quad 0x3c99975d92e187ca + .quad 0x13815762979125c2 + .quad 0x43fa7947841e7518 + .quad 0xe5c6fa59639c46d7 + .quad 0xa1065e1de3052b74 + .quad 0x7d47c6a2cfb89030 + .quad 0x3fdad0148ef0d6e0 + .quad 0x9d3e749a91546f3c + .quad 0x71ec621026bb8157 + .quad 0x148cf58d34c9ec80 + + // 2^16 * 3 * G + + .quad 0x46a492f67934f027 + .quad 0x469984bef6840aa9 + .quad 0x5ca1bc2a89611854 + .quad 0x3ff2fa1ebd5dbbd4 + .quad 0xe2572f7d9ae4756d + .quad 0x56c345bb88f3487f + .quad 0x9fd10b6d6960a88d + .quad 0x278febad4eaea1b9 + .quad 0xb1aa681f8c933966 + .quad 0x8c21949c20290c98 + .quad 0x39115291219d3c52 + .quad 0x4104dd02fe9c677b + + // 2^16 * 4 * G + + .quad 0x72b2bf5e1124422a + .quad 0xa1fa0c3398a33ab5 + .quad 0x94cb6101fa52b666 + .quad 0x2c863b00afaf53d5 + .quad 0x81214e06db096ab8 + .quad 0x21a8b6c90ce44f35 + .quad 0x6524c12a409e2af5 + .quad 0x0165b5a48efca481 + .quad 0xf190a474a0846a76 + .quad 0x12eff984cd2f7cc0 + .quad 0x695e290658aa2b8f + .quad 0x591b67d9bffec8b8 + + // 2^16 * 5 * G + + .quad 0x312f0d1c80b49bfa + .quad 0x5979515eabf3ec8a + .quad 0x727033c09ef01c88 + .quad 0x3de02ec7ca8f7bcb + .quad 0x99b9b3719f18b55d + .quad 0xe465e5faa18c641e + .quad 0x61081136c29f05ed + .quad 0x489b4f867030128b + .quad 0xd232102d3aeb92ef + .quad 0xe16253b46116a861 + .quad 0x3d7eabe7190baa24 + .quad 0x49f5fbba496cbebf + + // 2^16 * 6 * G + + .quad 0x30949a108a5bcfd4 + .quad 0xdc40dd70bc6473eb + .quad 0x92c294c1307c0d1c + .quad 0x5604a86dcbfa6e74 + .quad 0x155d628c1e9c572e + .quad 0x8a4d86acc5884741 + .quad 0x91a352f6515763eb + .quad 0x06a1a6c28867515b + .quad 0x7288d1d47c1764b6 + .quad 0x72541140e0418b51 + .quad 0x9f031a6018acf6d1 + .quad 0x20989e89fe2742c6 + + // 2^16 * 7 * G + + .quad 0x499777fd3a2dcc7f + .quad 0x32857c2ca54fd892 + .quad 0xa279d864d207e3a0 + .quad 0x0403ed1d0ca67e29 + .quad 0x1674278b85eaec2e + .quad 0x5621dc077acb2bdf + .quad 0x640a4c1661cbf45a + .quad 0x730b9950f70595d3 + .quad 0xc94b2d35874ec552 + .quad 0xc5e6c8cf98246f8d + .quad 0xf7cb46fa16c035ce + .quad 0x5bd7454308303dcc + + // 2^16 * 8 * G + + .quad 0x7f9ad19528b24cc2 + .quad 0x7f6b54656335c181 + .quad 0x66b8b66e4fc07236 + .quad 0x133a78007380ad83 + .quad 0x85c4932115e7792a + .quad 0xc64c89a2bdcdddc9 + .quad 0x9d1e3da8ada3d762 + .quad 0x5bb7db123067f82c + .quad 0x0961f467c6ca62be + .quad 0x04ec21d6211952ee + .quad 0x182360779bd54770 + .quad 0x740dca6d58f0e0d2 + + // 2^20 * 1 * G + + .quad 0x50b70bf5d3f0af0b + .quad 0x4feaf48ae32e71f7 + .quad 0x60e84ed3a55bbd34 + .quad 0x00ed489b3f50d1ed + .quad 0x3906c72aed261ae5 + .quad 0x9ab68fd988e100f7 + .quad 0xf5e9059af3360197 + .quad 0x0e53dc78bf2b6d47 + .quad 0xb90829bf7971877a + .quad 0x5e4444636d17e631 + .quad 0x4d05c52e18276893 + .quad 
0x27632d9a5a4a4af5 + + // 2^20 * 2 * G + + .quad 0xd11ff05154b260ce + .quad 0xd86dc38e72f95270 + .quad 0x601fcd0d267cc138 + .quad 0x2b67916429e90ccd + .quad 0xa98285d187eaffdb + .quad 0xa5b4fbbbd8d0a864 + .quad 0xb658f27f022663f7 + .quad 0x3bbc2b22d99ce282 + .quad 0xb917c952583c0a58 + .quad 0x653ff9b80fe4c6f3 + .quad 0x9b0da7d7bcdf3c0c + .quad 0x43a0eeb6ab54d60e + + // 2^20 * 3 * G + + .quad 0x396966a46d4a5487 + .quad 0xf811a18aac2bb3ba + .quad 0x66e4685b5628b26b + .quad 0x70a477029d929b92 + .quad 0x3ac6322357875fe8 + .quad 0xd9d4f4ecf5fbcb8f + .quad 0x8dee8493382bb620 + .quad 0x50c5eaa14c799fdc + .quad 0xdd0edc8bd6f2fb3c + .quad 0x54c63aa79cc7b7a0 + .quad 0xae0b032b2c8d9f1a + .quad 0x6f9ce107602967fb + + // 2^20 * 4 * G + + .quad 0xad1054b1cde1c22a + .quad 0xc4a8e90248eb32df + .quad 0x5f3e7b33accdc0ea + .quad 0x72364713fc79963e + .quad 0x139693063520e0b5 + .quad 0x437fcf7c88ea03fe + .quad 0xf7d4c40bd3c959bc + .quad 0x699154d1f893ded9 + .quad 0x315d5c75b4b27526 + .quad 0xcccb842d0236daa5 + .quad 0x22f0c8a3345fee8e + .quad 0x73975a617d39dbed + + // 2^20 * 5 * G + + .quad 0xe4024df96375da10 + .quad 0x78d3251a1830c870 + .quad 0x902b1948658cd91c + .quad 0x7e18b10b29b7438a + .quad 0x6f37f392f4433e46 + .quad 0x0e19b9a11f566b18 + .quad 0x220fb78a1fd1d662 + .quad 0x362a4258a381c94d + .quad 0x9071d9132b6beb2f + .quad 0x0f26e9ad28418247 + .quad 0xeab91ec9bdec925d + .quad 0x4be65bc8f48af2de + + // 2^20 * 6 * G + + .quad 0x78487feba36e7028 + .quad 0x5f3f13001dd8ce34 + .quad 0x934fb12d4b30c489 + .quad 0x056c244d397f0a2b + .quad 0x1d50fba257c26234 + .quad 0x7bd4823adeb0678b + .quad 0xc2b0dc6ea6538af5 + .quad 0x5665eec6351da73e + .quad 0xdb3ee00943bfb210 + .quad 0x4972018720800ac2 + .quad 0x26ab5d6173bd8667 + .quad 0x20b209c2ab204938 + + // 2^20 * 7 * G + + .quad 0x549e342ac07fb34b + .quad 0x02d8220821373d93 + .quad 0xbc262d70acd1f567 + .quad 0x7a92c9fdfbcac784 + .quad 0x1fcca94516bd3289 + .quad 0x448d65aa41420428 + .quad 0x59c3b7b216a55d62 + .quad 0x49992cc64e612cd8 + .quad 0x65bd1bea70f801de + .quad 0x1befb7c0fe49e28a + .quad 0xa86306cdb1b2ae4a + .quad 0x3b7ac0cd265c2a09 + + // 2^20 * 8 * G + + .quad 0x822bee438c01bcec + .quad 0x530cb525c0fbc73b + .quad 0x48519034c1953fe9 + .quad 0x265cc261e09a0f5b + .quad 0xf0d54e4f22ed39a7 + .quad 0xa2aae91e5608150a + .quad 0xf421b2e9eddae875 + .quad 0x31bc531d6b7de992 + .quad 0xdf3d134da980f971 + .quad 0x7a4fb8d1221a22a7 + .quad 0x3df7d42035aad6d8 + .quad 0x2a14edcc6a1a125e + + // 2^24 * 1 * G + + .quad 0xdf48ee0752cfce4e + .quad 0xc3fffaf306ec08b7 + .quad 0x05710b2ab95459c4 + .quad 0x161d25fa963ea38d + .quad 0x231a8c570478433c + .quad 0xb7b5270ec281439d + .quad 0xdbaa99eae3d9079f + .quad 0x2c03f5256c2b03d9 + .quad 0x790f18757b53a47d + .quad 0x307b0130cf0c5879 + .quad 0x31903d77257ef7f9 + .quad 0x699468bdbd96bbaf + + // 2^24 * 2 * G + + .quad 0xbd1f2f46f4dafecf + .quad 0x7cef0114a47fd6f7 + .quad 0xd31ffdda4a47b37f + .quad 0x525219a473905785 + .quad 0xd8dd3de66aa91948 + .quad 0x485064c22fc0d2cc + .quad 0x9b48246634fdea2f + .quad 0x293e1c4e6c4a2e3a + .quad 0x376e134b925112e1 + .quad 0x703778b5dca15da0 + .quad 0xb04589af461c3111 + .quad 0x5b605c447f032823 + + // 2^24 * 3 * G + + .quad 0xb965805920c47c89 + .quad 0xe7f0100c923b8fcc + .quad 0x0001256502e2ef77 + .quad 0x24a76dcea8aeb3ee + .quad 0x3be9fec6f0e7f04c + .quad 0x866a579e75e34962 + .quad 0x5542ef161e1de61a + .quad 0x2f12fef4cc5abdd5 + .quad 0x0a4522b2dfc0c740 + .quad 0x10d06e7f40c9a407 + .quad 0xc6cf144178cff668 + .quad 0x5e607b2518a43790 + + // 2^24 * 4 * G + + .quad 0x58b31d8f6cdf1818 + .quad 0x35cfa74fc36258a2 
+ .quad 0xe1b3ff4f66e61d6e + .quad 0x5067acab6ccdd5f7 + .quad 0xa02c431ca596cf14 + .quad 0xe3c42d40aed3e400 + .quad 0xd24526802e0f26db + .quad 0x201f33139e457068 + .quad 0xfd527f6b08039d51 + .quad 0x18b14964017c0006 + .quad 0xd5220eb02e25a4a8 + .quad 0x397cba8862460375 + + // 2^24 * 5 * G + + .quad 0x30c13093f05959b2 + .quad 0xe23aa18de9a97976 + .quad 0x222fd491721d5e26 + .quad 0x2339d320766e6c3a + .quad 0x7815c3fbc81379e7 + .quad 0xa6619420dde12af1 + .quad 0xffa9c0f885a8fdd5 + .quad 0x771b4022c1e1c252 + .quad 0xd87dd986513a2fa7 + .quad 0xf5ac9b71f9d4cf08 + .quad 0xd06bc31b1ea283b3 + .quad 0x331a189219971a76 + + // 2^24 * 6 * G + + .quad 0xf5166f45fb4f80c6 + .quad 0x9c36c7de61c775cf + .quad 0xe3d4e81b9041d91c + .quad 0x31167c6b83bdfe21 + .quad 0x26512f3a9d7572af + .quad 0x5bcbe28868074a9e + .quad 0x84edc1c11180f7c4 + .quad 0x1ac9619ff649a67b + .quad 0xf22b3842524b1068 + .quad 0x5068343bee9ce987 + .quad 0xfc9d71844a6250c8 + .quad 0x612436341f08b111 + + // 2^24 * 7 * G + + .quad 0xd99d41db874e898d + .quad 0x09fea5f16c07dc20 + .quad 0x793d2c67d00f9bbc + .quad 0x46ebe2309e5eff40 + .quad 0x8b6349e31a2d2638 + .quad 0x9ddfb7009bd3fd35 + .quad 0x7f8bf1b8a3a06ba4 + .quad 0x1522aa3178d90445 + .quad 0x2c382f5369614938 + .quad 0xdafe409ab72d6d10 + .quad 0xe8c83391b646f227 + .quad 0x45fe70f50524306c + + // 2^24 * 8 * G + + .quad 0xda4875a6960c0b8c + .quad 0x5b68d076ef0e2f20 + .quad 0x07fb51cf3d0b8fd4 + .quad 0x428d1623a0e392d4 + .quad 0x62f24920c8951491 + .quad 0x05f007c83f630ca2 + .quad 0x6fbb45d2f5c9d4b8 + .quad 0x16619f6db57a2245 + .quad 0x084f4a4401a308fd + .quad 0xa82219c376a5caac + .quad 0xdeb8de4643d1bc7d + .quad 0x1d81592d60bd38c6 + + // 2^28 * 1 * G + + .quad 0xd833d7beec2a4c38 + .quad 0x2c9162830acc20ed + .quad 0xe93a47aa92df7581 + .quad 0x702d67a3333c4a81 + .quad 0x3a4a369a2f89c8a1 + .quad 0x63137a1d7c8de80d + .quad 0xbcac008a78eda015 + .quad 0x2cb8b3a5b483b03f + .quad 0x36e417cbcb1b90a1 + .quad 0x33b3ddaa7f11794e + .quad 0x3f510808885bc607 + .quad 0x24141dc0e6a8020d + + // 2^28 * 2 * G + + .quad 0x59f73c773fefee9d + .quad 0xb3f1ef89c1cf989d + .quad 0xe35dfb42e02e545f + .quad 0x5766120b47a1b47c + .quad 0x91925dccbd83157d + .quad 0x3ca1205322cc8094 + .quad 0x28e57f183f90d6e4 + .quad 0x1a4714cede2e767b + .quad 0xdb20ba0fb8b6b7ff + .quad 0xb732c3b677511fa1 + .quad 0xa92b51c099f02d89 + .quad 0x4f3875ad489ca5f1 + + // 2^28 * 3 * G + + .quad 0xc7fc762f4932ab22 + .quad 0x7ac0edf72f4c3c1b + .quad 0x5f6b55aa9aa895e8 + .quad 0x3680274dad0a0081 + .quad 0x79ed13f6ee73eec0 + .quad 0xa5c6526d69110bb1 + .quad 0xe48928c38603860c + .quad 0x722a1446fd7059f5 + .quad 0xd0959fe9a8cf8819 + .quad 0xd0a995508475a99c + .quad 0x6eac173320b09cc5 + .quad 0x628ecf04331b1095 + + // 2^28 * 4 * G + + .quad 0x98bcb118a9d0ddbc + .quad 0xee449e3408b4802b + .quad 0x87089226b8a6b104 + .quad 0x685f349a45c7915d + .quad 0x9b41acf85c74ccf1 + .quad 0xb673318108265251 + .quad 0x99c92aed11adb147 + .quad 0x7a47d70d34ecb40f + .quad 0x60a0c4cbcc43a4f5 + .quad 0x775c66ca3677bea9 + .quad 0xa17aa1752ff8f5ed + .quad 0x11ded9020e01fdc0 + + // 2^28 * 5 * G + + .quad 0x890e7809caefe704 + .quad 0x8728296de30e8c6c + .quad 0x4c5cd2a392aeb1c9 + .quad 0x194263d15771531f + .quad 0x471f95b03bea93b7 + .quad 0x0552d7d43313abd3 + .quad 0xbd9370e2e17e3f7b + .quad 0x7b120f1db20e5bec + .quad 0x17d2fb3d86502d7a + .quad 0xb564d84450a69352 + .quad 0x7da962c8a60ed75d + .quad 0x00d0f85b318736aa + + // 2^28 * 6 * G + + .quad 0x978b142e777c84fd + .quad 0xf402644705a8c062 + .quad 0xa67ad51be7e612c7 + .quad 0x2f7b459698dd6a33 + .quad 0xa6753c1efd7621c1 + .quad 
0x69c0b4a7445671f5 + .quad 0x971f527405b23c11 + .quad 0x387bc74851a8c7cd + .quad 0x81894b4d4a52a9a8 + .quad 0xadd93e12f6b8832f + .quad 0x184d8548b61bd638 + .quad 0x3f1c62dbd6c9f6cd + + // 2^28 * 7 * G + + .quad 0x2e8f1f0091910c1f + .quad 0xa4df4fe0bff2e12c + .quad 0x60c6560aee927438 + .quad 0x6338283facefc8fa + .quad 0x3fad3e40148f693d + .quad 0x052656e194eb9a72 + .quad 0x2f4dcbfd184f4e2f + .quad 0x406f8db1c482e18b + .quad 0x9e630d2c7f191ee4 + .quad 0x4fbf8301bc3ff670 + .quad 0x787d8e4e7afb73c4 + .quad 0x50d83d5be8f58fa5 + + // 2^28 * 8 * G + + .quad 0x85683916c11a1897 + .quad 0x2d69a4efe506d008 + .quad 0x39af1378f664bd01 + .quad 0x65942131361517c6 + .quad 0xc0accf90b4d3b66d + .quad 0xa7059de561732e60 + .quad 0x033d1f7870c6b0ba + .quad 0x584161cd26d946e4 + .quad 0xbbf2b1a072d27ca2 + .quad 0xbf393c59fbdec704 + .quad 0xe98dbbcee262b81e + .quad 0x02eebd0b3029b589 + + // 2^32 * 1 * G + + .quad 0x61368756a60dac5f + .quad 0x17e02f6aebabdc57 + .quad 0x7f193f2d4cce0f7d + .quad 0x20234a7789ecdcf0 + .quad 0x8765b69f7b85c5e8 + .quad 0x6ff0678bd168bab2 + .quad 0x3a70e77c1d330f9b + .quad 0x3a5f6d51b0af8e7c + .quad 0x76d20db67178b252 + .quad 0x071c34f9d51ed160 + .quad 0xf62a4a20b3e41170 + .quad 0x7cd682353cffe366 + + // 2^32 * 2 * G + + .quad 0x0be1a45bd887fab6 + .quad 0x2a846a32ba403b6e + .quad 0xd9921012e96e6000 + .quad 0x2838c8863bdc0943 + .quad 0xa665cd6068acf4f3 + .quad 0x42d92d183cd7e3d3 + .quad 0x5759389d336025d9 + .quad 0x3ef0253b2b2cd8ff + .quad 0xd16bb0cf4a465030 + .quad 0xfa496b4115c577ab + .quad 0x82cfae8af4ab419d + .quad 0x21dcb8a606a82812 + + // 2^32 * 3 * G + + .quad 0x5c6004468c9d9fc8 + .quad 0x2540096ed42aa3cb + .quad 0x125b4d4c12ee2f9c + .quad 0x0bc3d08194a31dab + .quad 0x9a8d00fabe7731ba + .quad 0x8203607e629e1889 + .quad 0xb2cc023743f3d97f + .quad 0x5d840dbf6c6f678b + .quad 0x706e380d309fe18b + .quad 0x6eb02da6b9e165c7 + .quad 0x57bbba997dae20ab + .quad 0x3a4276232ac196dd + + // 2^32 * 4 * G + + .quad 0x4b42432c8a7084fa + .quad 0x898a19e3dfb9e545 + .quad 0xbe9f00219c58e45d + .quad 0x1ff177cea16debd1 + .quad 0x3bf8c172db447ecb + .quad 0x5fcfc41fc6282dbd + .quad 0x80acffc075aa15fe + .quad 0x0770c9e824e1a9f9 + .quad 0xcf61d99a45b5b5fd + .quad 0x860984e91b3a7924 + .quad 0xe7300919303e3e89 + .quad 0x39f264fd41500b1e + + // 2^32 * 5 * G + + .quad 0xa7ad3417dbe7e29c + .quad 0xbd94376a2b9c139c + .quad 0xa0e91b8e93597ba9 + .quad 0x1712d73468889840 + .quad 0xd19b4aabfe097be1 + .quad 0xa46dfce1dfe01929 + .quad 0xc3c908942ca6f1ff + .quad 0x65c621272c35f14e + .quad 0xe72b89f8ce3193dd + .quad 0x4d103356a125c0bb + .quad 0x0419a93d2e1cfe83 + .quad 0x22f9800ab19ce272 + + // 2^32 * 6 * G + + .quad 0x605a368a3e9ef8cb + .quad 0xe3e9c022a5504715 + .quad 0x553d48b05f24248f + .quad 0x13f416cd647626e5 + .quad 0x42029fdd9a6efdac + .quad 0xb912cebe34a54941 + .quad 0x640f64b987bdf37b + .quad 0x4171a4d38598cab4 + .quad 0xfa2758aa99c94c8c + .quad 0x23006f6fb000b807 + .quad 0xfbd291ddadda5392 + .quad 0x508214fa574bd1ab + + // 2^32 * 7 * G + + .quad 0xc20269153ed6fe4b + .quad 0xa65a6739511d77c4 + .quad 0xcbde26462c14af94 + .quad 0x22f960ec6faba74b + .quad 0x461a15bb53d003d6 + .quad 0xb2102888bcf3c965 + .quad 0x27c576756c683a5a + .quad 0x3a7758a4c86cb447 + .quad 0x548111f693ae5076 + .quad 0x1dae21df1dfd54a6 + .quad 0x12248c90f3115e65 + .quad 0x5d9fd15f8de7f494 + + // 2^32 * 8 * G + + .quad 0x031408d36d63727f + .quad 0x6a379aefd7c7b533 + .quad 0xa9e18fc5ccaee24b + .quad 0x332f35914f8fbed3 + .quad 0x3f244d2aeed7521e + .quad 0x8e3a9028432e9615 + .quad 0xe164ba772e9c16d4 + .quad 0x3bc187fa47eb98d8 + .quad 
0x6d470115ea86c20c + .quad 0x998ab7cb6c46d125 + .quad 0xd77832b53a660188 + .quad 0x450d81ce906fba03 + + // 2^36 * 1 * G + + .quad 0xf8ae4d2ad8453902 + .quad 0x7018058ee8db2d1d + .quad 0xaab3995fc7d2c11e + .quad 0x53b16d2324ccca79 + .quad 0x23264d66b2cae0b5 + .quad 0x7dbaed33ebca6576 + .quad 0x030ebed6f0d24ac8 + .quad 0x2a887f78f7635510 + .quad 0x2a23b9e75c012d4f + .quad 0x0c974651cae1f2ea + .quad 0x2fb63273675d70ca + .quad 0x0ba7250b864403f5 + + // 2^36 * 2 * G + + .quad 0xbb0d18fd029c6421 + .quad 0xbc2d142189298f02 + .quad 0x8347f8e68b250e96 + .quad 0x7b9f2fe8032d71c9 + .quad 0xdd63589386f86d9c + .quad 0x61699176e13a85a4 + .quad 0x2e5111954eaa7d57 + .quad 0x32c21b57fb60bdfb + .quad 0xd87823cd319e0780 + .quad 0xefc4cfc1897775c5 + .quad 0x4854fb129a0ab3f7 + .quad 0x12c49d417238c371 + + // 2^36 * 3 * G + + .quad 0x0950b533ffe83769 + .quad 0x21861c1d8e1d6bd1 + .quad 0xf022d8381302e510 + .quad 0x2509200c6391cab4 + .quad 0x09b3a01783799542 + .quad 0x626dd08faad5ee3f + .quad 0xba00bceeeb70149f + .quad 0x1421b246a0a444c9 + .quad 0x4aa43a8e8c24a7c7 + .quad 0x04c1f540d8f05ef5 + .quad 0xadba5e0c0b3eb9dc + .quad 0x2ab5504448a49ce3 + + // 2^36 * 4 * G + + .quad 0x2ed227266f0f5dec + .quad 0x9824ee415ed50824 + .quad 0x807bec7c9468d415 + .quad 0x7093bae1b521e23f + .quad 0xdc07ac631c5d3afa + .quad 0x58615171f9df8c6c + .quad 0x72a079d89d73e2b0 + .quad 0x7301f4ceb4eae15d + .quad 0x6409e759d6722c41 + .quad 0xa674e1cf72bf729b + .quad 0xbc0a24eb3c21e569 + .quad 0x390167d24ebacb23 + + // 2^36 * 5 * G + + .quad 0x27f58e3bba353f1c + .quad 0x4c47764dbf6a4361 + .quad 0xafbbc4e56e562650 + .quad 0x07db2ee6aae1a45d + .quad 0xd7bb054ba2f2120b + .quad 0xe2b9ceaeb10589b7 + .quad 0x3fe8bac8f3c0edbe + .quad 0x4cbd40767112cb69 + .quad 0x0b603cc029c58176 + .quad 0x5988e3825cb15d61 + .quad 0x2bb61413dcf0ad8d + .quad 0x7b8eec6c74183287 + + // 2^36 * 6 * G + + .quad 0xe4ca40782cd27cb0 + .quad 0xdaf9c323fbe967bd + .quad 0xb29bd34a8ad41e9e + .quad 0x72810497626ede4d + .quad 0x32fee570fc386b73 + .quad 0xda8b0141da3a8cc7 + .quad 0x975ffd0ac8968359 + .quad 0x6ee809a1b132a855 + .quad 0x9444bb31fcfd863a + .quad 0x2fe3690a3e4e48c5 + .quad 0xdc29c867d088fa25 + .quad 0x13bd1e38d173292e + + // 2^36 * 7 * G + + .quad 0xd32b4cd8696149b5 + .quad 0xe55937d781d8aab7 + .quad 0x0bcb2127ae122b94 + .quad 0x41e86fcfb14099b0 + .quad 0x223fb5cf1dfac521 + .quad 0x325c25316f554450 + .quad 0x030b98d7659177ac + .quad 0x1ed018b64f88a4bd + .quad 0x3630dfa1b802a6b0 + .quad 0x880f874742ad3bd5 + .quad 0x0af90d6ceec5a4d4 + .quad 0x746a247a37cdc5d9 + + // 2^36 * 8 * G + + .quad 0xd531b8bd2b7b9af6 + .quad 0x5005093537fc5b51 + .quad 0x232fcf25c593546d + .quad 0x20a365142bb40f49 + .quad 0x6eccd85278d941ed + .quad 0x2254ae83d22f7843 + .quad 0xc522d02e7bbfcdb7 + .quad 0x681e3351bff0e4e2 + .quad 0x8b64b59d83034f45 + .quad 0x2f8b71f21fa20efb + .quad 0x69249495ba6550e4 + .quad 0x539ef98e45d5472b + + // 2^40 * 1 * G + + .quad 0x6e7bb6a1a6205275 + .quad 0xaa4f21d7413c8e83 + .quad 0x6f56d155e88f5cb2 + .quad 0x2de25d4ba6345be1 + .quad 0xd074d8961cae743f + .quad 0xf86d18f5ee1c63ed + .quad 0x97bdc55be7f4ed29 + .quad 0x4cbad279663ab108 + .quad 0x80d19024a0d71fcd + .quad 0xc525c20afb288af8 + .quad 0xb1a3974b5f3a6419 + .quad 0x7d7fbcefe2007233 + + // 2^40 * 2 * G + + .quad 0xfaef1e6a266b2801 + .quad 0x866c68c4d5739f16 + .quad 0xf68a2fbc1b03762c + .quad 0x5975435e87b75a8d + .quad 0xcd7c5dc5f3c29094 + .quad 0xc781a29a2a9105ab + .quad 0x80c61d36421c3058 + .quad 0x4f9cd196dcd8d4d7 + .quad 0x199297d86a7b3768 + .quad 0xd0d058241ad17a63 + .quad 0xba029cad5c1c0c17 + .quad 
0x7ccdd084387a0307 + + // 2^40 * 3 * G + + .quad 0xdca6422c6d260417 + .quad 0xae153d50948240bd + .quad 0xa9c0c1b4fb68c677 + .quad 0x428bd0ed61d0cf53 + .quad 0x9b0c84186760cc93 + .quad 0xcdae007a1ab32a99 + .quad 0xa88dec86620bda18 + .quad 0x3593ca848190ca44 + .quad 0x9213189a5e849aa7 + .quad 0xd4d8c33565d8facd + .quad 0x8c52545b53fdbbd1 + .quad 0x27398308da2d63e6 + + // 2^40 * 4 * G + + .quad 0x42c38d28435ed413 + .quad 0xbd50f3603278ccc9 + .quad 0xbb07ab1a79da03ef + .quad 0x269597aebe8c3355 + .quad 0xb9a10e4c0a702453 + .quad 0x0fa25866d57d1bde + .quad 0xffb9d9b5cd27daf7 + .quad 0x572c2945492c33fd + .quad 0xc77fc745d6cd30be + .quad 0xe4dfe8d3e3baaefb + .quad 0xa22c8830aa5dda0c + .quad 0x7f985498c05bca80 + + // 2^40 * 5 * G + + .quad 0x3849ce889f0be117 + .quad 0x8005ad1b7b54a288 + .quad 0x3da3c39f23fc921c + .quad 0x76c2ec470a31f304 + .quad 0xd35615520fbf6363 + .quad 0x08045a45cf4dfba6 + .quad 0xeec24fbc873fa0c2 + .quad 0x30f2653cd69b12e7 + .quad 0x8a08c938aac10c85 + .quad 0x46179b60db276bcb + .quad 0xa920c01e0e6fac70 + .quad 0x2f1273f1596473da + + // 2^40 * 6 * G + + .quad 0x4739fc7c8ae01e11 + .quad 0xfd5274904a6aab9f + .quad 0x41d98a8287728f2e + .quad 0x5d9e572ad85b69f2 + .quad 0x30488bd755a70bc0 + .quad 0x06d6b5a4f1d442e7 + .quad 0xead1a69ebc596162 + .quad 0x38ac1997edc5f784 + .quad 0x0666b517a751b13b + .quad 0x747d06867e9b858c + .quad 0xacacc011454dde49 + .quad 0x22dfcd9cbfe9e69c + + // 2^40 * 7 * G + + .quad 0x8ddbd2e0c30d0cd9 + .quad 0xad8e665facbb4333 + .quad 0x8f6b258c322a961f + .quad 0x6b2916c05448c1c7 + .quad 0x56ec59b4103be0a1 + .quad 0x2ee3baecd259f969 + .quad 0x797cb29413f5cd32 + .quad 0x0fe9877824cde472 + .quad 0x7edb34d10aba913b + .quad 0x4ea3cd822e6dac0e + .quad 0x66083dff6578f815 + .quad 0x4c303f307ff00a17 + + // 2^40 * 8 * G + + .quad 0xd30a3bd617b28c85 + .quad 0xc5d377b739773bea + .quad 0xc6c6e78c1e6a5cbf + .quad 0x0d61b8f78b2ab7c4 + .quad 0x29fc03580dd94500 + .quad 0xecd27aa46fbbec93 + .quad 0x130a155fc2e2a7f8 + .quad 0x416b151ab706a1d5 + .quad 0x56a8d7efe9c136b0 + .quad 0xbd07e5cd58e44b20 + .quad 0xafe62fda1b57e0ab + .quad 0x191a2af74277e8d2 + + // 2^44 * 1 * G + + .quad 0xd550095bab6f4985 + .quad 0x04f4cd5b4fbfaf1a + .quad 0x9d8e2ed12a0c7540 + .quad 0x2bc24e04b2212286 + .quad 0x09d4b60b2fe09a14 + .quad 0xc384f0afdbb1747e + .quad 0x58e2ea8978b5fd6e + .quad 0x519ef577b5e09b0a + .quad 0x1863d7d91124cca9 + .quad 0x7ac08145b88a708e + .quad 0x2bcd7309857031f5 + .quad 0x62337a6e8ab8fae5 + + // 2^44 * 2 * G + + .quad 0x4bcef17f06ffca16 + .quad 0xde06e1db692ae16a + .quad 0x0753702d614f42b0 + .quad 0x5f6041b45b9212d0 + .quad 0xd1ab324e1b3a1273 + .quad 0x18947cf181055340 + .quad 0x3b5d9567a98c196e + .quad 0x7fa00425802e1e68 + .quad 0x7d531574028c2705 + .quad 0x80317d69db0d75fe + .quad 0x30fface8ef8c8ddd + .quad 0x7e9de97bb6c3e998 + + // 2^44 * 3 * G + + .quad 0x1558967b9e6585a3 + .quad 0x97c99ce098e98b92 + .quad 0x10af149b6eb3adad + .quad 0x42181fe8f4d38cfa + .quad 0xf004be62a24d40dd + .quad 0xba0659910452d41f + .quad 0x81c45ee162a44234 + .quad 0x4cb829d8a22266ef + .quad 0x1dbcaa8407b86681 + .quad 0x081f001e8b26753b + .quad 0x3cd7ce6a84048e81 + .quad 0x78af11633f25f22c + + // 2^44 * 4 * G + + .quad 0x8416ebd40b50babc + .quad 0x1508722628208bee + .quad 0xa3148fafb9c1c36d + .quad 0x0d07daacd32d7d5d + .quad 0x3241c00e7d65318c + .quad 0xe6bee5dcd0e86de7 + .quad 0x118b2dc2fbc08c26 + .quad 0x680d04a7fc603dc3 + .quad 0xf9c2414a695aa3eb + .quad 0xdaa42c4c05a68f21 + .quad 0x7c6c23987f93963e + .quad 0x210e8cd30c3954e3 + + // 2^44 * 5 * G + + .quad 0xac4201f210a71c06 + .quad 0x6a65e0aef3bfb021 
+ .quad 0xbc42c35c393632f7 + .quad 0x56ea8db1865f0742 + .quad 0x2b50f16137fe6c26 + .quad 0xe102bcd856e404d8 + .quad 0x12b0f1414c561f6b + .quad 0x51b17bc8d028ec91 + .quad 0xfff5fb4bcf535119 + .quad 0xf4989d79df1108a0 + .quad 0xbdfcea659a3ba325 + .quad 0x18a11f1174d1a6f2 + + // 2^44 * 6 * G + + .quad 0x407375ab3f6bba29 + .quad 0x9ec3b6d8991e482e + .quad 0x99c80e82e55f92e9 + .quad 0x307c13b6fb0c0ae1 + .quad 0xfbd63cdad27a5f2c + .quad 0xf00fc4bc8aa106d7 + .quad 0x53fb5c1a8e64a430 + .quad 0x04eaabe50c1a2e85 + .quad 0x24751021cb8ab5e7 + .quad 0xfc2344495c5010eb + .quad 0x5f1e717b4e5610a1 + .quad 0x44da5f18c2710cd5 + + // 2^44 * 7 * G + + .quad 0x033cc55ff1b82eb5 + .quad 0xb15ae36d411cae52 + .quad 0xba40b6198ffbacd3 + .quad 0x768edce1532e861f + .quad 0x9156fe6b89d8eacc + .quad 0xe6b79451e23126a1 + .quad 0xbd7463d93944eb4e + .quad 0x726373f6767203ae + .quad 0xe305ca72eb7ef68a + .quad 0x662cf31f70eadb23 + .quad 0x18f026fdb4c45b68 + .quad 0x513b5384b5d2ecbd + + // 2^44 * 8 * G + + .quad 0x46d46280c729989e + .quad 0x4b93fbd05368a5dd + .quad 0x63df3f81d1765a89 + .quad 0x34cebd64b9a0a223 + .quad 0x5e2702878af34ceb + .quad 0x900b0409b946d6ae + .quad 0x6512ebf7dabd8512 + .quad 0x61d9b76988258f81 + .quad 0xa6c5a71349b7d94b + .quad 0xa3f3d15823eb9446 + .quad 0x0416fbd277484834 + .quad 0x69d45e6f2c70812f + + // 2^48 * 1 * G + + .quad 0xce16f74bc53c1431 + .quad 0x2b9725ce2072edde + .quad 0xb8b9c36fb5b23ee7 + .quad 0x7e2e0e450b5cc908 + .quad 0x9fe62b434f460efb + .quad 0xded303d4a63607d6 + .quad 0xf052210eb7a0da24 + .quad 0x237e7dbe00545b93 + .quad 0x013575ed6701b430 + .quad 0x231094e69f0bfd10 + .quad 0x75320f1583e47f22 + .quad 0x71afa699b11155e3 + + // 2^48 * 2 * G + + .quad 0x65ce6f9b3953b61d + .quad 0xc65839eaafa141e6 + .quad 0x0f435ffda9f759fe + .quad 0x021142e9c2b1c28e + .quad 0xea423c1c473b50d6 + .quad 0x51e87a1f3b38ef10 + .quad 0x9b84bf5fb2c9be95 + .quad 0x00731fbc78f89a1c + .quad 0xe430c71848f81880 + .quad 0xbf960c225ecec119 + .quad 0xb6dae0836bba15e3 + .quad 0x4c4d6f3347e15808 + + // 2^48 * 3 * G + + .quad 0x18f7eccfc17d1fc9 + .quad 0x6c75f5a651403c14 + .quad 0xdbde712bf7ee0cdf + .quad 0x193fddaaa7e47a22 + .quad 0x2f0cddfc988f1970 + .quad 0x6b916227b0b9f51b + .quad 0x6ec7b6c4779176be + .quad 0x38bf9500a88f9fa8 + .quad 0x1fd2c93c37e8876f + .quad 0xa2f61e5a18d1462c + .quad 0x5080f58239241276 + .quad 0x6a6fb99ebf0d4969 + + // 2^48 * 4 * G + + .quad 0x6a46c1bb560855eb + .quad 0x2416bb38f893f09d + .quad 0xd71d11378f71acc1 + .quad 0x75f76914a31896ea + .quad 0xeeb122b5b6e423c6 + .quad 0x939d7010f286ff8e + .quad 0x90a92a831dcf5d8c + .quad 0x136fda9f42c5eb10 + .quad 0xf94cdfb1a305bdd1 + .quad 0x0f364b9d9ff82c08 + .quad 0x2a87d8a5c3bb588a + .quad 0x022183510be8dcba + + // 2^48 * 5 * G + + .quad 0x4af766385ead2d14 + .quad 0xa08ed880ca7c5830 + .quad 0x0d13a6e610211e3d + .quad 0x6a071ce17b806c03 + .quad 0x9d5a710143307a7f + .quad 0xb063de9ec47da45f + .quad 0x22bbfe52be927ad3 + .quad 0x1387c441fd40426c + .quad 0xb5d3c3d187978af8 + .quad 0x722b5a3d7f0e4413 + .quad 0x0d7b4848bb477ca0 + .quad 0x3171b26aaf1edc92 + + // 2^48 * 6 * G + + .quad 0xa92f319097564ca8 + .quad 0xff7bb84c2275e119 + .quad 0x4f55fe37a4875150 + .quad 0x221fd4873cf0835a + .quad 0xa60db7d8b28a47d1 + .quad 0xa6bf14d61770a4f1 + .quad 0xd4a1f89353ddbd58 + .quad 0x6c514a63344243e9 + .quad 0x2322204f3a156341 + .quad 0xfb73e0e9ba0a032d + .quad 0xfce0dd4c410f030e + .quad 0x48daa596fb924aaa + + // 2^48 * 7 * G + + .quad 0x6eca8e665ca59cc7 + .quad 0xa847254b2e38aca0 + .quad 0x31afc708d21e17ce + .quad 0x676dd6fccad84af7 + .quad 0x14f61d5dc84c9793 + .quad 
0x9941f9e3ef418206 + .quad 0xcdf5b88f346277ac + .quad 0x58c837fa0e8a79a9 + .quad 0x0cf9688596fc9058 + .quad 0x1ddcbbf37b56a01b + .quad 0xdcc2e77d4935d66a + .quad 0x1c4f73f2c6a57f0a + + // 2^48 * 8 * G + + .quad 0x0e7a4fbd305fa0bb + .quad 0x829d4ce054c663ad + .quad 0xf421c3832fe33848 + .quad 0x795ac80d1bf64c42 + .quad 0xb36e706efc7c3484 + .quad 0x73dfc9b4c3c1cf61 + .quad 0xeb1d79c9781cc7e5 + .quad 0x70459adb7daf675c + .quad 0x1b91db4991b42bb3 + .quad 0x572696234b02dcca + .quad 0x9fdf9ee51f8c78dc + .quad 0x5fe162848ce21fd3 + + // 2^52 * 1 * G + + .quad 0xe2790aae4d077c41 + .quad 0x8b938270db7469a3 + .quad 0x6eb632dc8abd16a2 + .quad 0x720814ecaa064b72 + .quad 0x315c29c795115389 + .quad 0xd7e0e507862f74ce + .quad 0x0c4a762185927432 + .quad 0x72de6c984a25a1e4 + .quad 0xae9ab553bf6aa310 + .quad 0x050a50a9806d6e1b + .quad 0x92bb7403adff5139 + .quad 0x0394d27645be618b + + // 2^52 * 2 * G + + .quad 0x4d572251857eedf4 + .quad 0xe3724edde19e93c5 + .quad 0x8a71420e0b797035 + .quad 0x3b3c833687abe743 + .quad 0xf5396425b23545a4 + .quad 0x15a7a27e98fbb296 + .quad 0xab6c52bc636fdd86 + .quad 0x79d995a8419334ee + .quad 0xcd8a8ea61195dd75 + .quad 0xa504d8a81dd9a82f + .quad 0x540dca81a35879b6 + .quad 0x60dd16a379c86a8a + + // 2^52 * 3 * G + + .quad 0x35a2c8487381e559 + .quad 0x596ffea6d78082cb + .quad 0xcb9771ebdba7b653 + .quad 0x5a08b5019b4da685 + .quad 0x3501d6f8153e47b8 + .quad 0xb7a9675414a2f60c + .quad 0x112ee8b6455d9523 + .quad 0x4e62a3c18112ea8a + .quad 0xc8d4ac04516ab786 + .quad 0x595af3215295b23d + .quad 0xd6edd234db0230c1 + .quad 0x0929efe8825b41cc + + // 2^52 * 4 * G + + .quad 0x5f0601d1cbd0f2d3 + .quad 0x736e412f6132bb7f + .quad 0x83604432238dde87 + .quad 0x1e3a5272f5c0753c + .quad 0x8b3172b7ad56651d + .quad 0x01581b7a3fabd717 + .quad 0x2dc94df6424df6e4 + .quad 0x30376e5d2c29284f + .quad 0xd2918da78159a59c + .quad 0x6bdc1cd93f0713f3 + .quad 0x565f7a934acd6590 + .quad 0x53daacec4cb4c128 + + // 2^52 * 5 * G + + .quad 0x4ca73bd79cc8a7d6 + .quad 0x4d4a738f47e9a9b2 + .quad 0xf4cbf12942f5fe00 + .quad 0x01a13ff9bdbf0752 + .quad 0x99852bc3852cfdb0 + .quad 0x2cc12e9559d6ed0b + .quad 0x70f9e2bf9b5ac27b + .quad 0x4f3b8c117959ae99 + .quad 0x55b6c9c82ff26412 + .quad 0x1ac4a8c91fb667a8 + .quad 0xd527bfcfeb778bf2 + .quad 0x303337da7012a3be + + // 2^52 * 6 * G + + .quad 0x955422228c1c9d7c + .quad 0x01fac1371a9b340f + .quad 0x7e8d9177925b48d7 + .quad 0x53f8ad5661b3e31b + .quad 0x976d3ccbfad2fdd1 + .quad 0xcb88839737a640a8 + .quad 0x2ff00c1d6734cb25 + .quad 0x269ff4dc789c2d2b + .quad 0x0c003fbdc08d678d + .quad 0x4d982fa37ead2b17 + .quad 0xc07e6bcdb2e582f1 + .quad 0x296c7291df412a44 + + // 2^52 * 7 * G + + .quad 0x7903de2b33daf397 + .quad 0xd0ff0619c9a624b3 + .quad 0x8a1d252b555b3e18 + .quad 0x2b6d581c52e0b7c0 + .quad 0xdfb23205dab8b59e + .quad 0x465aeaa0c8092250 + .quad 0xd133c1189a725d18 + .quad 0x2327370261f117d1 + .quad 0x3d0543d3623e7986 + .quad 0x679414c2c278a354 + .quad 0xae43f0cc726196f6 + .quad 0x7836c41f8245eaba + + // 2^52 * 8 * G + + .quad 0xe7a254db49e95a81 + .quad 0x5192d5d008b0ad73 + .quad 0x4d20e5b1d00afc07 + .quad 0x5d55f8012cf25f38 + .quad 0xca651e848011937c + .quad 0xc6b0c46e6ef41a28 + .quad 0xb7021ba75f3f8d52 + .quad 0x119dff99ead7b9fd + .quad 0x43eadfcbf4b31d4d + .quad 0xc6503f7411148892 + .quad 0xfeee68c5060d3b17 + .quad 0x329293b3dd4a0ac8 + + // 2^56 * 1 * G + + .quad 0x4e59214fe194961a + .quad 0x49be7dc70d71cd4f + .quad 0x9300cfd23b50f22d + .quad 0x4789d446fc917232 + .quad 0x2879852d5d7cb208 + .quad 0xb8dedd70687df2e7 + .quad 0xdc0bffab21687891 + .quad 0x2b44c043677daa35 + .quad 
0x1a1c87ab074eb78e + .quad 0xfac6d18e99daf467 + .quad 0x3eacbbcd484f9067 + .quad 0x60c52eef2bb9a4e4 + + // 2^56 * 2 * G + + .quad 0x0b5d89bc3bfd8bf1 + .quad 0xb06b9237c9f3551a + .quad 0x0e4c16b0d53028f5 + .quad 0x10bc9c312ccfcaab + .quad 0x702bc5c27cae6d11 + .quad 0x44c7699b54a48cab + .quad 0xefbc4056ba492eb2 + .quad 0x70d77248d9b6676d + .quad 0xaa8ae84b3ec2a05b + .quad 0x98699ef4ed1781e0 + .quad 0x794513e4708e85d1 + .quad 0x63755bd3a976f413 + + // 2^56 * 3 * G + + .quad 0xb55fa03e2ad10853 + .quad 0x356f75909ee63569 + .quad 0x9ff9f1fdbe69b890 + .quad 0x0d8cc1c48bc16f84 + .quad 0x3dc7101897f1acb7 + .quad 0x5dda7d5ec165bbd8 + .quad 0x508e5b9c0fa1020f + .quad 0x2763751737c52a56 + .quad 0x029402d36eb419a9 + .quad 0xf0b44e7e77b460a5 + .quad 0xcfa86230d43c4956 + .quad 0x70c2dd8a7ad166e7 + + // 2^56 * 4 * G + + .quad 0x656194509f6fec0e + .quad 0xee2e7ea946c6518d + .quad 0x9733c1f367e09b5c + .quad 0x2e0fac6363948495 + .quad 0x91d4967db8ed7e13 + .quad 0x74252f0ad776817a + .quad 0xe40982e00d852564 + .quad 0x32b8613816a53ce5 + .quad 0x79e7f7bee448cd64 + .quad 0x6ac83a67087886d0 + .quad 0xf89fd4d9a0e4db2e + .quad 0x4179215c735a4f41 + + // 2^56 * 5 * G + + .quad 0x8c7094e7d7dced2a + .quad 0x97fb8ac347d39c70 + .quad 0xe13be033a906d902 + .quad 0x700344a30cd99d76 + .quad 0xe4ae33b9286bcd34 + .quad 0xb7ef7eb6559dd6dc + .quad 0x278b141fb3d38e1f + .quad 0x31fa85662241c286 + .quad 0xaf826c422e3622f4 + .quad 0xc12029879833502d + .quad 0x9bc1b7e12b389123 + .quad 0x24bb2312a9952489 + + // 2^56 * 6 * G + + .quad 0xb1a8ed1732de67c3 + .quad 0x3cb49418461b4948 + .quad 0x8ebd434376cfbcd2 + .quad 0x0fee3e871e188008 + .quad 0x41f80c2af5f85c6b + .quad 0x687284c304fa6794 + .quad 0x8945df99a3ba1bad + .quad 0x0d1d2af9ffeb5d16 + .quad 0xa9da8aa132621edf + .quad 0x30b822a159226579 + .quad 0x4004197ba79ac193 + .quad 0x16acd79718531d76 + + // 2^56 * 7 * G + + .quad 0x72df72af2d9b1d3d + .quad 0x63462a36a432245a + .quad 0x3ecea07916b39637 + .quad 0x123e0ef6b9302309 + .quad 0xc959c6c57887b6ad + .quad 0x94e19ead5f90feba + .quad 0x16e24e62a342f504 + .quad 0x164ed34b18161700 + .quad 0x487ed94c192fe69a + .quad 0x61ae2cea3a911513 + .quad 0x877bf6d3b9a4de27 + .quad 0x78da0fc61073f3eb + + // 2^56 * 8 * G + + .quad 0x5bf15d28e52bc66a + .quad 0x2c47e31870f01a8e + .quad 0x2419afbc06c28bdd + .quad 0x2d25deeb256b173a + .quad 0xa29f80f1680c3a94 + .quad 0x71f77e151ae9e7e6 + .quad 0x1100f15848017973 + .quad 0x054aa4b316b38ddd + .quad 0xdfc8468d19267cb8 + .quad 0x0b28789c66e54daf + .quad 0x2aeb1d2a666eec17 + .quad 0x134610a6ab7da760 + + // 2^60 * 1 * G + + .quad 0xcaf55ec27c59b23f + .quad 0x99aeed3e154d04f2 + .quad 0x68441d72e14141f4 + .quad 0x140345133932a0a2 + .quad 0xd91430e0dc028c3c + .quad 0x0eb955a85217c771 + .quad 0x4b09e1ed2c99a1fa + .quad 0x42881af2bd6a743c + .quad 0x7bfec69aab5cad3d + .quad 0xc23e8cd34cb2cfad + .quad 0x685dd14bfb37d6a2 + .quad 0x0ad6d64415677a18 + + // 2^60 * 2 * G + + .quad 0x781a439e417becb5 + .quad 0x4ac5938cd10e0266 + .quad 0x5da385110692ac24 + .quad 0x11b065a2ade31233 + .quad 0x7914892847927e9f + .quad 0x33dad6ef370aa877 + .quad 0x1f8f24fa11122703 + .quad 0x5265ac2f2adf9592 + .quad 0x405fdd309afcb346 + .quad 0xd9723d4428e63f54 + .quad 0x94c01df05f65aaae + .quad 0x43e4dc3ae14c0809 + + // 2^60 * 3 * G + + .quad 0xbc12c7f1a938a517 + .quad 0x473028ab3180b2e1 + .quad 0x3f78571efbcd254a + .quad 0x74e534426ff6f90f + .quad 0xea6f7ac3adc2c6a3 + .quad 0xd0e928f6e9717c94 + .quad 0xe2d379ead645eaf5 + .quad 0x46dd8785c51ffbbe + .quad 0x709801be375c8898 + .quad 0x4b06dab5e3fd8348 + .quad 0x75880ced27230714 + .quad 
0x2b09468fdd2f4c42 + + // 2^60 * 4 * G + + .quad 0x97c749eeb701cb96 + .quad 0x83f438d4b6a369c3 + .quad 0x62962b8b9a402cd9 + .quad 0x6976c7509888df7b + .quad 0x5b97946582ffa02a + .quad 0xda096a51fea8f549 + .quad 0xa06351375f77af9b + .quad 0x1bcfde61201d1e76 + .quad 0x4a4a5490246a59a2 + .quad 0xd63ebddee87fdd90 + .quad 0xd9437c670d2371fa + .quad 0x69e87308d30f8ed6 + + // 2^60 * 5 * G + + .quad 0x435a8bb15656beb0 + .quad 0xf8fac9ba4f4d5bca + .quad 0xb9b278c41548c075 + .quad 0x3eb0ef76e892b622 + .quad 0x0f80bf028bc80303 + .quad 0x6aae16b37a18cefb + .quad 0xdd47ea47d72cd6a3 + .quad 0x61943588f4ed39aa + .quad 0xd26e5c3e91039f85 + .quad 0xc0e9e77df6f33aa9 + .quad 0xe8968c5570066a93 + .quad 0x3c34d1881faaaddd + + // 2^60 * 6 * G + + .quad 0x3f9d2b5ea09f9ec0 + .quad 0x1dab3b6fb623a890 + .quad 0xa09ba3ea72d926c4 + .quad 0x374193513fd8b36d + .quad 0xbd5b0b8f2fffe0d9 + .quad 0x6aa254103ed24fb9 + .quad 0x2ac7d7bcb26821c4 + .quad 0x605b394b60dca36a + .quad 0xb4e856e45a9d1ed2 + .quad 0xefe848766c97a9a2 + .quad 0xb104cf641e5eee7d + .quad 0x2f50b81c88a71c8f + + // 2^60 * 7 * G + + .quad 0x31723c61fc6811bb + .quad 0x9cb450486211800f + .quad 0x768933d347995753 + .quad 0x3491a53502752fcd + .quad 0x2b552ca0a7da522a + .quad 0x3230b336449b0250 + .quad 0xf2c4c5bca4b99fb9 + .quad 0x7b2c674958074a22 + .quad 0xd55165883ed28cdf + .quad 0x12d84fd2d362de39 + .quad 0x0a874ad3e3378e4f + .quad 0x000d2b1f7c763e74 + + // 2^60 * 8 * G + + .quad 0x3d420811d06d4a67 + .quad 0xbefc048590e0ffe3 + .quad 0xf870c6b7bd487bde + .quad 0x6e2a7316319afa28 + .quad 0x9624778c3e94a8ab + .quad 0x0ad6f3cee9a78bec + .quad 0x948ac7810d743c4f + .quad 0x76627935aaecfccc + .quad 0x56a8ac24d6d59a9f + .quad 0xc8db753e3096f006 + .quad 0x477f41e68f4c5299 + .quad 0x588d851cf6c86114 + + // 2^64 * 1 * G + + .quad 0x51138ec78df6b0fe + .quad 0x5397da89e575f51b + .quad 0x09207a1d717af1b9 + .quad 0x2102fdba2b20d650 + .quad 0xcd2a65e777d1f515 + .quad 0x548991878faa60f1 + .quad 0xb1b73bbcdabc06e5 + .quad 0x654878cba97cc9fb + .quad 0x969ee405055ce6a1 + .quad 0x36bca7681251ad29 + .quad 0x3a1af517aa7da415 + .quad 0x0ad725db29ecb2ba + + // 2^64 * 2 * G + + .quad 0xdc4267b1834e2457 + .quad 0xb67544b570ce1bc5 + .quad 0x1af07a0bf7d15ed7 + .quad 0x4aefcffb71a03650 + .quad 0xfec7bc0c9b056f85 + .quad 0x537d5268e7f5ffd7 + .quad 0x77afc6624312aefa + .quad 0x4f675f5302399fd9 + .quad 0xc32d36360415171e + .quad 0xcd2bef118998483b + .quad 0x870a6eadd0945110 + .quad 0x0bccbb72a2a86561 + + // 2^64 * 3 * G + + .quad 0x185e962feab1a9c8 + .quad 0x86e7e63565147dcd + .quad 0xb092e031bb5b6df2 + .quad 0x4024f0ab59d6b73e + .quad 0x186d5e4c50fe1296 + .quad 0xe0397b82fee89f7e + .quad 0x3bc7f6c5507031b0 + .quad 0x6678fd69108f37c2 + .quad 0x1586fa31636863c2 + .quad 0x07f68c48572d33f2 + .quad 0x4f73cc9f789eaefc + .quad 0x2d42e2108ead4701 + + // 2^64 * 4 * G + + .quad 0x97f5131594dfd29b + .quad 0x6155985d313f4c6a + .quad 0xeba13f0708455010 + .quad 0x676b2608b8d2d322 + .quad 0x21717b0d0f537593 + .quad 0x914e690b131e064c + .quad 0x1bb687ae752ae09f + .quad 0x420bf3a79b423c6e + .quad 0x8138ba651c5b2b47 + .quad 0x8671b6ec311b1b80 + .quad 0x7bff0cb1bc3135b0 + .quad 0x745d2ffa9c0cf1e0 + + // 2^64 * 5 * G + + .quad 0xbf525a1e2bc9c8bd + .quad 0xea5b260826479d81 + .quad 0xd511c70edf0155db + .quad 0x1ae23ceb960cf5d0 + .quad 0x6036df5721d34e6a + .quad 0xb1db8827997bb3d0 + .quad 0xd3c209c3c8756afa + .quad 0x06e15be54c1dc839 + .quad 0x5b725d871932994a + .quad 0x32351cb5ceb1dab0 + .quad 0x7dc41549dab7ca05 + .quad 0x58ded861278ec1f7 + + // 2^64 * 6 * G + + .quad 0xd8173793f266c55c + .quad 0xc8c976c5cc454e49 
+ .quad 0x5ce382f8bc26c3a8 + .quad 0x2ff39de85485f6f9 + .quad 0x2dfb5ba8b6c2c9a8 + .quad 0x48eeef8ef52c598c + .quad 0x33809107f12d1573 + .quad 0x08ba696b531d5bd8 + .quad 0x77ed3eeec3efc57a + .quad 0x04e05517d4ff4811 + .quad 0xea3d7a3ff1a671cb + .quad 0x120633b4947cfe54 + + // 2^64 * 7 * G + + .quad 0x0b94987891610042 + .quad 0x4ee7b13cecebfae8 + .quad 0x70be739594f0a4c0 + .quad 0x35d30a99b4d59185 + .quad 0x82bd31474912100a + .quad 0xde237b6d7e6fbe06 + .quad 0xe11e761911ea79c6 + .quad 0x07433be3cb393bde + .quad 0xff7944c05ce997f4 + .quad 0x575d3de4b05c51a3 + .quad 0x583381fd5a76847c + .quad 0x2d873ede7af6da9f + + // 2^64 * 8 * G + + .quad 0x157a316443373409 + .quad 0xfab8b7eef4aa81d9 + .quad 0xb093fee6f5a64806 + .quad 0x2e773654707fa7b6 + .quad 0xaa6202e14e5df981 + .quad 0xa20d59175015e1f5 + .quad 0x18a275d3bae21d6c + .quad 0x0543618a01600253 + .quad 0x0deabdf4974c23c1 + .quad 0xaa6f0a259dce4693 + .quad 0x04202cb8a29aba2c + .quad 0x4b1443362d07960d + + // 2^68 * 1 * G + + .quad 0x47b837f753242cec + .quad 0x256dc48cc04212f2 + .quad 0xe222fbfbe1d928c5 + .quad 0x48ea295bad8a2c07 + .quad 0x299b1c3f57c5715e + .quad 0x96cb929e6b686d90 + .quad 0x3004806447235ab3 + .quad 0x2c435c24a44d9fe1 + .quad 0x0607c97c80f8833f + .quad 0x0e851578ca25ec5b + .quad 0x54f7450b161ebb6f + .quad 0x7bcb4792a0def80e + + // 2^68 * 2 * G + + .quad 0x8487e3d02bc73659 + .quad 0x4baf8445059979df + .quad 0xd17c975adcad6fbf + .quad 0x57369f0bdefc96b6 + .quad 0x1cecd0a0045224c2 + .quad 0x757f1b1b69e53952 + .quad 0x775b7a925289f681 + .quad 0x1b6cc62016736148 + .quad 0xf1a9990175638698 + .quad 0x353dd1beeeaa60d3 + .quad 0x849471334c9ba488 + .quad 0x63fa6e6843ade311 + + // 2^68 * 3 * G + + .quad 0xd15c20536597c168 + .quad 0x9f73740098d28789 + .quad 0x18aee7f13257ba1f + .quad 0x3418bfda07346f14 + .quad 0x2195becdd24b5eb7 + .quad 0x5e41f18cc0cd44f9 + .quad 0xdf28074441ca9ede + .quad 0x07073b98f35b7d67 + .quad 0xd03c676c4ce530d4 + .quad 0x0b64c0473b5df9f4 + .quad 0x065cef8b19b3a31e + .quad 0x3084d661533102c9 + + // 2^68 * 4 * G + + .quad 0xe1f6b79ebf8469ad + .quad 0x15801004e2663135 + .quad 0x9a498330af74181b + .quad 0x3ba2504f049b673c + .quad 0x9a6ce876760321fd + .quad 0x7fe2b5109eb63ad8 + .quad 0x00e7d4ae8ac80592 + .quad 0x73d86b7abb6f723a + .quad 0x0b52b5606dba5ab6 + .quad 0xa9134f0fbbb1edab + .quad 0x30a9520d9b04a635 + .quad 0x6813b8f37973e5db + + // 2^68 * 5 * G + + .quad 0x9854b054334127c1 + .quad 0x105d047882fbff25 + .quad 0xdb49f7f944186f4f + .quad 0x1768e838bed0b900 + .quad 0xf194ca56f3157e29 + .quad 0x136d35705ef528a5 + .quad 0xdd4cef778b0599bc + .quad 0x7d5472af24f833ed + .quad 0xd0ef874daf33da47 + .quad 0x00d3be5db6e339f9 + .quad 0x3f2a8a2f9c9ceece + .quad 0x5d1aeb792352435a + + // 2^68 * 6 * G + + .quad 0xf59e6bb319cd63ca + .quad 0x670c159221d06839 + .quad 0xb06d565b2150cab6 + .quad 0x20fb199d104f12a3 + .quad 0x12c7bfaeb61ba775 + .quad 0xb84e621fe263bffd + .quad 0x0b47a5c35c840dcf + .quad 0x7e83be0bccaf8634 + .quad 0x61943dee6d99c120 + .quad 0x86101f2e460b9fe0 + .quad 0x6bb2f1518ee8598d + .quad 0x76b76289fcc475cc + + // 2^68 * 7 * G + + .quad 0x791b4cc1756286fa + .quad 0xdbced317d74a157c + .quad 0x7e732421ea72bde6 + .quad 0x01fe18491131c8e9 + .quad 0x4245f1a1522ec0b3 + .quad 0x558785b22a75656d + .quad 0x1d485a2548a1b3c0 + .quad 0x60959eccd58fe09f + .quad 0x3ebfeb7ba8ed7a09 + .quad 0x49fdc2bbe502789c + .quad 0x44ebce5d3c119428 + .quad 0x35e1eb55be947f4a + + // 2^68 * 8 * G + + .quad 0xdbdae701c5738dd3 + .quad 0xf9c6f635b26f1bee + .quad 0x61e96a8042f15ef4 + .quad 0x3aa1d11faf60a4d8 + .quad 0x14fd6dfa726ccc74 + .quad 
0x3b084cfe2f53b965 + .quad 0xf33ae4f552a2c8b4 + .quad 0x59aab07a0d40166a + .quad 0x77bcec4c925eac25 + .quad 0x1848718460137738 + .quad 0x5b374337fea9f451 + .quad 0x1865e78ec8e6aa46 + + // 2^72 * 1 * G + + .quad 0xccc4b7c7b66e1f7a + .quad 0x44157e25f50c2f7e + .quad 0x3ef06dfc713eaf1c + .quad 0x582f446752da63f7 + .quad 0x967c54e91c529ccb + .quad 0x30f6269264c635fb + .quad 0x2747aff478121965 + .quad 0x17038418eaf66f5c + .quad 0xc6317bd320324ce4 + .quad 0xa81042e8a4488bc4 + .quad 0xb21ef18b4e5a1364 + .quad 0x0c2a1c4bcda28dc9 + + // 2^72 * 2 * G + + .quad 0xd24dc7d06f1f0447 + .quad 0xb2269e3edb87c059 + .quad 0xd15b0272fbb2d28f + .quad 0x7c558bd1c6f64877 + .quad 0xedc4814869bd6945 + .quad 0x0d6d907dbe1c8d22 + .quad 0xc63bd212d55cc5ab + .quad 0x5a6a9b30a314dc83 + .quad 0xd0ec1524d396463d + .quad 0x12bb628ac35a24f0 + .quad 0xa50c3a791cbc5fa4 + .quad 0x0404a5ca0afbafc3 + + // 2^72 * 3 * G + + .quad 0x8c1f40070aa743d6 + .quad 0xccbad0cb5b265ee8 + .quad 0x574b046b668fd2de + .quad 0x46395bfdcadd9633 + .quad 0x62bc9e1b2a416fd1 + .quad 0xb5c6f728e350598b + .quad 0x04343fd83d5d6967 + .quad 0x39527516e7f8ee98 + .quad 0x117fdb2d1a5d9a9c + .quad 0x9c7745bcd1005c2a + .quad 0xefd4bef154d56fea + .quad 0x76579a29e822d016 + + // 2^72 * 4 * G + + .quad 0x45b68e7e49c02a17 + .quad 0x23cd51a2bca9a37f + .quad 0x3ed65f11ec224c1b + .quad 0x43a384dc9e05bdb1 + .quad 0x333cb51352b434f2 + .quad 0xd832284993de80e1 + .quad 0xb5512887750d35ce + .quad 0x02c514bb2a2777c1 + .quad 0x684bd5da8bf1b645 + .quad 0xfb8bd37ef6b54b53 + .quad 0x313916d7a9b0d253 + .quad 0x1160920961548059 + + // 2^72 * 5 * G + + .quad 0xb44d166929dacfaa + .quad 0xda529f4c8413598f + .quad 0xe9ef63ca453d5559 + .quad 0x351e125bc5698e0b + .quad 0x7a385616369b4dcd + .quad 0x75c02ca7655c3563 + .quad 0x7dc21bf9d4f18021 + .quad 0x2f637d7491e6e042 + .quad 0xd4b49b461af67bbe + .quad 0xd603037ac8ab8961 + .quad 0x71dee19ff9a699fb + .quad 0x7f182d06e7ce2a9a + + // 2^72 * 6 * G + + .quad 0x7a7c8e64ab0168ec + .quad 0xcb5a4a5515edc543 + .quad 0x095519d347cd0eda + .quad 0x67d4ac8c343e93b0 + .quad 0x09454b728e217522 + .quad 0xaa58e8f4d484b8d8 + .quad 0xd358254d7f46903c + .quad 0x44acc043241c5217 + .quad 0x1c7d6bbb4f7a5777 + .quad 0x8b35fed4918313e1 + .quad 0x4adca1c6c96b4684 + .quad 0x556d1c8312ad71bd + + // 2^72 * 7 * G + + .quad 0x17ef40e30c8d3982 + .quad 0x31f7073e15a3fa34 + .quad 0x4f21f3cb0773646e + .quad 0x746c6c6d1d824eff + .quad 0x81f06756b11be821 + .quad 0x0faff82310a3f3dd + .quad 0xf8b2d0556a99465d + .quad 0x097abe38cc8c7f05 + .quad 0x0c49c9877ea52da4 + .quad 0x4c4369559bdc1d43 + .quad 0x022c3809f7ccebd2 + .quad 0x577e14a34bee84bd + + // 2^72 * 8 * G + + .quad 0xf0e268ac61a73b0a + .quad 0xf2fafa103791a5f5 + .quad 0xc1e13e826b6d00e9 + .quad 0x60fa7ee96fd78f42 + .quad 0x94fecebebd4dd72b + .quad 0xf46a4fda060f2211 + .quad 0x124a5977c0c8d1ff + .quad 0x705304b8fb009295 + .quad 0xb63d1d354d296ec6 + .quad 0xf3c3053e5fad31d8 + .quad 0x670b958cb4bd42ec + .quad 0x21398e0ca16353fd + + // 2^76 * 1 * G + + .quad 0x216ab2ca8da7d2ef + .quad 0x366ad9dd99f42827 + .quad 0xae64b9004fdd3c75 + .quad 0x403a395b53909e62 + .quad 0x86c5fc16861b7e9a + .quad 0xf6a330476a27c451 + .quad 0x01667267a1e93597 + .quad 0x05ffb9cd6082dfeb + .quad 0xa617fa9ff53f6139 + .quad 0x60f2b5e513e66cb6 + .quad 0xd7a8beefb3448aa4 + .quad 0x7a2932856f5ea192 + + // 2^76 * 2 * G + + .quad 0x0b39d761b02de888 + .quad 0x5f550e7ed2414e1f + .quad 0xa6bfa45822e1a940 + .quad 0x050a2f7dfd447b99 + .quad 0xb89c444879639302 + .quad 0x4ae4f19350c67f2c + .quad 0xf0b35da8c81af9c6 + .quad 0x39d0003546871017 + .quad 
0x437c3b33a650db77 + .quad 0x6bafe81dbac52bb2 + .quad 0xfe99402d2db7d318 + .quad 0x2b5b7eec372ba6ce + + // 2^76 * 3 * G + + .quad 0xb3bc4bbd83f50eef + .quad 0x508f0c998c927866 + .quad 0x43e76587c8b7e66e + .quad 0x0f7655a3a47f98d9 + .quad 0xa694404d613ac8f4 + .quad 0x500c3c2bfa97e72c + .quad 0x874104d21fcec210 + .quad 0x1b205fb38604a8ee + .quad 0x55ecad37d24b133c + .quad 0x441e147d6038c90b + .quad 0x656683a1d62c6fee + .quad 0x0157d5dc87e0ecae + + // 2^76 * 4 * G + + .quad 0xf2a7af510354c13d + .quad 0xd7a0b145aa372b60 + .quad 0x2869b96a05a3d470 + .quad 0x6528e42d82460173 + .quad 0x95265514d71eb524 + .quad 0xe603d8815df14593 + .quad 0x147cdf410d4de6b7 + .quad 0x5293b1730437c850 + .quad 0x23d0e0814bccf226 + .quad 0x92c745cd8196fb93 + .quad 0x8b61796c59541e5b + .quad 0x40a44df0c021f978 + + // 2^76 * 5 * G + + .quad 0xdaa869894f20ea6a + .quad 0xea14a3d14c620618 + .quad 0x6001fccb090bf8be + .quad 0x35f4e822947e9cf0 + .quad 0x86c96e514bc5d095 + .quad 0xf20d4098fca6804a + .quad 0x27363d89c826ea5d + .quad 0x39ca36565719cacf + .quad 0x97506f2f6f87b75c + .quad 0xc624aea0034ae070 + .quad 0x1ec856e3aad34dd6 + .quad 0x055b0be0e440e58f + + // 2^76 * 6 * G + + .quad 0x6469a17d89735d12 + .quad 0xdb6f27d5e662b9f1 + .quad 0x9fcba3286a395681 + .quad 0x363b8004d269af25 + .quad 0x4d12a04b6ea33da2 + .quad 0x57cf4c15e36126dd + .quad 0x90ec9675ee44d967 + .quad 0x64ca348d2a985aac + .quad 0x99588e19e4c4912d + .quad 0xefcc3b4e1ca5ce6b + .quad 0x4522ea60fa5b98d5 + .quad 0x7064bbab1de4a819 + + // 2^76 * 7 * G + + .quad 0xb919e1515a770641 + .quad 0xa9a2e2c74e7f8039 + .quad 0x7527250b3df23109 + .quad 0x756a7330ac27b78b + .quad 0xa290c06142542129 + .quad 0xf2e2c2aebe8d5b90 + .quad 0xcf2458db76abfe1b + .quad 0x02157ade83d626bf + .quad 0x3e46972a1b9a038b + .quad 0x2e4ee66a7ee03fb4 + .quad 0x81a248776edbb4ca + .quad 0x1a944ee88ecd0563 + + // 2^76 * 8 * G + + .quad 0xd5a91d1151039372 + .quad 0x2ed377b799ca26de + .quad 0xa17202acfd366b6b + .quad 0x0730291bd6901995 + .quad 0xbb40a859182362d6 + .quad 0xb99f55778a4d1abb + .quad 0x8d18b427758559f6 + .quad 0x26c20fe74d26235a + .quad 0x648d1d9fe9cc22f5 + .quad 0x66bc561928dd577c + .quad 0x47d3ed21652439d1 + .quad 0x49d271acedaf8b49 + + // 2^80 * 1 * G + + .quad 0x89f5058a382b33f3 + .quad 0x5ae2ba0bad48c0b4 + .quad 0x8f93b503a53db36e + .quad 0x5aa3ed9d95a232e6 + .quad 0x2798aaf9b4b75601 + .quad 0x5eac72135c8dad72 + .quad 0xd2ceaa6161b7a023 + .quad 0x1bbfb284e98f7d4e + .quad 0x656777e9c7d96561 + .quad 0xcb2b125472c78036 + .quad 0x65053299d9506eee + .quad 0x4a07e14e5e8957cc + + // 2^80 * 2 * G + + .quad 0x4ee412cb980df999 + .quad 0xa315d76f3c6ec771 + .quad 0xbba5edde925c77fd + .quad 0x3f0bac391d313402 + .quad 0x240b58cdc477a49b + .quad 0xfd38dade6447f017 + .quad 0x19928d32a7c86aad + .quad 0x50af7aed84afa081 + .quad 0x6e4fde0115f65be5 + .quad 0x29982621216109b2 + .quad 0x780205810badd6d9 + .quad 0x1921a316baebd006 + + // 2^80 * 3 * G + + .quad 0x89422f7edfb870fc + .quad 0x2c296beb4f76b3bd + .quad 0x0738f1d436c24df7 + .quad 0x6458df41e273aeb0 + .quad 0xd75aad9ad9f3c18b + .quad 0x566a0eef60b1c19c + .quad 0x3e9a0bac255c0ed9 + .quad 0x7b049deca062c7f5 + .quad 0xdccbe37a35444483 + .quad 0x758879330fedbe93 + .quad 0x786004c312c5dd87 + .quad 0x6093dccbc2950e64 + + // 2^80 * 4 * G + + .quad 0x1ff39a8585e0706d + .quad 0x36d0a5d8b3e73933 + .quad 0x43b9f2e1718f453b + .quad 0x57d1ea084827a97c + .quad 0x6bdeeebe6084034b + .quad 0x3199c2b6780fb854 + .quad 0x973376abb62d0695 + .quad 0x6e3180c98b647d90 + .quad 0xee7ab6e7a128b071 + .quad 0xa4c1596d93a88baa + .quad 0xf7b4de82b2216130 + .quad 
0x363e999ddd97bd18 + + // 2^80 * 5 * G + + .quad 0x96a843c135ee1fc4 + .quad 0x976eb35508e4c8cf + .quad 0xb42f6801b58cd330 + .quad 0x48ee9b78693a052b + .quad 0x2f1848dce24baec6 + .quad 0x769b7255babcaf60 + .quad 0x90cb3c6e3cefe931 + .quad 0x231f979bc6f9b355 + .quad 0x5c31de4bcc2af3c6 + .quad 0xb04bb030fe208d1f + .quad 0xb78d7009c14fb466 + .quad 0x079bfa9b08792413 + + // 2^80 * 6 * G + + .quad 0xe3903a51da300df4 + .quad 0x843964233da95ab0 + .quad 0xed3cf12d0b356480 + .quad 0x038c77f684817194 + .quad 0xf3c9ed80a2d54245 + .quad 0x0aa08b7877f63952 + .quad 0xd76dac63d1085475 + .quad 0x1ef4fb159470636b + .quad 0x854e5ee65b167bec + .quad 0x59590a4296d0cdc2 + .quad 0x72b2df3498102199 + .quad 0x575ee92a4a0bff56 + + // 2^80 * 7 * G + + .quad 0xd4c080908a182fcf + .quad 0x30e170c299489dbd + .quad 0x05babd5752f733de + .quad 0x43d4e7112cd3fd00 + .quad 0x5d46bc450aa4d801 + .quad 0xc3af1227a533b9d8 + .quad 0x389e3b262b8906c2 + .quad 0x200a1e7e382f581b + .quad 0x518db967eaf93ac5 + .quad 0x71bc989b056652c0 + .quad 0xfe2b85d9567197f5 + .quad 0x050eca52651e4e38 + + // 2^80 * 8 * G + + .quad 0xc3431ade453f0c9c + .quad 0xe9f5045eff703b9b + .quad 0xfcd97ac9ed847b3d + .quad 0x4b0ee6c21c58f4c6 + .quad 0x97ac397660e668ea + .quad 0x9b19bbfe153ab497 + .quad 0x4cb179b534eca79f + .quad 0x6151c09fa131ae57 + .quad 0x3af55c0dfdf05d96 + .quad 0xdd262ee02ab4ee7a + .quad 0x11b2bb8712171709 + .quad 0x1fef24fa800f030b + + // 2^84 * 1 * G + + .quad 0xb496123a6b6c6609 + .quad 0xa750fe8580ab5938 + .quad 0xf471bf39b7c27a5f + .quad 0x507903ce77ac193c + .quad 0xff91a66a90166220 + .quad 0xf22552ae5bf1e009 + .quad 0x7dff85d87f90df7c + .quad 0x4f620ffe0c736fb9 + .quad 0x62f90d65dfde3e34 + .quad 0xcf28c592b9fa5fad + .quad 0x99c86ef9c6164510 + .quad 0x25d448044a256c84 + + // 2^84 * 2 * G + + .quad 0xbd68230ec7e9b16f + .quad 0x0eb1b9c1c1c5795d + .quad 0x7943c8c495b6b1ff + .quad 0x2f9faf620bbacf5e + .quad 0x2c7c4415c9022b55 + .quad 0x56a0d241812eb1fe + .quad 0xf02ea1c9d7b65e0d + .quad 0x4180512fd5323b26 + .quad 0xa4ff3e698a48a5db + .quad 0xba6a3806bd95403b + .quad 0x9f7ce1af47d5b65d + .quad 0x15e087e55939d2fb + + // 2^84 * 3 * G + + .quad 0x12207543745c1496 + .quad 0xdaff3cfdda38610c + .quad 0xe4e797272c71c34f + .quad 0x39c07b1934bdede9 + .quad 0x8894186efb963f38 + .quad 0x48a00e80dc639bd5 + .quad 0xa4e8092be96c1c99 + .quad 0x5a097d54ca573661 + .quad 0x2d45892b17c9e755 + .quad 0xd033fd7289308df8 + .quad 0x6c2fe9d9525b8bd9 + .quad 0x2edbecf1c11cc079 + + // 2^84 * 4 * G + + .quad 0x1616a4e3c715a0d2 + .quad 0x53623cb0f8341d4d + .quad 0x96ef5329c7e899cb + .quad 0x3d4e8dbba668baa6 + .quad 0xee0f0fddd087a25f + .quad 0x9c7531555c3e34ee + .quad 0x660c572e8fab3ab5 + .quad 0x0854fc44544cd3b2 + .quad 0x61eba0c555edad19 + .quad 0x24b533fef0a83de6 + .quad 0x3b77042883baa5f8 + .quad 0x678f82b898a47e8d + + // 2^84 * 5 * G + + .quad 0xb1491d0bd6900c54 + .quad 0x3539722c9d132636 + .quad 0x4db928920b362bc9 + .quad 0x4d7cd1fea68b69df + .quad 0x1e09d94057775696 + .quad 0xeed1265c3cd951db + .quad 0xfa9dac2b20bce16f + .quad 0x0f7f76e0e8d089f4 + .quad 0x36d9ebc5d485b00c + .quad 0xa2596492e4adb365 + .quad 0xc1659480c2119ccd + .quad 0x45306349186e0d5f + + // 2^84 * 6 * G + + .quad 0x94ddd0c1a6cdff1d + .quad 0x55f6f115e84213ae + .quad 0x6c935f85992fcf6a + .quad 0x067ee0f54a37f16f + .quad 0x96a414ec2b072491 + .quad 0x1bb2218127a7b65b + .quad 0x6d2849596e8a4af0 + .quad 0x65f3b08ccd27765f + .quad 0xecb29fff199801f7 + .quad 0x9d361d1fa2a0f72f + .quad 0x25f11d2375fd2f49 + .quad 0x124cefe80fe10fe2 + + // 2^84 * 7 * G + + .quad 0x4c126cf9d18df255 + .quad 0xc1d471e9147a63b6 
+ .quad 0x2c6d3c73f3c93b5f + .quad 0x6be3a6a2e3ff86a2 + .quad 0x1518e85b31b16489 + .quad 0x8faadcb7db710bfb + .quad 0x39b0bdf4a14ae239 + .quad 0x05f4cbea503d20c1 + .quad 0xce040e9ec04145bc + .quad 0xc71ff4e208f6834c + .quad 0xbd546e8dab8847a3 + .quad 0x64666aa0a4d2aba5 + + // 2^84 * 8 * G + + .quad 0x6841435a7c06d912 + .quad 0xca123c21bb3f830b + .quad 0xd4b37b27b1cbe278 + .quad 0x1d753b84c76f5046 + .quad 0xb0c53bf73337e94c + .quad 0x7cb5697e11e14f15 + .quad 0x4b84abac1930c750 + .quad 0x28dd4abfe0640468 + .quad 0x7dc0b64c44cb9f44 + .quad 0x18a3e1ace3925dbf + .quad 0x7a3034862d0457c4 + .quad 0x4c498bf78a0c892e + + // 2^88 * 1 * G + + .quad 0x37d653fb1aa73196 + .quad 0x0f9495303fd76418 + .quad 0xad200b09fb3a17b2 + .quad 0x544d49292fc8613e + .quad 0x22d2aff530976b86 + .quad 0x8d90b806c2d24604 + .quad 0xdca1896c4de5bae5 + .quad 0x28005fe6c8340c17 + .quad 0x6aefba9f34528688 + .quad 0x5c1bff9425107da1 + .quad 0xf75bbbcd66d94b36 + .quad 0x72e472930f316dfa + + // 2^88 * 2 * G + + .quad 0x2695208c9781084f + .quad 0xb1502a0b23450ee1 + .quad 0xfd9daea603efde02 + .quad 0x5a9d2e8c2733a34c + .quad 0x07f3f635d32a7627 + .quad 0x7aaa4d865f6566f0 + .quad 0x3c85e79728d04450 + .quad 0x1fee7f000fe06438 + .quad 0x765305da03dbf7e5 + .quad 0xa4daf2491434cdbd + .quad 0x7b4ad5cdd24a88ec + .quad 0x00f94051ee040543 + + // 2^88 * 3 * G + + .quad 0x8d356b23c3d330b2 + .quad 0xf21c8b9bb0471b06 + .quad 0xb36c316c6e42b83c + .quad 0x07d79c7e8beab10d + .quad 0xd7ef93bb07af9753 + .quad 0x583ed0cf3db766a7 + .quad 0xce6998bf6e0b1ec5 + .quad 0x47b7ffd25dd40452 + .quad 0x87fbfb9cbc08dd12 + .quad 0x8a066b3ae1eec29b + .quad 0x0d57242bdb1fc1bf + .quad 0x1c3520a35ea64bb6 + + // 2^88 * 4 * G + + .quad 0x80d253a6bccba34a + .quad 0x3e61c3a13838219b + .quad 0x90c3b6019882e396 + .quad 0x1c3d05775d0ee66f + .quad 0xcda86f40216bc059 + .quad 0x1fbb231d12bcd87e + .quad 0xb4956a9e17c70990 + .quad 0x38750c3b66d12e55 + .quad 0x692ef1409422e51a + .quad 0xcbc0c73c2b5df671 + .quad 0x21014fe7744ce029 + .quad 0x0621e2c7d330487c + + // 2^88 * 5 * G + + .quad 0xaf9860cc8259838d + .quad 0x90ea48c1c69f9adc + .quad 0x6526483765581e30 + .quad 0x0007d6097bd3a5bc + .quad 0xb7ae1796b0dbf0f3 + .quad 0x54dfafb9e17ce196 + .quad 0x25923071e9aaa3b4 + .quad 0x5d8e589ca1002e9d + .quad 0xc0bf1d950842a94b + .quad 0xb2d3c363588f2e3e + .quad 0x0a961438bb51e2ef + .quad 0x1583d7783c1cbf86 + + // 2^88 * 6 * G + + .quad 0xeceea2ef5da27ae1 + .quad 0x597c3a1455670174 + .quad 0xc9a62a126609167a + .quad 0x252a5f2e81ed8f70 + .quad 0x90034704cc9d28c7 + .quad 0x1d1b679ef72cc58f + .quad 0x16e12b5fbe5b8726 + .quad 0x4958064e83c5580a + .quad 0x0d2894265066e80d + .quad 0xfcc3f785307c8c6b + .quad 0x1b53da780c1112fd + .quad 0x079c170bd843b388 + + // 2^88 * 7 * G + + .quad 0x0506ece464fa6fff + .quad 0xbee3431e6205e523 + .quad 0x3579422451b8ea42 + .quad 0x6dec05e34ac9fb00 + .quad 0xcdd6cd50c0d5d056 + .quad 0x9af7686dbb03573b + .quad 0x3ca6723ff3c3ef48 + .quad 0x6768c0d7317b8acc + .quad 0x94b625e5f155c1b3 + .quad 0x417bf3a7997b7b91 + .quad 0xc22cbddc6d6b2600 + .quad 0x51445e14ddcd52f4 + + // 2^88 * 8 * G + + .quad 0x57502b4b3b144951 + .quad 0x8e67ff6b444bbcb3 + .quad 0xb8bd6927166385db + .quad 0x13186f31e39295c8 + .quad 0x893147ab2bbea455 + .quad 0x8c53a24f92079129 + .quad 0x4b49f948be30f7a7 + .quad 0x12e990086e4fd43d + .quad 0xf10c96b37fdfbb2e + .quad 0x9f9a935e121ceaf9 + .quad 0xdf1136c43a5b983f + .quad 0x77b2e3f05d3e99af + + // 2^92 * 1 * G + + .quad 0xfd0d75879cf12657 + .quad 0xe82fef94e53a0e29 + .quad 0xcc34a7f05bbb4be7 + .quad 0x0b251172a50c38a2 + .quad 0x9532f48fcc5cd29b + .quad 
0x2ba851bea3ce3671 + .quad 0x32dacaa051122941 + .quad 0x478d99d9350004f2 + .quad 0x1d5ad94890bb02c0 + .quad 0x50e208b10ec25115 + .quad 0xa26a22894ef21702 + .quad 0x4dc923343b524805 + + // 2^92 * 2 * G + + .quad 0xe3828c400f8086b6 + .quad 0x3f77e6f7979f0dc8 + .quad 0x7ef6de304df42cb4 + .quad 0x5265797cb6abd784 + .quad 0x3ad3e3ebf36c4975 + .quad 0xd75d25a537862125 + .quad 0xe873943da025a516 + .quad 0x6bbc7cb4c411c847 + .quad 0x3c6f9cd1d4a50d56 + .quad 0xb6244077c6feab7e + .quad 0x6ff9bf483580972e + .quad 0x00375883b332acfb + + // 2^92 * 3 * G + + .quad 0x0001b2cd28cb0940 + .quad 0x63fb51a06f1c24c9 + .quad 0xb5ad8691dcd5ca31 + .quad 0x67238dbd8c450660 + .quad 0xc98bec856c75c99c + .quad 0xe44184c000e33cf4 + .quad 0x0a676b9bba907634 + .quad 0x669e2cb571f379d7 + .quad 0xcb116b73a49bd308 + .quad 0x025aad6b2392729e + .quad 0xb4793efa3f55d9b1 + .quad 0x72a1056140678bb9 + + // 2^92 * 4 * G + + .quad 0xa2b6812b1cc9249d + .quad 0x62866eee21211f58 + .quad 0x2cb5c5b85df10ece + .quad 0x03a6b259e263ae00 + .quad 0x0d8d2909e2e505b6 + .quad 0x98ca78abc0291230 + .quad 0x77ef5569a9b12327 + .quad 0x7c77897b81439b47 + .quad 0xf1c1b5e2de331cb5 + .quad 0x5a9f5d8e15fca420 + .quad 0x9fa438f17bd932b1 + .quad 0x2a381bf01c6146e7 + + // 2^92 * 5 * G + + .quad 0xac9b9879cfc811c1 + .quad 0x8b7d29813756e567 + .quad 0x50da4e607c70edfc + .quad 0x5dbca62f884400b6 + .quad 0xf7c0be32b534166f + .quad 0x27e6ca6419cf70d4 + .quad 0x934df7d7a957a759 + .quad 0x5701461dabdec2aa + .quad 0x2c6747402c915c25 + .quad 0x1bdcd1a80b0d340a + .quad 0x5e5601bd07b43f5f + .quad 0x2555b4e05539a242 + + // 2^92 * 6 * G + + .quad 0x6fc09f5266ddd216 + .quad 0xdce560a7c8e37048 + .quad 0xec65939da2df62fd + .quad 0x7a869ae7e52ed192 + .quad 0x78409b1d87e463d4 + .quad 0xad4da95acdfb639d + .quad 0xec28773755259b9c + .quad 0x69c806e9c31230ab + .quad 0x7b48f57414bb3f22 + .quad 0x68c7cee4aedccc88 + .quad 0xed2f936179ed80be + .quad 0x25d70b885f77bc4b + + // 2^92 * 7 * G + + .quad 0x4151c3d9762bf4de + .quad 0x083f435f2745d82b + .quad 0x29775a2e0d23ddd5 + .quad 0x138e3a6269a5db24 + .quad 0x98459d29bb1ae4d4 + .quad 0x56b9c4c739f954ec + .quad 0x832743f6c29b4b3e + .quad 0x21ea8e2798b6878a + .quad 0x87bef4b46a5a7b9c + .quad 0xd2299d1b5fc1d062 + .quad 0x82409818dd321648 + .quad 0x5c5abeb1e5a2e03d + + // 2^92 * 8 * G + + .quad 0x14722af4b73c2ddb + .quad 0xbc470c5f5a05060d + .quad 0x00943eac2581b02e + .quad 0x0e434b3b1f499c8f + .quad 0x02cde6de1306a233 + .quad 0x7b5a52a2116f8ec7 + .quad 0xe1c681f4c1163b5b + .quad 0x241d350660d32643 + .quad 0x6be4404d0ebc52c7 + .quad 0xae46233bb1a791f5 + .quad 0x2aec170ed25db42b + .quad 0x1d8dfd966645d694 + + // 2^96 * 1 * G + + .quad 0x296fa9c59c2ec4de + .quad 0xbc8b61bf4f84f3cb + .quad 0x1c7706d917a8f908 + .quad 0x63b795fc7ad3255d + .quad 0xd598639c12ddb0a4 + .quad 0xa5d19f30c024866b + .quad 0xd17c2f0358fce460 + .quad 0x07a195152e095e8a + .quad 0xa8368f02389e5fc8 + .quad 0x90433b02cf8de43b + .quad 0xafa1fd5dc5412643 + .quad 0x3e8fe83d032f0137 + + // 2^96 * 2 * G + + .quad 0x2f8b15b90570a294 + .quad 0x94f2427067084549 + .quad 0xde1c5ae161bbfd84 + .quad 0x75ba3b797fac4007 + .quad 0x08704c8de8efd13c + .quad 0xdfc51a8e33e03731 + .quad 0xa59d5da51260cde3 + .quad 0x22d60899a6258c86 + .quad 0x6239dbc070cdd196 + .quad 0x60fe8a8b6c7d8a9a + .quad 0xb38847bceb401260 + .quad 0x0904d07b87779e5e + + // 2^96 * 3 * G + + .quad 0xb4ce1fd4ddba919c + .quad 0xcf31db3ec74c8daa + .quad 0x2c63cc63ad86cc51 + .quad 0x43e2143fbc1dde07 + .quad 0xf4322d6648f940b9 + .quad 0x06952f0cbd2d0c39 + .quad 0x167697ada081f931 + .quad 0x6240aacebaf72a6c + .quad 
0xf834749c5ba295a0 + .quad 0xd6947c5bca37d25a + .quad 0x66f13ba7e7c9316a + .quad 0x56bdaf238db40cac + + // 2^96 * 4 * G + + .quad 0x362ab9e3f53533eb + .quad 0x338568d56eb93d40 + .quad 0x9e0e14521d5a5572 + .quad 0x1d24a86d83741318 + .quad 0x1310d36cc19d3bb2 + .quad 0x062a6bb7622386b9 + .quad 0x7c9b8591d7a14f5c + .quad 0x03aa31507e1e5754 + .quad 0xf4ec7648ffd4ce1f + .quad 0xe045eaf054ac8c1c + .quad 0x88d225821d09357c + .quad 0x43b261dc9aeb4859 + + // 2^96 * 5 * G + + .quad 0xe55b1e1988bb79bb + .quad 0xa09ed07dc17a359d + .quad 0xb02c2ee2603dea33 + .quad 0x326055cf5b276bc2 + .quad 0x19513d8b6c951364 + .quad 0x94fe7126000bf47b + .quad 0x028d10ddd54f9567 + .quad 0x02b4d5e242940964 + .quad 0xb4a155cb28d18df2 + .quad 0xeacc4646186ce508 + .quad 0xc49cf4936c824389 + .quad 0x27a6c809ae5d3410 + + // 2^96 * 6 * G + + .quad 0x8ba6ebcd1f0db188 + .quad 0x37d3d73a675a5be8 + .quad 0xf22edfa315f5585a + .quad 0x2cb67174ff60a17e + .quad 0xcd2c270ac43d6954 + .quad 0xdd4a3e576a66cab2 + .quad 0x79fa592469d7036c + .quad 0x221503603d8c2599 + .quad 0x59eecdf9390be1d0 + .quad 0xa9422044728ce3f1 + .quad 0x82891c667a94f0f4 + .quad 0x7b1df4b73890f436 + + // 2^96 * 7 * G + + .quad 0xe492f2e0b3b2a224 + .quad 0x7c6c9e062b551160 + .quad 0x15eb8fe20d7f7b0e + .quad 0x61fcef2658fc5992 + .quad 0x5f2e221807f8f58c + .quad 0xe3555c9fd49409d4 + .quad 0xb2aaa88d1fb6a630 + .quad 0x68698245d352e03d + .quad 0xdbb15d852a18187a + .quad 0xf3e4aad386ddacd7 + .quad 0x44bae2810ff6c482 + .quad 0x46cf4c473daf01cf + + // 2^96 * 8 * G + + .quad 0x426525ed9ec4e5f9 + .quad 0x0e5eda0116903303 + .quad 0x72b1a7f2cbe5cadc + .quad 0x29387bcd14eb5f40 + .quad 0x213c6ea7f1498140 + .quad 0x7c1e7ef8392b4854 + .quad 0x2488c38c5629ceba + .quad 0x1065aae50d8cc5bb + .quad 0x1c2c4525df200d57 + .quad 0x5c3b2dd6bfca674a + .quad 0x0a07e7b1e1834030 + .quad 0x69a198e64f1ce716 + + // 2^100 * 1 * G + + .quad 0x7afcd613efa9d697 + .quad 0x0cc45aa41c067959 + .quad 0xa56fe104c1fada96 + .quad 0x3a73b70472e40365 + .quad 0x7b26e56b9e2d4734 + .quad 0xc4c7132b81c61675 + .quad 0xef5c9525ec9cde7f + .quad 0x39c80b16e71743ad + .quad 0x0f196e0d1b826c68 + .quad 0xf71ff0e24960e3db + .quad 0x6113167023b7436c + .quad 0x0cf0ea5877da7282 + + // 2^100 * 2 * G + + .quad 0x196c80a4ddd4ccbd + .quad 0x22e6f55d95f2dd9d + .quad 0xc75e33c740d6c71b + .quad 0x7bb51279cb3c042f + .quad 0xe332ced43ba6945a + .quad 0xde0b1361e881c05d + .quad 0x1ad40f095e67ed3b + .quad 0x5da8acdab8c63d5d + .quad 0xc4b6664a3a70159f + .quad 0x76194f0f0a904e14 + .quad 0xa5614c39a4096c13 + .quad 0x6cd0ff50979feced + + // 2^100 * 3 * G + + .quad 0xc0e067e78f4428ac + .quad 0x14835ab0a61135e3 + .quad 0xf21d14f338062935 + .quad 0x6390a4c8df04849c + .quad 0x7fecfabdb04ba18e + .quad 0xd0fc7bfc3bddbcf7 + .quad 0xa41d486e057a131c + .quad 0x641a4391f2223a61 + .quad 0xc5c6b95aa606a8db + .quad 0x914b7f9eb06825f1 + .quad 0x2a731f6b44fc9eff + .quad 0x30ddf38562705cfc + + // 2^100 * 4 * G + + .quad 0x4e3dcbdad1bff7f9 + .quad 0xc9118e8220645717 + .quad 0xbacccebc0f189d56 + .quad 0x1b4822e9d4467668 + .quad 0x33bef2bd68bcd52c + .quad 0xc649dbb069482ef2 + .quad 0xb5b6ee0c41cb1aee + .quad 0x5c294d270212a7e5 + .quad 0xab360a7f25563781 + .quad 0x2512228a480f7958 + .quad 0xc75d05276114b4e3 + .quad 0x222d9625d976fe2a + + // 2^100 * 5 * G + + .quad 0x1c717f85b372ace1 + .quad 0x81930e694638bf18 + .quad 0x239cad056bc08b58 + .quad 0x0b34271c87f8fff4 + .quad 0x0f94be7e0a344f85 + .quad 0xeb2faa8c87f22c38 + .quad 0x9ce1e75e4ee16f0f + .quad 0x43e64e5418a08dea + .quad 0x8155e2521a35ce63 + .quad 0xbe100d4df912028e + .quad 0xbff80bf8a57ddcec + .quad 
0x57342dc96d6bc6e4 + + // 2^100 * 6 * G + + .quad 0xefeef065c8ce5998 + .quad 0xbf029510b5cbeaa2 + .quad 0x8c64a10620b7c458 + .quad 0x35134fb231c24855 + .quad 0xf3c3bcb71e707bf6 + .quad 0x351d9b8c7291a762 + .quad 0x00502e6edad69a33 + .quad 0x522f521f1ec8807f + .quad 0x272c1f46f9a3902b + .quad 0xc91ba3b799657bcc + .quad 0xae614b304f8a1c0e + .quad 0x7afcaad70b99017b + + // 2^100 * 7 * G + + .quad 0xc25ded54a4b8be41 + .quad 0x902d13e11bb0e2dd + .quad 0x41f43233cde82ab2 + .quad 0x1085faa5c3aae7cb + .quad 0xa88141ecef842b6b + .quad 0x55e7b14797abe6c5 + .quad 0x8c748f9703784ffe + .quad 0x5b50a1f7afcd00b7 + .quad 0x9b840f66f1361315 + .quad 0x18462242701003e9 + .quad 0x65ed45fae4a25080 + .quad 0x0a2862393fda7320 + + // 2^100 * 8 * G + + .quad 0x46ab13c8347cbc9d + .quad 0x3849e8d499c12383 + .quad 0x4cea314087d64ac9 + .quad 0x1f354134b1a29ee7 + .quad 0x960e737b6ecb9d17 + .quad 0xfaf24948d67ceae1 + .quad 0x37e7a9b4d55e1b89 + .quad 0x5cb7173cb46c59eb + .quad 0x4a89e68b82b7abf0 + .quad 0xf41cd9279ba6b7b9 + .quad 0x16e6c210e18d876f + .quad 0x7cacdb0f7f1b09c6 + + // 2^104 * 1 * G + + .quad 0x9062b2e0d91a78bc + .quad 0x47c9889cc8509667 + .quad 0x9df54a66405070b8 + .quad 0x7369e6a92493a1bf + .quad 0xe1014434dcc5caed + .quad 0x47ed5d963c84fb33 + .quad 0x70019576ed86a0e7 + .quad 0x25b2697bd267f9e4 + .quad 0x9d673ffb13986864 + .quad 0x3ca5fbd9415dc7b8 + .quad 0xe04ecc3bdf273b5e + .quad 0x1420683db54e4cd2 + + // 2^104 * 2 * G + + .quad 0xb478bd1e249dd197 + .quad 0x620c35005e58c102 + .quad 0xfb02d32fccbaac5c + .quad 0x60b63bebf508a72d + .quad 0x34eebb6fc1cc5ad0 + .quad 0x6a1b0ce99646ac8b + .quad 0xd3b0da49a66bde53 + .quad 0x31e83b4161d081c1 + .quad 0x97e8c7129e062b4f + .quad 0x49e48f4f29320ad8 + .quad 0x5bece14b6f18683f + .quad 0x55cf1eb62d550317 + + // 2^104 * 3 * G + + .quad 0x5879101065c23d58 + .quad 0x8b9d086d5094819c + .quad 0xe2402fa912c55fa7 + .quad 0x669a6564570891d4 + .quad 0x3076b5e37df58c52 + .quad 0xd73ab9dde799cc36 + .quad 0xbd831ce34913ee20 + .quad 0x1a56fbaa62ba0133 + .quad 0x943e6b505c9dc9ec + .quad 0x302557bba77c371a + .quad 0x9873ae5641347651 + .quad 0x13c4836799c58a5c + + // 2^104 * 4 * G + + .quad 0x423a5d465ab3e1b9 + .quad 0xfc13c187c7f13f61 + .quad 0x19f83664ecb5b9b6 + .quad 0x66f80c93a637b607 + .quad 0xc4dcfb6a5d8bd080 + .quad 0xdeebc4ec571a4842 + .quad 0xd4b2e883b8e55365 + .quad 0x50bdc87dc8e5b827 + .quad 0x606d37836edfe111 + .quad 0x32353e15f011abd9 + .quad 0x64b03ac325b73b96 + .quad 0x1dd56444725fd5ae + + // 2^104 * 5 * G + + .quad 0x8fa47ff83362127d + .quad 0xbc9f6ac471cd7c15 + .quad 0x6e71454349220c8b + .quad 0x0e645912219f732e + .quad 0xc297e60008bac89a + .quad 0x7d4cea11eae1c3e0 + .quad 0xf3e38be19fe7977c + .quad 0x3a3a450f63a305cd + .quad 0x078f2f31d8394627 + .quad 0x389d3183de94a510 + .quad 0xd1e36c6d17996f80 + .quad 0x318c8d9393a9a87b + + // 2^104 * 6 * G + + .quad 0xf2745d032afffe19 + .quad 0x0c9f3c497f24db66 + .quad 0xbc98d3e3ba8598ef + .quad 0x224c7c679a1d5314 + .quad 0x5d669e29ab1dd398 + .quad 0xfc921658342d9e3b + .quad 0x55851dfdf35973cd + .quad 0x509a41c325950af6 + .quad 0xbdc06edca6f925e9 + .quad 0x793ef3f4641b1f33 + .quad 0x82ec12809d833e89 + .quad 0x05bff02328a11389 + + // 2^104 * 7 * G + + .quad 0x3632137023cae00b + .quad 0x544acf0ad1accf59 + .quad 0x96741049d21a1c88 + .quad 0x780b8cc3fa2a44a7 + .quad 0x6881a0dd0dc512e4 + .quad 0x4fe70dc844a5fafe + .quad 0x1f748e6b8f4a5240 + .quad 0x576277cdee01a3ea + .quad 0x1ef38abc234f305f + .quad 0x9a577fbd1405de08 + .quad 0x5e82a51434e62a0d + .quad 0x5ff418726271b7a1 + + // 2^104 * 8 * G + + .quad 0x398e080c1789db9d + .quad 
0xa7602025f3e778f5 + .quad 0xfa98894c06bd035d + .quad 0x106a03dc25a966be + .quad 0xe5db47e813b69540 + .quad 0xf35d2a3b432610e1 + .quad 0xac1f26e938781276 + .quad 0x29d4db8ca0a0cb69 + .quad 0xd9ad0aaf333353d0 + .quad 0x38669da5acd309e5 + .quad 0x3c57658ac888f7f0 + .quad 0x4ab38a51052cbefa + + // 2^108 * 1 * G + + .quad 0xdfdacbee4324c0e9 + .quad 0x054442883f955bb7 + .quad 0xdef7aaa8ea31609f + .quad 0x68aee70642287cff + .quad 0xf68fe2e8809de054 + .quad 0xe3bc096a9c82bad1 + .quad 0x076353d40aadbf45 + .quad 0x7b9b1fb5dea1959e + .quad 0xf01cc8f17471cc0c + .quad 0x95242e37579082bb + .quad 0x27776093d3e46b5f + .quad 0x2d13d55a28bd85fb + + // 2^108 * 2 * G + + .quad 0xfac5d2065b35b8da + .quad 0xa8da8a9a85624bb7 + .quad 0xccd2ca913d21cd0f + .quad 0x6b8341ee8bf90d58 + .quad 0xbf019cce7aee7a52 + .quad 0xa8ded2b6e454ead3 + .quad 0x3c619f0b87a8bb19 + .quad 0x3619b5d7560916d8 + .quad 0x3579f26b0282c4b2 + .quad 0x64d592f24fafefae + .quad 0xb7cded7b28c8c7c0 + .quad 0x6a927b6b7173a8d7 + + // 2^108 * 3 * G + + .quad 0x1f6db24f986e4656 + .quad 0x1021c02ed1e9105b + .quad 0xf8ff3fff2cc0a375 + .quad 0x1d2a6bf8c6c82592 + .quad 0x8d7040863ece88eb + .quad 0xf0e307a980eec08c + .quad 0xac2250610d788fda + .quad 0x056d92a43a0d478d + .quad 0x1b05a196fc3da5a1 + .quad 0x77d7a8c243b59ed0 + .quad 0x06da3d6297d17918 + .quad 0x66fbb494f12353f7 + + // 2^108 * 4 * G + + .quad 0x751a50b9d85c0fb8 + .quad 0xd1afdc258bcf097b + .quad 0x2f16a6a38309a969 + .quad 0x14ddff9ee5b00659 + .quad 0xd6d70996f12309d6 + .quad 0xdbfb2385e9c3d539 + .quad 0x46d602b0f7552411 + .quad 0x270a0b0557843e0c + .quad 0x61ff0640a7862bcc + .quad 0x81cac09a5f11abfe + .quad 0x9047830455d12abb + .quad 0x19a4bde1945ae873 + + // 2^108 * 5 * G + + .quad 0x9b9f26f520a6200a + .quad 0x64804443cf13eaf8 + .quad 0x8a63673f8631edd3 + .quad 0x72bbbce11ed39dc1 + .quad 0x40c709dec076c49f + .quad 0x657bfaf27f3e53f6 + .quad 0x40662331eca042c4 + .quad 0x14b375487eb4df04 + .quad 0xae853c94ab66dc47 + .quad 0xeb62343edf762d6e + .quad 0xf08e0e186fb2f7d1 + .quad 0x4f0b1c02700ab37a + + // 2^108 * 6 * G + + .quad 0xe1706787d81951fa + .quad 0xa10a2c8eb290c77b + .quad 0xe7382fa03ed66773 + .quad 0x0a4d84710bcc4b54 + .quad 0x79fd21ccc1b2e23f + .quad 0x4ae7c281453df52a + .quad 0xc8172ec9d151486b + .quad 0x68abe9443e0a7534 + .quad 0xda12c6c407831dcb + .quad 0x0da230d74d5c510d + .quad 0x4ab1531e6bd404e1 + .quad 0x4106b166bcf440ef + + // 2^108 * 7 * G + + .quad 0x02e57a421cd23668 + .quad 0x4ad9fb5d0eaef6fd + .quad 0x954e6727b1244480 + .quad 0x7f792f9d2699f331 + .quad 0xa485ccd539e4ecf2 + .quad 0x5aa3f3ad0555bab5 + .quad 0x145e3439937df82d + .quad 0x1238b51e1214283f + .quad 0x0b886b925fd4d924 + .quad 0x60906f7a3626a80d + .quad 0xecd367b4b98abd12 + .quad 0x2876beb1def344cf + + // 2^108 * 8 * G + + .quad 0xdc84e93563144691 + .quad 0x632fe8a0d61f23f4 + .quad 0x4caa800612a9a8d5 + .quad 0x48f9dbfa0e9918d3 + .quad 0xd594b3333a8a85f8 + .quad 0x4ea37689e78d7d58 + .quad 0x73bf9f455e8e351f + .quad 0x5507d7d2bc41ebb4 + .quad 0x1ceb2903299572fc + .quad 0x7c8ccaa29502d0ee + .quad 0x91bfa43411cce67b + .quad 0x5784481964a831e7 + + // 2^112 * 1 * G + + .quad 0xda7c2b256768d593 + .quad 0x98c1c0574422ca13 + .quad 0xf1a80bd5ca0ace1d + .quad 0x29cdd1adc088a690 + .quad 0xd6cfd1ef5fddc09c + .quad 0xe82b3efdf7575dce + .quad 0x25d56b5d201634c2 + .quad 0x3041c6bb04ed2b9b + .quad 0x0ff2f2f9d956e148 + .quad 0xade797759f356b2e + .quad 0x1a4698bb5f6c025c + .quad 0x104bbd6814049a7b + + // 2^112 * 2 * G + + .quad 0x51f0fd3168f1ed67 + .quad 0x2c811dcdd86f3bc2 + .quad 0x44dc5c4304d2f2de + .quad 0x5be8cc57092a7149 + .quad 
0xa95d9a5fd67ff163 + .quad 0xe92be69d4cc75681 + .quad 0xb7f8024cde20f257 + .quad 0x204f2a20fb072df5 + .quad 0xc8143b3d30ebb079 + .quad 0x7589155abd652e30 + .quad 0x653c3c318f6d5c31 + .quad 0x2570fb17c279161f + + // 2^112 * 3 * G + + .quad 0x3efa367f2cb61575 + .quad 0xf5f96f761cd6026c + .quad 0xe8c7142a65b52562 + .quad 0x3dcb65ea53030acd + .quad 0x192ea9550bb8245a + .quad 0xc8e6fba88f9050d1 + .quad 0x7986ea2d88a4c935 + .quad 0x241c5f91de018668 + .quad 0x28d8172940de6caa + .quad 0x8fbf2cf022d9733a + .quad 0x16d7fcdd235b01d1 + .quad 0x08420edd5fcdf0e5 + + // 2^112 * 4 * G + + .quad 0xcdff20ab8362fa4a + .quad 0x57e118d4e21a3e6e + .quad 0xe3179617fc39e62b + .quad 0x0d9a53efbc1769fd + .quad 0x0358c34e04f410ce + .quad 0xb6135b5a276e0685 + .quad 0x5d9670c7ebb91521 + .quad 0x04d654f321db889c + .quad 0x5e7dc116ddbdb5d5 + .quad 0x2954deb68da5dd2d + .quad 0x1cb608173334a292 + .quad 0x4a7a4f2618991ad7 + + // 2^112 * 5 * G + + .quad 0xf4a718025fb15f95 + .quad 0x3df65f346b5c1b8f + .quad 0xcdfcf08500e01112 + .quad 0x11b50c4cddd31848 + .quad 0x24c3b291af372a4b + .quad 0x93da8270718147f2 + .quad 0xdd84856486899ef2 + .quad 0x4a96314223e0ee33 + .quad 0xa6e8274408a4ffd6 + .quad 0x738e177e9c1576d9 + .quad 0x773348b63d02b3f2 + .quad 0x4f4bce4dce6bcc51 + + // 2^112 * 6 * G + + .quad 0xa71fce5ae2242584 + .quad 0x26ea725692f58a9e + .quad 0xd21a09d71cea3cf4 + .quad 0x73fcdd14b71c01e6 + .quad 0x30e2616ec49d0b6f + .quad 0xe456718fcaec2317 + .quad 0x48eb409bf26b4fa6 + .quad 0x3042cee561595f37 + .quad 0x427e7079449bac41 + .quad 0x855ae36dbce2310a + .quad 0x4cae76215f841a7c + .quad 0x389e740c9a9ce1d6 + + // 2^112 * 7 * G + + .quad 0x64fcb3ae34dcb9ce + .quad 0x97500323e348d0ad + .quad 0x45b3f07d62c6381b + .quad 0x61545379465a6788 + .quad 0xc9bd78f6570eac28 + .quad 0xe55b0b3227919ce1 + .quad 0x65fc3eaba19b91ed + .quad 0x25c425e5d6263690 + .quad 0x3f3e06a6f1d7de6e + .quad 0x3ef976278e062308 + .quad 0x8c14f6264e8a6c77 + .quad 0x6539a08915484759 + + // 2^112 * 8 * G + + .quad 0xe9d21f74c3d2f773 + .quad 0xc150544125c46845 + .quad 0x624e5ce8f9b99e33 + .quad 0x11c5e4aac5cd186c + .quad 0xddc4dbd414bb4a19 + .quad 0x19b2bc3c98424f8e + .quad 0x48a89fd736ca7169 + .quad 0x0f65320ef019bd90 + .quad 0xd486d1b1cafde0c6 + .quad 0x4f3fe6e3163b5181 + .quad 0x59a8af0dfaf2939a + .quad 0x4cabc7bdec33072a + + // 2^116 * 1 * G + + .quad 0x16faa8fb532f7428 + .quad 0xdbd42ea046a4e272 + .quad 0x5337653b8b9ea480 + .quad 0x4065947223973f03 + .quad 0xf7c0a19c1a54a044 + .quad 0x4a1c5e2477bd9fbb + .quad 0xa6e3ca115af22972 + .quad 0x1819bb953f2e9e0d + .quad 0x498fbb795e042e84 + .quad 0x7d0dd89a7698b714 + .quad 0x8bfb0ba427fe6295 + .quad 0x36ba82e721200524 + + // 2^116 * 2 * G + + .quad 0xd60ecbb74245ec41 + .quad 0xfd9be89e34348716 + .quad 0xc9240afee42284de + .quad 0x4472f648d0531db4 + .quad 0xc8d69d0a57274ed5 + .quad 0x45ba803260804b17 + .quad 0xdf3cda102255dfac + .quad 0x77d221232709b339 + .quad 0x498a6d7064ad94d8 + .quad 0xa5b5c8fd9af62263 + .quad 0x8ca8ed0545c141f4 + .quad 0x2c63bec3662d358c + + // 2^116 * 3 * G + + .quad 0x7fe60d8bea787955 + .quad 0xb9dc117eb5f401b7 + .quad 0x91c7c09a19355cce + .quad 0x22692ef59442bedf + .quad 0x9a518b3a8586f8bf + .quad 0x9ee71af6cbb196f0 + .quad 0xaa0625e6a2385cf2 + .quad 0x1deb2176ddd7c8d1 + .quad 0x8563d19a2066cf6c + .quad 0x401bfd8c4dcc7cd7 + .quad 0xd976a6becd0d8f62 + .quad 0x67cfd773a278b05e + + // 2^116 * 4 * G + + .quad 0x8dec31faef3ee475 + .quad 0x99dbff8a9e22fd92 + .quad 0x512d11594e26cab1 + .quad 0x0cde561eec4310b9 + .quad 0x2d5fa9855a4e586a + .quad 0x65f8f7a449beab7e + .quad 0xaa074dddf21d33d3 + .quad 
0x185cba721bcb9dee + .quad 0x93869da3f4e3cb41 + .quad 0xbf0392f540f7977e + .quad 0x026204fcd0463b83 + .quad 0x3ec91a769eec6eed + + // 2^116 * 5 * G + + .quad 0x1e9df75bf78166ad + .quad 0x4dfda838eb0cd7af + .quad 0xba002ed8c1eaf988 + .quad 0x13fedb3e11f33cfc + .quad 0x0fad2fb7b0a3402f + .quad 0x46615ecbfb69f4a8 + .quad 0xf745bcc8c5f8eaa6 + .quad 0x7a5fa8794a94e896 + .quad 0x52958faa13cd67a1 + .quad 0x965ee0818bdbb517 + .quad 0x16e58daa2e8845b3 + .quad 0x357d397d5499da8f + + // 2^116 * 6 * G + + .quad 0x1ebfa05fb0bace6c + .quad 0xc934620c1caf9a1e + .quad 0xcc771cc41d82b61a + .quad 0x2d94a16aa5f74fec + .quad 0x481dacb4194bfbf8 + .quad 0x4d77e3f1bae58299 + .quad 0x1ef4612e7d1372a0 + .quad 0x3a8d867e70ff69e1 + .quad 0x6f58cd5d55aff958 + .quad 0xba3eaa5c75567721 + .quad 0x75c123999165227d + .quad 0x69be1343c2f2b35e + + // 2^116 * 7 * G + + .quad 0x0e091d5ee197c92a + .quad 0x4f51019f2945119f + .quad 0x143679b9f034e99c + .quad 0x7d88112e4d24c696 + .quad 0x82bbbdac684b8de3 + .quad 0xa2f4c7d03fca0718 + .quad 0x337f92fbe096aaa8 + .quad 0x200d4d8c63587376 + .quad 0x208aed4b4893b32b + .quad 0x3efbf23ebe59b964 + .quad 0xd762deb0dba5e507 + .quad 0x69607bd681bd9d94 + + // 2^116 * 8 * G + + .quad 0xf6be021068de1ce1 + .quad 0xe8d518e70edcbc1f + .quad 0xe3effdd01b5505a5 + .quad 0x35f63353d3ec3fd0 + .quad 0x3b7f3bd49323a902 + .quad 0x7c21b5566b2c6e53 + .quad 0xe5ba8ff53a7852a7 + .quad 0x28bc77a5838ece00 + .quad 0x63ba78a8e25d8036 + .quad 0x63651e0094333490 + .quad 0x48d82f20288ce532 + .quad 0x3a31abfa36b57524 + + // 2^120 * 1 * G + + .quad 0x239e9624089c0a2e + .quad 0xc748c4c03afe4738 + .quad 0x17dbed2a764fa12a + .quad 0x639b93f0321c8582 + .quad 0xc08f788f3f78d289 + .quad 0xfe30a72ca1404d9f + .quad 0xf2778bfccf65cc9d + .quad 0x7ee498165acb2021 + .quad 0x7bd508e39111a1c3 + .quad 0x2b2b90d480907489 + .quad 0xe7d2aec2ae72fd19 + .quad 0x0edf493c85b602a6 + + // 2^120 * 2 * G + + .quad 0xaecc8158599b5a68 + .quad 0xea574f0febade20e + .quad 0x4fe41d7422b67f07 + .quad 0x403b92e3019d4fb4 + .quad 0x6767c4d284764113 + .quad 0xa090403ff7f5f835 + .quad 0x1c8fcffacae6bede + .quad 0x04c00c54d1dfa369 + .quad 0x4dc22f818b465cf8 + .quad 0x71a0f35a1480eff8 + .quad 0xaee8bfad04c7d657 + .quad 0x355bb12ab26176f4 + + // 2^120 * 3 * G + + .quad 0xa71e64cc7493bbf4 + .quad 0xe5bd84d9eca3b0c3 + .quad 0x0a6bc50cfa05e785 + .quad 0x0f9b8132182ec312 + .quad 0xa301dac75a8c7318 + .quad 0xed90039db3ceaa11 + .quad 0x6f077cbf3bae3f2d + .quad 0x7518eaf8e052ad8e + .quad 0xa48859c41b7f6c32 + .quad 0x0f2d60bcf4383298 + .quad 0x1815a929c9b1d1d9 + .quad 0x47c3871bbb1755c4 + + // 2^120 * 4 * G + + .quad 0x5144539771ec4f48 + .quad 0xf805b17dc98c5d6e + .quad 0xf762c11a47c3c66b + .quad 0x00b89b85764699dc + .quad 0xfbe65d50c85066b0 + .quad 0x62ecc4b0b3a299b0 + .quad 0xe53754ea441ae8e0 + .quad 0x08fea02ce8d48d5f + .quad 0x824ddd7668deead0 + .quad 0xc86445204b685d23 + .quad 0xb514cfcd5d89d665 + .quad 0x473829a74f75d537 + + // 2^120 * 5 * G + + .quad 0x82d2da754679c418 + .quad 0xe63bd7d8b2618df0 + .quad 0x355eef24ac47eb0a + .quad 0x2078684c4833c6b4 + .quad 0x23d9533aad3902c9 + .quad 0x64c2ddceef03588f + .quad 0x15257390cfe12fb4 + .quad 0x6c668b4d44e4d390 + .quad 0x3b48cf217a78820c + .quad 0xf76a0ab281273e97 + .quad 0xa96c65a78c8eed7b + .quad 0x7411a6054f8a433f + + // 2^120 * 6 * G + + .quad 0x4d659d32b99dc86d + .quad 0x044cdc75603af115 + .quad 0xb34c712cdcc2e488 + .quad 0x7c136574fb8134ff + .quad 0x579ae53d18b175b4 + .quad 0x68713159f392a102 + .quad 0x8455ecba1eef35f5 + .quad 0x1ec9a872458c398f + .quad 0xb8e6a4d400a2509b + .quad 0x9b81d7020bc882b4 + .quad 
0x57e7cc9bf1957561 + .quad 0x3add88a5c7cd6460 + + // 2^120 * 7 * G + + .quad 0xab895770b635dcf2 + .quad 0x02dfef6cf66c1fbc + .quad 0x85530268beb6d187 + .quad 0x249929fccc879e74 + .quad 0x85c298d459393046 + .quad 0x8f7e35985ff659ec + .quad 0x1d2ca22af2f66e3a + .quad 0x61ba1131a406a720 + .quad 0xa3d0a0f116959029 + .quad 0x023b6b6cba7ebd89 + .quad 0x7bf15a3e26783307 + .quad 0x5620310cbbd8ece7 + + // 2^120 * 8 * G + + .quad 0x528993434934d643 + .quad 0xb9dbf806a51222f5 + .quad 0x8f6d878fc3f41c22 + .quad 0x37676a2a4d9d9730 + .quad 0x6646b5f477e285d6 + .quad 0x40e8ff676c8f6193 + .quad 0xa6ec7311abb594dd + .quad 0x7ec846f3658cec4d + .quad 0x9b5e8f3f1da22ec7 + .quad 0x130f1d776c01cd13 + .quad 0x214c8fcfa2989fb8 + .quad 0x6daaf723399b9dd5 + + // 2^124 * 1 * G + + .quad 0x591e4a5610628564 + .quad 0x2a4bb87ca8b4df34 + .quad 0xde2a2572e7a38e43 + .quad 0x3cbdabd9fee5046e + .quad 0x81aebbdd2cd13070 + .quad 0x962e4325f85a0e9e + .quad 0xde9391aacadffecb + .quad 0x53177fda52c230e6 + .quad 0xa7bc970650b9de79 + .quad 0x3d12a7fbc301b59b + .quad 0x02652e68d36ae38c + .quad 0x79d739835a6199dc + + // 2^124 * 2 * G + + .quad 0xd9354df64131c1bd + .quad 0x758094a186ec5822 + .quad 0x4464ee12e459f3c2 + .quad 0x6c11fce4cb133282 + .quad 0x21c9d9920d591737 + .quad 0x9bea41d2e9b46cd6 + .quad 0xe20e84200d89bfca + .quad 0x79d99f946eae5ff8 + .quad 0xf17b483568673205 + .quad 0x387deae83caad96c + .quad 0x61b471fd56ffe386 + .quad 0x31741195b745a599 + + // 2^124 * 3 * G + + .quad 0xe8d10190b77a360b + .quad 0x99b983209995e702 + .quad 0xbd4fdff8fa0247aa + .quad 0x2772e344e0d36a87 + .quad 0x17f8ba683b02a047 + .quad 0x50212096feefb6c8 + .quad 0x70139be21556cbe2 + .quad 0x203e44a11d98915b + .quad 0xd6863eba37b9e39f + .quad 0x105bc169723b5a23 + .quad 0x104f6459a65c0762 + .quad 0x567951295b4d38d4 + + // 2^124 * 4 * G + + .quad 0x535fd60613037524 + .quad 0xe210adf6b0fbc26a + .quad 0xac8d0a9b23e990ae + .quad 0x47204d08d72fdbf9 + .quad 0x07242eb30d4b497f + .quad 0x1ef96306b9bccc87 + .quad 0x37950934d8116f45 + .quad 0x05468d6201405b04 + .quad 0x00f565a9f93267de + .quad 0xcecfd78dc0d58e8a + .quad 0xa215e2dcf318e28e + .quad 0x4599ee919b633352 + + // 2^124 * 5 * G + + .quad 0xd3c220ca70e0e76b + .quad 0xb12bea58ea9f3094 + .quad 0x294ddec8c3271282 + .quad 0x0c3539e1a1d1d028 + .quad 0xac746d6b861ae579 + .quad 0x31ab0650f6aea9dc + .quad 0x241d661140256d4c + .quad 0x2f485e853d21a5de + .quad 0x329744839c0833f3 + .quad 0x6fe6257fd2abc484 + .quad 0x5327d1814b358817 + .quad 0x65712585893fe9bc + + // 2^124 * 6 * G + + .quad 0x9c102fb732a61161 + .quad 0xe48e10dd34d520a8 + .quad 0x365c63546f9a9176 + .quad 0x32f6fe4c046f6006 + .quad 0x81c29f1bd708ee3f + .quad 0xddcb5a05ae6407d0 + .quad 0x97aec1d7d2a3eba7 + .quad 0x1590521a91d50831 + .quad 0x40a3a11ec7910acc + .quad 0x9013dff8f16d27ae + .quad 0x1a9720d8abb195d4 + .quad 0x1bb9fe452ea98463 + + // 2^124 * 7 * G + + .quad 0xe9d1d950b3d54f9e + .quad 0x2d5f9cbee00d33c1 + .quad 0x51c2c656a04fc6ac + .quad 0x65c091ee3c1cbcc9 + .quad 0xcf5e6c95cc36747c + .quad 0x294201536b0bc30d + .quad 0x453ac67cee797af0 + .quad 0x5eae6ab32a8bb3c9 + .quad 0x7083661114f118ea + .quad 0x2b37b87b94349cad + .quad 0x7273f51cb4e99f40 + .quad 0x78a2a95823d75698 + + // 2^124 * 8 * G + + .quad 0xa2b072e95c8c2ace + .quad 0x69cffc96651e9c4b + .quad 0x44328ef842e7b42b + .quad 0x5dd996c122aadeb3 + .quad 0xb4f23c425ef83207 + .quad 0xabf894d3c9a934b5 + .quad 0xd0708c1339fd87f7 + .quad 0x1876789117166130 + .quad 0x925b5ef0670c507c + .quad 0x819bc842b93c33bf + .quad 0x10792e9a70dd003f + .quad 0x59ad4b7a6e28dc74 + + // 2^128 * 1 * G + + .quad 
0x5f3a7562eb3dbe47 + .quad 0xf7ea38548ebda0b8 + .quad 0x00c3e53145747299 + .quad 0x1304e9e71627d551 + .quad 0x583b04bfacad8ea2 + .quad 0x29b743e8148be884 + .quad 0x2b1e583b0810c5db + .quad 0x2b5449e58eb3bbaa + .quad 0x789814d26adc9cfe + .quad 0x3c1bab3f8b48dd0b + .quad 0xda0fe1fff979c60a + .quad 0x4468de2d7c2dd693 + + // 2^128 * 2 * G + + .quad 0x51bb355e9419469e + .quad 0x33e6dc4c23ddc754 + .quad 0x93a5b6d6447f9962 + .quad 0x6cce7c6ffb44bd63 + .quad 0x4b9ad8c6f86307ce + .quad 0x21113531435d0c28 + .quad 0xd4a866c5657a772c + .quad 0x5da6427e63247352 + .quad 0x1a94c688deac22ca + .quad 0xb9066ef7bbae1ff8 + .quad 0x88ad8c388d59580f + .quad 0x58f29abfe79f2ca8 + + // 2^128 * 3 * G + + .quad 0xe90ecfab8de73e68 + .quad 0x54036f9f377e76a5 + .quad 0xf0495b0bbe015982 + .quad 0x577629c4a7f41e36 + .quad 0x4b5a64bf710ecdf6 + .quad 0xb14ce538462c293c + .quad 0x3643d056d50b3ab9 + .quad 0x6af93724185b4870 + .quad 0x3220024509c6a888 + .quad 0xd2e036134b558973 + .quad 0x83e236233c33289f + .quad 0x701f25bb0caec18f + + // 2^128 * 4 * G + + .quad 0xc3a8b0f8e4616ced + .quad 0xf700660e9e25a87d + .quad 0x61e3061ff4bca59c + .quad 0x2e0c92bfbdc40be9 + .quad 0x9d18f6d97cbec113 + .quad 0x844a06e674bfdbe4 + .quad 0x20f5b522ac4e60d6 + .quad 0x720a5bc050955e51 + .quad 0x0c3f09439b805a35 + .quad 0xe84e8b376242abfc + .quad 0x691417f35c229346 + .quad 0x0e9b9cbb144ef0ec + + // 2^128 * 5 * G + + .quad 0xfbbad48ffb5720ad + .quad 0xee81916bdbf90d0e + .quad 0xd4813152635543bf + .quad 0x221104eb3f337bd8 + .quad 0x8dee9bd55db1beee + .quad 0xc9c3ab370a723fb9 + .quad 0x44a8f1bf1c68d791 + .quad 0x366d44191cfd3cde + .quad 0x9e3c1743f2bc8c14 + .quad 0x2eda26fcb5856c3b + .quad 0xccb82f0e68a7fb97 + .quad 0x4167a4e6bc593244 + + // 2^128 * 6 * G + + .quad 0x643b9d2876f62700 + .quad 0x5d1d9d400e7668eb + .quad 0x1b4b430321fc0684 + .quad 0x7938bb7e2255246a + .quad 0xc2be2665f8ce8fee + .quad 0xe967ff14e880d62c + .quad 0xf12e6e7e2f364eee + .quad 0x34b33370cb7ed2f6 + .quad 0xcdc591ee8681d6cc + .quad 0xce02109ced85a753 + .quad 0xed7485c158808883 + .quad 0x1176fc6e2dfe65e4 + + // 2^128 * 7 * G + + .quad 0xb4af6cd05b9c619b + .quad 0x2ddfc9f4b2a58480 + .quad 0x3d4fa502ebe94dc4 + .quad 0x08fc3a4c677d5f34 + .quad 0xdb90e28949770eb8 + .quad 0x98fbcc2aacf440a3 + .quad 0x21354ffeded7879b + .quad 0x1f6a3e54f26906b6 + .quad 0x60a4c199d30734ea + .quad 0x40c085b631165cd6 + .quad 0xe2333e23f7598295 + .quad 0x4f2fad0116b900d1 + + // 2^128 * 8 * G + + .quad 0x44beb24194ae4e54 + .quad 0x5f541c511857ef6c + .quad 0xa61e6b2d368d0498 + .quad 0x445484a4972ef7ab + .quad 0x962cd91db73bb638 + .quad 0xe60577aafc129c08 + .quad 0x6f619b39f3b61689 + .quad 0x3451995f2944ee81 + .quad 0x9152fcd09fea7d7c + .quad 0x4a816c94b0935cf6 + .quad 0x258e9aaa47285c40 + .quad 0x10b89ca6042893b7 + + // 2^132 * 1 * G + + .quad 0x9b2a426e3b646025 + .quad 0x32127190385ce4cf + .quad 0xa25cffc2dd6dea45 + .quad 0x06409010bea8de75 + .quad 0xd67cded679d34aa0 + .quad 0xcc0b9ec0cc4db39f + .quad 0xa535a456e35d190f + .quad 0x2e05d9eaf61f6fef + .quad 0xc447901ad61beb59 + .quad 0x661f19bce5dc880a + .quad 0x24685482b7ca6827 + .quad 0x293c778cefe07f26 + + // 2^132 * 2 * G + + .quad 0x86809e7007069096 + .quad 0xaad75b15e4e50189 + .quad 0x07f35715a21a0147 + .quad 0x0487f3f112815d5e + .quad 0x16c795d6a11ff200 + .quad 0xcb70d0e2b15815c9 + .quad 0x89f293209b5395b5 + .quad 0x50b8c2d031e47b4f + .quad 0x48350c08068a4962 + .quad 0x6ffdd05351092c9a + .quad 0x17af4f4aaf6fc8dd + .quad 0x4b0553b53cdba58b + + // 2^132 * 3 * G + + .quad 0x9c65fcbe1b32ff79 + .quad 0xeb75ea9f03b50f9b + .quad 0xfced2a6c6c07e606 + .quad 
0x35106cd551717908 + .quad 0xbf05211b27c152d4 + .quad 0x5ec26849bd1af639 + .quad 0x5e0b2caa8e6fab98 + .quad 0x054c8bdd50bd0840 + .quad 0x38a0b12f1dcf073d + .quad 0x4b60a8a3b7f6a276 + .quad 0xfed5ac25d3404f9a + .quad 0x72e82d5e5505c229 + + // 2^132 * 4 * G + + .quad 0x6b0b697ff0d844c8 + .quad 0xbb12f85cd979cb49 + .quad 0xd2a541c6c1da0f1f + .quad 0x7b7c242958ce7211 + .quad 0x00d9cdfd69771d02 + .quad 0x410276cd6cfbf17e + .quad 0x4c45306c1cb12ec7 + .quad 0x2857bf1627500861 + .quad 0x9f21903f0101689e + .quad 0xd779dfd3bf861005 + .quad 0xa122ee5f3deb0f1b + .quad 0x510df84b485a00d4 + + // 2^132 * 5 * G + + .quad 0xa54133bb9277a1fa + .quad 0x74ec3b6263991237 + .quad 0x1a3c54dc35d2f15a + .quad 0x2d347144e482ba3a + .quad 0x24b3c887c70ac15e + .quad 0xb0f3a557fb81b732 + .quad 0x9b2cde2fe578cc1b + .quad 0x4cf7ed0703b54f8e + .quad 0x6bd47c6598fbee0f + .quad 0x9e4733e2ab55be2d + .quad 0x1093f624127610c5 + .quad 0x4e05e26ad0a1eaa4 + + // 2^132 * 6 * G + + .quad 0xda9b6b624b531f20 + .quad 0x429a760e77509abb + .quad 0xdbe9f522e823cb80 + .quad 0x618f1856880c8f82 + .quad 0x1833c773e18fe6c0 + .quad 0xe3c4711ad3c87265 + .quad 0x3bfd3c4f0116b283 + .quad 0x1955875eb4cd4db8 + .quad 0x6da6de8f0e399799 + .quad 0x7ad61aa440fda178 + .quad 0xb32cd8105e3563dd + .quad 0x15f6beae2ae340ae + + // 2^132 * 7 * G + + .quad 0x862bcb0c31ec3a62 + .quad 0x810e2b451138f3c2 + .quad 0x788ec4b839dac2a4 + .quad 0x28f76867ae2a9281 + .quad 0xba9a0f7b9245e215 + .quad 0xf368612dd98c0dbb + .quad 0x2e84e4cbf220b020 + .quad 0x6ba92fe962d90eda + .quad 0x3e4df9655884e2aa + .quad 0xbd62fbdbdbd465a5 + .quad 0xd7596caa0de9e524 + .quad 0x6e8042ccb2b1b3d7 + + // 2^132 * 8 * G + + .quad 0xf10d3c29ce28ca6e + .quad 0xbad34540fcb6093d + .quad 0xe7426ed7a2ea2d3f + .quad 0x08af9d4e4ff298b9 + .quad 0x1530653616521f7e + .quad 0x660d06b896203dba + .quad 0x2d3989bc545f0879 + .quad 0x4b5303af78ebd7b0 + .quad 0x72f8a6c3bebcbde8 + .quad 0x4f0fca4adc3a8e89 + .quad 0x6fa9d4e8c7bfdf7a + .quad 0x0dcf2d679b624eb7 + + // 2^136 * 1 * G + + .quad 0x3d5947499718289c + .quad 0x12ebf8c524533f26 + .quad 0x0262bfcb14c3ef15 + .quad 0x20b878d577b7518e + .quad 0x753941be5a45f06e + .quad 0xd07caeed6d9c5f65 + .quad 0x11776b9c72ff51b6 + .quad 0x17d2d1d9ef0d4da9 + .quad 0x27f2af18073f3e6a + .quad 0xfd3fe519d7521069 + .quad 0x22e3b72c3ca60022 + .quad 0x72214f63cc65c6a7 + + // 2^136 * 2 * G + + .quad 0xb4e37f405307a693 + .quad 0xaba714d72f336795 + .quad 0xd6fbd0a773761099 + .quad 0x5fdf48c58171cbc9 + .quad 0x1d9db7b9f43b29c9 + .quad 0xd605824a4f518f75 + .quad 0xf2c072bd312f9dc4 + .quad 0x1f24ac855a1545b0 + .quad 0x24d608328e9505aa + .quad 0x4748c1d10c1420ee + .quad 0xc7ffe45c06fb25a2 + .quad 0x00ba739e2ae395e6 + + // 2^136 * 3 * G + + .quad 0x592e98de5c8790d6 + .quad 0xe5bfb7d345c2a2df + .quad 0x115a3b60f9b49922 + .quad 0x03283a3e67ad78f3 + .quad 0xae4426f5ea88bb26 + .quad 0x360679d984973bfb + .quad 0x5c9f030c26694e50 + .quad 0x72297de7d518d226 + .quad 0x48241dc7be0cb939 + .quad 0x32f19b4d8b633080 + .quad 0xd3dfc90d02289308 + .quad 0x05e1296846271945 + + // 2^136 * 4 * G + + .quad 0xba82eeb32d9c495a + .quad 0xceefc8fcf12bb97c + .quad 0xb02dabae93b5d1e0 + .quad 0x39c00c9c13698d9b + .quad 0xadbfbbc8242c4550 + .quad 0xbcc80cecd03081d9 + .quad 0x843566a6f5c8df92 + .quad 0x78cf25d38258ce4c + .quad 0x15ae6b8e31489d68 + .quad 0xaa851cab9c2bf087 + .quad 0xc9a75a97f04efa05 + .quad 0x006b52076b3ff832 + + // 2^136 * 5 * G + + .quad 0x29e0cfe19d95781c + .quad 0xb681df18966310e2 + .quad 0x57df39d370516b39 + .quad 0x4d57e3443bc76122 + .quad 0xf5cb7e16b9ce082d + .quad 0x3407f14c417abc29 + .quad 
0xd4b36bce2bf4a7ab + .quad 0x7de2e9561a9f75ce + .quad 0xde70d4f4b6a55ecb + .quad 0x4801527f5d85db99 + .quad 0xdbc9c440d3ee9a81 + .quad 0x6b2a90af1a6029ed + + // 2^136 * 6 * G + + .quad 0x6923f4fc9ae61e97 + .quad 0x5735281de03f5fd1 + .quad 0xa764ae43e6edd12d + .quad 0x5fd8f4e9d12d3e4a + .quad 0x77ebf3245bb2d80a + .quad 0xd8301b472fb9079b + .quad 0xc647e6f24cee7333 + .quad 0x465812c8276c2109 + .quad 0x4d43beb22a1062d9 + .quad 0x7065fb753831dc16 + .quad 0x180d4a7bde2968d7 + .quad 0x05b32c2b1cb16790 + + // 2^136 * 7 * G + + .quad 0xc8c05eccd24da8fd + .quad 0xa1cf1aac05dfef83 + .quad 0xdbbeeff27df9cd61 + .quad 0x3b5556a37b471e99 + .quad 0xf7fca42c7ad58195 + .quad 0x3214286e4333f3cc + .quad 0xb6c29d0d340b979d + .quad 0x31771a48567307e1 + .quad 0x32b0c524e14dd482 + .quad 0xedb351541a2ba4b6 + .quad 0xa3d16048282b5af3 + .quad 0x4fc079d27a7336eb + + // 2^136 * 8 * G + + .quad 0x51c938b089bf2f7f + .quad 0x2497bd6502dfe9a7 + .quad 0xffffc09c7880e453 + .quad 0x124567cecaf98e92 + .quad 0xdc348b440c86c50d + .quad 0x1337cbc9cc94e651 + .quad 0x6422f74d643e3cb9 + .quad 0x241170c2bae3cd08 + .quad 0x3ff9ab860ac473b4 + .quad 0xf0911dee0113e435 + .quad 0x4ae75060ebc6c4af + .quad 0x3f8612966c87000d + + // 2^140 * 1 * G + + .quad 0x0c9c5303f7957be4 + .quad 0xa3c31a20e085c145 + .quad 0xb0721d71d0850050 + .quad 0x0aba390eab0bf2da + .quad 0x529fdffe638c7bf3 + .quad 0xdf2b9e60388b4995 + .quad 0xe027b34f1bad0249 + .quad 0x7bc92fc9b9fa74ed + .quad 0x9f97ef2e801ad9f9 + .quad 0x83697d5479afda3a + .quad 0xe906b3ffbd596b50 + .quad 0x02672b37dd3fb8e0 + + // 2^140 * 2 * G + + .quad 0x48b2ca8b260885e4 + .quad 0xa4286bec82b34c1c + .quad 0x937e1a2617f58f74 + .quad 0x741d1fcbab2ca2a5 + .quad 0xee9ba729398ca7f5 + .quad 0xeb9ca6257a4849db + .quad 0x29eb29ce7ec544e1 + .quad 0x232ca21ef736e2c8 + .quad 0xbf61423d253fcb17 + .quad 0x08803ceafa39eb14 + .quad 0xf18602df9851c7af + .quad 0x0400f3a049e3414b + + // 2^140 * 3 * G + + .quad 0xabce0476ba61c55b + .quad 0x36a3d6d7c4d39716 + .quad 0x6eb259d5e8d82d09 + .quad 0x0c9176e984d756fb + .quad 0x2efba412a06e7b06 + .quad 0x146785452c8d2560 + .quad 0xdf9713ebd67a91c7 + .quad 0x32830ac7157eadf3 + .quad 0x0e782a7ab73769e8 + .quad 0x04a05d7875b18e2c + .quad 0x29525226ebcceae1 + .quad 0x0d794f8383eba820 + + // 2^140 * 4 * G + + .quad 0xff35f5cb9e1516f4 + .quad 0xee805bcf648aae45 + .quad 0xf0d73c2bb93a9ef3 + .quad 0x097b0bf22092a6c2 + .quad 0x7be44ce7a7a2e1ac + .quad 0x411fd93efad1b8b7 + .quad 0x1734a1d70d5f7c9b + .quad 0x0d6592233127db16 + .quad 0xc48bab1521a9d733 + .quad 0xa6c2eaead61abb25 + .quad 0x625c6c1cc6cb4305 + .quad 0x7fc90fea93eb3a67 + + // 2^140 * 5 * G + + .quad 0x0408f1fe1f5c5926 + .quad 0x1a8f2f5e3b258bf4 + .quad 0x40a951a2fdc71669 + .quad 0x6598ee93c98b577e + .quad 0xc527deb59c7cb23d + .quad 0x955391695328404e + .quad 0xd64392817ccf2c7a + .quad 0x6ce97dabf7d8fa11 + .quad 0x25b5a8e50ef7c48f + .quad 0xeb6034116f2ce532 + .quad 0xc5e75173e53de537 + .quad 0x73119fa08c12bb03 + + // 2^140 * 6 * G + + .quad 0xed30129453f1a4cb + .quad 0xbce621c9c8f53787 + .quad 0xfacb2b1338bee7b9 + .quad 0x3025798a9ea8428c + .quad 0x7845b94d21f4774d + .quad 0xbf62f16c7897b727 + .quad 0x671857c03c56522b + .quad 0x3cd6a85295621212 + .quad 0x3fecde923aeca999 + .quad 0xbdaa5b0062e8c12f + .quad 0x67b99dfc96988ade + .quad 0x3f52c02852661036 + + // 2^140 * 7 * G + + .quad 0xffeaa48e2a1351c6 + .quad 0x28624754fa7f53d7 + .quad 0x0b5ba9e57582ddf1 + .quad 0x60c0104ba696ac59 + .quad 0x9258bf99eec416c6 + .quad 0xac8a5017a9d2f671 + .quad 0x629549ab16dea4ab + .quad 0x05d0e85c99091569 + .quad 0x051de020de9cbe97 + .quad 
0xfa07fc56b50bcf74 + .quad 0x378cec9f0f11df65 + .quad 0x36853c69ab96de4d + + // 2^140 * 8 * G + + .quad 0x36d9b8de78f39b2d + .quad 0x7f42ed71a847b9ec + .quad 0x241cd1d679bd3fde + .quad 0x6a704fec92fbce6b + .quad 0x4433c0b0fac5e7be + .quad 0x724bae854c08dcbe + .quad 0xf1f24cc446978f9b + .quad 0x4a0aff6d62825fc8 + .quad 0xe917fb9e61095301 + .quad 0xc102df9402a092f8 + .quad 0xbf09e2f5fa66190b + .quad 0x681109bee0dcfe37 + + // 2^144 * 1 * G + + .quad 0x559a0cc9782a0dde + .quad 0x551dcdb2ea718385 + .quad 0x7f62865b31ef238c + .quad 0x504aa7767973613d + .quad 0x9c18fcfa36048d13 + .quad 0x29159db373899ddd + .quad 0xdc9f350b9f92d0aa + .quad 0x26f57eee878a19d4 + .quad 0x0cab2cd55687efb1 + .quad 0x5180d162247af17b + .quad 0x85c15a344f5a2467 + .quad 0x4041943d9dba3069 + + // 2^144 * 2 * G + + .quad 0xc3c0eeba43ebcc96 + .quad 0x8d749c9c26ea9caf + .quad 0xd9fa95ee1c77ccc6 + .quad 0x1420a1d97684340f + .quad 0x4b217743a26caadd + .quad 0x47a6b424648ab7ce + .quad 0xcb1d4f7a03fbc9e3 + .quad 0x12d931429800d019 + .quad 0x00c67799d337594f + .quad 0x5e3c5140b23aa47b + .quad 0x44182854e35ff395 + .quad 0x1b4f92314359a012 + + // 2^144 * 3 * G + + .quad 0x3e5c109d89150951 + .quad 0x39cefa912de9696a + .quad 0x20eae43f975f3020 + .quad 0x239b572a7f132dae + .quad 0x33cf3030a49866b1 + .quad 0x251f73d2215f4859 + .quad 0xab82aa4051def4f6 + .quad 0x5ff191d56f9a23f6 + .quad 0x819ed433ac2d9068 + .quad 0x2883ab795fc98523 + .quad 0xef4572805593eb3d + .quad 0x020c526a758f36cb + + // 2^144 * 4 * G + + .quad 0x779834f89ed8dbbc + .quad 0xc8f2aaf9dc7ca46c + .quad 0xa9524cdca3e1b074 + .quad 0x02aacc4615313877 + .quad 0xe931ef59f042cc89 + .quad 0x2c589c9d8e124bb6 + .quad 0xadc8e18aaec75997 + .quad 0x452cfe0a5602c50c + .quad 0x86a0f7a0647877df + .quad 0xbbc464270e607c9f + .quad 0xab17ea25f1fb11c9 + .quad 0x4cfb7d7b304b877b + + // 2^144 * 5 * G + + .quad 0x72b43d6cb89b75fe + .quad 0x54c694d99c6adc80 + .quad 0xb8c3aa373ee34c9f + .quad 0x14b4622b39075364 + .quad 0xe28699c29789ef12 + .quad 0x2b6ecd71df57190d + .quad 0xc343c857ecc970d0 + .quad 0x5b1d4cbc434d3ac5 + .quad 0xb6fb2615cc0a9f26 + .quad 0x3a4f0e2bb88dcce5 + .quad 0x1301498b3369a705 + .quad 0x2f98f71258592dd1 + + // 2^144 * 6 * G + + .quad 0x0c94a74cb50f9e56 + .quad 0x5b1ff4a98e8e1320 + .quad 0x9a2acc2182300f67 + .quad 0x3a6ae249d806aaf9 + .quad 0x2e12ae444f54a701 + .quad 0xfcfe3ef0a9cbd7de + .quad 0xcebf890d75835de0 + .quad 0x1d8062e9e7614554 + .quad 0x657ada85a9907c5a + .quad 0x1a0ea8b591b90f62 + .quad 0x8d0e1dfbdf34b4e9 + .quad 0x298b8ce8aef25ff3 + + // 2^144 * 7 * G + + .quad 0x2a927953eff70cb2 + .quad 0x4b89c92a79157076 + .quad 0x9418457a30a7cf6a + .quad 0x34b8a8404d5ce485 + .quad 0x837a72ea0a2165de + .quad 0x3fab07b40bcf79f6 + .quad 0x521636c77738ae70 + .quad 0x6ba6271803a7d7dc + .quad 0xc26eecb583693335 + .quad 0xd5a813df63b5fefd + .quad 0xa293aa9aa4b22573 + .quad 0x71d62bdd465e1c6a + + // 2^144 * 8 * G + + .quad 0x6533cc28d378df80 + .quad 0xf6db43790a0fa4b4 + .quad 0xe3645ff9f701da5a + .quad 0x74d5f317f3172ba4 + .quad 0xcd2db5dab1f75ef5 + .quad 0xd77f95cf16b065f5 + .quad 0x14571fea3f49f085 + .quad 0x1c333621262b2b3d + .quad 0xa86fe55467d9ca81 + .quad 0x398b7c752b298c37 + .quad 0xda6d0892e3ac623b + .quad 0x4aebcc4547e9d98c + + // 2^148 * 1 * G + + .quad 0x53175a7205d21a77 + .quad 0xb0c04422d3b934d4 + .quad 0xadd9f24bdd5deadc + .quad 0x074f46e69f10ff8c + .quad 0x0de9b204a059a445 + .quad 0xe15cb4aa4b17ad0f + .quad 0xe1bbec521f79c557 + .quad 0x2633f1b9d071081b + .quad 0xc1fb4177018b9910 + .quad 0xa6ea20dc6c0fe140 + .quad 0xd661f3e74354c6ff + .quad 0x5ecb72e6f1a3407a + + // 
2^148 * 2 * G + + .quad 0xa515a31b2259fb4e + .quad 0x0960f3972bcac52f + .quad 0xedb52fec8d3454cb + .quad 0x382e2720c476c019 + .quad 0xfeeae106e8e86997 + .quad 0x9863337f98d09383 + .quad 0x9470480eaa06ebef + .quad 0x038b6898d4c5c2d0 + .quad 0xf391c51d8ace50a6 + .quad 0x3142d0b9ae2d2948 + .quad 0xdb4d5a1a7f24ca80 + .quad 0x21aeba8b59250ea8 + + // 2^148 * 3 * G + + .quad 0x24f13b34cf405530 + .quad 0x3c44ea4a43088af7 + .quad 0x5dd5c5170006a482 + .quad 0x118eb8f8890b086d + .quad 0x53853600f0087f23 + .quad 0x4c461879da7d5784 + .quad 0x6af303deb41f6860 + .quad 0x0a3c16c5c27c18ed + .quad 0x17e49c17cc947f3d + .quad 0xccc6eda6aac1d27b + .quad 0xdf6092ceb0f08e56 + .quad 0x4909b3e22c67c36b + + // 2^148 * 4 * G + + .quad 0x9c9c85ea63fe2e89 + .quad 0xbe1baf910e9412ec + .quad 0x8f7baa8a86fbfe7b + .quad 0x0fb17f9fef968b6c + .quad 0x59a16676706ff64e + .quad 0x10b953dd0d86a53d + .quad 0x5848e1e6ce5c0b96 + .quad 0x2d8b78e712780c68 + .quad 0x79d5c62eafc3902b + .quad 0x773a215289e80728 + .quad 0xc38ae640e10120b9 + .quad 0x09ae23717b2b1a6d + + // 2^148 * 5 * G + + .quad 0xbb6a192a4e4d083c + .quad 0x34ace0630029e192 + .quad 0x98245a59aafabaeb + .quad 0x6d9c8a9ada97faac + .quad 0x10ab8fa1ad32b1d0 + .quad 0xe9aced1be2778b24 + .quad 0xa8856bc0373de90f + .quad 0x66f35ddddda53996 + .quad 0xd27d9afb24997323 + .quad 0x1bb7e07ef6f01d2e + .quad 0x2ba7472df52ecc7f + .quad 0x03019b4f646f9dc8 + + // 2^148 * 6 * G + + .quad 0x04a186b5565345cd + .quad 0xeee76610bcc4116a + .quad 0x689c73b478fb2a45 + .quad 0x387dcbff65697512 + .quad 0xaf09b214e6b3dc6b + .quad 0x3f7573b5ad7d2f65 + .quad 0xd019d988100a23b0 + .quad 0x392b63a58b5c35f7 + .quad 0x4093addc9c07c205 + .quad 0xc565be15f532c37e + .quad 0x63dbecfd1583402a + .quad 0x61722b4aef2e032e + + // 2^148 * 7 * G + + .quad 0x0012aafeecbd47af + .quad 0x55a266fb1cd46309 + .quad 0xf203eb680967c72c + .quad 0x39633944ca3c1429 + .quad 0xd6b07a5581cb0e3c + .quad 0x290ff006d9444969 + .quad 0x08680b6a16dcda1f + .quad 0x5568d2b75a06de59 + .quad 0x8d0cb88c1b37cfe1 + .quad 0x05b6a5a3053818f3 + .quad 0xf2e9bc04b787d959 + .quad 0x6beba1249add7f64 + + // 2^148 * 8 * G + + .quad 0x1d06005ca5b1b143 + .quad 0x6d4c6bb87fd1cda2 + .quad 0x6ef5967653fcffe7 + .quad 0x097c29e8c1ce1ea5 + .quad 0x5c3cecb943f5a53b + .quad 0x9cc9a61d06c08df2 + .quad 0xcfba639a85895447 + .quad 0x5a845ae80df09fd5 + .quad 0x4ce97dbe5deb94ca + .quad 0x38d0a4388c709c48 + .quad 0xc43eced4a169d097 + .quad 0x0a1249fff7e587c3 + + // 2^152 * 1 * G + + .quad 0x12f0071b276d01c9 + .quad 0xe7b8bac586c48c70 + .quad 0x5308129b71d6fba9 + .quad 0x5d88fbf95a3db792 + .quad 0x0b408d9e7354b610 + .quad 0x806b32535ba85b6e + .quad 0xdbe63a034a58a207 + .quad 0x173bd9ddc9a1df2c + .quad 0x2b500f1efe5872df + .quad 0x58d6582ed43918c1 + .quad 0xe6ed278ec9673ae0 + .quad 0x06e1cd13b19ea319 + + // 2^152 * 2 * G + + .quad 0x40d0ad516f166f23 + .quad 0x118e32931fab6abe + .quad 0x3fe35e14a04d088e + .quad 0x3080603526e16266 + .quad 0x472baf629e5b0353 + .quad 0x3baa0b90278d0447 + .quad 0x0c785f469643bf27 + .quad 0x7f3a6a1a8d837b13 + .quad 0xf7e644395d3d800b + .quad 0x95a8d555c901edf6 + .quad 0x68cd7830592c6339 + .quad 0x30d0fded2e51307e + + // 2^152 * 3 * G + + .quad 0xe0594d1af21233b3 + .quad 0x1bdbe78ef0cc4d9c + .quad 0x6965187f8f499a77 + .quad 0x0a9214202c099868 + .quad 0x9cb4971e68b84750 + .quad 0xa09572296664bbcf + .quad 0x5c8de72672fa412b + .quad 0x4615084351c589d9 + .quad 0xbc9019c0aeb9a02e + .quad 0x55c7110d16034cae + .quad 0x0e6df501659932ec + .quad 0x3bca0d2895ca5dfe + + // 2^152 * 4 * G + + .quad 0x40f031bc3c5d62a4 + .quad 0x19fc8b3ecff07a60 + .quad 
0x98183da2130fb545 + .quad 0x5631deddae8f13cd + .quad 0x9c688eb69ecc01bf + .quad 0xf0bc83ada644896f + .quad 0xca2d955f5f7a9fe2 + .quad 0x4ea8b4038df28241 + .quad 0x2aed460af1cad202 + .quad 0x46305305a48cee83 + .quad 0x9121774549f11a5f + .quad 0x24ce0930542ca463 + + // 2^152 * 5 * G + + .quad 0x1fe890f5fd06c106 + .quad 0xb5c468355d8810f2 + .quad 0x827808fe6e8caf3e + .quad 0x41d4e3c28a06d74b + .quad 0x3fcfa155fdf30b85 + .quad 0xd2f7168e36372ea4 + .quad 0xb2e064de6492f844 + .quad 0x549928a7324f4280 + .quad 0xf26e32a763ee1a2e + .quad 0xae91e4b7d25ffdea + .quad 0xbc3bd33bd17f4d69 + .quad 0x491b66dec0dcff6a + + // 2^152 * 6 * G + + .quad 0x98f5b13dc7ea32a7 + .quad 0xe3d5f8cc7e16db98 + .quad 0xac0abf52cbf8d947 + .quad 0x08f338d0c85ee4ac + .quad 0x75f04a8ed0da64a1 + .quad 0xed222caf67e2284b + .quad 0x8234a3791f7b7ba4 + .quad 0x4cf6b8b0b7018b67 + .quad 0xc383a821991a73bd + .quad 0xab27bc01df320c7a + .quad 0xc13d331b84777063 + .quad 0x530d4a82eb078a99 + + // 2^152 * 7 * G + + .quad 0x004c3630e1f94825 + .quad 0x7e2d78268cab535a + .quad 0xc7482323cc84ff8b + .quad 0x65ea753f101770b9 + .quad 0x6d6973456c9abf9e + .quad 0x257fb2fc4900a880 + .quad 0x2bacf412c8cfb850 + .quad 0x0db3e7e00cbfbd5b + .quad 0x3d66fc3ee2096363 + .quad 0x81d62c7f61b5cb6b + .quad 0x0fbe044213443b1a + .quad 0x02a4ec1921e1a1db + + // 2^152 * 8 * G + + .quad 0x5ce6259a3b24b8a2 + .quad 0xb8577acc45afa0b8 + .quad 0xcccbe6e88ba07037 + .quad 0x3d143c51127809bf + .quad 0xf5c86162f1cf795f + .quad 0x118c861926ee57f2 + .quad 0x172124851c063578 + .quad 0x36d12b5dec067fcf + .quad 0x126d279179154557 + .quad 0xd5e48f5cfc783a0a + .quad 0x36bdb6e8df179bac + .quad 0x2ef517885ba82859 + + // 2^156 * 1 * G + + .quad 0x88bd438cd11e0d4a + .quad 0x30cb610d43ccf308 + .quad 0xe09a0e3791937bcc + .quad 0x4559135b25b1720c + .quad 0x1ea436837c6da1e9 + .quad 0xf9c189af1fb9bdbe + .quad 0x303001fcce5dd155 + .quad 0x28a7c99ebc57be52 + .quad 0xb8fd9399e8d19e9d + .quad 0x908191cb962423ff + .quad 0xb2b948d747c742a3 + .quad 0x37f33226d7fb44c4 + + // 2^156 * 2 * G + + .quad 0x0dae8767b55f6e08 + .quad 0x4a43b3b35b203a02 + .quad 0xe3725a6e80af8c79 + .quad 0x0f7a7fd1705fa7a3 + .quad 0x33912553c821b11d + .quad 0x66ed42c241e301df + .quad 0x066fcc11104222fd + .quad 0x307a3b41c192168f + .quad 0x8eeb5d076eb55ce0 + .quad 0x2fc536bfaa0d925a + .quad 0xbe81830fdcb6c6e8 + .quad 0x556c7045827baf52 + + // 2^156 * 3 * G + + .quad 0x8e2b517302e9d8b7 + .quad 0xe3e52269248714e8 + .quad 0xbd4fbd774ca960b5 + .quad 0x6f4b4199c5ecada9 + .quad 0xb94b90022bf44406 + .quad 0xabd4237eff90b534 + .quad 0x7600a960faf86d3a + .quad 0x2f45abdac2322ee3 + .quad 0x61af4912c8ef8a6a + .quad 0xe58fa4fe43fb6e5e + .quad 0xb5afcc5d6fd427cf + .quad 0x6a5393281e1e11eb + + // 2^156 * 4 * G + + .quad 0xf3da5139a5d1ee89 + .quad 0x8145457cff936988 + .quad 0x3f622fed00e188c4 + .quad 0x0f513815db8b5a3d + .quad 0x0fff04fe149443cf + .quad 0x53cac6d9865cddd7 + .quad 0x31385b03531ed1b7 + .quad 0x5846a27cacd1039d + .quad 0x4ff5cdac1eb08717 + .quad 0x67e8b29590f2e9bc + .quad 0x44093b5e237afa99 + .quad 0x0d414bed8708b8b2 + + // 2^156 * 5 * G + + .quad 0xcfb68265fd0e75f6 + .quad 0xe45b3e28bb90e707 + .quad 0x7242a8de9ff92c7a + .quad 0x685b3201933202dd + .quad 0x81886a92294ac9e8 + .quad 0x23162b45d55547be + .quad 0x94cfbc4403715983 + .quad 0x50eb8fdb134bc401 + .quad 0xc0b73ec6d6b330cd + .quad 0x84e44807132faff1 + .quad 0x732b7352c4a5dee1 + .quad 0x5d7c7cf1aa7cd2d2 + + // 2^156 * 6 * G + + .quad 0xaf3b46bf7a4aafa2 + .quad 0xb78705ec4d40d411 + .quad 0x114f0c6aca7c15e3 + .quad 0x3f364faaa9489d4d + .quad 0x33d1013e9b73a562 + .quad 
0x925cef5748ec26e1 + .quad 0xa7fce614dd468058 + .quad 0x78b0fad41e9aa438 + .quad 0xbf56a431ed05b488 + .quad 0xa533e66c9c495c7e + .quad 0xe8652baf87f3651a + .quad 0x0241800059d66c33 + + // 2^156 * 7 * G + + .quad 0xceb077fea37a5be4 + .quad 0xdb642f02e5a5eeb7 + .quad 0xc2e6d0c5471270b8 + .quad 0x4771b65538e4529c + .quad 0x28350c7dcf38ea01 + .quad 0x7c6cdbc0b2917ab6 + .quad 0xace7cfbe857082f7 + .quad 0x4d2845aba2d9a1e0 + .quad 0xbb537fe0447070de + .quad 0xcba744436dd557df + .quad 0xd3b5a3473600dbcb + .quad 0x4aeabbe6f9ffd7f8 + + // 2^156 * 8 * G + + .quad 0x4630119e40d8f78c + .quad 0xa01a9bc53c710e11 + .quad 0x486d2b258910dd79 + .quad 0x1e6c47b3db0324e5 + .quad 0x6a2134bcc4a9c8f2 + .quad 0xfbf8fd1c8ace2e37 + .quad 0x000ae3049911a0ba + .quad 0x046e3a616bc89b9e + .quad 0x14e65442f03906be + .quad 0x4a019d54e362be2a + .quad 0x68ccdfec8dc230c7 + .quad 0x7cfb7e3faf6b861c + + // 2^160 * 1 * G + + .quad 0x4637974e8c58aedc + .quad 0xb9ef22fbabf041a4 + .quad 0xe185d956e980718a + .quad 0x2f1b78fab143a8a6 + .quad 0x96eebffb305b2f51 + .quad 0xd3f938ad889596b8 + .quad 0xf0f52dc746d5dd25 + .quad 0x57968290bb3a0095 + .quad 0xf71ab8430a20e101 + .quad 0xf393658d24f0ec47 + .quad 0xcf7509a86ee2eed1 + .quad 0x7dc43e35dc2aa3e1 + + // 2^160 * 2 * G + + .quad 0x85966665887dd9c3 + .quad 0xc90f9b314bb05355 + .quad 0xc6e08df8ef2079b1 + .quad 0x7ef72016758cc12f + .quad 0x5a782a5c273e9718 + .quad 0x3576c6995e4efd94 + .quad 0x0f2ed8051f237d3e + .quad 0x044fb81d82d50a99 + .quad 0xc1df18c5a907e3d9 + .quad 0x57b3371dce4c6359 + .quad 0xca704534b201bb49 + .quad 0x7f79823f9c30dd2e + + // 2^160 * 3 * G + + .quad 0x8334d239a3b513e8 + .quad 0xc13670d4b91fa8d8 + .quad 0x12b54136f590bd33 + .quad 0x0a4e0373d784d9b4 + .quad 0x6a9c1ff068f587ba + .quad 0x0827894e0050c8de + .quad 0x3cbf99557ded5be7 + .quad 0x64a9b0431c06d6f0 + .quad 0x2eb3d6a15b7d2919 + .quad 0xb0b4f6a0d53a8235 + .quad 0x7156ce4389a45d47 + .quad 0x071a7d0ace18346c + + // 2^160 * 4 * G + + .quad 0xd3072daac887ba0b + .quad 0x01262905bfa562ee + .quad 0xcf543002c0ef768b + .quad 0x2c3bcc7146ea7e9c + .quad 0xcc0c355220e14431 + .quad 0x0d65950709b15141 + .quad 0x9af5621b209d5f36 + .quad 0x7c69bcf7617755d3 + .quad 0x07f0d7eb04e8295f + .quad 0x10db18252f50f37d + .quad 0xe951a9a3171798d7 + .quad 0x6f5a9a7322aca51d + + // 2^160 * 5 * G + + .quad 0x8ba1000c2f41c6c5 + .quad 0xc49f79c10cfefb9b + .quad 0x4efa47703cc51c9f + .quad 0x494e21a2e147afca + .quad 0xe729d4eba3d944be + .quad 0x8d9e09408078af9e + .quad 0x4525567a47869c03 + .quad 0x02ab9680ee8d3b24 + .quad 0xefa48a85dde50d9a + .quad 0x219a224e0fb9a249 + .quad 0xfa091f1dd91ef6d9 + .quad 0x6b5d76cbea46bb34 + + // 2^160 * 6 * G + + .quad 0x8857556cec0cd994 + .quad 0x6472dc6f5cd01dba + .quad 0xaf0169148f42b477 + .quad 0x0ae333f685277354 + .quad 0xe0f941171e782522 + .quad 0xf1e6ae74036936d3 + .quad 0x408b3ea2d0fcc746 + .quad 0x16fb869c03dd313e + .quad 0x288e199733b60962 + .quad 0x24fc72b4d8abe133 + .quad 0x4811f7ed0991d03e + .quad 0x3f81e38b8f70d075 + + // 2^160 * 7 * G + + .quad 0x7f910fcc7ed9affe + .quad 0x545cb8a12465874b + .quad 0xa8397ed24b0c4704 + .quad 0x50510fc104f50993 + .quad 0x0adb7f355f17c824 + .quad 0x74b923c3d74299a4 + .quad 0xd57c3e8bcbf8eaf7 + .quad 0x0ad3e2d34cdedc3d + .quad 0x6f0c0fc5336e249d + .quad 0x745ede19c331cfd9 + .quad 0xf2d6fd0009eefe1c + .quad 0x127c158bf0fa1ebe + + // 2^160 * 8 * G + + .quad 0xf6197c422e9879a2 + .quad 0xa44addd452ca3647 + .quad 0x9b413fc14b4eaccb + .quad 0x354ef87d07ef4f68 + .quad 0xdea28fc4ae51b974 + .quad 0x1d9973d3744dfe96 + .quad 0x6240680b873848a8 + .quad 0x4ed82479d167df95 + .quad 
0xfee3b52260c5d975 + .quad 0x50352efceb41b0b8 + .quad 0x8808ac30a9f6653c + .quad 0x302d92d20539236d + + // 2^164 * 1 * G + + .quad 0x4c59023fcb3efb7c + .quad 0x6c2fcb99c63c2a94 + .quad 0xba4190e2c3c7e084 + .quad 0x0e545daea51874d9 + .quad 0x957b8b8b0df53c30 + .quad 0x2a1c770a8e60f098 + .quad 0xbbc7a670345796de + .quad 0x22a48f9a90c99bc9 + .quad 0x6b7dc0dc8d3fac58 + .quad 0x5497cd6ce6e42bfd + .quad 0x542f7d1bf400d305 + .quad 0x4159f47f048d9136 + + // 2^164 * 2 * G + + .quad 0x20ad660839e31e32 + .quad 0xf81e1bd58405be50 + .quad 0xf8064056f4dabc69 + .quad 0x14d23dd4ce71b975 + .quad 0x748515a8bbd24839 + .quad 0x77128347afb02b55 + .quad 0x50ba2ac649a2a17f + .quad 0x060525513ad730f1 + .quad 0xf2398e098aa27f82 + .quad 0x6d7982bb89a1b024 + .quad 0xfa694084214dd24c + .quad 0x71ab966fa32301c3 + + // 2^164 * 3 * G + + .quad 0x2dcbd8e34ded02fc + .quad 0x1151f3ec596f22aa + .quad 0xbca255434e0328da + .quad 0x35768fbe92411b22 + .quad 0xb1088a0702809955 + .quad 0x43b273ea0b43c391 + .quad 0xca9b67aefe0686ed + .quad 0x605eecbf8335f4ed + .quad 0x83200a656c340431 + .quad 0x9fcd71678ee59c2f + .quad 0x75d4613f71300f8a + .quad 0x7a912faf60f542f9 + + // 2^164 * 4 * G + + .quad 0xb204585e5edc1a43 + .quad 0x9f0e16ee5897c73c + .quad 0x5b82c0ae4e70483c + .quad 0x624a170e2bddf9be + .quad 0x253f4f8dfa2d5597 + .quad 0x25e49c405477130c + .quad 0x00c052e5996b1102 + .quad 0x33cb966e33bb6c4a + .quad 0x597028047f116909 + .quad 0x828ac41c1e564467 + .quad 0x70417dbde6217387 + .quad 0x721627aefbac4384 + + // 2^164 * 5 * G + + .quad 0x97d03bc38736add5 + .quad 0x2f1422afc532b130 + .quad 0x3aa68a057101bbc4 + .quad 0x4c946cf7e74f9fa7 + .quad 0xfd3097bc410b2f22 + .quad 0xf1a05da7b5cfa844 + .quad 0x61289a1def57ca74 + .quad 0x245ea199bb821902 + .quad 0xaedca66978d477f8 + .quad 0x1898ba3c29117fe1 + .quad 0xcf73f983720cbd58 + .quad 0x67da12e6b8b56351 + + // 2^164 * 6 * G + + .quad 0x7067e187b4bd6e07 + .quad 0x6e8f0203c7d1fe74 + .quad 0x93c6aa2f38c85a30 + .quad 0x76297d1f3d75a78a + .quad 0x2b7ef3d38ec8308c + .quad 0x828fd7ec71eb94ab + .quad 0x807c3b36c5062abd + .quad 0x0cb64cb831a94141 + .quad 0x3030fc33534c6378 + .quad 0xb9635c5ce541e861 + .quad 0x15d9a9bed9b2c728 + .quad 0x49233ea3f3775dcb + + // 2^164 * 7 * G + + .quad 0x629398fa8dbffc3a + .quad 0xe12fe52dd54db455 + .quad 0xf3be11dfdaf25295 + .quad 0x628b140dce5e7b51 + .quad 0x7b3985fe1c9f249b + .quad 0x4fd6b2d5a1233293 + .quad 0xceb345941adf4d62 + .quad 0x6987ff6f542de50c + .quad 0x47e241428f83753c + .quad 0x6317bebc866af997 + .quad 0xdabb5b433d1a9829 + .quad 0x074d8d245287fb2d + + // 2^164 * 8 * G + + .quad 0x8337d9cd440bfc31 + .quad 0x729d2ca1af318fd7 + .quad 0xa040a4a4772c2070 + .quad 0x46002ef03a7349be + .quad 0x481875c6c0e31488 + .quad 0x219429b2e22034b4 + .quad 0x7223c98a31283b65 + .quad 0x3420d60b342277f9 + .quad 0xfaa23adeaffe65f7 + .quad 0x78261ed45be0764c + .quad 0x441c0a1e2f164403 + .quad 0x5aea8e567a87d395 + + // 2^168 * 1 * G + + .quad 0x7813c1a2bca4283d + .quad 0xed62f091a1863dd9 + .quad 0xaec7bcb8c268fa86 + .quad 0x10e5d3b76f1cae4c + .quad 0x2dbc6fb6e4e0f177 + .quad 0x04e1bf29a4bd6a93 + .quad 0x5e1966d4787af6e8 + .quad 0x0edc5f5eb426d060 + .quad 0x5453bfd653da8e67 + .quad 0xe9dc1eec24a9f641 + .quad 0xbf87263b03578a23 + .quad 0x45b46c51361cba72 + + // 2^168 * 2 * G + + .quad 0xa9402abf314f7fa1 + .quad 0xe257f1dc8e8cf450 + .quad 0x1dbbd54b23a8be84 + .quad 0x2177bfa36dcb713b + .quad 0xce9d4ddd8a7fe3e4 + .quad 0xab13645676620e30 + .quad 0x4b594f7bb30e9958 + .quad 0x5c1c0aef321229df + .quad 0x37081bbcfa79db8f + .quad 0x6048811ec25f59b3 + .quad 0x087a76659c832487 + .quad 
0x4ae619387d8ab5bb + + // 2^168 * 3 * G + + .quad 0x8ddbf6aa5344a32e + .quad 0x7d88eab4b41b4078 + .quad 0x5eb0eb974a130d60 + .quad 0x1a00d91b17bf3e03 + .quad 0x61117e44985bfb83 + .quad 0xfce0462a71963136 + .quad 0x83ac3448d425904b + .quad 0x75685abe5ba43d64 + .quad 0x6e960933eb61f2b2 + .quad 0x543d0fa8c9ff4952 + .quad 0xdf7275107af66569 + .quad 0x135529b623b0e6aa + + // 2^168 * 4 * G + + .quad 0x18f0dbd7add1d518 + .quad 0x979f7888cfc11f11 + .quad 0x8732e1f07114759b + .quad 0x79b5b81a65ca3a01 + .quad 0xf5c716bce22e83fe + .quad 0xb42beb19e80985c1 + .quad 0xec9da63714254aae + .quad 0x5972ea051590a613 + .quad 0x0fd4ac20dc8f7811 + .quad 0x9a9ad294ac4d4fa8 + .quad 0xc01b2d64b3360434 + .quad 0x4f7e9c95905f3bdb + + // 2^168 * 5 * G + + .quad 0x62674bbc5781302e + .quad 0xd8520f3989addc0f + .quad 0x8c2999ae53fbd9c6 + .quad 0x31993ad92e638e4c + .quad 0x71c8443d355299fe + .quad 0x8bcd3b1cdbebead7 + .quad 0x8092499ef1a49466 + .quad 0x1942eec4a144adc8 + .quad 0x7dac5319ae234992 + .quad 0x2c1b3d910cea3e92 + .quad 0x553ce494253c1122 + .quad 0x2a0a65314ef9ca75 + + // 2^168 * 6 * G + + .quad 0x2db7937ff7f927c2 + .quad 0xdb741f0617d0a635 + .quad 0x5982f3a21155af76 + .quad 0x4cf6e218647c2ded + .quad 0xcf361acd3c1c793a + .quad 0x2f9ebcac5a35bc3b + .quad 0x60e860e9a8cda6ab + .quad 0x055dc39b6dea1a13 + .quad 0xb119227cc28d5bb6 + .quad 0x07e24ebc774dffab + .quad 0xa83c78cee4a32c89 + .quad 0x121a307710aa24b6 + + // 2^168 * 7 * G + + .quad 0xe4db5d5e9f034a97 + .quad 0xe153fc093034bc2d + .quad 0x460546919551d3b1 + .quad 0x333fc76c7a40e52d + .quad 0xd659713ec77483c9 + .quad 0x88bfe077b82b96af + .quad 0x289e28231097bcd3 + .quad 0x527bb94a6ced3a9b + .quad 0x563d992a995b482e + .quad 0x3405d07c6e383801 + .quad 0x485035de2f64d8e5 + .quad 0x6b89069b20a7a9f7 + + // 2^168 * 8 * G + + .quad 0x812aa0416270220d + .quad 0x995a89faf9245b4e + .quad 0xffadc4ce5072ef05 + .quad 0x23bc2103aa73eb73 + .quad 0x4082fa8cb5c7db77 + .quad 0x068686f8c734c155 + .quad 0x29e6c8d9f6e7a57e + .quad 0x0473d308a7639bcf + .quad 0xcaee792603589e05 + .quad 0x2b4b421246dcc492 + .quad 0x02a1ef74e601a94f + .quad 0x102f73bfde04341a + + // 2^172 * 1 * G + + .quad 0xb5a2d50c7ec20d3e + .quad 0xc64bdd6ea0c97263 + .quad 0x56e89052c1ff734d + .quad 0x4929c6f72b2ffaba + .quad 0x358ecba293a36247 + .quad 0xaf8f9862b268fd65 + .quad 0x412f7e9968a01c89 + .quad 0x5786f312cd754524 + .quad 0x337788ffca14032c + .quad 0xf3921028447f1ee3 + .quad 0x8b14071f231bccad + .quad 0x4c817b4bf2344783 + + // 2^172 * 2 * G + + .quad 0x0ff853852871b96e + .quad 0xe13e9fab60c3f1bb + .quad 0xeefd595325344402 + .quad 0x0a37c37075b7744b + .quad 0x413ba057a40b4484 + .quad 0xba4c2e1a4f5f6a43 + .quad 0x614ba0a5aee1d61c + .quad 0x78a1531a8b05dc53 + .quad 0x6cbdf1703ad0562b + .quad 0x8ecf4830c92521a3 + .quad 0xdaebd303fd8424e7 + .quad 0x72ad82a42e5ec56f + + // 2^172 * 3 * G + + .quad 0x3f9e8e35bafb65f6 + .quad 0x39d69ec8f27293a1 + .quad 0x6cb8cd958cf6a3d0 + .quad 0x1734778173adae6d + .quad 0xc368939167024bc3 + .quad 0x8e69d16d49502fda + .quad 0xfcf2ec3ce45f4b29 + .quad 0x065f669ea3b4cbc4 + .quad 0x8a00aec75532db4d + .quad 0xb869a4e443e31bb1 + .quad 0x4a0f8552d3a7f515 + .quad 0x19adeb7c303d7c08 + + // 2^172 * 4 * G + + .quad 0xc720cb6153ead9a3 + .quad 0x55b2c97f512b636e + .quad 0xb1e35b5fd40290b1 + .quad 0x2fd9ccf13b530ee2 + .quad 0x9d05ba7d43c31794 + .quad 0x2470c8ff93322526 + .quad 0x8323dec816197438 + .quad 0x2852709881569b53 + .quad 0x07bd475b47f796b8 + .quad 0xd2c7b013542c8f54 + .quad 0x2dbd23f43b24f87e + .quad 0x6551afd77b0901d6 + + // 2^172 * 5 * G + + .quad 0x4546baaf54aac27f + .quad 
0xf6f66fecb2a45a28 + .quad 0x582d1b5b562bcfe8 + .quad 0x44b123f3920f785f + .quad 0x68a24ce3a1d5c9ac + .quad 0xbb77a33d10ff6461 + .quad 0x0f86ce4425d3166e + .quad 0x56507c0950b9623b + .quad 0x1206f0b7d1713e63 + .quad 0x353fe3d915bafc74 + .quad 0x194ceb970ad9d94d + .quad 0x62fadd7cf9d03ad3 + + // 2^172 * 6 * G + + .quad 0xc6b5967b5598a074 + .quad 0x5efe91ce8e493e25 + .quad 0xd4b72c4549280888 + .quad 0x20ef1149a26740c2 + .quad 0x3cd7bc61e7ce4594 + .quad 0xcd6b35a9b7dd267e + .quad 0xa080abc84366ef27 + .quad 0x6ec7c46f59c79711 + .quad 0x2f07ad636f09a8a2 + .quad 0x8697e6ce24205e7d + .quad 0xc0aefc05ee35a139 + .quad 0x15e80958b5f9d897 + + // 2^172 * 7 * G + + .quad 0x25a5ef7d0c3e235b + .quad 0x6c39c17fbe134ee7 + .quad 0xc774e1342dc5c327 + .quad 0x021354b892021f39 + .quad 0x4dd1ed355bb061c4 + .quad 0x42dc0cef941c0700 + .quad 0x61305dc1fd86340e + .quad 0x56b2cc930e55a443 + .quad 0x1df79da6a6bfc5a2 + .quad 0x02f3a2749fde4369 + .quad 0xb323d9f2cda390a7 + .quad 0x7be0847b8774d363 + + // 2^172 * 8 * G + + .quad 0x8c99cc5a8b3f55c3 + .quad 0x0611d7253fded2a0 + .quad 0xed2995ff36b70a36 + .quad 0x1f699a54d78a2619 + .quad 0x1466f5af5307fa11 + .quad 0x817fcc7ded6c0af2 + .quad 0x0a6de44ec3a4a3fb + .quad 0x74071475bc927d0b + .quad 0xe77292f373e7ea8a + .quad 0x296537d2cb045a31 + .quad 0x1bd0653ed3274fde + .quad 0x2f9a2c4476bd2966 + + // 2^176 * 1 * G + + .quad 0xeb18b9ab7f5745c6 + .quad 0x023a8aee5787c690 + .quad 0xb72712da2df7afa9 + .quad 0x36597d25ea5c013d + .quad 0xa2b4dae0b5511c9a + .quad 0x7ac860292bffff06 + .quad 0x981f375df5504234 + .quad 0x3f6bd725da4ea12d + .quad 0x734d8d7b106058ac + .quad 0xd940579e6fc6905f + .quad 0x6466f8f99202932d + .quad 0x7b7ecc19da60d6d0 + + // 2^176 * 2 * G + + .quad 0x78c2373c695c690d + .quad 0xdd252e660642906e + .quad 0x951d44444ae12bd2 + .quad 0x4235ad7601743956 + .quad 0x6dae4a51a77cfa9b + .quad 0x82263654e7a38650 + .quad 0x09bbffcd8f2d82db + .quad 0x03bedc661bf5caba + .quad 0x6258cb0d078975f5 + .quad 0x492942549189f298 + .quad 0xa0cab423e2e36ee4 + .quad 0x0e7ce2b0cdf066a1 + + // 2^176 * 3 * G + + .quad 0xc494643ac48c85a3 + .quad 0xfd361df43c6139ad + .quad 0x09db17dd3ae94d48 + .quad 0x666e0a5d8fb4674a + .quad 0xfea6fedfd94b70f9 + .quad 0xf130c051c1fcba2d + .quad 0x4882d47e7f2fab89 + .quad 0x615256138aeceeb5 + .quad 0x2abbf64e4870cb0d + .quad 0xcd65bcf0aa458b6b + .quad 0x9abe4eba75e8985d + .quad 0x7f0bc810d514dee4 + + // 2^176 * 4 * G + + .quad 0xb9006ba426f4136f + .quad 0x8d67369e57e03035 + .quad 0xcbc8dfd94f463c28 + .quad 0x0d1f8dbcf8eedbf5 + .quad 0x83ac9dad737213a0 + .quad 0x9ff6f8ba2ef72e98 + .quad 0x311e2edd43ec6957 + .quad 0x1d3a907ddec5ab75 + .quad 0xba1693313ed081dc + .quad 0x29329fad851b3480 + .quad 0x0128013c030321cb + .quad 0x00011b44a31bfde3 + + // 2^176 * 5 * G + + .quad 0x3fdfa06c3fc66c0c + .quad 0x5d40e38e4dd60dd2 + .quad 0x7ae38b38268e4d71 + .quad 0x3ac48d916e8357e1 + .quad 0x16561f696a0aa75c + .quad 0xc1bf725c5852bd6a + .quad 0x11a8dd7f9a7966ad + .quad 0x63d988a2d2851026 + .quad 0x00120753afbd232e + .quad 0xe92bceb8fdd8f683 + .quad 0xf81669b384e72b91 + .quad 0x33fad52b2368a066 + + // 2^176 * 6 * G + + .quad 0x540649c6c5e41e16 + .quad 0x0af86430333f7735 + .quad 0xb2acfcd2f305e746 + .quad 0x16c0f429a256dca7 + .quad 0x8d2cc8d0c422cfe8 + .quad 0x072b4f7b05a13acb + .quad 0xa3feb6e6ecf6a56f + .quad 0x3cc355ccb90a71e2 + .quad 0xe9b69443903e9131 + .quad 0xb8a494cb7a5637ce + .quad 0xc87cd1a4baba9244 + .quad 0x631eaf426bae7568 + + // 2^176 * 7 * G + + .quad 0xb3e90410da66fe9f + .quad 0x85dd4b526c16e5a6 + .quad 0xbc3d97611ef9bf83 + .quad 0x5599648b1ea919b5 + .quad 
0x47d975b9a3700de8 + .quad 0x7280c5fbe2f80552 + .quad 0x53658f2732e45de1 + .quad 0x431f2c7f665f80b5 + .quad 0xd6026344858f7b19 + .quad 0x14ab352fa1ea514a + .quad 0x8900441a2090a9d7 + .quad 0x7b04715f91253b26 + + // 2^176 * 8 * G + + .quad 0x83edbd28acf6ae43 + .quad 0x86357c8b7d5c7ab4 + .quad 0xc0404769b7eb2c44 + .quad 0x59b37bf5c2f6583f + .quad 0xb376c280c4e6bac6 + .quad 0x970ed3dd6d1d9b0b + .quad 0xb09a9558450bf944 + .quad 0x48d0acfa57cde223 + .quad 0xb60f26e47dabe671 + .quad 0xf1d1a197622f3a37 + .quad 0x4208ce7ee9960394 + .quad 0x16234191336d3bdb + + // 2^180 * 1 * G + + .quad 0xf19aeac733a63aef + .quad 0x2c7fba5d4442454e + .quad 0x5da87aa04795e441 + .quad 0x413051e1a4e0b0f5 + .quad 0x852dd1fd3d578bbe + .quad 0x2b65ce72c3286108 + .quad 0x658c07f4eace2273 + .quad 0x0933f804ec38ab40 + .quad 0xa7ab69798d496476 + .quad 0x8121aadefcb5abc8 + .quad 0xa5dc12ef7b539472 + .quad 0x07fd47065e45351a + + // 2^180 * 2 * G + + .quad 0xc8583c3d258d2bcd + .quad 0x17029a4daf60b73f + .quad 0xfa0fc9d6416a3781 + .quad 0x1c1e5fba38b3fb23 + .quad 0x304211559ae8e7c3 + .quad 0xf281b229944882a5 + .quad 0x8a13ac2e378250e4 + .quad 0x014afa0954ba48f4 + .quad 0xcb3197001bb3666c + .quad 0x330060524bffecb9 + .quad 0x293711991a88233c + .quad 0x291884363d4ed364 + + // 2^180 * 3 * G + + .quad 0x033c6805dc4babfa + .quad 0x2c15bf5e5596ecc1 + .quad 0x1bc70624b59b1d3b + .quad 0x3ede9850a19f0ec5 + .quad 0xfb9d37c3bc1ab6eb + .quad 0x02be14534d57a240 + .quad 0xf4d73415f8a5e1f6 + .quad 0x5964f4300ccc8188 + .quad 0xe44a23152d096800 + .quad 0x5c08c55970866996 + .quad 0xdf2db60a46affb6e + .quad 0x579155c1f856fd89 + + // 2^180 * 4 * G + + .quad 0x96324edd12e0c9ef + .quad 0x468b878df2420297 + .quad 0x199a3776a4f573be + .quad 0x1e7fbcf18e91e92a + .quad 0xb5f16b630817e7a6 + .quad 0x808c69233c351026 + .quad 0x324a983b54cef201 + .quad 0x53c092084a485345 + .quad 0xd2d41481f1cbafbf + .quad 0x231d2db6716174e5 + .quad 0x0b7d7656e2a55c98 + .quad 0x3e955cd82aa495f6 + + // 2^180 * 5 * G + + .quad 0xe48f535e3ed15433 + .quad 0xd075692a0d7270a3 + .quad 0x40fbd21daade6387 + .quad 0x14264887cf4495f5 + .quad 0xab39f3ef61bb3a3f + .quad 0x8eb400652eb9193e + .quad 0xb5de6ecc38c11f74 + .quad 0x654d7e9626f3c49f + .quad 0xe564cfdd5c7d2ceb + .quad 0x82eeafded737ccb9 + .quad 0x6107db62d1f9b0ab + .quad 0x0b6baac3b4358dbb + + // 2^180 * 6 * G + + .quad 0x7ae62bcb8622fe98 + .quad 0x47762256ceb891af + .quad 0x1a5a92bcf2e406b4 + .quad 0x7d29401784e41501 + .quad 0x204abad63700a93b + .quad 0xbe0023d3da779373 + .quad 0xd85f0346633ab709 + .quad 0x00496dc490820412 + .quad 0x1c74b88dc27e6360 + .quad 0x074854268d14850c + .quad 0xa145fb7b3e0dcb30 + .quad 0x10843f1b43803b23 + + // 2^180 * 7 * G + + .quad 0xc5f90455376276dd + .quad 0xce59158dd7645cd9 + .quad 0x92f65d511d366b39 + .quad 0x11574b6e526996c4 + .quad 0xd56f672de324689b + .quad 0xd1da8aedb394a981 + .quad 0xdd7b58fe9168cfed + .quad 0x7ce246cd4d56c1e8 + .quad 0xb8f4308e7f80be53 + .quad 0x5f3cb8cb34a9d397 + .quad 0x18a961bd33cc2b2c + .quad 0x710045fb3a9af671 + + // 2^180 * 8 * G + + .quad 0x73f93d36101b95eb + .quad 0xfaef33794f6f4486 + .quad 0x5651735f8f15e562 + .quad 0x7fa3f19058b40da1 + .quad 0xa03fc862059d699e + .quad 0x2370cfa19a619e69 + .quad 0xc4fe3b122f823deb + .quad 0x1d1b056fa7f0844e + .quad 0x1bc64631e56bf61f + .quad 0xd379ab106e5382a3 + .quad 0x4d58c57e0540168d + .quad 0x566256628442d8e4 + + // 2^184 * 1 * G + + .quad 0xb9e499def6267ff6 + .quad 0x7772ca7b742c0843 + .quad 0x23a0153fe9a4f2b1 + .quad 0x2cdfdfecd5d05006 + .quad 0xdd499cd61ff38640 + .quad 0x29cd9bc3063625a0 + .quad 0x51e2d8023dd73dc3 + .quad 
0x4a25707a203b9231 + .quad 0x2ab7668a53f6ed6a + .quad 0x304242581dd170a1 + .quad 0x4000144c3ae20161 + .quad 0x5721896d248e49fc + + // 2^184 * 2 * G + + .quad 0x0b6e5517fd181bae + .quad 0x9022629f2bb963b4 + .quad 0x5509bce932064625 + .quad 0x578edd74f63c13da + .quad 0x285d5091a1d0da4e + .quad 0x4baa6fa7b5fe3e08 + .quad 0x63e5177ce19393b3 + .quad 0x03c935afc4b030fd + .quad 0x997276c6492b0c3d + .quad 0x47ccc2c4dfe205fc + .quad 0xdcd29b84dd623a3c + .quad 0x3ec2ab590288c7a2 + + // 2^184 * 3 * G + + .quad 0xa1a0d27be4d87bb9 + .quad 0xa98b4deb61391aed + .quad 0x99a0ddd073cb9b83 + .quad 0x2dd5c25a200fcace + .quad 0xa7213a09ae32d1cb + .quad 0x0f2b87df40f5c2d5 + .quad 0x0baea4c6e81eab29 + .quad 0x0e1bf66c6adbac5e + .quad 0xe2abd5e9792c887e + .quad 0x1a020018cb926d5d + .quad 0xbfba69cdbaae5f1e + .quad 0x730548b35ae88f5f + + // 2^184 * 4 * G + + .quad 0xc43551a3cba8b8ee + .quad 0x65a26f1db2115f16 + .quad 0x760f4f52ab8c3850 + .quad 0x3043443b411db8ca + .quad 0x805b094ba1d6e334 + .quad 0xbf3ef17709353f19 + .quad 0x423f06cb0622702b + .quad 0x585a2277d87845dd + .quad 0xa18a5f8233d48962 + .quad 0x6698c4b5ec78257f + .quad 0xa78e6fa5373e41ff + .quad 0x7656278950ef981f + + // 2^184 * 5 * G + + .quad 0x38c3cf59d51fc8c0 + .quad 0x9bedd2fd0506b6f2 + .quad 0x26bf109fab570e8f + .quad 0x3f4160a8c1b846a6 + .quad 0xe17073a3ea86cf9d + .quad 0x3a8cfbb707155fdc + .quad 0x4853e7fc31838a8e + .quad 0x28bbf484b613f616 + .quad 0xf2612f5c6f136c7c + .quad 0xafead107f6dd11be + .quad 0x527e9ad213de6f33 + .quad 0x1e79cb358188f75d + + // 2^184 * 6 * G + + .quad 0x013436c3eef7e3f1 + .quad 0x828b6a7ffe9e10f8 + .quad 0x7ff908e5bcf9defc + .quad 0x65d7951b3a3b3831 + .quad 0x77e953d8f5e08181 + .quad 0x84a50c44299dded9 + .quad 0xdc6c2d0c864525e5 + .quad 0x478ab52d39d1f2f4 + .quad 0x66a6a4d39252d159 + .quad 0xe5dde1bc871ac807 + .quad 0xb82c6b40a6c1c96f + .quad 0x16d87a411a212214 + + // 2^184 * 7 * G + + .quad 0xb3bd7e5a42066215 + .quad 0x879be3cd0c5a24c1 + .quad 0x57c05db1d6f994b7 + .quad 0x28f87c8165f38ca6 + .quad 0xfba4d5e2d54e0583 + .quad 0xe21fafd72ebd99fa + .quad 0x497ac2736ee9778f + .quad 0x1f990b577a5a6dde + .quad 0xa3344ead1be8f7d6 + .quad 0x7d1e50ebacea798f + .quad 0x77c6569e520de052 + .quad 0x45882fe1534d6d3e + + // 2^184 * 8 * G + + .quad 0x6669345d757983d6 + .quad 0x62b6ed1117aa11a6 + .quad 0x7ddd1857985e128f + .quad 0x688fe5b8f626f6dd + .quad 0xd8ac9929943c6fe4 + .quad 0xb5f9f161a38392a2 + .quad 0x2699db13bec89af3 + .quad 0x7dcf843ce405f074 + .quad 0x6c90d6484a4732c0 + .quad 0xd52143fdca563299 + .quad 0xb3be28c3915dc6e1 + .quad 0x6739687e7327191b + + // 2^188 * 1 * G + + .quad 0x9f65c5ea200814cf + .quad 0x840536e169a31740 + .quad 0x8b0ed13925c8b4ad + .quad 0x0080dbafe936361d + .quad 0x8ce5aad0c9cb971f + .quad 0x1156aaa99fd54a29 + .quad 0x41f7247015af9b78 + .quad 0x1fe8cca8420f49aa + .quad 0x72a1848f3c0cc82a + .quad 0x38c560c2877c9e54 + .quad 0x5004e228ce554140 + .quad 0x042418a103429d71 + + // 2^188 * 2 * G + + .quad 0x899dea51abf3ff5f + .quad 0x9b93a8672fc2d8ba + .quad 0x2c38cb97be6ebd5c + .quad 0x114d578497263b5d + .quad 0x58e84c6f20816247 + .quad 0x8db2b2b6e36fd793 + .quad 0x977182561d484d85 + .quad 0x0822024f8632abd7 + .quad 0xb301bb7c6b1beca3 + .quad 0x55393f6dc6eb1375 + .quad 0x910d281097b6e4eb + .quad 0x1ad4548d9d479ea3 + + // 2^188 * 3 * G + + .quad 0xcd5a7da0389a48fd + .quad 0xb38fa4aa9a78371e + .quad 0xc6d9761b2cdb8e6c + .quad 0x35cf51dbc97e1443 + .quad 0xa06fe66d0fe9fed3 + .quad 0xa8733a401c587909 + .quad 0x30d14d800df98953 + .quad 0x41ce5876c7b30258 + .quad 0x59ac3bc5d670c022 + .quad 0xeae67c109b119406 + .quad 
0x9798bdf0b3782fda + .quad 0x651e3201fd074092 + + // 2^188 * 4 * G + + .quad 0xd63d8483ef30c5cf + .quad 0x4cd4b4962361cc0c + .quad 0xee90e500a48426ac + .quad 0x0af51d7d18c14eeb + .quad 0xa57ba4a01efcae9e + .quad 0x769f4beedc308a94 + .quad 0xd1f10eeb3603cb2e + .quad 0x4099ce5e7e441278 + .quad 0x1ac98e4f8a5121e9 + .quad 0x7dae9544dbfa2fe0 + .quad 0x8320aa0dd6430df9 + .quad 0x667282652c4a2fb5 + + // 2^188 * 5 * G + + .quad 0x874621f4d86bc9ab + .quad 0xb54c7bbe56fe6fea + .quad 0x077a24257fadc22c + .quad 0x1ab53be419b90d39 + .quad 0xada8b6e02946db23 + .quad 0x1c0ce51a7b253ab7 + .quad 0x8448c85a66dd485b + .quad 0x7f1fc025d0675adf + .quad 0xd8ee1b18319ea6aa + .quad 0x004d88083a21f0da + .quad 0x3bd6aa1d883a4f4b + .quad 0x4db9a3a6dfd9fd14 + + // 2^188 * 6 * G + + .quad 0x8ce7b23bb99c0755 + .quad 0x35c5d6edc4f50f7a + .quad 0x7e1e2ed2ed9b50c3 + .quad 0x36305f16e8934da1 + .quad 0xd95b00bbcbb77c68 + .quad 0xddbc846a91f17849 + .quad 0x7cf700aebe28d9b3 + .quad 0x5ce1285c85d31f3e + .quad 0x31b6972d98b0bde8 + .quad 0x7d920706aca6de5b + .quad 0xe67310f8908a659f + .quad 0x50fac2a6efdf0235 + + // 2^188 * 7 * G + + .quad 0xf3d3a9f35b880f5a + .quad 0xedec050cdb03e7c2 + .quad 0xa896981ff9f0b1a2 + .quad 0x49a4ae2bac5e34a4 + .quad 0x295b1c86f6f449bc + .quad 0x51b2e84a1f0ab4dd + .quad 0xc001cb30aa8e551d + .quad 0x6a28d35944f43662 + .quad 0x28bb12ee04a740e0 + .quad 0x14313bbd9bce8174 + .quad 0x72f5b5e4e8c10c40 + .quad 0x7cbfb19936adcd5b + + // 2^188 * 8 * G + + .quad 0xa311ddc26b89792d + .quad 0x1b30b4c6da512664 + .quad 0x0ca77b4ccf150859 + .quad 0x1de443df1b009408 + .quad 0x8e793a7acc36e6e0 + .quad 0xf9fab7a37d586eed + .quad 0x3a4f9692bae1f4e4 + .quad 0x1c14b03eff5f447e + .quad 0x19647bd114a85291 + .quad 0x57b76cb21034d3af + .quad 0x6329db440f9d6dfa + .quad 0x5ef43e586a571493 + + // 2^192 * 1 * G + + .quad 0xef782014385675a6 + .quad 0xa2649f30aafda9e8 + .quad 0x4cd1eb505cdfa8cb + .quad 0x46115aba1d4dc0b3 + .quad 0xa66dcc9dc80c1ac0 + .quad 0x97a05cf41b38a436 + .quad 0xa7ebf3be95dbd7c6 + .quad 0x7da0b8f68d7e7dab + .quad 0xd40f1953c3b5da76 + .quad 0x1dac6f7321119e9b + .quad 0x03cc6021feb25960 + .quad 0x5a5f887e83674b4b + + // 2^192 * 2 * G + + .quad 0x8f6301cf70a13d11 + .quad 0xcfceb815350dd0c4 + .quad 0xf70297d4a4bca47e + .quad 0x3669b656e44d1434 + .quad 0x9e9628d3a0a643b9 + .quad 0xb5c3cb00e6c32064 + .quad 0x9b5302897c2dec32 + .quad 0x43e37ae2d5d1c70c + .quad 0x387e3f06eda6e133 + .quad 0x67301d5199a13ac0 + .quad 0xbd5ad8f836263811 + .quad 0x6a21e6cd4fd5e9be + + // 2^192 * 3 * G + + .quad 0xf1c6170a3046e65f + .quad 0x58712a2a00d23524 + .quad 0x69dbbd3c8c82b755 + .quad 0x586bf9f1a195ff57 + .quad 0xef4129126699b2e3 + .quad 0x71d30847708d1301 + .quad 0x325432d01182b0bd + .quad 0x45371b07001e8b36 + .quad 0xa6db088d5ef8790b + .quad 0x5278f0dc610937e5 + .quad 0xac0349d261a16eb8 + .quad 0x0eafb03790e52179 + + // 2^192 * 4 * G + + .quad 0x960555c13748042f + .quad 0x219a41e6820baa11 + .quad 0x1c81f73873486d0c + .quad 0x309acc675a02c661 + .quad 0x5140805e0f75ae1d + .quad 0xec02fbe32662cc30 + .quad 0x2cebdf1eea92396d + .quad 0x44ae3344c5435bb3 + .quad 0x9cf289b9bba543ee + .quad 0xf3760e9d5ac97142 + .quad 0x1d82e5c64f9360aa + .quad 0x62d5221b7f94678f + + // 2^192 * 5 * G + + .quad 0x524c299c18d0936d + .quad 0xc86bb56c8a0c1a0c + .quad 0xa375052edb4a8631 + .quad 0x5c0efde4bc754562 + .quad 0x7585d4263af77a3c + .quad 0xdfae7b11fee9144d + .quad 0xa506708059f7193d + .quad 0x14f29a5383922037 + .quad 0xdf717edc25b2d7f5 + .quad 0x21f970db99b53040 + .quad 0xda9234b7c3ed4c62 + .quad 0x5e72365c7bee093e + + // 2^192 * 6 * G + + .quad 
0x575bfc074571217f + .quad 0x3779675d0694d95b + .quad 0x9a0a37bbf4191e33 + .quad 0x77f1104c47b4eabc + .quad 0x7d9339062f08b33e + .quad 0x5b9659e5df9f32be + .quad 0xacff3dad1f9ebdfd + .quad 0x70b20555cb7349b7 + .quad 0xbe5113c555112c4c + .quad 0x6688423a9a881fcd + .quad 0x446677855e503b47 + .quad 0x0e34398f4a06404a + + // 2^192 * 7 * G + + .quad 0xb67d22d93ecebde8 + .quad 0x09b3e84127822f07 + .quad 0x743fa61fb05b6d8d + .quad 0x5e5405368a362372 + .quad 0x18930b093e4b1928 + .quad 0x7de3e10e73f3f640 + .quad 0xf43217da73395d6f + .quad 0x6f8aded6ca379c3e + .quad 0xe340123dfdb7b29a + .quad 0x487b97e1a21ab291 + .quad 0xf9967d02fde6949e + .quad 0x780de72ec8d3de97 + + // 2^192 * 8 * G + + .quad 0x0ae28545089ae7bc + .quad 0x388ddecf1c7f4d06 + .quad 0x38ac15510a4811b8 + .quad 0x0eb28bf671928ce4 + .quad 0x671feaf300f42772 + .quad 0x8f72eb2a2a8c41aa + .quad 0x29a17fd797373292 + .quad 0x1defc6ad32b587a6 + .quad 0xaf5bbe1aef5195a7 + .quad 0x148c1277917b15ed + .quad 0x2991f7fb7ae5da2e + .quad 0x467d201bf8dd2867 + + // 2^196 * 1 * G + + .quad 0x7906ee72f7bd2e6b + .quad 0x05d270d6109abf4e + .quad 0x8d5cfe45b941a8a4 + .quad 0x44c218671c974287 + .quad 0x745f9d56296bc318 + .quad 0x993580d4d8152e65 + .quad 0xb0e5b13f5839e9ce + .quad 0x51fc2b28d43921c0 + .quad 0x1b8fd11795e2a98c + .quad 0x1c4e5ee12b6b6291 + .quad 0x5b30e7107424b572 + .quad 0x6e6b9de84c4f4ac6 + + // 2^196 * 2 * G + + .quad 0xdff25fce4b1de151 + .quad 0xd841c0c7e11c4025 + .quad 0x2554b3c854749c87 + .quad 0x2d292459908e0df9 + .quad 0x6b7c5f10f80cb088 + .quad 0x736b54dc56e42151 + .quad 0xc2b620a5c6ef99c4 + .quad 0x5f4c802cc3a06f42 + .quad 0x9b65c8f17d0752da + .quad 0x881ce338c77ee800 + .quad 0xc3b514f05b62f9e3 + .quad 0x66ed5dd5bec10d48 + + // 2^196 * 3 * G + + .quad 0x7d38a1c20bb2089d + .quad 0x808334e196ccd412 + .quad 0xc4a70b8c6c97d313 + .quad 0x2eacf8bc03007f20 + .quad 0xf0adf3c9cbca047d + .quad 0x81c3b2cbf4552f6b + .quad 0xcfda112d44735f93 + .quad 0x1f23a0c77e20048c + .quad 0xf235467be5bc1570 + .quad 0x03d2d9020dbab38c + .quad 0x27529aa2fcf9e09e + .quad 0x0840bef29d34bc50 + + // 2^196 * 4 * G + + .quad 0x796dfb35dc10b287 + .quad 0x27176bcd5c7ff29d + .quad 0x7f3d43e8c7b24905 + .quad 0x0304f5a191c54276 + .quad 0xcd54e06b7f37e4eb + .quad 0x8cc15f87f5e96cca + .quad 0xb8248bb0d3597dce + .quad 0x246affa06074400c + .quad 0x37d88e68fbe45321 + .quad 0x86097548c0d75032 + .quad 0x4e9b13ef894a0d35 + .quad 0x25a83cac5753d325 + + // 2^196 * 5 * G + + .quad 0x10222f48eed8165e + .quad 0x623fc1234b8bcf3a + .quad 0x1e145c09c221e8f0 + .quad 0x7ccfa59fca782630 + .quad 0x9f0f66293952b6e2 + .quad 0x33db5e0e0934267b + .quad 0xff45252bd609fedc + .quad 0x06be10f5c506e0c9 + .quad 0x1a9615a9b62a345f + .quad 0x22050c564a52fecc + .quad 0xa7a2788528bc0dfe + .quad 0x5e82770a1a1ee71d + + // 2^196 * 6 * G + + .quad 0x35425183ad896a5c + .quad 0xe8673afbe78d52f6 + .quad 0x2c66f25f92a35f64 + .quad 0x09d04f3b3b86b102 + .quad 0xe802e80a42339c74 + .quad 0x34175166a7fffae5 + .quad 0x34865d1f1c408cae + .quad 0x2cca982c605bc5ee + .quad 0xfd2d5d35197dbe6e + .quad 0x207c2eea8be4ffa3 + .quad 0x2613d8db325ae918 + .quad 0x7a325d1727741d3e + + // 2^196 * 7 * G + + .quad 0xd036b9bbd16dfde2 + .quad 0xa2055757c497a829 + .quad 0x8e6cc966a7f12667 + .quad 0x4d3b1a791239c180 + .quad 0xecd27d017e2a076a + .quad 0xd788689f1636495e + .quad 0x52a61af0919233e5 + .quad 0x2a479df17bb1ae64 + .quad 0x9e5eee8e33db2710 + .quad 0x189854ded6c43ca5 + .quad 0xa41c22c592718138 + .quad 0x27ad5538a43a5e9b + + // 2^196 * 8 * G + + .quad 0x2746dd4b15350d61 + .quad 0xd03fcbc8ee9521b7 + .quad 0xe86e365a138672ca + .quad 
0x510e987f7e7d89e2 + .quad 0xcb5a7d638e47077c + .quad 0x8db7536120a1c059 + .quad 0x549e1e4d8bedfdcc + .quad 0x080153b7503b179d + .quad 0xdda69d930a3ed3e3 + .quad 0x3d386ef1cd60a722 + .quad 0xc817ad58bdaa4ee6 + .quad 0x23be8d554fe7372a + + // 2^200 * 1 * G + + .quad 0x95fe919a74ef4fad + .quad 0x3a827becf6a308a2 + .quad 0x964e01d309a47b01 + .quad 0x71c43c4f5ba3c797 + .quad 0xbc1ef4bd567ae7a9 + .quad 0x3f624cb2d64498bd + .quad 0xe41064d22c1f4ec8 + .quad 0x2ef9c5a5ba384001 + .quad 0xb6fd6df6fa9e74cd + .quad 0xf18278bce4af267a + .quad 0x8255b3d0f1ef990e + .quad 0x5a758ca390c5f293 + + // 2^200 * 2 * G + + .quad 0xa2b72710d9462495 + .quad 0x3aa8c6d2d57d5003 + .quad 0xe3d400bfa0b487ca + .quad 0x2dbae244b3eb72ec + .quad 0x8ce0918b1d61dc94 + .quad 0x8ded36469a813066 + .quad 0xd4e6a829afe8aad3 + .quad 0x0a738027f639d43f + .quad 0x980f4a2f57ffe1cc + .quad 0x00670d0de1839843 + .quad 0x105c3f4a49fb15fd + .quad 0x2698ca635126a69c + + // 2^200 * 3 * G + + .quad 0xe765318832b0ba78 + .quad 0x381831f7925cff8b + .quad 0x08a81b91a0291fcc + .quad 0x1fb43dcc49caeb07 + .quad 0x2e3d702f5e3dd90e + .quad 0x9e3f0918e4d25386 + .quad 0x5e773ef6024da96a + .quad 0x3c004b0c4afa3332 + .quad 0x9aa946ac06f4b82b + .quad 0x1ca284a5a806c4f3 + .quad 0x3ed3265fc6cd4787 + .quad 0x6b43fd01cd1fd217 + + // 2^200 * 4 * G + + .quad 0xc7a75d4b4697c544 + .quad 0x15fdf848df0fffbf + .quad 0x2868b9ebaa46785a + .quad 0x5a68d7105b52f714 + .quad 0xb5c742583e760ef3 + .quad 0x75dc52b9ee0ab990 + .quad 0xbf1427c2072b923f + .quad 0x73420b2d6ff0d9f0 + .quad 0xaf2cf6cb9e851e06 + .quad 0x8f593913c62238c4 + .quad 0xda8ab89699fbf373 + .quad 0x3db5632fea34bc9e + + // 2^200 * 5 * G + + .quad 0xf46eee2bf75dd9d8 + .quad 0x0d17b1f6396759a5 + .quad 0x1bf2d131499e7273 + .quad 0x04321adf49d75f13 + .quad 0x2e4990b1829825d5 + .quad 0xedeaeb873e9a8991 + .quad 0xeef03d394c704af8 + .quad 0x59197ea495df2b0e + .quad 0x04e16019e4e55aae + .quad 0xe77b437a7e2f92e9 + .quad 0xc7ce2dc16f159aa4 + .quad 0x45eafdc1f4d70cc0 + + // 2^200 * 6 * G + + .quad 0x698401858045d72b + .quad 0x4c22faa2cf2f0651 + .quad 0x941a36656b222dc6 + .quad 0x5a5eebc80362dade + .quad 0xb60e4624cfccb1ed + .quad 0x59dbc292bd5c0395 + .quad 0x31a09d1ddc0481c9 + .quad 0x3f73ceea5d56d940 + .quad 0xb7a7bfd10a4e8dc6 + .quad 0xbe57007e44c9b339 + .quad 0x60c1207f1557aefa + .quad 0x26058891266218db + + // 2^200 * 7 * G + + .quad 0x59f704a68360ff04 + .quad 0xc3d93fde7661e6f4 + .quad 0x831b2a7312873551 + .quad 0x54ad0c2e4e615d57 + .quad 0x4c818e3cc676e542 + .quad 0x5e422c9303ceccad + .quad 0xec07cccab4129f08 + .quad 0x0dedfa10b24443b8 + .quad 0xee3b67d5b82b522a + .quad 0x36f163469fa5c1eb + .quad 0xa5b4d2f26ec19fd3 + .quad 0x62ecb2baa77a9408 + + // 2^200 * 8 * G + + .quad 0xe5ed795261152b3d + .quad 0x4962357d0eddd7d1 + .quad 0x7482c8d0b96b4c71 + .quad 0x2e59f919a966d8be + .quad 0x92072836afb62874 + .quad 0x5fcd5e8579e104a5 + .quad 0x5aad01adc630a14a + .quad 0x61913d5075663f98 + .quad 0x0dc62d361a3231da + .quad 0xfa47583294200270 + .quad 0x02d801513f9594ce + .quad 0x3ddbc2a131c05d5c + + // 2^204 * 1 * G + + .quad 0x3f50a50a4ffb81ef + .quad 0xb1e035093bf420bf + .quad 0x9baa8e1cc6aa2cd0 + .quad 0x32239861fa237a40 + .quad 0xfb735ac2004a35d1 + .quad 0x31de0f433a6607c3 + .quad 0x7b8591bfc528d599 + .quad 0x55be9a25f5bb050c + .quad 0x0d005acd33db3dbf + .quad 0x0111b37c80ac35e2 + .quad 0x4892d66c6f88ebeb + .quad 0x770eadb16508fbcd + + // 2^204 * 2 * G + + .quad 0x8451f9e05e4e89dd + .quad 0xc06302ffbc793937 + .quad 0x5d22749556a6495c + .quad 0x09a6755ca05603fb + .quad 0xf1d3b681a05071b9 + .quad 0x2207659a3592ff3a + .quad 
0x5f0169297881e40e + .quad 0x16bedd0e86ba374e + .quad 0x5ecccc4f2c2737b5 + .quad 0x43b79e0c2dccb703 + .quad 0x33e008bc4ec43df3 + .quad 0x06c1b840f07566c0 + + // 2^204 * 3 * G + + .quad 0x7688a5c6a388f877 + .quad 0x02a96c14deb2b6ac + .quad 0x64c9f3431b8c2af8 + .quad 0x3628435554a1eed6 + .quad 0x69ee9e7f9b02805c + .quad 0xcbff828a547d1640 + .quad 0x3d93a869b2430968 + .quad 0x46b7b8cd3fe26972 + .quad 0xe9812086fe7eebe0 + .quad 0x4cba6be72f515437 + .quad 0x1d04168b516efae9 + .quad 0x5ea1391043982cb9 + + // 2^204 * 4 * G + + .quad 0x49125c9cf4702ee1 + .quad 0x4520b71f8b25b32d + .quad 0x33193026501fef7e + .quad 0x656d8997c8d2eb2b + .quad 0x6f2b3be4d5d3b002 + .quad 0xafec33d96a09c880 + .quad 0x035f73a4a8bcc4cc + .quad 0x22c5b9284662198b + .quad 0xcb58c8fe433d8939 + .quad 0x89a0cb2e6a8d7e50 + .quad 0x79ca955309fbbe5a + .quad 0x0c626616cd7fc106 + + // 2^204 * 5 * G + + .quad 0x1ffeb80a4879b61f + .quad 0x6396726e4ada21ed + .quad 0x33c7b093368025ba + .quad 0x471aa0c6f3c31788 + .quad 0x8fdfc379fbf454b1 + .quad 0x45a5a970f1a4b771 + .quad 0xac921ef7bad35915 + .quad 0x42d088dca81c2192 + .quad 0x8fda0f37a0165199 + .quad 0x0adadb77c8a0e343 + .quad 0x20fbfdfcc875e820 + .quad 0x1cf2bea80c2206e7 + + // 2^204 * 6 * G + + .quad 0xc2ddf1deb36202ac + .quad 0x92a5fe09d2e27aa5 + .quad 0x7d1648f6fc09f1d3 + .quad 0x74c2cc0513bc4959 + .quad 0x982d6e1a02c0412f + .quad 0x90fa4c83db58e8fe + .quad 0x01c2f5bcdcb18bc0 + .quad 0x686e0c90216abc66 + .quad 0x1fadbadba54395a7 + .quad 0xb41a02a0ae0da66a + .quad 0xbf19f598bba37c07 + .quad 0x6a12b8acde48430d + + // 2^204 * 7 * G + + .quad 0xf8daea1f39d495d9 + .quad 0x592c190e525f1dfc + .quad 0xdb8cbd04c9991d1b + .quad 0x11f7fda3d88f0cb7 + .quad 0x793bdd801aaeeb5f + .quad 0x00a2a0aac1518871 + .quad 0xe8a373a31f2136b4 + .quad 0x48aab888fc91ef19 + .quad 0x041f7e925830f40e + .quad 0x002d6ca979661c06 + .quad 0x86dc9ff92b046a2e + .quad 0x760360928b0493d1 + + // 2^204 * 8 * G + + .quad 0x21bb41c6120cf9c6 + .quad 0xeab2aa12decda59b + .quad 0xc1a72d020aa48b34 + .quad 0x215d4d27e87d3b68 + .quad 0xb43108e5695a0b05 + .quad 0x6cb00ee8ad37a38b + .quad 0x5edad6eea3537381 + .quad 0x3f2602d4b6dc3224 + .quad 0xc8b247b65bcaf19c + .quad 0x49779dc3b1b2c652 + .quad 0x89a180bbd5ece2e2 + .quad 0x13f098a3cec8e039 + + // 2^208 * 1 * G + + .quad 0x9adc0ff9ce5ec54b + .quad 0x039c2a6b8c2f130d + .quad 0x028007c7f0f89515 + .quad 0x78968314ac04b36b + .quad 0xf3aa57a22796bb14 + .quad 0x883abab79b07da21 + .quad 0xe54be21831a0391c + .quad 0x5ee7fb38d83205f9 + .quad 0x538dfdcb41446a8e + .quad 0xa5acfda9434937f9 + .quad 0x46af908d263c8c78 + .quad 0x61d0633c9bca0d09 + + // 2^208 * 2 * G + + .quad 0x63744935ffdb2566 + .quad 0xc5bd6b89780b68bb + .quad 0x6f1b3280553eec03 + .quad 0x6e965fd847aed7f5 + .quad 0xada328bcf8fc73df + .quad 0xee84695da6f037fc + .quad 0x637fb4db38c2a909 + .quad 0x5b23ac2df8067bdc + .quad 0x9ad2b953ee80527b + .quad 0xe88f19aafade6d8d + .quad 0x0e711704150e82cf + .quad 0x79b9bbb9dd95dedc + + // 2^208 * 3 * G + + .quad 0xebb355406a3126c2 + .quad 0xd26383a868c8c393 + .quad 0x6c0c6429e5b97a82 + .quad 0x5065f158c9fd2147 + .quad 0xd1997dae8e9f7374 + .quad 0xa032a2f8cfbb0816 + .quad 0xcd6cba126d445f0a + .quad 0x1ba811460accb834 + .quad 0x708169fb0c429954 + .quad 0xe14600acd76ecf67 + .quad 0x2eaab98a70e645ba + .quad 0x3981f39e58a4faf2 + + // 2^208 * 4 * G + + .quad 0x18fb8a7559230a93 + .quad 0x1d168f6960e6f45d + .quad 0x3a85a94514a93cb5 + .quad 0x38dc083705acd0fd + .quad 0xc845dfa56de66fde + .quad 0xe152a5002c40483a + .quad 0xe9d2e163c7b4f632 + .quad 0x30f4452edcbc1b65 + .quad 0x856d2782c5759740 + .quad 
0xfa134569f99cbecc + .quad 0x8844fc73c0ea4e71 + .quad 0x632d9a1a593f2469 + + // 2^208 * 5 * G + + .quad 0xf6bb6b15b807cba6 + .quad 0x1823c7dfbc54f0d7 + .quad 0xbb1d97036e29670b + .quad 0x0b24f48847ed4a57 + .quad 0xbf09fd11ed0c84a7 + .quad 0x63f071810d9f693a + .quad 0x21908c2d57cf8779 + .quad 0x3a5a7df28af64ba2 + .quad 0xdcdad4be511beac7 + .quad 0xa4538075ed26ccf2 + .quad 0xe19cff9f005f9a65 + .quad 0x34fcf74475481f63 + + // 2^208 * 6 * G + + .quad 0xc197e04c789767ca + .quad 0xb8714dcb38d9467d + .quad 0x55de888283f95fa8 + .quad 0x3d3bdc164dfa63f7 + .quad 0xa5bb1dab78cfaa98 + .quad 0x5ceda267190b72f2 + .quad 0x9309c9110a92608e + .quad 0x0119a3042fb374b0 + .quad 0x67a2d89ce8c2177d + .quad 0x669da5f66895d0c1 + .quad 0xf56598e5b282a2b0 + .quad 0x56c088f1ede20a73 + + // 2^208 * 7 * G + + .quad 0x336d3d1110a86e17 + .quad 0xd7f388320b75b2fa + .quad 0xf915337625072988 + .quad 0x09674c6b99108b87 + .quad 0x581b5fac24f38f02 + .quad 0xa90be9febae30cbd + .quad 0x9a2169028acf92f0 + .quad 0x038b7ea48359038f + .quad 0x9f4ef82199316ff8 + .quad 0x2f49d282eaa78d4f + .quad 0x0971a5ab5aef3174 + .quad 0x6e5e31025969eb65 + + // 2^208 * 8 * G + + .quad 0xb16c62f587e593fb + .quad 0x4999eddeca5d3e71 + .quad 0xb491c1e014cc3e6d + .quad 0x08f5114789a8dba8 + .quad 0x3304fb0e63066222 + .quad 0xfb35068987acba3f + .quad 0xbd1924778c1061a3 + .quad 0x3058ad43d1838620 + .quad 0x323c0ffde57663d0 + .quad 0x05c3df38a22ea610 + .quad 0xbdc78abdac994f9a + .quad 0x26549fa4efe3dc99 + + // 2^212 * 1 * G + + .quad 0x738b38d787ce8f89 + .quad 0xb62658e24179a88d + .quad 0x30738c9cf151316d + .quad 0x49128c7f727275c9 + .quad 0x04dbbc17f75396b9 + .quad 0x69e6a2d7d2f86746 + .quad 0xc6409d99f53eabc6 + .quad 0x606175f6332e25d2 + .quad 0x4021370ef540e7dd + .quad 0x0910d6f5a1f1d0a5 + .quad 0x4634aacd5b06b807 + .quad 0x6a39e6356944f235 + + // 2^212 * 2 * G + + .quad 0x96cd5640df90f3e7 + .quad 0x6c3a760edbfa25ea + .quad 0x24f3ef0959e33cc4 + .quad 0x42889e7e530d2e58 + .quad 0x1da1965774049e9d + .quad 0xfbcd6ea198fe352b + .quad 0xb1cbcd50cc5236a6 + .quad 0x1f5ec83d3f9846e2 + .quad 0x8efb23c3328ccb75 + .quad 0xaf42a207dd876ee9 + .quad 0x20fbdadc5dfae796 + .quad 0x241e246b06bf9f51 + + // 2^212 * 3 * G + + .quad 0x29e68e57ad6e98f6 + .quad 0x4c9260c80b462065 + .quad 0x3f00862ea51ebb4b + .quad 0x5bc2c77fb38d9097 + .quad 0x7eaafc9a6280bbb8 + .quad 0x22a70f12f403d809 + .quad 0x31ce40bb1bfc8d20 + .quad 0x2bc65635e8bd53ee + .quad 0xe8d5dc9fa96bad93 + .quad 0xe58fb17dde1947dc + .quad 0x681532ea65185fa3 + .quad 0x1fdd6c3b034a7830 + + // 2^212 * 4 * G + + .quad 0x0a64e28c55dc18fe + .quad 0xe3df9e993399ebdd + .quad 0x79ac432370e2e652 + .quad 0x35ff7fc33ae4cc0e + .quad 0x9c13a6a52dd8f7a9 + .quad 0x2dbb1f8c3efdcabf + .quad 0x961e32405e08f7b5 + .quad 0x48c8a121bbe6c9e5 + .quad 0xfc415a7c59646445 + .quad 0xd224b2d7c128b615 + .quad 0x6035c9c905fbb912 + .quad 0x42d7a91274429fab + + // 2^212 * 5 * G + + .quad 0x4e6213e3eaf72ed3 + .quad 0x6794981a43acd4e7 + .quad 0xff547cde6eb508cb + .quad 0x6fed19dd10fcb532 + .quad 0xa9a48947933da5bc + .quad 0x4a58920ec2e979ec + .quad 0x96d8800013e5ac4c + .quad 0x453692d74b48b147 + .quad 0xdd775d99a8559c6f + .quad 0xf42a2140df003e24 + .quad 0x5223e229da928a66 + .quad 0x063f46ba6d38f22c + + // 2^212 * 6 * G + + .quad 0xd2d242895f536694 + .quad 0xca33a2c542939b2c + .quad 0x986fada6c7ddb95c + .quad 0x5a152c042f712d5d + .quad 0x39843cb737346921 + .quad 0xa747fb0738c89447 + .quad 0xcb8d8031a245307e + .quad 0x67810f8e6d82f068 + .quad 0x3eeb8fbcd2287db4 + .quad 0x72c7d3a301a03e93 + .quad 0x5473e88cbd98265a + .quad 0x7324aa515921b403 + + // 
2^212 * 7 * G + + .quad 0x857942f46c3cbe8e + .quad 0xa1d364b14730c046 + .quad 0x1c8ed914d23c41bf + .quad 0x0838e161eef6d5d2 + .quad 0xad23f6dae82354cb + .quad 0x6962502ab6571a6d + .quad 0x9b651636e38e37d1 + .quad 0x5cac5005d1a3312f + .quad 0x8cc154cce9e39904 + .quad 0x5b3a040b84de6846 + .quad 0xc4d8a61cb1be5d6e + .quad 0x40fb897bd8861f02 + + // 2^212 * 8 * G + + .quad 0x84c5aa9062de37a1 + .quad 0x421da5000d1d96e1 + .quad 0x788286306a9242d9 + .quad 0x3c5e464a690d10da + .quad 0xe57ed8475ab10761 + .quad 0x71435e206fd13746 + .quad 0x342f824ecd025632 + .quad 0x4b16281ea8791e7b + .quad 0xd1c101d50b813381 + .quad 0xdee60f1176ee6828 + .quad 0x0cb68893383f6409 + .quad 0x6183c565f6ff484a + + // 2^216 * 1 * G + + .quad 0x741d5a461e6bf9d6 + .quad 0x2305b3fc7777a581 + .quad 0xd45574a26474d3d9 + .quad 0x1926e1dc6401e0ff + .quad 0xdb468549af3f666e + .quad 0xd77fcf04f14a0ea5 + .quad 0x3df23ff7a4ba0c47 + .quad 0x3a10dfe132ce3c85 + .quad 0xe07f4e8aea17cea0 + .quad 0x2fd515463a1fc1fd + .quad 0x175322fd31f2c0f1 + .quad 0x1fa1d01d861e5d15 + + // 2^216 * 2 * G + + .quad 0xcc8055947d599832 + .quad 0x1e4656da37f15520 + .quad 0x99f6f7744e059320 + .quad 0x773563bc6a75cf33 + .quad 0x38dcac00d1df94ab + .quad 0x2e712bddd1080de9 + .quad 0x7f13e93efdd5e262 + .quad 0x73fced18ee9a01e5 + .quad 0x06b1e90863139cb3 + .quad 0xa493da67c5a03ecd + .quad 0x8d77cec8ad638932 + .quad 0x1f426b701b864f44 + + // 2^216 * 3 * G + + .quad 0xefc9264c41911c01 + .quad 0xf1a3b7b817a22c25 + .quad 0x5875da6bf30f1447 + .quad 0x4e1af5271d31b090 + .quad 0xf17e35c891a12552 + .quad 0xb76b8153575e9c76 + .quad 0xfa83406f0d9b723e + .quad 0x0b76bb1b3fa7e438 + .quad 0x08b8c1f97f92939b + .quad 0xbe6771cbd444ab6e + .quad 0x22e5646399bb8017 + .quad 0x7b6dd61eb772a955 + + // 2^216 * 4 * G + + .quad 0xb7adc1e850f33d92 + .quad 0x7998fa4f608cd5cf + .quad 0xad962dbd8dfc5bdb + .quad 0x703e9bceaf1d2f4f + .quad 0x5730abf9ab01d2c7 + .quad 0x16fb76dc40143b18 + .quad 0x866cbe65a0cbb281 + .quad 0x53fa9b659bff6afe + .quad 0x6c14c8e994885455 + .quad 0x843a5d6665aed4e5 + .quad 0x181bb73ebcd65af1 + .quad 0x398d93e5c4c61f50 + + // 2^216 * 5 * G + + .quad 0x1c4bd16733e248f3 + .quad 0xbd9e128715bf0a5f + .quad 0xd43f8cf0a10b0376 + .quad 0x53b09b5ddf191b13 + .quad 0xc3877c60d2e7e3f2 + .quad 0x3b34aaa030828bb1 + .quad 0x283e26e7739ef138 + .quad 0x699c9c9002c30577 + .quad 0xf306a7235946f1cc + .quad 0x921718b5cce5d97d + .quad 0x28cdd24781b4e975 + .quad 0x51caf30c6fcdd907 + + // 2^216 * 6 * G + + .quad 0xa60ba7427674e00a + .quad 0x630e8570a17a7bf3 + .quad 0x3758563dcf3324cc + .quad 0x5504aa292383fdaa + .quad 0x737af99a18ac54c7 + .quad 0x903378dcc51cb30f + .quad 0x2b89bc334ce10cc7 + .quad 0x12ae29c189f8e99a + .quad 0xa99ec0cb1f0d01cf + .quad 0x0dd1efcc3a34f7ae + .quad 0x55ca7521d09c4e22 + .quad 0x5fd14fe958eba5ea + + // 2^216 * 7 * G + + .quad 0xb5dc2ddf2845ab2c + .quad 0x069491b10a7fe993 + .quad 0x4daaf3d64002e346 + .quad 0x093ff26e586474d1 + .quad 0x3c42fe5ebf93cb8e + .quad 0xbedfa85136d4565f + .quad 0xe0f0859e884220e8 + .quad 0x7dd73f960725d128 + .quad 0xb10d24fe68059829 + .quad 0x75730672dbaf23e5 + .quad 0x1367253ab457ac29 + .quad 0x2f59bcbc86b470a4 + + // 2^216 * 8 * G + + .quad 0x83847d429917135f + .quad 0xad1b911f567d03d7 + .quad 0x7e7748d9be77aad1 + .quad 0x5458b42e2e51af4a + .quad 0x7041d560b691c301 + .quad 0x85201b3fadd7e71e + .quad 0x16c2e16311335585 + .quad 0x2aa55e3d010828b1 + .quad 0xed5192e60c07444f + .quad 0x42c54e2d74421d10 + .quad 0x352b4c82fdb5c864 + .quad 0x13e9004a8a768664 + + // 2^220 * 1 * G + + .quad 0xcbb5b5556c032bff + .quad 0xdf7191b729297a3a + .quad 
0xc1ff7326aded81bb + .quad 0x71ade8bb68be03f5 + .quad 0x1e6284c5806b467c + .quad 0xc5f6997be75d607b + .quad 0x8b67d958b378d262 + .quad 0x3d88d66a81cd8b70 + .quad 0x8b767a93204ed789 + .quad 0x762fcacb9fa0ae2a + .quad 0x771febcc6dce4887 + .quad 0x343062158ff05fb3 + + // 2^220 * 2 * G + + .quad 0xe05da1a7e1f5bf49 + .quad 0x26457d6dd4736092 + .quad 0x77dcb07773cc32f6 + .quad 0x0a5d94969cdd5fcd + .quad 0xfce219072a7b31b4 + .quad 0x4d7adc75aa578016 + .quad 0x0ec276a687479324 + .quad 0x6d6d9d5d1fda4beb + .quad 0x22b1a58ae9b08183 + .quad 0xfd95d071c15c388b + .quad 0xa9812376850a0517 + .quad 0x33384cbabb7f335e + + // 2^220 * 3 * G + + .quad 0x3c6fa2680ca2c7b5 + .quad 0x1b5082046fb64fda + .quad 0xeb53349c5431d6de + .quad 0x5278b38f6b879c89 + .quad 0x33bc627a26218b8d + .quad 0xea80b21fc7a80c61 + .quad 0x9458b12b173e9ee6 + .quad 0x076247be0e2f3059 + .quad 0x52e105f61416375a + .quad 0xec97af3685abeba4 + .quad 0x26e6b50623a67c36 + .quad 0x5cf0e856f3d4fb01 + + // 2^220 * 4 * G + + .quad 0xf6c968731ae8cab4 + .quad 0x5e20741ecb4f92c5 + .quad 0x2da53be58ccdbc3e + .quad 0x2dddfea269970df7 + .quad 0xbeaece313db342a8 + .quad 0xcba3635b842db7ee + .quad 0xe88c6620817f13ef + .quad 0x1b9438aa4e76d5c6 + .quad 0x8a50777e166f031a + .quad 0x067b39f10fb7a328 + .quad 0x1925c9a6010fbd76 + .quad 0x6df9b575cc740905 + + // 2^220 * 5 * G + + .quad 0x42c1192927f6bdcf + .quad 0x8f91917a403d61ca + .quad 0xdc1c5a668b9e1f61 + .quad 0x1596047804ec0f8d + .quad 0xecdfc35b48cade41 + .quad 0x6a88471fb2328270 + .quad 0x740a4a2440a01b6a + .quad 0x471e5796003b5f29 + .quad 0xda96bbb3aced37ac + .quad 0x7a2423b5e9208cea + .quad 0x24cc5c3038aebae2 + .quad 0x50c356afdc5dae2f + + // 2^220 * 6 * G + + .quad 0x09dcbf4341c30318 + .quad 0xeeba061183181dce + .quad 0xc179c0cedc1e29a1 + .quad 0x1dbf7b89073f35b0 + .quad 0xcfed9cdf1b31b964 + .quad 0xf486a9858ca51af3 + .quad 0x14897265ea8c1f84 + .quad 0x784a53dd932acc00 + .quad 0x2d99f9df14fc4920 + .quad 0x76ccb60cc4499fe5 + .quad 0xa4132cbbe5cf0003 + .quad 0x3f93d82354f000ea + + // 2^220 * 7 * G + + .quad 0x8183e7689e04ce85 + .quad 0x678fb71e04465341 + .quad 0xad92058f6688edac + .quad 0x5da350d3532b099a + .quad 0xeaac12d179e14978 + .quad 0xff923ff3bbebff5e + .quad 0x4af663e40663ce27 + .quad 0x0fd381a811a5f5ff + .quad 0xf256aceca436df54 + .quad 0x108b6168ae69d6e8 + .quad 0x20d986cb6b5d036c + .quad 0x655957b9fee2af50 + + // 2^220 * 8 * G + + .quad 0xaea8b07fa902030f + .quad 0xf88c766af463d143 + .quad 0x15b083663c787a60 + .quad 0x08eab1148267a4a8 + .quad 0xbdc1409bd002d0ac + .quad 0x66660245b5ccd9a6 + .quad 0x82317dc4fade85ec + .quad 0x02fe934b6ad7df0d + .quad 0xef5cf100cfb7ea74 + .quad 0x22897633a1cb42ac + .quad 0xd4ce0c54cef285e2 + .quad 0x30408c048a146a55 + + // 2^224 * 1 * G + + .quad 0x739d8845832fcedb + .quad 0xfa38d6c9ae6bf863 + .quad 0x32bc0dcab74ffef7 + .quad 0x73937e8814bce45e + .quad 0xbb2e00c9193b877f + .quad 0xece3a890e0dc506b + .quad 0xecf3b7c036de649f + .quad 0x5f46040898de9e1a + .quad 0xb9037116297bf48d + .quad 0xa9d13b22d4f06834 + .quad 0xe19715574696bdc6 + .quad 0x2cf8a4e891d5e835 + + // 2^224 * 2 * G + + .quad 0x6d93fd8707110f67 + .quad 0xdd4c09d37c38b549 + .quad 0x7cb16a4cc2736a86 + .quad 0x2049bd6e58252a09 + .quad 0x2cb5487e17d06ba2 + .quad 0x24d2381c3950196b + .quad 0xd7659c8185978a30 + .quad 0x7a6f7f2891d6a4f6 + .quad 0x7d09fd8d6a9aef49 + .quad 0xf0ee60be5b3db90b + .quad 0x4c21b52c519ebfd4 + .quad 0x6011aadfc545941d + + // 2^224 * 3 * G + + .quad 0x5f67926dcf95f83c + .quad 0x7c7e856171289071 + .quad 0xd6a1e7f3998f7a5b + .quad 0x6fc5cc1b0b62f9e0 + .quad 0x63ded0c802cbf890 + .quad 
0xfbd098ca0dff6aaa + .quad 0x624d0afdb9b6ed99 + .quad 0x69ce18b779340b1e + .quad 0xd1ef5528b29879cb + .quad 0xdd1aae3cd47e9092 + .quad 0x127e0442189f2352 + .quad 0x15596b3ae57101f1 + + // 2^224 * 4 * G + + .quad 0x462739d23f9179a2 + .quad 0xff83123197d6ddcf + .quad 0x1307deb553f2148a + .quad 0x0d2237687b5f4dda + .quad 0x09ff31167e5124ca + .quad 0x0be4158bd9c745df + .quad 0x292b7d227ef556e5 + .quad 0x3aa4e241afb6d138 + .quad 0x2cc138bf2a3305f5 + .quad 0x48583f8fa2e926c3 + .quad 0x083ab1a25549d2eb + .quad 0x32fcaa6e4687a36c + + // 2^224 * 5 * G + + .quad 0x7bc56e8dc57d9af5 + .quad 0x3e0bd2ed9df0bdf2 + .quad 0xaac014de22efe4a3 + .quad 0x4627e9cefebd6a5c + .quad 0x3207a4732787ccdf + .quad 0x17e31908f213e3f8 + .quad 0xd5b2ecd7f60d964e + .quad 0x746f6336c2600be9 + .quad 0x3f4af345ab6c971c + .quad 0xe288eb729943731f + .quad 0x33596a8a0344186d + .quad 0x7b4917007ed66293 + + // 2^224 * 6 * G + + .quad 0x2d85fb5cab84b064 + .quad 0x497810d289f3bc14 + .quad 0x476adc447b15ce0c + .quad 0x122ba376f844fd7b + .quad 0x54341b28dd53a2dd + .quad 0xaa17905bdf42fc3f + .quad 0x0ff592d94dd2f8f4 + .quad 0x1d03620fe08cd37d + .quad 0xc20232cda2b4e554 + .quad 0x9ed0fd42115d187f + .quad 0x2eabb4be7dd479d9 + .quad 0x02c70bf52b68ec4c + + // 2^224 * 7 * G + + .quad 0xa287ec4b5d0b2fbb + .quad 0x415c5790074882ca + .quad 0xe044a61ec1d0815c + .quad 0x26334f0a409ef5e0 + .quad 0xace532bf458d72e1 + .quad 0x5be768e07cb73cb5 + .quad 0x56cf7d94ee8bbde7 + .quad 0x6b0697e3feb43a03 + .quad 0xb6c8f04adf62a3c0 + .quad 0x3ef000ef076da45d + .quad 0x9c9cb95849f0d2a9 + .quad 0x1cc37f43441b2fae + + // 2^224 * 8 * G + + .quad 0x508f565a5cc7324f + .quad 0xd061c4c0e506a922 + .quad 0xfb18abdb5c45ac19 + .quad 0x6c6809c10380314a + .quad 0xd76656f1c9ceaeb9 + .quad 0x1c5b15f818e5656a + .quad 0x26e72832844c2334 + .quad 0x3a346f772f196838 + .quad 0xd2d55112e2da6ac8 + .quad 0xe9bd0331b1e851ed + .quad 0x960746dd8ec67262 + .quad 0x05911b9f6ef7c5d0 + + // 2^228 * 1 * G + + .quad 0xe9dcd756b637ff2d + .quad 0xec4c348fc987f0c4 + .quad 0xced59285f3fbc7b7 + .quad 0x3305354793e1ea87 + .quad 0x01c18980c5fe9f94 + .quad 0xcd656769716fd5c8 + .quad 0x816045c3d195a086 + .quad 0x6e2b7f3266cc7982 + .quad 0xcc802468f7c3568f + .quad 0x9de9ba8219974cb3 + .quad 0xabb7229cb5b81360 + .quad 0x44e2017a6fbeba62 + + // 2^228 * 2 * G + + .quad 0xc4c2a74354dab774 + .quad 0x8e5d4c3c4eaf031a + .quad 0xb76c23d242838f17 + .quad 0x749a098f68dce4ea + .quad 0x87f82cf3b6ca6ecd + .quad 0x580f893e18f4a0c2 + .quad 0x058930072604e557 + .quad 0x6cab6ac256d19c1d + .quad 0xdcdfe0a02cc1de60 + .quad 0x032665ff51c5575b + .quad 0x2c0c32f1073abeeb + .quad 0x6a882014cd7b8606 + + // 2^228 * 3 * G + + .quad 0xa52a92fea4747fb5 + .quad 0xdc12a4491fa5ab89 + .quad 0xd82da94bb847a4ce + .quad 0x4d77edce9512cc4e + .quad 0xd111d17caf4feb6e + .quad 0x050bba42b33aa4a3 + .quad 0x17514c3ceeb46c30 + .quad 0x54bedb8b1bc27d75 + .quad 0x77c8e14577e2189c + .quad 0xa3e46f6aff99c445 + .quad 0x3144dfc86d335343 + .quad 0x3a96559e7c4216a9 + + // 2^228 * 4 * G + + .quad 0x12550d37f42ad2ee + .quad 0x8b78e00498a1fbf5 + .quad 0x5d53078233894cb2 + .quad 0x02c84e4e3e498d0c + .quad 0x4493896880baaa52 + .quad 0x4c98afc4f285940e + .quad 0xef4aa79ba45448b6 + .quad 0x5278c510a57aae7f + .quad 0xa54dd074294c0b94 + .quad 0xf55d46b8df18ffb6 + .quad 0xf06fecc58dae8366 + .quad 0x588657668190d165 + + // 2^228 * 5 * G + + .quad 0xd47712311aef7117 + .quad 0x50343101229e92c7 + .quad 0x7a95e1849d159b97 + .quad 0x2449959b8b5d29c9 + .quad 0xbf5834f03de25cc3 + .quad 0xb887c8aed6815496 + .quad 0x5105221a9481e892 + .quad 0x6760ed19f7723f93 + .quad 
0x669ba3b7ac35e160 + .quad 0x2eccf73fba842056 + .quad 0x1aec1f17c0804f07 + .quad 0x0d96bc031856f4e7 + + // 2^228 * 6 * G + + .quad 0x3318be7775c52d82 + .quad 0x4cb764b554d0aab9 + .quad 0xabcf3d27cc773d91 + .quad 0x3bf4d1848123288a + .quad 0xb1d534b0cc7505e1 + .quad 0x32cd003416c35288 + .quad 0xcb36a5800762c29d + .quad 0x5bfe69b9237a0bf8 + .quad 0x183eab7e78a151ab + .quad 0xbbe990c999093763 + .quad 0xff717d6e4ac7e335 + .quad 0x4c5cddb325f39f88 + + // 2^228 * 7 * G + + .quad 0xc0f6b74d6190a6eb + .quad 0x20ea81a42db8f4e4 + .quad 0xa8bd6f7d97315760 + .quad 0x33b1d60262ac7c21 + .quad 0x57750967e7a9f902 + .quad 0x2c37fdfc4f5b467e + .quad 0xb261663a3177ba46 + .quad 0x3a375e78dc2d532b + .quad 0x8141e72f2d4dddea + .quad 0xe6eafe9862c607c8 + .quad 0x23c28458573cafd0 + .quad 0x46b9476f4ff97346 + + // 2^228 * 8 * G + + .quad 0x0c1ffea44f901e5c + .quad 0x2b0b6fb72184b782 + .quad 0xe587ff910114db88 + .quad 0x37130f364785a142 + .quad 0x1215505c0d58359f + .quad 0x2a2013c7fc28c46b + .quad 0x24a0a1af89ea664e + .quad 0x4400b638a1130e1f + .quad 0x3a01b76496ed19c3 + .quad 0x31e00ab0ed327230 + .quad 0x520a885783ca15b1 + .quad 0x06aab9875accbec7 + + // 2^232 * 1 * G + + .quad 0xc1339983f5df0ebb + .quad 0xc0f3758f512c4cac + .quad 0x2cf1130a0bb398e1 + .quad 0x6b3cecf9aa270c62 + .quad 0x5349acf3512eeaef + .quad 0x20c141d31cc1cb49 + .quad 0x24180c07a99a688d + .quad 0x555ef9d1c64b2d17 + .quad 0x36a770ba3b73bd08 + .quad 0x624aef08a3afbf0c + .quad 0x5737ff98b40946f2 + .quad 0x675f4de13381749d + + // 2^232 * 2 * G + + .quad 0x0e2c52036b1782fc + .quad 0x64816c816cad83b4 + .quad 0xd0dcbdd96964073e + .quad 0x13d99df70164c520 + .quad 0xa12ff6d93bdab31d + .quad 0x0725d80f9d652dfe + .quad 0x019c4ff39abe9487 + .quad 0x60f450b882cd3c43 + .quad 0x014b5ec321e5c0ca + .quad 0x4fcb69c9d719bfa2 + .quad 0x4e5f1c18750023a0 + .quad 0x1c06de9e55edac80 + + // 2^232 * 3 * G + + .quad 0x990f7ad6a33ec4e2 + .quad 0x6608f938be2ee08e + .quad 0x9ca143c563284515 + .quad 0x4cf38a1fec2db60d + .quad 0xffd52b40ff6d69aa + .quad 0x34530b18dc4049bb + .quad 0x5e4a5c2fa34d9897 + .quad 0x78096f8e7d32ba2d + .quad 0xa0aaaa650dfa5ce7 + .quad 0xf9c49e2a48b5478c + .quad 0x4f09cc7d7003725b + .quad 0x373cad3a26091abe + + // 2^232 * 4 * G + + .quad 0xb294634d82c9f57c + .quad 0x1fcbfde124934536 + .quad 0x9e9c4db3418cdb5a + .quad 0x0040f3d9454419fc + .quad 0xf1bea8fb89ddbbad + .quad 0x3bcb2cbc61aeaecb + .quad 0x8f58a7bb1f9b8d9d + .quad 0x21547eda5112a686 + .quad 0xdefde939fd5986d3 + .quad 0xf4272c89510a380c + .quad 0xb72ba407bb3119b9 + .quad 0x63550a334a254df4 + + // 2^232 * 5 * G + + .quad 0x6507d6edb569cf37 + .quad 0x178429b00ca52ee1 + .quad 0xea7c0090eb6bd65d + .quad 0x3eea62c7daf78f51 + .quad 0x9bba584572547b49 + .quad 0xf305c6fae2c408e0 + .quad 0x60e8fa69c734f18d + .quad 0x39a92bafaa7d767a + .quad 0x9d24c713e693274e + .quad 0x5f63857768dbd375 + .quad 0x70525560eb8ab39a + .quad 0x68436a0665c9c4cd + + // 2^232 * 6 * G + + .quad 0xbc0235e8202f3f27 + .quad 0xc75c00e264f975b0 + .quad 0x91a4e9d5a38c2416 + .quad 0x17b6e7f68ab789f9 + .quad 0x1e56d317e820107c + .quad 0xc5266844840ae965 + .quad 0xc1e0a1c6320ffc7a + .quad 0x5373669c91611472 + .quad 0x5d2814ab9a0e5257 + .quad 0x908f2084c9cab3fc + .quad 0xafcaf5885b2d1eca + .quad 0x1cb4b5a678f87d11 + + // 2^232 * 7 * G + + .quad 0xb664c06b394afc6c + .quad 0x0c88de2498da5fb1 + .quad 0x4f8d03164bcad834 + .quad 0x330bca78de7434a2 + .quad 0x6b74aa62a2a007e7 + .quad 0xf311e0b0f071c7b1 + .quad 0x5707e438000be223 + .quad 0x2dc0fd2d82ef6eac + .quad 0x982eff841119744e + .quad 0xf9695e962b074724 + .quad 0xc58ac14fbfc953fb + .quad 
0x3c31be1b369f1cf5 + + // 2^232 * 8 * G + + .quad 0xb0f4864d08948aee + .quad 0x07dc19ee91ba1c6f + .quad 0x7975cdaea6aca158 + .quad 0x330b61134262d4bb + .quad 0xc168bc93f9cb4272 + .quad 0xaeb8711fc7cedb98 + .quad 0x7f0e52aa34ac8d7a + .quad 0x41cec1097e7d55bb + .quad 0xf79619d7a26d808a + .quad 0xbb1fd49e1d9e156d + .quad 0x73d7c36cdba1df27 + .quad 0x26b44cd91f28777d + + // 2^236 * 1 * G + + .quad 0x300a9035393aa6d8 + .quad 0x2b501131a12bb1cd + .quad 0x7b1ff677f093c222 + .quad 0x4309c1f8cab82bad + .quad 0xaf44842db0285f37 + .quad 0x8753189047efc8df + .quad 0x9574e091f820979a + .quad 0x0e378d6069615579 + .quad 0xd9fa917183075a55 + .quad 0x4bdb5ad26b009fdc + .quad 0x7829ad2cd63def0e + .quad 0x078fc54975fd3877 + + // 2^236 * 2 * G + + .quad 0x87dfbd1428878f2d + .quad 0x134636dd1e9421a1 + .quad 0x4f17c951257341a3 + .quad 0x5df98d4bad296cb8 + .quad 0xe2004b5bb833a98a + .quad 0x44775dec2d4c3330 + .quad 0x3aa244067eace913 + .quad 0x272630e3d58e00a9 + .quad 0xf3678fd0ecc90b54 + .quad 0xf001459b12043599 + .quad 0x26725fbc3758b89b + .quad 0x4325e4aa73a719ae + + // 2^236 * 3 * G + + .quad 0x657dc6ef433c3493 + .quad 0x65375e9f80dbf8c3 + .quad 0x47fd2d465b372dae + .quad 0x4966ab79796e7947 + .quad 0xed24629acf69f59d + .quad 0x2a4a1ccedd5abbf4 + .quad 0x3535ca1f56b2d67b + .quad 0x5d8c68d043b1b42d + .quad 0xee332d4de3b42b0a + .quad 0xd84e5a2b16a4601c + .quad 0x78243877078ba3e4 + .quad 0x77ed1eb4184ee437 + + // 2^236 * 4 * G + + .quad 0xbfd4e13f201839a0 + .quad 0xaeefffe23e3df161 + .quad 0xb65b04f06b5d1fe3 + .quad 0x52e085fb2b62fbc0 + .quad 0x185d43f89e92ed1a + .quad 0xb04a1eeafe4719c6 + .quad 0x499fbe88a6f03f4f + .quad 0x5d8b0d2f3c859bdd + .quad 0x124079eaa54cf2ba + .quad 0xd72465eb001b26e7 + .quad 0x6843bcfdc97af7fd + .quad 0x0524b42b55eacd02 + + // 2^236 * 5 * G + + .quad 0xfd0d5dbee45447b0 + .quad 0x6cec351a092005ee + .quad 0x99a47844567579cb + .quad 0x59d242a216e7fa45 + .quad 0xbc18dcad9b829eac + .quad 0x23ae7d28b5f579d0 + .quad 0xc346122a69384233 + .quad 0x1a6110b2e7d4ac89 + .quad 0x4f833f6ae66997ac + .quad 0x6849762a361839a4 + .quad 0x6985dec1970ab525 + .quad 0x53045e89dcb1f546 + + // 2^236 * 6 * G + + .quad 0xcb8bb346d75353db + .quad 0xfcfcb24bae511e22 + .quad 0xcba48d40d50ae6ef + .quad 0x26e3bae5f4f7cb5d + .quad 0x84da3cde8d45fe12 + .quad 0xbd42c218e444e2d2 + .quad 0xa85196781f7e3598 + .quad 0x7642c93f5616e2b2 + .quad 0x2323daa74595f8e4 + .quad 0xde688c8b857abeb4 + .quad 0x3fc48e961c59326e + .quad 0x0b2e73ca15c9b8ba + + // 2^236 * 7 * G + + .quad 0xd6bb4428c17f5026 + .quad 0x9eb27223fb5a9ca7 + .quad 0xe37ba5031919c644 + .quad 0x21ce380db59a6602 + .quad 0x0e3fbfaf79c03a55 + .quad 0x3077af054cbb5acf + .quad 0xd5c55245db3de39f + .quad 0x015e68c1476a4af7 + .quad 0xc1d5285220066a38 + .quad 0x95603e523570aef3 + .quad 0x832659a7226b8a4d + .quad 0x5dd689091f8eedc9 + + // 2^236 * 8 * G + + .quad 0xcbac84debfd3c856 + .quad 0x1624c348b35ff244 + .quad 0xb7f88dca5d9cad07 + .quad 0x3b0e574da2c2ebe8 + .quad 0x1d022591a5313084 + .quad 0xca2d4aaed6270872 + .quad 0x86a12b852f0bfd20 + .quad 0x56e6c439ad7da748 + .quad 0xc704ff4942bdbae6 + .quad 0x5e21ade2b2de1f79 + .quad 0xe95db3f35652fad8 + .quad 0x0822b5378f08ebc1 + + // 2^240 * 1 * G + + .quad 0x51f048478f387475 + .quad 0xb25dbcf49cbecb3c + .quad 0x9aab1244d99f2055 + .quad 0x2c709e6c1c10a5d6 + .quad 0xe1b7f29362730383 + .quad 0x4b5279ffebca8a2c + .quad 0xdafc778abfd41314 + .quad 0x7deb10149c72610f + .quad 0xcb62af6a8766ee7a + .quad 0x66cbec045553cd0e + .quad 0x588001380f0be4b5 + .quad 0x08e68e9ff62ce2ea + + // 2^240 * 2 * G + + .quad 0x34ad500a4bc130ad + .quad 
0x8d38db493d0bd49c + .quad 0xa25c3d98500a89be + .quad 0x2f1f3f87eeba3b09 + .quad 0x2f2d09d50ab8f2f9 + .quad 0xacb9218dc55923df + .quad 0x4a8f342673766cb9 + .quad 0x4cb13bd738f719f5 + .quad 0xf7848c75e515b64a + .quad 0xa59501badb4a9038 + .quad 0xc20d313f3f751b50 + .quad 0x19a1e353c0ae2ee8 + + // 2^240 * 3 * G + + .quad 0x7d1c7560bafa05c3 + .quad 0xb3e1a0a0c6e55e61 + .quad 0xe3529718c0d66473 + .quad 0x41546b11c20c3486 + .quad 0xb42172cdd596bdbd + .quad 0x93e0454398eefc40 + .quad 0x9fb15347b44109b5 + .quad 0x736bd3990266ae34 + .quad 0x85532d509334b3b4 + .quad 0x46fd114b60816573 + .quad 0xcc5f5f30425c8375 + .quad 0x412295a2b87fab5c + + // 2^240 * 4 * G + + .quad 0x19c99b88f57ed6e9 + .quad 0x5393cb266df8c825 + .quad 0x5cee3213b30ad273 + .quad 0x14e153ebb52d2e34 + .quad 0x2e655261e293eac6 + .quad 0x845a92032133acdb + .quad 0x460975cb7900996b + .quad 0x0760bb8d195add80 + .quad 0x413e1a17cde6818a + .quad 0x57156da9ed69a084 + .quad 0x2cbf268f46caccb1 + .quad 0x6b34be9bc33ac5f2 + + // 2^240 * 5 * G + + .quad 0xf3df2f643a78c0b2 + .quad 0x4c3e971ef22e027c + .quad 0xec7d1c5e49c1b5a3 + .quad 0x2012c18f0922dd2d + .quad 0x11fc69656571f2d3 + .quad 0xc6c9e845530e737a + .quad 0xe33ae7a2d4fe5035 + .quad 0x01b9c7b62e6dd30b + .quad 0x880b55e55ac89d29 + .quad 0x1483241f45a0a763 + .quad 0x3d36efdfc2e76c1f + .quad 0x08af5b784e4bade8 + + // 2^240 * 6 * G + + .quad 0x283499dc881f2533 + .quad 0x9d0525da779323b6 + .quad 0x897addfb673441f4 + .quad 0x32b79d71163a168d + .quad 0xe27314d289cc2c4b + .quad 0x4be4bd11a287178d + .quad 0x18d528d6fa3364ce + .quad 0x6423c1d5afd9826e + .quad 0xcc85f8d9edfcb36a + .quad 0x22bcc28f3746e5f9 + .quad 0xe49de338f9e5d3cd + .quad 0x480a5efbc13e2dcc + + // 2^240 * 7 * G + + .quad 0x0b51e70b01622071 + .quad 0x06b505cf8b1dafc5 + .quad 0x2c6bb061ef5aabcd + .quad 0x47aa27600cb7bf31 + .quad 0xb6614ce442ce221f + .quad 0x6e199dcc4c053928 + .quad 0x663fb4a4dc1cbe03 + .quad 0x24b31d47691c8e06 + .quad 0x2a541eedc015f8c3 + .quad 0x11a4fe7e7c693f7c + .quad 0xf0af66134ea278d6 + .quad 0x545b585d14dda094 + + // 2^240 * 8 * G + + .quad 0x67bf275ea0d43a0f + .quad 0xade68e34089beebe + .quad 0x4289134cd479e72e + .quad 0x0f62f9c332ba5454 + .quad 0x6204e4d0e3b321e1 + .quad 0x3baa637a28ff1e95 + .quad 0x0b0ccffd5b99bd9e + .quad 0x4d22dc3e64c8d071 + .quad 0xfcb46589d63b5f39 + .quad 0x5cae6a3f57cbcf61 + .quad 0xfebac2d2953afa05 + .quad 0x1c0fa01a36371436 + + // 2^244 * 1 * G + + .quad 0xe7547449bc7cd692 + .quad 0x0f9abeaae6f73ddf + .quad 0x4af01ca700837e29 + .quad 0x63ab1b5d3f1bc183 + .quad 0xc11ee5e854c53fae + .quad 0x6a0b06c12b4f3ff4 + .quad 0x33540f80e0b67a72 + .quad 0x15f18fc3cd07e3ef + .quad 0x32750763b028f48c + .quad 0x06020740556a065f + .quad 0xd53bd812c3495b58 + .quad 0x08706c9b865f508d + + // 2^244 * 2 * G + + .quad 0xf37ca2ab3d343dff + .quad 0x1a8c6a2d80abc617 + .quad 0x8e49e035d4ccffca + .quad 0x48b46beebaa1d1b9 + .quad 0xcc991b4138b41246 + .quad 0x243b9c526f9ac26b + .quad 0xb9ef494db7cbabbd + .quad 0x5fba433dd082ed00 + .quad 0x9c49e355c9941ad0 + .quad 0xb9734ade74498f84 + .quad 0x41c3fed066663e5c + .quad 0x0ecfedf8e8e710b3 + + // 2^244 * 3 * G + + .quad 0x76430f9f9cd470d9 + .quad 0xb62acc9ba42f6008 + .quad 0x1898297c59adad5e + .quad 0x7789dd2db78c5080 + .quad 0x744f7463e9403762 + .quad 0xf79a8dee8dfcc9c9 + .quad 0x163a649655e4cde3 + .quad 0x3b61788db284f435 + .quad 0xb22228190d6ef6b2 + .quad 0xa94a66b246ce4bfa + .quad 0x46c1a77a4f0b6cc7 + .quad 0x4236ccffeb7338cf + + // 2^244 * 4 * G + + .quad 0x8497404d0d55e274 + .quad 0x6c6663d9c4ad2b53 + .quad 0xec2fb0d9ada95734 + .quad 0x2617e120cdb8f73c + .quad 
0x3bd82dbfda777df6 + .quad 0x71b177cc0b98369e + .quad 0x1d0e8463850c3699 + .quad 0x5a71945b48e2d1f1 + .quad 0x6f203dd5405b4b42 + .quad 0x327ec60410b24509 + .quad 0x9c347230ac2a8846 + .quad 0x77de29fc11ffeb6a + + // 2^244 * 5 * G + + .quad 0xb0ac57c983b778a8 + .quad 0x53cdcca9d7fe912c + .quad 0x61c2b854ff1f59dc + .quad 0x3a1a2cf0f0de7dac + .quad 0x835e138fecced2ca + .quad 0x8c9eaf13ea963b9a + .quad 0xc95fbfc0b2160ea6 + .quad 0x575e66f3ad877892 + .quad 0x99803a27c88fcb3a + .quad 0x345a6789275ec0b0 + .quad 0x459789d0ff6c2be5 + .quad 0x62f882651e70a8b2 + + // 2^244 * 6 * G + + .quad 0x085ae2c759ff1be4 + .quad 0x149145c93b0e40b7 + .quad 0xc467e7fa7ff27379 + .quad 0x4eeecf0ad5c73a95 + .quad 0x6d822986698a19e0 + .quad 0xdc9821e174d78a71 + .quad 0x41a85f31f6cb1f47 + .quad 0x352721c2bcda9c51 + .quad 0x48329952213fc985 + .quad 0x1087cf0d368a1746 + .quad 0x8e5261b166c15aa5 + .quad 0x2d5b2d842ed24c21 + + // 2^244 * 7 * G + + .quad 0x02cfebd9ebd3ded1 + .quad 0xd45b217739021974 + .quad 0x7576f813fe30a1b7 + .quad 0x5691b6f9a34ef6c2 + .quad 0x5eb7d13d196ac533 + .quad 0x377234ecdb80be2b + .quad 0xe144cffc7cf5ae24 + .quad 0x5226bcf9c441acec + .quad 0x79ee6c7223e5b547 + .quad 0x6f5f50768330d679 + .quad 0xed73e1e96d8adce9 + .quad 0x27c3da1e1d8ccc03 + + // 2^244 * 8 * G + + .quad 0x7eb9efb23fe24c74 + .quad 0x3e50f49f1651be01 + .quad 0x3ea732dc21858dea + .quad 0x17377bd75bb810f9 + .quad 0x28302e71630ef9f6 + .quad 0xc2d4a2032b64cee0 + .quad 0x090820304b6292be + .quad 0x5fca747aa82adf18 + .quad 0x232a03c35c258ea5 + .quad 0x86f23a2c6bcb0cf1 + .quad 0x3dad8d0d2e442166 + .quad 0x04a8933cab76862b + + // 2^248 * 1 * G + + .quad 0xd2c604b622943dff + .quad 0xbc8cbece44cfb3a0 + .quad 0x5d254ff397808678 + .quad 0x0fa3614f3b1ca6bf + .quad 0x69082b0e8c936a50 + .quad 0xf9c9a035c1dac5b6 + .quad 0x6fb73e54c4dfb634 + .quad 0x4005419b1d2bc140 + .quad 0xa003febdb9be82f0 + .quad 0x2089c1af3a44ac90 + .quad 0xf8499f911954fa8e + .quad 0x1fba218aef40ab42 + + // 2^248 * 2 * G + + .quad 0xab549448fac8f53e + .quad 0x81f6e89a7ba63741 + .quad 0x74fd6c7d6c2b5e01 + .quad 0x392e3acaa8c86e42 + .quad 0x4f3e57043e7b0194 + .quad 0xa81d3eee08daaf7f + .quad 0xc839c6ab99dcdef1 + .quad 0x6c535d13ff7761d5 + .quad 0x4cbd34e93e8a35af + .quad 0x2e0781445887e816 + .quad 0x19319c76f29ab0ab + .quad 0x25e17fe4d50ac13b + + // 2^248 * 3 * G + + .quad 0x0a289bd71e04f676 + .quad 0x208e1c52d6420f95 + .quad 0x5186d8b034691fab + .quad 0x255751442a9fb351 + .quad 0x915f7ff576f121a7 + .quad 0xc34a32272fcd87e3 + .quad 0xccba2fde4d1be526 + .quad 0x6bba828f8969899b + .quad 0xe2d1bc6690fe3901 + .quad 0x4cb54a18a0997ad5 + .quad 0x971d6914af8460d4 + .quad 0x559d504f7f6b7be4 + + // 2^248 * 4 * G + + .quad 0xa7738378b3eb54d5 + .quad 0x1d69d366a5553c7c + .quad 0x0a26cf62f92800ba + .quad 0x01ab12d5807e3217 + .quad 0x9c4891e7f6d266fd + .quad 0x0744a19b0307781b + .quad 0x88388f1d6061e23b + .quad 0x123ea6a3354bd50e + .quad 0x118d189041e32d96 + .quad 0xb9ede3c2d8315848 + .quad 0x1eab4271d83245d9 + .quad 0x4a3961e2c918a154 + + // 2^248 * 5 * G + + .quad 0x71dc3be0f8e6bba0 + .quad 0xd6cef8347effe30a + .quad 0xa992425fe13a476a + .quad 0x2cd6bce3fb1db763 + .quad 0x0327d644f3233f1e + .quad 0x499a260e34fcf016 + .quad 0x83b5a716f2dab979 + .quad 0x68aceead9bd4111f + .quad 0x38b4c90ef3d7c210 + .quad 0x308e6e24b7ad040c + .quad 0x3860d9f1b7e73e23 + .quad 0x595760d5b508f597 + + // 2^248 * 6 * G + + .quad 0x6129bfe104aa6397 + .quad 0x8f960008a4a7fccb + .quad 0x3f8bc0897d909458 + .quad 0x709fa43edcb291a9 + .quad 0x882acbebfd022790 + .quad 0x89af3305c4115760 + .quad 0x65f492e37d3473f4 + .quad 
0x2cb2c5df54515a2b + .quad 0xeb0a5d8c63fd2aca + .quad 0xd22bc1662e694eff + .quad 0x2723f36ef8cbb03a + .quad 0x70f029ecf0c8131f + + // 2^248 * 7 * G + + .quad 0x461307b32eed3e33 + .quad 0xae042f33a45581e7 + .quad 0xc94449d3195f0366 + .quad 0x0b7d5d8a6c314858 + .quad 0x2a6aafaa5e10b0b9 + .quad 0x78f0a370ef041aa9 + .quad 0x773efb77aa3ad61f + .quad 0x44eca5a2a74bd9e1 + .quad 0x25d448327b95d543 + .quad 0x70d38300a3340f1d + .quad 0xde1c531c60e1c52b + .quad 0x272224512c7de9e4 + + // 2^248 * 8 * G + + .quad 0x1abc92af49c5342e + .quad 0xffeed811b2e6fad0 + .quad 0xefa28c8dfcc84e29 + .quad 0x11b5df18a44cc543 + .quad 0xbf7bbb8a42a975fc + .quad 0x8c5c397796ada358 + .quad 0xe27fc76fcdedaa48 + .quad 0x19735fd7f6bc20a6 + .quad 0xe3ab90d042c84266 + .quad 0xeb848e0f7f19547e + .quad 0x2503a1d065a497b9 + .quad 0x0fef911191df895f + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif From dc4268b14cfefe1b8ee328fb9f447e423884331c Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 21 Apr 2023 09:19:46 -0700 Subject: [PATCH 34/42] Make labels unique across new Edwards functions using Torben Hansen's Python script listed in https://github.com/awslabs/s2n-bignum/pull/50 s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/8e80e0c95d6098733785b3126bb8d273f8717a50 --- arm/curve25519/edwards25519_scalarmulbase.S | 100 +++++++++--------- .../edwards25519_scalarmulbase_alt.S | 100 +++++++++--------- .../curve25519/edwards25519_scalarmulbase.S | 92 ++++++++-------- .../edwards25519_scalarmulbase_alt.S | 92 ++++++++-------- 4 files changed, 192 insertions(+), 192 deletions(-) diff --git a/arm/curve25519/edwards25519_scalarmulbase.S b/arm/curve25519/edwards25519_scalarmulbase.S index 475308050b..6ca092489f 100644 --- a/arm/curve25519/edwards25519_scalarmulbase.S +++ b/arm/curve25519/edwards25519_scalarmulbase.S @@ -582,8 +582,8 @@ S2N_BN_SYMBOL(edwards25519_scalarmulbase): // Initialize accumulator "acc" to either 0 or 2^251 * B depending on // bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle. - adr x10, edwards25519_0g - adr x11, edwards25519_251g + adr x10, edwards25519_scalarmulbase_edwards25519_0g + adr x11, edwards25519_scalarmulbase_edwards25519_251g ldp x0, x1, [x10] ldp x2, x3, [x11] csel x0, x0, x2, eq @@ -638,12 +638,12 @@ S2N_BN_SYMBOL(edwards25519_scalarmulbase): // end because we made sure bit 251 is clear in the reduced scalar. mov i, 0 - adr tab, edwards25519_gtable + adr tab, edwards25519_scalarmulbase_edwards25519_gtable mov bias, xzr // Start of the main loop, repeated 63 times for i = 0, 4, 8, ..., 248 -scalarloop: +edwards25519_scalarmulbase_scalarloop: // Look at the next 4-bit field "bf", adding the previous bias as well. 
// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, @@ -926,7 +926,7 @@ scalarloop: add i, i, 4 cmp i, 252 - bcc scalarloop + bcc edwards25519_scalarmulbase_scalarloop // Insert the optional negation of the projective X coordinate, and // so by extension the final affine x coordinate x = X/Z and thus @@ -959,7 +959,7 @@ scalarloop: mov x0, 4 add x1, w_3 add x2, z_3 - adr x3, p_25519 + adr x3, edwards25519_scalarmulbase_p_25519 add x4, tmpspace // Inline copy of bignum_modinv, identical except for stripping out the @@ -971,7 +971,7 @@ scalarloop: add x21, x4, x10 add x22, x21, x10 mov x10, xzr -copyloop: +edwards25519_scalarmulbase_copyloop: ldr x11, [x2, x10, lsl #3] ldr x12, [x3, x10, lsl #3] str x11, [x21, x10, lsl #3] @@ -980,7 +980,7 @@ copyloop: str xzr, [x1, x10, lsl #3] add x10, x10, #0x1 cmp x10, x0 - b.cc copyloop + b.cc edwards25519_scalarmulbase_copyloop ldr x11, [x4] sub x12, x11, #0x1 str x12, [x4] @@ -997,7 +997,7 @@ copyloop: madd x20, x12, x20, x20 madd x20, x11, x20, x20 lsl x2, x0, #7 -outerloop: +edwards25519_scalarmulbase_outerloop: add x10, x2, #0x3f lsr x5, x10, #6 cmp x5, x0 @@ -1008,7 +1008,7 @@ outerloop: mov x16, xzr mov x19, xzr mov x10, xzr -toploop: +edwards25519_scalarmulbase_toploop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] orr x17, x11, x12 @@ -1022,7 +1022,7 @@ toploop: csetm x19, ne add x10, x10, #0x1 cmp x10, x5 - b.cc toploop + b.cc edwards25519_scalarmulbase_toploop orr x11, x13, x14 clz x12, x11 negs x17, x12 @@ -1042,7 +1042,7 @@ toploop: mov x9, #0x1 mov x10, #0x3a tst x15, #0x1 -innerloop: +edwards25519_scalarmulbase_innerloop: csel x11, x14, xzr, ne csel x12, x16, xzr, ne csel x17, x8, xzr, ne @@ -1064,13 +1064,13 @@ innerloop: add x8, x8, x8 add x9, x9, x9 sub x10, x10, #0x1 - cbnz x10, innerloop + cbnz x10, edwards25519_scalarmulbase_innerloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -congloop: +edwards25519_scalarmulbase_congloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x6, x11 @@ -1097,7 +1097,7 @@ congloop: adc x14, x14, x15 add x10, x10, #0x1 cmp x10, x0 - b.cc congloop + b.cc edwards25519_scalarmulbase_congloop extr x13, x13, x17, #58 extr x14, x14, x19, #58 ldr x11, [x4] @@ -1108,8 +1108,8 @@ congloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, wmontend -wmontloop: + cbz x11, edwards25519_scalarmulbase_wmontend +edwards25519_scalarmulbase_wmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] mul x15, x17, x11 @@ -1121,24 +1121,24 @@ wmontloop: str x12, [x4, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wmontloop -wmontend: + cbnz x11, edwards25519_scalarmulbase_wmontloop +edwards25519_scalarmulbase_wmontend: adcs x16, x16, x13 adc x13, xzr, xzr sub x15, x10, #0x1 str x16, [x4, x15, lsl #3] negs x10, xzr -wcmploop: +edwards25519_scalarmulbase_wcmploop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcmploop + cbnz x11, edwards25519_scalarmulbase_wcmploop sbcs xzr, x13, xzr csetm x13, cs negs x10, xzr -wcorrloop: +edwards25519_scalarmulbase_wcorrloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x13 @@ -1146,7 +1146,7 @@ wcorrloop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcorrloop + cbnz x11, edwards25519_scalarmulbase_wcorrloop ldr x11, [x1] mul x17, x11, x20 ldr x12, [x3] @@ -1155,8 +1155,8 @@ wcorrloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, zmontend 
-zmontloop: + cbz x11, edwards25519_scalarmulbase_zmontend +edwards25519_scalarmulbase_zmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x17, x11 @@ -1168,24 +1168,24 @@ zmontloop: str x12, [x1, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zmontloop -zmontend: + cbnz x11, edwards25519_scalarmulbase_zmontloop +edwards25519_scalarmulbase_zmontend: adcs x16, x16, x14 adc x14, xzr, xzr sub x15, x10, #0x1 str x16, [x1, x15, lsl #3] negs x10, xzr -zcmploop: +edwards25519_scalarmulbase_zcmploop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcmploop + cbnz x11, edwards25519_scalarmulbase_zcmploop sbcs xzr, x14, xzr csetm x14, cs negs x10, xzr -zcorrloop: +edwards25519_scalarmulbase_zcorrloop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x14 @@ -1193,13 +1193,13 @@ zcorrloop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcorrloop + cbnz x11, edwards25519_scalarmulbase_zcorrloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -crossloop: +edwards25519_scalarmulbase_crossloop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] mul x15, x6, x11 @@ -1226,13 +1226,13 @@ crossloop: csetm x19, cc add x10, x10, #0x1 cmp x10, x5 - b.cc crossloop + b.cc edwards25519_scalarmulbase_crossloop cmn x17, x17 ldr x15, [x21] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip1 -negloop1: + cbz x6, edwards25519_scalarmulbase_negskip1 +edwards25519_scalarmulbase_negloop1: add x11, x10, #0x8 ldr x12, [x21, x11] extr x15, x12, x15, #58 @@ -1242,8 +1242,8 @@ negloop1: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop1 -negskip1: + cbnz x6, edwards25519_scalarmulbase_negloop1 +edwards25519_scalarmulbase_negskip1: extr x15, x13, x15, #58 eor x15, x15, x17 adcs x15, x15, xzr @@ -1252,8 +1252,8 @@ negskip1: ldr x15, [x22] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip2 -negloop2: + cbz x6, edwards25519_scalarmulbase_negskip2 +edwards25519_scalarmulbase_negloop2: add x11, x10, #0x8 ldr x12, [x22, x11] extr x15, x12, x15, #58 @@ -1263,15 +1263,15 @@ negloop2: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop2 -negskip2: + cbnz x6, edwards25519_scalarmulbase_negloop2 +edwards25519_scalarmulbase_negskip2: extr x15, x14, x15, #58 eor x15, x15, x19 adcs x15, x15, xzr str x15, [x22, x10] mov x10, xzr cmn x17, x17 -wfliploop: +edwards25519_scalarmulbase_wfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] and x11, x11, x17 @@ -1280,11 +1280,11 @@ wfliploop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wfliploop + cbnz x11, edwards25519_scalarmulbase_wfliploop mvn x19, x19 mov x10, xzr cmn x19, x19 -zfliploop: +edwards25519_scalarmulbase_zfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] and x11, x11, x19 @@ -1293,9 +1293,9 @@ zfliploop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zfliploop + cbnz x11, edwards25519_scalarmulbase_zfliploop subs x2, x2, #0x3a - b.hi outerloop + b.hi edwards25519_scalarmulbase_outerloop // The final result is x = X * inv(Z), y = Y * inv(Z). 
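As a side note on the inversion this comment refers to: the assembly obtains inv(Z) from an inline copy of bignum_modinv, but the same quantity can equally be characterized via Fermat's little theorem as Z^(p-2) mod p. The following is a minimal C sketch of that identity over a toy prime; the names mulmod/powmod and the small modulus are illustrative assumptions with no counterpart in s2n-bignum:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy model: modular inverse by Fermat, a^(p-2) mod p.
       Uses the GCC/Clang unsigned __int128 extension. */
    static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t p) {
        return (uint64_t)(((unsigned __int128)a * b) % p);
    }

    static uint64_t powmod(uint64_t a, uint64_t e, uint64_t p) {
        uint64_t r = 1;
        while (e) {                     /* square-and-multiply */
            if (e & 1) r = mulmod(r, a, p);
            a = mulmod(a, a, p);
            e >>= 1;
        }
        return r;
    }

    int main(void) {
        const uint64_t p = 65537;       /* toy stand-in for 2^255 - 19 */
        uint64_t Z = 12345, X = 999;
        uint64_t zinv = powmod(Z, p - 2, p);   /* inv(Z) mod p */
        uint64_t x = mulmod(X, zinv, p);       /* affine x = X / Z */
        printf("x = %llu, ok = %d\n", (unsigned long long)x,
               mulmod(x, Z, p) == X % p);
        return 0;
    }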
// These are the only operations in the whole computation that @@ -1324,7 +1324,7 @@ zfliploop: // The modulus p_25519 = 2^255 - 19, for the modular inverse -p_25519: +edwards25519_scalarmulbase_p_25519: .quad 0xffffffffffffffed .quad 0xffffffffffffffff .quad 0xffffffffffffffff @@ -1333,7 +1333,7 @@ p_25519: // 0 * B = 0 and 2^251 * B in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. -edwards25519_0g: +edwards25519_scalarmulbase_edwards25519_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 @@ -1350,7 +1350,7 @@ edwards25519_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 -edwards25519_251g: +edwards25519_scalarmulbase_edwards25519_251g: .quad 0x525f946d7c7220e7 .quad 0x4636b0b2f1e35444 @@ -1368,7 +1368,7 @@ edwards25519_251g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. -edwards25519_gtable: +edwards25519_scalarmulbase_edwards25519_gtable: // 2^0 * 1 * G diff --git a/arm/curve25519/edwards25519_scalarmulbase_alt.S b/arm/curve25519/edwards25519_scalarmulbase_alt.S index f5a197861a..e8dd9114a4 100644 --- a/arm/curve25519/edwards25519_scalarmulbase_alt.S +++ b/arm/curve25519/edwards25519_scalarmulbase_alt.S @@ -424,8 +424,8 @@ S2N_BN_SYMBOL(edwards25519_scalarmulbase_alt): // Initialize accumulator "acc" to either 0 or 2^251 * B depending on // bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle. - adr x10, edwards25519_0g - adr x11, edwards25519_251g + adr x10, edwards25519_scalarmulbase_alt_edwards25519_0g + adr x11, edwards25519_scalarmulbase_alt_edwards25519_251g ldp x0, x1, [x10] ldp x2, x3, [x11] csel x0, x0, x2, eq @@ -480,12 +480,12 @@ S2N_BN_SYMBOL(edwards25519_scalarmulbase_alt): // end because we made sure bit 251 is clear in the reduced scalar. mov i, 0 - adr tab, edwards25519_gtable + adr tab, edwards25519_scalarmulbase_alt_edwards25519_gtable mov bias, xzr // Start of the main loop, repeated 63 times for i = 0, 4, 8, ..., 248 -scalarloop: +edwards25519_scalarmulbase_alt_scalarloop: // Look at the next 4-bit field "bf", adding the previous bias as well. 
// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, @@ -768,7 +768,7 @@ scalarloop: add i, i, 4 cmp i, 252 - bcc scalarloop + bcc edwards25519_scalarmulbase_alt_scalarloop // Insert the optional negation of the projective X coordinate, and // so by extension the final affine x coordinate x = X/Z and thus @@ -801,7 +801,7 @@ scalarloop: mov x0, 4 add x1, w_3 add x2, z_3 - adr x3, p_25519 + adr x3, edwards25519_scalarmulbase_alt_p_25519 add x4, tmpspace // Inline copy of bignum_modinv, identical except for stripping out the @@ -813,7 +813,7 @@ scalarloop: add x21, x4, x10 add x22, x21, x10 mov x10, xzr -copyloop: +edwards25519_scalarmulbase_alt_copyloop: ldr x11, [x2, x10, lsl #3] ldr x12, [x3, x10, lsl #3] str x11, [x21, x10, lsl #3] @@ -822,7 +822,7 @@ copyloop: str xzr, [x1, x10, lsl #3] add x10, x10, #0x1 cmp x10, x0 - b.cc copyloop + b.cc edwards25519_scalarmulbase_alt_copyloop ldr x11, [x4] sub x12, x11, #0x1 str x12, [x4] @@ -839,7 +839,7 @@ copyloop: madd x20, x12, x20, x20 madd x20, x11, x20, x20 lsl x2, x0, #7 -outerloop: +edwards25519_scalarmulbase_alt_outerloop: add x10, x2, #0x3f lsr x5, x10, #6 cmp x5, x0 @@ -850,7 +850,7 @@ outerloop: mov x16, xzr mov x19, xzr mov x10, xzr -toploop: +edwards25519_scalarmulbase_alt_toploop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] orr x17, x11, x12 @@ -864,7 +864,7 @@ toploop: csetm x19, ne add x10, x10, #0x1 cmp x10, x5 - b.cc toploop + b.cc edwards25519_scalarmulbase_alt_toploop orr x11, x13, x14 clz x12, x11 negs x17, x12 @@ -884,7 +884,7 @@ toploop: mov x9, #0x1 mov x10, #0x3a tst x15, #0x1 -innerloop: +edwards25519_scalarmulbase_alt_innerloop: csel x11, x14, xzr, ne csel x12, x16, xzr, ne csel x17, x8, xzr, ne @@ -906,13 +906,13 @@ innerloop: add x8, x8, x8 add x9, x9, x9 sub x10, x10, #0x1 - cbnz x10, innerloop + cbnz x10, edwards25519_scalarmulbase_alt_innerloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -congloop: +edwards25519_scalarmulbase_alt_congloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x6, x11 @@ -939,7 +939,7 @@ congloop: adc x14, x14, x15 add x10, x10, #0x1 cmp x10, x0 - b.cc congloop + b.cc edwards25519_scalarmulbase_alt_congloop extr x13, x13, x17, #58 extr x14, x14, x19, #58 ldr x11, [x4] @@ -950,8 +950,8 @@ congloop: adds x11, x11, x15 mov x10, #0x1 sub x11, x0, #0x1 - cbz x11, wmontend -wmontloop: + cbz x11, edwards25519_scalarmulbase_alt_wmontend +edwards25519_scalarmulbase_alt_wmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] mul x15, x17, x11 @@ -963,24 +963,24 @@ wmontloop: str x12, [x4, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wmontloop -wmontend: + cbnz x11, edwards25519_scalarmulbase_alt_wmontloop +edwards25519_scalarmulbase_alt_wmontend: adcs x16, x16, x13 adc x13, xzr, xzr sub x15, x10, #0x1 str x16, [x4, x15, lsl #3] negs x10, xzr -wcmploop: +edwards25519_scalarmulbase_alt_wcmploop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcmploop + cbnz x11, edwards25519_scalarmulbase_alt_wcmploop sbcs xzr, x13, xzr csetm x13, cs negs x10, xzr -wcorrloop: +edwards25519_scalarmulbase_alt_wcorrloop: ldr x11, [x4, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x13 @@ -988,7 +988,7 @@ wcorrloop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wcorrloop + cbnz x11, edwards25519_scalarmulbase_alt_wcorrloop ldr x11, [x1] mul x17, x11, x20 ldr x12, [x3] @@ -997,8 +997,8 @@ wcorrloop: adds x11, x11, x15 mov 
x10, #0x1 sub x11, x0, #0x1 - cbz x11, zmontend -zmontloop: + cbz x11, edwards25519_scalarmulbase_alt_zmontend +edwards25519_scalarmulbase_alt_zmontloop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] mul x15, x17, x11 @@ -1010,24 +1010,24 @@ zmontloop: str x12, [x1, x15, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zmontloop -zmontend: + cbnz x11, edwards25519_scalarmulbase_alt_zmontloop +edwards25519_scalarmulbase_alt_zmontend: adcs x16, x16, x14 adc x14, xzr, xzr sub x15, x10, #0x1 str x16, [x1, x15, lsl #3] negs x10, xzr -zcmploop: +edwards25519_scalarmulbase_alt_zcmploop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] sbcs xzr, x11, x12 add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcmploop + cbnz x11, edwards25519_scalarmulbase_alt_zcmploop sbcs xzr, x14, xzr csetm x14, cs negs x10, xzr -zcorrloop: +edwards25519_scalarmulbase_alt_zcorrloop: ldr x11, [x1, x10, lsl #3] ldr x12, [x3, x10, lsl #3] and x12, x12, x14 @@ -1035,13 +1035,13 @@ zcorrloop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zcorrloop + cbnz x11, edwards25519_scalarmulbase_alt_zcorrloop mov x13, xzr mov x14, xzr mov x17, xzr mov x19, xzr mov x10, xzr -crossloop: +edwards25519_scalarmulbase_alt_crossloop: ldr x11, [x21, x10, lsl #3] ldr x12, [x22, x10, lsl #3] mul x15, x6, x11 @@ -1068,13 +1068,13 @@ crossloop: csetm x19, cc add x10, x10, #0x1 cmp x10, x5 - b.cc crossloop + b.cc edwards25519_scalarmulbase_alt_crossloop cmn x17, x17 ldr x15, [x21] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip1 -negloop1: + cbz x6, edwards25519_scalarmulbase_alt_negskip1 +edwards25519_scalarmulbase_alt_negloop1: add x11, x10, #0x8 ldr x12, [x21, x11] extr x15, x12, x15, #58 @@ -1084,8 +1084,8 @@ negloop1: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop1 -negskip1: + cbnz x6, edwards25519_scalarmulbase_alt_negloop1 +edwards25519_scalarmulbase_alt_negskip1: extr x15, x13, x15, #58 eor x15, x15, x17 adcs x15, x15, xzr @@ -1094,8 +1094,8 @@ negskip1: ldr x15, [x22] mov x10, xzr sub x6, x5, #0x1 - cbz x6, negskip2 -negloop2: + cbz x6, edwards25519_scalarmulbase_alt_negskip2 +edwards25519_scalarmulbase_alt_negloop2: add x11, x10, #0x8 ldr x12, [x22, x11] extr x15, x12, x15, #58 @@ -1105,15 +1105,15 @@ negloop2: mov x15, x12 add x10, x10, #0x8 sub x6, x6, #0x1 - cbnz x6, negloop2 -negskip2: + cbnz x6, edwards25519_scalarmulbase_alt_negloop2 +edwards25519_scalarmulbase_alt_negskip2: extr x15, x14, x15, #58 eor x15, x15, x19 adcs x15, x15, xzr str x15, [x22, x10] mov x10, xzr cmn x17, x17 -wfliploop: +edwards25519_scalarmulbase_alt_wfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x4, x10, lsl #3] and x11, x11, x17 @@ -1122,11 +1122,11 @@ wfliploop: str x11, [x4, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, wfliploop + cbnz x11, edwards25519_scalarmulbase_alt_wfliploop mvn x19, x19 mov x10, xzr cmn x19, x19 -zfliploop: +edwards25519_scalarmulbase_alt_zfliploop: ldr x11, [x3, x10, lsl #3] ldr x12, [x1, x10, lsl #3] and x11, x11, x19 @@ -1135,9 +1135,9 @@ zfliploop: str x11, [x1, x10, lsl #3] add x10, x10, #0x1 sub x11, x10, x0 - cbnz x11, zfliploop + cbnz x11, edwards25519_scalarmulbase_alt_zfliploop subs x2, x2, #0x3a - b.hi outerloop + b.hi edwards25519_scalarmulbase_alt_outerloop // The final result is x = X * inv(Z), y = Y * inv(Z). 
// These are the only operations in the whole computation that @@ -1166,7 +1166,7 @@ zfliploop: // The modulus p_25519 = 2^255 - 19, for the modular inverse -p_25519: +edwards25519_scalarmulbase_alt_p_25519: .quad 0xffffffffffffffed .quad 0xffffffffffffffff .quad 0xffffffffffffffff @@ -1175,7 +1175,7 @@ p_25519: // 0 * B = 0 and 2^251 * B in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. -edwards25519_0g: +edwards25519_scalarmulbase_alt_edwards25519_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 @@ -1192,7 +1192,7 @@ edwards25519_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 -edwards25519_251g: +edwards25519_scalarmulbase_alt_edwards25519_251g: .quad 0x525f946d7c7220e7 .quad 0x4636b0b2f1e35444 @@ -1210,7 +1210,7 @@ edwards25519_251g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. -edwards25519_gtable: +edwards25519_scalarmulbase_alt_edwards25519_gtable: // 2^0 * 1 * G diff --git a/x86_att/curve25519/edwards25519_scalarmulbase.S b/x86_att/curve25519/edwards25519_scalarmulbase.S index 0a3e7f92df..a024c9daa4 100644 --- a/x86_att/curve25519/edwards25519_scalarmulbase.S +++ b/x86_att/curve25519/edwards25519_scalarmulbase.S @@ -337,12 +337,12 @@ S2N_BN_SYMBOL(edwards25519_scalarmulbase): pushq %rsi movq %rcx, %rdi movq %rdx, %rsi - callq curve25519_x25519base_standard + callq edwards25519_scalarmulbase_curve25519_x25519base_standard popq %rsi popq %rdi ret -curve25519_x25519base_standard: +edwards25519_scalarmulbase_curve25519_x25519base_standard: #endif // Save registers, make room for temps, preserve input arguments. @@ -428,8 +428,8 @@ curve25519_x25519base_standard: // Initialize accumulator "acc" to either 0 or 2^251 * B depending on // bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle. - leaq edwards25519_0g(%rip), %r10 - leaq edwards25519_251g(%rip), %r11 + leaq edwards25519_scalarmulbase_edwards25519_0g(%rip), %r10 + leaq edwards25519_scalarmulbase_edwards25519_251g(%rip), %r11 movq (%r10), %rax movq (%r11), %rcx @@ -512,13 +512,13 @@ curve25519_x25519base_standard: // end because we made sure bit 251 is clear in the reduced scalar. movq $0, i - leaq edwards25519_gtable(%rip), %rax + leaq edwards25519_scalarmulbase_edwards25519_gtable(%rip), %rax movq %rax, tab movq $0, bias // Start of the main loop, repeated 63 times for i = 4, 8, ..., 252 -scalarloop: +edwards25519_scalarmulbase_scalarloop: // Look at the next 4-bit field "bf", adding the previous bias as well. 
// Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, @@ -875,7 +875,7 @@ scalarloop: addq $4, i cmpq $252, i - jc scalarloop + jc edwards25519_scalarmulbase_scalarloop // Insert the optional negation of the projective X coordinate, and // so by extension the final affine x coordinate x = X/Z and thus @@ -914,7 +914,7 @@ scalarloop: movq $4, %rdi leaq 224(%rsp), %rsi leaq 192(%rsp), %rdx - leaq p_25519(%rip), %rcx + leaq edwards25519_scalarmulbase_p_25519(%rip), %rcx leaq 256(%rsp), %r8 // Inline copy of bignum_modinv, identical except for stripping out the @@ -932,7 +932,7 @@ scalarloop: leaq (%r10,%rdi,8), %r15 xorq %r11, %r11 xorq %r9, %r9 -copyloop: +edwards25519_scalarmulbase_copyloop: movq (%rdx,%r9,8), %rax movq (%rcx,%r9,8), %rbx movq %rax, (%r10,%r9,8) @@ -941,7 +941,7 @@ copyloop: movq %r11, (%rsi,%r9,8) incq %r9 cmpq %rdi, %r9 - jb copyloop + jb edwards25519_scalarmulbase_copyloop movq (%r8), %rax movq %rax, %rbx decq %rbx @@ -973,7 +973,7 @@ copyloop: movq %rdi, %rax shlq $0x7, %rax movq %rax, 0x20(%rsp) -outerloop: +edwards25519_scalarmulbase_outerloop: movq 0x20(%rsp), %r13 addq $0x3f, %r13 shrq $0x6, %r13 @@ -987,7 +987,7 @@ outerloop: movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 -toploop: +edwards25519_scalarmulbase_toploop: movq (%r8,%r9,8), %rbx movq (%r15,%r9,8), %rcx movq %r11, %r10 @@ -1003,7 +1003,7 @@ toploop: sbbq %r11, %r11 incq %r9 cmpq %r13, %r9 - jb toploop + jb edwards25519_scalarmulbase_toploop movq %r12, %rax orq %rbp, %rax bsrq %rax, %rcx @@ -1023,7 +1023,7 @@ toploop: movq %r13, 0x10(%rsp) movq %r8, (%rsp) movq %r15, 0x18(%rsp) -innerloop: +edwards25519_scalarmulbase_innerloop: xorl %eax, %eax xorl %ebx, %ebx xorq %r8, %r8 @@ -1053,7 +1053,7 @@ innerloop: addq %rcx, %rcx addq %rdx, %rdx decq %r9 - jne innerloop + jne edwards25519_scalarmulbase_innerloop movq 0x8(%rsp), %rdi movq 0x10(%rsp), %r13 movq (%rsp), %r8 @@ -1069,7 +1069,7 @@ innerloop: xorq %r10, %r10 xorq %r11, %r11 xorq %r9, %r9 -congloop: +edwards25519_scalarmulbase_congloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -1100,7 +1100,7 @@ congloop: movq %rbp, %rsi incq %r9 cmpq %rdi, %r9 - jb congloop + jb edwards25519_scalarmulbase_congloop shldq $0x6, %r10, %r14 shldq $0x6, %r11, %rsi movq 0x48(%rsp), %r15 @@ -1114,8 +1114,8 @@ congloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je wmontend -wmontloop: + je edwards25519_scalarmulbase_wmontend +edwards25519_scalarmulbase_wmontloop: adcq (%r8,%r9,8), %r10 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1126,26 +1126,26 @@ wmontloop: movq %rdx, %r10 incq %r9 decq %rcx - jne wmontloop -wmontend: + jne edwards25519_scalarmulbase_wmontloop +edwards25519_scalarmulbase_wmontend: adcq %r14, %r10 movq %r10, -0x8(%r8,%rdi,8) sbbq %r10, %r10 negq %r10 movq %rdi, %rcx xorq %r9, %r9 -wcmploop: +edwards25519_scalarmulbase_wcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne wcmploop + jne edwards25519_scalarmulbase_wcmploop sbbq $0x0, %r10 sbbq %r10, %r10 notq %r10 xorq %rcx, %rcx xorq %r9, %r9 -wcorrloop: +edwards25519_scalarmulbase_wcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r10, %rbx @@ -1155,7 +1155,7 @@ wcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb wcorrloop + jb edwards25519_scalarmulbase_wcorrloop movq 0x40(%rsp), %r8 movq (%r8), %rbx movq 0x28(%rsp), %rbp @@ -1167,8 +1167,8 @@ wcorrloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je zmontend -zmontloop: + je edwards25519_scalarmulbase_zmontend +edwards25519_scalarmulbase_zmontloop: adcq 
(%r8,%r9,8), %r11 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1179,26 +1179,26 @@ zmontloop: movq %rdx, %r11 incq %r9 decq %rcx - jne zmontloop -zmontend: + jne edwards25519_scalarmulbase_zmontloop +edwards25519_scalarmulbase_zmontend: adcq %rsi, %r11 movq %r11, -0x8(%r8,%rdi,8) sbbq %r11, %r11 negq %r11 movq %rdi, %rcx xorq %r9, %r9 -zcmploop: +edwards25519_scalarmulbase_zcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne zcmploop + jne edwards25519_scalarmulbase_zcmploop sbbq $0x0, %r11 sbbq %r11, %r11 notq %r11 xorq %rcx, %rcx xorq %r9, %r9 -zcorrloop: +edwards25519_scalarmulbase_zcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r11, %rbx @@ -1208,7 +1208,7 @@ zcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb zcorrloop + jb edwards25519_scalarmulbase_zcorrloop movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 @@ -1216,7 +1216,7 @@ zcorrloop: xorq %r14, %r14 xorq %rbp, %rbp xorq %rsi, %rsi -crossloop: +edwards25519_scalarmulbase_crossloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -1247,13 +1247,13 @@ crossloop: movq %r11, %rsi incq %r9 cmpq %r13, %r9 - jb crossloop + jb edwards25519_scalarmulbase_crossloop xorq %r9, %r9 movq %r12, %r10 movq %rbp, %r11 xorq %r12, %r14 xorq %rbp, %rsi -optnegloop: +edwards25519_scalarmulbase_optnegloop: movq (%r8,%r9,8), %rax xorq %r12, %rax negq %r10 @@ -1268,11 +1268,11 @@ optnegloop: movq %rax, (%r15,%r9,8) incq %r9 cmpq %r13, %r9 - jb optnegloop + jb edwards25519_scalarmulbase_optnegloop subq %r10, %r14 subq %r11, %rsi movq %r13, %r9 -shiftloop: +edwards25519_scalarmulbase_shiftloop: movq -0x8(%r8,%r9,8), %rax movq %rax, %r10 shrdq $0x3a, %r14, %rax @@ -1284,7 +1284,7 @@ shiftloop: movq %rax, -0x8(%r15,%r9,8) movq %r11, %rsi decq %r9 - jne shiftloop + jne edwards25519_scalarmulbase_shiftloop notq %rbp movq 0x48(%rsp), %rcx movq 0x38(%rsp), %r8 @@ -1292,7 +1292,7 @@ shiftloop: movq %r12, %r10 movq %rbp, %r11 xorq %r9, %r9 -fliploop: +edwards25519_scalarmulbase_fliploop: movq %rbp, %rdx movq (%rcx,%r9,8), %rax andq %rax, %rdx @@ -1311,9 +1311,9 @@ fliploop: movq %rdx, (%r15,%r9,8) incq %r9 cmpq %rdi, %r9 - jb fliploop + jb edwards25519_scalarmulbase_fliploop subq $0x3a, 0x20(%rsp) - ja outerloop + ja edwards25519_scalarmulbase_outerloop // The final result is x = X * inv(Z), y = Y * inv(Z). // These are the only operations in the whole computation that @@ -1346,7 +1346,7 @@ fliploop: // The modulus, for the modular inverse -p_25519: +edwards25519_scalarmulbase_p_25519: .quad 0xffffffffffffffed .quad 0xffffffffffffffff .quad 0xffffffffffffffff @@ -1355,7 +1355,7 @@ p_25519: // 0 * B = 0 and 2^251 * B in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. -edwards25519_0g: +edwards25519_scalarmulbase_edwards25519_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 @@ -1372,7 +1372,7 @@ edwards25519_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 -edwards25519_251g: +edwards25519_scalarmulbase_edwards25519_251g: .quad 0x525f946d7c7220e7 .quad 0x4636b0b2f1e35444 @@ -1390,7 +1390,7 @@ edwards25519_251g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
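The (y-x, x+y, 2*d*x*y) form stores exactly the combinations that the unified Edwards addition formulas consume, so each table lookup saves the field additions and the d-multiplication that would otherwise be recomputed on every step. A minimal sketch of how one entry would be derived from an affine point, using toy 64-bit field elements purely for illustration (the struct and function names are hypothetical):

    #include <stdint.h>

    typedef struct { uint64_t ymx, xpy, kxy; } gtable_entry;

    /* Toy field: assumes p < 2^63 and x, y, d < p so sums cannot overflow. */
    static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t p) {
        return (uint64_t)(((unsigned __int128)a * b) % p);
    }

    /* Entry for affine (x, y): (y - x, x + y, 2*d*x*y), all mod p. */
    static gtable_entry make_entry(uint64_t x, uint64_t y,
                                   uint64_t d, uint64_t p) {
        gtable_entry e;
        e.ymx = (y + p - x) % p;
        e.xpy = (x + y) % p;
        e.kxy = mulmod(mulmod(2 * d % p, x, p), y, p);
        return e;
    }

Negating a point in this representation only swaps the first two components and negates the third, which is what makes the signed-digit lookups in the main loop cheap.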
-edwards25519_gtable: +edwards25519_scalarmulbase_edwards25519_gtable: // 2^0 * 1 * G diff --git a/x86_att/curve25519/edwards25519_scalarmulbase_alt.S b/x86_att/curve25519/edwards25519_scalarmulbase_alt.S index bb07e1f207..e66492083f 100644 --- a/x86_att/curve25519/edwards25519_scalarmulbase_alt.S +++ b/x86_att/curve25519/edwards25519_scalarmulbase_alt.S @@ -413,12 +413,12 @@ S2N_BN_SYMBOL(edwards25519_scalarmulbase_alt): pushq %rsi movq %rcx, %rdi movq %rdx, %rsi - callq curve25519_x25519base_standard + callq edwards25519_scalarmulbase_alt_curve25519_x25519base_standard popq %rsi popq %rdi ret -curve25519_x25519base_standard: +edwards25519_scalarmulbase_alt_curve25519_x25519base_standard: #endif // Save registers, make room for temps, preserve input arguments. @@ -504,8 +504,8 @@ curve25519_x25519base_standard: // Initialize accumulator "acc" to either 0 or 2^251 * B depending on // bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle. - leaq edwards25519_0g(%rip), %r10 - leaq edwards25519_251g(%rip), %r11 + leaq edwards25519_scalarmulbase_alt_edwards25519_0g(%rip), %r10 + leaq edwards25519_scalarmulbase_alt_edwards25519_251g(%rip), %r11 movq (%r10), %rax movq (%r11), %rcx @@ -588,13 +588,13 @@ curve25519_x25519base_standard: // end because we made sure bit 251 is clear in the reduced scalar. movq $0, i - leaq edwards25519_gtable(%rip), %rax + leaq edwards25519_scalarmulbase_alt_edwards25519_gtable(%rip), %rax movq %rax, tab movq $0, bias // Start of the main loop, repeated 63 times for i = 4, 8, ..., 252 -scalarloop: +edwards25519_scalarmulbase_alt_scalarloop: // Look at the next 4-bit field "bf", adding the previous bias as well. // Choose the table index "ix" as bf when bf <= 8 and 16 - bf for bf >= 9, @@ -951,7 +951,7 @@ scalarloop: addq $4, i cmpq $252, i - jc scalarloop + jc edwards25519_scalarmulbase_alt_scalarloop // Insert the optional negation of the projective X coordinate, and // so by extension the final affine x coordinate x = X/Z and thus @@ -990,7 +990,7 @@ scalarloop: movq $4, %rdi leaq 224(%rsp), %rsi leaq 192(%rsp), %rdx - leaq p_25519(%rip), %rcx + leaq edwards25519_scalarmulbase_alt_p_25519(%rip), %rcx leaq 256(%rsp), %r8 // Inline copy of bignum_modinv, identical except for stripping out the @@ -1008,7 +1008,7 @@ scalarloop: leaq (%r10,%rdi,8), %r15 xorq %r11, %r11 xorq %r9, %r9 -copyloop: +edwards25519_scalarmulbase_alt_copyloop: movq (%rdx,%r9,8), %rax movq (%rcx,%r9,8), %rbx movq %rax, (%r10,%r9,8) @@ -1017,7 +1017,7 @@ copyloop: movq %r11, (%rsi,%r9,8) incq %r9 cmpq %rdi, %r9 - jb copyloop + jb edwards25519_scalarmulbase_alt_copyloop movq (%r8), %rax movq %rax, %rbx decq %rbx @@ -1049,7 +1049,7 @@ copyloop: movq %rdi, %rax shlq $0x7, %rax movq %rax, 0x20(%rsp) -outerloop: +edwards25519_scalarmulbase_alt_outerloop: movq 0x20(%rsp), %r13 addq $0x3f, %r13 shrq $0x6, %r13 @@ -1063,7 +1063,7 @@ outerloop: movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 -toploop: +edwards25519_scalarmulbase_alt_toploop: movq (%r8,%r9,8), %rbx movq (%r15,%r9,8), %rcx movq %r11, %r10 @@ -1079,7 +1079,7 @@ toploop: sbbq %r11, %r11 incq %r9 cmpq %r13, %r9 - jb toploop + jb edwards25519_scalarmulbase_alt_toploop movq %r12, %rax orq %rbp, %rax bsrq %rax, %rcx @@ -1099,7 +1099,7 @@ toploop: movq %r13, 0x10(%rsp) movq %r8, (%rsp) movq %r15, 0x18(%rsp) -innerloop: +edwards25519_scalarmulbase_alt_innerloop: xorl %eax, %eax xorl %ebx, %ebx xorq %r8, %r8 @@ -1129,7 +1129,7 @@ innerloop: addq %rcx, %rcx addq %rdx, %rdx decq %r9 - jne innerloop + jne 
edwards25519_scalarmulbase_alt_innerloop movq 0x8(%rsp), %rdi movq 0x10(%rsp), %r13 movq (%rsp), %r8 @@ -1145,7 +1145,7 @@ innerloop: xorq %r10, %r10 xorq %r11, %r11 xorq %r9, %r9 -congloop: +edwards25519_scalarmulbase_alt_congloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -1176,7 +1176,7 @@ congloop: movq %rbp, %rsi incq %r9 cmpq %rdi, %r9 - jb congloop + jb edwards25519_scalarmulbase_alt_congloop shldq $0x6, %r10, %r14 shldq $0x6, %r11, %rsi movq 0x48(%rsp), %r15 @@ -1190,8 +1190,8 @@ congloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je wmontend -wmontloop: + je edwards25519_scalarmulbase_alt_wmontend +edwards25519_scalarmulbase_alt_wmontloop: adcq (%r8,%r9,8), %r10 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1202,26 +1202,26 @@ wmontloop: movq %rdx, %r10 incq %r9 decq %rcx - jne wmontloop -wmontend: + jne edwards25519_scalarmulbase_alt_wmontloop +edwards25519_scalarmulbase_alt_wmontend: adcq %r14, %r10 movq %r10, -0x8(%r8,%rdi,8) sbbq %r10, %r10 negq %r10 movq %rdi, %rcx xorq %r9, %r9 -wcmploop: +edwards25519_scalarmulbase_alt_wcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne wcmploop + jne edwards25519_scalarmulbase_alt_wcmploop sbbq $0x0, %r10 sbbq %r10, %r10 notq %r10 xorq %rcx, %rcx xorq %r9, %r9 -wcorrloop: +edwards25519_scalarmulbase_alt_wcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r10, %rbx @@ -1231,7 +1231,7 @@ wcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb wcorrloop + jb edwards25519_scalarmulbase_alt_wcorrloop movq 0x40(%rsp), %r8 movq (%r8), %rbx movq 0x28(%rsp), %rbp @@ -1243,8 +1243,8 @@ wcorrloop: movl $0x1, %r9d movq %rdi, %rcx decq %rcx - je zmontend -zmontloop: + je edwards25519_scalarmulbase_alt_zmontend +edwards25519_scalarmulbase_alt_zmontloop: adcq (%r8,%r9,8), %r11 sbbq %rbx, %rbx movq (%r15,%r9,8), %rax @@ -1255,26 +1255,26 @@ zmontloop: movq %rdx, %r11 incq %r9 decq %rcx - jne zmontloop -zmontend: + jne edwards25519_scalarmulbase_alt_zmontloop +edwards25519_scalarmulbase_alt_zmontend: adcq %rsi, %r11 movq %r11, -0x8(%r8,%rdi,8) sbbq %r11, %r11 negq %r11 movq %rdi, %rcx xorq %r9, %r9 -zcmploop: +edwards25519_scalarmulbase_alt_zcmploop: movq (%r8,%r9,8), %rax sbbq (%r15,%r9,8), %rax incq %r9 decq %rcx - jne zcmploop + jne edwards25519_scalarmulbase_alt_zcmploop sbbq $0x0, %r11 sbbq %r11, %r11 notq %r11 xorq %rcx, %rcx xorq %r9, %r9 -zcorrloop: +edwards25519_scalarmulbase_alt_zcorrloop: movq (%r8,%r9,8), %rax movq (%r15,%r9,8), %rbx andq %r11, %rbx @@ -1284,7 +1284,7 @@ zcorrloop: movq %rax, (%r8,%r9,8) incq %r9 cmpq %rdi, %r9 - jb zcorrloop + jb edwards25519_scalarmulbase_alt_zcorrloop movq 0x30(%rsp), %r8 leaq (%r8,%rdi,8), %r15 xorq %r9, %r9 @@ -1292,7 +1292,7 @@ zcorrloop: xorq %r14, %r14 xorq %rbp, %rbp xorq %rsi, %rsi -crossloop: +edwards25519_scalarmulbase_alt_crossloop: movq (%r8,%r9,8), %rcx movq (%rsp), %rax mulq %rcx @@ -1323,13 +1323,13 @@ crossloop: movq %r11, %rsi incq %r9 cmpq %r13, %r9 - jb crossloop + jb edwards25519_scalarmulbase_alt_crossloop xorq %r9, %r9 movq %r12, %r10 movq %rbp, %r11 xorq %r12, %r14 xorq %rbp, %rsi -optnegloop: +edwards25519_scalarmulbase_alt_optnegloop: movq (%r8,%r9,8), %rax xorq %r12, %rax negq %r10 @@ -1344,11 +1344,11 @@ optnegloop: movq %rax, (%r15,%r9,8) incq %r9 cmpq %r13, %r9 - jb optnegloop + jb edwards25519_scalarmulbase_alt_optnegloop subq %r10, %r14 subq %r11, %rsi movq %r13, %r9 -shiftloop: +edwards25519_scalarmulbase_alt_shiftloop: movq -0x8(%r8,%r9,8), %rax movq %rax, %r10 shrdq $0x3a, %r14, %rax @@ -1360,7 +1360,7 @@ shiftloop: 
movq %rax, -0x8(%r15,%r9,8) movq %r11, %rsi decq %r9 - jne shiftloop + jne edwards25519_scalarmulbase_alt_shiftloop notq %rbp movq 0x48(%rsp), %rcx movq 0x38(%rsp), %r8 @@ -1368,7 +1368,7 @@ shiftloop: movq %r12, %r10 movq %rbp, %r11 xorq %r9, %r9 -fliploop: +edwards25519_scalarmulbase_alt_fliploop: movq %rbp, %rdx movq (%rcx,%r9,8), %rax andq %rax, %rdx @@ -1387,9 +1387,9 @@ fliploop: movq %rdx, (%r15,%r9,8) incq %r9 cmpq %rdi, %r9 - jb fliploop + jb edwards25519_scalarmulbase_alt_fliploop subq $0x3a, 0x20(%rsp) - ja outerloop + ja edwards25519_scalarmulbase_alt_outerloop // The final result is x = X * inv(Z), y = Y * inv(Z). // These are the only operations in the whole computation that @@ -1422,7 +1422,7 @@ fliploop: // The modulus, for the modular inverse -p_25519: +edwards25519_scalarmulbase_alt_p_25519: .quad 0xffffffffffffffed .quad 0xffffffffffffffff .quad 0xffffffffffffffff @@ -1431,7 +1431,7 @@ p_25519: // 0 * B = 0 and 2^251 * B in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. -edwards25519_0g: +edwards25519_scalarmulbase_alt_edwards25519_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 @@ -1448,7 +1448,7 @@ edwards25519_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 -edwards25519_251g: +edwards25519_scalarmulbase_alt_edwards25519_251g: .quad 0x525f946d7c7220e7 .quad 0x4636b0b2f1e35444 @@ -1466,7 +1466,7 @@ edwards25519_251g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. -edwards25519_gtable: +edwards25519_scalarmulbase_alt_edwards25519_gtable: // 2^0 * 1 * G From b13f1e79a398a6bb1d095d49057fd307b009a521 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 13 Jun 2023 17:05:22 -0700 Subject: [PATCH 35/42] Add edwards25519 combined fresh and base point scalar multiplication The function edwards25519_scalarmuldouble[_alt] produces the sum of two scalar products of points on the edwards25519 curve, i.e. a result n * P + m * B where n and m are arbitrary scalar arguments, P is an arbitrary point argument and B is the standard basepoint. It seems a natural primitive for Ed25519 signature verification. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/ef1b4edc5772ea54729f961c82e8b02842d76782 --- arm/curve25519/edwards25519_scalarmuldouble.S | 2468 +++++++++++++++ .../edwards25519_scalarmuldouble_alt.S | 2252 +++++++++++++ .../curve25519/edwards25519_scalarmuldouble.S | 2666 ++++++++++++++++ .../edwards25519_scalarmuldouble_alt.S | 2783 +++++++++++++++++ 4 files changed, 10169 insertions(+) create mode 100644 arm/curve25519/edwards25519_scalarmuldouble.S create mode 100644 arm/curve25519/edwards25519_scalarmuldouble_alt.S create mode 100644 x86_att/curve25519/edwards25519_scalarmuldouble.S create mode 100644 x86_att/curve25519/edwards25519_scalarmuldouble_alt.S diff --git a/arm/curve25519/edwards25519_scalarmuldouble.S b/arm/curve25519/edwards25519_scalarmuldouble.S new file mode 100644 index 0000000000..cd760f1212 --- /dev/null +++ b/arm/curve25519/edwards25519_scalarmuldouble.S @@ -0,0 +1,2468 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Double scalar multiplication for edwards25519, fresh and base point +// Input scalar[4], point[8], bscalar[4]; output res[8] +// +// extern void edwards25519_scalarmuldouble +// (uint64_t res[static 8],uint64_t scalar[static 4], +// uint64_t point[static 8],uint64_t bscalar[static 4]); +// +// Given scalar = n, point = P and bscalar = m, returns in res +// the point (X,Y) = n * P + m * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve. +// +// Both 256-bit coordinates of the input point P are implicitly +// reduced modulo 2^255-19 if they are not already in reduced form, +// but the conventional usage is that they *are* already reduced. +// The scalars can be arbitrary 256-bit numbers but may also be +// considered as implicitly reduced modulo the group order. +// +// Standard ARM ABI: X0 = res, X1 = scalar, X2 = point, X3 = bscalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_scalarmuldouble) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_scalarmuldouble) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable home for the input result argument during the whole body + +#define res x25 + +// Additional pointer variables for local subroutines + +#define p0 x22 +#define p1 x23 +#define p2 x24 + +// Other variables that are only needed prior to the modular inverse. + +#define i x19 +#define bf x20 +#define cf x21 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. + +#define resx res, #(0*NUMSIZE) +#define resy res, #(1*NUMSIZE) + +#define scalar sp, #(0*NUMSIZE) +#define bscalar sp, #(1*NUMSIZE) + +#define acc sp, #(2*NUMSIZE) +#define acc_x sp, #(2*NUMSIZE) +#define acc_y sp, #(3*NUMSIZE) +#define acc_z sp, #(4*NUMSIZE) +#define acc_w sp, #(5*NUMSIZE) + +#define tabent sp, #(6*NUMSIZE) +#define btabent sp, #(10*NUMSIZE) + +#define tab sp, #(13*NUMSIZE) + +// Total size to reserve on the stack (excluding local subroutines) + +#define NSPACE (45*NUMSIZE) + +// Sub-references used in local subroutines with local stack + +#define x_0 p0, #0 +#define y_0 p0, #NUMSIZE +#define z_0 p0, #(2*NUMSIZE) +#define w_0 p0, #(3*NUMSIZE) + +#define x_1 p1, #0 +#define y_1 p1, #NUMSIZE +#define z_1 p1, #(2*NUMSIZE) +#define w_1 p1, #(3*NUMSIZE) + +#define x_2 p2, #0 +#define y_2 p2, #NUMSIZE +#define z_2 p2, #(2*NUMSIZE) +#define w_2 p2, #(3*NUMSIZE) + +#define t0 sp, #(0*NUMSIZE) +#define t1 sp, #(1*NUMSIZE) +#define t2 sp, #(2*NUMSIZE) +#define t3 sp, #(3*NUMSIZE) +#define t4 sp, #(4*NUMSIZE) +#define t5 sp, #(5*NUMSIZE) + +// Load 64-bit immediate into a register + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +// Macro wrapping up the basic field operation bignum_mul_p25519, only +// trivially different from a pure function call to that subroutine. 
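Before the macro itself, a reference model of what it computes may help: the product is formed over 4x64-bit limbs and then reduced using 2^256 == 38 and 2^255 == 19 (mod p_25519). The C sketch below uses a plain schoolbook product and a non-constant-time final subtraction for clarity, whereas the macro uses a Karatsuba-style limb decomposition and stays constant time; the function name and layout are illustrative only (and the code relies on the GCC/Clang __int128 extension):

    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* z := (x * y) mod (2^255 - 19), operands as 4 little-endian limbs. */
    static void mul_p25519_ref(uint64_t z[4], const uint64_t x[4],
                               const uint64_t y[4]) {
        uint64_t t[8] = {0};

        /* Schoolbook 4x4 limb product into t[0..7]. */
        for (int i = 0; i < 4; i++) {
            uint64_t carry = 0;
            for (int j = 0; j < 4; j++) {
                u128 acc = (u128)x[i] * y[j] + t[i + j] + carry;
                t[i + j] = (uint64_t)acc;
                carry = (uint64_t)(acc >> 64);
            }
            t[i + 4] = carry;
        }

        /* Fold the high half: 2^256 == 38 (mod p). */
        u128 acc = 0;
        for (int i = 0; i < 4; i++) {
            acc += (u128)t[i] + (u128)t[i + 4] * 38;
            z[i] = (uint64_t)acc;
            acc >>= 64;
        }

        /* Leftover carry (<= 38) and bit 255 are multiples of 2^255 == 19. */
        uint64_t m = 2 * (uint64_t)acc + (z[3] >> 63);
        z[3] &= 0x7fffffffffffffffULL;
        acc = (u128)z[0] + (u128)m * 19;
        z[0] = (uint64_t)acc; acc >>= 64;
        acc += z[1]; z[1] = (uint64_t)acc; acc >>= 64;
        acc += z[2]; z[2] = (uint64_t)acc; acc >>= 64;
        z[3] += (uint64_t)acc;

        /* Value is now < 2*p, so one conditional subtraction reduces fully. */
        const uint64_t p[4] = {0xffffffffffffffedULL, ~0ULL, ~0ULL,
                               0x7fffffffffffffffULL};
        uint64_t r[4];
        __int128 d = 0;
        for (int i = 0; i < 4; i++) {
            d += (__int128)z[i] - p[i];
            r[i] = (uint64_t)d;
            d >>= 64;   /* arithmetic shift (GCC/Clang) keeps borrow as -1 */
        }
        if (d == 0)     /* no borrow: z >= p, take z - p */
            for (int i = 0; i < 4; i++) z[i] = r[i];
    }

The mul_4 and sqr_4 variants further down follow the same reduction idea but deliberately stop before the final correction, so their results are only guaranteed to be below 2 * p_25519; that weaker invariant is cheap to maintain and is all the intermediate point operations need.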
+ +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [P2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ + lsr x12, x12, #32; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ + lsr x13, x13, #32; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ + lsr x14, x14, #32; \ + umaddl x14, w14, w3, x10; \ + mov x10, x4; \ + lsr x0, 
x14, #31; \ + mov x5, #0x13; \ + umaddl x5, w5, w0, x5; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ + mov x3, #0x13; \ + tst x10, #0x8000000000000000; \ + csel x3, x3, xzr, pl; \ + subs x7, x7, x3; \ + sbcs x8, x8, xzr; \ + sbcs x9, x9, xzr; \ + sbc x10, x10, xzr; \ + and x10, x10, #0x7fffffffffffffff; \ + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. + +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x5, x6, [P2]; \ + umull x7, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x8, w16, w0; \ + umull x16, w3, w16; \ + adds x7, x7, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x8, x8, x15; \ + adds x7, x7, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x8, x8, x16; \ + mul x9, x4, x6; \ + umulh x10, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x9, x9, x8; \ + adc x10, x10, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x8, x7, x9; \ + adcs x9, x9, x10; \ + adc x10, x10, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x8, x15, x8; \ + eor x3, x3, x16; \ + adcs x9, x3, x9; \ + adc x10, x10, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P2+16]; \ + umull x11, w3, w5; \ + lsr x0, x3, #32; \ + umull x15, w0, w5; \ + lsr x16, x5, #32; \ + umull x12, w16, w0; \ + umull x16, w3, w16; \ + adds x11, x11, x15, lsl #32; \ + lsr x15, x15, #32; \ + adc x12, x12, x15; \ + adds x11, x11, x16, lsl #32; \ + lsr x16, x16, #32; \ + adc x12, x12, x16; \ + mul x13, x4, x6; \ + umulh x14, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x16, cc; \ + adds x13, x13, x12; \ + adc x14, x14, xzr; \ + subs x3, x5, x6; \ + cneg x3, x3, cc; \ + cinv x16, x16, cc; \ + mul x15, x4, x3; \ + umulh x3, x4, x3; \ + adds x12, x11, x13; \ + adcs x13, x13, x14; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x15, x15, x16; \ + adcs x12, x15, x12; \ + eor x3, x3, x16; \ + adcs x13, x3, x13; \ + adc x14, x14, x16; \ + ldp x3, x4, [P1+16]; \ + ldp x15, x16, [P1]; \ + subs x3, x3, x15; \ + sbcs x4, x4, x16; \ + csetm x16, cc; \ + ldp x15, x0, [P2]; \ + subs x5, x15, x5; \ + sbcs x6, x0, x6; \ + csetm x0, cc; \ + eor x3, x3, x16; \ + subs x3, x3, x16; \ + eor x4, x4, x16; \ + sbc x4, x4, x16; \ + eor x5, x5, x0; \ + subs x5, x5, x0; \ + eor x6, x6, x0; \ + sbc x6, x6, x0; \ + eor x16, x0, x16; \ + adds x11, x11, x9; \ + adcs x12, x12, x10; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + mul x2, x3, x5; \ + umulh x0, x3, x5; \ + mul x15, x4, x6; \ + umulh x1, x4, x6; \ + subs x4, x4, x3; \ + cneg x4, x4, cc; \ + csetm x9, cc; \ + adds x15, x15, x0; \ + adc x1, x1, xzr; \ + subs x6, x5, x6; \ + cneg x6, x6, cc; \ + cinv x9, x9, cc; \ + mul x5, x4, x6; \ + umulh x6, x4, x6; \ + adds x0, x2, x15; \ + adcs x15, x15, x1; \ + adc x1, x1, xzr; \ + cmn x9, #0x1; \ + eor x5, x5, x9; \ + adcs x0, x5, x0; \ + eor x6, x6, x9; \ + adcs x15, x6, x15; \ + adc x1, x1, x9; \ + adds x9, x11, x7; \ + adcs x10, x12, x8; \ + adcs x11, x13, x11; \ + adcs x12, x14, x12; \ + adcs x13, x13, xzr; \ + adc x14, x14, xzr; \ + cmn x16, #0x1; \ + eor x2, x2, x16; \ + adcs x9, x2, x9; \ + eor x0, x0, x16; \ + adcs x10, x0, x10; \ + eor x15, x15, x16; \ + 
adcs x11, x15, x11; \ + eor x1, x1, x16; \ + adcs x12, x1, x12; \ + adcs x13, x13, x16; \ + adc x14, x14, x16; \ + mov x3, #0x26; \ + umull x4, w11, w3; \ + add x4, x4, w7, uxtw; \ + lsr x7, x7, #32; \ + lsr x11, x11, #32; \ + umaddl x11, w11, w3, x7; \ + mov x7, x4; \ + umull x4, w12, w3; \ + add x4, x4, w8, uxtw; \ + lsr x8, x8, #32; \ + lsr x12, x12, #32; \ + umaddl x12, w12, w3, x8; \ + mov x8, x4; \ + umull x4, w13, w3; \ + add x4, x4, w9, uxtw; \ + lsr x9, x9, #32; \ + lsr x13, x13, #32; \ + umaddl x13, w13, w3, x9; \ + mov x9, x4; \ + umull x4, w14, w3; \ + add x4, x4, w10, uxtw; \ + lsr x10, x10, #32; \ + lsr x14, x14, #32; \ + umaddl x14, w14, w3, x10; \ + mov x10, x4; \ + lsr x0, x14, #31; \ + mov x5, #0x13; \ + umull x5, w5, w0; \ + add x7, x7, x5; \ + adds x7, x7, x11, lsl #32; \ + extr x3, x12, x11, #32; \ + adcs x8, x8, x3; \ + extr x3, x13, x12, #32; \ + adcs x9, x9, x3; \ + extr x3, x14, x13, #32; \ + lsl x5, x0, #63; \ + eor x10, x10, x5; \ + adc x10, x10, x3; \ + stp x7, x8, [P0]; \ + stp x9, x10, [P0+16] + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the final +// optional correction. + +#define sqr_4(P0,P1) \ + ldp x10, x11, [P1]; \ + ldp x12, x13, [P1+16]; \ + umull x2, w10, w10; \ + lsr x14, x10, #32; \ + umull x3, w14, w14; \ + umull x14, w10, w14; \ + adds x2, x2, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x3, x3, x14; \ + umull x4, w11, w11; \ + lsr x14, x11, #32; \ + umull x5, w14, w14; \ + umull x14, w11, w14; \ + mul x15, x10, x11; \ + umulh x16, x10, x11; \ + adds x4, x4, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x5, x5, x14; \ + adds x15, x15, x15; \ + adcs x16, x16, x16; \ + adc x5, x5, xzr; \ + adds x3, x3, x15; \ + adcs x4, x4, x16; \ + adc x5, x5, xzr; \ + umull x6, w12, w12; \ + lsr x14, x12, #32; \ + umull x7, w14, w14; \ + umull x14, w12, w14; \ + adds x6, x6, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x7, x7, x14; \ + umull x8, w13, w13; \ + lsr x14, x13, #32; \ + umull x9, w14, w14; \ + umull x14, w13, w14; \ + mul x15, x12, x13; \ + umulh x16, x12, x13; \ + adds x8, x8, x14, lsl #33; \ + lsr x14, x14, #31; \ + adc x9, x9, x14; \ + adds x15, x15, x15; \ + adcs x16, x16, x16; \ + adc x9, x9, xzr; \ + adds x7, x7, x15; \ + adcs x8, x8, x16; \ + adc x9, x9, xzr; \ + subs x10, x10, x12; \ + sbcs x11, x11, x13; \ + csetm x16, cc; \ + eor x10, x10, x16; \ + subs x10, x10, x16; \ + eor x11, x11, x16; \ + sbc x11, x11, x16; \ + adds x6, x6, x4; \ + adcs x7, x7, x5; \ + adcs x8, x8, xzr; \ + adc x9, x9, xzr; \ + umull x12, w10, w10; \ + lsr x5, x10, #32; \ + umull x13, w5, w5; \ + umull x5, w10, w5; \ + adds x12, x12, x5, lsl #33; \ + lsr x5, x5, #31; \ + adc x13, x13, x5; \ + umull x15, w11, w11; \ + lsr x5, x11, #32; \ + umull x14, w5, w5; \ + umull x5, w11, w5; \ + mul x4, x10, x11; \ + umulh x16, x10, x11; \ + adds x15, x15, x5, lsl #33; \ + lsr x5, x5, #31; \ + adc x14, x14, x5; \ + adds x4, x4, x4; \ + adcs x16, x16, x16; \ + adc x14, x14, xzr; \ + adds x13, x13, x4; \ + adcs x15, x15, x16; \ + adc x14, x14, xzr; \ + adds x4, x2, x6; \ + adcs x5, x3, x7; \ + adcs x6, x6, x8; \ + adcs x7, x7, x9; \ + csetm x16, cc; \ + subs x4, x4, x12; \ + sbcs x5, x5, x13; \ + sbcs x6, x6, x15; \ + sbcs x7, x7, x14; \ + adcs x8, x8, x16; \ + adc x9, x9, x16; \ + mov x10, #0x26; \ + umull x12, w6, w10; \ + add x12, x12, w2, uxtw; \ + lsr x2, x2, #32; \ + lsr x6, x6, #32; \ + umaddl x6, w6, w10, x2; \ + mov x2, x12; \ + umull x12, w7, w10; \ + add x12, x12, w3, uxtw; \ + lsr x3, x3, 
#32; \ + lsr x7, x7, #32; \ + umaddl x7, w7, w10, x3; \ + mov x3, x12; \ + umull x12, w8, w10; \ + add x12, x12, w4, uxtw; \ + lsr x4, x4, #32; \ + lsr x8, x8, #32; \ + umaddl x8, w8, w10, x4; \ + mov x4, x12; \ + umull x12, w9, w10; \ + add x12, x12, w5, uxtw; \ + lsr x5, x5, #32; \ + lsr x9, x9, #32; \ + umaddl x9, w9, w10, x5; \ + mov x5, x12; \ + lsr x13, x9, #31; \ + mov x11, #0x13; \ + umull x11, w11, w13; \ + add x2, x2, x11; \ + adds x2, x2, x6, lsl #32; \ + extr x10, x7, x6, #32; \ + adcs x3, x3, x10; \ + extr x10, x8, x7, #32; \ + adcs x4, x4, x10; \ + extr x10, x9, x8, #32; \ + lsl x11, x13, #63; \ + eor x5, x5, x11; \ + adc x5, x5, x10; \ + stp x2, x3, [P0]; \ + stp x4, x5, [P0+16] + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x4, #38; \ + csel x3, x4, xzr, lo; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbc x8, x8, xzr; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16] + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo 2 * p_25519 +// provided the sum of the inputs is < 2^256 + 2^256 - 38, so in particular +// provided at least one of them is reduced double modulo. + +#define add_twice4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x5, x6, [P1+16]; \ + ldp x7, x8, [P2+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +#define double_twice4(P0,P1) \ + ldp x3, x4, [P1]; \ + adds x3, x3, x3; \ + adcs x4, x4, x4; \ + ldp x5, x6, [P1+16]; \ + adcs x5, x5, x5; \ + adcs x6, x6, x6; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +// Load the constant k_25519 = 2 * d_25519 using immediate operations + +#define load_k25519(P0) \ + movz x0, #0xf159; \ + movz x1, #0xb156; \ + movz x2, #0xd130; \ + movz x3, #0xfce7; \ + movk x0, #0x26b2, lsl #16; \ + movk x1, #0x8283, lsl #16; \ + movk x2, #0xeef3, lsl #16; \ + movk x3, #0x56df, lsl #16; \ + movk x0, #0x9b94, lsl #32; \ + movk x1, #0x149a, lsl #32; \ + movk x2, #0x80f2, lsl #32; \ + movk x3, #0xd9dc, lsl #32; \ + movk x0, #0xebd6, lsl #48; \ + movk x1, #0x00e0, lsl #48; \ + movk x2, #0x198e, lsl #48; \ + movk x3, #0x2406, lsl #48; \ + stp x0, x1, [P0]; \ + stp x2, x3, [P0+16] + +S2N_BN_SYMBOL(edwards25519_scalarmuldouble): + +// Save regs and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + stp x25, x30, [sp, -16]! + sub sp, sp, #NSPACE + +// Move the output pointer to a stable place + + mov res, x0 + +// Copy scalars while recoding all 4-bit nybbles except the top +// one (bits 252..255) into signed 4-bit digits. This is essentially +// done just by adding the recoding constant 0x0888..888, after +// which all digits except the first have an implicit bias of -8, +// so 0 -> -8, 1 -> -7, ... 7 -> -1, 8 -> 0, 9 -> 1, ... 15 -> 7.
+// (We could literally create 2s complement signed nybbles by +// XORing with the same constant 0x0888..888 afterwards, but it +// doesn't seem to make the end usage any simpler.) +// +// In order to ensure that the unrecoded top nybble (bits 252..255) +// does not become > 8 as a result of carries lower down from the +// recoding, we first (conceptually) subtract the group order iff +// the top digit of the scalar is > 2^63. In the implementation the +// reduction and recoding are combined by optionally using the +// modified recoding constant 0x0888...888 + (2^256 - group_order). + + movbig(x4,#0xc7f5, #0x6fb5, #0xa0d9, #0xe920) + movbig(x5,#0xe190, #0xb993, #0x70cb, #0xa1d5) + mov x7, #0x8888888888888888 + sub x6, x7, #1 + bic x8, x7, #0xF000000000000000 + + ldp x10, x11, [x3] + ldp x12, x13, [x3, #16] + mov x3, 0x8000000000000000 + cmp x3, x13 + csel x14, x7, x4, cs + csel x15, x7, x5, cs + csel x16, x7, x6, cs + csel x17, x8, x7, cs + adds x10, x10, x14 + adcs x11, x11, x15 + adcs x12, x12, x16 + adc x13, x13, x17 + stp x10, x11, [bscalar] + stp x12, x13, [bscalar+16] + + ldp x10, x11, [x1] + ldp x12, x13, [x1, #16] + mov x3, 0x8000000000000000 + cmp x3, x13 + csel x14, x7, x4, cs + csel x15, x7, x5, cs + csel x16, x7, x6, cs + csel x17, x8, x7, cs + adds x10, x10, x14 + adcs x11, x11, x15 + adcs x12, x12, x16 + adc x13, x13, x17 + stp x10, x11, [scalar] + stp x12, x13, [scalar+16] + +// Create table of multiples 1..8 of the general input point at "tab". +// Reduce the input coordinates x and y modulo 2^256 - 38 first, for the +// sake of definiteness; this is the reduction that will be maintained. +// We could slightly optimize the additions because we know the input +// point is affine (so Z = 1), but it doesn't seem worth the complication. + + ldp x10, x11, [x2] + ldp x12, x13, [x2, #16] + adds x14, x10, #38 + adcs x15, x11, xzr + adcs x16, x12, xzr + adcs x17, x13, xzr + csel x10, x14, x10, cs + csel x11, x15, x11, cs + csel x12, x16, x12, cs + csel x13, x17, x13, cs + stp x10, x11, [tab] + stp x12, x13, [tab+16] + + ldp x10, x11, [x2, #32] + ldp x12, x13, [x2, #48] + adds x14, x10, #38 + adcs x15, x11, xzr + adcs x16, x12, xzr + adcs x17, x13, xzr + csel x10, x14, x10, cs + csel x11, x15, x11, cs + csel x12, x16, x12, cs + csel x13, x17, x13, cs + stp x10, x11, [tab+32] + stp x12, x13, [tab+48] + + mov x1, #1 + stp x1, xzr, [tab+64] + stp xzr, xzr, [tab+80] + + add p0, tab+96 + add p1, tab + add p2, tab+32 + mul_4(x_0,x_1,x_2) + +// Multiple 2 + + add p0, tab+1*128 + add p1, tab + bl edwards25519_scalarmuldouble_epdouble + +// Multiple 3 + + add p0, tab+2*128 + add p1, tab + add p2, tab+1*128 + bl edwards25519_scalarmuldouble_epadd + +// Multiple 4 + + add p0, tab+3*128 + add p1, tab+1*128 + bl edwards25519_scalarmuldouble_epdouble + +// Multiple 5 + + add p0, tab+4*128 + add p1, tab + add p2, tab+3*128 + bl edwards25519_scalarmuldouble_epadd + +// Multiple 6 + + add p0, tab+5*128 + add p1, tab+2*128 + bl edwards25519_scalarmuldouble_epdouble + +// Multiple 7 + + add p0, tab+6*128 + add p1, tab + add p2, tab+5*128 + bl edwards25519_scalarmuldouble_epadd + +// Multiple 8 + + add p0, tab+7*128 + add p1, tab+3*128 + bl edwards25519_scalarmuldouble_epdouble + +// Handle the initialization, starting the loop counter at i = 252 +// and initializing acc to the sum of the table entries for the +// top nybbles of the scalars (the ones with no implicit -8 bias). + + mov i, #252 + +// Index for btable entry... 
+ + ldr x0, [bscalar+24] + lsr bf, x0, #60 + +// ...and constant-time indexing based on that index + + adr x14, edwards25519_scalarmuldouble_table + + mov x0, #1 + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + + cmp bf, #1 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #2 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #3 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #4 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #5 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #6 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #7 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + 
csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #8 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + + stp x0, x1, [btabent] + stp x2, x3, [btabent+16] + stp x4, x5, [btabent+32] + stp x6, x7, [btabent+48] + stp x8, x9, [btabent+64] + stp x10, x11, [btabent+80] + +// Index for table entry... + + ldr x0, [scalar+24] + lsr bf, x0, #60 + +// ...and constant-time indexing based on that index + + add p0, tab + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, #1 + mov x9, xzr + mov x10, xzr + mov x11, xzr + mov x12, xzr + mov x13, xzr + mov x14, xzr + mov x15, xzr + + cmp bf, #1 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #2 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #3 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #4 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #5 + ldp 
x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #6 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #7 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #8 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + + stp x0, x1, [tabent] + stp x2, x3, [tabent+16] + stp x4, x5, [tabent+32] + stp x6, x7, [tabent+48] + stp x8, x9, [tabent+64] + stp x10, x11, [tabent+80] + stp x12, x13, [tabent+96] + stp x14, x15, [tabent+112] + +// Add those elements to initialize the accumulator for bit position 252 + + add p0, acc + add p1, tabent + add p2, btabent + bl edwards25519_scalarmuldouble_pepadd + +// Main loop with acc = [scalar/2^i] * point + [bscalar/2^i] * basepoint +// Start with i = 252 for bits 248..251 and go down four at a time to 3..0 + +edwards25519_scalarmuldouble_loop: + + sub i, i, #4 + +// Double to acc' = 2 * acc + + add p0, acc + add p1, acc + bl edwards25519_scalarmuldouble_pdouble + +// Get btable entry, first getting the adjusted bitfield... + + lsr x0, i, #6 + add x1, bscalar + ldr x2, [x1, x0, lsl #3] + lsr x3, x2, i + and x0, x3, #15 + subs bf, x0, #8 + cneg bf, bf, cc + csetm cf, cc + +// ... then doing constant-time lookup with the appropriate index... 
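+// (As an informal sketch, an illustration rather than assembled code:
+// the bitfield step above and the table scan below together behave
+// like this hypothetical C fragment, where "nybble" is the 4-bit field
+// just extracted and "neutral" is the identity triple (1,1,0):
+//
+//     int d = (int) nybble - 8;           // signed digit in -8..7
+//     uint64_t bf = (d < 0) ? -d : d;     // lookup index |d| in 0..8
+//     uint64_t cf = (d < 0) ? ~0ull : 0;  // all-ones mask = negate entry
+//     entry = (bf == 0) ? neutral : table[bf - 1];
+//
+// e.g. nybble 3 gives d = -5, so entry 5 is selected and later negated.)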
+ + adr x14, edwards25519_scalarmuldouble_table + + mov x0, #1 + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + + cmp bf, #1 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #2 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #3 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #4 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #5 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #6 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #7 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #8 + ldp x12, x13, 
[x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + +// ... then optionally negating before storing. The table entry +// is in precomputed form and we currently have +// +// [x3;x2;x1;x0] = y - x +// [x7;x6;x5;x4] = x + y +// [x11;x10;x9;x8] = 2 * d * x * y +// +// Negation for Edwards curves is -(x,y) = (-x,y), which in this modified +// form amounts to swapping the first two fields and negating the third. +// The negation does not always fully reduce even mod 2^256-38 in the zero +// case, instead giving -0 = 2^256-38. But that is fine since the result is +// always fed to a multiplication inside the "pepadd" function below that +// handles any 256-bit input. + + cmp cf, xzr + + csel x12, x0, x4, eq + csel x4, x0, x4, ne + csel x13, x1, x5, eq + csel x5, x1, x5, ne + csel x14, x2, x6, eq + csel x6, x2, x6, ne + csel x15, x3, x7, eq + csel x7, x3, x7, ne + + eor x8, x8, cf + eor x9, x9, cf + eor x10, x10, cf + eor x11, x11, cf + mov x0, #37 + and x0, x0, cf + subs x8, x8, x0 + sbcs x9, x9, xzr + sbcs x10, x10, xzr + sbc x11, x11, xzr + + stp x12, x13, [btabent] + stp x14, x15, [btabent+16] + stp x4, x5, [btabent+32] + stp x6, x7, [btabent+48] + stp x8, x9, [btabent+64] + stp x10, x11, [btabent+80] + +// Get table entry, first getting the adjusted bitfield... + + lsr x0, i, #6 + ldr x1, [sp, x0, lsl #3] + lsr x2, x1, i + and x0, x2, #15 + subs bf, x0, #8 + cneg bf, bf, cc + csetm cf, cc + +// ...
then getting the unadjusted table entry + + add p0, tab + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, #1 + mov x9, xzr + mov x10, xzr + mov x11, xzr + mov x12, xzr + mov x13, xzr + mov x14, xzr + mov x15, xzr + + cmp bf, #1 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #2 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #3 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #4 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #5 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #6 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, 
#32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #7 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #8 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + +// ... then optionally negating before storing. This time the table +// entry is extended-projective, and is in registers thus: +// +// [x3;x2;x1;x0] = X +// [x7;x6;x5;x4] = Y +// [x11;x10;x9;x8] = Z +// [x15;x14;x13;x12] = W +// +// This time we just need to negate the X and the W fields. +// The crude way negation is done can result in values of X or W +// (when initially zero before negation) being exactly equal to +// 2^256-38, but the "pepadd" function handles that correctly. 
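+// As an informal aside on the masked negation below: with cf = 0 each
+// field passes through unchanged, while with cf all-ones a 256-bit
+// field value v becomes
+//
+//     (v XOR (2^256 - 1)) - 37 = (2^256 - 1 - v) - 37 = (2^256 - 38) - v
+//
+// which is -v modulo 2^256 - 38; the "mov x16, #37 / and / subs" chain
+// below implements the conditional subtraction of 37.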
+ + eor x0, x0, cf + eor x1, x1, cf + eor x2, x2, cf + eor x3, x3, cf + mov x16, #37 + and x16, x16, cf + subs x0, x0, x16 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x3, x3, xzr + + eor x12, x12, cf + eor x13, x13, cf + eor x14, x14, cf + eor x15, x15, cf + subs x12, x12, x16 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + + stp x0, x1, [tabent] + stp x2, x3, [tabent+16] + stp x4, x5, [tabent+32] + stp x6, x7, [tabent+48] + stp x8, x9, [tabent+64] + stp x10, x11, [tabent+80] + stp x12, x13, [tabent+96] + stp x14, x15, [tabent+112] + +// Double to acc' = 4 * acc + + add p0, acc + add p1, acc + bl edwards25519_scalarmuldouble_pdouble + +// Add tabent := tabent + btabent + + add p0, tabent + add p1, tabent + add p2, btabent + bl edwards25519_scalarmuldouble_pepadd + +// Double to acc' = 8 * acc + + add p0, acc + add p1, acc + bl edwards25519_scalarmuldouble_pdouble + +// Double to acc' = 16 * acc + + add p0, acc + add p1, acc + bl edwards25519_scalarmuldouble_epdouble + +// Add table entry, acc := acc + tabent + + add p0, acc + add p1, acc + add p2, tabent + bl edwards25519_scalarmuldouble_epadd + +// Loop down + + cbnz i, edwards25519_scalarmuldouble_loop + +// Modular inverse setup + + mov x0, #4 + add x1, tabent + add x2, acc+64 + adr x3, edwards25519_scalarmuldouble_p25519 + add x4, btabent + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "arm/generic/bignum_modinv.S". + +edwards25519_scalarmuldouble_modinv: + lsl x10, x0, #3 + add x21, x4, x10 + add x22, x21, x10 + mov x10, xzr +edwards25519_scalarmuldouble_copyloop: + ldr x11, [x2, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + str x11, [x21, x10, lsl #3] + str x12, [x22, x10, lsl #3] + str x12, [x4, x10, lsl #3] + str xzr, [x1, x10, lsl #3] + add x10, x10, #0x1 + cmp x10, x0 + b.cc edwards25519_scalarmuldouble_copyloop + ldr x11, [x4] + sub x12, x11, #0x1 + str x12, [x4] + lsl x20, x11, #2 + sub x20, x11, x20 + eor x20, x20, #0x2 + mov x12, #0x1 + madd x12, x11, x20, x12 + mul x11, x12, x12 + madd x20, x12, x20, x20 + mul x12, x11, x11 + madd x20, x11, x20, x20 + mul x11, x12, x12 + madd x20, x12, x20, x20 + madd x20, x11, x20, x20 + lsl x2, x0, #7 +edwards25519_scalarmuldouble_outerloop: + add x10, x2, #0x3f + lsr x5, x10, #6 + cmp x5, x0 + csel x5, x0, x5, cs + mov x13, xzr + mov x15, xzr + mov x14, xzr + mov x16, xzr + mov x19, xzr + mov x10, xzr +edwards25519_scalarmuldouble_toploop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + orr x17, x11, x12 + cmp x17, xzr + and x17, x19, x13 + csel x15, x17, x15, ne + and x17, x19, x14 + csel x16, x17, x16, ne + csel x13, x11, x13, ne + csel x14, x12, x14, ne + csetm x19, ne + add x10, x10, #0x1 + cmp x10, x5 + b.cc edwards25519_scalarmuldouble_toploop + orr x11, x13, x14 + clz x12, x11 + negs x17, x12 + lsl x13, x13, x12 + csel x15, x15, xzr, ne + lsl x14, x14, x12 + csel x16, x16, xzr, ne + lsr x15, x15, x17 + lsr x16, x16, x17 + orr x13, x13, x15 + orr x14, x14, x16 + ldr x15, [x21] + ldr x16, [x22] + mov x6, #0x1 + mov x7, xzr + mov x8, xzr + mov x9, #0x1 + mov x10, #0x3a + tst x15, #0x1 +edwards25519_scalarmuldouble_innerloop: + csel x11, x14, xzr, ne + csel x12, x16, xzr, ne + csel x17, x8, xzr, ne + csel x19, x9, xzr, ne + ccmp x13, x14, #0x2, ne + sub x11, x13, x11 + sub x12, x15, x12 + csel x14, x14, x13, cs + cneg x11, x11, cc + csel x16, x16, x15, cs 
+ cneg x15, x12, cc + csel x8, x8, x6, cs + csel x9, x9, x7, cs + tst x12, #0x2 + add x6, x6, x17 + add x7, x7, x19 + lsr x13, x11, #1 + lsr x15, x15, #1 + add x8, x8, x8 + add x9, x9, x9 + sub x10, x10, #0x1 + cbnz x10, edwards25519_scalarmuldouble_innerloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +edwards25519_scalarmuldouble_congloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + adds x15, x15, x16 + extr x17, x15, x17, #58 + str x17, [x4, x10, lsl #3] + mov x17, x15 + umulh x15, x7, x12 + adc x13, x13, x15 + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + adds x15, x15, x16 + extr x19, x15, x19, #58 + str x19, [x1, x10, lsl #3] + mov x19, x15 + umulh x15, x9, x12 + adc x14, x14, x15 + add x10, x10, #0x1 + cmp x10, x0 + b.cc edwards25519_scalarmuldouble_congloop + extr x13, x13, x17, #58 + extr x14, x14, x19, #58 + ldr x11, [x4] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, edwards25519_scalarmuldouble_wmontend +edwards25519_scalarmuldouble_wmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x4, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_wmontloop +edwards25519_scalarmuldouble_wmontend: + adcs x16, x16, x13 + adc x13, xzr, xzr + sub x15, x10, #0x1 + str x16, [x4, x15, lsl #3] + negs x10, xzr +edwards25519_scalarmuldouble_wcmploop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_wcmploop + sbcs xzr, x13, xzr + csetm x13, cs + negs x10, xzr +edwards25519_scalarmuldouble_wcorrloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x13 + sbcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_wcorrloop + ldr x11, [x1] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, edwards25519_scalarmuldouble_zmontend +edwards25519_scalarmuldouble_zmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x1, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_zmontloop +edwards25519_scalarmuldouble_zmontend: + adcs x16, x16, x14 + adc x14, xzr, xzr + sub x15, x10, #0x1 + str x16, [x1, x15, lsl #3] + negs x10, xzr +edwards25519_scalarmuldouble_zcmploop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_zcmploop + sbcs xzr, x14, xzr + csetm x14, cs + negs x10, xzr +edwards25519_scalarmuldouble_zcorrloop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x14 + sbcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_zcorrloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr 
+edwards25519_scalarmuldouble_crossloop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + subs x15, x15, x16 + str x15, [x21, x10, lsl #3] + umulh x15, x7, x12 + sub x17, x15, x17 + sbcs x13, x13, x17 + csetm x17, cc + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + subs x15, x15, x16 + str x15, [x22, x10, lsl #3] + umulh x15, x9, x12 + sub x19, x15, x19 + sbcs x14, x14, x19 + csetm x19, cc + add x10, x10, #0x1 + cmp x10, x5 + b.cc edwards25519_scalarmuldouble_crossloop + cmn x17, x17 + ldr x15, [x21] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, edwards25519_scalarmuldouble_negskip1 +edwards25519_scalarmuldouble_negloop1: + add x11, x10, #0x8 + ldr x12, [x21, x11] + extr x15, x12, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, edwards25519_scalarmuldouble_negloop1 +edwards25519_scalarmuldouble_negskip1: + extr x15, x13, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + cmn x19, x19 + ldr x15, [x22] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, edwards25519_scalarmuldouble_negskip2 +edwards25519_scalarmuldouble_negloop2: + add x11, x10, #0x8 + ldr x12, [x22, x11] + extr x15, x12, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, edwards25519_scalarmuldouble_negloop2 +edwards25519_scalarmuldouble_negskip2: + extr x15, x14, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x10, xzr + cmn x17, x17 +edwards25519_scalarmuldouble_wfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + and x11, x11, x17 + eor x12, x12, x17 + adcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_wfliploop + mvn x19, x19 + mov x10, xzr + cmn x19, x19 +edwards25519_scalarmuldouble_zfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + and x11, x11, x19 + eor x12, x12, x19 + adcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_zfliploop + subs x2, x2, #0x3a + b.hi edwards25519_scalarmuldouble_outerloop + +// Store result. Note that these are the only reductions mod 2^255-19 + + mov p0, res + add p1, acc + add p2, tabent + mul_p25519(x_0,x_1,x_2) + + add p0, res, #32 + add p1, acc+32 + add p2, tabent + mul_p25519(x_0,x_1,x_2) + +// Restore stack and registers + + add sp, sp, #NSPACE + ldp x25, x30, [sp], 16 + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +// **************************************************************************** +// Localized versions of subroutines. +// These are close to the standalone functions "edwards25519_epdouble" etc., +// but are only maintaining reduction modulo 2^256 - 38, not 2^255 - 19. 
+// **************************************************************************** + +edwards25519_scalarmuldouble_epdouble: + sub sp, sp, #(5*NUMSIZE) + add_twice4(t0,x_1,y_1) + sqr_4(t1,z_1) + sqr_4(t2,x_1) + sqr_4(t3,y_1) + double_twice4(t1,t1) + sqr_4(t0,t0) + add_twice4(t4,t2,t3) + sub_twice4(t2,t2,t3) + add_twice4(t3,t1,t2) + sub_twice4(t1,t4,t0) + mul_4(y_0,t2,t4) + mul_4(z_0,t3,t2) + mul_4(w_0,t1,t4) + mul_4(x_0,t1,t3) + add sp, sp, #(5*NUMSIZE) + ret + +edwards25519_scalarmuldouble_pdouble: + sub sp, sp, #(5*NUMSIZE) + add_twice4(t0,x_1,y_1) + sqr_4(t1,z_1) + sqr_4(t2,x_1) + sqr_4(t3,y_1) + double_twice4(t1,t1) + sqr_4(t0,t0) + add_twice4(t4,t2,t3) + sub_twice4(t2,t2,t3) + add_twice4(t3,t1,t2) + sub_twice4(t1,t4,t0) + mul_4(y_0,t2,t4) + mul_4(z_0,t3,t2) + mul_4(x_0,t1,t3) + add sp, sp, #(5*NUMSIZE) + ret + +edwards25519_scalarmuldouble_epadd: + sub sp, sp, #(6*NUMSIZE) + mul_4(t0,w_1,w_2) + sub_twice4(t1,y_1,x_1) + sub_twice4(t2,y_2,x_2) + add_twice4(t3,y_1,x_1) + add_twice4(t4,y_2,x_2) + double_twice4(t5,z_2) + mul_4(t1,t1,t2) + mul_4(t3,t3,t4) + load_k25519(t2) + mul_4(t2,t2,t0) + mul_4(t4,z_1,t5) + sub_twice4(t0,t3,t1) + add_twice4(t5,t3,t1) + sub_twice4(t1,t4,t2) + add_twice4(t3,t4,t2) + mul_4(w_0,t0,t5) + mul_4(x_0,t0,t1) + mul_4(y_0,t3,t5) + mul_4(z_0,t1,t3) + add sp, sp, #(6*NUMSIZE) + ret + +edwards25519_scalarmuldouble_pepadd: + sub sp, sp, #(6*NUMSIZE) + double_twice4(t0,z_1) + sub_twice4(t1,y_1,x_1) + add_twice4(t2,y_1,x_1) + mul_4(t3,w_1,z_2) + mul_4(t1,t1,x_2) + mul_4(t2,t2,y_2) + sub_twice4(t4,t0,t3) + add_twice4(t0,t0,t3) + sub_twice4(t5,t2,t1) + add_twice4(t1,t2,t1) + mul_4(z_0,t4,t0) + mul_4(x_0,t5,t4) + mul_4(y_0,t0,t1) + mul_4(w_0,t5,t1) + add sp, sp, #(6*NUMSIZE) + ret + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// The modulus p_25519 = 2^255 - 19, for the modular inverse + +edwards25519_scalarmuldouble_p25519: + .quad 0xffffffffffffffed + .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x7fffffffffffffff + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples.
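+// (Illustrative note: each (y-x, x+y, 2*d*x*y) triple below can be
+// derived from the affine coordinates (x,y) of the corresponding
+// multiple k * B of the basepoint B = (...,4/5), with all arithmetic
+// mod p_25519 and d the usual edwards25519 curve constant. Storing this
+// form lets the "pepadd" routine above consume the constant operand
+// without recomputing those three quantities.)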
+ +edwards25519_scalarmuldouble_table: + + // 1 * G + + .quad 0x9d103905d740913e + .quad 0xfd399f05d140beb3 + .quad 0xa5c18434688f8a09 + .quad 0x44fd2f9298f81267 + .quad 0x2fbc93c6f58c3b85 + .quad 0xcf932dc6fb8c0e19 + .quad 0x270b4898643d42c2 + .quad 0x07cf9d3a33d4ba65 + .quad 0xabc91205877aaa68 + .quad 0x26d9e823ccaac49e + .quad 0x5a1b7dcbdd43598c + .quad 0x6f117b689f0c65a8 + + // 2 * G + + .quad 0x8a99a56042b4d5a8 + .quad 0x8f2b810c4e60acf6 + .quad 0xe09e236bb16e37aa + .quad 0x6bb595a669c92555 + .quad 0x9224e7fc933c71d7 + .quad 0x9f469d967a0ff5b5 + .quad 0x5aa69a65e1d60702 + .quad 0x590c063fa87d2e2e + .quad 0x43faa8b3a59b7a5f + .quad 0x36c16bdd5d9acf78 + .quad 0x500fa0840b3d6a31 + .quad 0x701af5b13ea50b73 + + // 3 * G + + .quad 0x56611fe8a4fcd265 + .quad 0x3bd353fde5c1ba7d + .quad 0x8131f31a214bd6bd + .quad 0x2ab91587555bda62 + .quad 0xaf25b0a84cee9730 + .quad 0x025a8430e8864b8a + .quad 0xc11b50029f016732 + .quad 0x7a164e1b9a80f8f4 + .quad 0x14ae933f0dd0d889 + .quad 0x589423221c35da62 + .quad 0xd170e5458cf2db4c + .quad 0x5a2826af12b9b4c6 + + // 4 * G + + .quad 0x95fe050a056818bf + .quad 0x327e89715660faa9 + .quad 0xc3e8e3cd06a05073 + .quad 0x27933f4c7445a49a + .quad 0x287351b98efc099f + .quad 0x6765c6f47dfd2538 + .quad 0xca348d3dfb0a9265 + .quad 0x680e910321e58727 + .quad 0x5a13fbe9c476ff09 + .quad 0x6e9e39457b5cc172 + .quad 0x5ddbdcf9102b4494 + .quad 0x7f9d0cbf63553e2b + + // 5 * G + + .quad 0x7f9182c3a447d6ba + .quad 0xd50014d14b2729b7 + .quad 0xe33cf11cb864a087 + .quad 0x154a7e73eb1b55f3 + .quad 0xa212bc4408a5bb33 + .quad 0x8d5048c3c75eed02 + .quad 0xdd1beb0c5abfec44 + .quad 0x2945ccf146e206eb + .quad 0xbcbbdbf1812a8285 + .quad 0x270e0807d0bdd1fc + .quad 0xb41b670b1bbda72d + .quad 0x43aabe696b3bb69a + + // 6 * G + + .quad 0x499806b67b7d8ca4 + .quad 0x575be28427d22739 + .quad 0xbb085ce7204553b9 + .quad 0x38b64c41ae417884 + .quad 0x3a0ceeeb77157131 + .quad 0x9b27158900c8af88 + .quad 0x8065b668da59a736 + .quad 0x51e57bb6a2cc38bd + .quad 0x85ac326702ea4b71 + .quad 0xbe70e00341a1bb01 + .quad 0x53e4a24b083bc144 + .quad 0x10b8e91a9f0d61e3 + + // 7 * G + + .quad 0xba6f2c9aaa3221b1 + .quad 0x6ca021533bba23a7 + .quad 0x9dea764f92192c3a + .quad 0x1d6edd5d2e5317e0 + .quad 0x6b1a5cd0944ea3bf + .quad 0x7470353ab39dc0d2 + .quad 0x71b2528228542e49 + .quad 0x461bea69283c927e + .quad 0xf1836dc801b8b3a2 + .quad 0xb3035f47053ea49a + .quad 0x529c41ba5877adf3 + .quad 0x7a9fbb1c6a0f90a7 + + // 8 * G + + .quad 0xe2a75dedf39234d9 + .quad 0x963d7680e1b558f9 + .quad 0x2c2741ac6e3c23fb + .quad 0x3a9024a1320e01c3 + .quad 0x59b7596604dd3e8f + .quad 0x6cb30377e288702c + .quad 0xb1339c665ed9c323 + .quad 0x0915e76061bce52f + .quad 0xe7c1f5d9c9a2911a + .quad 0xb8a371788bcca7d7 + .quad 0x636412190eb62a32 + .quad 0x26907c5c2ecc4e95 + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/curve25519/edwards25519_scalarmuldouble_alt.S b/arm/curve25519/edwards25519_scalarmuldouble_alt.S new file mode 100644 index 0000000000..c8fe77c31f --- /dev/null +++ b/arm/curve25519/edwards25519_scalarmuldouble_alt.S @@ -0,0 +1,2252 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Double scalar multiplication for edwards25519, fresh and base point +// Input scalar[4], point[8], bscalar[4]; output res[8] +// +// extern void edwards25519_scalarmuldouble_alt +// (uint64_t res[static 8],uint64_t scalar[static 4], +// uint64_t point[static 8],uint64_t bscalar[static 4]); +// +// Given scalar = n, point = P and bscalar = m, returns in res +// the point (X,Y) = n * P + m * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve. +// +// Both 256-bit coordinates of the input point P are implicitly +// reduced modulo 2^255-19 if they are not already in reduced form, +// but the conventional usage is that they *are* already reduced. +// The scalars can be arbitrary 256-bit numbers but may also be +// considered as implicitly reduced modulo the group order. +// +// Standard ARM ABI: X0 = res, X1 = scalar, X2 = point, X3 = bscalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_scalarmuldouble_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_scalarmuldouble_alt) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Stable home for the input result argument during the whole body + +#define res x25 + +// Additional pointer variables for local subroutines + +#define p0 x22 +#define p1 x23 +#define p2 x24 + +// Other variables that are only needed prior to the modular inverse. + +#define i x19 +#define bf x20 +#define cf x21 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. + +#define resx res, #(0*NUMSIZE) +#define resy res, #(1*NUMSIZE) + +#define scalar sp, #(0*NUMSIZE) +#define bscalar sp, #(1*NUMSIZE) + +#define acc sp, #(2*NUMSIZE) +#define acc_x sp, #(2*NUMSIZE) +#define acc_y sp, #(3*NUMSIZE) +#define acc_z sp, #(4*NUMSIZE) +#define acc_w sp, #(5*NUMSIZE) + +#define tabent sp, #(6*NUMSIZE) +#define btabent sp, #(10*NUMSIZE) + +#define tab sp, #(13*NUMSIZE) + +// Total size to reserve on the stack (excluding local subroutines) + +#define NSPACE (45*NUMSIZE) + +// Sub-references used in local subroutines with local stack + +#define x_0 p0, #0 +#define y_0 p0, #NUMSIZE +#define z_0 p0, #(2*NUMSIZE) +#define w_0 p0, #(3*NUMSIZE) + +#define x_1 p1, #0 +#define y_1 p1, #NUMSIZE +#define z_1 p1, #(2*NUMSIZE) +#define w_1 p1, #(3*NUMSIZE) + +#define x_2 p2, #0 +#define y_2 p2, #NUMSIZE +#define z_2 p2, #(2*NUMSIZE) +#define w_2 p2, #(3*NUMSIZE) + +#define t0 sp, #(0*NUMSIZE) +#define t1 sp, #(1*NUMSIZE) +#define t2 sp, #(2*NUMSIZE) +#define t3 sp, #(3*NUMSIZE) +#define t4 sp, #(4*NUMSIZE) +#define t5 sp, #(5*NUMSIZE) + +// Load 64-bit immediate into a register + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +// Macro wrapping up the basic field operation bignum_mul_p25519_alt, only +// trivially different from a pure function call to that subroutine. 
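+// As an informal specification (a sketch of the intended behavior, not
+// a restatement of the subroutine's documentation): the macro below
+// acts like a call
+//
+//     bignum_mul_p25519_alt(z, x, y);   /* z = (x * y) mod p_25519 */
+//
+// on 4-digit arguments, computing the 512-bit product, folding the top
+// half back in via 2^256 == 38 (mod p_25519), and ending with a
+// conditional subtraction so the stored result is fully reduced.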
+ +#define mul_p25519(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [P2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [P1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #0x26; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, cs; \ + adds x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + orr x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #0x13; \ + madd x11, x7, x8, x7; \ + adds x12, x12, x11; \ + adcs x13, x13, x9; \ + adcs x14, x14, x3; \ + adcs x15, x15, xzr; \ + csel x7, x7, xzr, cc; \ + subs x12, x12, x7; \ + sbcs x13, x13, xzr; \ + sbcs x14, x14, xzr; \ + sbc x15, x15, xzr; \ + and x15, x15, #0x7fffffffffffffff; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. 
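+// Sketch of the folding identity relied on here (illustration only):
+// writing the 512-bit product as h * 2^256 + l with h, l < 2^256,
+//
+//     h * 2^256 + l == 38 * h + l   (mod 2^256 - 38)
+//
+// so one multiply-by-38 pass (the "#0x26" block below) plus a top-bit
+// fixup via 2^255 == 19 (mod p_25519) leaves a 4-digit value congruent
+// to the product and below 2 * p_25519, with no final correction step.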
+ +#define mul_4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + mul x12, x3, x7; \ + umulh x13, x3, x7; \ + mul x11, x3, x8; \ + umulh x14, x3, x8; \ + adds x13, x13, x11; \ + ldp x9, x10, [P2+16]; \ + mul x11, x3, x9; \ + umulh x15, x3, x9; \ + adcs x14, x14, x11; \ + mul x11, x3, x10; \ + umulh x16, x3, x10; \ + adcs x15, x15, x11; \ + adc x16, x16, xzr; \ + ldp x5, x6, [P1+16]; \ + mul x11, x4, x7; \ + adds x13, x13, x11; \ + mul x11, x4, x8; \ + adcs x14, x14, x11; \ + mul x11, x4, x9; \ + adcs x15, x15, x11; \ + mul x11, x4, x10; \ + adcs x16, x16, x11; \ + umulh x3, x4, x10; \ + adc x3, x3, xzr; \ + umulh x11, x4, x7; \ + adds x14, x14, x11; \ + umulh x11, x4, x8; \ + adcs x15, x15, x11; \ + umulh x11, x4, x9; \ + adcs x16, x16, x11; \ + adc x3, x3, xzr; \ + mul x11, x5, x7; \ + adds x14, x14, x11; \ + mul x11, x5, x8; \ + adcs x15, x15, x11; \ + mul x11, x5, x9; \ + adcs x16, x16, x11; \ + mul x11, x5, x10; \ + adcs x3, x3, x11; \ + umulh x4, x5, x10; \ + adc x4, x4, xzr; \ + umulh x11, x5, x7; \ + adds x15, x15, x11; \ + umulh x11, x5, x8; \ + adcs x16, x16, x11; \ + umulh x11, x5, x9; \ + adcs x3, x3, x11; \ + adc x4, x4, xzr; \ + mul x11, x6, x7; \ + adds x15, x15, x11; \ + mul x11, x6, x8; \ + adcs x16, x16, x11; \ + mul x11, x6, x9; \ + adcs x3, x3, x11; \ + mul x11, x6, x10; \ + adcs x4, x4, x11; \ + umulh x5, x6, x10; \ + adc x5, x5, xzr; \ + umulh x11, x6, x7; \ + adds x16, x16, x11; \ + umulh x11, x6, x8; \ + adcs x3, x3, x11; \ + umulh x11, x6, x9; \ + adcs x4, x4, x11; \ + adc x5, x5, xzr; \ + mov x7, #0x26; \ + mul x11, x7, x16; \ + umulh x9, x7, x16; \ + adds x12, x12, x11; \ + mul x11, x7, x3; \ + umulh x3, x7, x3; \ + adcs x13, x13, x11; \ + mul x11, x7, x4; \ + umulh x4, x7, x4; \ + adcs x14, x14, x11; \ + mul x11, x7, x5; \ + umulh x5, x7, x5; \ + adcs x15, x15, x11; \ + cset x16, cs; \ + adds x15, x15, x4; \ + adc x16, x16, x5; \ + cmn x15, x15; \ + bic x15, x15, #0x8000000000000000; \ + adc x8, x16, x16; \ + mov x7, #0x13; \ + mul x11, x7, x8; \ + adds x12, x12, x11; \ + adcs x13, x13, x9; \ + adcs x14, x14, x3; \ + adc x15, x15, xzr; \ + stp x12, x13, [P0]; \ + stp x14, x15, [P0+16] + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the final +// optional correction. 
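+// (Informal note: sqr_4(P0,P1) behaves like mul_4(P0,P1,P1), giving a
+// result congruent to the square mod p_25519 and below 2 * p_25519,
+// but with a squaring-specific schedule that forms each cross product
+// once and doubles it instead of computing it twice.)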
+ +#define sqr_4(P0,P1) \ + ldp x2, x3, [P1]; \ + mul x9, x2, x3; \ + umulh x10, x2, x3; \ + ldp x4, x5, [P1+16]; \ + mul x11, x2, x5; \ + umulh x12, x2, x5; \ + mul x7, x2, x4; \ + umulh x6, x2, x4; \ + adds x10, x10, x7; \ + adcs x11, x11, x6; \ + mul x7, x3, x4; \ + umulh x6, x3, x4; \ + adc x6, x6, xzr; \ + adds x11, x11, x7; \ + mul x13, x4, x5; \ + umulh x14, x4, x5; \ + adcs x12, x12, x6; \ + mul x7, x3, x5; \ + umulh x6, x3, x5; \ + adc x6, x6, xzr; \ + adds x12, x12, x7; \ + adcs x13, x13, x6; \ + adc x14, x14, xzr; \ + adds x9, x9, x9; \ + adcs x10, x10, x10; \ + adcs x11, x11, x11; \ + adcs x12, x12, x12; \ + adcs x13, x13, x13; \ + adcs x14, x14, x14; \ + cset x6, cs; \ + umulh x7, x2, x2; \ + mul x8, x2, x2; \ + adds x9, x9, x7; \ + mul x7, x3, x3; \ + adcs x10, x10, x7; \ + umulh x7, x3, x3; \ + adcs x11, x11, x7; \ + mul x7, x4, x4; \ + adcs x12, x12, x7; \ + umulh x7, x4, x4; \ + adcs x13, x13, x7; \ + mul x7, x5, x5; \ + adcs x14, x14, x7; \ + umulh x7, x5, x5; \ + adc x6, x6, x7; \ + mov x3, #0x26; \ + mul x7, x3, x12; \ + umulh x4, x3, x12; \ + adds x8, x8, x7; \ + mul x7, x3, x13; \ + umulh x13, x3, x13; \ + adcs x9, x9, x7; \ + mul x7, x3, x14; \ + umulh x14, x3, x14; \ + adcs x10, x10, x7; \ + mul x7, x3, x6; \ + umulh x6, x3, x6; \ + adcs x11, x11, x7; \ + cset x12, cs; \ + adds x11, x11, x14; \ + adc x12, x12, x6; \ + cmn x11, x11; \ + bic x11, x11, #0x8000000000000000; \ + adc x2, x12, x12; \ + mov x3, #0x13; \ + mul x7, x3, x2; \ + adds x8, x8, x7; \ + adcs x9, x9, x4; \ + adcs x10, x10, x13; \ + adc x11, x11, xzr; \ + stp x8, x9, [P0]; \ + stp x10, x11, [P0+16] + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + subs x5, x5, x4; \ + sbcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + mov x4, #38; \ + csel x3, x4, xzr, lo; \ + subs x5, x5, x3; \ + sbcs x6, x6, xzr; \ + sbcs x7, x7, xzr; \ + sbc x8, x8, xzr; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16] + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo 2 * p_25519 +// provided the sum of the inputs is < 2^256 + 2^256 - 38, so in particular +// provided at least one of them is reduced double modulo.
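+// Illustrative bound check for the comment above (not part of the code):
+// if one input is < 2^256 - 38 and the other is any 4-digit value
+// < 2^256, then the sum is < 2^257 - 38, so at most one wrap past 2^256
+// occurs, and the single masked "+38" below (equivalent to subtracting
+// 2^256 - 38) restores a 4-digit result congruent mod 2 * p_25519.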
+ +#define add_twice4(P0,P1,P2) \ + ldp x3, x4, [P1]; \ + ldp x7, x8, [P2]; \ + adds x3, x3, x7; \ + adcs x4, x4, x8; \ + ldp x5, x6, [P1+16]; \ + ldp x7, x8, [P2+16]; \ + adcs x5, x5, x7; \ + adcs x6, x6, x8; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +#define double_twice4(P0,P1) \ + ldp x3, x4, [P1]; \ + adds x3, x3, x3; \ + adcs x4, x4, x4; \ + ldp x5, x6, [P1+16]; \ + adcs x5, x5, x5; \ + adcs x6, x6, x6; \ + mov x9, #38; \ + csel x9, x9, xzr, cs; \ + adds x3, x3, x9; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + adc x6, x6, xzr; \ + stp x3, x4, [P0]; \ + stp x5, x6, [P0+16] + +// Load the constant k_25519 = 2 * d_25519 using immediate operations + +#define load_k25519(P0) \ + movz x0, #0xf159; \ + movz x1, #0xb156; \ + movz x2, #0xd130; \ + movz x3, #0xfce7; \ + movk x0, #0x26b2, lsl #16; \ + movk x1, #0x8283, lsl #16; \ + movk x2, #0xeef3, lsl #16; \ + movk x3, #0x56df, lsl #16; \ + movk x0, #0x9b94, lsl #32; \ + movk x1, #0x149a, lsl #32; \ + movk x2, #0x80f2, lsl #32; \ + movk x3, #0xd9dc, lsl #32; \ + movk x0, #0xebd6, lsl #48; \ + movk x1, #0x00e0, lsl #48; \ + movk x2, #0x198e, lsl #48; \ + movk x3, #0x2406, lsl #48; \ + stp x0, x1, [P0]; \ + stp x2, x3, [P0+16] + +S2N_BN_SYMBOL(edwards25519_scalarmuldouble_alt): + +// Save regs and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + stp x25, x30, [sp, -16]! + sub sp, sp, #NSPACE + +// Move the output pointer to a stable place + + mov res, x0 + +// Copy scalars while recoding all 4-bit nybbles except the top +// one (bits 252..255) into signed 4-bit digits. This is essentially +// done just by adding the recoding constant 0x0888..888, after +// which all digits except the first have an implicit bias of -8, +// so 0 -> -8, 1 -> -7, ... 7 -> -1, 8 -> 0, 9 -> 1, ... 15 -> 7. +// (We could literally create 2s complement signed nybbles by +// XORing with the same constant 0x0888..888 afterwards, but it +// doesn't seem to make the end usage any simpler.) +// +// In order to ensure that the unrecoded top nybble (bits 252..255) +// does not become > 8 as a result of carries lower down from the +// recoding, we first (conceptually) subtract the group order iff +// the top digit of the scalar is > 2^63. In the implementation the +// reduction and recoding are combined by optionally using the +// modified recoding constant 0x0888...888 + (2^256 - group_order). + + movbig(x4,#0xc7f5, #0x6fb5, #0xa0d9, #0xe920) + movbig(x5,#0xe190, #0xb993, #0x70cb, #0xa1d5) + mov x7, #0x8888888888888888 + sub x6, x7, #1 + bic x8, x7, #0xF000000000000000 + + ldp x10, x11, [x3] + ldp x12, x13, [x3, #16] + mov x3, 0x8000000000000000 + cmp x3, x13 + csel x14, x7, x4, cs + csel x15, x7, x5, cs + csel x16, x7, x6, cs + csel x17, x8, x7, cs + adds x10, x10, x14 + adcs x11, x11, x15 + adcs x12, x12, x16 + adc x13, x13, x17 + stp x10, x11, [bscalar] + stp x12, x13, [bscalar+16] + + ldp x10, x11, [x1] + ldp x12, x13, [x1, #16] + mov x3, 0x8000000000000000 + cmp x3, x13 + csel x14, x7, x4, cs + csel x15, x7, x5, cs + csel x16, x7, x6, cs + csel x17, x8, x7, cs + adds x10, x10, x14 + adcs x11, x11, x15 + adcs x12, x12, x16 + adc x13, x13, x17 + stp x10, x11, [scalar] + stp x12, x13, [scalar+16] + +// Create table of multiples 1..8 of the general input point at "tab". 
+// Reduce the input coordinates x and y modulo 2^256 - 38 first, for the +// sake of definiteness; this is the reduction that will be maintained. +// We could slightly optimize the additions because we know the input +// point is affine (so Z = 1), but it doesn't seem worth the complication. + + ldp x10, x11, [x2] + ldp x12, x13, [x2, #16] + adds x14, x10, #38 + adcs x15, x11, xzr + adcs x16, x12, xzr + adcs x17, x13, xzr + csel x10, x14, x10, cs + csel x11, x15, x11, cs + csel x12, x16, x12, cs + csel x13, x17, x13, cs + stp x10, x11, [tab] + stp x12, x13, [tab+16] + + ldp x10, x11, [x2, #32] + ldp x12, x13, [x2, #48] + adds x14, x10, #38 + adcs x15, x11, xzr + adcs x16, x12, xzr + adcs x17, x13, xzr + csel x10, x14, x10, cs + csel x11, x15, x11, cs + csel x12, x16, x12, cs + csel x13, x17, x13, cs + stp x10, x11, [tab+32] + stp x12, x13, [tab+48] + + mov x1, #1 + stp x1, xzr, [tab+64] + stp xzr, xzr, [tab+80] + + add p0, tab+96 + add p1, tab + add p2, tab+32 + mul_4(x_0,x_1,x_2) + +// Multiple 2 + + add p0, tab+1*128 + add p1, tab + bl edwards25519_scalarmuldouble_alt_epdouble + +// Multiple 3 + + add p0, tab+2*128 + add p1, tab + add p2, tab+1*128 + bl edwards25519_scalarmuldouble_alt_epadd + +// Multiple 4 + + add p0, tab+3*128 + add p1, tab+1*128 + bl edwards25519_scalarmuldouble_alt_epdouble + +// Multiple 5 + + add p0, tab+4*128 + add p1, tab + add p2, tab+3*128 + bl edwards25519_scalarmuldouble_alt_epadd + +// Multiple 6 + + add p0, tab+5*128 + add p1, tab+2*128 + bl edwards25519_scalarmuldouble_alt_epdouble + +// Multiple 7 + + add p0, tab+6*128 + add p1, tab + add p2, tab+5*128 + bl edwards25519_scalarmuldouble_alt_epadd + +// Multiple 8 + + add p0, tab+7*128 + add p1, tab+3*128 + bl edwards25519_scalarmuldouble_alt_epdouble + +// Handle the initialization, starting the loop counter at i = 252 +// and initializing acc to the sum of the table entries for the +// top nybbles of the scalars (the ones with no implicit -8 bias). + + mov i, #252 + +// Index for btable entry... 
+ + ldr x0, [bscalar+24] + lsr bf, x0, #60 + +// ...and constant-time indexing based on that index + + adr x14, edwards25519_scalarmuldouble_alt_table + + mov x0, #1 + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + + cmp bf, #1 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #2 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #3 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #4 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #5 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #6 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #7 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] 
+ csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #8 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + + stp x0, x1, [btabent] + stp x2, x3, [btabent+16] + stp x4, x5, [btabent+32] + stp x6, x7, [btabent+48] + stp x8, x9, [btabent+64] + stp x10, x11, [btabent+80] + +// Index for table entry... + + ldr x0, [scalar+24] + lsr bf, x0, #60 + +// ...and constant-time indexing based on that index + + add p0, tab + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, #1 + mov x9, xzr + mov x10, xzr + mov x11, xzr + mov x12, xzr + mov x13, xzr + mov x14, xzr + mov x15, xzr + + cmp bf, #1 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #2 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #3 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #4 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #5 + ldp 
x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #6 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #7 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #8 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + + stp x0, x1, [tabent] + stp x2, x3, [tabent+16] + stp x4, x5, [tabent+32] + stp x6, x7, [tabent+48] + stp x8, x9, [tabent+64] + stp x10, x11, [tabent+80] + stp x12, x13, [tabent+96] + stp x14, x15, [tabent+112] + +// Add those elements to initialize the accumulator for bit position 252 + + add p0, acc + add p1, tabent + add p2, btabent + bl edwards25519_scalarmuldouble_alt_pepadd + +// Main loop with acc = [scalar/2^i] * point + [bscalar/2^i] * basepoint +// Start with i = 252 for bits 248..251 and go down four at a time to 3..0 + +edwards25519_scalarmuldouble_alt_loop: + + sub i, i, #4 + +// Double to acc' = 2 * acc + + add p0, acc + add p1, acc + bl edwards25519_scalarmuldouble_alt_pdouble + +// Get btable entry, first getting the adjusted bitfield... + + lsr x0, i, #6 + add x1, bscalar + ldr x2, [x1, x0, lsl #3] + lsr x3, x2, i + and x0, x3, #15 + subs bf, x0, #8 + cneg bf, bf, cc + csetm cf, cc + +// ... then doing constant-time lookup with the appropriate index... 
+ + adr x14, edwards25519_scalarmuldouble_alt_table + + mov x0, #1 + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + + cmp bf, #1 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #2 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #3 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #4 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #5 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #6 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #7 + ldp x12, x13, [x14] + csel x0, x0, x12, ne + csel x1, x1, x13, ne + ldp x12, x13, [x14, #16] + csel x2, x2, x12, ne + csel x3, x3, x13, ne + ldp x12, x13, [x14, #32] + csel x4, x4, x12, ne + csel x5, x5, x13, ne + ldp x12, x13, [x14, #48] + csel x6, x6, x12, ne + csel x7, x7, x13, ne + ldp x12, x13, [x14, #64] + csel x8, x8, x12, ne + csel x9, x9, x13, ne + ldp x12, x13, [x14, #80] + csel x10, x10, x12, ne + csel x11, x11, x13, ne + add x14, x14, #96 + + cmp bf, #8 + ldp x12, 
x13, [x14]
+ csel x0, x0, x12, ne
+ csel x1, x1, x13, ne
+ ldp x12, x13, [x14, #16]
+ csel x2, x2, x12, ne
+ csel x3, x3, x13, ne
+ ldp x12, x13, [x14, #32]
+ csel x4, x4, x12, ne
+ csel x5, x5, x13, ne
+ ldp x12, x13, [x14, #48]
+ csel x6, x6, x12, ne
+ csel x7, x7, x13, ne
+ ldp x12, x13, [x14, #64]
+ csel x8, x8, x12, ne
+ csel x9, x9, x13, ne
+ ldp x12, x13, [x14, #80]
+ csel x10, x10, x12, ne
+ csel x11, x11, x13, ne
+
+// ... then optionally negating before storing. The table entry
+// is in precomputed form and we currently have
+//
+// [x3;x2;x1;x0] = y - x
+// [x7;x6;x5;x4] = x + y
+// [x11;x10;x9;x8] = 2 * d * x * y
+//
+// Negation for Edwards curves is -(x,y) = (-x,y), which in this modified
+// form amounts to swapping the first two fields and negating the third.
+// The negation does not always fully reduce even mod 2^256-38 in the zero
+// case, instead giving -0 = 2^256-38. But that is fine since the result is
+// always fed to a multiplication inside the "pepadd" function below that
+// handles any 256-bit input.
+
+ cmp cf, xzr
+
+ csel x12, x0, x4, eq
+ csel x4, x0, x4, ne
+ csel x13, x1, x5, eq
+ csel x5, x1, x5, ne
+ csel x14, x2, x6, eq
+ csel x6, x2, x6, ne
+ csel x15, x3, x7, eq
+ csel x7, x3, x7, ne
+
+ eor x8, x8, cf
+ eor x9, x9, cf
+ eor x10, x10, cf
+ eor x11, x11, cf
+ mov x0, #37
+ and x0, x0, cf
+ subs x8, x8, x0
+ sbcs x9, x9, xzr
+ sbcs x10, x10, xzr
+ sbc x11, x11, xzr
+
+ stp x12, x13, [btabent]
+ stp x14, x15, [btabent+16]
+ stp x4, x5, [btabent+32]
+ stp x6, x7, [btabent+48]
+ stp x8, x9, [btabent+64]
+ stp x10, x11, [btabent+80]
+
+// Get table entry, first getting the adjusted bitfield...
+
+ lsr x0, i, #6
+ ldr x1, [sp, x0, lsl #3]
+ lsr x2, x1, i
+ and x0, x2, #15
+ subs bf, x0, #8
+ cneg bf, bf, cc
+ csetm cf, cc
+
+// ...
then getting the unadjusted table entry + + add p0, tab + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, #1 + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, #1 + mov x9, xzr + mov x10, xzr + mov x11, xzr + mov x12, xzr + mov x13, xzr + mov x14, xzr + mov x15, xzr + + cmp bf, #1 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #2 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #3 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #4 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #5 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #6 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, 
#32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #7 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + add p0, p0, #128 + + cmp bf, #8 + ldp x16, x17, [p0] + csel x0, x0, x16, ne + csel x1, x1, x17, ne + ldp x16, x17, [p0, #16] + csel x2, x2, x16, ne + csel x3, x3, x17, ne + ldp x16, x17, [p0, #32] + csel x4, x4, x16, ne + csel x5, x5, x17, ne + ldp x16, x17, [p0, #48] + csel x6, x6, x16, ne + csel x7, x7, x17, ne + ldp x16, x17, [p0, #64] + csel x8, x8, x16, ne + csel x9, x9, x17, ne + ldp x16, x17, [p0, #80] + csel x10, x10, x16, ne + csel x11, x11, x17, ne + ldp x16, x17, [p0, #96] + csel x12, x12, x16, ne + csel x13, x13, x17, ne + ldp x16, x17, [p0, #112] + csel x14, x14, x16, ne + csel x15, x15, x17, ne + +// ... then optionally negating before storing. This time the table +// entry is extended-projective, and is in registers thus: +// +// [x3;x2;x1;x0] = X +// [x7;x6;x5;x4] = Y +// [x11;x10;x9;x8] = Z +// [x15;x14;x13;x12] = W +// +// This time we just need to negate the X and the W fields. +// The crude way negation is done can result in values of X or W +// (when initially zero before negation) being exactly equal to +// 2^256-38, but the "pepadd" function handles that correctly. 
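+//
+// A rough C model of the masked negation below (illustrative sketch
+// only; m plays the role of cf, either 0 or all ones). When m is all
+// ones, x ^ m is the limbwise complement 2^256 - 1 - x, so subtracting
+// the masked 37 gives (2^256 - 38) - x, the negation mod 2^256 - 38:
+//
+//   #include <stdint.h>
+//
+//   static void cond_neg_model(uint64_t x[4], uint64_t m) {
+//     unsigned __int128 b = 37 & m;            // initial borrow-in
+//     for (int i = 0; i < 4; i++) {
+//       unsigned __int128 d = (unsigned __int128)(x[i] ^ m) - b;
+//       x[i] = (uint64_t) d;
+//       b = (uint64_t)(d >> 64) & 1;           // 1 iff the limb borrowed
+//     }
+//   }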
+ + eor x0, x0, cf + eor x1, x1, cf + eor x2, x2, cf + eor x3, x3, cf + mov x16, #37 + and x16, x16, cf + subs x0, x0, x16 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x3, x3, xzr + + eor x12, x12, cf + eor x13, x13, cf + eor x14, x14, cf + eor x15, x15, cf + subs x12, x12, x16 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + + stp x0, x1, [tabent] + stp x2, x3, [tabent+16] + stp x4, x5, [tabent+32] + stp x6, x7, [tabent+48] + stp x8, x9, [tabent+64] + stp x10, x11, [tabent+80] + stp x12, x13, [tabent+96] + stp x14, x15, [tabent+112] + +// Double to acc' = 4 * acc + + add p0, acc + add p1, acc + bl edwards25519_scalarmuldouble_alt_pdouble + +// Add tabent := tabent + btabent + + add p0, tabent + add p1, tabent + add p2, btabent + bl edwards25519_scalarmuldouble_alt_pepadd + +// Double to acc' = 8 * acc + + add p0, acc + add p1, acc + bl edwards25519_scalarmuldouble_alt_pdouble + +// Double to acc' = 16 * acc + + add p0, acc + add p1, acc + bl edwards25519_scalarmuldouble_alt_epdouble + +// Add table entry, acc := acc + tabent + + add p0, acc + add p1, acc + add p2, tabent + bl edwards25519_scalarmuldouble_alt_epadd + +// Loop down + + cbnz i, edwards25519_scalarmuldouble_alt_loop + +// Modular inverse setup + + mov x0, #4 + add x1, tabent + add x2, acc+64 + adr x3, edwards25519_scalarmuldouble_alt_p25519 + add x4, btabent + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "arm/generic/bignum_modinv.S". + +edwards25519_scalarmuldouble_alt_modinv: + lsl x10, x0, #3 + add x21, x4, x10 + add x22, x21, x10 + mov x10, xzr +edwards25519_scalarmuldouble_alt_copyloop: + ldr x11, [x2, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + str x11, [x21, x10, lsl #3] + str x12, [x22, x10, lsl #3] + str x12, [x4, x10, lsl #3] + str xzr, [x1, x10, lsl #3] + add x10, x10, #0x1 + cmp x10, x0 + b.cc edwards25519_scalarmuldouble_alt_copyloop + ldr x11, [x4] + sub x12, x11, #0x1 + str x12, [x4] + lsl x20, x11, #2 + sub x20, x11, x20 + eor x20, x20, #0x2 + mov x12, #0x1 + madd x12, x11, x20, x12 + mul x11, x12, x12 + madd x20, x12, x20, x20 + mul x12, x11, x11 + madd x20, x11, x20, x20 + mul x11, x12, x12 + madd x20, x12, x20, x20 + madd x20, x11, x20, x20 + lsl x2, x0, #7 +edwards25519_scalarmuldouble_alt_outerloop: + add x10, x2, #0x3f + lsr x5, x10, #6 + cmp x5, x0 + csel x5, x0, x5, cs + mov x13, xzr + mov x15, xzr + mov x14, xzr + mov x16, xzr + mov x19, xzr + mov x10, xzr +edwards25519_scalarmuldouble_alt_toploop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + orr x17, x11, x12 + cmp x17, xzr + and x17, x19, x13 + csel x15, x17, x15, ne + and x17, x19, x14 + csel x16, x17, x16, ne + csel x13, x11, x13, ne + csel x14, x12, x14, ne + csetm x19, ne + add x10, x10, #0x1 + cmp x10, x5 + b.cc edwards25519_scalarmuldouble_alt_toploop + orr x11, x13, x14 + clz x12, x11 + negs x17, x12 + lsl x13, x13, x12 + csel x15, x15, xzr, ne + lsl x14, x14, x12 + csel x16, x16, xzr, ne + lsr x15, x15, x17 + lsr x16, x16, x17 + orr x13, x13, x15 + orr x14, x14, x16 + ldr x15, [x21] + ldr x16, [x22] + mov x6, #0x1 + mov x7, xzr + mov x8, xzr + mov x9, #0x1 + mov x10, #0x3a + tst x15, #0x1 +edwards25519_scalarmuldouble_alt_innerloop: + csel x11, x14, xzr, ne + csel x12, x16, xzr, ne + csel x17, x8, xzr, ne + csel x19, x9, xzr, ne + ccmp x13, x14, #0x2, ne + sub x11, x13, x11 + sub x12, x15, x12 + csel x14, 
x14, x13, cs + cneg x11, x11, cc + csel x16, x16, x15, cs + cneg x15, x12, cc + csel x8, x8, x6, cs + csel x9, x9, x7, cs + tst x12, #0x2 + add x6, x6, x17 + add x7, x7, x19 + lsr x13, x11, #1 + lsr x15, x15, #1 + add x8, x8, x8 + add x9, x9, x9 + sub x10, x10, #0x1 + cbnz x10, edwards25519_scalarmuldouble_alt_innerloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +edwards25519_scalarmuldouble_alt_congloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + adds x15, x15, x16 + extr x17, x15, x17, #58 + str x17, [x4, x10, lsl #3] + mov x17, x15 + umulh x15, x7, x12 + adc x13, x13, x15 + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + adds x15, x15, x16 + extr x19, x15, x19, #58 + str x19, [x1, x10, lsl #3] + mov x19, x15 + umulh x15, x9, x12 + adc x14, x14, x15 + add x10, x10, #0x1 + cmp x10, x0 + b.cc edwards25519_scalarmuldouble_alt_congloop + extr x13, x13, x17, #58 + extr x14, x14, x19, #58 + ldr x11, [x4] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, edwards25519_scalarmuldouble_alt_wmontend +edwards25519_scalarmuldouble_alt_wmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x4, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_alt_wmontloop +edwards25519_scalarmuldouble_alt_wmontend: + adcs x16, x16, x13 + adc x13, xzr, xzr + sub x15, x10, #0x1 + str x16, [x4, x15, lsl #3] + negs x10, xzr +edwards25519_scalarmuldouble_alt_wcmploop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_alt_wcmploop + sbcs xzr, x13, xzr + csetm x13, cs + negs x10, xzr +edwards25519_scalarmuldouble_alt_wcorrloop: + ldr x11, [x4, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x13 + sbcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_alt_wcorrloop + ldr x11, [x1] + mul x17, x11, x20 + ldr x12, [x3] + mul x15, x17, x12 + umulh x16, x17, x12 + adds x11, x11, x15 + mov x10, #0x1 + sub x11, x0, #0x1 + cbz x11, edwards25519_scalarmuldouble_alt_zmontend +edwards25519_scalarmuldouble_alt_zmontloop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + mul x15, x17, x11 + adcs x12, x12, x16 + umulh x16, x17, x11 + adc x16, x16, xzr + adds x12, x12, x15 + sub x15, x10, #0x1 + str x12, [x1, x15, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_alt_zmontloop +edwards25519_scalarmuldouble_alt_zmontend: + adcs x16, x16, x14 + adc x14, xzr, xzr + sub x15, x10, #0x1 + str x16, [x1, x15, lsl #3] + negs x10, xzr +edwards25519_scalarmuldouble_alt_zcmploop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + sbcs xzr, x11, x12 + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_alt_zcmploop + sbcs xzr, x14, xzr + csetm x14, cs + negs x10, xzr +edwards25519_scalarmuldouble_alt_zcorrloop: + ldr x11, [x1, x10, lsl #3] + ldr x12, [x3, x10, lsl #3] + and x12, x12, x14 + sbcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, 
edwards25519_scalarmuldouble_alt_zcorrloop + mov x13, xzr + mov x14, xzr + mov x17, xzr + mov x19, xzr + mov x10, xzr +edwards25519_scalarmuldouble_alt_crossloop: + ldr x11, [x21, x10, lsl #3] + ldr x12, [x22, x10, lsl #3] + mul x15, x6, x11 + mul x16, x7, x12 + adds x15, x15, x13 + umulh x13, x6, x11 + adc x13, x13, xzr + subs x15, x15, x16 + str x15, [x21, x10, lsl #3] + umulh x15, x7, x12 + sub x17, x15, x17 + sbcs x13, x13, x17 + csetm x17, cc + mul x15, x8, x11 + mul x16, x9, x12 + adds x15, x15, x14 + umulh x14, x8, x11 + adc x14, x14, xzr + subs x15, x15, x16 + str x15, [x22, x10, lsl #3] + umulh x15, x9, x12 + sub x19, x15, x19 + sbcs x14, x14, x19 + csetm x19, cc + add x10, x10, #0x1 + cmp x10, x5 + b.cc edwards25519_scalarmuldouble_alt_crossloop + cmn x17, x17 + ldr x15, [x21] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, edwards25519_scalarmuldouble_alt_negskip1 +edwards25519_scalarmuldouble_alt_negloop1: + add x11, x10, #0x8 + ldr x12, [x21, x11] + extr x15, x12, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, edwards25519_scalarmuldouble_alt_negloop1 +edwards25519_scalarmuldouble_alt_negskip1: + extr x15, x13, x15, #58 + eor x15, x15, x17 + adcs x15, x15, xzr + str x15, [x21, x10] + cmn x19, x19 + ldr x15, [x22] + mov x10, xzr + sub x6, x5, #0x1 + cbz x6, edwards25519_scalarmuldouble_alt_negskip2 +edwards25519_scalarmuldouble_alt_negloop2: + add x11, x10, #0x8 + ldr x12, [x22, x11] + extr x15, x12, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x15, x12 + add x10, x10, #0x8 + sub x6, x6, #0x1 + cbnz x6, edwards25519_scalarmuldouble_alt_negloop2 +edwards25519_scalarmuldouble_alt_negskip2: + extr x15, x14, x15, #58 + eor x15, x15, x19 + adcs x15, x15, xzr + str x15, [x22, x10] + mov x10, xzr + cmn x17, x17 +edwards25519_scalarmuldouble_alt_wfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x4, x10, lsl #3] + and x11, x11, x17 + eor x12, x12, x17 + adcs x11, x11, x12 + str x11, [x4, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_alt_wfliploop + mvn x19, x19 + mov x10, xzr + cmn x19, x19 +edwards25519_scalarmuldouble_alt_zfliploop: + ldr x11, [x3, x10, lsl #3] + ldr x12, [x1, x10, lsl #3] + and x11, x11, x19 + eor x12, x12, x19 + adcs x11, x11, x12 + str x11, [x1, x10, lsl #3] + add x10, x10, #0x1 + sub x11, x10, x0 + cbnz x11, edwards25519_scalarmuldouble_alt_zfliploop + subs x2, x2, #0x3a + b.hi edwards25519_scalarmuldouble_alt_outerloop + +// Store result. Note that these are the only reductions mod 2^255-19 + + mov p0, res + add p1, acc + add p2, tabent + mul_p25519(x_0,x_1,x_2) + + add p0, res, #32 + add p1, acc+32 + add p2, tabent + mul_p25519(x_0,x_1,x_2) + +// Restore stack and registers + + add sp, sp, #NSPACE + ldp x25, x30, [sp], 16 + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +// **************************************************************************** +// Localized versions of subroutines. +// These are close to the standalone functions "edwards25519_epdouble" etc., +// but are only maintaining reduction modulo 2^256 - 38, not 2^255 - 19. 
+
+// ****************************************************************************
+
+edwards25519_scalarmuldouble_alt_epdouble:
+ sub sp, sp, #(5*NUMSIZE)
+ add_twice4(t0,x_1,y_1)
+ sqr_4(t1,z_1)
+ sqr_4(t2,x_1)
+ sqr_4(t3,y_1)
+ double_twice4(t1,t1)
+ sqr_4(t0,t0)
+ add_twice4(t4,t2,t3)
+ sub_twice4(t2,t2,t3)
+ add_twice4(t3,t1,t2)
+ sub_twice4(t1,t4,t0)
+ mul_4(y_0,t2,t4)
+ mul_4(z_0,t3,t2)
+ mul_4(w_0,t1,t4)
+ mul_4(x_0,t1,t3)
+ add sp, sp, #(5*NUMSIZE)
+ ret
+
+edwards25519_scalarmuldouble_alt_pdouble:
+ sub sp, sp, #(5*NUMSIZE)
+ add_twice4(t0,x_1,y_1)
+ sqr_4(t1,z_1)
+ sqr_4(t2,x_1)
+ sqr_4(t3,y_1)
+ double_twice4(t1,t1)
+ sqr_4(t0,t0)
+ add_twice4(t4,t2,t3)
+ sub_twice4(t2,t2,t3)
+ add_twice4(t3,t1,t2)
+ sub_twice4(t1,t4,t0)
+ mul_4(y_0,t2,t4)
+ mul_4(z_0,t3,t2)
+ mul_4(x_0,t1,t3)
+ add sp, sp, #(5*NUMSIZE)
+ ret
+
+edwards25519_scalarmuldouble_alt_epadd:
+ sub sp, sp, #(6*NUMSIZE)
+ mul_4(t0,w_1,w_2)
+ sub_twice4(t1,y_1,x_1)
+ sub_twice4(t2,y_2,x_2)
+ add_twice4(t3,y_1,x_1)
+ add_twice4(t4,y_2,x_2)
+ double_twice4(t5,z_2)
+ mul_4(t1,t1,t2)
+ mul_4(t3,t3,t4)
+ load_k25519(t2)
+ mul_4(t2,t2,t0)
+ mul_4(t4,z_1,t5)
+ sub_twice4(t0,t3,t1)
+ add_twice4(t5,t3,t1)
+ sub_twice4(t1,t4,t2)
+ add_twice4(t3,t4,t2)
+ mul_4(w_0,t0,t5)
+ mul_4(x_0,t0,t1)
+ mul_4(y_0,t3,t5)
+ mul_4(z_0,t1,t3)
+ add sp, sp, #(6*NUMSIZE)
+ ret
+
+edwards25519_scalarmuldouble_alt_pepadd:
+ sub sp, sp, #(6*NUMSIZE)
+ double_twice4(t0,z_1)
+ sub_twice4(t1,y_1,x_1)
+ add_twice4(t2,y_1,x_1)
+ mul_4(t3,w_1,z_2)
+ mul_4(t1,t1,x_2)
+ mul_4(t2,t2,y_2)
+ sub_twice4(t4,t0,t3)
+ add_twice4(t0,t0,t3)
+ sub_twice4(t5,t2,t1)
+ add_twice4(t1,t2,t1)
+ mul_4(z_0,t4,t0)
+ mul_4(x_0,t5,t4)
+ mul_4(y_0,t0,t1)
+ mul_4(w_0,t5,t1)
+ add sp, sp, #(6*NUMSIZE)
+ ret
+
+// ****************************************************************************
+// The precomputed data (all read-only). This is currently part of the same
+// text section, which gives position-independent code with simple PC-relative
+// addressing. However it could be put in a separate section via something like
+//
+// .section .rodata
+// ****************************************************************************
+
+// The modulus p_25519 = 2^255 - 19, for the modular inverse
+
+edwards25519_scalarmuldouble_alt_p25519:
+ .quad 0xffffffffffffffed
+ .quad 0xffffffffffffffff
+ .quad 0xffffffffffffffff
+ .quad 0x7fffffffffffffff
+
+// Precomputed table of multiples of generator for edwards25519
+// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples.
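+//
+// Layout note: the entry for k * B (k = 1..8) occupies the 96 bytes at
+// offset 96 * (k - 1), three consecutive 4-limb field elements, which
+// matches the 96-byte stride of the selection code above. The affine
+// point is recoverable from the first two fields as half their
+// difference (the x coordinate) and half their sum (the y coordinate)
+// mod p_25519; the third field saves a multiplication by d during the
+// mixed addition.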
+ +edwards25519_scalarmuldouble_alt_table: + + // 1 * G + + .quad 0x9d103905d740913e + .quad 0xfd399f05d140beb3 + .quad 0xa5c18434688f8a09 + .quad 0x44fd2f9298f81267 + .quad 0x2fbc93c6f58c3b85 + .quad 0xcf932dc6fb8c0e19 + .quad 0x270b4898643d42c2 + .quad 0x07cf9d3a33d4ba65 + .quad 0xabc91205877aaa68 + .quad 0x26d9e823ccaac49e + .quad 0x5a1b7dcbdd43598c + .quad 0x6f117b689f0c65a8 + + // 2 * G + + .quad 0x8a99a56042b4d5a8 + .quad 0x8f2b810c4e60acf6 + .quad 0xe09e236bb16e37aa + .quad 0x6bb595a669c92555 + .quad 0x9224e7fc933c71d7 + .quad 0x9f469d967a0ff5b5 + .quad 0x5aa69a65e1d60702 + .quad 0x590c063fa87d2e2e + .quad 0x43faa8b3a59b7a5f + .quad 0x36c16bdd5d9acf78 + .quad 0x500fa0840b3d6a31 + .quad 0x701af5b13ea50b73 + + // 3 * G + + .quad 0x56611fe8a4fcd265 + .quad 0x3bd353fde5c1ba7d + .quad 0x8131f31a214bd6bd + .quad 0x2ab91587555bda62 + .quad 0xaf25b0a84cee9730 + .quad 0x025a8430e8864b8a + .quad 0xc11b50029f016732 + .quad 0x7a164e1b9a80f8f4 + .quad 0x14ae933f0dd0d889 + .quad 0x589423221c35da62 + .quad 0xd170e5458cf2db4c + .quad 0x5a2826af12b9b4c6 + + // 4 * G + + .quad 0x95fe050a056818bf + .quad 0x327e89715660faa9 + .quad 0xc3e8e3cd06a05073 + .quad 0x27933f4c7445a49a + .quad 0x287351b98efc099f + .quad 0x6765c6f47dfd2538 + .quad 0xca348d3dfb0a9265 + .quad 0x680e910321e58727 + .quad 0x5a13fbe9c476ff09 + .quad 0x6e9e39457b5cc172 + .quad 0x5ddbdcf9102b4494 + .quad 0x7f9d0cbf63553e2b + + // 5 * G + + .quad 0x7f9182c3a447d6ba + .quad 0xd50014d14b2729b7 + .quad 0xe33cf11cb864a087 + .quad 0x154a7e73eb1b55f3 + .quad 0xa212bc4408a5bb33 + .quad 0x8d5048c3c75eed02 + .quad 0xdd1beb0c5abfec44 + .quad 0x2945ccf146e206eb + .quad 0xbcbbdbf1812a8285 + .quad 0x270e0807d0bdd1fc + .quad 0xb41b670b1bbda72d + .quad 0x43aabe696b3bb69a + + // 6 * G + + .quad 0x499806b67b7d8ca4 + .quad 0x575be28427d22739 + .quad 0xbb085ce7204553b9 + .quad 0x38b64c41ae417884 + .quad 0x3a0ceeeb77157131 + .quad 0x9b27158900c8af88 + .quad 0x8065b668da59a736 + .quad 0x51e57bb6a2cc38bd + .quad 0x85ac326702ea4b71 + .quad 0xbe70e00341a1bb01 + .quad 0x53e4a24b083bc144 + .quad 0x10b8e91a9f0d61e3 + + // 7 * G + + .quad 0xba6f2c9aaa3221b1 + .quad 0x6ca021533bba23a7 + .quad 0x9dea764f92192c3a + .quad 0x1d6edd5d2e5317e0 + .quad 0x6b1a5cd0944ea3bf + .quad 0x7470353ab39dc0d2 + .quad 0x71b2528228542e49 + .quad 0x461bea69283c927e + .quad 0xf1836dc801b8b3a2 + .quad 0xb3035f47053ea49a + .quad 0x529c41ba5877adf3 + .quad 0x7a9fbb1c6a0f90a7 + + // 8 * G + + .quad 0xe2a75dedf39234d9 + .quad 0x963d7680e1b558f9 + .quad 0x2c2741ac6e3c23fb + .quad 0x3a9024a1320e01c3 + .quad 0x59b7596604dd3e8f + .quad 0x6cb30377e288702c + .quad 0xb1339c665ed9c323 + .quad 0x0915e76061bce52f + .quad 0xe7c1f5d9c9a2911a + .quad 0xb8a371788bcca7d7 + .quad 0x636412190eb62a32 + .quad 0x26907c5c2ecc4e95 + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/curve25519/edwards25519_scalarmuldouble.S b/x86_att/curve25519/edwards25519_scalarmuldouble.S new file mode 100644 index 0000000000..0138d1a4b2 --- /dev/null +++ b/x86_att/curve25519/edwards25519_scalarmuldouble.S @@ -0,0 +1,2666 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Double scalar multiplication for edwards25519, fresh and base point +// Input scalar[4], point[8], bscalar[4]; output res[8] +// +// extern void edwards25519_scalarmuldouble +// (uint64_t res[static 8],uint64_t scalar[static 4], +// uint64_t point[static 8],uint64_t bscalar[static 4]); +// +// Given scalar = n, point = P and bscalar = m, returns in res +// the point (X,Y) = n * P + m * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve. +// +// Both 256-bit coordinates of the input point P are implicitly +// reduced modulo 2^255-19 if they are not already in reduced form, +// but the conventional usage is that they *are* already reduced. +// The scalars can be arbitrary 256-bit numbers but may also be +// considered as implicitly reduced modulo the group order. +// +// Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point, RCX = bscalar +// Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point, R9 = bscalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_scalarmuldouble) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_scalarmuldouble) + .text + +// Size of individual field elements + +#define NUMSIZE 32 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. +// Both "resx" and "resy" assume the "res" pointer has been preloaded into %rbp. + +#define resx (0*NUMSIZE)(%rbp) +#define resy (1*NUMSIZE)(%rbp) + +#define scalar (0*NUMSIZE)(%rsp) +#define bscalar (1*NUMSIZE)(%rsp) + +#define acc (3*NUMSIZE)(%rsp) + +#define tabent (7*NUMSIZE)(%rsp) +#define btabent (11*NUMSIZE)(%rsp) + +#define tab (14*NUMSIZE)(%rsp) + +// Additional variables kept on the stack + +#define bf 2*NUMSIZE(%rsp) +#define cf 2*NUMSIZE+8(%rsp) +#define i 2*NUMSIZE+16(%rsp) +#define res 2*NUMSIZE+24(%rsp) + +// Total size to reserve on the stack (excluding local subroutines) + +#define NSPACE (46*NUMSIZE) + +// Sub-references used in local subroutines with local stack + +#define x_0 0(%rdi) +#define y_0 NUMSIZE(%rdi) +#define z_0 (2*NUMSIZE)(%rdi) +#define w_0 (3*NUMSIZE)(%rdi) + +#define x_1 0(%rsi) +#define y_1 NUMSIZE(%rsi) +#define z_1 (2*NUMSIZE)(%rsi) +#define w_1 (3*NUMSIZE)(%rsi) + +#define x_2 0(%rbp) +#define y_2 NUMSIZE(%rbp) +#define z_2 (2*NUMSIZE)(%rbp) +#define w_2 (3*NUMSIZE)(%rbp) + +#define t0 (0*NUMSIZE)(%rsp) +#define t1 (1*NUMSIZE)(%rsp) +#define t2 (2*NUMSIZE)(%rsp) +#define t3 (3*NUMSIZE)(%rsp) +#define t4 (4*NUMSIZE)(%rsp) +#define t5 (5*NUMSIZE)(%rsp) + +// Macro wrapping up the basic field multiplication, only trivially +// different from a pure function call to bignum_mul_p25519. 
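+//
+// For orientation, a rough C model of the first folding step used in all
+// the multiplications below (illustrative sketch only; the helper name
+// and unsigned __int128 are assumptions, not library code). Writing the
+// 512-bit product as l + 2^256 * h, it collapses to l + 38 * h using
+// 2^256 == 38 (mod p_25519), leaving a value only slightly over 256
+// bits; the small top part is folded once more via 2^255 == 19, where
+// bumping the quotient estimate by one (the incq below) lets the final
+// correction be a single masked subtraction of 19.
+//
+//   #include <stdint.h>
+//
+//   static void fold256(uint64_t z[4], uint64_t *z4,
+//                       const uint64_t l[4], const uint64_t h[4]) {
+//     unsigned __int128 c = 0;
+//     for (int i = 0; i < 4; i++) {
+//       c += (unsigned __int128) l[i] + (unsigned __int128) 38 * h[i];
+//       z[i] = (uint64_t) c; c >>= 64;
+//     }
+//     *z4 = (uint64_t) c;  // at most 38; absorbed by the second fold
+//   }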
+ +#define mul_p25519(P0,P1,P2) \ + xorl %ecx, %ecx ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rax, %r10 ; \ + addq %rax, %r9 ; \ + mulxq 0x10+P1, %rax, %r11 ; \ + adcq %rax, %r10 ; \ + mulxq 0x18+P1, %rax, %r12 ; \ + adcq %rax, %r11 ; \ + adcq %rcx, %r12 ; \ + xorl %ecx, %ecx ; \ + movq 0x8+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rcx, %r13 ; \ + adcxq %rcx, %r13 ; \ + xorl %ecx, %ecx ; \ + movq 0x10+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rcx, %r14 ; \ + adcxq %rcx, %r14 ; \ + xorl %ecx, %ecx ; \ + movq 0x18+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rcx, %r15 ; \ + adcxq %rcx, %r15 ; \ + movl $0x26, %edx ; \ + xorl %ecx, %ecx ; \ + mulxq %r12, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq %r13, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq %r14, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq %r15, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + adcxq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + movl $0x13, %edx ; \ + incq %r12; \ + bts $63, %r11 ; \ + mulxq %r12, %rax, %rbx ; \ + addq %rax, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + sbbq %rax, %rax ; \ + notq %rax; \ + andq %rdx, %rax ; \ + subq %rax, %r8 ; \ + sbbq %rcx, %r9 ; \ + sbbq %rcx, %r10 ; \ + sbbq %rcx, %r11 ; \ + btr $63, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. 
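+//
+// The cheaper final fold can be modelled in C as follows (illustrative
+// sketch only): split at bit 255 and add back 19 * (value >> 255). The
+// result is < 2^255 + 19 * 78 < 2 * p_25519, so no conditional
+// correction is needed when a doubly-reduced output is acceptable.
+//
+//   #include <stdint.h>
+//
+//   // z holds bits 0..255; z4 is the small fifth limb from the first fold
+//   static void fold255(uint64_t z[4], uint64_t z4) {
+//     uint64_t h = (z4 << 1) | (z[3] >> 63);  // value div 2^255
+//     z[3] &= 0x7FFFFFFFFFFFFFFFULL;          // value mod 2^255
+//     unsigned __int128 c = (unsigned __int128) 19 * h;
+//     for (int i = 0; i < 4; i++) {
+//       c += z[i];
+//       z[i] = (uint64_t) c; c >>= 64;
+//     }
+//   }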
+ +#define mul_4(P0,P1,P2) \ + xorl %ecx, %ecx ; \ + movq P2, %rdx ; \ + mulxq P1, %r8, %r9 ; \ + mulxq 0x8+P1, %rax, %r10 ; \ + addq %rax, %r9 ; \ + mulxq 0x10+P1, %rax, %r11 ; \ + adcq %rax, %r10 ; \ + mulxq 0x18+P1, %rax, %r12 ; \ + adcq %rax, %r11 ; \ + adcq %rcx, %r12 ; \ + xorl %ecx, %ecx ; \ + movq 0x8+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x18+P1, %rax, %r13 ; \ + adcxq %rax, %r12 ; \ + adoxq %rcx, %r13 ; \ + adcxq %rcx, %r13 ; \ + xorl %ecx, %ecx ; \ + movq 0x10+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x18+P1, %rax, %r14 ; \ + adcxq %rax, %r13 ; \ + adoxq %rcx, %r14 ; \ + adcxq %rcx, %r14 ; \ + xorl %ecx, %ecx ; \ + movq 0x18+P2, %rdx ; \ + mulxq P1, %rax, %rbx ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + mulxq 0x8+P1, %rax, %rbx ; \ + adcxq %rax, %r12 ; \ + adoxq %rbx, %r13 ; \ + mulxq 0x10+P1, %rax, %rbx ; \ + adcxq %rax, %r13 ; \ + adoxq %rbx, %r14 ; \ + mulxq 0x18+P1, %rax, %r15 ; \ + adcxq %rax, %r14 ; \ + adoxq %rcx, %r15 ; \ + adcxq %rcx, %r15 ; \ + movl $0x26, %edx ; \ + xorl %ecx, %ecx ; \ + mulxq %r12, %rax, %rbx ; \ + adcxq %rax, %r8 ; \ + adoxq %rbx, %r9 ; \ + mulxq %r13, %rax, %rbx ; \ + adcxq %rax, %r9 ; \ + adoxq %rbx, %r10 ; \ + mulxq %r14, %rax, %rbx ; \ + adcxq %rax, %r10 ; \ + adoxq %rbx, %r11 ; \ + mulxq %r15, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + adcxq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the final +// optional correction. 
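+//
+// The schoolbook pattern follows the identity
+//
+//   (x0 + x1*2^64 + x2*2^128 + x3*2^192)^2
+//     = sum_i xi^2 * 2^(128*i) + 2 * sum_{i<j} xi*xj * 2^(64*(i+j))
+//
+// so the six cross products are formed once, doubled by the chain of
+// adcx/adox self-additions, and the four squares interleaved in, before
+// the same folding as in mul_4.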
+ +#define sqr_4(P0,P1) \ + movq P1, %rdx ; \ + mulxq %rdx, %r8, %r15 ; \ + mulxq 0x8+P1, %r9, %r10 ; \ + mulxq 0x18+P1, %r11, %r12 ; \ + movq 0x10+P1, %rdx ; \ + mulxq 0x18+P1, %r13, %r14 ; \ + xorl %ebx, %ebx ; \ + mulxq P1, %rax, %rcx ; \ + adcxq %rax, %r10 ; \ + adoxq %rcx, %r11 ; \ + mulxq 0x8+P1, %rax, %rcx ; \ + adcxq %rax, %r11 ; \ + adoxq %rcx, %r12 ; \ + movq 0x18+P1, %rdx ; \ + mulxq 0x8+P1, %rax, %rcx ; \ + adcxq %rax, %r12 ; \ + adoxq %rcx, %r13 ; \ + adcxq %rbx, %r13 ; \ + adoxq %rbx, %r14 ; \ + adcq %rbx, %r14 ; \ + xorl %ebx, %ebx ; \ + adcxq %r9, %r9 ; \ + adoxq %r15, %r9 ; \ + movq 0x8+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r10, %r10 ; \ + adoxq %rax, %r10 ; \ + adcxq %r11, %r11 ; \ + adoxq %rdx, %r11 ; \ + movq 0x10+P1, %rdx ; \ + mulxq %rdx, %rax, %rdx ; \ + adcxq %r12, %r12 ; \ + adoxq %rax, %r12 ; \ + adcxq %r13, %r13 ; \ + adoxq %rdx, %r13 ; \ + movq 0x18+P1, %rdx ; \ + mulxq %rdx, %rax, %r15 ; \ + adcxq %r14, %r14 ; \ + adoxq %rax, %r14 ; \ + adcxq %rbx, %r15 ; \ + adoxq %rbx, %r15 ; \ + movl $0x26, %edx ; \ + xorl %ebx, %ebx ; \ + mulxq %r12, %rax, %rcx ; \ + adcxq %rax, %r8 ; \ + adoxq %rcx, %r9 ; \ + mulxq %r13, %rax, %rcx ; \ + adcxq %rax, %r9 ; \ + adoxq %rcx, %r10 ; \ + mulxq %r14, %rax, %rcx ; \ + adcxq %rax, %r10 ; \ + adoxq %rcx, %r11 ; \ + mulxq %r15, %rax, %r12 ; \ + adcxq %rax, %r11 ; \ + adoxq %rbx, %r12 ; \ + adcxq %rbx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rbx, %r9 ; \ + adcq %rbx, %r10 ; \ + adcq %rbx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + movq P1, %r8 ; \ + xorl %ebx, %ebx ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movl $38, %ecx ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %rax ; \ + sbbq 24+P2, %rax ; \ + cmovncq %rbx, %rcx ; \ + subq %rcx, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbx, %rax ; \ + movq %r8, P0 ; \ + movq %r9, 8+P0 ; \ + movq %r10, 16+P0 ; \ + movq %rax, 24+P0 + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. 
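+//
+// (Same arithmetic as the Arm version earlier in this patch; here the
+// final adcq or sbbq leaves the carry flag set exactly on wraparound
+// past 2^256, and cmovncq keeps or zeroes the 38 adjustment without
+// branching, mirroring the csel idiom.)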
+ +#define add_twice4(P0,P1,P2) \ + movq P1, %r8 ; \ + xorl %ecx, %ecx ; \ + addq P2, %r8 ; \ + movq 0x8+P1, %r9 ; \ + adcq 0x8+P2, %r9 ; \ + movq 0x10+P1, %r10 ; \ + adcq 0x10+P2, %r10 ; \ + movq 0x18+P1, %r11 ; \ + adcq 0x18+P2, %r11 ; \ + movl $38, %eax ; \ + cmovncq %rcx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +#define double_twice4(P0,P1) \ + movq P1, %r8 ; \ + xorl %ecx, %ecx ; \ + addq %r8, %r8 ; \ + movq 0x8+P1, %r9 ; \ + adcq %r9, %r9 ; \ + movq 0x10+P1, %r10 ; \ + adcq %r10, %r10 ; \ + movq 0x18+P1, %r11 ; \ + adcq %r11, %r11 ; \ + movl $38, %eax ; \ + cmovncq %rcx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Load the constant k_25519 = 2 * d_25519 using immediate operations + +#define load_k25519(P0) \ + movq $0xebd69b9426b2f159, %rax ; \ + movq %rax, P0 ; \ + movq $0x00e0149a8283b156, %rax ; \ + movq %rax, 8+P0 ; \ + movq $0x198e80f2eef3d130, %rax ; \ + movq %rax, 16+P0 ; \ + movq $0x2406d9dc56dffce7, %rax ; \ + movq %rax, 24+P0 + +S2N_BN_SYMBOL(edwards25519_scalarmuldouble): + +// In this case the Windows form literally makes a subroutine call. +// This avoids hassle arising from keeping code and data together. + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx + movq %r9, %rcx + callq edwards25519_scalarmuldouble_standard + popq %rsi + popq %rdi + ret + +edwards25519_scalarmuldouble_standard: +#endif + +// Save registers, make room for temps, preserve input arguments. + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $NSPACE, %rsp + +// Move the output pointer to a stable place + + movq %rdi, res + +// Copy scalars while recoding all 4-bit nybbles except the top +// one (bits 252..255) into signed 4-bit digits. This is essentially +// done just by adding the recoding constant 0x0888..888, after +// which all digits except the first have an implicit bias of -8, +// so 0 -> -8, 1 -> -7, ... 7 -> -1, 8 -> 0, 9 -> 1, ... 15 -> 7. +// (We could literally create 2s complement signed nybbles by +// XORing with the same constant 0x0888..888 afterwards, but it +// doesn't seem to make the end usage any simpler.) +// +// In order to ensure that the unrecoded top nybble (bits 252..255) +// does not become > 8 as a result of carries lower down from the +// recoding, we first (conceptually) subtract the group order iff +// the top digit of the scalar is > 2^63. In the implementation the +// reduction and recoding are combined by optionally using the +// modified recoding constant 0x0888...888 + (2^256 - group_order). 
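+//
+// A rough C model of this recoding (illustrative sketch only; the name
+// is hypothetical, the constants are exactly the immediates loaded
+// below, and the real code selects between them with cmov rather than
+// a branch on secret data):
+//
+//   #include <stdint.h>
+//
+//   static void recode_model(uint64_t s[4]) {
+//     static const uint64_t plain[4] =      // 0x0888...888
+//       { 0x8888888888888888ULL, 0x8888888888888888ULL,
+//         0x8888888888888888ULL, 0x0888888888888888ULL };
+//     static const uint64_t reduced[4] =    // plain + (2^256 - group_order)
+//       { 0xc7f56fb5a0d9e920ULL, 0xe190b99370cba1d5ULL,
+//         0x8888888888888887ULL, 0x8888888888888888ULL };
+//     const uint64_t *k =
+//       (s[3] > 0x8000000000000000ULL) ? reduced : plain;
+//     unsigned __int128 c = 0;
+//     for (int i = 0; i < 4; i++) {
+//       c += (unsigned __int128) s[i] + k[i];
+//       s[i] = (uint64_t) c; c >>= 64;
+//     }
+//   }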
+ + movq (%rcx), %r8 + movq 8(%rcx), %r9 + movq 16(%rcx), %r10 + movq 24(%rcx), %r11 + movq $0xc7f56fb5a0d9e920, %r12 + movq $0xe190b99370cba1d5, %r13 + movq $0x8888888888888887, %r14 + movq $0x8888888888888888, %r15 + movq $0x8000000000000000, %rax + movq $0x0888888888888888, %rbx + cmpq %r11, %rax + cmovncq %r15, %r12 + cmovncq %r15, %r13 + cmovncq %r15, %r14 + cmovncq %rbx, %r15 + addq %r12, %r8 + adcq %r13, %r9 + adcq %r14, %r10 + adcq %r15, %r11 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq %r10, 48(%rsp) + movq %r11, 56(%rsp) + + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + movq $0xc7f56fb5a0d9e920, %r12 + movq $0xe190b99370cba1d5, %r13 + movq $0x8888888888888887, %r14 + movq $0x8888888888888888, %r15 + movq $0x8000000000000000, %rax + movq $0x0888888888888888, %rbx + cmpq %r11, %rax + cmovncq %r15, %r12 + cmovncq %r15, %r13 + cmovncq %r15, %r14 + cmovncq %rbx, %r15 + addq %r12, %r8 + adcq %r13, %r9 + adcq %r14, %r10 + adcq %r15, %r11 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq %r10, 16(%rsp) + movq %r11, 24(%rsp) + +// Create table of multiples 1..8 of the general input point at "tab". +// Reduce the input coordinates x and y modulo 2^256 - 38 first, for the +// sake of definiteness; this is the reduction that will be maintained. +// We could slightly optimize the additions because we know the input +// point is affine (so Z = 1), but it doesn't seem worth the complication. + + movl $38, %eax + movq (%rdx), %r8 + xorl %ebx, %ebx + movq 8(%rdx), %r9 + xorl %ecx, %ecx + movq 16(%rdx), %r10 + xorl %esi, %esi + movq 24(%rdx), %r11 + addq %r8, %rax + adcq %r9, %rbx + adcq %r10, %rcx + adcq %r11, %rsi + cmovncq %r8, %rax + movq %rax, 448(%rsp) + cmovncq %r9, %rbx + movq %rbx, 456(%rsp) + cmovncq %r10, %rcx + movq %rcx, 464(%rsp) + cmovncq %r11, %rsi + movq %rsi, 472(%rsp) + + movl $38, %eax + movq 32(%rdx), %r8 + xorl %ebx, %ebx + movq 40(%rdx), %r9 + xorl %ecx, %ecx + movq 48(%rdx), %r10 + xorl %esi, %esi + movq 56(%rdx), %r11 + addq %r8, %rax + adcq %r9, %rbx + adcq %r10, %rcx + adcq %r11, %rsi + cmovncq %r8, %rax + movq %rax, 480(%rsp) + cmovncq %r9, %rbx + movq %rbx, 488(%rsp) + cmovncq %r10, %rcx + movq %rcx, 496(%rsp) + cmovncq %r11, %rsi + movq %rsi, 504(%rsp) + + movl $1, %eax + movq %rax, 512(%rsp) + xorl %eax, %eax + movq %rax, 520(%rsp) + movq %rax, 528(%rsp) + movq %rax, 536(%rsp) + + leaq 544(%rsp), %rdi + leaq 448(%rsp), %rsi + leaq 480(%rsp), %rbp + mul_4(x_0,x_1,x_2) + +// Multiple 2 + + leaq 576(%rsp), %rdi + leaq 448(%rsp), %rsi + callq edwards25519_scalarmuldouble_epdouble + +// Multiple 3 + + leaq 704(%rsp), %rdi + leaq 448(%rsp), %rsi + leaq 576(%rsp), %rbp + callq edwards25519_scalarmuldouble_epadd + +// Multiple 4 + + leaq 832(%rsp), %rdi + leaq 576(%rsp), %rsi + callq edwards25519_scalarmuldouble_epdouble + +// Multiple 5 + + leaq 960(%rsp), %rdi + leaq 448(%rsp), %rsi + leaq 832(%rsp), %rbp + callq edwards25519_scalarmuldouble_epadd + +// Multiple 6 + + leaq 1088(%rsp), %rdi + leaq 704(%rsp), %rsi + callq edwards25519_scalarmuldouble_epdouble + +// Multiple 7 + + leaq 1216(%rsp), %rdi + leaq 448(%rsp), %rsi + leaq 1088(%rsp), %rbp + callq edwards25519_scalarmuldouble_epadd + +// Multiple 8 + + leaq 1344(%rsp), %rdi + leaq 832(%rsp), %rsi + callq edwards25519_scalarmuldouble_epdouble + +// Handle the initialization, starting the loop counter at i = 252 +// and initializing acc to the sum of the table entries for the +// top nybbles of the scalars (the ones with no implicit -8 bias). 
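+//
+// All the selection blocks below follow one pattern, modelled roughly in
+// C here (illustrative sketch only; the name is hypothetical, and C
+// offers no constant-time guarantee, which is why the assembly uses
+// cmovzq): every candidate entry is read unconditionally and a mask
+// keeps the matching one, so the memory access pattern is independent
+// of the secret index. Entries are 12 limbs for the basepoint table and
+// 16 limbs for the dynamic table; index 0 leaves the preloaded neutral
+// element untouched.
+//
+//   #include <stdint.h>
+//
+//   static void ct_select(uint64_t out[12], const uint64_t tab[8][12],
+//                         uint64_t ix) {
+//     for (uint64_t k = 1; k <= 8; k++) {
+//       uint64_t mask = 0ULL - (k == ix);     // all ones iff k == ix
+//       for (int j = 0; j < 12; j++)
+//         out[j] = (out[j] & ~mask) | (tab[k - 1][j] & mask);
+//     }
+//   }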
+ + movq $252, %rax + movq %rax, i + +// Index for btable entry... + + movq 56(%rsp), %rax + shrq $60, %rax + movq %rax, bf + +// ...and constant-time indexing based on that index + + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + leaq edwards25519_scalarmuldouble_table(%rip), %rbp + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + 
movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $8, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + + movq %rax, 352(%rsp) + movq %rbx, 360(%rsp) + movq %rcx, 368(%rsp) + movq %rdx, 376(%rsp) + movq %r8, 384(%rsp) + movq %r9, 392(%rsp) + movq %r10, 400(%rsp) + movq %r11, 408(%rsp) + movq %r12, 416(%rsp) + movq %r13, 424(%rsp) + movq %r14, 432(%rsp) + movq %r15, 440(%rsp) + +// Index for table entry... + + movq 24(%rsp), %rax + shrq $60, %rax + movq %rax, bf + +// ...and constant-time indexing based on that index. +// Do the Y and Z fields first, to save on registers... + + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + + leaq 480(%rsp), %rbp + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, 
%r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $8, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + + movq %rax, 256(%rsp) + movq %rbx, 264(%rsp) + movq %rcx, 272(%rsp) + movq %rdx, 280(%rsp) + movq %r8, 288(%rsp) + movq %r9, 296(%rsp) + movq %r10, 304(%rsp) + movq %r11, 312(%rsp) + +// ...followed by the X and W fields + + leaq 448(%rsp), %rbp + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq 
%rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $8, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + + movq %rax, 224(%rsp) + movq %rbx, 232(%rsp) + movq %rcx, 240(%rsp) + movq %rdx, 248(%rsp) + movq %r8, 320(%rsp) + movq %r9, 328(%rsp) + movq %r10, 336(%rsp) + movq %r11, 344(%rsp) + +// Add those elements to initialize the accumulator for bit position 252 + + leaq 96(%rsp), %rdi + leaq 224(%rsp), %rsi + leaq 352(%rsp), %rbp + callq edwards25519_scalarmuldouble_pepadd + +// Main loop with acc = [scalar/2^i] * point + [bscalar/2^i] * basepoint +// Start with i = 252 for bits 248..251 and go down four at a time to 3..0 + +edwards25519_scalarmuldouble_loop: + + movq i, %rax + subq $4, %rax + movq %rax, i + +// Double to acc' = 2 * acc + + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + callq edwards25519_scalarmuldouble_pdouble + +// Get btable entry, first getting the adjusted bitfield... + + movq i, %rax + movq %rax, %rcx + shrq $6, %rax + movq 32(%rsp,%rax,8), %rax + shrq %cl, %rax + andq $15, %rax + + subq $8, %rax + sbbq %rcx, %rcx + xorq %rcx, %rax + subq %rcx, %rax + movq %rcx, cf + movq %rax, bf + +// ... then doing constant-time lookup with the appropriate index... 
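The cmp/cmovz ladder that follows is the standard constant-time table scan: every entry is read unconditionally and a flag-driven select keeps only the matching one. A minimal C sketch of the same pattern (illustrative only, not part of the patch; masked selects stand in for the cmov pairs, and the helper name is invented):

    #include <stdint.h>
    #include <stddef.h>

    // Select 1-based entry 'index' from a table of n entries of 'width'
    // 64-bit words each, touching every entry regardless of the index.
    // Index 0 keeps whatever 'out' already holds (the assembly initializes
    // the target registers to the neutral element for that case).
    static void ct_lookup(uint64_t *out, const uint64_t *table,
                          size_t n, size_t width, uint64_t index)
    {
        for (size_t i = 1; i <= n; i++) {
            uint64_t mask = 0 - (uint64_t)(i == index);   // all-ones on match
            for (size_t j = 0; j < width; j++)
                out[j] = (out[j] & ~mask) | (table[(i - 1) * width + j] & mask);
        }
    }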
+ + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + leaq edwards25519_scalarmuldouble_table(%rip), %rbp + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + 
+        cmovzq %rsi, %rbx
+        movq 16(%rbp), %rsi
+        cmovzq %rsi, %rcx
+        movq 24(%rbp), %rsi
+        cmovzq %rsi, %rdx
+        movq 32(%rbp), %rsi
+        cmovzq %rsi, %r8
+        movq 40(%rbp), %rsi
+        cmovzq %rsi, %r9
+        movq 48(%rbp), %rsi
+        cmovzq %rsi, %r10
+        movq 56(%rbp), %rsi
+        cmovzq %rsi, %r11
+        movq 64(%rbp), %rsi
+        cmovzq %rsi, %r12
+        movq 72(%rbp), %rsi
+        cmovzq %rsi, %r13
+        movq 80(%rbp), %rsi
+        cmovzq %rsi, %r14
+        movq 88(%rbp), %rsi
+        cmovzq %rsi, %r15
+        addq $96, %rbp
+
+        cmpq $8, bf
+        movq (%rbp), %rsi
+        cmovzq %rsi, %rax
+        movq 8(%rbp), %rsi
+        cmovzq %rsi, %rbx
+        movq 16(%rbp), %rsi
+        cmovzq %rsi, %rcx
+        movq 24(%rbp), %rsi
+        cmovzq %rsi, %rdx
+        movq 32(%rbp), %rsi
+        cmovzq %rsi, %r8
+        movq 40(%rbp), %rsi
+        cmovzq %rsi, %r9
+        movq 48(%rbp), %rsi
+        cmovzq %rsi, %r10
+        movq 56(%rbp), %rsi
+        cmovzq %rsi, %r11
+        movq 64(%rbp), %rsi
+        cmovzq %rsi, %r12
+        movq 72(%rbp), %rsi
+        cmovzq %rsi, %r13
+        movq 80(%rbp), %rsi
+        cmovzq %rsi, %r14
+        movq 88(%rbp), %rsi
+        cmovzq %rsi, %r15
+
+// ... then optionally negating before storing. The table entry
+// is in precomputed form and we currently have
+//
+// [%rdx;%rcx;%rbx;%rax] = y - x
+// [%r11;%r10;%r9;%r8] = x + y
+// [%r15;%r14;%r13;%r12] = 2 * d * x * y
+//
+// Negation for Edwards curves is -(x,y) = (-x,y), which in this modified
+// form amounts to swapping the first two fields and negating the third.
+// The negation does not always fully reduce even mod 2^256-38 in the zero
+// case, instead giving -0 = 2^256-38. But that is fine since the result is
+// always fed to a multiplication inside the "pepadd" function below that
+// handles any 256-bit input.
+
+        movq cf, %rdi
+        testq %rdi, %rdi
+
+        movq %rax, %rsi
+        cmovnzq %r8, %rsi
+        cmovnzq %rax, %r8
+        movq %rsi, 352(%rsp)
+        movq %r8, 384(%rsp)
+
+        movq %rbx, %rsi
+        cmovnzq %r9, %rsi
+        cmovnzq %rbx, %r9
+        movq %rsi, 360(%rsp)
+        movq %r9, 392(%rsp)
+
+        movq %rcx, %rsi
+        cmovnzq %r10, %rsi
+        cmovnzq %rcx, %r10
+        movq %rsi, 368(%rsp)
+        movq %r10, 400(%rsp)
+
+        movq %rdx, %rsi
+        cmovnzq %r11, %rsi
+        cmovnzq %rdx, %r11
+        movq %rsi, 376(%rsp)
+        movq %r11, 408(%rsp)
+
+        xorq %rdi, %r12
+        xorq %rdi, %r13
+        xorq %rdi, %r14
+        xorq %rdi, %r15
+        andq $37, %rdi
+        subq %rdi, %r12
+        sbbq $0, %r13
+        sbbq $0, %r14
+        sbbq $0, %r15
+        movq %r12, 416(%rsp)
+        movq %r13, 424(%rsp)
+        movq %r14, 432(%rsp)
+        movq %r15, 440(%rsp)
+
+// Get table entry, first getting the adjusted bitfield...
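The masked negation above and the bitfield decoding below both rest on small branch-free identities. Two C sketches of those identities, with invented helper names; the digit decoder mirrors the sub/sbb/xor/sub sequence that follows, and the conditional negation mirrors the xor-then-subtract-37 just applied to the 2*d*x*y field:

    #include <stdint.h>

    // Recoded digit at bit position i: take a 4-bit window, remove the
    // bias 8, and split into a sign mask (0 or all-ones) and a magnitude
    // in 0..8, all without branches.
    static void signed_digit(const uint64_t s[4], unsigned i,
                             uint64_t *mag, uint64_t *sign)
    {
        uint64_t d = (s[i >> 6] >> (i & 63)) & 15;   // raw 4-bit window
        int64_t v = (int64_t)d - 8;                  // remove recoding bias
        uint64_t m = 0 - (uint64_t)(v < 0);          // sign mask
        *sign = m;
        *mag = ((uint64_t)v ^ m) - m;                // |v|, branch-free
    }

    // Conditional negation modulo 2^256 - 38 for m in {0, ~0}: since
    // ~x = (2^256 - 1) - x, the negated case is ~x - 37 = (2^256 - 38) - x.
    // Inputs below 2^256 - 38 keep the borrow from escaping; -0 comes out
    // as 2^256 - 38 itself, which the later multiplication tolerates.
    static void cneg_2e256m38(uint64_t x[4], uint64_t m)
    {
        uint64_t c = 37 & m;
        for (int i = 0; i < 4; i++) {
            uint64_t w = x[i] ^ m;
            x[i] = w - c;
            c = (uint64_t)(w < c);
        }
    }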
+ + movq i, %rax + movq %rax, %rcx + shrq $6, %rax + movq (%rsp,%rax,8), %rax + shrq %cl, %rax + andq $15, %rax + + subq $8, %rax + sbbq %rcx, %rcx + xorq %rcx, %rax + subq %rcx, %rax + movq %rcx, cf + movq %rax, bf + +// ...and constant-time indexing based on that index +// Do the Y and Z fields first, to save on registers +// and store them back (they don't need any modification) + + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + + leaq 480(%rsp), %rbp + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $8, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + + movq %rax, 256(%rsp) + movq %rbx, 264(%rsp) + movq %rcx, 272(%rsp) + movq %rdx, 280(%rsp) + 
movq %r8, 288(%rsp) + movq %r9, 296(%rsp) + movq %r10, 304(%rsp) + movq %r11, 312(%rsp) + +// Now do the X and W fields... + + leaq 448(%rsp), %rbp + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $8, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + +// ... then optionally negate before storing the X and W fields. This +// time the table entry is extended-projective, and is here: +// +// [%rdx;%rcx;%rbx;%rax] = X +// [tabent+32] = Y +// [tabent+64] = Z +// [%r11;%r10;%r9;%r8] = W +// +// This time we just need to negate the X and the W fields. 
+// The crude way negation is done can result in values of X or W +// (when initially zero before negation) being exactly equal to +// 2^256-38, but the "pepadd" function handles that correctly. + + movq cf, %rdi + + xorq %rdi, %rax + xorq %rdi, %rbx + xorq %rdi, %rcx + xorq %rdi, %rdx + + xorq %rdi, %r8 + xorq %rdi, %r9 + xorq %rdi, %r10 + xorq %rdi, %r11 + + andq $37, %rdi + + subq %rdi, %rax + sbbq $0, %rbx + sbbq $0, %rcx + sbbq $0, %rdx + + movq %rax, 224(%rsp) + movq %rbx, 232(%rsp) + movq %rcx, 240(%rsp) + movq %rdx, 248(%rsp) + + subq %rdi, %r8 + sbbq $0, %r9 + sbbq $0, %r10 + sbbq $0, %r11 + + movq %r8, 320(%rsp) + movq %r9, 328(%rsp) + movq %r10, 336(%rsp) + movq %r11, 344(%rsp) + +// Double to acc' = 4 * acc + + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + callq edwards25519_scalarmuldouble_pdouble + +// Add tabent := tabent + btabent + + leaq 224(%rsp), %rdi + leaq 224(%rsp), %rsi + leaq 352(%rsp), %rbp + callq edwards25519_scalarmuldouble_pepadd + +// Double to acc' = 8 * acc + + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + callq edwards25519_scalarmuldouble_pdouble + +// Double to acc' = 16 * acc + + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + callq edwards25519_scalarmuldouble_epdouble + +// Add table entry, acc := acc + tabent + + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + leaq 224(%rsp), %rbp + callq edwards25519_scalarmuldouble_epadd + +// Loop down + + movq i, %rax + testq %rax, %rax + jnz edwards25519_scalarmuldouble_loop + +// Modular inverse setup + + movq $4, %rdi + leaq 224(%rsp), %rsi + leaq 160(%rsp), %rdx + leaq edwards25519_scalarmuldouble_p25519(%rip), %rcx + leaq 352(%rsp), %r8 + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "x86/generic/bignum_modinv.S". Note +// that the stack it uses for its own temporaries is 80 bytes so it +// only overwrites local variables that are no longer needed. 
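The inlined code begins by deriving an inverse of the modulus' low word modulo 2^64 with a short chain of multiplies. A C sketch of the underlying Newton/Hensel doubling idea (the assembly computes a negated variant with slightly different algebra, so this is an illustration of the principle rather than a transcription):

    #include <stdint.h>

    // For odd a, if x == a^{-1} (mod 2^k) then x*(2 - a*x) == a^{-1}
    // (mod 2^2k), so the precision doubles each step; x = a is already
    // correct mod 2^3 because a*a == 1 (mod 8) for odd a.
    static uint64_t word_inverse(uint64_t a)
    {
        uint64_t x = a;
        for (int i = 0; i < 5; i++)
            x *= 2 - a * x;        // 3 -> 6 -> 12 -> 24 -> 48 -> 96 bits
        return x;                  // a*x == 1 (mod 2^64)
    }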
+ + movq %rsi, 0x40(%rsp) + movq %r8, 0x38(%rsp) + movq %rcx, 0x48(%rsp) + leaq (%r8,%rdi,8), %r10 + movq %r10, 0x30(%rsp) + leaq (%r10,%rdi,8), %r15 + xorq %r11, %r11 + xorq %r9, %r9 +edwards25519_scalarmuldouble_copyloop: + movq (%rdx,%r9,8), %rax + movq (%rcx,%r9,8), %rbx + movq %rax, (%r10,%r9,8) + movq %rbx, (%r15,%r9,8) + movq %rbx, (%r8,%r9,8) + movq %r11, (%rsi,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb edwards25519_scalarmuldouble_copyloop + movq (%r8), %rax + movq %rax, %rbx + decq %rbx + movq %rbx, (%r8) + movq %rax, %rbp + movq %rax, %r12 + shlq $0x2, %rbp + subq %rbp, %r12 + xorq $0x2, %r12 + movq %r12, %rbp + imulq %rax, %rbp + movl $0x2, %eax + addq %rbp, %rax + addq $0x1, %rbp + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + movq %r12, 0x28(%rsp) + movq %rdi, %rax + shlq $0x7, %rax + movq %rax, 0x20(%rsp) +edwards25519_scalarmuldouble_outerloop: + movq 0x20(%rsp), %r13 + addq $0x3f, %r13 + shrq $0x6, %r13 + cmpq %rdi, %r13 + cmovaeq %rdi, %r13 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi + xorq %r11, %r11 + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 +edwards25519_scalarmuldouble_toploop: + movq (%r8,%r9,8), %rbx + movq (%r15,%r9,8), %rcx + movq %r11, %r10 + andq %r12, %r10 + andq %rbp, %r11 + movq %rbx, %rax + orq %rcx, %rax + negq %rax + cmovbq %r10, %r14 + cmovbq %r11, %rsi + cmovbq %rbx, %r12 + cmovbq %rcx, %rbp + sbbq %r11, %r11 + incq %r9 + cmpq %r13, %r9 + jb edwards25519_scalarmuldouble_toploop + movq %r12, %rax + orq %rbp, %rax + bsrq %rax, %rcx + xorq $0x3f, %rcx + shldq %cl, %r14, %r12 + shldq %cl, %rsi, %rbp + movq (%r8), %rax + movq %rax, %r14 + movq (%r15), %rax + movq %rax, %rsi + movl $0x1, %r10d + movl $0x0, %r11d + movl $0x0, %ecx + movl $0x1, %edx + movl $0x3a, %r9d + movq %rdi, 0x8(%rsp) + movq %r13, 0x10(%rsp) + movq %r8, (%rsp) + movq %r15, 0x18(%rsp) +edwards25519_scalarmuldouble_innerloop: + xorl %eax, %eax + xorl %ebx, %ebx + xorq %r8, %r8 + xorq %r15, %r15 + btq $0x0, %r14 + cmovbq %rbp, %rax + cmovbq %rsi, %rbx + cmovbq %rcx, %r8 + cmovbq %rdx, %r15 + movq %r14, %r13 + subq %rbx, %r14 + subq %r13, %rbx + movq %r12, %rdi + subq %rax, %rdi + cmovbq %r12, %rbp + leaq -0x1(%rdi), %r12 + cmovbq %rbx, %r14 + cmovbq %r13, %rsi + notq %r12 + cmovbq %r10, %rcx + cmovbq %r11, %rdx + cmovaeq %rdi, %r12 + shrq $1, %r14 + addq %r8, %r10 + addq %r15, %r11 + shrq $1, %r12 + addq %rcx, %rcx + addq %rdx, %rdx + decq %r9 + jne edwards25519_scalarmuldouble_innerloop + movq 0x8(%rsp), %rdi + movq 0x10(%rsp), %r13 + movq (%rsp), %r8 + movq 0x18(%rsp), %r15 + movq %r10, (%rsp) + movq %r11, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rdx, 0x18(%rsp) + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + xorq %r14, %r14 + xorq %rsi, %rsi + xorq %r10, %r10 + xorq %r11, %r11 + xorq %r9, %r9 +edwards25519_scalarmuldouble_congloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r12 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %rbp + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq %rdx, %r12 + shrdq $0x3a, %r14, %r10 + movq %r10, (%r8,%r9,8) + movq %r14, %r10 + movq %r12, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3a, %rsi, %r11 + movq %r11, (%r15,%r9,8) + movq %rsi, %r11 + 
movq %rbp, %rsi + incq %r9 + cmpq %rdi, %r9 + jb edwards25519_scalarmuldouble_congloop + shldq $0x6, %r10, %r14 + shldq $0x6, %r11, %rsi + movq 0x48(%rsp), %r15 + movq (%r8), %rbx + movq 0x28(%rsp), %r12 + imulq %rbx, %r12 + movq (%r15), %rax + mulq %r12 + addq %rbx, %rax + movq %rdx, %r10 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je edwards25519_scalarmuldouble_wmontend +edwards25519_scalarmuldouble_wmontloop: + adcq (%r8,%r9,8), %r10 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %r12 + subq %rbx, %rdx + addq %r10, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r10 + incq %r9 + decq %rcx + jne edwards25519_scalarmuldouble_wmontloop +edwards25519_scalarmuldouble_wmontend: + adcq %r14, %r10 + movq %r10, -0x8(%r8,%rdi,8) + sbbq %r10, %r10 + negq %r10 + movq %rdi, %rcx + xorq %r9, %r9 +edwards25519_scalarmuldouble_wcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne edwards25519_scalarmuldouble_wcmploop + sbbq $0x0, %r10 + sbbq %r10, %r10 + notq %r10 + xorq %rcx, %rcx + xorq %r9, %r9 +edwards25519_scalarmuldouble_wcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r10, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb edwards25519_scalarmuldouble_wcorrloop + movq 0x40(%rsp), %r8 + movq (%r8), %rbx + movq 0x28(%rsp), %rbp + imulq %rbx, %rbp + movq (%r15), %rax + mulq %rbp + addq %rbx, %rax + movq %rdx, %r11 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je edwards25519_scalarmuldouble_zmontend +edwards25519_scalarmuldouble_zmontloop: + adcq (%r8,%r9,8), %r11 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %rbp + subq %rbx, %rdx + addq %r11, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r11 + incq %r9 + decq %rcx + jne edwards25519_scalarmuldouble_zmontloop +edwards25519_scalarmuldouble_zmontend: + adcq %rsi, %r11 + movq %r11, -0x8(%r8,%rdi,8) + sbbq %r11, %r11 + negq %r11 + movq %rdi, %rcx + xorq %r9, %r9 +edwards25519_scalarmuldouble_zcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne edwards25519_scalarmuldouble_zcmploop + sbbq $0x0, %r11 + sbbq %r11, %r11 + notq %r11 + xorq %rcx, %rcx + xorq %r9, %r9 +edwards25519_scalarmuldouble_zcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r11, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb edwards25519_scalarmuldouble_zcorrloop + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi +edwards25519_scalarmuldouble_crossloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %r11 + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + subq %r12, %rdx + subq %rax, %r14 + sbbq %rdx, %r10 + sbbq %r12, %r12 + movq %r14, (%r8,%r9,8) + movq %r10, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + subq %rbp, %rdx + subq %rax, %rsi + sbbq %rdx, %r11 + sbbq %rbp, %rbp + movq %rsi, (%r15,%r9,8) + movq %r11, %rsi + incq %r9 + cmpq %r13, %r9 + jb edwards25519_scalarmuldouble_crossloop + xorq %r9, %r9 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r12, %r14 + xorq %rbp, %rsi +edwards25519_scalarmuldouble_optnegloop: + movq (%r8,%r9,8), %rax + xorq %r12, %rax + negq %r10 + adcq $0x0, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rax + xorq %rbp, %rax + negq %r11 + adcq 
$0x0, %rax + sbbq %r11, %r11 + movq %rax, (%r15,%r9,8) + incq %r9 + cmpq %r13, %r9 + jb edwards25519_scalarmuldouble_optnegloop + subq %r10, %r14 + subq %r11, %rsi + movq %r13, %r9 +edwards25519_scalarmuldouble_shiftloop: + movq -0x8(%r8,%r9,8), %rax + movq %rax, %r10 + shrdq $0x3a, %r14, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %r10, %r14 + movq -0x8(%r15,%r9,8), %rax + movq %rax, %r11 + shrdq $0x3a, %rsi, %rax + movq %rax, -0x8(%r15,%r9,8) + movq %r11, %rsi + decq %r9 + jne edwards25519_scalarmuldouble_shiftloop + notq %rbp + movq 0x48(%rsp), %rcx + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r9, %r9 +edwards25519_scalarmuldouble_fliploop: + movq %rbp, %rdx + movq (%rcx,%r9,8), %rax + andq %rax, %rdx + andq %r12, %rax + movq (%r8,%r9,8), %rbx + xorq %r12, %rbx + negq %r10 + adcq %rbx, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rbx + xorq %rbp, %rbx + negq %r11 + adcq %rbx, %rdx + sbbq %r11, %r11 + movq %rdx, (%r15,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb edwards25519_scalarmuldouble_fliploop + subq $0x3a, 0x20(%rsp) + ja edwards25519_scalarmuldouble_outerloop + +// Store result + + movq res, %rdi + leaq 96(%rsp), %rsi + leaq 224(%rsp), %rbp + mul_p25519(x_0,x_1,x_2) + + movq res, %rdi + addq $32, %rdi + leaq 128(%rsp), %rsi + leaq 224(%rsp), %rbp + mul_p25519(x_0,x_1,x_2) + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +// **************************************************************************** +// Localized versions of subroutines. +// These are close to the standalone functions "edwards25519_epdouble" etc., +// but are only maintaining reduction modulo 2^256 - 38, not 2^255 - 19. +// **************************************************************************** + +edwards25519_scalarmuldouble_epdouble: + sub $(5*NUMSIZE), %rsp + add_twice4(t0,x_1,y_1) + sqr_4(t1,z_1) + sqr_4(t2,x_1) + sqr_4(t3,y_1) + double_twice4(t1,t1) + sqr_4(t0,t0) + add_twice4(t4,t2,t3) + sub_twice4(t2,t2,t3) + add_twice4(t3,t1,t2) + sub_twice4(t1,t4,t0) + mul_4(y_0,t2,t4) + mul_4(z_0,t3,t2) + mul_4(w_0,t1,t4) + mul_4(x_0,t1,t3) + add $(5*NUMSIZE), %rsp + ret + +edwards25519_scalarmuldouble_pdouble: + sub $(5*NUMSIZE), %rsp + add_twice4(t0,x_1,y_1) + sqr_4(t1,z_1) + sqr_4(t2,x_1) + sqr_4(t3,y_1) + double_twice4(t1,t1) + sqr_4(t0,t0) + add_twice4(t4,t2,t3) + sub_twice4(t2,t2,t3) + add_twice4(t3,t1,t2) + sub_twice4(t1,t4,t0) + mul_4(y_0,t2,t4) + mul_4(z_0,t3,t2) + mul_4(x_0,t1,t3) + add $(5*NUMSIZE), %rsp + ret + +edwards25519_scalarmuldouble_epadd: + sub $(6*NUMSIZE), %rsp + mul_4(t0,w_1,w_2) + sub_twice4(t1,y_1,x_1) + sub_twice4(t2,y_2,x_2) + add_twice4(t3,y_1,x_1) + add_twice4(t4,y_2,x_2) + double_twice4(t5,z_2) + mul_4(t1,t1,t2) + mul_4(t3,t3,t4) + load_k25519(t2) + mul_4(t2,t2,t0) + mul_4(t4,z_1,t5) + sub_twice4(t0,t3,t1) + add_twice4(t5,t3,t1) + sub_twice4(t1,t4,t2) + add_twice4(t3,t4,t2) + mul_4(w_0,t0,t5) + mul_4(x_0,t0,t1) + mul_4(y_0,t3,t5) + mul_4(z_0,t1,t3) + add $(6*NUMSIZE), %rsp + ret + +edwards25519_scalarmuldouble_pepadd: + sub $(6*NUMSIZE), %rsp + double_twice4(t0,z_1); + sub_twice4(t1,y_1,x_1); + add_twice4(t2,y_1,x_1); + mul_4(t3,w_1,z_2); + mul_4(t1,t1,x_2); + mul_4(t2,t2,y_2); + sub_twice4(t4,t0,t3); + add_twice4(t0,t0,t3); + sub_twice4(t5,t2,t1); + add_twice4(t1,t2,t1); + mul_4(z_0,t4,t0); + mul_4(x_0,t5,t4); + mul_4(y_0,t0,t1); + mul_4(w_0,t5,t1); + add $(6*NUMSIZE), %rsp + ret + +// 
**************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// The modulus p_25519 = 2^255 - 19, for the modular inverse + +edwards25519_scalarmuldouble_p25519: + .quad 0xffffffffffffffed + .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x7fffffffffffffff + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. + +edwards25519_scalarmuldouble_table: + + // 1 * G + + .quad 0x9d103905d740913e + .quad 0xfd399f05d140beb3 + .quad 0xa5c18434688f8a09 + .quad 0x44fd2f9298f81267 + .quad 0x2fbc93c6f58c3b85 + .quad 0xcf932dc6fb8c0e19 + .quad 0x270b4898643d42c2 + .quad 0x07cf9d3a33d4ba65 + .quad 0xabc91205877aaa68 + .quad 0x26d9e823ccaac49e + .quad 0x5a1b7dcbdd43598c + .quad 0x6f117b689f0c65a8 + + // 2 * G + + .quad 0x8a99a56042b4d5a8 + .quad 0x8f2b810c4e60acf6 + .quad 0xe09e236bb16e37aa + .quad 0x6bb595a669c92555 + .quad 0x9224e7fc933c71d7 + .quad 0x9f469d967a0ff5b5 + .quad 0x5aa69a65e1d60702 + .quad 0x590c063fa87d2e2e + .quad 0x43faa8b3a59b7a5f + .quad 0x36c16bdd5d9acf78 + .quad 0x500fa0840b3d6a31 + .quad 0x701af5b13ea50b73 + + // 3 * G + + .quad 0x56611fe8a4fcd265 + .quad 0x3bd353fde5c1ba7d + .quad 0x8131f31a214bd6bd + .quad 0x2ab91587555bda62 + .quad 0xaf25b0a84cee9730 + .quad 0x025a8430e8864b8a + .quad 0xc11b50029f016732 + .quad 0x7a164e1b9a80f8f4 + .quad 0x14ae933f0dd0d889 + .quad 0x589423221c35da62 + .quad 0xd170e5458cf2db4c + .quad 0x5a2826af12b9b4c6 + + // 4 * G + + .quad 0x95fe050a056818bf + .quad 0x327e89715660faa9 + .quad 0xc3e8e3cd06a05073 + .quad 0x27933f4c7445a49a + .quad 0x287351b98efc099f + .quad 0x6765c6f47dfd2538 + .quad 0xca348d3dfb0a9265 + .quad 0x680e910321e58727 + .quad 0x5a13fbe9c476ff09 + .quad 0x6e9e39457b5cc172 + .quad 0x5ddbdcf9102b4494 + .quad 0x7f9d0cbf63553e2b + + // 5 * G + + .quad 0x7f9182c3a447d6ba + .quad 0xd50014d14b2729b7 + .quad 0xe33cf11cb864a087 + .quad 0x154a7e73eb1b55f3 + .quad 0xa212bc4408a5bb33 + .quad 0x8d5048c3c75eed02 + .quad 0xdd1beb0c5abfec44 + .quad 0x2945ccf146e206eb + .quad 0xbcbbdbf1812a8285 + .quad 0x270e0807d0bdd1fc + .quad 0xb41b670b1bbda72d + .quad 0x43aabe696b3bb69a + + // 6 * G + + .quad 0x499806b67b7d8ca4 + .quad 0x575be28427d22739 + .quad 0xbb085ce7204553b9 + .quad 0x38b64c41ae417884 + .quad 0x3a0ceeeb77157131 + .quad 0x9b27158900c8af88 + .quad 0x8065b668da59a736 + .quad 0x51e57bb6a2cc38bd + .quad 0x85ac326702ea4b71 + .quad 0xbe70e00341a1bb01 + .quad 0x53e4a24b083bc144 + .quad 0x10b8e91a9f0d61e3 + + // 7 * G + + .quad 0xba6f2c9aaa3221b1 + .quad 0x6ca021533bba23a7 + .quad 0x9dea764f92192c3a + .quad 0x1d6edd5d2e5317e0 + .quad 0x6b1a5cd0944ea3bf + .quad 0x7470353ab39dc0d2 + .quad 0x71b2528228542e49 + .quad 0x461bea69283c927e + .quad 0xf1836dc801b8b3a2 + .quad 0xb3035f47053ea49a + .quad 0x529c41ba5877adf3 + .quad 0x7a9fbb1c6a0f90a7 + + // 8 * G + + .quad 0xe2a75dedf39234d9 + .quad 0x963d7680e1b558f9 + .quad 0x2c2741ac6e3c23fb + .quad 0x3a9024a1320e01c3 + .quad 0x59b7596604dd3e8f + .quad 0x6cb30377e288702c + .quad 0xb1339c665ed9c323 + .quad 0x0915e76061bce52f + .quad 0xe7c1f5d9c9a2911a + .quad 0xb8a371788bcca7d7 + .quad 0x636412190eb62a32 + .quad 0x26907c5c2ecc4e95 + +#if defined(__linux__) && 
defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/curve25519/edwards25519_scalarmuldouble_alt.S b/x86_att/curve25519/edwards25519_scalarmuldouble_alt.S new file mode 100644 index 0000000000..7f3dffa395 --- /dev/null +++ b/x86_att/curve25519/edwards25519_scalarmuldouble_alt.S @@ -0,0 +1,2783 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Double scalar multiplication for edwards25519, fresh and base point +// Input scalar[4], point[8], bscalar[4]; output res[8] +// +// extern void edwards25519_scalarmuldouble_alt +// (uint64_t res[static 8],uint64_t scalar[static 4], +// uint64_t point[static 8],uint64_t bscalar[static 4]); +// +// Given scalar = n, point = P and bscalar = m, returns in res +// the point (X,Y) = n * P + m * B where B = (...,4/5) is +// the standard basepoint for the edwards25519 (Ed25519) curve. +// +// Both 256-bit coordinates of the input point P are implicitly +// reduced modulo 2^255-19 if they are not already in reduced form, +// but the conventional usage is that they *are* already reduced. +// The scalars can be arbitrary 256-bit numbers but may also be +// considered as implicitly reduced modulo the group order. +// +// Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point, RCX = bscalar +// Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point, R9 = bscalar +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_scalarmuldouble_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_scalarmuldouble_alt) + .text + +// Size of individual field elements + +#define NUMSIZE 32 + +// Pointer-offset pairs for result and temporaries on stack with some aliasing. +// Both "resx" and "resy" assume the "res" pointer has been preloaded into %rbp. + +#define resx (0*NUMSIZE)(%rbp) +#define resy (1*NUMSIZE)(%rbp) + +#define scalar (0*NUMSIZE)(%rsp) +#define bscalar (1*NUMSIZE)(%rsp) + +#define acc (3*NUMSIZE)(%rsp) + +#define tabent (7*NUMSIZE)(%rsp) +#define btabent (11*NUMSIZE)(%rsp) + +#define tab (14*NUMSIZE)(%rsp) + +// Additional variables kept on the stack + +#define bf 2*NUMSIZE(%rsp) +#define cf 2*NUMSIZE+8(%rsp) +#define i 2*NUMSIZE+16(%rsp) +#define res 2*NUMSIZE+24(%rsp) + +// Total size to reserve on the stack (excluding local subroutines) + +#define NSPACE (46*NUMSIZE) + +// Sub-references used in local subroutines with local stack + +#define x_0 0(%rdi) +#define y_0 NUMSIZE(%rdi) +#define z_0 (2*NUMSIZE)(%rdi) +#define w_0 (3*NUMSIZE)(%rdi) + +#define x_1 0(%rsi) +#define y_1 NUMSIZE(%rsi) +#define z_1 (2*NUMSIZE)(%rsi) +#define w_1 (3*NUMSIZE)(%rsi) + +#define x_2 0(%rbp) +#define y_2 NUMSIZE(%rbp) +#define z_2 (2*NUMSIZE)(%rbp) +#define w_2 (3*NUMSIZE)(%rbp) + +#define t0 (0*NUMSIZE)(%rsp) +#define t1 (1*NUMSIZE)(%rsp) +#define t2 (2*NUMSIZE)(%rsp) +#define t3 (3*NUMSIZE)(%rsp) +#define t4 (4*NUMSIZE)(%rsp) +#define t5 (5*NUMSIZE)(%rsp) + +// Macro wrapping up the basic field multiplication, only trivially +// different from a pure function call to bignum_mul_p25519_alt. 
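Concretely, the macro's last step reduces fully modulo p_25519 with a deliberately overshooting quotient estimate. A C sketch of just that reduction, applied to a value already folded below 2^256 (an illustration under that assumption; the macro itself works on the raw 512-bit product and interleaves the folding with the schoolbook multiply):

    #include <stdint.h>
    typedef unsigned __int128 u128;

    // Canonical reduction of t < 2^256 modulo p = 2^255 - 19. Writing
    // t = b*2^255 + r gives t == r + 19*b (mod p). Add 19*(b+1), i.e.
    // overshoot by one quotient step, then take the 19 back if the sum
    // did not wrap past 2^255.
    static void reduce_p25519(uint64_t z[4], const uint64_t t[4])
    {
        uint64_t b = t[3] >> 63;
        uint64_t hi = t[3] & 0x7fffffffffffffffULL;
        uint64_t r[4];
        u128 acc = (u128)t[0] + 19 * (u128)(b + 1);
        r[0] = (uint64_t)acc; acc >>= 64;
        acc += t[1]; r[1] = (uint64_t)acc; acc >>= 64;
        acc += t[2]; r[2] = (uint64_t)acc; acc >>= 64;
        acc += hi;   r[3] = (uint64_t)acc;
        uint64_t wrapped = r[3] >> 63;       // wrap means r + 19*b >= p
        r[3] &= 0x7fffffffffffffffULL;
        uint64_t c = 19 & (wrapped - 1);     // undo overshoot if no wrap
        for (int i = 0; i < 4; i++) {
            uint64_t w = r[i];
            z[i] = w - c;
            c = (uint64_t)(w < c);
        }
    }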
+ +#define mul_p25519(P0,P1,P2) \ + movq P1, %rax ; \ + mulq P2; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + xorq %r10, %r10 ; \ + xorq %r11, %r11 ; \ + movq P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + movq 0x8+P1, %rax ; \ + mulq P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + xorq %r12, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq %r12, %r12 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + movq 0x10+P1, %rax ; \ + mulq P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorq %r13, %r13 ; \ + movq P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq %r13, %r13 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x18+P1, %rax ; \ + mulq P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + xorq %r14, %r14 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq %r14, %r14 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + xorq %r15, %r15 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq %r15, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r14 ; \ + adcq %rdx, %r15 ; \ + movl $0x26, %esi ; \ + movq %r12, %rax ; \ + mulq %rsi; \ + addq %rax, %r8 ; \ + adcq %rdx, %r9 ; \ + sbbq %rcx, %rcx ; \ + movq %r13, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + sbbq %rcx, %rcx ; \ + movq %r14, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + sbbq %rcx, %rcx ; \ + movq %r15, %rax ; \ + mulq %rsi; \ + subq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + addq %rax, %r11 ; \ + movq %rdx, %r12 ; \ + adcq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + leaq 0x1(%r12), %rax ; \ + movl $0x13, %esi ; \ + bts $63, %r11 ; \ + imulq %rsi, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + sbbq %rax, %rax ; \ + notq %rax; \ + andq %rsi, %rax ; \ + subq %rax, %r8 ; \ + sbbq %rcx, %r9 ; \ + sbbq %rcx, %r10 ; \ + sbbq %rcx, %r11 ; \ + btr $63, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// A version of multiplication that only guarantees output < 2 * p_25519. +// This basically skips the +1 and final correction in quotient estimation. 
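Both multipliers first fold the high 256 bits of the product back in via the congruence 2^256 == 38 (mod p_25519). A C sketch of that folding on its own (illustrative; the data-dependent refold loop here is what the assembly replaces with a single quotient estimate, keeping it constant-time):

    #include <stdint.h>
    typedef unsigned __int128 u128;

    // Fold a 512-bit product x (little-endian words) down to 4 words
    // congruent mod p_25519 = 2^255 - 19, using 2^256 == 38. Like mul_4
    // below, the result fits in 256 bits but is not fully reduced below p.
    static void fold_mod_p25519(uint64_t z[4], const uint64_t x[8])
    {
        u128 acc = 0;
        for (int i = 0; i < 4; i++) {
            acc += (u128)x[i] + (u128)38 * x[4 + i];
            z[i] = (uint64_t)acc;
            acc >>= 64;
        }
        uint64_t c = (uint64_t)acc;           // overflow word, c <= 38
        while (c) {                           // refold; runs at most twice
            acc = (u128)z[0] + (u128)38 * c;
            z[0] = (uint64_t)acc; acc >>= 64;
            for (int i = 1; i < 4; i++) {
                acc += z[i];
                z[i] = (uint64_t)acc; acc >>= 64;
            }
            c = (uint64_t)acc;
        }
    }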
+ +#define mul_4(P0,P1,P2) \ + movq P1, %rax ; \ + mulq P2; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + xorq %r10, %r10 ; \ + xorq %r11, %r11 ; \ + movq P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + movq 0x8+P1, %rax ; \ + mulq P2; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + xorq %r12, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq %r12, %r12 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + movq 0x10+P1, %rax ; \ + mulq P2; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorq %r13, %r13 ; \ + movq P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq %r13, %r13 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x18+P1, %rax ; \ + mulq P2; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + xorq %r14, %r14 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq %r14, %r14 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x8+P2; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + xorq %r15, %r15 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq %r15, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x10+P2; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq 0x18+P2; \ + addq %rax, %r14 ; \ + adcq %rdx, %r15 ; \ + movl $0x26, %ebx ; \ + movq %r12, %rax ; \ + mulq %rbx; \ + addq %rax, %r8 ; \ + adcq %rdx, %r9 ; \ + sbbq %rcx, %rcx ; \ + movq %r13, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + sbbq %rcx, %rcx ; \ + movq %r14, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + sbbq %rcx, %rcx ; \ + movq %r15, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + addq %rax, %r11 ; \ + movq %rdx, %r12 ; \ + adcq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Squaring just giving a result < 2 * p_25519, which is done by +// basically skipping the +1 in the quotient estimate and the final +// optional correction. 
+ +#define sqr_4(P0,P1) \ + movq P1, %rax ; \ + mulq %rax; \ + movq %rax, %r8 ; \ + movq %rdx, %r9 ; \ + xorq %r10, %r10 ; \ + xorq %r11, %r11 ; \ + movq P1, %rax ; \ + mulq 0x8+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r11 ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + adcq $0x0, %r11 ; \ + xorq %r12, %r12 ; \ + movq 0x8+P1, %rax ; \ + mulq %rax; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + movq P1, %rax ; \ + mulq 0x10+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r12 ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + adcq $0x0, %r12 ; \ + xorq %r13, %r13 ; \ + movq P1, %rax ; \ + mulq 0x18+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r13 ; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x10+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r13 ; \ + addq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ + adcq $0x0, %r13 ; \ + xorq %r14, %r14 ; \ + movq 0x8+P1, %rax ; \ + mulq 0x18+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r14 ; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + movq 0x10+P1, %rax ; \ + mulq %rax; \ + addq %rax, %r12 ; \ + adcq %rdx, %r13 ; \ + adcq $0x0, %r14 ; \ + xorq %r15, %r15 ; \ + movq 0x10+P1, %rax ; \ + mulq 0x18+P1; \ + addq %rax, %rax ; \ + adcq %rdx, %rdx ; \ + adcq $0x0, %r15 ; \ + addq %rax, %r13 ; \ + adcq %rdx, %r14 ; \ + adcq $0x0, %r15 ; \ + movq 0x18+P1, %rax ; \ + mulq %rax; \ + addq %rax, %r14 ; \ + adcq %rdx, %r15 ; \ + movl $0x26, %ebx ; \ + movq %r12, %rax ; \ + mulq %rbx; \ + addq %rax, %r8 ; \ + adcq %rdx, %r9 ; \ + sbbq %rcx, %rcx ; \ + movq %r13, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + addq %rax, %r9 ; \ + adcq %rdx, %r10 ; \ + sbbq %rcx, %rcx ; \ + movq %r14, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + addq %rax, %r10 ; \ + adcq %rdx, %r11 ; \ + sbbq %rcx, %rcx ; \ + movq %r15, %rax ; \ + mulq %rbx; \ + subq %rcx, %rdx ; \ + xorq %rcx, %rcx ; \ + addq %rax, %r11 ; \ + movq %rdx, %r12 ; \ + adcq %rcx, %r12 ; \ + shldq $0x1, %r11, %r12 ; \ + btr $0x3f, %r11 ; \ + movl $0x13, %edx ; \ + imulq %r12, %rdx ; \ + addq %rdx, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38 + +#define sub_twice4(P0,P1,P2) \ + movq P1, %r8 ; \ + xorl %ebx, %ebx ; \ + subq P2, %r8 ; \ + movq 8+P1, %r9 ; \ + sbbq 8+P2, %r9 ; \ + movl $38, %ecx ; \ + movq 16+P1, %r10 ; \ + sbbq 16+P2, %r10 ; \ + movq 24+P1, %rax ; \ + sbbq 24+P2, %rax ; \ + cmovncq %rbx, %rcx ; \ + subq %rcx, %r8 ; \ + sbbq %rbx, %r9 ; \ + sbbq %rbx, %r10 ; \ + sbbq %rbx, %rax ; \ + movq %r8, P0 ; \ + movq %r9, 8+P0 ; \ + movq %r10, 16+P0 ; \ + movq %rax, 24+P0 + +// Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38. +// This only ensures that the result fits in 4 digits, not that it is reduced +// even w.r.t. double modulus. The result is always correct modulo provided +// the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided +// at least one of them is reduced double modulo. 
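A C sketch of the double-modulus correction in sub_twice4 above (add_twice4 below mirrors it with an addition and a +38 correction); inputs are assumed already reduced modulo 2^256 - 38, so one correction suffices, exactly as in the macro:

    #include <stdint.h>

    // z := (x - y) mod 2^256 - 38, for x, y < 2^256 - 38. A borrow out of
    // the top word means the 4-word result is really x - y + 2^256; since
    // 2^256 == 38 here, subtract 38 to compensate.
    static void sub_2e256m38(uint64_t z[4],
                             const uint64_t x[4], const uint64_t y[4])
    {
        uint64_t b = 0, t[4];
        for (int i = 0; i < 4; i++) {
            uint64_t d = x[i] - y[i];
            uint64_t b1 = (uint64_t)(x[i] < y[i]);
            t[i] = d - b;
            b = b1 | (uint64_t)(d < b);
        }
        uint64_t c = 38 & (0 - b);            // 38 iff a borrow occurred
        for (int i = 0; i < 4; i++) {
            uint64_t w = t[i];
            z[i] = w - c;
            c = (uint64_t)(w < c);
        }
    }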
+ +#define add_twice4(P0,P1,P2) \ + movq P1, %r8 ; \ + xorl %ecx, %ecx ; \ + addq P2, %r8 ; \ + movq 0x8+P1, %r9 ; \ + adcq 0x8+P2, %r9 ; \ + movq 0x10+P1, %r10 ; \ + adcq 0x10+P2, %r10 ; \ + movq 0x18+P1, %r11 ; \ + adcq 0x18+P2, %r11 ; \ + movl $38, %eax ; \ + cmovncq %rcx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +#define double_twice4(P0,P1) \ + movq P1, %r8 ; \ + xorl %ecx, %ecx ; \ + addq %r8, %r8 ; \ + movq 0x8+P1, %r9 ; \ + adcq %r9, %r9 ; \ + movq 0x10+P1, %r10 ; \ + adcq %r10, %r10 ; \ + movq 0x18+P1, %r11 ; \ + adcq %r11, %r11 ; \ + movl $38, %eax ; \ + cmovncq %rcx, %rax ; \ + addq %rax, %r8 ; \ + adcq %rcx, %r9 ; \ + adcq %rcx, %r10 ; \ + adcq %rcx, %r11 ; \ + movq %r8, P0 ; \ + movq %r9, 0x8+P0 ; \ + movq %r10, 0x10+P0 ; \ + movq %r11, 0x18+P0 + +// Load the constant k_25519 = 2 * d_25519 using immediate operations + +#define load_k25519(P0) \ + movq $0xebd69b9426b2f159, %rax ; \ + movq %rax, P0 ; \ + movq $0x00e0149a8283b156, %rax ; \ + movq %rax, 8+P0 ; \ + movq $0x198e80f2eef3d130, %rax ; \ + movq %rax, 16+P0 ; \ + movq $0x2406d9dc56dffce7, %rax ; \ + movq %rax, 24+P0 + +S2N_BN_SYMBOL(edwards25519_scalarmuldouble_alt): + +// In this case the Windows form literally makes a subroutine call. +// This avoids hassle arising from keeping code and data together. + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx + movq %r9, %rcx + callq edwards25519_scalarmuldouble_alt_standard + popq %rsi + popq %rdi + ret + +edwards25519_scalarmuldouble_alt_standard: +#endif + +// Save registers, make room for temps, preserve input arguments. + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $NSPACE, %rsp + +// Move the output pointer to a stable place + + movq %rdi, res + +// Copy scalars while recoding all 4-bit nybbles except the top +// one (bits 252..255) into signed 4-bit digits. This is essentially +// done just by adding the recoding constant 0x0888..888, after +// which all digits except the first have an implicit bias of -8, +// so 0 -> -8, 1 -> -7, ... 7 -> -1, 8 -> 0, 9 -> 1, ... 15 -> 7. +// (We could literally create 2s complement signed nybbles by +// XORing with the same constant 0x0888..888 afterwards, but it +// doesn't seem to make the end usage any simpler.) +// +// In order to ensure that the unrecoded top nybble (bits 252..255) +// does not become > 8 as a result of carries lower down from the +// recoding, we first (conceptually) subtract the group order iff +// the top digit of the scalar is > 2^63. In the implementation the +// reduction and recoding are combined by optionally using the +// modified recoding constant 0x0888...888 + (2^256 - group_order). 
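A C sketch of this recoding (illustrative; the two constants are the ones loaded by the code that follows, and the selection is masked rather than branching, like the cmovs):

    #include <stdint.h>
    typedef unsigned __int128 u128;

    // Add the recoding constant 0x0888...888, pre-folded with 2^256 - n
    // (n the group order) when the top word exceeds 2^63 so that the top
    // nybble of the result never exceeds 8. Digit i of the result is then
    // ((r >> 4*i) & 15) - 8 for every position except the top one.
    static void recode(uint64_t r[4], const uint64_t s[4])
    {
        static const uint64_t plain[4] = {
            0x8888888888888888ULL, 0x8888888888888888ULL,
            0x8888888888888888ULL, 0x0888888888888888ULL };
        static const uint64_t folded[4] = {   // plain + (2^256 - n)
            0xc7f56fb5a0d9e920ULL, 0xe190b99370cba1d5ULL,
            0x8888888888888887ULL, 0x8888888888888888ULL };
        uint64_t m = 0 - (uint64_t)(s[3] > 0x8000000000000000ULL);
        u128 acc = 0;
        for (int i = 0; i < 4; i++) {
            acc += (u128)s[i] + ((folded[i] & m) | (plain[i] & ~m));
            r[i] = (uint64_t)acc;
            acc >>= 64;
        }
    }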
+ + movq (%rcx), %r8 + movq 8(%rcx), %r9 + movq 16(%rcx), %r10 + movq 24(%rcx), %r11 + movq $0xc7f56fb5a0d9e920, %r12 + movq $0xe190b99370cba1d5, %r13 + movq $0x8888888888888887, %r14 + movq $0x8888888888888888, %r15 + movq $0x8000000000000000, %rax + movq $0x0888888888888888, %rbx + cmpq %r11, %rax + cmovncq %r15, %r12 + cmovncq %r15, %r13 + cmovncq %r15, %r14 + cmovncq %rbx, %r15 + addq %r12, %r8 + adcq %r13, %r9 + adcq %r14, %r10 + adcq %r15, %r11 + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + movq %r10, 48(%rsp) + movq %r11, 56(%rsp) + + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + movq $0xc7f56fb5a0d9e920, %r12 + movq $0xe190b99370cba1d5, %r13 + movq $0x8888888888888887, %r14 + movq $0x8888888888888888, %r15 + movq $0x8000000000000000, %rax + movq $0x0888888888888888, %rbx + cmpq %r11, %rax + cmovncq %r15, %r12 + cmovncq %r15, %r13 + cmovncq %r15, %r14 + cmovncq %rbx, %r15 + addq %r12, %r8 + adcq %r13, %r9 + adcq %r14, %r10 + adcq %r15, %r11 + movq %r8, (%rsp) + movq %r9, 8(%rsp) + movq %r10, 16(%rsp) + movq %r11, 24(%rsp) + +// Create table of multiples 1..8 of the general input point at "tab". +// Reduce the input coordinates x and y modulo 2^256 - 38 first, for the +// sake of definiteness; this is the reduction that will be maintained. +// We could slightly optimize the additions because we know the input +// point is affine (so Z = 1), but it doesn't seem worth the complication. + + movl $38, %eax + movq (%rdx), %r8 + xorl %ebx, %ebx + movq 8(%rdx), %r9 + xorl %ecx, %ecx + movq 16(%rdx), %r10 + xorl %esi, %esi + movq 24(%rdx), %r11 + addq %r8, %rax + adcq %r9, %rbx + adcq %r10, %rcx + adcq %r11, %rsi + cmovncq %r8, %rax + movq %rax, 448(%rsp) + cmovncq %r9, %rbx + movq %rbx, 456(%rsp) + cmovncq %r10, %rcx + movq %rcx, 464(%rsp) + cmovncq %r11, %rsi + movq %rsi, 472(%rsp) + + movl $38, %eax + movq 32(%rdx), %r8 + xorl %ebx, %ebx + movq 40(%rdx), %r9 + xorl %ecx, %ecx + movq 48(%rdx), %r10 + xorl %esi, %esi + movq 56(%rdx), %r11 + addq %r8, %rax + adcq %r9, %rbx + adcq %r10, %rcx + adcq %r11, %rsi + cmovncq %r8, %rax + movq %rax, 480(%rsp) + cmovncq %r9, %rbx + movq %rbx, 488(%rsp) + cmovncq %r10, %rcx + movq %rcx, 496(%rsp) + cmovncq %r11, %rsi + movq %rsi, 504(%rsp) + + movl $1, %eax + movq %rax, 512(%rsp) + xorl %eax, %eax + movq %rax, 520(%rsp) + movq %rax, 528(%rsp) + movq %rax, 536(%rsp) + + leaq 544(%rsp), %rdi + leaq 448(%rsp), %rsi + leaq 480(%rsp), %rbp + mul_4(x_0,x_1,x_2) + +// Multiple 2 + + leaq 576(%rsp), %rdi + leaq 448(%rsp), %rsi + callq edwards25519_scalarmuldouble_alt_epdouble + +// Multiple 3 + + leaq 704(%rsp), %rdi + leaq 448(%rsp), %rsi + leaq 576(%rsp), %rbp + callq edwards25519_scalarmuldouble_alt_epadd + +// Multiple 4 + + leaq 832(%rsp), %rdi + leaq 576(%rsp), %rsi + callq edwards25519_scalarmuldouble_alt_epdouble + +// Multiple 5 + + leaq 960(%rsp), %rdi + leaq 448(%rsp), %rsi + leaq 832(%rsp), %rbp + callq edwards25519_scalarmuldouble_alt_epadd + +// Multiple 6 + + leaq 1088(%rsp), %rdi + leaq 704(%rsp), %rsi + callq edwards25519_scalarmuldouble_alt_epdouble + +// Multiple 7 + + leaq 1216(%rsp), %rdi + leaq 448(%rsp), %rsi + leaq 1088(%rsp), %rbp + callq edwards25519_scalarmuldouble_alt_epadd + +// Multiple 8 + + leaq 1344(%rsp), %rdi + leaq 832(%rsp), %rsi + callq edwards25519_scalarmuldouble_alt_epdouble + +// Handle the initialization, starting the loop counter at i = 252 +// and initializing acc to the sum of the table entries for the +// top nybbles of the scalars (the ones with no implicit -8 bias). 
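Before the unrolled selection code, the overall shape of the initialization and main loop, as a C sketch with hypothetical helpers (pdouble etc. name the local subroutines; the select and digit helpers stand in for the constant-time lookup and recoded-digit logic sketched earlier, hiding the precomputed-versus-extended representation details):

    #include <stdint.h>

    typedef struct { uint64_t x[4], y[4], z[4], w[4]; } epoint;

    // Hypothetical stand-ins for the local subroutines and lookups.
    extern void pdouble(epoint *r, const epoint *p);
    extern void epdouble(epoint *r, const epoint *p);
    extern void epadd(epoint *r, const epoint *p, const epoint *q);
    extern void pepadd(epoint *r, const epoint *p, const epoint *q);
    extern void tab_select(epoint *r, int digit);    // signed, constant-time
    extern void btab_select(epoint *r, int digit);
    extern int  digit_of(const uint64_t s[4], int i);

    void loop_shape(epoint *acc,
                    const uint64_t scalar[4], const uint64_t bscalar[4])
    {
        epoint t, b;
        tab_select(&t, digit_of(scalar, 252));    // top nybbles, no bias
        btab_select(&b, digit_of(bscalar, 252));
        pepadd(acc, &t, &b);                      // acc for bit position 252
        for (int i = 252; i != 0; ) {
            i -= 4;
            pdouble(acc, acc);                    // acc = 2*acc
            btab_select(&b, digit_of(bscalar, i));
            tab_select(&t, digit_of(scalar, i));
            pdouble(acc, acc);                    // acc = 4*acc
            pepadd(&t, &t, &b);                   // t := t + b
            pdouble(acc, acc);                    // acc = 8*acc
            epdouble(acc, acc);                   // acc = 16*acc
            epadd(acc, acc, &t);                  // acc := 16*acc + t
        }
    }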
+ + movq $252, %rax + movq %rax, i + +// Index for btable entry... + + movq 56(%rsp), %rax + shrq $60, %rax + movq %rax, bf + +// ...and constant-time indexing based on that index + + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + leaq edwards25519_scalarmuldouble_alt_table(%rip), %rbp + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, 
%r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $8, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + + movq %rax, 352(%rsp) + movq %rbx, 360(%rsp) + movq %rcx, 368(%rsp) + movq %rdx, 376(%rsp) + movq %r8, 384(%rsp) + movq %r9, 392(%rsp) + movq %r10, 400(%rsp) + movq %r11, 408(%rsp) + movq %r12, 416(%rsp) + movq %r13, 424(%rsp) + movq %r14, 432(%rsp) + movq %r15, 440(%rsp) + +// Index for table entry... + + movq 24(%rsp), %rax + shrq $60, %rax + movq %rax, bf + +// ...and constant-time indexing based on that index. +// Do the Y and Z fields first, to save on registers... + + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + + leaq 480(%rsp), %rbp + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + 
cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $8, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + + movq %rax, 256(%rsp) + movq %rbx, 264(%rsp) + movq %rcx, 272(%rsp) + movq %rdx, 280(%rsp) + movq %r8, 288(%rsp) + movq %r9, 296(%rsp) + movq %r10, 304(%rsp) + movq %r11, 312(%rsp) + +// ...followed by the X and W fields + + leaq 448(%rsp), %rbp + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi 
+ cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $8, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + + movq %rax, 224(%rsp) + movq %rbx, 232(%rsp) + movq %rcx, 240(%rsp) + movq %rdx, 248(%rsp) + movq %r8, 320(%rsp) + movq %r9, 328(%rsp) + movq %r10, 336(%rsp) + movq %r11, 344(%rsp) + +// Add those elements to initialize the accumulator for bit position 252 + + leaq 96(%rsp), %rdi + leaq 224(%rsp), %rsi + leaq 352(%rsp), %rbp + callq edwards25519_scalarmuldouble_alt_pepadd + +// Main loop with acc = [scalar/2^i] * point + [bscalar/2^i] * basepoint +// Start with i = 252 for bits 248..251 and go down four at a time to 3..0 + +edwards25519_scalarmuldouble_alt_loop: + + movq i, %rax + subq $4, %rax + movq %rax, i + +// Double to acc' = 2 * acc + + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + callq edwards25519_scalarmuldouble_alt_pdouble + +// Get btable entry, first getting the adjusted bitfield... + + movq i, %rax + movq %rax, %rcx + shrq $6, %rax + movq 32(%rsp,%rax,8), %rax + shrq %cl, %rax + andq $15, %rax + + subq $8, %rax + sbbq %rcx, %rcx + xorq %rcx, %rax + subq %rcx, %rax + movq %rcx, cf + movq %rax, bf + +// ... then doing constant-time lookup with the appropriate index... 
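+// A minimal C sketch of the selection idiom used in each block below
+// (ours, not from the original; the assembly realizes the same
+// data-independent pattern with one cmp and twelve cmovz per candidate):
+//
+//     // r[12] starts as the neutral element (1, 1, 0) in the
+//     // (y - x, x + y, 2 * d * x * y) representation
+//     for (uint64_t j = 1; j <= 8; j++)
+//     {
+//       uint64_t mask = (j == bf) ? ~(uint64_t)0 : 0;  // all-ones iff j == bf
+//       for (int w = 0; w < 12; w++)
+//         r[w] ^= mask & (r[w] ^ btab[j-1][w]);        // masked select
+//     }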
+ + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + leaq edwards25519_scalarmuldouble_alt_table(%rip), %rbp + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + movq 64(%rbp), %rsi + cmovzq %rsi, %r12 + movq 72(%rbp), %rsi + cmovzq %rsi, %r13 + movq 80(%rbp), %rsi + cmovzq %rsi, %r14 + movq 88(%rbp), %rsi + cmovzq %rsi, %r15 + addq $96, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi 
+        cmovzq  %rsi, %rbx
+        movq    16(%rbp), %rsi
+        cmovzq  %rsi, %rcx
+        movq    24(%rbp), %rsi
+        cmovzq  %rsi, %rdx
+        movq    32(%rbp), %rsi
+        cmovzq  %rsi, %r8
+        movq    40(%rbp), %rsi
+        cmovzq  %rsi, %r9
+        movq    48(%rbp), %rsi
+        cmovzq  %rsi, %r10
+        movq    56(%rbp), %rsi
+        cmovzq  %rsi, %r11
+        movq    64(%rbp), %rsi
+        cmovzq  %rsi, %r12
+        movq    72(%rbp), %rsi
+        cmovzq  %rsi, %r13
+        movq    80(%rbp), %rsi
+        cmovzq  %rsi, %r14
+        movq    88(%rbp), %rsi
+        cmovzq  %rsi, %r15
+        addq    $96, %rbp
+
+        cmpq    $8, bf
+        movq    (%rbp), %rsi
+        cmovzq  %rsi, %rax
+        movq    8(%rbp), %rsi
+        cmovzq  %rsi, %rbx
+        movq    16(%rbp), %rsi
+        cmovzq  %rsi, %rcx
+        movq    24(%rbp), %rsi
+        cmovzq  %rsi, %rdx
+        movq    32(%rbp), %rsi
+        cmovzq  %rsi, %r8
+        movq    40(%rbp), %rsi
+        cmovzq  %rsi, %r9
+        movq    48(%rbp), %rsi
+        cmovzq  %rsi, %r10
+        movq    56(%rbp), %rsi
+        cmovzq  %rsi, %r11
+        movq    64(%rbp), %rsi
+        cmovzq  %rsi, %r12
+        movq    72(%rbp), %rsi
+        cmovzq  %rsi, %r13
+        movq    80(%rbp), %rsi
+        cmovzq  %rsi, %r14
+        movq    88(%rbp), %rsi
+        cmovzq  %rsi, %r15
+
+// ... then optionally negating before storing. The table entry
+// is in precomputed form and we currently have
+//
+// [%rdx;%rcx;%rbx;%rax] = y - x
+// [%r11;%r10;%r9;%r8] = x + y
+// [%r15;%r14;%r13;%r12] = 2 * d * x * y
+//
+// Negation for Edwards curves is -(x,y) = (-x,y), which in this modified
+// form amounts to swapping the first two fields and negating the third.
+// The negation does not always fully reduce even mod 2^256-38 in the zero
+// case, instead giving -0 = 2^256-38. But that is fine since the result is
+// always fed to a multiplication inside the "pepadd" function below that
+// handles any 256-bit input.
+
+        movq    cf, %rdi
+        testq   %rdi, %rdi
+
+        movq    %rax, %rsi
+        cmovnzq %r8, %rsi
+        cmovnzq %rax, %r8
+        movq    %rsi, 352(%rsp)
+        movq    %r8, 384(%rsp)
+
+        movq    %rbx, %rsi
+        cmovnzq %r9, %rsi
+        cmovnzq %rbx, %r9
+        movq    %rsi, 360(%rsp)
+        movq    %r9, 392(%rsp)
+
+        movq    %rcx, %rsi
+        cmovnzq %r10, %rsi
+        cmovnzq %rcx, %r10
+        movq    %rsi, 368(%rsp)
+        movq    %r10, 400(%rsp)
+
+        movq    %rdx, %rsi
+        cmovnzq %r11, %rsi
+        cmovnzq %rdx, %r11
+        movq    %rsi, 376(%rsp)
+        movq    %r11, 408(%rsp)
+
+        xorq    %rdi, %r12
+        xorq    %rdi, %r13
+        xorq    %rdi, %r14
+        xorq    %rdi, %r15
+        andq    $37, %rdi
+        subq    %rdi, %r12
+        sbbq    $0, %r13
+        sbbq    $0, %r14
+        sbbq    $0, %r15
+        movq    %r12, 416(%rsp)
+        movq    %r13, 424(%rsp)
+        movq    %r14, 432(%rsp)
+        movq    %r15, 440(%rsp)
+
+// Get table entry, first getting the adjusted bitfield...
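+// In C terms the digit extraction below amounts to (sketch only,
+// assuming <stdint.h>; i is a multiple of 4 below 252 at this point):
+//
+//     uint64_t d = (scalar[i >> 6] >> (i & 63)) & 15;  // 4-bit window
+//     int64_t  s = (int64_t) d - 8;                    // remove the +8 bias
+//     uint64_t cf = (uint64_t) (s >> 63);              // all-ones iff negative
+//     uint64_t bf = ((uint64_t) s ^ cf) - cf;          // |s|, in 0..8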
+ + movq i, %rax + movq %rax, %rcx + shrq $6, %rax + movq (%rsp,%rax,8), %rax + shrq %cl, %rax + andq $15, %rax + + subq $8, %rax + sbbq %rcx, %rcx + xorq %rcx, %rax + subq %rcx, %rax + movq %rcx, cf + movq %rax, bf + +// ...and constant-time indexing based on that index +// Do the Y and Z fields first, to save on registers +// and store them back (they don't need any modification) + + movl $1, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + movl $1, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + + leaq 480(%rsp), %rbp + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $8, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 32(%rbp), %rsi + cmovzq %rsi, %r8 + movq 40(%rbp), %rsi + cmovzq %rsi, %r9 + movq 48(%rbp), %rsi + cmovzq %rsi, %r10 + movq 56(%rbp), %rsi + cmovzq %rsi, %r11 + + movq %rax, 256(%rsp) + movq %rbx, 264(%rsp) + movq %rcx, 272(%rsp) + movq %rdx, 280(%rsp) + 
movq %r8, 288(%rsp) + movq %r9, 296(%rsp) + movq %r10, 304(%rsp) + movq %r11, 312(%rsp) + +// Now do the X and W fields... + + leaq 448(%rsp), %rbp + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + + cmpq $1, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $2, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $3, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $4, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $5, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $6, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $7, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + addq $128, %rbp + + cmpq $8, bf + movq (%rbp), %rsi + cmovzq %rsi, %rax + movq 8(%rbp), %rsi + cmovzq %rsi, %rbx + movq 16(%rbp), %rsi + cmovzq %rsi, %rcx + movq 24(%rbp), %rsi + cmovzq %rsi, %rdx + movq 96(%rbp), %rsi + cmovzq %rsi, %r8 + movq 104(%rbp), %rsi + cmovzq %rsi, %r9 + movq 112(%rbp), %rsi + cmovzq %rsi, %r10 + movq 120(%rbp), %rsi + cmovzq %rsi, %r11 + +// ... then optionally negate before storing the X and W fields. This +// time the table entry is extended-projective, and is here: +// +// [%rdx;%rcx;%rbx;%rax] = X +// [tabent+32] = Y +// [tabent+64] = Z +// [%r11;%r10;%r9;%r8] = W +// +// This time we just need to negate the X and the W fields. 
+// The crude way negation is done can result in values of X or W +// (when initially zero before negation) being exactly equal to +// 2^256-38, but the "pepadd" function handles that correctly. + + movq cf, %rdi + + xorq %rdi, %rax + xorq %rdi, %rbx + xorq %rdi, %rcx + xorq %rdi, %rdx + + xorq %rdi, %r8 + xorq %rdi, %r9 + xorq %rdi, %r10 + xorq %rdi, %r11 + + andq $37, %rdi + + subq %rdi, %rax + sbbq $0, %rbx + sbbq $0, %rcx + sbbq $0, %rdx + + movq %rax, 224(%rsp) + movq %rbx, 232(%rsp) + movq %rcx, 240(%rsp) + movq %rdx, 248(%rsp) + + subq %rdi, %r8 + sbbq $0, %r9 + sbbq $0, %r10 + sbbq $0, %r11 + + movq %r8, 320(%rsp) + movq %r9, 328(%rsp) + movq %r10, 336(%rsp) + movq %r11, 344(%rsp) + +// Double to acc' = 4 * acc + + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + callq edwards25519_scalarmuldouble_alt_pdouble + +// Add tabent := tabent + btabent + + leaq 224(%rsp), %rdi + leaq 224(%rsp), %rsi + leaq 352(%rsp), %rbp + callq edwards25519_scalarmuldouble_alt_pepadd + +// Double to acc' = 8 * acc + + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + callq edwards25519_scalarmuldouble_alt_pdouble + +// Double to acc' = 16 * acc + + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + callq edwards25519_scalarmuldouble_alt_epdouble + +// Add table entry, acc := acc + tabent + + leaq 96(%rsp), %rdi + leaq 96(%rsp), %rsi + leaq 224(%rsp), %rbp + callq edwards25519_scalarmuldouble_alt_epadd + +// Loop down + + movq i, %rax + testq %rax, %rax + jnz edwards25519_scalarmuldouble_alt_loop + +// Modular inverse setup + + movq $4, %rdi + leaq 224(%rsp), %rsi + leaq 160(%rsp), %rdx + leaq edwards25519_scalarmuldouble_alt_p25519(%rip), %rcx + leaq 352(%rsp), %r8 + +// Inline copy of bignum_modinv, identical except for stripping out the +// prologue and epilogue saving and restoring registers and the initial +// test for k = 0 (which is trivially false here since k = 4). For more +// details and explanations see "x86/generic/bignum_modinv.S". Note +// that the stack it uses for its own temporaries is 80 bytes so it +// only overwrites local variables that are no longer needed. 
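+// In effect the rest of the function computes, at the C level (a sketch
+// using descriptive placeholder names; X_acc, Y_acc, Z_acc are the
+// accumulator coordinates, and t and scratch are stack temporaries):
+//
+//     bignum_modinv(4, t, Z_acc, p_25519, scratch);  // t := Z^-1 mod p_25519
+//     mul_p25519(x_out, X_acc, t);                   // affine x = X / Z
+//     mul_p25519(y_out, Y_acc, t);                   // affine y = Y / Z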
+ + movq %rsi, 0x40(%rsp) + movq %r8, 0x38(%rsp) + movq %rcx, 0x48(%rsp) + leaq (%r8,%rdi,8), %r10 + movq %r10, 0x30(%rsp) + leaq (%r10,%rdi,8), %r15 + xorq %r11, %r11 + xorq %r9, %r9 +edwards25519_scalarmuldouble_alt_copyloop: + movq (%rdx,%r9,8), %rax + movq (%rcx,%r9,8), %rbx + movq %rax, (%r10,%r9,8) + movq %rbx, (%r15,%r9,8) + movq %rbx, (%r8,%r9,8) + movq %r11, (%rsi,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb edwards25519_scalarmuldouble_alt_copyloop + movq (%r8), %rax + movq %rax, %rbx + decq %rbx + movq %rbx, (%r8) + movq %rax, %rbp + movq %rax, %r12 + shlq $0x2, %rbp + subq %rbp, %r12 + xorq $0x2, %r12 + movq %r12, %rbp + imulq %rax, %rbp + movl $0x2, %eax + addq %rbp, %rax + addq $0x1, %rbp + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + imulq %rbp, %rbp + movl $0x1, %eax + addq %rbp, %rax + imulq %rax, %r12 + movq %r12, 0x28(%rsp) + movq %rdi, %rax + shlq $0x7, %rax + movq %rax, 0x20(%rsp) +edwards25519_scalarmuldouble_alt_outerloop: + movq 0x20(%rsp), %r13 + addq $0x3f, %r13 + shrq $0x6, %r13 + cmpq %rdi, %r13 + cmovaeq %rdi, %r13 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi + xorq %r11, %r11 + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 +edwards25519_scalarmuldouble_alt_toploop: + movq (%r8,%r9,8), %rbx + movq (%r15,%r9,8), %rcx + movq %r11, %r10 + andq %r12, %r10 + andq %rbp, %r11 + movq %rbx, %rax + orq %rcx, %rax + negq %rax + cmovbq %r10, %r14 + cmovbq %r11, %rsi + cmovbq %rbx, %r12 + cmovbq %rcx, %rbp + sbbq %r11, %r11 + incq %r9 + cmpq %r13, %r9 + jb edwards25519_scalarmuldouble_alt_toploop + movq %r12, %rax + orq %rbp, %rax + bsrq %rax, %rcx + xorq $0x3f, %rcx + shldq %cl, %r14, %r12 + shldq %cl, %rsi, %rbp + movq (%r8), %rax + movq %rax, %r14 + movq (%r15), %rax + movq %rax, %rsi + movl $0x1, %r10d + movl $0x0, %r11d + movl $0x0, %ecx + movl $0x1, %edx + movl $0x3a, %r9d + movq %rdi, 0x8(%rsp) + movq %r13, 0x10(%rsp) + movq %r8, (%rsp) + movq %r15, 0x18(%rsp) +edwards25519_scalarmuldouble_alt_innerloop: + xorl %eax, %eax + xorl %ebx, %ebx + xorq %r8, %r8 + xorq %r15, %r15 + btq $0x0, %r14 + cmovbq %rbp, %rax + cmovbq %rsi, %rbx + cmovbq %rcx, %r8 + cmovbq %rdx, %r15 + movq %r14, %r13 + subq %rbx, %r14 + subq %r13, %rbx + movq %r12, %rdi + subq %rax, %rdi + cmovbq %r12, %rbp + leaq -0x1(%rdi), %r12 + cmovbq %rbx, %r14 + cmovbq %r13, %rsi + notq %r12 + cmovbq %r10, %rcx + cmovbq %r11, %rdx + cmovaeq %rdi, %r12 + shrq $1, %r14 + addq %r8, %r10 + addq %r15, %r11 + shrq $1, %r12 + addq %rcx, %rcx + addq %rdx, %rdx + decq %r9 + jne edwards25519_scalarmuldouble_alt_innerloop + movq 0x8(%rsp), %rdi + movq 0x10(%rsp), %r13 + movq (%rsp), %r8 + movq 0x18(%rsp), %r15 + movq %r10, (%rsp) + movq %r11, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rdx, 0x18(%rsp) + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + xorq %r14, %r14 + xorq %rsi, %rsi + xorq %r10, %r10 + xorq %r11, %r11 + xorq %r9, %r9 +edwards25519_scalarmuldouble_alt_congloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r12 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %rbp + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq %rdx, %r12 + shrdq $0x3a, %r14, %r10 + movq %r10, (%r8,%r9,8) + movq %r14, %r10 + movq %r12, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3a, %rsi, %r11 + movq %r11, 
(%r15,%r9,8) + movq %rsi, %r11 + movq %rbp, %rsi + incq %r9 + cmpq %rdi, %r9 + jb edwards25519_scalarmuldouble_alt_congloop + shldq $0x6, %r10, %r14 + shldq $0x6, %r11, %rsi + movq 0x48(%rsp), %r15 + movq (%r8), %rbx + movq 0x28(%rsp), %r12 + imulq %rbx, %r12 + movq (%r15), %rax + mulq %r12 + addq %rbx, %rax + movq %rdx, %r10 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je edwards25519_scalarmuldouble_alt_wmontend +edwards25519_scalarmuldouble_alt_wmontloop: + adcq (%r8,%r9,8), %r10 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %r12 + subq %rbx, %rdx + addq %r10, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r10 + incq %r9 + decq %rcx + jne edwards25519_scalarmuldouble_alt_wmontloop +edwards25519_scalarmuldouble_alt_wmontend: + adcq %r14, %r10 + movq %r10, -0x8(%r8,%rdi,8) + sbbq %r10, %r10 + negq %r10 + movq %rdi, %rcx + xorq %r9, %r9 +edwards25519_scalarmuldouble_alt_wcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne edwards25519_scalarmuldouble_alt_wcmploop + sbbq $0x0, %r10 + sbbq %r10, %r10 + notq %r10 + xorq %rcx, %rcx + xorq %r9, %r9 +edwards25519_scalarmuldouble_alt_wcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r10, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb edwards25519_scalarmuldouble_alt_wcorrloop + movq 0x40(%rsp), %r8 + movq (%r8), %rbx + movq 0x28(%rsp), %rbp + imulq %rbx, %rbp + movq (%r15), %rax + mulq %rbp + addq %rbx, %rax + movq %rdx, %r11 + movl $0x1, %r9d + movq %rdi, %rcx + decq %rcx + je edwards25519_scalarmuldouble_alt_zmontend +edwards25519_scalarmuldouble_alt_zmontloop: + adcq (%r8,%r9,8), %r11 + sbbq %rbx, %rbx + movq (%r15,%r9,8), %rax + mulq %rbp + subq %rbx, %rdx + addq %r11, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %rdx, %r11 + incq %r9 + decq %rcx + jne edwards25519_scalarmuldouble_alt_zmontloop +edwards25519_scalarmuldouble_alt_zmontend: + adcq %rsi, %r11 + movq %r11, -0x8(%r8,%rdi,8) + sbbq %r11, %r11 + negq %r11 + movq %rdi, %rcx + xorq %r9, %r9 +edwards25519_scalarmuldouble_alt_zcmploop: + movq (%r8,%r9,8), %rax + sbbq (%r15,%r9,8), %rax + incq %r9 + decq %rcx + jne edwards25519_scalarmuldouble_alt_zcmploop + sbbq $0x0, %r11 + sbbq %r11, %r11 + notq %r11 + xorq %rcx, %rcx + xorq %r9, %r9 +edwards25519_scalarmuldouble_alt_zcorrloop: + movq (%r8,%r9,8), %rax + movq (%r15,%r9,8), %rbx + andq %r11, %rbx + negq %rcx + sbbq %rbx, %rax + sbbq %rcx, %rcx + movq %rax, (%r8,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb edwards25519_scalarmuldouble_alt_zcorrloop + movq 0x30(%rsp), %r8 + leaq (%r8,%rdi,8), %r15 + xorq %r9, %r9 + xorq %r12, %r12 + xorq %r14, %r14 + xorq %rbp, %rbp + xorq %rsi, %rsi +edwards25519_scalarmuldouble_alt_crossloop: + movq (%r8,%r9,8), %rcx + movq (%rsp), %rax + mulq %rcx + addq %rax, %r14 + adcq $0x0, %rdx + movq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rcx + addq %rax, %rsi + adcq $0x0, %rdx + movq %rdx, %r11 + movq (%r15,%r9,8), %rcx + movq 0x8(%rsp), %rax + mulq %rcx + subq %r12, %rdx + subq %rax, %r14 + sbbq %rdx, %r10 + sbbq %r12, %r12 + movq %r14, (%r8,%r9,8) + movq %r10, %r14 + movq 0x18(%rsp), %rax + mulq %rcx + subq %rbp, %rdx + subq %rax, %rsi + sbbq %rdx, %r11 + sbbq %rbp, %rbp + movq %rsi, (%r15,%r9,8) + movq %r11, %rsi + incq %r9 + cmpq %r13, %r9 + jb edwards25519_scalarmuldouble_alt_crossloop + xorq %r9, %r9 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r12, %r14 + xorq %rbp, %rsi +edwards25519_scalarmuldouble_alt_optnegloop: + movq (%r8,%r9,8), %rax + xorq %r12, %rax + negq %r10 + adcq 
$0x0, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rax + xorq %rbp, %rax + negq %r11 + adcq $0x0, %rax + sbbq %r11, %r11 + movq %rax, (%r15,%r9,8) + incq %r9 + cmpq %r13, %r9 + jb edwards25519_scalarmuldouble_alt_optnegloop + subq %r10, %r14 + subq %r11, %rsi + movq %r13, %r9 +edwards25519_scalarmuldouble_alt_shiftloop: + movq -0x8(%r8,%r9,8), %rax + movq %rax, %r10 + shrdq $0x3a, %r14, %rax + movq %rax, -0x8(%r8,%r9,8) + movq %r10, %r14 + movq -0x8(%r15,%r9,8), %rax + movq %rax, %r11 + shrdq $0x3a, %rsi, %rax + movq %rax, -0x8(%r15,%r9,8) + movq %r11, %rsi + decq %r9 + jne edwards25519_scalarmuldouble_alt_shiftloop + notq %rbp + movq 0x48(%rsp), %rcx + movq 0x38(%rsp), %r8 + movq 0x40(%rsp), %r15 + movq %r12, %r10 + movq %rbp, %r11 + xorq %r9, %r9 +edwards25519_scalarmuldouble_alt_fliploop: + movq %rbp, %rdx + movq (%rcx,%r9,8), %rax + andq %rax, %rdx + andq %r12, %rax + movq (%r8,%r9,8), %rbx + xorq %r12, %rbx + negq %r10 + adcq %rbx, %rax + sbbq %r10, %r10 + movq %rax, (%r8,%r9,8) + movq (%r15,%r9,8), %rbx + xorq %rbp, %rbx + negq %r11 + adcq %rbx, %rdx + sbbq %r11, %r11 + movq %rdx, (%r15,%r9,8) + incq %r9 + cmpq %rdi, %r9 + jb edwards25519_scalarmuldouble_alt_fliploop + subq $0x3a, 0x20(%rsp) + ja edwards25519_scalarmuldouble_alt_outerloop + +// Store result + + movq res, %rdi + leaq 96(%rsp), %rsi + leaq 224(%rsp), %rbp + mul_p25519(x_0,x_1,x_2) + + movq res, %rdi + addq $32, %rdi + leaq 128(%rsp), %rsi + leaq 224(%rsp), %rbp + mul_p25519(x_0,x_1,x_2) + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +// **************************************************************************** +// Localized versions of subroutines. +// These are close to the standalone functions "edwards25519_epdouble" etc., +// but are only maintaining reduction modulo 2^256 - 38, not 2^255 - 19. 
+// **************************************************************************** + +edwards25519_scalarmuldouble_alt_epdouble: + sub $(5*NUMSIZE), %rsp + add_twice4(t0,x_1,y_1) + sqr_4(t1,z_1) + sqr_4(t2,x_1) + sqr_4(t3,y_1) + double_twice4(t1,t1) + sqr_4(t0,t0) + add_twice4(t4,t2,t3) + sub_twice4(t2,t2,t3) + add_twice4(t3,t1,t2) + sub_twice4(t1,t4,t0) + mul_4(y_0,t2,t4) + mul_4(z_0,t3,t2) + mul_4(w_0,t1,t4) + mul_4(x_0,t1,t3) + add $(5*NUMSIZE), %rsp + ret + +edwards25519_scalarmuldouble_alt_pdouble: + sub $(5*NUMSIZE), %rsp + add_twice4(t0,x_1,y_1) + sqr_4(t1,z_1) + sqr_4(t2,x_1) + sqr_4(t3,y_1) + double_twice4(t1,t1) + sqr_4(t0,t0) + add_twice4(t4,t2,t3) + sub_twice4(t2,t2,t3) + add_twice4(t3,t1,t2) + sub_twice4(t1,t4,t0) + mul_4(y_0,t2,t4) + mul_4(z_0,t3,t2) + mul_4(x_0,t1,t3) + add $(5*NUMSIZE), %rsp + ret + +edwards25519_scalarmuldouble_alt_epadd: + sub $(6*NUMSIZE), %rsp + mul_4(t0,w_1,w_2) + sub_twice4(t1,y_1,x_1) + sub_twice4(t2,y_2,x_2) + add_twice4(t3,y_1,x_1) + add_twice4(t4,y_2,x_2) + double_twice4(t5,z_2) + mul_4(t1,t1,t2) + mul_4(t3,t3,t4) + load_k25519(t2) + mul_4(t2,t2,t0) + mul_4(t4,z_1,t5) + sub_twice4(t0,t3,t1) + add_twice4(t5,t3,t1) + sub_twice4(t1,t4,t2) + add_twice4(t3,t4,t2) + mul_4(w_0,t0,t5) + mul_4(x_0,t0,t1) + mul_4(y_0,t3,t5) + mul_4(z_0,t1,t3) + add $(6*NUMSIZE), %rsp + ret + +edwards25519_scalarmuldouble_alt_pepadd: + sub $(6*NUMSIZE), %rsp + double_twice4(t0,z_1); + sub_twice4(t1,y_1,x_1); + add_twice4(t2,y_1,x_1); + mul_4(t3,w_1,z_2); + mul_4(t1,t1,x_2); + mul_4(t2,t2,y_2); + sub_twice4(t4,t0,t3); + add_twice4(t0,t0,t3); + sub_twice4(t5,t2,t1); + add_twice4(t1,t2,t1); + mul_4(z_0,t4,t0); + mul_4(x_0,t5,t4); + mul_4(y_0,t0,t1); + mul_4(w_0,t5,t1); + add $(6*NUMSIZE), %rsp + ret + +// **************************************************************************** +// The precomputed data (all read-only). This is currently part of the same +// text section, which gives position-independent code with simple PC-relative +// addressing. However it could be put in a separate section via something like +// +// .section .rodata +// **************************************************************************** + +// The modulus p_25519 = 2^255 - 19, for the modular inverse + +edwards25519_scalarmuldouble_alt_p25519: + .quad 0xffffffffffffffed + .quad 0xffffffffffffffff + .quad 0xffffffffffffffff + .quad 0x7fffffffffffffff + +// Precomputed table of multiples of generator for edwards25519 +// all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
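+// (Gloss, not in the original: for an affine multiple P = (x,y) of the
+// generator, the stored triple is (y - x, x + y, 2 * d * x * y) mod
+// p_25519, where d = -121665/121666 is the edwards25519 curve constant,
+// so the "pepadd" mixed addition above can consume entries directly
+// with no per-lookup recomputation.)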
+ +edwards25519_scalarmuldouble_alt_table: + + // 1 * G + + .quad 0x9d103905d740913e + .quad 0xfd399f05d140beb3 + .quad 0xa5c18434688f8a09 + .quad 0x44fd2f9298f81267 + .quad 0x2fbc93c6f58c3b85 + .quad 0xcf932dc6fb8c0e19 + .quad 0x270b4898643d42c2 + .quad 0x07cf9d3a33d4ba65 + .quad 0xabc91205877aaa68 + .quad 0x26d9e823ccaac49e + .quad 0x5a1b7dcbdd43598c + .quad 0x6f117b689f0c65a8 + + // 2 * G + + .quad 0x8a99a56042b4d5a8 + .quad 0x8f2b810c4e60acf6 + .quad 0xe09e236bb16e37aa + .quad 0x6bb595a669c92555 + .quad 0x9224e7fc933c71d7 + .quad 0x9f469d967a0ff5b5 + .quad 0x5aa69a65e1d60702 + .quad 0x590c063fa87d2e2e + .quad 0x43faa8b3a59b7a5f + .quad 0x36c16bdd5d9acf78 + .quad 0x500fa0840b3d6a31 + .quad 0x701af5b13ea50b73 + + // 3 * G + + .quad 0x56611fe8a4fcd265 + .quad 0x3bd353fde5c1ba7d + .quad 0x8131f31a214bd6bd + .quad 0x2ab91587555bda62 + .quad 0xaf25b0a84cee9730 + .quad 0x025a8430e8864b8a + .quad 0xc11b50029f016732 + .quad 0x7a164e1b9a80f8f4 + .quad 0x14ae933f0dd0d889 + .quad 0x589423221c35da62 + .quad 0xd170e5458cf2db4c + .quad 0x5a2826af12b9b4c6 + + // 4 * G + + .quad 0x95fe050a056818bf + .quad 0x327e89715660faa9 + .quad 0xc3e8e3cd06a05073 + .quad 0x27933f4c7445a49a + .quad 0x287351b98efc099f + .quad 0x6765c6f47dfd2538 + .quad 0xca348d3dfb0a9265 + .quad 0x680e910321e58727 + .quad 0x5a13fbe9c476ff09 + .quad 0x6e9e39457b5cc172 + .quad 0x5ddbdcf9102b4494 + .quad 0x7f9d0cbf63553e2b + + // 5 * G + + .quad 0x7f9182c3a447d6ba + .quad 0xd50014d14b2729b7 + .quad 0xe33cf11cb864a087 + .quad 0x154a7e73eb1b55f3 + .quad 0xa212bc4408a5bb33 + .quad 0x8d5048c3c75eed02 + .quad 0xdd1beb0c5abfec44 + .quad 0x2945ccf146e206eb + .quad 0xbcbbdbf1812a8285 + .quad 0x270e0807d0bdd1fc + .quad 0xb41b670b1bbda72d + .quad 0x43aabe696b3bb69a + + // 6 * G + + .quad 0x499806b67b7d8ca4 + .quad 0x575be28427d22739 + .quad 0xbb085ce7204553b9 + .quad 0x38b64c41ae417884 + .quad 0x3a0ceeeb77157131 + .quad 0x9b27158900c8af88 + .quad 0x8065b668da59a736 + .quad 0x51e57bb6a2cc38bd + .quad 0x85ac326702ea4b71 + .quad 0xbe70e00341a1bb01 + .quad 0x53e4a24b083bc144 + .quad 0x10b8e91a9f0d61e3 + + // 7 * G + + .quad 0xba6f2c9aaa3221b1 + .quad 0x6ca021533bba23a7 + .quad 0x9dea764f92192c3a + .quad 0x1d6edd5d2e5317e0 + .quad 0x6b1a5cd0944ea3bf + .quad 0x7470353ab39dc0d2 + .quad 0x71b2528228542e49 + .quad 0x461bea69283c927e + .quad 0xf1836dc801b8b3a2 + .quad 0xb3035f47053ea49a + .quad 0x529c41ba5877adf3 + .quad 0x7a9fbb1c6a0f90a7 + + // 8 * G + + .quad 0xe2a75dedf39234d9 + .quad 0x963d7680e1b558f9 + .quad 0x2c2741ac6e3c23fb + .quad 0x3a9024a1320e01c3 + .quad 0x59b7596604dd3e8f + .quad 0x6cb30377e288702c + .quad 0xb1339c665ed9c323 + .quad 0x0915e76061bce52f + .quad 0xe7c1f5d9c9a2911a + .quad 0xb8a371788bcca7d7 + .quad 0x636412190eb62a32 + .quad 0x26907c5c2ecc4e95 + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif From 5ec77506bfaf2664534c35e0aecc127fa18919b9 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Sat, 22 Jul 2023 23:57:31 +0000 Subject: [PATCH 36/42] Avoid using assembler macros and rename labels in the functions for RSA This patch updates the functions used by RSA signing to avoid using assembler macros because the `delocate` utility in BoringSSL cannot parse them. Also, the offsets in load/store instructions are simplified so that `delocate` can understand them as well. Also, the labels having overlapping names are renamed so that aws-lc can compile them without duplicate symbol found errors. 
s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/3750058af0886a4c912aa1ad673ac54136bdb44c --- arm/fastmul/bignum_emontredc_8n.S | 57 +++-- arm/fastmul/bignum_kmul_16_32.S | 60 +++-- arm/fastmul/bignum_kmul_32_64.S | 400 +++++++++++++++++++++++++----- arm/fastmul/bignum_ksqr_16_32.S | 60 +++-- arm/fastmul/bignum_ksqr_32_64.S | 350 +++++++++++++++++++++----- arm/generic/bignum_ge.S | 30 +-- arm/generic/bignum_mul.S | 16 +- arm/generic/bignum_optsub.S | 8 +- arm/generic/bignum_sqr.S | 22 +- 9 files changed, 767 insertions(+), 236 deletions(-) diff --git a/arm/fastmul/bignum_emontredc_8n.S b/arm/fastmul/bignum_emontredc_8n.S index f66d536048..0876ddea8b 100644 --- a/arm/fastmul/bignum_emontredc_8n.S +++ b/arm/fastmul/bignum_emontredc_8n.S @@ -28,21 +28,20 @@ // t,h should not overlap w,z // --------------------------------------------------------------------------- -.macro muldiffnadd b,a, c,h,l,t, x,y, w,z - subs \t, \x, \y - cneg \t, \t, cc - csetm \c, cc - subs \h, \w, \z - cneg \h, \h, cc - mul \l, \t, \h - umulh \h, \t, \h - cinv \c, \c, cc - adds xzr, \c, #1 - eor \l, \l, \c - adcs \a, \a, \l - eor \h, \h, \c - adcs \b, \b, \h -.endm +#define muldiffnadd(b,a, c,h,l,t, x,y, w,z) \ + subs t, x, y ; \ + cneg t, t, cc ; \ + csetm c, cc ; \ + subs h, w, z ; \ + cneg h, h, cc ; \ + mul l, t, h ; \ + umulh h, t, h ; \ + cinv c, c, cc ; \ + adds xzr, c, #1 ; \ + eor l, l, c ; \ + adcs a, a, l ; \ + eor h, h, c ; \ + adcs b, b, h // The inputs, though k gets processed so we use a different name @@ -159,31 +158,31 @@ // Now add in all the "complicated" terms. - muldiffnadd u6,u5, c,h,l,t, a2,a3, b3,b2 + muldiffnadd (u6,u5, c,h,l,t, a2,a3, b3,b2) adc u7, u7, c - muldiffnadd u2,u1, c,h,l,t, a0,a1, b1,b0 + muldiffnadd (u2,u1, c,h,l,t, a0,a1, b1,b0) adcs u3, u3, c adcs u4, u4, c adcs u5, u5, c adcs u6, u6, c adc u7, u7, c - muldiffnadd u5,u4, c,h,l,t, a1,a3, b3,b1 + muldiffnadd (u5,u4, c,h,l,t, a1,a3, b3,b1) adcs u6, u6, c adc u7, u7, c - muldiffnadd u3,u2, c,h,l,t, a0,a2, b2,b0 + muldiffnadd (u3,u2, c,h,l,t, a0,a2, b2,b0) adcs u4, u4, c adcs u5, u5, c adcs u6, u6, c adc u7, u7, c - muldiffnadd u4,u3, c,h,l,t, a0,a3, b3,b0 + muldiffnadd (u4,u3, c,h,l,t, a0,a3, b3,b0) adcs u5, u5, c adcs u6, u6, c adc u7, u7, c - muldiffnadd u4,u3, c,h,l,t, a1,a2, b2,b1 + muldiffnadd (u4,u3, c,h,l,t, a1,a2, b2,b1) adcs c1, u5, c adcs c2, u6, c adc c3, u7, c @@ -212,7 +211,7 @@ stp x19, x20, [sp, #-16]! lsr k4m1, x0, #2 mov i, k4m1 subs c, k4m1, #1 - bcc end + bcc bignum_emontredc_8n_end mov tc, xzr lsl k4m1, c, #5 @@ -220,7 +219,7 @@ stp x19, x20, [sp, #-16]! // Rather than propagating the carry to the end each time, we // stop at the "natural" end and store top carry in tc as a bitmask. -outerloop: +bignum_emontredc_8n_outerloop: // Load [u3;u2;u1;u0] = bottom 4 digits of the input at current window @@ -326,9 +325,9 @@ outerloop: // Repeated multiply-add block to do the k/4-1 remaining 4-digit chunks - cbz k4m1, madddone + cbz k4m1, bignum_emontredc_8n_madddone mov j, k4m1 -maddloop: +bignum_emontredc_8n_maddloop: add m, m, #32 add z, z, #32 @@ -336,8 +335,8 @@ maddloop: ldp b2, b3, [m, #16] madd4 subs j, j, #32 - bne maddloop -madddone: + bne bignum_emontredc_8n_maddloop +bignum_emontredc_8n_madddone: // Add the carry out to the existing z contents, propagating the // top carry tc up by 32 places as we move "leftwards". 
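+// Gloss (ours, not in the original): each muldiffnadd invocation above
+// accumulates one Karatsuba-style cross product into the running sum,
+// essentially
+//
+//     [b;a] += (x - y) * (w - z)      // as a signed quantity
+//
+// leaving the sign mask in c so the caller can complete the sign
+// extension with the adcs/adc chains that follow each invocation.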
@@ -362,13 +361,13 @@ madddone: add z, z, #32 subs i, i, #1 - bne outerloop + bne bignum_emontredc_8n_outerloop // Return the top carry as 0 or 1 (it's currently a bitmask) neg x0, tc -end: +bignum_emontredc_8n_end: ldp x27, x28, [sp], #16 ldp x25, x26, [sp], #16 ldp x23, x24, [sp], #16 diff --git a/arm/fastmul/bignum_kmul_16_32.S b/arm/fastmul/bignum_kmul_16_32.S index 11dc1a7231..2367b69891 100644 --- a/arm/fastmul/bignum_kmul_16_32.S +++ b/arm/fastmul/bignum_kmul_16_32.S @@ -53,7 +53,7 @@ S2N_BN_SYMBOL(bignum_kmul_16_32): // Compute L = x_lo * y_lo in bottom half of buffer (size 8 x 8 -> 16) - bl local_mul_8_16 + bl bignum_kmul_16_32_local_mul_8_16 // Compute absolute difference [t..] = |x_lo - x_hi| // and the sign s = sgn(x_lo - x_hi) as a bitmask (all 1s for negative) @@ -102,7 +102,7 @@ S2N_BN_SYMBOL(bignum_kmul_16_32): add x0, z, #128 add x1, x, #64 add x2, y, #64 - bl local_mul_8_16 + bl bignum_kmul_16_32_local_mul_8_16 // Compute the other absolute difference [t+8..] = |y_hi - y_lo| // Collect the combined product sign bitmask (all 1s for negative) in s @@ -150,38 +150,56 @@ S2N_BN_SYMBOL(bignum_kmul_16_32): // Compute H' = H + L_top in place of H (it cannot overflow) // First add 8-sized block then propagate carry through next 8 - .set I, 0 - - ldp x10, x11, [z, #128+8*I] - ldp x12, x13, [z, #64+8*I] + ldp x10, x11, [z, #128] + ldp x12, x13, [z, #64] adds x10, x10, x12 adcs x11, x11, x13 - stp x10, x11, [z, #128+8*I] - .set I, (I+2) + stp x10, x11, [z, #128] + + ldp x10, x11, [z, #128+16] + ldp x12, x13, [z, #64+16] + adcs x10, x10, x12 + adcs x11, x11, x13 + stp x10, x11, [z, #128+16] -.rep 3 - ldp x10, x11, [z, #128+8*I] - ldp x12, x13, [z, #64+8*I] + ldp x10, x11, [z, #128+32] + ldp x12, x13, [z, #64+32] adcs x10, x10, x12 adcs x11, x11, x13 - stp x10, x11, [z, #128+8*I] - .set I, (I+2) -.endr + stp x10, x11, [z, #128+32] + + ldp x10, x11, [z, #128+48] + ldp x12, x13, [z, #64+48] + adcs x10, x10, x12 + adcs x11, x11, x13 + stp x10, x11, [z, #128+48] + + ldp x10, x11, [z, #128+64] + adcs x10, x10, xzr + adcs x11, x11, xzr + stp x10, x11, [z, #128+64] + + ldp x10, x11, [z, #128+80] + adcs x10, x10, xzr + adcs x11, x11, xzr + stp x10, x11, [z, #128+80] + + ldp x10, x11, [z, #128+96] + adcs x10, x10, xzr + adcs x11, x11, xzr + stp x10, x11, [z, #128+96] -.rep 4 - ldp x10, x11, [z, #128+8*I] + ldp x10, x11, [z, #128+112] adcs x10, x10, xzr adcs x11, x11, xzr - stp x10, x11, [z, #128+8*I] - .set I, (I+2) -.endr + stp x10, x11, [z, #128+112] // Compute M = |x_lo - x_hi| * |y_hi - y_lo| in [t+16...], size 16 add x0, t, #128 mov x1, t add x2, t, #64 - bl local_mul_8_16 + bl bignum_kmul_16_32_local_mul_8_16 // Add the interlocking H' and L_bot terms, storing in registers x15..x0 // Intercept the carry at the 8 + 16 = 24 position and store it in x. 
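+// For reference, the Karatsuba shape this function implements is (our
+// summary, with b = 2^512 for the 8-digit halves):
+//
+//     x * y = L + b * (L + H + s * M) + b^2 * H
+//
+// where L = x_lo * y_lo, H = x_hi * y_hi,
+//       M = |x_lo - x_hi| * |y_hi - y_lo|
+// and   s = sgn(x_lo - x_hi) * sgn(y_hi - y_lo).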
@@ -315,7 +333,7 @@ S2N_BN_SYMBOL(bignum_kmul_16_32): // Local copy of bignum_mul_8_16 without the scratch register save/restore // ----------------------------------------------------------------------- -local_mul_8_16: +bignum_kmul_16_32_local_mul_8_16: ldp x3, x4, [x1] ldp x7, x8, [x2] ldp x5, x6, [x1, #16] diff --git a/arm/fastmul/bignum_kmul_32_64.S b/arm/fastmul/bignum_kmul_32_64.S index 42286ff36c..467d298697 100644 --- a/arm/fastmul/bignum_kmul_32_64.S +++ b/arm/fastmul/bignum_kmul_32_64.S @@ -22,7 +22,7 @@ .balign 4 #define K 16 -#define L (K/2) +#define L 8 // this is (K/2) #define z x19 #define x x20 @@ -49,7 +49,7 @@ S2N_BN_SYMBOL(bignum_kmul_32_64): // Compute L = x_lo * y_lo in bottom half of buffer (size 16 x 16 -> 32) - bl local_kmul_16_32 + bl bignum_kmul_32_64_local_kmul_16_32 // Compute H = x_hi * y_hi in top half of buffer (size 16 x 16 -> 32) @@ -57,7 +57,7 @@ S2N_BN_SYMBOL(bignum_kmul_32_64): add x1, x, #8*K add x2, y, #8*K mov x3, t - bl local_kmul_16_32 + bl bignum_kmul_32_64_local_kmul_16_32 // Compute absolute difference [t..] = |x_lo - x_hi| // and the sign x = sgn(x_lo - x_hi) as a bitmask (all 1s for negative) @@ -208,82 +208,141 @@ S2N_BN_SYMBOL(bignum_kmul_32_64): adcs x0, x0, xzr eor x1, x1, y adcs x1, x1, xzr - stp x0, x1, [t, #8*K] + stp x0, x1, [t, #128] eor x2, x2, y adcs x2, x2, xzr eor x3, x3, y adcs x3, x3, xzr - stp x2, x3, [t, #8*K+16] + stp x2, x3, [t, #128+16] eor x4, x4, y adcs x4, x4, xzr eor x5, x5, y adcs x5, x5, xzr - stp x4, x5, [t, #8*K+32] + stp x4, x5, [t, #128+32] eor x6, x6, y adcs x6, x6, xzr eor x7, x7, y adcs x7, x7, xzr - stp x6, x7, [t, #8*K+48] + stp x6, x7, [t, #128+48] eor x8, x8, y adcs x8, x8, xzr eor x9, x9, y adcs x9, x9, xzr - stp x8, x9, [t, #8*K+64] + stp x8, x9, [t, #128+64] eor x10, x10, y adcs x10, x10, xzr eor x11, x11, y adcs x11, x11, xzr - stp x10, x11, [t, #8*K+80] + stp x10, x11, [t, #128+80] eor x12, x12, y adcs x12, x12, xzr eor x13, x13, y adcs x13, x13, xzr - stp x12, x13, [t, #8*K+96] + stp x12, x13, [t, #128+96] eor x14, x14, y adcs x14, x14, xzr eor x15, x15, y adc x15, x15, xzr - stp x14, x15, [t, #8*K+112] + stp x14, x15, [t, #128+112] eor y, y, x // Compute H' = H + L_top in place of H (it cannot overflow) - ldp x0, x1, [z, #16*K] + ldp x0, x1, [z, #16*16] ldp x2, x3, [z, #16*L] adds x0, x0, x2 adcs x1, x1, x3 - stp x0, x1, [z, #16*K] + stp x0, x1, [z, #16*16] - .set I, 1 - .rep (L-1) - ldp x0, x1, [z, #16*(K+I)] - ldp x2, x3, [z, #16*(L+I)] + ldp x0, x1, [z, #16*17] + ldp x2, x3, [z, #16*9] adcs x0, x0, x2 adcs x1, x1, x3 - stp x0, x1, [z, #16*(K+I)] - .set I, (I+1) - .endr + stp x0, x1, [z, #16*17] - .rep (L-1) - ldp x0, x1, [z, #16*(K+I)] + ldp x0, x1, [z, #16*18] + ldp x2, x3, [z, #16*10] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*18] + + ldp x0, x1, [z, #16*19] + ldp x2, x3, [z, #16*11] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*19] + + ldp x0, x1, [z, #16*20] + ldp x2, x3, [z, #16*12] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*20] + + ldp x0, x1, [z, #16*21] + ldp x2, x3, [z, #16*13] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*21] + + ldp x0, x1, [z, #16*22] + ldp x2, x3, [z, #16*14] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*22] + + ldp x0, x1, [z, #16*23] + ldp x2, x3, [z, #16*15] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*23] + + ldp x0, x1, [z, #16*24] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*24] + + ldp x0, x1, [z, #16*25] adcs x0, x0, xzr adcs x1, x1, xzr - stp x0, x1, [z, #16*(K+I)] - 
.set I, (I+1) - .endr + stp x0, x1, [z, #16*25] - ldp x0, x1, [z, #16*(K+I)] + ldp x0, x1, [z, #16*26] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*26] + + ldp x0, x1, [z, #16*27] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*27] + + ldp x0, x1, [z, #16*28] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*28] + + ldp x0, x1, [z, #16*29] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*29] + + ldp x0, x1, [z, #16*30] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*30] + + ldp x0, x1, [z, #16*31] adcs x0, x0, xzr adc x1, x1, xzr - stp x0, x1, [z, #16*(K+I)] + stp x0, x1, [z, #16*31] // Compute M = |x_lo - x_hi| * |y_hi - y_lo|, size 32 @@ -291,37 +350,107 @@ S2N_BN_SYMBOL(bignum_kmul_32_64): mov x1, t add x2, t, #8*K add x3, t, #32*K - bl local_kmul_16_32 + bl bignum_kmul_32_64_local_kmul_16_32 // Add the interlocking H' and L_bot terms // Intercept the carry at the 3k position and store it in x. // Again, we no longer need the input x was pointing at. - ldp x0, x1, [z, #16*K] + ldp x0, x1, [z, #16*16] ldp x2, x3, [z] adds x0, x0, x2 adcs x1, x1, x3 - stp x0, x1, [z, #16*L] + stp x0, x1, [z, #16*8] + + ldp x0, x1, [z, #16*17] + ldp x2, x3, [z, #16*1] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*9] - .set I, 1 - .rep (L-1) - ldp x0, x1, [z, #16*(K+I)] - ldp x2, x3, [z, #16*I] + ldp x0, x1, [z, #16*18] + ldp x2, x3, [z, #16*2] adcs x0, x0, x2 adcs x1, x1, x3 - stp x0, x1, [z, #16*(L+I)] - .set I, (I+1) - .endr - - .set I, 0 - .rep L - ldp x0, x1, [z, #16*(K+I)] - ldp x2, x3, [z, #16*(3*L+I)] + stp x0, x1, [z, #16*10] + + ldp x0, x1, [z, #16*19] + ldp x2, x3, [z, #16*3] adcs x0, x0, x2 adcs x1, x1, x3 - stp x0, x1, [z, #16*(K+I)] - .set I, (I+1) - .endr + stp x0, x1, [z, #16*11] + + ldp x0, x1, [z, #16*20] + ldp x2, x3, [z, #16*4] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*12] + + ldp x0, x1, [z, #16*21] + ldp x2, x3, [z, #16*5] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*13] + + ldp x0, x1, [z, #16*22] + ldp x2, x3, [z, #16*6] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*14] + + ldp x0, x1, [z, #16*23] + ldp x2, x3, [z, #16*7] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*15] + + ldp x0, x1, [z, #16*16] + ldp x2, x3, [z, #16*24] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*16] + + ldp x0, x1, [z, #16*17] + ldp x2, x3, [z, #16*25] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*17] + + ldp x0, x1, [z, #16*18] + ldp x2, x3, [z, #16*26] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*18] + + ldp x0, x1, [z, #16*19] + ldp x2, x3, [z, #16*27] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*19] + + ldp x0, x1, [z, #16*20] + ldp x2, x3, [z, #16*28] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*20] + + ldp x0, x1, [z, #16*21] + ldp x2, x3, [z, #16*29] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*21] + + ldp x0, x1, [z, #16*22] + ldp x2, x3, [z, #16*30] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*22] + + ldp x0, x1, [z, #16*23] + ldp x2, x3, [z, #16*31] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*23] cset x, cs @@ -329,17 +458,133 @@ S2N_BN_SYMBOL(bignum_kmul_32_64): cmn y, y - .set I, L - .rep K - ldp x0, x1, [z, #16*I] - ldp x2, x3, [t, #8*K+16*I] + ldp x0, x1, [z, #128] + ldp x2, x3, [t, #128+128] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #128] + + ldp x0, x1, [z, #144] + ldp x2, x3, [t, #128+144] + eor x2, x2, y + adcs x0, x0, x2 + 
eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #144] + + ldp x0, x1, [z, #160] + ldp x2, x3, [t, #128+160] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #160] + + ldp x0, x1, [z, #176] + ldp x2, x3, [t, #128+176] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #176] + + ldp x0, x1, [z, #192] + ldp x2, x3, [t, #128+192] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #192] + + ldp x0, x1, [z, #208] + ldp x2, x3, [t, #128+208] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #208] + + ldp x0, x1, [z, #224] + ldp x2, x3, [t, #128+224] eor x2, x2, y adcs x0, x0, x2 eor x3, x3, y adcs x1, x1, x3 - stp x0, x1, [z, #16*I] - .set I, (I+1) - .endr + stp x0, x1, [z, #224] + + ldp x0, x1, [z, #240] + ldp x2, x3, [t, #128+240] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #240] + + ldp x0, x1, [z, #256] + ldp x2, x3, [t, #128+256] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #256] + + ldp x0, x1, [z, #272] + ldp x2, x3, [t, #128+272] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #272] + + ldp x0, x1, [z, #288] + ldp x2, x3, [t, #128+288] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #288] + + ldp x0, x1, [z, #304] + ldp x2, x3, [t, #128+304] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #304] + + ldp x0, x1, [z, #320] + ldp x2, x3, [t, #128+320] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #320] + + ldp x0, x1, [z, #336] + ldp x2, x3, [t, #128+336] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #336] + + ldp x0, x1, [z, #352] + ldp x2, x3, [t, #128+352] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #352] + + ldp x0, x1, [z, #368] + ldp x2, x3, [t, #128+368] + eor x2, x2, y + adcs x0, x0, x2 + eor x3, x3, y + adcs x1, x1, x3 + stp x0, x1, [z, #368] // Get the next digits effectively resulting so far starting at 3k // [...,c,c,c,c,x] @@ -349,24 +594,45 @@ S2N_BN_SYMBOL(bignum_kmul_32_64): // Now propagate through the top quarter of the result - ldp x0, x1, [z, #16*3*L] + ldp x0, x1, [z, #16*24] adds x0, x0, x adcs x1, x1, c - stp x0, x1, [z, #16*3*L] + stp x0, x1, [z, #16*24] + + ldp x0, x1, [z, #16*25] + adcs x0, x0, c + adcs x1, x1, c + stp x0, x1, [z, #16*25] + + ldp x0, x1, [z, #16*26] + adcs x0, x0, c + adcs x1, x1, c + stp x0, x1, [z, #16*26] + + ldp x0, x1, [z, #16*27] + adcs x0, x0, c + adcs x1, x1, c + stp x0, x1, [z, #16*27] + + ldp x0, x1, [z, #16*28] + adcs x0, x0, c + adcs x1, x1, c + stp x0, x1, [z, #16*28] + + ldp x0, x1, [z, #16*29] + adcs x0, x0, c + adcs x1, x1, c + stp x0, x1, [z, #16*29] - .set I, 3*L+1 - .rep (L-2) - ldp x0, x1, [z, #16*I] + ldp x0, x1, [z, #16*30] adcs x0, x0, c adcs x1, x1, c - stp x0, x1, [z, #16*I] - .set I, (I+1) - .endr + stp x0, x1, [z, #16*30] - ldp x0, x1, [z, #16*I] + ldp x0, x1, [z, #16*31] adcs x0, x0, c adc x1, x1, c - stp x0, x1, [z, #16*I] + stp x0, x1, [z, #16*31] // Restore and return @@ -382,7 +648,7 @@ S2N_BN_SYMBOL(bignum_kmul_32_64): // only preserves the key registers we need to be stable in the main code. // This includes in turn a copy of bignum_mul_8_16. -local_kmul_16_32: +bignum_kmul_32_64_local_kmul_16_32: stp x19, x20, [sp, -16]! stp x21, x22, [sp, -16]! 
stp x23, x30, [sp, -16]! @@ -390,7 +656,7 @@ local_kmul_16_32: mov x26, x1 mov x27, x2 mov x28, x3 - bl local_mul_8_16 + bl bignum_kmul_32_64_local_mul_8_16 ldp x10, x11, [x26] ldp x8, x9, [x26, #64] subs x10, x10, x8 @@ -432,7 +698,7 @@ local_kmul_16_32: add x0, x25, #0x80 add x1, x26, #0x40 add x2, x27, #0x40 - bl local_mul_8_16 + bl bignum_kmul_32_64_local_mul_8_16 ldp x10, x11, [x27] ldp x8, x9, [x27, #64] subs x10, x8, x10 @@ -511,7 +777,7 @@ local_kmul_16_32: add x0, x28, #0x80 mov x1, x28 add x2, x28, #0x40 - bl local_mul_8_16 + bl bignum_kmul_32_64_local_mul_8_16 ldp x0, x1, [x25] ldp x16, x17, [x25, #128] adds x0, x0, x16 @@ -617,7 +883,7 @@ local_kmul_16_32: ldp x19, x20, [sp], #16 ret -local_mul_8_16: +bignum_kmul_32_64_local_mul_8_16: ldp x3, x4, [x1] ldp x7, x8, [x2] ldp x5, x6, [x1, #16] diff --git a/arm/fastmul/bignum_ksqr_16_32.S b/arm/fastmul/bignum_ksqr_16_32.S index 9d7c9e7af0..bb62a9c0ca 100644 --- a/arm/fastmul/bignum_ksqr_16_32.S +++ b/arm/fastmul/bignum_ksqr_16_32.S @@ -48,7 +48,7 @@ S2N_BN_SYMBOL(bignum_ksqr_16_32): // Compute L = x_lo * y_lo in bottom half of buffer (size 8 x 8 -> 16) - bl local_sqr_8_16 + bl bignum_ksqr_16_32_local_sqr_8_16 // Compute absolute difference [t..] = |x_lo - x_hi| @@ -95,42 +95,60 @@ S2N_BN_SYMBOL(bignum_ksqr_16_32): add x0, z, #128 add x1, x, #64 - bl local_sqr_8_16 + bl bignum_ksqr_16_32_local_sqr_8_16 // Compute H' = H + L_top in place of H (it cannot overflow) // First add 8-sized block then propagate carry through next 8 - .set I, 0 - - ldp x10, x11, [z, #128+8*I] - ldp x12, x13, [z, #64+8*I] + ldp x10, x11, [z, #128] + ldp x12, x13, [z, #64] adds x10, x10, x12 adcs x11, x11, x13 - stp x10, x11, [z, #128+8*I] - .set I, (I+2) + stp x10, x11, [z, #128] + + ldp x10, x11, [z, #128+16] + ldp x12, x13, [z, #64+16] + adcs x10, x10, x12 + adcs x11, x11, x13 + stp x10, x11, [z, #128+16] -.rep 3 - ldp x10, x11, [z, #128+8*I] - ldp x12, x13, [z, #64+8*I] + ldp x10, x11, [z, #128+32] + ldp x12, x13, [z, #64+32] adcs x10, x10, x12 adcs x11, x11, x13 - stp x10, x11, [z, #128+8*I] - .set I, (I+2) -.endr + stp x10, x11, [z, #128+32] + + ldp x10, x11, [z, #128+48] + ldp x12, x13, [z, #64+48] + adcs x10, x10, x12 + adcs x11, x11, x13 + stp x10, x11, [z, #128+48] + + ldp x10, x11, [z, #128+64] + adcs x10, x10, xzr + adcs x11, x11, xzr + stp x10, x11, [z, #128+64] + + ldp x10, x11, [z, #128+80] + adcs x10, x10, xzr + adcs x11, x11, xzr + stp x10, x11, [z, #128+80] + + ldp x10, x11, [z, #128+96] + adcs x10, x10, xzr + adcs x11, x11, xzr + stp x10, x11, [z, #128+96] -.rep 4 - ldp x10, x11, [z, #128+8*I] + ldp x10, x11, [z, #128+112] adcs x10, x10, xzr adcs x11, x11, xzr - stp x10, x11, [z, #128+8*I] - .set I, (I+2) -.endr + stp x10, x11, [z, #128+112] // Compute M = |x_lo - x_hi| * |y_hi - y_lo| in [t+8...], size 16 add x0, t, #64 mov x1, t - bl local_sqr_8_16 + bl bignum_ksqr_16_32_local_sqr_8_16 // Add the interlocking H' and L_bot terms, storing in registers x15..x0 // Intercept the carry at the 8 + 16 = 24 position and store it in x. @@ -244,7 +262,7 @@ S2N_BN_SYMBOL(bignum_ksqr_16_32): // the same as bignum_sqr_8_16 without the scratch register preservation. 
// ----------------------------------------------------------------------------- -local_sqr_8_16: +bignum_ksqr_16_32_local_sqr_8_16: ldp x2, x3, [x1] ldp x4, x5, [x1, #16] ldp x6, x7, [x1, #32] diff --git a/arm/fastmul/bignum_ksqr_32_64.S b/arm/fastmul/bignum_ksqr_32_64.S index 0405db1af5..fbd3c47bec 100644 --- a/arm/fastmul/bignum_ksqr_32_64.S +++ b/arm/fastmul/bignum_ksqr_32_64.S @@ -21,7 +21,7 @@ .balign 4 #define K 16 -#define L (K/2) +#define L 8 // (K/2) #define z x19 #define x x20 @@ -43,45 +43,104 @@ S2N_BN_SYMBOL(bignum_ksqr_32_64): // Compute L = x_lo * y_lo in bottom half of buffer (size 16 x 16 -> 32) - bl local_ksqr_16_32 + bl bignum_ksqr_32_64_local_ksqr_16_32 // Compute H = x_hi * y_hi in top half of buffer (size 16 x 16 -> 32) add x0, z, #16*K add x1, x, #8*K mov x2, t - bl local_ksqr_16_32 + bl bignum_ksqr_32_64_local_ksqr_16_32 // Compute H' = H + L_top in place of H (it cannot overflow) - ldp x0, x1, [z, #16*K] - ldp x2, x3, [z, #16*L] + ldp x0, x1, [z, #16*16] + ldp x2, x3, [z, #16*8] adds x0, x0, x2 adcs x1, x1, x3 - stp x0, x1, [z, #16*K] + stp x0, x1, [z, #16*16] - .set I, 1 - .rep (L-1) - ldp x0, x1, [z, #16*(K+I)] - ldp x2, x3, [z, #16*(L+I)] + ldp x0, x1, [z, #16*17] + ldp x2, x3, [z, #16*9] adcs x0, x0, x2 adcs x1, x1, x3 - stp x0, x1, [z, #16*(K+I)] - .set I, (I+1) - .endr + stp x0, x1, [z, #16*17] - .rep (L-1) - ldp x0, x1, [z, #16*(K+I)] + ldp x0, x1, [z, #16*18] + ldp x2, x3, [z, #16*10] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*18] + + ldp x0, x1, [z, #16*19] + ldp x2, x3, [z, #16*11] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*19] + + ldp x0, x1, [z, #16*20] + ldp x2, x3, [z, #16*12] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*20] + + ldp x0, x1, [z, #16*21] + ldp x2, x3, [z, #16*13] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*21] + + ldp x0, x1, [z, #16*22] + ldp x2, x3, [z, #16*14] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*22] + + ldp x0, x1, [z, #16*23] + ldp x2, x3, [z, #16*15] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*23] + + ldp x0, x1, [z, #16*24] adcs x0, x0, xzr adcs x1, x1, xzr - stp x0, x1, [z, #16*(K+I)] - .set I, (I+1) - .endr + stp x0, x1, [z, #16*24] - ldp x0, x1, [z, #16*(K+I)] + ldp x0, x1, [z, #16*25] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*25] + + ldp x0, x1, [z, #16*26] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*26] + + ldp x0, x1, [z, #16*27] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*27] + + ldp x0, x1, [z, #16*28] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*28] + + ldp x0, x1, [z, #16*29] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*29] + + ldp x0, x1, [z, #16*30] + adcs x0, x0, xzr + adcs x1, x1, xzr + stp x0, x1, [z, #16*30] + + ldp x0, x1, [z, #16*31] adcs x0, x0, xzr adc x1, x1, xzr - stp x0, x1, [z, #16*(K+I)] + stp x0, x1, [z, #16*31] // Compute absolute difference [t..] = |x_lo - x_hi| @@ -182,37 +241,107 @@ S2N_BN_SYMBOL(bignum_ksqr_32_64): add x0, t, #8*K mov x1, t add x2, t, #24*K - bl local_ksqr_16_32 + bl bignum_ksqr_32_64_local_ksqr_16_32 // Add the interlocking H' and L_bot terms // Intercept the carry at the 3k position and store it in x. // (Note that we no longer need the input x was pointing at.) 
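// (Recall the recombination being used: writing x = 2^k*x_hi + x_lo with
// k the half-width, x^2 = L + 2^k*(L + H - M) + 2^(2k)*H, where L = x_lo^2,
// H = x_hi^2 and M = (x_lo - x_hi)^2, so M enters with a minus sign below.)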
- ldp x0, x1, [z, #16*K] + ldp x0, x1, [z, #16*16] ldp x2, x3, [z] adds x0, x0, x2 adcs x1, x1, x3 - stp x0, x1, [z, #16*L] + stp x0, x1, [z, #16*8] + + ldp x0, x1, [z, #16*17] + ldp x2, x3, [z, #16*1] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*9] + + ldp x0, x1, [z, #16*18] + ldp x2, x3, [z, #16*2] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*10] + + ldp x0, x1, [z, #16*19] + ldp x2, x3, [z, #16*3] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*11] + + ldp x0, x1, [z, #16*20] + ldp x2, x3, [z, #16*4] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*12] + + ldp x0, x1, [z, #16*21] + ldp x2, x3, [z, #16*5] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*13] + + ldp x0, x1, [z, #16*22] + ldp x2, x3, [z, #16*6] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*14] + + ldp x0, x1, [z, #16*23] + ldp x2, x3, [z, #16*7] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*15] + + ldp x0, x1, [z, #16*16] + ldp x2, x3, [z, #16*24] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*16] + + ldp x0, x1, [z, #16*17] + ldp x2, x3, [z, #16*25] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*17] + + ldp x0, x1, [z, #16*18] + ldp x2, x3, [z, #16*26] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*18] - .set I, 1 - .rep (L-1) - ldp x0, x1, [z, #16*(K+I)] - ldp x2, x3, [z, #16*I] + ldp x0, x1, [z, #16*19] + ldp x2, x3, [z, #16*27] adcs x0, x0, x2 adcs x1, x1, x3 - stp x0, x1, [z, #16*(L+I)] - .set I, (I+1) - .endr - - .set I, 0 - .rep L - ldp x0, x1, [z, #16*(K+I)] - ldp x2, x3, [z, #16*(3*L+I)] + stp x0, x1, [z, #16*19] + + ldp x0, x1, [z, #16*20] + ldp x2, x3, [z, #16*28] adcs x0, x0, x2 adcs x1, x1, x3 - stp x0, x1, [z, #16*(K+I)] - .set I, (I+1) - .endr + stp x0, x1, [z, #16*20] + + ldp x0, x1, [z, #16*21] + ldp x2, x3, [z, #16*29] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*21] + + ldp x0, x1, [z, #16*22] + ldp x2, x3, [z, #16*30] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*22] + + ldp x0, x1, [z, #16*23] + ldp x2, x3, [z, #16*31] + adcs x0, x0, x2 + adcs x1, x1, x3 + stp x0, x1, [z, #16*23] cset x, cs @@ -224,15 +353,95 @@ S2N_BN_SYMBOL(bignum_ksqr_32_64): sbcs x1, x1, x3 stp x0, x1, [z, #16*L] - .set I, L+1 - .rep (K-1) - ldp x0, x1, [z, #16*I] - ldp x2, x3, [t, #16*I] + ldp x0, x1, [z, #16*9] + ldp x2, x3, [t, #16*9] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*9] + + ldp x0, x1, [z, #16*10] + ldp x2, x3, [t, #16*10] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*10] + + ldp x0, x1, [z, #16*11] + ldp x2, x3, [t, #16*11] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*11] + + ldp x0, x1, [z, #16*12] + ldp x2, x3, [t, #16*12] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*12] + + ldp x0, x1, [z, #16*13] + ldp x2, x3, [t, #16*13] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*13] + + ldp x0, x1, [z, #16*14] + ldp x2, x3, [t, #16*14] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*14] + + ldp x0, x1, [z, #16*15] + ldp x2, x3, [t, #16*15] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*15] + + ldp x0, x1, [z, #16*16] + ldp x2, x3, [t, #16*16] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*16] + + ldp x0, x1, [z, #16*17] + ldp x2, x3, [t, #16*17] sbcs x0, x0, x2 sbcs x1, x1, x3 - stp x0, x1, [z, #16*I] - .set I, (I+1) - .endr + stp x0, x1, [z, #16*17] + + ldp x0, x1, [z, #16*18] + ldp x2, x3, [t, #16*18] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*18] + + ldp x0, x1, [z, #16*19] + 
ldp x2, x3, [t, #16*19] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*19] + + ldp x0, x1, [z, #16*20] + ldp x2, x3, [t, #16*20] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*20] + + ldp x0, x1, [z, #16*21] + ldp x2, x3, [t, #16*21] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*21] + + ldp x0, x1, [z, #16*22] + ldp x2, x3, [t, #16*22] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*22] + + ldp x0, x1, [z, #16*23] + ldp x2, x3, [t, #16*23] + sbcs x0, x0, x2 + sbcs x1, x1, x3 + stp x0, x1, [z, #16*23] // Get the next digits effectively resulting so far starting at 3k // [...,c,c,c,c,x] @@ -242,24 +451,45 @@ S2N_BN_SYMBOL(bignum_ksqr_32_64): // Now propagate through the top quarter of the result - ldp x0, x1, [z, #16*3*L] + ldp x0, x1, [z, #16*24] adds x0, x0, x adcs x1, x1, c - stp x0, x1, [z, #16*3*L] + stp x0, x1, [z, #16*24] + + ldp x0, x1, [z, #16*25] + adcs x0, x0, c + adcs x1, x1, c + stp x0, x1, [z, #16*25] + + ldp x0, x1, [z, #16*26] + adcs x0, x0, c + adcs x1, x1, c + stp x0, x1, [z, #16*26] + + ldp x0, x1, [z, #16*27] + adcs x0, x0, c + adcs x1, x1, c + stp x0, x1, [z, #16*27] + + ldp x0, x1, [z, #16*28] + adcs x0, x0, c + adcs x1, x1, c + stp x0, x1, [z, #16*28] + + ldp x0, x1, [z, #16*29] + adcs x0, x0, c + adcs x1, x1, c + stp x0, x1, [z, #16*29] - .set I, 3*L+1 - .rep (L-2) - ldp x0, x1, [z, #16*I] + ldp x0, x1, [z, #16*30] adcs x0, x0, c adcs x1, x1, c - stp x0, x1, [z, #16*I] - .set I, (I+1) - .endr + stp x0, x1, [z, #16*30] - ldp x0, x1, [z, #16*I] + ldp x0, x1, [z, #16*31] adcs x0, x0, c adc x1, x1, c - stp x0, x1, [z, #16*I] + stp x0, x1, [z, #16*31] // Restore @@ -271,7 +501,7 @@ S2N_BN_SYMBOL(bignum_ksqr_32_64): // Local copy of bignum_ksqr_16_32, identical to main one. // This includes in turn a copy of bignum_sqr_8_16. -local_ksqr_16_32: +bignum_ksqr_32_64_local_ksqr_16_32: stp x19, x20, [sp, #-16]! stp x21, x22, [sp, #-16]! stp x23, x24, [sp, #-16]! 
@@ -279,7 +509,7 @@ local_ksqr_16_32: mov x23, x0 mov x24, x1 mov x25, x2 - bl local_sqr_8_16 + bl bignum_ksqr_32_64_local_sqr_8_16 ldp x10, x11, [x24] ldp x8, x9, [x24, #64] subs x10, x10, x8 @@ -320,7 +550,7 @@ local_ksqr_16_32: stp x16, x17, [x25, #48] add x0, x23, #0x80 add x1, x24, #0x40 - bl local_sqr_8_16 + bl bignum_ksqr_32_64_local_sqr_8_16 ldp x10, x11, [x23, #128] ldp x12, x13, [x23, #64] adds x10, x10, x12 @@ -359,7 +589,7 @@ local_ksqr_16_32: stp x10, x11, [x23, #240] add x0, x25, #0x40 mov x1, x25 - bl local_sqr_8_16 + bl bignum_ksqr_32_64_local_sqr_8_16 ldp x0, x1, [x23] ldp x16, x17, [x23, #128] adds x0, x0, x16 @@ -449,7 +679,7 @@ local_ksqr_16_32: ldp x19, x20, [sp], #16 ret -local_sqr_8_16: +bignum_ksqr_32_64_local_sqr_8_16: ldp x2, x3, [x1] ldp x4, x5, [x1, #16] ldp x6, x7, [x1, #32] diff --git a/arm/generic/bignum_ge.S b/arm/generic/bignum_ge.S index 6ba9202f00..a646b47d43 100644 --- a/arm/generic/bignum_ge.S +++ b/arm/generic/bignum_ge.S @@ -35,51 +35,51 @@ S2N_BN_SYMBOL(bignum_ge): // Speculatively form m := m - n and do case split subs m, m, n - bcc ylonger + bcc bignum_ge_ylonger // The case where x is longer or of the same size (m >= n) // Note that CF=1 initially by the fact that we reach this point - cbz n, xtest -xmainloop: + cbz n, bignum_ge_xtest +bignum_ge_xmainloop: ldr a, [x, i, lsl #3] ldr d, [y, i, lsl #3] sbcs xzr, a, d add i, i, #1 sub n, n, #1 - cbnz n, xmainloop -xtest: - cbz m, xskip -xtoploop: + cbnz n, bignum_ge_xmainloop +bignum_ge_xtest: + cbz m, bignum_ge_xskip +bignum_ge_xtoploop: ldr a, [x, i, lsl #3] sbcs xzr, a, xzr add i, i, #1 sub m, m, #1 - cbnz m, xtoploop -xskip: + cbnz m, bignum_ge_xtoploop +bignum_ge_xskip: cset x0, cs ret // The case where y is longer (n > m) // The first "adds" also makes sure CF=1 initially in this branch -ylonger: +bignum_ge_ylonger: adds m, m, n - cbz m, ytoploop + cbz m, bignum_ge_ytoploop sub n, n, m -ymainloop: +bignum_ge_ymainloop: ldr a, [x, i, lsl #3] ldr d, [y, i, lsl #3] sbcs xzr, a, d add i, i, #1 sub m, m, #1 - cbnz m, ymainloop -ytoploop: + cbnz m, bignum_ge_ymainloop +bignum_ge_ytoploop: ldr a, [y, i, lsl #3] sbcs xzr, xzr, a add i, i, #1 sub n, n, #1 - cbnz n, ytoploop + cbnz n, bignum_ge_ytoploop cset x0, cs ret diff --git a/arm/generic/bignum_mul.S b/arm/generic/bignum_mul.S index 85bfad9d32..1da4bf9516 100644 --- a/arm/generic/bignum_mul.S +++ b/arm/generic/bignum_mul.S @@ -43,7 +43,7 @@ S2N_BN_SYMBOL(bignum_mul): // If p = 0 the result is trivial and nothing needs doing - cbz p, end + cbz p, bignum_mul_end // initialize (h,l) = 0, saving c = 0 for inside the loop @@ -53,7 +53,7 @@ S2N_BN_SYMBOL(bignum_mul): // Iterate outer loop from k = 0 ... k = p - 1 producing result digits mov k, xzr -outerloop: +bignum_mul_outerloop: // Zero the carry for this stage @@ -71,7 +71,7 @@ outerloop: // Set loop count i = b - a, and skip everything if it's <= 0 subs i, b, a - bls innerend + bls bignum_mul_innerend // Use temporary pointers xx = x + 8 * a and yy = y + 8 * (k - b) // Increment xx per iteration but just use loop counter with yy @@ -86,7 +86,7 @@ outerloop: // And index using the loop counter i = b - a, ..., i = 1 -innerloop: +bignum_mul_innerloop: ldr a, [xx], #8 ldr b, [yy, i, lsl #3] mul d, a, b @@ -95,18 +95,18 @@ innerloop: adcs h, h, a adc c, c, xzr subs i, i, #1 - bne innerloop + bne bignum_mul_innerloop -innerend: +bignum_mul_innerend: str l, [z, k, lsl #3] mov l, h mov h, c add k, k, #1 cmp k, p - bcc outerloop // Inverted carry flag! + bcc bignum_mul_outerloop // Inverted carry flag! 
-end: +bignum_mul_end: ret #if defined(__linux__) && defined(__ELF__) diff --git a/arm/generic/bignum_optsub.S b/arm/generic/bignum_optsub.S index 5d82a407d4..285536ef74 100644 --- a/arm/generic/bignum_optsub.S +++ b/arm/generic/bignum_optsub.S @@ -35,7 +35,7 @@ S2N_BN_SYMBOL(bignum_optsub): // if k = 0 do nothing. This is also the right top carry in X0 - cbz k, end + cbz k, bignum_optsub_end // Convert p into a strict bitmask (same register in fact) @@ -48,7 +48,7 @@ S2N_BN_SYMBOL(bignum_optsub): // Main loop -loop: +bignum_optsub_loop: ldr a, [x, i] ldr b, [y, i] and b, b, m @@ -56,13 +56,13 @@ loop: str a, [z, i] add i, i, #8 sub k, k, #1 - cbnz k, loop + cbnz k, bignum_optsub_loop // Return (non-inverted) carry flag cset x0, cc -end: +bignum_optsub_end: ret #if defined(__linux__) && defined(__ELF__) diff --git a/arm/generic/bignum_sqr.S b/arm/generic/bignum_sqr.S index 39683061fb..1a75dbddbb 100644 --- a/arm/generic/bignum_sqr.S +++ b/arm/generic/bignum_sqr.S @@ -42,7 +42,7 @@ S2N_BN_SYMBOL(bignum_sqr): // If p = 0 the result is trivial and nothing needs doing - cbz p, end + cbz p, bignum_sqr_end // initialize (hh,ll) = 0 @@ -52,7 +52,7 @@ S2N_BN_SYMBOL(bignum_sqr): // Iterate outer loop from k = 0 ... k = p - 1 producing result digits mov k, xzr -outerloop: +bignum_sqr_outerloop: // First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n // We want to accumulate all x[i] * x[k - i] for bot <= i < top @@ -76,7 +76,7 @@ outerloop: // If htop <= bot then main doubled part of the sum is empty cmp htop, i - bls nosumming + bls bignum_sqr_nosumming // Use a moving pointer for [y] = x[k-i] for the cofactor @@ -86,7 +86,7 @@ outerloop: // Do the main part of the sum x[i] * x[k - i] for 2 * i < k -innerloop: +bignum_sqr_innerloop: ldr a, [x, i, lsl #3] ldr b, [y], #-8 mul d, a, b @@ -96,7 +96,7 @@ innerloop: adc c, c, xzr add i, i, #1 cmp i, htop - bne innerloop + bne bignum_sqr_innerloop // Now double it @@ -106,12 +106,12 @@ innerloop: // If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term -nosumming: +bignum_sqr_nosumming: ands xzr, k, #1 - bne innerend + bne bignum_sqr_innerend cmp i, n - bcs innerend + bcs bignum_sqr_innerend ldr a, [x, i, lsl #3] mul d, a, a @@ -122,7 +122,7 @@ nosumming: // Now add the local sum into the global sum, store and shift -innerend: +bignum_sqr_innerend: adds l, l, ll str l, [z, k, lsl #3] adcs ll, h, hh @@ -130,9 +130,9 @@ innerend: add k, k, #1 cmp k, p - bcc outerloop + bcc bignum_sqr_outerloop -end: +bignum_sqr_end: ret #if defined(__linux__) && defined(__ELF__) From 6c1499d0954c007eb3c8e92ef235ca403fa41bc4 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 4 Oct 2023 17:22:14 -0700 Subject: [PATCH 37/42] Add Ed25519 point compression encoding This implements the point compression encoding to a byte array from https://datatracker.ietf.org/doc/html/rfc8032#section-5.1.2 as function "edwards25519_encode". It assumes the input is a point (x,y) on the edwards25519 curve, with coordinates reduced mod p_25519 = 2^255 - 19, and does not check any of that. 
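As a cross-check of that description, the whole encoding fits in a few
lines of portable C. The sketch below is illustrative only (the name
edwards25519_encode_ref and its loop structure are not part of this
patch); like the assembly, it assumes x sits in the four words at p and
y in the four words at p+4, both already reduced, and performs no
validation:

    #include <stdint.h>

    /* Hypothetical reference for RFC 8032 point compression: output the
       little-endian bytes of 2^255 * (x & 1) + y, i.e. y truncated to
       255 bits with the parity of x placed in the top bit. */
    void edwards25519_encode_ref(uint8_t z[32], const uint64_t p[8])
    {
        const uint64_t *x = p;      /* x coordinate: words p[0..3] */
        const uint64_t *y = p + 4;  /* y coordinate: words p[4..7] */

        /* Write y byte by byte so the result is independent of host
           endianness. */
        for (int i = 0; i < 4; i++)
            for (int j = 0; j < 8; j++)
                z[8*i + j] = (uint8_t)(y[i] >> (8*j));

        /* Truncate y to 255 bits and put the parity of x in bit 255. */
        z[31] = (uint8_t)((z[31] & 0x7f) | ((x[0] & 1) << 7));
    }

The byte-by-byte loop corresponds to the strb sequence in the ARM version
below, while the x86 version can store whole 64-bit words since x86 is
itself little-endian.
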
s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/67430bea9f3cc95ce40074ec632d732676d9d63a --- arm/curve25519/edwards25519_encode.S | 131 +++++++++++++++++++++++ x86_att/curve25519/edwards25519_encode.S | 81 ++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 arm/curve25519/edwards25519_encode.S create mode 100644 x86_att/curve25519/edwards25519_encode.S diff --git a/arm/curve25519/edwards25519_encode.S b/arm/curve25519/edwards25519_encode.S new file mode 100644 index 0000000000..4cf301a227 --- /dev/null +++ b/arm/curve25519/edwards25519_encode.S @@ -0,0 +1,131 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Encode edwards25519 point into compressed form as 256-bit number +// Input p[8]; output z[32] (bytes) +// +// extern void edwards25519_encode +// (uint8_t z[static 32], uint64_t p[static 8]); +// +// This assumes that the input buffer p points to a pair of 256-bit +// numbers x (at p) and y (at p+4) representing a point (x,y) on the +// edwards25519 curve. It is assumed that both x and y are < p_25519 +// but there is no checking of this, nor of the fact that (x,y) is +// in fact on the curve. +// +// The output in z is a little-endian array of bytes corresponding to +// the standard compressed encoding of a point as 2^255 * x_0 + y +// where x_0 is the least significant bit of x. +// See "https://datatracker.ietf.org/doc/html/rfc8032#section-5.1.2" +// In this implementation, y is simply truncated to 255 bits, but if +// it is reduced mod p_25519 as expected this does not affect values. +// +// Standard ARM ABI: X0 = z, X1 = p +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_encode) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_encode) + .text + .balign 4 + +#define z x0 +#define p x1 + +#define y0 x2 +#define y1 x3 +#define y2 x4 +#define y3 x5 +#define y0short w2 +#define y1short w3 +#define y2short w4 +#define y3short w5 +#define xb x6 + +S2N_BN_SYMBOL(edwards25519_encode): + +// Load lowest word of x coordinate in xb and full y as [y3;y2;y1;y0]. 
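+// (x occupies the four 64-bit words at p and y the four at p+4, hence the
+// byte offsets #32 and #48 used for y.)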
+ + ldr xb, [p] + ldp y0, y1, [p, #32] + ldp y2, y3, [p, #48] + +// Compute the encoded form, making the LSB of x the MSB of the encoding + + and y3, y3, #0x7FFFFFFFFFFFFFFF + orr y3, y3, xb, lsl #63 + +// Write back in a byte-oriented fashion to be independent of endianness + + strb y0short, [z] + lsr y0, y0, #8 + strb y0short, [z, #1] + lsr y0, y0, #8 + strb y0short, [z, #2] + lsr y0, y0, #8 + strb y0short, [z, #3] + lsr y0, y0, #8 + strb y0short, [z, #4] + lsr y0, y0, #8 + strb y0short, [z, #5] + lsr y0, y0, #8 + strb y0short, [z, #6] + lsr y0, y0, #8 + strb y0short, [z, #7] + + strb y1short, [z, #8] + lsr y1, y1, #8 + strb y1short, [z, #9] + lsr y1, y1, #8 + strb y1short, [z, #10] + lsr y1, y1, #8 + strb y1short, [z, #11] + lsr y1, y1, #8 + strb y1short, [z, #12] + lsr y1, y1, #8 + strb y1short, [z, #13] + lsr y1, y1, #8 + strb y1short, [z, #14] + lsr y1, y1, #8 + strb y1short, [z, #15] + + strb y2short, [z, #16] + lsr y2, y2, #8 + strb y2short, [z, #17] + lsr y2, y2, #8 + strb y2short, [z, #18] + lsr y2, y2, #8 + strb y2short, [z, #19] + lsr y2, y2, #8 + strb y2short, [z, #20] + lsr y2, y2, #8 + strb y2short, [z, #21] + lsr y2, y2, #8 + strb y2short, [z, #22] + lsr y2, y2, #8 + strb y2short, [z, #23] + + strb y3short, [z, #24] + lsr y3, y3, #8 + strb y3short, [z, #25] + lsr y3, y3, #8 + strb y3short, [z, #26] + lsr y3, y3, #8 + strb y3short, [z, #27] + lsr y3, y3, #8 + strb y3short, [z, #28] + lsr y3, y3, #8 + strb y3short, [z, #29] + lsr y3, y3, #8 + strb y3short, [z, #30] + lsr y3, y3, #8 + strb y3short, [z, #31] + +// Return + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/x86_att/curve25519/edwards25519_encode.S b/x86_att/curve25519/edwards25519_encode.S new file mode 100644 index 0000000000..bdbaa47232 --- /dev/null +++ b/x86_att/curve25519/edwards25519_encode.S @@ -0,0 +1,81 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Encode edwards25519 point into compressed form as 256-bit number +// Input p[8]; output z[32] (bytes) +// +// extern void edwards25519_encode +// (uint8_t z[static 32], uint64_t p[static 8]); +// +// This assumes that the input buffer p points to a pair of 256-bit +// numbers x (at p) and y (at p+4) representing a point (x,y) on the +// edwards25519 curve. It is assumed that both x and y are < p_25519 +// but there is no checking of this, nor of the fact that (x,y) is +// in fact on the curve. +// +// The output in z is a little-endian array of bytes corresponding to +// the standard compressed encoding of a point as 2^255 * x_0 + y +// where x_0 is the least significant bit of x. +// See "https://datatracker.ietf.org/doc/html/rfc8032#section-5.1.2" +// In this implementation, y is simply truncated to 255 bits, but if +// it is reduced mod p_25519 as expected this does not affect values. 
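+// As a concrete check, the neutral point (0,1) encodes as the byte 0x01
+// followed by 31 zero bytes.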
+// +// Standard x86-64 ABI: RDI = z, RSI = p +// Microsoft x64 ABI: RCX = z, RDX = p +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_encode) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_encode) + .text + +#define z %rdi +#define p %rsi +#define y0 %rax +#define y1 %rcx +#define y2 %rdx +#define y3 %r8 +#define xb %r9 + +S2N_BN_SYMBOL(edwards25519_encode): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + +// Load lowest word of x coordinate in xb and full y as [y3;y2;y1;y0]. + + movq (p), xb + movq 32(p), y0 + movq 40(p), y1 + movq 48(p), y2 + movq 56(p), y3 + +// Compute the encoded form, making the LSB of x the MSB of the encoding + + btr $63, y3 + shlq $63, xb + orq xb, y3 + +// Store back (by the word, since x86 is little-endian anyway) + + movq y0, (z) + movq y1, 8(z) + movq y2, 16(z) + movq y3, 24(z) + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif From 55ea1743810188ae5b82cf92dddd30604a106aa1 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 13 Oct 2023 20:45:51 -0700 Subject: [PATCH 38/42] Add Ed25519 point decoding function This implements point decoding from a 256-bit little-endian byte sequence to a point (x,y) on the edwards25519 curve as specified in https://datatracker.ietf.org/doc/html/rfc8032#section-5.1.3 The function returns 0 for success and 1 for failure, the latter meaning that the input is not the encoding of any edwards25519 point. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/97f74932f775695ecd4fe897343067a2592f7c4b --- arm/curve25519/edwards25519_decode.S | 700 +++++++++++++++++ arm/curve25519/edwards25519_decode_alt.S | 563 ++++++++++++++ x86_att/curve25519/edwards25519_decode.S | 670 +++++++++++++++++ x86_att/curve25519/edwards25519_decode_alt.S | 751 +++++++++++++++++++ 4 files changed, 2684 insertions(+) create mode 100644 arm/curve25519/edwards25519_decode.S create mode 100644 arm/curve25519/edwards25519_decode_alt.S create mode 100644 x86_att/curve25519/edwards25519_decode.S create mode 100644 x86_att/curve25519/edwards25519_decode_alt.S diff --git a/arm/curve25519/edwards25519_decode.S b/arm/curve25519/edwards25519_decode.S new file mode 100644 index 0000000000..9161768db7 --- /dev/null +++ b/arm/curve25519/edwards25519_decode.S @@ -0,0 +1,700 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Decode compressed 256-bit form of edwards25519 point +// Input c[32] (bytes); output function return and z[8] +// +// extern uint64_t edwards25519_decode(uint64_t z[static 8],uint8_t c[static 32]); +// +// This interprets the input byte string as a little-endian number +// representing a point (x,y) on the edwards25519 curve, encoded as +// 2^255 * x_0 + y where x_0 is the least significant bit of x. It +// returns the full pair of coordinates x (at z) and y (at z+4). The +// return code is 0 for success and 1 for failure, which means that +// the input does not correspond to the encoding of any edwards25519 +// point. 
This can happen for three reasons, where y = the lowest +// 255 bits of the input: +// +// * y >= p_25519 +// Input y coordinate is not reduced +// * (y^2 - 1) * (1 + d_25519 * y^2) has no modular square root +// There is no x such that (x,y) is on the curve +// * y^2 = 1 and top bit of input is set +// Cannot be the canonical encoding of (0,1) or (0,-1) +// +// Standard ARM ABI: X0 = z, X1 = c +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_decode) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_decode) + + .text + .balign 4 + +// Size in bytes of a 64-bit word + +#define N 8 + +// Pointer-offset pairs for temporaries on stack + +#define y sp, #0 +#define s sp, #(4*N) +#define t sp, #(8*N) +#define u sp, #(12*N) +#define v sp, #(16*N) +#define w sp, #(20*N) + +// Other temporary variables in register + +#define res x19 +#define sgnbit x20 +#define badun x21 + +// Total size to reserve on the stack + +#define NSPACE #(24*N) + +// Loading large constants + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +// Macros wrapping up calls to the local subroutines + +#define mulp(dest,src1,src2) \ + add x0, dest; \ + add x1, src1; \ + add x2, src2; \ + bl edwards25519_decode_mul_p25519 + +#define nsqr(dest,n,src) \ + add x0, dest; \ + mov x1, n; \ + add x2, src; \ + bl edwards25519_decode_nsqr_p25519 + +S2N_BN_SYMBOL(edwards25519_decode): + +// Save registers and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x30, [sp, -16]! + sub sp, sp, NSPACE + +// Save the return pointer for the end so we can overwrite x0 later + + mov res, x0 + +// Load the inputs, using byte operations in case of big-endian setting. +// Let y be the lowest 255 bits of the input and sgnbit the desired parity. +// If y >= p_25519 then already flag the input as invalid (badun = 1). 
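+// (Since y is masked to its low 255 bits below, y + 19 has bit 255 set
+// exactly when y >= 2^255 - 19 = p_25519; the concluding "cset badun, mi"
+// reads off precisely that bit.)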
+ + ldrb w0, [x1] + lsl x4, x0, #56 + ldrb w0, [x1, #1] + extr x4, x0, x4, #8 + ldrb w0, [x1, #2] + extr x4, x0, x4, #8 + ldrb w0, [x1, #3] + extr x4, x0, x4, #8 + ldrb w0, [x1, #4] + extr x4, x0, x4, #8 + ldrb w0, [x1, #5] + extr x4, x0, x4, #8 + ldrb w0, [x1, #6] + extr x4, x0, x4, #8 + ldrb w0, [x1, #7] + extr x4, x0, x4, #8 + + ldrb w0, [x1, #8] + lsl x5, x0, #56 + ldrb w0, [x1, #9] + extr x5, x0, x5, #8 + ldrb w0, [x1, #10] + extr x5, x0, x5, #8 + ldrb w0, [x1, #11] + extr x5, x0, x5, #8 + ldrb w0, [x1, #12] + extr x5, x0, x5, #8 + ldrb w0, [x1, #13] + extr x5, x0, x5, #8 + ldrb w0, [x1, #14] + extr x5, x0, x5, #8 + ldrb w0, [x1, #15] + extr x5, x0, x5, #8 + + ldrb w0, [x1, #16] + lsl x6, x0, #56 + ldrb w0, [x1, #17] + extr x6, x0, x6, #8 + ldrb w0, [x1, #18] + extr x6, x0, x6, #8 + ldrb w0, [x1, #19] + extr x6, x0, x6, #8 + ldrb w0, [x1, #20] + extr x6, x0, x6, #8 + ldrb w0, [x1, #21] + extr x6, x0, x6, #8 + ldrb w0, [x1, #22] + extr x6, x0, x6, #8 + ldrb w0, [x1, #23] + extr x6, x0, x6, #8 + + ldrb w0, [x1, #24] + lsl x7, x0, #56 + ldrb w0, [x1, #25] + extr x7, x0, x7, #8 + ldrb w0, [x1, #26] + extr x7, x0, x7, #8 + ldrb w0, [x1, #27] + extr x7, x0, x7, #8 + ldrb w0, [x1, #28] + extr x7, x0, x7, #8 + ldrb w0, [x1, #29] + extr x7, x0, x7, #8 + ldrb w0, [x1, #30] + extr x7, x0, x7, #8 + ldrb w0, [x1, #31] + extr x7, x0, x7, #8 + + stp x4, x5, [y] + lsr sgnbit, x7, #63 + and x7, x7, #0x7FFFFFFFFFFFFFFF + stp x6, x7, [y+16] + + adds xzr, x4, #19 + adcs xzr, x5, xzr + adcs xzr, x6, xzr + adcs xzr, x7, xzr + cset badun, mi + +// u = y^2 - 1 (actually y + 2^255-20, not reduced modulo) +// v = 1 + d * y^2 (not reduced modulo from the +1) +// w = u * v + + nsqr(v,1,y) + ldp x0, x1, [v] + ldp x2, x3, [v+16] + mov x4, #0x8000000000000000 + subs x0, x0, #20 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x3, x3, x4 + stp x0, x1, [u] + stp x2, x3, [u+16] + + movbig(x0,#0x75eb,#0x4dca,#0x1359,#0x78a3) + movbig(x1,#0x0070,#0x0a4d,#0x4141,#0xd8ab) + movbig(x2,#0x8cc7,#0x4079,#0x7779,#0xe898) + movbig(x3,#0x5203,#0x6cee,#0x2b6f,#0xfe73) + stp x0, x1, [w] + stp x2, x3, [w+16] + mulp(v,w,v) + ldp x0, x1, [v] + ldp x2, x3, [v+16] + adds x0, x0, #1 + adcs x1, x1, xzr + adcs x2, x2, xzr + adcs x3, x3, xzr + stp x0, x1, [v] + stp x2, x3, [v+16] + + mulp(w,u,v) + +// Get s = w^{252-3} as a candidate inverse square root 1/sqrt(w). +// This power tower computation is the same as bignum_invsqrt_p25519 + + nsqr(t,1,w) + mulp(t,t,w) + nsqr(s,2,t) + mulp(t,s,t) + nsqr(s,1,t) + mulp(v,s,w) + nsqr(s,5,v) + mulp(t,s,v) + nsqr(s,10,t) + mulp(t,s,t) + nsqr(s,5,t) + mulp(v,s,v) + nsqr(s,25,v) + mulp(t,s,v) + nsqr(s,50,t) + mulp(t,s,t) + nsqr(s,25,t) + mulp(v,s,v) + nsqr(s,125,v) + mulp(v,s,v) + nsqr(s,2,v) + mulp(s,s,w) + +// Compute v' = s^2 * w to discriminate whether the square root sqrt(u/v) +// exists, in which case we should get 0, 1 or -1. + + nsqr(v,1,s) + mulp(v,v,w) + +// Get the two candidates for sqrt(u / v), one being s = u * w^{252-3} +// and the other being t = s * j_25519 where j_25519 = sqrt(-1). + + mulp(s,u,s) + movbig(x0, #0xc4ee, #0x1b27, #0x4a0e, #0xa0b0) + movbig(x1, #0x2f43, #0x1806, #0xad2f, #0xe478) + movbig(x2, #0x2b4d, #0x0099, #0x3dfb, #0xd7a7) + movbig(x3, #0x2b83, #0x2480, #0x4fc1, #0xdf0b) + stp x0, x1, [t] + stp x2, x3, [t+16] + mulp(t,s,t) + +// x4 = 0 <=> s^2 * w = 0 or 1 + + ldp x0, x1, [v] + ldp x2, x3, [v+16] + bic x4, x0, #1 + orr x4, x4, x1 + orr x5, x2, x3 + orr x4, x4, x5 + +// x0 = 0 <=> s^2 * w = -1 (mod p_25519, i.e. 
s^2 * w = 2^255 - 20) + + add x0, x0, #20 + add x1, x1, #1 + orr x0, x0, x1 + add x2, x2, #1 + eor x3, x3, #0x7FFFFFFFFFFFFFFF + orr x2, x2, x3 + orr x0, x0, x2 + +// If s^2 * w is not 0 or 1 then replace s by t + + cmp x4, xzr + ldp x10, x11, [s] + ldp x14, x15, [t] + csel x10, x10, x14, eq + csel x11, x11, x15, eq + ldp x12, x13, [s+16] + ldp x16, x17, [t+16] + csel x12, x12, x16, eq + csel x13, x13, x17, eq + stp x10, x11, [s] + stp x12, x13, [s+16] + +// Check invalidity, occurring if s^2 * w is not in {0,1,-1} + + ccmp x0, xzr, 4, ne + cset x0, ne + orr badun, badun, x0 + +// Let [x3;x2;x1;x0] = s and [x7;x6;x5;x4] = p_25519 - s + + ldp x0, x1, [s] + ldp x2, x3, [s+16] + mov x4, #-19 + subs x4, x4, x0 + mov x6, #-1 + sbcs x5, x6, x1 + sbcs x6, x6, x2 + mov x7, #0x7FFFFFFFFFFFFFFF + sbc x7, x7, x3 + +// Decide whether a flip is apparently indicated, s_0 <=> sgnbit +// Decide also if s = 0 by OR-ing its digits. Now if a flip is indicated: +// - if s = 0 then mark as invalid +// - if s <> 0 then indeed flip + + and x9, x0, #1 + eor sgnbit, x9, sgnbit + orr x8, x0, x1 + orr x9, x2, x3 + orr x8, x8, x9 + orr x10, badun, sgnbit + cmp x8, xzr + csel badun, x10, badun, eq + ccmp sgnbit, xzr, #4, ne + +// Actual selection of x as s or -s, copying of y and return of validity + + csel x0, x0, x4, eq + csel x1, x1, x5, eq + csel x2, x2, x6, eq + csel x3, x3, x7, eq + ldp x8, x9, [y] + ldp x10, x11, [y+16] + + stp x0, x1, [res] + stp x2, x3, [res, #16] + stp x8, x9, [res, #32] + stp x10, x11, [res, #48] + + mov x0, badun + +// Restore stack and registers + + add sp, sp, NSPACE + + ldp x21, x30, [sp], 16 + ldp x19, x20, [sp], 16 + ret + +// ************************************************************* +// Local z = x * y +// ************************************************************* + +edwards25519_decode_mul_p25519: + ldp x3, x4, [x1] + ldp x5, x6, [x2] + umull x7, w3, w5 + lsr x17, x3, #32 + umull x15, w17, w5 + lsr x16, x5, #32 + umull x8, w16, w17 + umull x16, w3, w16 + adds x7, x7, x15, lsl #32 + lsr x15, x15, #32 + adc x8, x8, x15 + adds x7, x7, x16, lsl #32 + lsr x16, x16, #32 + adc x8, x8, x16 + mul x9, x4, x6 + umulh x10, x4, x6 + subs x4, x4, x3 + cneg x4, x4, lo + csetm x16, lo + adds x9, x9, x8 + adc x10, x10, xzr + subs x3, x5, x6 + cneg x3, x3, lo + cinv x16, x16, lo + mul x15, x4, x3 + umulh x3, x4, x3 + adds x8, x7, x9 + adcs x9, x9, x10 + adc x10, x10, xzr + cmn x16, #1 + eor x15, x15, x16 + adcs x8, x15, x8 + eor x3, x3, x16 + adcs x9, x3, x9 + adc x10, x10, x16 + ldp x3, x4, [x1, #16] + ldp x5, x6, [x2, #16] + umull x11, w3, w5 + lsr x17, x3, #32 + umull x15, w17, w5 + lsr x16, x5, #32 + umull x12, w16, w17 + umull x16, w3, w16 + adds x11, x11, x15, lsl #32 + lsr x15, x15, #32 + adc x12, x12, x15 + adds x11, x11, x16, lsl #32 + lsr x16, x16, #32 + adc x12, x12, x16 + mul x13, x4, x6 + umulh x14, x4, x6 + subs x4, x4, x3 + cneg x4, x4, lo + csetm x16, lo + adds x13, x13, x12 + adc x14, x14, xzr + subs x3, x5, x6 + cneg x3, x3, lo + cinv x16, x16, lo + mul x15, x4, x3 + umulh x3, x4, x3 + adds x12, x11, x13 + adcs x13, x13, x14 + adc x14, x14, xzr + cmn x16, #1 + eor x15, x15, x16 + adcs x12, x15, x12 + eor x3, x3, x16 + adcs x13, x3, x13 + adc x14, x14, x16 + ldp x3, x4, [x1, #16] + ldp x15, x16, [x1] + subs x3, x3, x15 + sbcs x4, x4, x16 + csetm x16, lo + ldp x15, x17, [x2] + subs x5, x15, x5 + sbcs x6, x17, x6 + csetm x17, lo + eor x3, x3, x16 + subs x3, x3, x16 + eor x4, x4, x16 + sbc x4, x4, x16 + eor x5, x5, x17 + subs x5, x5, x17 + eor x6, x6, x17 + sbc x6, x6, x17 + eor 
x16, x17, x16 + adds x11, x11, x9 + adcs x12, x12, x10 + adcs x13, x13, xzr + adc x14, x14, xzr + mul x2, x3, x5 + umulh x17, x3, x5 + mul x15, x4, x6 + umulh x1, x4, x6 + subs x4, x4, x3 + cneg x4, x4, lo + csetm x9, lo + adds x15, x15, x17 + adc x1, x1, xzr + subs x6, x5, x6 + cneg x6, x6, lo + cinv x9, x9, lo + mul x5, x4, x6 + umulh x6, x4, x6 + adds x17, x2, x15 + adcs x15, x15, x1 + adc x1, x1, xzr + cmn x9, #1 + eor x5, x5, x9 + adcs x17, x5, x17 + eor x6, x6, x9 + adcs x15, x6, x15 + adc x1, x1, x9 + adds x9, x11, x7 + adcs x10, x12, x8 + adcs x11, x13, x11 + adcs x12, x14, x12 + adcs x13, x13, xzr + adc x14, x14, xzr + cmn x16, #1 + eor x2, x2, x16 + adcs x9, x2, x9 + eor x17, x17, x16 + adcs x10, x17, x10 + eor x15, x15, x16 + adcs x11, x15, x11 + eor x1, x1, x16 + adcs x12, x1, x12 + adcs x13, x13, x16 + adc x14, x14, x16 + mov x3, #38 + umull x4, w11, w3 + add x4, x4, w7, uxtw + lsr x7, x7, #32 + lsr x11, x11, #32 + umaddl x11, w11, w3, x7 + mov x7, x4 + umull x4, w12, w3 + add x4, x4, w8, uxtw + lsr x8, x8, #32 + lsr x12, x12, #32 + umaddl x12, w12, w3, x8 + mov x8, x4 + umull x4, w13, w3 + add x4, x4, w9, uxtw + lsr x9, x9, #32 + lsr x13, x13, #32 + umaddl x13, w13, w3, x9 + mov x9, x4 + umull x4, w14, w3 + add x4, x4, w10, uxtw + lsr x10, x10, #32 + lsr x14, x14, #32 + umaddl x14, w14, w3, x10 + mov x10, x4 + lsr x17, x14, #31 + mov x5, #19 + umaddl x5, w5, w17, x5 + add x7, x7, x5 + adds x7, x7, x11, lsl #32 + extr x3, x12, x11, #32 + adcs x8, x8, x3 + extr x3, x13, x12, #32 + adcs x9, x9, x3 + extr x3, x14, x13, #32 + lsl x5, x17, #63 + eor x10, x10, x5 + adc x10, x10, x3 + mov x3, #19 + tst x10, #0x8000000000000000 + csel x3, x3, xzr, pl + subs x7, x7, x3 + sbcs x8, x8, xzr + sbcs x9, x9, xzr + sbc x10, x10, xzr + and x10, x10, #0x7fffffffffffffff + stp x7, x8, [x0] + stp x9, x10, [x0, #16] + ret + +// ************************************************************* +// Local z = 2^n * x +// ************************************************************* + +edwards25519_decode_nsqr_p25519: + +// Copy input argument into [x13;x12;x11;x10] + + ldp x10, x11, [x2] + ldp x12, x13, [x2, #16] + +// Main squaring loop, accumulating in [x13;x12;x11;x10] consistently and +// only ensuring the intermediates are < 2 * p_25519 = 2^256 - 38 + +edwards25519_decode_loop: + umull x2, w10, w10 + lsr x14, x10, #32 + umull x3, w14, w14 + umull x14, w10, w14 + adds x2, x2, x14, lsl #33 + lsr x14, x14, #31 + adc x3, x3, x14 + umull x4, w11, w11 + lsr x14, x11, #32 + umull x5, w14, w14 + umull x14, w11, w14 + mul x15, x10, x11 + umulh x16, x10, x11 + adds x4, x4, x14, lsl #33 + lsr x14, x14, #31 + adc x5, x5, x14 + adds x15, x15, x15 + adcs x16, x16, x16 + adc x5, x5, xzr + adds x3, x3, x15 + adcs x4, x4, x16 + adc x5, x5, xzr + umull x6, w12, w12 + lsr x14, x12, #32 + umull x7, w14, w14 + umull x14, w12, w14 + adds x6, x6, x14, lsl #33 + lsr x14, x14, #31 + adc x7, x7, x14 + umull x8, w13, w13 + lsr x14, x13, #32 + umull x9, w14, w14 + umull x14, w13, w14 + mul x15, x12, x13 + umulh x16, x12, x13 + adds x8, x8, x14, lsl #33 + lsr x14, x14, #31 + adc x9, x9, x14 + adds x15, x15, x15 + adcs x16, x16, x16 + adc x9, x9, xzr + adds x7, x7, x15 + adcs x8, x8, x16 + adc x9, x9, xzr + subs x10, x10, x12 + sbcs x11, x11, x13 + csetm x16, lo + eor x10, x10, x16 + subs x10, x10, x16 + eor x11, x11, x16 + sbc x11, x11, x16 + adds x6, x6, x4 + adcs x7, x7, x5 + adcs x8, x8, xzr + adc x9, x9, xzr + umull x12, w10, w10 + lsr x5, x10, #32 + umull x13, w5, w5 + umull x5, w10, w5 + adds x12, x12, x5, lsl #33 + lsr 
x5, x5, #31 + adc x13, x13, x5 + umull x15, w11, w11 + lsr x5, x11, #32 + umull x14, w5, w5 + umull x5, w11, w5 + mul x4, x10, x11 + umulh x16, x10, x11 + adds x15, x15, x5, lsl #33 + lsr x5, x5, #31 + adc x14, x14, x5 + adds x4, x4, x4 + adcs x16, x16, x16 + adc x14, x14, xzr + adds x13, x13, x4 + adcs x15, x15, x16 + adc x14, x14, xzr + adds x4, x2, x6 + adcs x5, x3, x7 + adcs x6, x6, x8 + adcs x7, x7, x9 + csetm x16, lo + subs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x15 + sbcs x7, x7, x14 + adcs x8, x8, x16 + adc x9, x9, x16 + mov x10, #38 + umull x12, w6, w10 + add x12, x12, w2, uxtw + lsr x2, x2, #32 + lsr x6, x6, #32 + umaddl x6, w6, w10, x2 + mov x2, x12 + umull x12, w7, w10 + add x12, x12, w3, uxtw + lsr x3, x3, #32 + lsr x7, x7, #32 + umaddl x7, w7, w10, x3 + mov x3, x12 + umull x12, w8, w10 + add x12, x12, w4, uxtw + lsr x4, x4, #32 + lsr x8, x8, #32 + umaddl x8, w8, w10, x4 + mov x4, x12 + umull x12, w9, w10 + add x12, x12, w5, uxtw + lsr x5, x5, #32 + lsr x9, x9, #32 + umaddl x9, w9, w10, x5 + mov x5, x12 + lsr x13, x9, #31 + mov x11, #19 + umull x11, w11, w13 + add x2, x2, x11 + adds x10, x2, x6, lsl #32 + extr x12, x7, x6, #32 + adcs x11, x3, x12 + extr x12, x8, x7, #32 + adcs x12, x4, x12 + extr x14, x9, x8, #32 + lsl x15, x13, #63 + eor x5, x5, x15 + adc x13, x5, x14 + +// Loop as applicable + + subs x1, x1, #1 + bne edwards25519_decode_loop + +// We know the intermediate result x < 2^256 - 38, and now we do strict +// modular reduction mod 2^255 - 19. Note x < 2^255 - 19 <=> x + 19 < 2^255 +// which is equivalent to a "pl" condition. + + adds x6, x10, #19 + adcs x7, x11, xzr + adcs x8, x12, xzr + adcs x9, x13, xzr + + csel x10, x10, x6, pl + csel x11, x11, x7, pl + csel x12, x12, x8, pl + csel x13, x13, x9, pl + bic x13, x13, #0x8000000000000000 + +// Copy result back into destination and return + + stp x10, x11, [x0] + stp x12, x13, [x0, #16] + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/arm/curve25519/edwards25519_decode_alt.S b/arm/curve25519/edwards25519_decode_alt.S new file mode 100644 index 0000000000..c77a191744 --- /dev/null +++ b/arm/curve25519/edwards25519_decode_alt.S @@ -0,0 +1,563 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Decode compressed 256-bit form of edwards25519 point +// Input c[32] (bytes); output function return and z[8] +// +// extern uint64_t edwards25519_decode_alt(uint64_t z[static 8],uint8_t c[static 32]); +// +// This interprets the input byte string as a little-endian number +// representing a point (x,y) on the edwards25519 curve, encoded as +// 2^255 * x_0 + y where x_0 is the least significant bit of x. It +// returns the full pair of coordinates x (at z) and y (at z+4). The +// return code is 0 for success and 1 for failure, which means that +// the input does not correspond to the encoding of any edwards25519 +// point. 
This can happen for three reasons, where y = the lowest +// 255 bits of the input: +// +// * y >= p_25519 +// Input y coordinate is not reduced +// * (y^2 - 1) * (1 + d_25519 * y^2) has no modular square root +// There is no x such that (x,y) is on the curve +// * y^2 = 1 and top bit of input is set +// Cannot be the canonical encoding of (0,1) or (0,-1) +// +// Standard ARM ABI: X0 = z, X1 = c +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_decode_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_decode_alt) + + .text + .balign 4 + +// Size in bytes of a 64-bit word + +#define N 8 + +// Pointer-offset pairs for temporaries on stack + +#define y sp, #0 +#define s sp, #(4*N) +#define t sp, #(8*N) +#define u sp, #(12*N) +#define v sp, #(16*N) +#define w sp, #(20*N) + +// Other temporary variables in register + +#define res x19 +#define sgnbit x20 +#define badun x21 + +// Total size to reserve on the stack + +#define NSPACE #(24*N) + +// Loading large constants + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +// Macros wrapping up calls to the local subroutines + +#define mulp(dest,src1,src2) \ + add x0, dest; \ + add x1, src1; \ + add x2, src2; \ + bl edwards25519_decode_alt_mul_p25519 + +#define nsqr(dest,n,src) \ + add x0, dest; \ + mov x1, n; \ + add x2, src; \ + bl edwards25519_decode_alt_nsqr_p25519 + +S2N_BN_SYMBOL(edwards25519_decode_alt): + +// Save registers and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x30, [sp, -16]! + sub sp, sp, NSPACE + +// Save the return pointer for the end so we can overwrite x0 later + + mov res, x0 + +// Load the inputs, using byte operations in case of big-endian setting. +// Let y be the lowest 255 bits of the input and sgnbit the desired parity. +// If y >= p_25519 then already flag the input as invalid (badun = 1). 
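+// (The front-end logic here is identical to edwards25519_decode; this _alt
+// variant differs only in its local multiply and squaring subroutines, which
+// use plain 64-bit mul/umulh chains instead of umull-based 32-bit pieces.)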
+ + ldrb w0, [x1] + lsl x4, x0, #56 + ldrb w0, [x1, #1] + extr x4, x0, x4, #8 + ldrb w0, [x1, #2] + extr x4, x0, x4, #8 + ldrb w0, [x1, #3] + extr x4, x0, x4, #8 + ldrb w0, [x1, #4] + extr x4, x0, x4, #8 + ldrb w0, [x1, #5] + extr x4, x0, x4, #8 + ldrb w0, [x1, #6] + extr x4, x0, x4, #8 + ldrb w0, [x1, #7] + extr x4, x0, x4, #8 + + ldrb w0, [x1, #8] + lsl x5, x0, #56 + ldrb w0, [x1, #9] + extr x5, x0, x5, #8 + ldrb w0, [x1, #10] + extr x5, x0, x5, #8 + ldrb w0, [x1, #11] + extr x5, x0, x5, #8 + ldrb w0, [x1, #12] + extr x5, x0, x5, #8 + ldrb w0, [x1, #13] + extr x5, x0, x5, #8 + ldrb w0, [x1, #14] + extr x5, x0, x5, #8 + ldrb w0, [x1, #15] + extr x5, x0, x5, #8 + + ldrb w0, [x1, #16] + lsl x6, x0, #56 + ldrb w0, [x1, #17] + extr x6, x0, x6, #8 + ldrb w0, [x1, #18] + extr x6, x0, x6, #8 + ldrb w0, [x1, #19] + extr x6, x0, x6, #8 + ldrb w0, [x1, #20] + extr x6, x0, x6, #8 + ldrb w0, [x1, #21] + extr x6, x0, x6, #8 + ldrb w0, [x1, #22] + extr x6, x0, x6, #8 + ldrb w0, [x1, #23] + extr x6, x0, x6, #8 + + ldrb w0, [x1, #24] + lsl x7, x0, #56 + ldrb w0, [x1, #25] + extr x7, x0, x7, #8 + ldrb w0, [x1, #26] + extr x7, x0, x7, #8 + ldrb w0, [x1, #27] + extr x7, x0, x7, #8 + ldrb w0, [x1, #28] + extr x7, x0, x7, #8 + ldrb w0, [x1, #29] + extr x7, x0, x7, #8 + ldrb w0, [x1, #30] + extr x7, x0, x7, #8 + ldrb w0, [x1, #31] + extr x7, x0, x7, #8 + + stp x4, x5, [y] + lsr sgnbit, x7, #63 + and x7, x7, #0x7FFFFFFFFFFFFFFF + stp x6, x7, [y+16] + + adds xzr, x4, #19 + adcs xzr, x5, xzr + adcs xzr, x6, xzr + adcs xzr, x7, xzr + cset badun, mi + +// u = y^2 - 1 (actually y + 2^255-20, not reduced modulo) +// v = 1 + d * y^2 (not reduced modulo from the +1) +// w = u * v + + nsqr(v,1,y) + ldp x0, x1, [v] + ldp x2, x3, [v+16] + mov x4, #0x8000000000000000 + subs x0, x0, #20 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x3, x3, x4 + stp x0, x1, [u] + stp x2, x3, [u+16] + + movbig(x0,#0x75eb,#0x4dca,#0x1359,#0x78a3) + movbig(x1,#0x0070,#0x0a4d,#0x4141,#0xd8ab) + movbig(x2,#0x8cc7,#0x4079,#0x7779,#0xe898) + movbig(x3,#0x5203,#0x6cee,#0x2b6f,#0xfe73) + stp x0, x1, [w] + stp x2, x3, [w+16] + mulp(v,w,v) + ldp x0, x1, [v] + ldp x2, x3, [v+16] + adds x0, x0, #1 + adcs x1, x1, xzr + adcs x2, x2, xzr + adcs x3, x3, xzr + stp x0, x1, [v] + stp x2, x3, [v+16] + + mulp(w,u,v) + +// Get s = w^{252-3} as a candidate inverse square root 1/sqrt(w). +// This power tower computation is the same as bignum_invsqrt_p25519 + + nsqr(t,1,w) + mulp(t,t,w) + nsqr(s,2,t) + mulp(t,s,t) + nsqr(s,1,t) + mulp(v,s,w) + nsqr(s,5,v) + mulp(t,s,v) + nsqr(s,10,t) + mulp(t,s,t) + nsqr(s,5,t) + mulp(v,s,v) + nsqr(s,25,v) + mulp(t,s,v) + nsqr(s,50,t) + mulp(t,s,t) + nsqr(s,25,t) + mulp(v,s,v) + nsqr(s,125,v) + mulp(v,s,v) + nsqr(s,2,v) + mulp(s,s,w) + +// Compute v' = s^2 * w to discriminate whether the square root sqrt(u/v) +// exists, in which case we should get 0, 1 or -1. + + nsqr(v,1,s) + mulp(v,v,w) + +// Get the two candidates for sqrt(u / v), one being s = u * w^{252-3} +// and the other being t = s * j_25519 where j_25519 = sqrt(-1). + + mulp(s,u,s) + movbig(x0, #0xc4ee, #0x1b27, #0x4a0e, #0xa0b0) + movbig(x1, #0x2f43, #0x1806, #0xad2f, #0xe478) + movbig(x2, #0x2b4d, #0x0099, #0x3dfb, #0xd7a7) + movbig(x3, #0x2b83, #0x2480, #0x4fc1, #0xdf0b) + stp x0, x1, [t] + stp x2, x3, [t+16] + mulp(t,s,t) + +// x4 = 0 <=> s^2 * w = 0 or 1 + + ldp x0, x1, [v] + ldp x2, x3, [v+16] + bic x4, x0, #1 + orr x4, x4, x1 + orr x5, x2, x3 + orr x4, x4, x5 + +// x0 = 0 <=> s^2 * w = -1 (mod p_25519, i.e. 
s^2 * w = 2^255 - 20) + + add x0, x0, #20 + add x1, x1, #1 + orr x0, x0, x1 + add x2, x2, #1 + eor x3, x3, #0x7FFFFFFFFFFFFFFF + orr x2, x2, x3 + orr x0, x0, x2 + +// If s^2 * w is not 0 or 1 then replace s by t + + cmp x4, xzr + ldp x10, x11, [s] + ldp x14, x15, [t] + csel x10, x10, x14, eq + csel x11, x11, x15, eq + ldp x12, x13, [s+16] + ldp x16, x17, [t+16] + csel x12, x12, x16, eq + csel x13, x13, x17, eq + stp x10, x11, [s] + stp x12, x13, [s+16] + +// Check invalidity, occurring if s^2 * w is not in {0,1,-1} + + ccmp x0, xzr, 4, ne + cset x0, ne + orr badun, badun, x0 + +// Let [x3;x2;x1;x0] = s and [x7;x6;x5;x4] = p_25519 - s + + ldp x0, x1, [s] + ldp x2, x3, [s+16] + mov x4, #-19 + subs x4, x4, x0 + mov x6, #-1 + sbcs x5, x6, x1 + sbcs x6, x6, x2 + mov x7, #0x7FFFFFFFFFFFFFFF + sbc x7, x7, x3 + +// Decide whether a flip is apparently indicated, s_0 <=> sgnbit +// Decide also if s = 0 by OR-ing its digits. Now if a flip is indicated: +// - if s = 0 then mark as invalid +// - if s <> 0 then indeed flip + + and x9, x0, #1 + eor sgnbit, x9, sgnbit + orr x8, x0, x1 + orr x9, x2, x3 + orr x8, x8, x9 + orr x10, badun, sgnbit + cmp x8, xzr + csel badun, x10, badun, eq + ccmp sgnbit, xzr, #4, ne + +// Actual selection of x as s or -s, copying of y and return of validity + + csel x0, x0, x4, eq + csel x1, x1, x5, eq + csel x2, x2, x6, eq + csel x3, x3, x7, eq + ldp x8, x9, [y] + ldp x10, x11, [y+16] + + stp x0, x1, [res] + stp x2, x3, [res, #16] + stp x8, x9, [res, #32] + stp x10, x11, [res, #48] + + mov x0, badun + +// Restore stack and registers + + add sp, sp, NSPACE + + ldp x21, x30, [sp], 16 + ldp x19, x20, [sp], 16 + ret + +// ************************************************************* +// Local z = x * y +// ************************************************************* + +edwards25519_decode_alt_mul_p25519: + ldp x3, x4, [x1] + ldp x7, x8, [x2] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [x2, #16] + mul x11, x3, x9 + umulh x15, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x16, x3, x10 + adcs x15, x15, x11 + adc x16, x16, xzr + ldp x5, x6, [x1, #16] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x15, x15, x11 + mul x11, x4, x10 + adcs x16, x16, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x15, x15, x11 + umulh x11, x4, x9 + adcs x16, x16, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x15, x15, x11 + mul x11, x5, x9 + adcs x16, x16, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x15, x15, x11 + umulh x11, x5, x8 + adcs x16, x16, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x15, x15, x11 + mul x11, x6, x8 + adcs x16, x16, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + umulh x11, x6, x7 + adds x16, x16, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + mov x7, #38 + mul x11, x7, x16 + umulh x9, x7, x16 + adds x12, x12, x11 + mul x11, x7, x3 + umulh x3, x7, x3 + adcs x13, x13, x11 + mul x11, x7, x4 + umulh x4, x7, x4 + adcs x14, x14, x11 + mul x11, x7, x5 + umulh x5, x7, x5 + adcs x15, x15, x11 + cset x16, hs + adds x15, x15, x4 + adc x16, x16, x5 + cmn x15, x15 + orr x15, x15, #0x8000000000000000 + adc x8, x16, x16 + mov x7, #19 + 
madd x11, x7, x8, x7 + adds x12, x12, x11 + adcs x13, x13, x9 + adcs x14, x14, x3 + adcs x15, x15, xzr + csel x7, x7, xzr, lo + subs x12, x12, x7 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + and x15, x15, #0x7fffffffffffffff + stp x12, x13, [x0] + stp x14, x15, [x0, #16] + ret + +// ************************************************************* +// Local z = 2^n * x +// ************************************************************* + +edwards25519_decode_alt_nsqr_p25519: + +// Copy input argument into [x5;x4;x3;x2] (overwriting input pointer x20 + + ldp x6, x3, [x2] + ldp x4, x5, [x2, #16] + mov x2, x6 + +// Main squaring loop, accumulating in [x5;x4;x3;x2] consistently and +// only ensuring the intermediates are < 2 * p_25519 = 2^256 - 38 + +edwards25519_decode_alt_loop: + mul x9, x2, x3 + umulh x10, x2, x3 + mul x11, x2, x5 + umulh x12, x2, x5 + mul x7, x2, x4 + umulh x6, x2, x4 + adds x10, x10, x7 + adcs x11, x11, x6 + mul x7, x3, x4 + umulh x6, x3, x4 + adc x6, x6, xzr + adds x11, x11, x7 + mul x13, x4, x5 + umulh x14, x4, x5 + adcs x12, x12, x6 + mul x7, x3, x5 + umulh x6, x3, x5 + adc x6, x6, xzr + adds x12, x12, x7 + adcs x13, x13, x6 + adc x14, x14, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + cset x6, hs + umulh x7, x2, x2 + mul x8, x2, x2 + adds x9, x9, x7 + mul x7, x3, x3 + adcs x10, x10, x7 + umulh x7, x3, x3 + adcs x11, x11, x7 + mul x7, x4, x4 + adcs x12, x12, x7 + umulh x7, x4, x4 + adcs x13, x13, x7 + mul x7, x5, x5 + adcs x14, x14, x7 + umulh x7, x5, x5 + adc x6, x6, x7 + mov x3, #38 + mul x7, x3, x12 + umulh x4, x3, x12 + adds x8, x8, x7 + mul x7, x3, x13 + umulh x13, x3, x13 + adcs x9, x9, x7 + mul x7, x3, x14 + umulh x14, x3, x14 + adcs x10, x10, x7 + mul x7, x3, x6 + umulh x6, x3, x6 + adcs x11, x11, x7 + cset x12, hs + adds x11, x11, x14 + adc x12, x12, x6 + cmn x11, x11 + bic x11, x11, #0x8000000000000000 + adc x2, x12, x12 + mov x3, #0x13 + mul x7, x3, x2 + adds x2, x8, x7 + adcs x3, x9, x4 + adcs x4, x10, x13 + adc x5, x11, xzr + +// Loop as applicable + + subs x1, x1, #1 + bne edwards25519_decode_alt_loop + +// We know the intermediate result x < 2^256 - 38, and now we do strict +// modular reduction mod 2^255 - 19. Note x < 2^255 - 19 <=> x + 19 < 2^255 +// which is equivalent to a "pl" condition. + + adds x6, x2, #19 + adcs x7, x3, xzr + adcs x8, x4, xzr + adcs x9, x5, xzr + + csel x2, x2, x6, pl + csel x3, x3, x7, pl + csel x4, x4, x8, pl + csel x5, x5, x9, pl + bic x5, x5, #0x8000000000000000 + +// Copy result back into destination and return + + stp x2, x3, [x0] + stp x4, x5, [x0, #16] + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/curve25519/edwards25519_decode.S b/x86_att/curve25519/edwards25519_decode.S new file mode 100644 index 0000000000..05681925a3 --- /dev/null +++ b/x86_att/curve25519/edwards25519_decode.S @@ -0,0 +1,670 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Decode compressed 256-bit form of edwards25519 point +// Input c[32] (bytes); output function return and z[8] +// +// extern uint64_t edwards25519_decode(uint64_t z[static 8],uint8_t c[static 32]); +// +// This interprets the input byte string as a little-endian number +// representing a point (x,y) on the edwards25519 curve, encoded as +// 2^255 * x_0 + y where x_0 is the least significant bit of x. It +// returns the full pair of coordinates x (at z) and y (at z+4). The +// return code is 0 for success and 1 for failure, which means that +// the input does not correspond to the encoding of any edwards25519 +// point. This can happen for three reasons, where y = the lowest +// 255 bits of the input: +// +// * y >= p_25519 +// Input y coordinate is not reduced +// * (y^2 - 1) * (1 + d_25519 * y^2) has no modular square root +// There is no x such that (x,y) is on the curve +// * y^2 = 1 and top bit of input is set +// Cannot be the canonical encoding of (0,1) or (0,-1) +// +// Standard x86-64 ABI: RDI = z, RSI = c +// Microsoft x64 ABI: RCX = z, RDX = c +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_decode) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_decode) + .text + +// Size in bytes of a 64-bit word + +#define N 8 + +// Pointer-offset pairs for temporaries on stack + +#define y 0(%rsp) +#define s (4*N)(%rsp) +#define t (8*N)(%rsp) +#define u (12*N)(%rsp) +#define v (16*N)(%rsp) +#define w (20*N)(%rsp) +#define q (24*N)(%rsp) +#define res (28*N)(%rsp) +#define sgnbit (29*N)(%rsp) +#define badun (30*N)(%rsp) + +// Total size to reserve on the stack + +#define NSPACE (32*N) + +// Corrupted versions when stack is down 8 more + +#define q8 (25*N)(%rsp) + +// Syntactic variants to make x86_att version simpler to generate + +#define Y 0 +#define S (4*N) +#define T (8*N) +#define U (12*N) +#define V (16*N) +#define W (20*N) +#define Q8 (25*N) + +S2N_BN_SYMBOL(edwards25519_decode): + +// In this case the Windows form literally makes a subroutine call. +// This avoids hassle arising from subroutine offsets + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + callq edwards25519_decode_standard + popq %rsi + popq %rdi + ret + +edwards25519_decode_standard: +#endif + +// Save registers and make room for temporaries + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $NSPACE, %rsp + +// Save the return pointer for the end so we can overwrite %rdi later + + movq %rdi, res + +// Load the inputs, which can be done word-wise since x86 is little-endian. +// Let y be the lowest 255 bits of the input and sgnbit the desired parity. +// If y >= p_25519 then already flag the input as invalid (badun = 1). 
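+// (badun ends up as bit 255 of y + 19, extracted by the shrq $63; after the
+// btr below clears bit 255 of y, that sum bit is set exactly when
+// y >= 2^255 - 19 = p_25519.)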
+ + movq (%rsi), %rax + movq %rax, Y(%rsp) + movq 8(%rsi), %rbx + movq %rbx, Y+8(%rsp) + xorl %ebp, %ebp + movq 16(%rsi), %rcx + movq %rcx, Y+16(%rsp) + movq 24(%rsi), %rdx + btr $63, %rdx + movq %rdx, Y+24(%rsp) + adcq %rbp, %rbp + movq %rbp, sgnbit + + addq $19, %rax + adcq $0, %rbx + adcq $0, %rcx + adcq $0, %rdx + shrq $63, %rdx + movq %rdx, badun + +// u = y^2 - 1 (actually y + 2^255-20, not reduced modulo) +// v = 1 + d * y^2 (not reduced modulo from the +1) +// w = u * v + + leaq V(%rsp), %rdi + movq $1, %rsi + leaq Y(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + movq V(%rsp), %rax + subq $20, %rax + movq V+8(%rsp), %rbx + sbbq $0, %rbx + movq V+16(%rsp), %rcx + sbbq $0, %rcx + movq V+24(%rsp), %rdx + sbbq $0, %rdx + btc $63, %rdx + movq %rax, U(%rsp) + movq %rbx, U+8(%rsp) + movq %rcx, U+16(%rsp) + movq %rdx, U+24(%rsp) + + movq $0x75eb4dca135978a3, %rax + movq %rax, W(%rsp) + movq $0x00700a4d4141d8ab, %rax + movq %rax, W+8(%rsp) + movq $0x8cc740797779e898, %rax + movq %rax, W+16(%rsp) + movq $0x52036cee2b6ffe73, %rax + movq %rax, W+24(%rsp) + leaq V(%rsp), %rdi + leaq W(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + movq V(%rsp), %rax + addq $1, %rax + movq V+8(%rsp), %rbx + adcq $0, %rbx + movq V+16(%rsp), %rcx + adcq $0, %rcx + movq V+24(%rsp), %rdx + adcq $0, %rdx + movq %rax, V(%rsp) + movq %rbx, V+8(%rsp) + movq %rcx, V+16(%rsp) + movq %rdx, V+24(%rsp) + + leaq W(%rsp), %rdi + leaq U(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + +// Get s = w^{252-3} as a candidate inverse square root 1/sqrt(w). +// This power tower computation is the same as bignum_invsqrt_p25519 + + leaq T(%rsp), %rdi + movq $1, %rsi + leaq W(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq T(%rsp), %rsi + leaq W(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + + leaq S(%rsp), %rdi + movq $2, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + + leaq S(%rsp), %rdi + movq $1, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq V(%rsp), %rdi + leaq S(%rsp), %rsi + leaq W(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + + leaq S(%rsp), %rdi + movq $5, %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + + leaq S(%rsp), %rdi + movq $10, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + + leaq S(%rsp), %rdi + movq $5, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq V(%rsp), %rdi + leaq S(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + + leaq S(%rsp), %rdi + movq $25, %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + + leaq S(%rsp), %rdi + movq $50, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + + leaq S(%rsp), %rdi + movq $25, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq V(%rsp), %rdi + leaq S(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + + leaq S(%rsp), %rdi + movq $125, %rsi + 
leaq V(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq V(%rsp), %rdi + leaq S(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + + leaq S(%rsp), %rdi + movq $2, %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq S(%rsp), %rdi + leaq S(%rsp), %rsi + leaq W(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + +// Compute v' = s^2 * w to discriminate whether the square root sqrt(u/v) +// exists, in which case we should get 0, 1 or -1. + + leaq V(%rsp), %rdi + movq $1, %rsi + leaq S(%rsp), %rdx + callq edwards25519_decode_nsqr_p25519 + + leaq V(%rsp), %rdi + leaq V(%rsp), %rsi + leaq W(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + +// Get the two candidates for sqrt(u / v), one being s = u * w^{252-3} +// and the other being t = s * j_25519 where j_25519 = sqrt(-1). + + leaq S(%rsp), %rdi + leaq U(%rsp), %rsi + leaq S(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + movq $0xc4ee1b274a0ea0b0, %rax + movq %rax, T(%rsp) + movq $0x2f431806ad2fe478, %rax + movq %rax, T+8(%rsp) + movq $0x2b4d00993dfbd7a7, %rax + movq %rax, T+16(%rsp) + movq $0x2b8324804fc1df0b, %rax + movq %rax, T+24(%rsp) + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_mul_p25519 + +// %rax = 0 <=> s^2 * w = 0 or 1 + + movq V(%rsp), %r8 + movq V+8(%rsp), %r9 + movq V+16(%rsp), %r10 + movq V+24(%rsp), %r11 + movl $1, %eax + notq %rax + andq %r8, %rax + orq %r9, %rax + orq %r10, %rax + orq %r11, %rax + +// %r8 = 0 <=> s^2 * w = -1 (mod p_25519, i.e. s^2 * w = 2^255 - 20) + + addq $20, %r8 + notq %r9 + notq %r10 + bts $63, %r11 + addq $1, %r11 + orq %r9, %r8 + orq %r11, %r10 + orq %r10, %r8 + +// If s^2 * w is not 0 or 1 then replace s by t + + testq %rax, %rax + + movq S(%rsp), %r12 + movq T(%rsp), %rbx + cmovnzq %rbx, %r12 + movq S+8(%rsp), %r13 + movq T+8(%rsp), %rbx + cmovnzq %rbx, %r13 + movq S+16(%rsp), %r14 + movq T+16(%rsp), %rbx + cmovnzq %rbx, %r14 + movq S+24(%rsp), %r15 + movq T+24(%rsp), %rbx + cmovnzq %rbx, %r15 + movq %r12, S(%rsp) + movq %r13, S+8(%rsp) + movq %r14, S+16(%rsp) + movq %r15, S+24(%rsp) + +// Check invalidity, occurring if s^2 * w is not in {0,1,-1} + + cmovzq %rax, %r8 + negq %r8 + sbbq %r8, %r8 + negq %r8 + orq %r8, badun + +// Let [%r11;%r10;%r9;%r8] = s and [%r15;%r14;%r13;%r12] = p_25519 - s + + movq S(%rsp), %r8 + movq $-19, %r12 + subq %r8, %r12 + movq S+8(%rsp), %r9 + movq $-1, %r13 + sbbq %r9, %r13 + movq S+16(%rsp), %r10 + movq $-1, %r14 + sbbq %r10, %r14 + movq S+24(%rsp), %r11 + movq $0x7FFFFFFFFFFFFFFF, %r15 + sbbq %r11, %r15 + +// Decide whether a flip is apparently indicated, s_0 <=> sgnbit +// Decide also if s = 0 by OR-ing its digits. 
Now if a flip is indicated: +// - if s = 0 then mark as invalid +// - if s <> 0 then indeed flip + + movl $1, %ecx + andq %r8, %rcx + xorq sgnbit, %rcx + movq badun, %rdx + movq %rdx, %rsi + orq %rcx, %rdx + xorl %ebp, %ebp + movq %r8, %rax + movq %r9, %rbx + orq %r10, %rax + orq %r11, %rbx + orq %rbx, %rax + cmovzq %rbp, %rcx + cmovnzq %rsi, %rdx + +// Actual selection of x as s or -s, copying of y and return of validity + + testq %rcx, %rcx + + cmovnzq %r12, %r8 + cmovnzq %r13, %r9 + cmovnzq %r14, %r10 + cmovnzq %r15, %r11 + + movq res, %rdi + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq Y(%rsp), %rcx + movq %rcx, 32(%rdi) + movq Y+8(%rsp), %rcx + movq %rcx, 40(%rdi) + movq Y+16(%rsp), %rcx + movq %rcx, 48(%rdi) + movq Y+24(%rsp), %rcx + movq %rcx, 56(%rdi) + + movq %rdx, %rax + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +// ************************************************************* +// Local z = x * y +// ************************************************************* + +edwards25519_decode_mul_p25519: + movq %rdx, %rcx + xorl %ebp, %ebp + movq (%rcx), %rdx + mulxq (%rsi), %r8, %r9 + mulxq 0x8(%rsi), %rax, %r10 + addq %rax, %r9 + mulxq 0x10(%rsi), %rax, %r11 + adcq %rax, %r10 + mulxq 0x18(%rsi), %rax, %r12 + adcq %rax, %r11 + adcq %rbp, %r12 + xorl %ebp, %ebp + movq 0x8(%rcx), %rdx + mulxq (%rsi), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x8(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x10(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x18(%rsi), %rax, %r13 + adcxq %rax, %r12 + adoxq %rbp, %r13 + adcq %rbp, %r13 + xorl %ebp, %ebp + movq 0x10(%rcx), %rdx + mulxq (%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x8(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x10(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x18(%rsi), %rax, %r14 + adcxq %rax, %r13 + adoxq %rbp, %r14 + adcq %rbp, %r14 + xorl %ebp, %ebp + movq 0x18(%rcx), %rdx + mulxq (%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x18(%rsi), %rcx, %r15 + mulxq 0x8(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x10(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + movl $0x26, %edx + mulxq %r15, %rax, %rbx + adcxq %rcx, %r14 + adoxq %rbp, %r15 + adcq %rbp, %r15 + addq %r11, %rax + adcq %rbp, %rbx + btq $0x3f, %rax + adcq %rbx, %rbx + leaq 0x1(%rbx), %rcx + imulq $0x13, %rcx, %rcx + xorl %ebp, %ebp + adoxq %rcx, %r8 + mulxq %r12, %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq %r13, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r14, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq %r15, %rax, %rbx + adcq %rax, %r11 + shlq $0x3f, %rcx + cmpq %rcx, %r11 + movl $0x13, %eax + cmovns %rbp, %rax + subq %rax, %r8 + sbbq %rbp, %r9 + sbbq %rbp, %r10 + sbbq %rbp, %r11 + btr $0x3f, %r11 + movq %r8, (%rdi) + movq %r9, 0x8(%rdi) + movq %r10, 0x10(%rdi) + movq %r11, 0x18(%rdi) + ret + +// ************************************************************* +// Local z = 2^n * x +// ************************************************************* + +edwards25519_decode_nsqr_p25519: + +// Copy input argument into q + + movq (%rdx), %rax + movq 8(%rdx), %rbx + movq 16(%rdx), %rcx + movq 24(%rdx), %rdx + movq %rax, Q8(%rsp) + movq %rbx, Q8+8(%rsp) + movq %rcx, Q8+16(%rsp) + movq %rdx, Q8+24(%rsp) + +// Main squaring loop, accumulating in u consistently and 
+// only ensuring the intermediates are < 2 * p_25519 = 2^256 - 38 + +edwards25519_decode_loop: + movq Q8(%rsp), %rdx + mulxq %rdx, %r8, %r15 + mulxq Q8+0x8(%rsp), %r9, %r10 + mulxq Q8+0x18(%rsp), %r11, %r12 + movq Q8+0x10(%rsp), %rdx + mulxq Q8+0x18(%rsp), %r13, %r14 + xorl %ebx, %ebx + mulxq Q8(%rsp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + mulxq Q8+0x8(%rsp), %rax, %rcx + adcxq %rax, %r11 + adoxq %rcx, %r12 + movq Q8+0x18(%rsp), %rdx + mulxq Q8+0x8(%rsp), %rax, %rcx + adcxq %rax, %r12 + adoxq %rcx, %r13 + adcxq %rbx, %r13 + adoxq %rbx, %r14 + adcq %rbx, %r14 + xorl %ebx, %ebx + adcxq %r9, %r9 + adoxq %r15, %r9 + movq Q8+0x8(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq Q8+0x10(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq Q8+0x18(%rsp), %rdx + mulxq %rdx, %rax, %r15 + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %rbx, %r15 + adoxq %rbx, %r15 + movl $0x26, %edx + xorl %ebx, %ebx + mulxq %r12, %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + mulxq %r13, %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + mulxq %r14, %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + mulxq %r15, %rax, %r12 + adcxq %rax, %r11 + adoxq %rbx, %r12 + adcxq %rbx, %r12 + shldq $0x1, %r11, %r12 + btr $0x3f, %r11 + movl $0x13, %edx + imulq %r12, %rdx + addq %rdx, %r8 + adcq %rbx, %r9 + adcq %rbx, %r10 + adcq %rbx, %r11 + movq %r8, Q8(%rsp) + movq %r9, Q8+0x8(%rsp) + movq %r10, Q8+0x10(%rsp) + movq %r11, Q8+0x18(%rsp) + +// Loop as applicable + + decq %rsi + jnz edwards25519_decode_loop + +// We know the intermediate result x < 2^256 - 38, and now we do strict +// modular reduction mod 2^255 - 19. Note x < 2^255 - 19 <=> x + 19 < 2^255 +// which is equivalent to a "ns" condition. We just use the results where +// they were in registers [%r11;%r10;%r9;%r8] instead of re-loading them. + + movl $19, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + addq %r8, %rax + adcq %r9, %rbx + adcq %r10, %rcx + adcq %r11, %rdx + + cmovns %r8, %rax + cmovns %r9, %rbx + cmovns %r10, %rcx + cmovns %r11, %rdx + btr $63, %rdx + movq %rax, (%rdi) + movq %rbx, 8(%rdi) + movq %rcx, 16(%rdi) + movq %rdx, 24(%rdi) + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/x86_att/curve25519/edwards25519_decode_alt.S b/x86_att/curve25519/edwards25519_decode_alt.S new file mode 100644 index 0000000000..570b2f9081 --- /dev/null +++ b/x86_att/curve25519/edwards25519_decode_alt.S @@ -0,0 +1,751 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Decode compressed 256-bit form of edwards25519 point +// Input c[32] (bytes); output function return and z[8] +// +// extern uint64_t edwards25519_decode_alt(uint64_t z[static 8],uint8_t c[static 32]); +// +// This interprets the input byte string as a little-endian number +// representing a point (x,y) on the edwards25519 curve, encoded as +// 2^255 * x_0 + y where x_0 is the least significant bit of x. It +// returns the full pair of coordinates x (at z) and y (at z+4). The +// return code is 0 for success and 1 for failure, which means that +// the input does not correspond to the encoding of any edwards25519 +// point. 
This can happen for three reasons, where y = the lowest +// 255 bits of the input: +// +// * y >= p_25519 +// Input y coordinate is not reduced +// * (y^2 - 1) * (1 + d_25519 * y^2) has no modular square root +// There is no x such that (x,y) is on the curve +// * y^2 = 1 and top bit of input is set +// Cannot be the canonical encoding of (0,1) or (0,-1) +// +// Standard x86-64 ABI: RDI = z, RSI = c +// Microsoft x64 ABI: RCX = z, RDX = c +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_decode_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_decode_alt) + .text + +// Size in bytes of a 64-bit word + +#define N 8 + +// Pointer-offset pairs for temporaries on stack + +#define y 0(%rsp) +#define s (4*N)(%rsp) +#define t (8*N)(%rsp) +#define u (12*N)(%rsp) +#define v (16*N)(%rsp) +#define w (20*N)(%rsp) +#define q (24*N)(%rsp) +#define res (28*N)(%rsp) +#define sgnbit (29*N)(%rsp) +#define badun (30*N)(%rsp) + +// Total size to reserve on the stack + +#define NSPACE (32*N) + +// Corrupted versions when stack is down 8 more + +#define q8 (25*N)(%rsp) + +// Syntactic variants to make x86_att version simpler to generate + +#define Y 0 +#define S (4*N) +#define T (8*N) +#define U (12*N) +#define V (16*N) +#define W (20*N) +#define Q8 (25*N) + +S2N_BN_SYMBOL(edwards25519_decode_alt): + +// In this case the Windows form literally makes a subroutine call. +// This avoids hassle arising from subroutine offsets + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + callq edwards25519_decode_alt_standard + popq %rsi + popq %rdi + ret + +edwards25519_decode_alt_standard: +#endif + +// Save registers and make room for temporaries + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $NSPACE, %rsp + +// Save the return pointer for the end so we can overwrite %rdi later + + movq %rdi, res + +// Load the inputs, which can be done word-wise since x86 is little-endian. +// Let y be the lowest 255 bits of the input and sgnbit the desired parity. +// If y >= p_25519 then already flag the input as invalid (badun = 1). 
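+//
+// For reference, the computation below uses the Edwards decompression
+// identity x^2 = (y^2 - 1) / (1 + d_25519 * y^2) (mod p_25519), taking
+// the candidate square root as s = u * w^(2^252-3) with u = y^2 - 1,
+// v = 1 + d_25519 * y^2 and w = u * v, since (p_25519 - 5) / 8 = 2^252 - 3.
+// A sketch in terms of hypothetical field helpers fe_sqr/fe_mul/fe_add/
+// fe_sub/fe_pow (these are not part of this file):
+//
+//     fe u = fe_sub(fe_sqr(y), fe_1);               // y^2 - 1
+//     fe v = fe_add(fe_mul(fe_d, fe_sqr(y)), fe_1); // 1 + d * y^2
+//     fe w = fe_mul(u, v);
+//     fe s = fe_mul(u, fe_pow(w, e));               // e = 2^252 - 3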
+ + movq (%rsi), %rax + movq %rax, Y(%rsp) + movq 8(%rsi), %rbx + movq %rbx, Y+8(%rsp) + xorl %ebp, %ebp + movq 16(%rsi), %rcx + movq %rcx, Y+16(%rsp) + movq 24(%rsi), %rdx + btr $63, %rdx + movq %rdx, Y+24(%rsp) + adcq %rbp, %rbp + movq %rbp, sgnbit + + addq $19, %rax + adcq $0, %rbx + adcq $0, %rcx + adcq $0, %rdx + shrq $63, %rdx + movq %rdx, badun + +// u = y^2 - 1 (actually y + 2^255-20, not reduced modulo) +// v = 1 + d * y^2 (not reduced modulo from the +1) +// w = u * v + + leaq V(%rsp), %rdi + movq $1, %rsi + leaq Y(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + movq V(%rsp), %rax + subq $20, %rax + movq V+8(%rsp), %rbx + sbbq $0, %rbx + movq V+16(%rsp), %rcx + sbbq $0, %rcx + movq V+24(%rsp), %rdx + sbbq $0, %rdx + btc $63, %rdx + movq %rax, U(%rsp) + movq %rbx, U+8(%rsp) + movq %rcx, U+16(%rsp) + movq %rdx, U+24(%rsp) + + movq $0x75eb4dca135978a3, %rax + movq %rax, W(%rsp) + movq $0x00700a4d4141d8ab, %rax + movq %rax, W+8(%rsp) + movq $0x8cc740797779e898, %rax + movq %rax, W+16(%rsp) + movq $0x52036cee2b6ffe73, %rax + movq %rax, W+24(%rsp) + leaq V(%rsp), %rdi + leaq W(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + movq V(%rsp), %rax + addq $1, %rax + movq V+8(%rsp), %rbx + adcq $0, %rbx + movq V+16(%rsp), %rcx + adcq $0, %rcx + movq V+24(%rsp), %rdx + adcq $0, %rdx + movq %rax, V(%rsp) + movq %rbx, V+8(%rsp) + movq %rcx, V+16(%rsp) + movq %rdx, V+24(%rsp) + + leaq W(%rsp), %rdi + leaq U(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + +// Get s = w^{252-3} as a candidate inverse square root 1/sqrt(w). +// This power tower computation is the same as bignum_invsqrt_p25519 + + leaq T(%rsp), %rdi + movq $1, %rsi + leaq W(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq T(%rsp), %rsi + leaq W(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + + leaq S(%rsp), %rdi + movq $2, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + + leaq S(%rsp), %rdi + movq $1, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq V(%rsp), %rdi + leaq S(%rsp), %rsi + leaq W(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + + leaq S(%rsp), %rdi + movq $5, %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + + leaq S(%rsp), %rdi + movq $10, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + + leaq S(%rsp), %rdi + movq $5, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq V(%rsp), %rdi + leaq S(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + + leaq S(%rsp), %rdi + movq $25, %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + + leaq S(%rsp), %rdi + movq $50, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + + leaq S(%rsp), %rdi + movq $25, %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq V(%rsp), %rdi + leaq S(%rsp), %rsi + leaq V(%rsp), %rdx + 
callq edwards25519_decode_alt_mul_p25519 + + leaq S(%rsp), %rdi + movq $125, %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq V(%rsp), %rdi + leaq S(%rsp), %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + + leaq S(%rsp), %rdi + movq $2, %rsi + leaq V(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq S(%rsp), %rdi + leaq S(%rsp), %rsi + leaq W(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + +// Compute v' = s^2 * w to discriminate whether the square root sqrt(u/v) +// exists, in which case we should get 0, 1 or -1. + + leaq V(%rsp), %rdi + movq $1, %rsi + leaq S(%rsp), %rdx + callq edwards25519_decode_alt_nsqr_p25519 + + leaq V(%rsp), %rdi + leaq V(%rsp), %rsi + leaq W(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + +// Get the two candidates for sqrt(u / v), one being s = u * w^{252-3} +// and the other being t = s * j_25519 where j_25519 = sqrt(-1). + + leaq S(%rsp), %rdi + leaq U(%rsp), %rsi + leaq S(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + movq $0xc4ee1b274a0ea0b0, %rax + movq %rax, T(%rsp) + movq $0x2f431806ad2fe478, %rax + movq %rax, T+8(%rsp) + movq $0x2b4d00993dfbd7a7, %rax + movq %rax, T+16(%rsp) + movq $0x2b8324804fc1df0b, %rax + movq %rax, T+24(%rsp) + leaq T(%rsp), %rdi + leaq S(%rsp), %rsi + leaq T(%rsp), %rdx + callq edwards25519_decode_alt_mul_p25519 + +// %rax = 0 <=> s^2 * w = 0 or 1 + + movq V(%rsp), %r8 + movq V+8(%rsp), %r9 + movq V+16(%rsp), %r10 + movq V+24(%rsp), %r11 + movl $1, %eax + notq %rax + andq %r8, %rax + orq %r9, %rax + orq %r10, %rax + orq %r11, %rax + +// %r8 = 0 <=> s^2 * w = -1 (mod p_25519, i.e. s^2 * w = 2^255 - 20) + + addq $20, %r8 + notq %r9 + notq %r10 + bts $63, %r11 + addq $1, %r11 + orq %r9, %r8 + orq %r11, %r10 + orq %r10, %r8 + +// If s^2 * w is not 0 or 1 then replace s by t + + testq %rax, %rax + + movq S(%rsp), %r12 + movq T(%rsp), %rbx + cmovnzq %rbx, %r12 + movq S+8(%rsp), %r13 + movq T+8(%rsp), %rbx + cmovnzq %rbx, %r13 + movq S+16(%rsp), %r14 + movq T+16(%rsp), %rbx + cmovnzq %rbx, %r14 + movq S+24(%rsp), %r15 + movq T+24(%rsp), %rbx + cmovnzq %rbx, %r15 + movq %r12, S(%rsp) + movq %r13, S+8(%rsp) + movq %r14, S+16(%rsp) + movq %r15, S+24(%rsp) + +// Check invalidity, occurring if s^2 * w is not in {0,1,-1} + + cmovzq %rax, %r8 + negq %r8 + sbbq %r8, %r8 + negq %r8 + orq %r8, badun + +// Let [%r11;%r10;%r9;%r8] = s and [%r15;%r14;%r13;%r12] = p_25519 - s + + movq S(%rsp), %r8 + movq $-19, %r12 + subq %r8, %r12 + movq S+8(%rsp), %r9 + movq $-1, %r13 + sbbq %r9, %r13 + movq S+16(%rsp), %r10 + movq $-1, %r14 + sbbq %r10, %r14 + movq S+24(%rsp), %r11 + movq $0x7FFFFFFFFFFFFFFF, %r15 + sbbq %r11, %r15 + +// Decide whether a flip is apparently indicated, s_0 <=> sgnbit +// Decide also if s = 0 by OR-ing its digits. 
Now if a flip is indicated: +// - if s = 0 then mark as invalid +// - if s <> 0 then indeed flip + + movl $1, %ecx + andq %r8, %rcx + xorq sgnbit, %rcx + movq badun, %rdx + movq %rdx, %rsi + orq %rcx, %rdx + xorl %ebp, %ebp + movq %r8, %rax + movq %r9, %rbx + orq %r10, %rax + orq %r11, %rbx + orq %rbx, %rax + cmovzq %rbp, %rcx + cmovnzq %rsi, %rdx + +// Actual selection of x as s or -s, copying of y and return of validity + + testq %rcx, %rcx + + cmovnzq %r12, %r8 + cmovnzq %r13, %r9 + cmovnzq %r14, %r10 + cmovnzq %r15, %r11 + + movq res, %rdi + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq Y(%rsp), %rcx + movq %rcx, 32(%rdi) + movq Y+8(%rsp), %rcx + movq %rcx, 40(%rdi) + movq Y+16(%rsp), %rcx + movq %rcx, 48(%rdi) + movq Y+24(%rsp), %rcx + movq %rcx, 56(%rdi) + + movq %rdx, %rax + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +// ************************************************************* +// Local z = x * y +// ************************************************************* + +edwards25519_decode_alt_mul_p25519: + movq %rdx, %rcx + movq (%rsi), %rax + mulq (%rcx) + movq %rax, %r8 + movq %rdx, %r9 + xorq %r10, %r10 + xorq %r11, %r11 + movq (%rsi), %rax + mulq 0x8(%rcx) + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x8(%rsi), %rax + mulq (%rcx) + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x0, %r11 + xorq %r12, %r12 + movq (%rsi), %rax + mulq 0x10(%rcx) + addq %rax, %r10 + adcq %rdx, %r11 + adcq %r12, %r12 + movq 0x8(%rsi), %rax + mulq 0x8(%rcx) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x0, %r12 + movq 0x10(%rsi), %rax + mulq (%rcx) + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x0, %r12 + xorq %r13, %r13 + movq (%rsi), %rax + mulq 0x18(%rcx) + addq %rax, %r11 + adcq %rdx, %r12 + adcq %r13, %r13 + movq 0x8(%rsi), %rax + mulq 0x10(%rcx) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x0, %r13 + movq 0x10(%rsi), %rax + mulq 0x8(%rcx) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x0, %r13 + movq 0x18(%rsi), %rax + mulq (%rcx) + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x0, %r13 + xorq %r14, %r14 + movq 0x8(%rsi), %rax + mulq 0x18(%rcx) + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x10(%rsi), %rax + mulq 0x10(%rcx) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + movq 0x18(%rsi), %rax + mulq 0x8(%rcx) + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + xorq %r15, %r15 + movq 0x10(%rsi), %rax + mulq 0x18(%rcx) + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x18(%rsi), %rax + mulq 0x10(%rcx) + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x0, %r15 + movq 0x18(%rsi), %rax + mulq 0x18(%rcx) + addq %rax, %r14 + adcq %rdx, %r15 + movl $0x26, %esi + movq %r12, %rax + mulq %rsi + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %rcx, %rcx + movq %r13, %rax + mulq %rsi + subq %rcx, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rcx, %rcx + movq %r14, %rax + mulq %rsi + subq %rcx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + movq %r15, %rax + mulq %rsi + subq %rcx, %rdx + xorq %rcx, %rcx + addq %rax, %r11 + movq %rdx, %r12 + adcq %rcx, %r12 + shldq $0x1, %r11, %r12 + leaq 0x1(%r12), %rax + movl $0x13, %esi + bts $0x3f, %r11 + imulq %rsi, %rax + addq %rax, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rcx, %r11 + sbbq %rax, %rax + notq %rax + andq %rsi, %rax + subq %rax, %r8 + sbbq %rcx, %r9 + sbbq %rcx, %r10 + sbbq %rcx, %r11 + btr $0x3f, %r11 + movq %r8, (%rdi) + movq %r9, 0x8(%rdi) + movq %r10, 0x10(%rdi) + movq %r11, 0x18(%rdi) + 
ret + +// ************************************************************* +// Local z = 2^n * x +// ************************************************************* + +edwards25519_decode_alt_nsqr_p25519: + +// Copy input argument into q + + movq (%rdx), %rax + movq 8(%rdx), %rbx + movq 16(%rdx), %rcx + movq 24(%rdx), %rdx + movq %rax, Q8(%rsp) + movq %rbx, Q8+8(%rsp) + movq %rcx, Q8+16(%rsp) + movq %rdx, Q8+24(%rsp) + +// Main squaring loop, accumulating in u consistently and +// only ensuring the intermediates are < 2 * p_25519 = 2^256 - 38 + +edwards25519_decode_alt_loop: + movq Q8(%rsp), %rax + mulq %rax + movq %rax, %r8 + movq %rdx, %r9 + xorq %r10, %r10 + xorq %r11, %r11 + movq Q8(%rsp), %rax + mulq Q8+0x8(%rsp) + addq %rax, %rax + adcq %rdx, %rdx + adcq $0x0, %r11 + addq %rax, %r9 + adcq %rdx, %r10 + adcq $0x0, %r11 + xorq %r12, %r12 + movq Q8+0x8(%rsp), %rax + mulq %rax + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x0, %r12 + movq Q8(%rsp), %rax + mulq Q8+0x10(%rsp) + addq %rax, %rax + adcq %rdx, %rdx + adcq $0x0, %r12 + addq %rax, %r10 + adcq %rdx, %r11 + adcq $0x0, %r12 + xorq %r13, %r13 + movq Q8(%rsp), %rax + mulq Q8+0x18(%rsp) + addq %rax, %rax + adcq %rdx, %rdx + adcq $0x0, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x0, %r13 + movq Q8+0x8(%rsp), %rax + mulq Q8+0x10(%rsp) + addq %rax, %rax + adcq %rdx, %rdx + adcq $0x0, %r13 + addq %rax, %r11 + adcq %rdx, %r12 + adcq $0x0, %r13 + xorq %r14, %r14 + movq Q8+0x8(%rsp), %rax + mulq Q8+0x18(%rsp) + addq %rax, %rax + adcq %rdx, %rdx + adcq $0x0, %r14 + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + movq Q8+0x10(%rsp), %rax + mulq %rax + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + xorq %r15, %r15 + movq Q8+0x10(%rsp), %rax + mulq Q8+0x18(%rsp) + addq %rax, %rax + adcq %rdx, %rdx + adcq $0x0, %r15 + addq %rax, %r13 + adcq %rdx, %r14 + adcq $0x0, %r15 + movq Q8+0x18(%rsp), %rax + mulq %rax + addq %rax, %r14 + adcq %rdx, %r15 + movl $0x26, %ebx + movq %r12, %rax + mulq %rbx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %rcx, %rcx + movq %r13, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rcx, %rcx + movq %r14, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + movq %r15, %rax + mulq %rbx + subq %rcx, %rdx + xorq %rcx, %rcx + addq %rax, %r11 + movq %rdx, %r12 + adcq %rcx, %r12 + shldq $0x1, %r11, %r12 + btr $0x3f, %r11 + movl $0x13, %edx + imulq %r12, %rdx + addq %rdx, %r8 + adcq %rcx, %r9 + adcq %rcx, %r10 + adcq %rcx, %r11 + movq %r8, Q8(%rsp) + movq %r9, Q8+0x8(%rsp) + movq %r10, Q8+0x10(%rsp) + movq %r11, Q8+0x18(%rsp) + +// Loop as applicable + + decq %rsi + jnz edwards25519_decode_alt_loop + +// We know the intermediate result x < 2^256 - 38, and now we do strict +// modular reduction mod 2^255 - 19. Note x < 2^255 - 19 <=> x + 19 < 2^255 +// which is equivalent to a "ns" condition. We just use the results where +// they were in registers [%r11;%r10;%r9;%r8] instead of re-loading them. 
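+//
+// A rough C model of this final correction (illustrative only; x[] is
+// the 4x64-bit little-endian intermediate with x < 2^256 - 38, and
+// unsigned __int128 is the GCC/Clang extension):
+//
+//     uint64_t r[4];
+//     unsigned __int128 t = 19;
+//     for (int i = 0; i < 4; i++) { t += x[i]; r[i] = (uint64_t) t; t >>= 64; }
+//     if (r[3] >> 63)                      // x + 19 >= 2^255, so x >= p_25519
+//         r[3] &= 0x7FFFFFFFFFFFFFFFull;   // keep x + 19 - 2^255 = x - p_25519
+//     else
+//         for (int i = 0; i < 4; i++) r[i] = x[i];  // x was already reduced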
+ + movl $19, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + addq %r8, %rax + adcq %r9, %rbx + adcq %r10, %rcx + adcq %r11, %rdx + + cmovns %r8, %rax + cmovns %r9, %rbx + cmovns %r10, %rcx + cmovns %r11, %rdx + btr $63, %rdx + movq %rax, (%rdi) + movq %rbx, 8(%rdi) + movq %rcx, 16(%rdi) + movq %rdx, 24(%rdi) + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif From d2bb439374c9d3733ca8c25e2f1c61f74924cec1 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 17 Oct 2023 17:36:25 -0700 Subject: [PATCH 39/42] Add generic size curve25519/edwards25519 basepoint modulus The function bignum_mod_n25519 performs reduction of an input of any size (k digits) modulo the order of the curve25519/edwards25519 basepoint, n_25519 = 2^252 + 27742317777372353535851937790883648493. It generalizes bignum_mod_n25519_4, which is the special case of 4-digit (256-bit) inputs. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/e23fd300aab6a28455133d495b458074d5d810f1 --- arm/curve25519/bignum_mod_n25519.S | 186 ++++++++++++++++++++ x86_att/curve25519/bignum_mod_n25519.S | 228 +++++++++++++++++++++++++ 2 files changed, 414 insertions(+) create mode 100644 arm/curve25519/bignum_mod_n25519.S create mode 100644 x86_att/curve25519/bignum_mod_n25519.S diff --git a/arm/curve25519/bignum_mod_n25519.S b/arm/curve25519/bignum_mod_n25519.S new file mode 100644 index 0000000000..5a256ed133 --- /dev/null +++ b/arm/curve25519/bignum_mod_n25519.S @@ -0,0 +1,186 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Reduce modulo basepoint order, z := x mod n_25519 +// Input x[k]; output z[4] +// +// extern void bignum_mod_n25519 +// (uint64_t z[static 4], uint64_t k, uint64_t *x); +// +// Reduction is modulo the order of the curve25519/edwards25519 basepoint, +// which is n_25519 = 2^252 + 27742317777372353535851937790883648493 +// +// Standard ARM ABI: X0 = z, X1 = k, X2 = x +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n25519) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n25519) + .text + .balign 4 + +#define z x0 +#define k x1 +#define x x2 + +#define m0 x3 +#define m1 x4 +#define m2 x5 +#define m3 x6 + +#define t0 x7 +#define t1 x8 +#define t2 x9 +#define t3 x10 + +#define n0 x11 +#define n1 x12 + +// These two are aliased: we only load d when finished with q + +#define q x13 +#define d x13 + +// Loading large constants + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +S2N_BN_SYMBOL(bignum_mod_n25519): + +// If the input is already <= 3 words long, go to a trivial "copy" path + + cmp k, #4 + bcc short + +// Otherwise load the top 4 digits (top-down) and reduce k by 4 +// This [m3;m2;m1;m0] is the initial x where we begin reduction. + + sub k, k, #4 + lsl t0, k, #3 + add t0, t0, x + ldp m2, m3, [t0, #16] + ldp m0, m1, [t0] + +// Load the complicated two words of n_25519 = 2^252 + [n1; n0] + + movbig( n0, #0x5812, #0x631a, #0x5cf5, #0xd3ed) + movbig( n1, #0x14de, #0xf9de, #0xa2f7, #0x9cd6) + +// Get the quotient estimate q = floor(x/2^252). 
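+// (The estimate is safe because n_25519 = 2^252 + c with 0 < c < 2^125
+// and here x < 2^256, so q <= 15 and x - q * n_25519 > -q * c > -n_25519;
+// the single conditional add-back of n_25519 below therefore leaves a
+// fully reduced result. In pseudo-C on the full-width value, purely
+// illustrative:
+//
+//     q = x >> 252;
+//     x = x - q * n_25519;           // may underflow, by less than n_25519
+//     if (x < 0) x = x + n_25519;    // one correction gives 0 <= x < n_25519
+// )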
+// Also delete it from m3, in effect doing x' = x - q * 2^252 + + lsr q, m3, #60 + and m3, m3, #0x0FFFFFFFFFFFFFFF + +// Multiply [t2;t1;t0] = q * [n1;n0] + + mul t0, n0, q + mul t1, n1, q + umulh t2, n0, q + adds t1, t1, t2 + umulh t2, n1, q + adc t2, t2, xzr + +// Subtract [m3;m2;m1;m0] = x' - q * [n1;n0] = x - q * n_25519 + + subs m0, m0, t0 + sbcs m1, m1, t1 + sbcs m2, m2, t2 + sbcs m3, m3, xzr + +// If this borrows (CF = 0 because of inversion), add back n_25519. +// The masked n3 digit exploits the fact that bit 60 of n0 is set. + + csel t0, n0, xzr, cc + csel t1, n1, xzr, cc + adds m0, m0, t0 + adcs m1, m1, t1 + and t0, t0, #0x1000000000000000 + adcs m2, m2, xzr + adc m3, m3, t0 + +// Now do (k-4) iterations of 5->4 word modular reduction. Each one +// is similar to the sequence above except for the more refined quotient +// estimation process. + + cbz k, writeback + +loop: + +// Assume that the new 5-digit x is 2^64 * previous_x + next_digit. +// Get the quotient estimate q = max (floor(x/2^252)) (2^64 - 1) +// and first compute x' = x - 2^252 * q. + + extr q, m3, m2, #60 + and m2, m2, #0x0FFFFFFFFFFFFFFF + sub q, q, m3, lsr #60 + and m3, m3, #0xF000000000000000 + add m2, m2, m3 + +// Multiply [t2;t1;t0] = q * [n1;n0] + + mul t0, n0, q + mul t1, n1, q + umulh t2, n0, q + adds t1, t1, t2 + umulh t2, n1, q + adc t2, t2, xzr + +// Decrement k and load the next digit (note that d aliases to q) + + sub k, k, #1 + ldr d, [x, k, lsl #3] + +// Subtract [t3;t2;t1;t0] = x' - q * [n1;n0] = x - q * n_25519 + + subs t0, d, t0 + sbcs t1, m0, t1 + sbcs t2, m1, t2 + sbcs t3, m2, xzr + +// If this borrows (CF = 0 because of inversion), add back n_25519. +// The masked n3 digit exploits the fact that bit 60 of n1 is set. + + csel m0, n0, xzr, cc + csel m1, n1, xzr, cc + adds m0, t0, m0 + and m3, m1, #0x1000000000000000 + adcs m1, t1, m1 + adcs m2, t2, xzr + adc m3, t3, m3 + + cbnz k, loop + +// Finally write back [m3;m2;m1;m0] and return + +writeback: + stp m0, m1, [z] + stp m2, m3, [z, #16] + ret + +// Short case: just copy the input with zero-padding + +short: + mov m0, xzr + mov m1, xzr + mov m2, xzr + mov m3, xzr + + cbz k, writeback + ldr m0, [x] + subs k, k, #1 + beq writeback + ldr m1, [x, #8] + subs k, k, #1 + beq writeback + ldr m2, [x, #16] + b writeback + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/x86_att/curve25519/bignum_mod_n25519.S b/x86_att/curve25519/bignum_mod_n25519.S new file mode 100644 index 0000000000..c45d99b541 --- /dev/null +++ b/x86_att/curve25519/bignum_mod_n25519.S @@ -0,0 +1,228 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC + +// ---------------------------------------------------------------------------- +// Reduce modulo basepoint order, z := x mod n_25519 +// Input x[k]; output z[4] +// +// extern void bignum_mod_n25519 +// (uint64_t z[static 4], uint64_t k, uint64_t *x); +// +// Reduction is modulo the order of the curve25519/edwards25519 basepoint, +// which is n_25519 = 2^252 + 27742317777372353535851937790883648493 +// +// Standard x86-64 ABI: RDI = z, RSI = k, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = k, R8 = x +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n25519) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n25519) + .text + +#define z %rdi +#define k %rsi +#define x %rcx + +#define m0 %r8 +#define m1 %r9 +#define m2 %r10 +#define m3 %r11 +#define d %r12 + +#define q %rbx + +S2N_BN_SYMBOL(bignum_mod_n25519): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + +// Save extra registers + + pushq %rbx + pushq %rbp + pushq %r12 + +// If the input is already <= 3 words long, go to a trivial "copy" path + + cmpq $4, k + jc shortinput + +// Otherwise load the top 4 digits (top-down) and reduce k by 4 +// This [m3;m2;m1;m0] is the initial x where we begin reduction. + + subq $4, k + movq 24(%rdx,k,8), m3 + movq 16(%rdx,k,8), m2 + movq 8(%rdx,k,8), m1 + movq (%rdx,k,8), m0 + +// Move x into another register to leave %rdx free for multiplies + + movq %rdx, x + +// Get the quotient estimate q = floor(x/2^252). +// Also delete it from m3, in effect doing x' = x - q * 2^252 + + movq m3, q + shrq $60, q + + shlq $4, m3 + shrq $4, m3 + +// Let [%rdx;d;%rbp] = q * (n_25519 - 2^252) + + movq $0x5812631a5cf5d3ed, %rax + mulq q + movq %rax, %rbp + movq %rdx, d + + movq $0x14def9dea2f79cd6, %rax + mulq q + addq %rax, d + adcq $0, %rdx + +// Subtract to get x' - q * (n_25519 - 2^252) = x - q * n_25519 + + subq %rbp, m0 + sbbq d, m1 + sbbq %rdx, m2 + sbbq $0, m3 + +// Get a bitmask for the borrow and create a masked version of +// non-trivial digits of [%rbx;0;%rdx;%rax] = n_25519, then add it. +// The masked n3 digit exploits the fact that bit 60 of n0 is set. + + sbbq %rbx, %rbx + + movq $0x5812631a5cf5d3ed, %rax + andq %rbx, %rax + movq $0x14def9dea2f79cd6, %rdx + andq %rbx, %rdx + movq $0x1000000000000000, %rbx + andq %rax, %rbx + + addq %rax, m0 + adcq %rdx, m1 + adcq $0, m2 + adcq %rbx, m3 + +// Now do (k-4) iterations of 5->4 word modular reduction. Each one +// is similar to the sequence above except for the more refined quotient +// estimation process. + + testq k, k + jz writeback + +loop: + +// Assume that the new 5-digit x is 2^64 * previous_x + next_digit. +// Get the quotient estimate q = max (floor(x/2^252)) (2^64 - 1) +// and first compute x' = x - 2^252 * q. + + movq m3, q + shldq $4, m2, q + shrq $60, m3 + subq m3, q + shlq $4, m2 + shrdq $4, m3, m2 + +// Let [%rdx;m3;%rbp] = q * (n_25519 - 2^252) + + movq $0x5812631a5cf5d3ed, %rax + mulq q + movq %rax, %rbp + movq %rdx, m3 + + movq $0x14def9dea2f79cd6, %rax + mulq q + addq %rax, m3 + adcq $0, %rdx + +// Load the next digit + + movq -8(x,k,8), d + +// Subtract to get x' - q * (n_25519 - 2^252) = x - q * n_25519 + + subq %rbp, d + sbbq m3, m0 + sbbq %rdx, m1 + sbbq $0, m2 + +// Get a bitmask for the borrow and create a masked version of +// non-trivial digits of [%rbx;0;%rdx;%rax] = n_25519, then add it. 
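+// (A branch-free C model of this correction, illustrative only, where
+// `borrow` stands for the carry flag left by the final sbbq above and
+// add_4x64 is a hypothetical limb-wise add with carry propagation:
+//
+//     uint64_t mask = borrow ? ~0ull : 0;
+//     uint64_t n0 = 0x5812631a5cf5d3ed & mask;   // low digits of n_25519
+//     uint64_t n1 = 0x14def9dea2f79cd6 & mask;
+//     uint64_t n3 = 0x1000000000000000 & mask;   // the 2^252 term
+//     add_4x64(m, n0, n1, 0, n3);                // m += masked n_25519
+// )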
+// The masked n3 digit exploits the fact that bit 60 of n0 is set. + + sbbq %rbx, %rbx + + movq $0x5812631a5cf5d3ed, %rax + andq %rbx, %rax + movq $0x14def9dea2f79cd6, %rdx + andq %rbx, %rdx + movq $0x1000000000000000, %rbx + andq %rax, %rbx + + addq %rax, d + adcq %rdx, m0 + adcq $0, m1 + adcq %rbx, m2 + +// Now shuffle registers up and loop + + movq m2, m3 + movq m1, m2 + movq m0, m1 + movq d, m0 + + decq k + jnz loop + +// Write back + +writeback: + + movq m0, (z) + movq m1, 8(z) + movq m2, 16(z) + movq m3, 24(z) + +// Restore registers and return + + popq %r12 + popq %rbp + popq %rbx +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +shortinput: + + xorq m0, m0 + xorq m1, m1 + xorq m2, m2 + xorq m3, m3 + + testq k, k + jz writeback + movq (%rdx), m0 + decq k + jz writeback + movq 8(%rdx), m1 + decq k + jz writeback + movq 16(%rdx), m2 + jmp writeback + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif From 7f883e57b9fa578dccee143dc27c5c7965d3ebc6 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 27 Oct 2023 20:14:40 -0700 Subject: [PATCH 40/42] Switch curve25519 operations to divstep-based modular inverse This replaces the inlined variant of "bignum_modinv" with code from "bignum_inv_p25519" in all "curve25519_" functions returning an affine point and hence using modular inverse. There are also a few consequential changes related to the slightly different amount of temporary storage needed by this function. s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/777d5745b1b71f8c311d32bb922399653ffb8df3 --- arm/curve25519/curve25519_x25519.S | 1392 +++++++++---- arm/curve25519/curve25519_x25519_alt.S | 1392 +++++++++---- arm/curve25519/curve25519_x25519_byte.S | 1392 +++++++++---- arm/curve25519/curve25519_x25519_byte_alt.S | 1392 +++++++++---- arm/curve25519/curve25519_x25519base.S | 1394 +++++++++---- arm/curve25519/curve25519_x25519base_alt.S | 1394 +++++++++---- arm/curve25519/curve25519_x25519base_byte.S | 1394 +++++++++---- .../curve25519_x25519base_byte_alt.S | 1397 +++++++++---- x86_att/curve25519/curve25519_x25519.S | 1754 +++++++++++++---- x86_att/curve25519/curve25519_x25519_alt.S | 1754 +++++++++++++---- x86_att/curve25519/curve25519_x25519base.S | 1744 ++++++++++++---- .../curve25519/curve25519_x25519base_alt.S | 1750 ++++++++++++---- 12 files changed, 13721 insertions(+), 4428 deletions(-) diff --git a/arm/curve25519/curve25519_x25519.S b/arm/curve25519/curve25519_x25519.S index d66884d5d4..26d96c2617 100644 --- a/arm/curve25519/curve25519_x25519.S +++ b/arm/curve25519/curve25519_x25519.S @@ -849,356 +849,1046 @@ curve25519_x25519_scalarloop: mul_p25519(zn,p,e) // The projective result of the scalar multiplication is now (xn,zn). -// First set up the constant sn = 2^255 - 19 for the modular inverse. - - mov x0, #-19 - mov x1, #-1 - mov x2, #0x7fffffffffffffff - stp x0, x1, [sn] - stp x1, x2, [sn+16] - -// Prepare to call the modular inverse function to get zm = 1/zn - - mov x0, #4 - add x1, zm - add x2, zn - add x3, sn - add x4, p - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "arm/generic/bignum_modinv.S". 
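+// (The divstep-based code further below replaces this inlined
+// bignum_modinv; both compute the same value, the modular inverse of zn
+// modulo p_25519. Reference semantics in terms of a hypothetical pow_mod
+// helper, illustrative only, using 1/zn = zn^(p_25519 - 2) (mod p_25519):
+//
+//     xm = pow_mod(zn, p_25519 - 2, p_25519);   // = 1/zn mod p, zn != 0
+// )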
- - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -curve25519_x25519_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -curve25519_x25519_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -curve25519_x25519_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, curve25519_x25519_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519_wmontend -curve25519_x25519_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_wmontloop -curve25519_x25519_wmontend: - adcs x16, x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - str x16, [x4, x15, lsl #3] - negs x10, xzr -curve25519_x25519_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, 
x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -curve25519_x25519_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519_zmontend -curve25519_x25519_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_zmontloop -curve25519_x25519_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -curve25519_x25519_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -curve25519_x25519_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519_crossloop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - subs x15, x15, x16 - str x15, [x21, x10, lsl #3] - umulh x15, x7, x12 - sub x17, x15, x17 - sbcs x13, x13, x17 - csetm x17, cc - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - subs x15, x15, x16 - str x15, [x22, x10, lsl #3] - umulh x15, x9, x12 - sub x19, x15, x19 - sbcs x14, x14, x19 - csetm x19, cc - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519_crossloop - cmn x17, x17 - ldr x15, [x21] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519_negskip1 -curve25519_x25519_negloop1: - add x11, x10, #0x8 - ldr x12, [x21, x11] - extr x15, x12, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519_negloop1 -curve25519_x25519_negskip1: - extr x15, x13, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - cmn x19, x19 - ldr x15, [x22] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519_negskip2 -curve25519_x25519_negloop2: - add x11, x10, #0x8 - ldr x12, [x22, x11] - extr x15, x12, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519_negloop2 -curve25519_x25519_negskip2: - extr x15, x14, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x10, xzr - cmn x17, x17 -curve25519_x25519_wfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - and x11, x11, x17 - eor x12, x12, x17 - adcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_wfliploop - mvn x19, x19 - mov x10, xzr - cmn x19, x19 -curve25519_x25519_zfliploop: - ldr x11, [x3, 
x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - and x11, x11, x19 - eor x12, x12, x19 - adcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_zfliploop - subs x2, x2, #0x3a - b.hi curve25519_x25519_outerloop +// Prepare to call the modular inverse function to get xm = 1/zn + + add x0, xm + add x1, zn + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 128 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, xm and zn. + + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b curve25519_x25519_invmidloop +curve25519_x25519_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, 
x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, #80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x6, x5, x2, #63 + ldp x0, x1, [sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 + smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +curve25519_x25519_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr 
x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, 
#21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, 
#0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge 
+ cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne curve25519_x25519_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + mov x4, x20 + stp x0, x1, [x4] + stp x2, x5, [x4, #16] // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. This avoids building in a @@ -1221,7 +1911,7 @@ curve25519_x25519_zfliploop: // Now the result is xn * (1/zn), fully reduced modulo p. 
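(Aside for reviewers, not part of the generated patch: the two comments above carry the essential reasoning of this tail step. A minimal C sketch of the same logic, assuming the s2n-bignum entry points bignum_inv_p25519 and bignum_mul_p25519 with the repository's usual output-first argument order; the OR/mask zero test mirrors the spirit of the assembly, though the instruction-level ordering below differs from the generated code.)

        #include <stdint.h>

        extern void bignum_inv_p25519(uint64_t z[static 4], uint64_t x[static 4]);
        extern void bignum_mul_p25519(uint64_t z[static 4], uint64_t x[static 4],
                                      uint64_t y[static 4]);

        /* Final step of X25519: resx := xn * (1/zn) mod p_25519, with xn forced
           to zero when zn == 0, so the point at infinity yields output 0 without
           depending on how the modular inverse treats a zero input. */
        static void x25519_tail(uint64_t resx[4], uint64_t xn[4], uint64_t zn[4])
        {
            uint64_t xm[4];
            uint64_t mask = -(uint64_t)((zn[0] | zn[1] | zn[2] | zn[3]) != 0);
            for (int i = 0; i < 4; i++) xn[i] &= mask;  /* xn := 0 if zn == 0 */
            bignum_inv_p25519(xm, zn);                  /* xm := 1/zn mod p_25519 */
            bignum_mul_p25519(resx, xn, xm);            /* fully reduced product */
        }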
- mul_p25519(resx,xn,zm) + mul_p25519(resx,xn,xm) // Restore stack and registers diff --git a/arm/curve25519/curve25519_x25519_alt.S b/arm/curve25519/curve25519_x25519_alt.S index 4e9b91b48e..858e74185b 100644 --- a/arm/curve25519/curve25519_x25519_alt.S +++ b/arm/curve25519/curve25519_x25519_alt.S @@ -633,356 +633,1046 @@ curve25519_x25519_alt_scalarloop: mul_p25519(zn,p,e) // The projective result of the scalar multiplication is now (xn,zn). -// First set up the constant sn = 2^255 - 19 for the modular inverse. - - mov x0, #-19 - mov x1, #-1 - mov x2, #0x7fffffffffffffff - stp x0, x1, [sn] - stp x1, x2, [sn+16] - -// Prepare to call the modular inverse function to get zm = 1/zn - - mov x0, #4 - add x1, zm - add x2, zn - add x3, sn - add x4, p - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "arm/generic/bignum_modinv.S". - - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -curve25519_x25519_alt_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519_alt_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -curve25519_x25519_alt_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519_alt_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519_alt_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -curve25519_x25519_alt_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, curve25519_x25519_alt_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519_alt_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - 
mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519_alt_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519_alt_wmontend -curve25519_x25519_alt_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_alt_wmontloop -curve25519_x25519_alt_wmontend: - adcs x16, x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - str x16, [x4, x15, lsl #3] - negs x10, xzr -curve25519_x25519_alt_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_alt_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -curve25519_x25519_alt_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_alt_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519_alt_zmontend -curve25519_x25519_alt_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_alt_zmontloop -curve25519_x25519_alt_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -curve25519_x25519_alt_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_alt_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -curve25519_x25519_alt_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_alt_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519_alt_crossloop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - subs x15, x15, x16 - str x15, [x21, x10, lsl #3] - umulh x15, x7, x12 - sub x17, x15, x17 - sbcs x13, x13, x17 - csetm x17, cc - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - subs x15, x15, x16 - str x15, [x22, x10, lsl #3] - umulh x15, x9, x12 - sub x19, x15, x19 - sbcs x14, x14, x19 - csetm x19, cc - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519_alt_crossloop - cmn x17, x17 - ldr x15, [x21] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519_alt_negskip1 -curve25519_x25519_alt_negloop1: - add x11, x10, #0x8 - ldr x12, [x21, x11] - 
extr x15, x12, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519_alt_negloop1 -curve25519_x25519_alt_negskip1: - extr x15, x13, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - cmn x19, x19 - ldr x15, [x22] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519_alt_negskip2 -curve25519_x25519_alt_negloop2: - add x11, x10, #0x8 - ldr x12, [x22, x11] - extr x15, x12, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519_alt_negloop2 -curve25519_x25519_alt_negskip2: - extr x15, x14, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x10, xzr - cmn x17, x17 -curve25519_x25519_alt_wfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - and x11, x11, x17 - eor x12, x12, x17 - adcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_alt_wfliploop - mvn x19, x19 - mov x10, xzr - cmn x19, x19 -curve25519_x25519_alt_zfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - and x11, x11, x19 - eor x12, x12, x19 - adcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_alt_zfliploop - subs x2, x2, #0x3a - b.hi curve25519_x25519_alt_outerloop +// Prepare to call the modular inverse function to get xm = 1/zn + + add x0, xm + add x1, zn + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 128 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, xm and zn. 
+ + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b curve25519_x25519_alt_invmidloop +curve25519_x25519_alt_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, 
x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, #80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x6, x5, x2, #63 + ldp x0, x1, [sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 + smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +curve25519_x25519_alt_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg 
x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, #21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + 
asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, 
#0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + 
add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne curve25519_x25519_alt_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + mov x4, x20 + stp x0, x1, [x4] + stp x2, x5, [x4, #16] // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. This avoids building in a @@ -1005,7 +1695,7 @@ curve25519_x25519_alt_zfliploop: // Now the result is xn * (1/zn), fully reduced modulo p. - mul_p25519(resx,xn,zm) + mul_p25519(resx,xn,xm) // Restore stack and registers diff --git a/arm/curve25519/curve25519_x25519_byte.S b/arm/curve25519/curve25519_x25519_byte.S index d64eb73ed2..ede1bd1ee2 100644 --- a/arm/curve25519/curve25519_x25519_byte.S +++ b/arm/curve25519/curve25519_x25519_byte.S @@ -967,356 +967,1046 @@ curve25519_x25519_byte_scalarloop: mul_p25519(zn,p,e) // The projective result of the scalar multiplication is now (xn,zn). -// First set up the constant sn = 2^255 - 19 for the modular inverse. 
- - mov x0, #-19 - mov x1, #-1 - mov x2, #0x7fffffffffffffff - stp x0, x1, [sn] - stp x1, x2, [sn+16] - -// Prepare to call the modular inverse function to get zm = 1/zn - - mov x0, #4 - add x1, zm - add x2, zn - add x3, sn - add x4, p - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "arm/generic/bignum_modinv.S". - - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -curve25519_x25519_byte_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519_byte_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -curve25519_x25519_byte_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519_byte_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519_byte_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -curve25519_x25519_byte_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, curve25519_x25519_byte_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519_byte_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519_byte_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz 
x11, curve25519_x25519_byte_wmontend -curve25519_x25519_byte_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_wmontloop -curve25519_x25519_byte_wmontend: - adcs x16, x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - str x16, [x4, x15, lsl #3] - negs x10, xzr -curve25519_x25519_byte_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -curve25519_x25519_byte_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519_byte_zmontend -curve25519_x25519_byte_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_zmontloop -curve25519_x25519_byte_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -curve25519_x25519_byte_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -curve25519_x25519_byte_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519_byte_crossloop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - subs x15, x15, x16 - str x15, [x21, x10, lsl #3] - umulh x15, x7, x12 - sub x17, x15, x17 - sbcs x13, x13, x17 - csetm x17, cc - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - subs x15, x15, x16 - str x15, [x22, x10, lsl #3] - umulh x15, x9, x12 - sub x19, x15, x19 - sbcs x14, x14, x19 - csetm x19, cc - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519_byte_crossloop - cmn x17, x17 - ldr x15, [x21] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519_byte_negskip1 -curve25519_x25519_byte_negloop1: - add x11, x10, #0x8 - ldr x12, [x21, x11] - extr x15, x12, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519_byte_negloop1 -curve25519_x25519_byte_negskip1: - extr x15, x13, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - cmn x19, x19 - ldr x15, [x22] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519_byte_negskip2 -curve25519_x25519_byte_negloop2: - add x11, x10, #0x8 - ldr x12, [x22, 
x11] - extr x15, x12, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519_byte_negloop2 -curve25519_x25519_byte_negskip2: - extr x15, x14, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x10, xzr - cmn x17, x17 -curve25519_x25519_byte_wfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - and x11, x11, x17 - eor x12, x12, x17 - adcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_wfliploop - mvn x19, x19 - mov x10, xzr - cmn x19, x19 -curve25519_x25519_byte_zfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - and x11, x11, x19 - eor x12, x12, x19 - adcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_zfliploop - subs x2, x2, #0x3a - b.hi curve25519_x25519_byte_outerloop +// Prepare to call the modular inverse function to get xm = 1/zn + + add x0, xm + add x1, zn + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 128 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, xm and zn. + + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b curve25519_x25519_byte_invmidloop +curve25519_x25519_byte_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, 
x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, 
#80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x6, x5, x2, #63 + ldp x0, x1, [sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 + smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +curve25519_x25519_byte_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr 
x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, #21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, 
x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne 
+ ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne curve25519_x25519_byte_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov 
x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + mov x4, x20 + stp x0, x1, [x4] + stp x2, x5, [x4, #16] // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. This avoids building in a @@ -1339,7 +2029,7 @@ curve25519_x25519_byte_zfliploop: // Now the result is xn * (1/zn), fully reduced modulo p. - mul_p25519(zn,xn,zm) + mul_p25519(zn,xn,xm) ldp x10, x11, [zn] strb w10, [resx] diff --git a/arm/curve25519/curve25519_x25519_byte_alt.S b/arm/curve25519/curve25519_x25519_byte_alt.S index 7f79cfd803..03211203cf 100644 --- a/arm/curve25519/curve25519_x25519_byte_alt.S +++ b/arm/curve25519/curve25519_x25519_byte_alt.S @@ -751,356 +751,1046 @@ curve25519_x25519_byte_alt_scalarloop: mul_p25519(zn,p,e) // The projective result of the scalar multiplication is now (xn,zn). -// First set up the constant sn = 2^255 - 19 for the modular inverse. - - mov x0, #-19 - mov x1, #-1 - mov x2, #0x7fffffffffffffff - stp x0, x1, [sn] - stp x1, x2, [sn+16] - -// Prepare to call the modular inverse function to get zm = 1/zn - - mov x0, #4 - add x1, zm - add x2, zn - add x3, sn - add x4, p - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "arm/generic/bignum_modinv.S". - - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -curve25519_x25519_byte_alt_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519_byte_alt_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -curve25519_x25519_byte_alt_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519_byte_alt_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519_byte_alt_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -curve25519_x25519_byte_alt_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, 
cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, curve25519_x25519_byte_alt_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519_byte_alt_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519_byte_alt_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519_byte_alt_wmontend -curve25519_x25519_byte_alt_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_alt_wmontloop -curve25519_x25519_byte_alt_wmontend: - adcs x16, x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - str x16, [x4, x15, lsl #3] - negs x10, xzr -curve25519_x25519_byte_alt_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_alt_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -curve25519_x25519_byte_alt_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_alt_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519_byte_alt_zmontend -curve25519_x25519_byte_alt_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_alt_zmontloop -curve25519_x25519_byte_alt_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -curve25519_x25519_byte_alt_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_alt_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -curve25519_x25519_byte_alt_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519_byte_alt_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr 
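The generic bignum_modinv being deleted here drives each outer iteration with a fixed 58-step inner loop (the "mov x10, #0x3a" above): conditional subtract-and-halve steps on the two working values, with the combined effect accumulated in a small signed 2x2 update matrix that the congloop/montgomery code then applies to the full multi-word numbers. The following is a rough C model of that batching idea only, not the register-exact algorithm (the real loop steers by the top and bottom words of the operands; here exact small values are used so the matrix invariant can be asserted, and the names f0, g0, u, v, q, r are illustrative):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        // Demo operands; f0 plays the (odd) modulus word, g0 the value.
        const uint64_t f0 = 0xffffffedu, g0 = 0x12345678u;
        uint64_t f = f0, g = g0;
        int64_t u = 1, v = 0, q = 0, r = 1;   // accumulated 2x2 update

        for (int i = 0; i < 58; i++) {        // 58 = the 0x3a in the assembly
            if (g & 1) {
                if (f > g) {                  // keep g >= f before subtracting
                    uint64_t tw = f; f = g; g = tw;
                    int64_t ts;
                    ts = u; u = q; q = ts;
                    ts = v; v = r; r = ts;
                }
                g = (g - f) >> 1;             // odd - odd is even
                q -= u; r -= v;
            } else {
                g >>= 1;                      // g even: just halve
            }
            u <<= 1; v <<= 1;                 // f's row rescales by 2 each step
            // Invariant: u*f0 + v*g0 == f*2^(i+1), q*f0 + r*g0 == g*2^(i+1)
            assert(u * (__int128)f0 + v * (__int128)g0 == (__int128)f << (i + 1));
            assert(q * (__int128)f0 + r * (__int128)g0 == (__int128)g << (i + 1));
        }
        printf("after 58 steps: f = %llu, g = %llu\n",
               (unsigned long long)f, (unsigned long long)g);
        return 0;
    }

The coefficients stay below 2^58 in magnitude after 58 steps, which is why the removed code can carry them in single registers and shift full-size words by 58 bits (the "extr ..., #58" instructions) when applying the matrix.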
-curve25519_x25519_byte_alt_crossloop:
-        ldr x11, [x21, x10, lsl #3]
-        ldr x12, [x22, x10, lsl #3]
-        mul x15, x6, x11
-        mul x16, x7, x12
-        adds x15, x15, x13
-        umulh x13, x6, x11
-        adc x13, x13, xzr
-        subs x15, x15, x16
-        str x15, [x21, x10, lsl #3]
-        umulh x15, x7, x12
-        sub x17, x15, x17
-        sbcs x13, x13, x17
-        csetm x17, cc
-        mul x15, x8, x11
-        mul x16, x9, x12
-        adds x15, x15, x14
-        umulh x14, x8, x11
-        adc x14, x14, xzr
-        subs x15, x15, x16
-        str x15, [x22, x10, lsl #3]
-        umulh x15, x9, x12
-        sub x19, x15, x19
-        sbcs x14, x14, x19
-        csetm x19, cc
-        add x10, x10, #0x1
-        cmp x10, x5
-        b.cc curve25519_x25519_byte_alt_crossloop
-        cmn x17, x17
-        ldr x15, [x21]
-        mov x10, xzr
-        sub x6, x5, #0x1
-        cbz x6, curve25519_x25519_byte_alt_negskip1
-curve25519_x25519_byte_alt_negloop1:
-        add x11, x10, #0x8
-        ldr x12, [x21, x11]
-        extr x15, x12, x15, #58
-        eor x15, x15, x17
-        adcs x15, x15, xzr
-        str x15, [x21, x10]
-        mov x15, x12
-        add x10, x10, #0x8
-        sub x6, x6, #0x1
-        cbnz x6, curve25519_x25519_byte_alt_negloop1
-curve25519_x25519_byte_alt_negskip1:
-        extr x15, x13, x15, #58
-        eor x15, x15, x17
-        adcs x15, x15, xzr
-        str x15, [x21, x10]
-        cmn x19, x19
-        ldr x15, [x22]
-        mov x10, xzr
-        sub x6, x5, #0x1
-        cbz x6, curve25519_x25519_byte_alt_negskip2
-curve25519_x25519_byte_alt_negloop2:
-        add x11, x10, #0x8
-        ldr x12, [x22, x11]
-        extr x15, x12, x15, #58
-        eor x15, x15, x19
-        adcs x15, x15, xzr
-        str x15, [x22, x10]
-        mov x15, x12
-        add x10, x10, #0x8
-        sub x6, x6, #0x1
-        cbnz x6, curve25519_x25519_byte_alt_negloop2
-curve25519_x25519_byte_alt_negskip2:
-        extr x15, x14, x15, #58
-        eor x15, x15, x19
-        adcs x15, x15, xzr
-        str x15, [x22, x10]
-        mov x10, xzr
-        cmn x17, x17
-curve25519_x25519_byte_alt_wfliploop:
-        ldr x11, [x3, x10, lsl #3]
-        ldr x12, [x4, x10, lsl #3]
-        and x11, x11, x17
-        eor x12, x12, x17
-        adcs x11, x11, x12
-        str x11, [x4, x10, lsl #3]
-        add x10, x10, #0x1
-        sub x11, x10, x0
-        cbnz x11, curve25519_x25519_byte_alt_wfliploop
-        mvn x19, x19
-        mov x10, xzr
-        cmn x19, x19
-curve25519_x25519_byte_alt_zfliploop:
-        ldr x11, [x3, x10, lsl #3]
-        ldr x12, [x1, x10, lsl #3]
-        and x11, x11, x19
-        eor x12, x12, x19
-        adcs x11, x11, x12
-        str x11, [x1, x10, lsl #3]
-        add x10, x10, #0x1
-        sub x11, x10, x0
-        cbnz x11, curve25519_x25519_byte_alt_zfliploop
-        subs x2, x2, #0x3a
-        b.hi curve25519_x25519_byte_alt_outerloop
+// Prepare to call the modular inverse function to get xm = 1/zn
+
+        add x0, xm
+        add x1, zn
+
+// Inline copy of bignum_inv_p25519, identical except for stripping out
+// the prologue and epilogue saving and restoring registers and making
+// and reclaiming room on the stack. For more details and explanations see
+// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for
+// its own temporaries is 128 bytes, so it has no effect on variables
+// that are needed in the rest of our computation here: res, xm and zn.
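The inlined code that follows opens by fully reducing the input modulo p_25519 (the "mov x7, #0x13" through "and x5, #0x7fffffffffffffff" run): it guesses the quotient as the top bit plus one, adds 19 times that guess while forcing bit 255, undoes the extra 19 when the 256-bit addition produces no carry-out, and finally masks off bit 255. A C sketch of the same trick, assuming a compiler with unsigned __int128 (illustrative only; reduce_p25519 is a made-up name, not the verified routine):

    #include <stdint.h>
    #include <stdio.h>

    // Reduce any 4-limb little-endian x to x mod p_25519, p_25519 = 2^255 - 19.
    static void reduce_p25519(uint64_t z[4], const uint64_t x[4])
    {
        uint64_t q = (x[3] >> 63) + 1;              // quotient guess: top bit + 1
        unsigned __int128 c = (unsigned __int128)x[0] + 19 * q;
        z[0] = (uint64_t)c; c >>= 64;
        c += x[1]; z[1] = (uint64_t)c; c >>= 64;
        c += x[2]; z[2] = (uint64_t)c; c >>= 64;
        c += x[3] | 0x8000000000000000ULL;          // force bit 255 during the add
        z[3] = (uint64_t)c;
        uint64_t d = (uint64_t)(c >> 64) ? 0 : 19;  // no carry-out: undo the +19
        unsigned __int128 b = (unsigned __int128)z[0] - d;
        z[0] = (uint64_t)b;
        b = (unsigned __int128)z[1] - ((uint64_t)(b >> 64) & 1);
        z[1] = (uint64_t)b;
        b = (unsigned __int128)z[2] - ((uint64_t)(b >> 64) & 1);
        z[2] = (uint64_t)b;
        z[3] = (z[3] - ((uint64_t)(b >> 64) & 1)) & 0x7fffffffffffffffULL;
    }

    int main(void)
    {
        const uint64_t p[4] = { 0xffffffffffffffedULL, ~0ULL, ~0ULL,
                                0x7fffffffffffffffULL };
        uint64_t z[4];
        reduce_p25519(z, p);                        // p mod p == 0
        printf("%llx %llx %llx %llx\n", (unsigned long long)z[3],
               (unsigned long long)z[2], (unsigned long long)z[1],
               (unsigned long long)z[0]);
        return 0;
    }

The same constant 19 (#0x13) together with a bit-63 shift reappears throughout the inlined inversion (the "mul x4, x6, x3 ... smulh" blocks) to pull intermediate results back under 2^255 after each cross-multiplication.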
+ + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b curve25519_x25519_byte_alt_invmidloop +curve25519_x25519_byte_alt_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + 
adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, #80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x6, x5, x2, #63 + ldp x0, x1, [sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 + smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +curve25519_x25519_byte_alt_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, 
xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, #21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, 
#0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, 
#0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, 
#0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne curve25519_x25519_byte_alt_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + mov x4, x20 + stp x0, x1, [x4] + stp x2, x5, [x4, #16] // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. This avoids building in a @@ -1123,7 +1813,7 @@ curve25519_x25519_byte_alt_zfliploop: // Now the result is xn * (1/zn), fully reduced modulo p. - mul_p25519(zn,xn,zm) + mul_p25519(zn,xn,xm) ldp x10, x11, [zn] strb w10, [resx] diff --git a/arm/curve25519/curve25519_x25519base.S b/arm/curve25519/curve25519_x25519base.S index 030fa08e24..b9c3b8e34a 100644 --- a/arm/curve25519/curve25519_x25519base.S +++ b/arm/curve25519/curve25519_x25519base.S @@ -907,360 +907,1058 @@ curve25519_x25519base_scalarloop: // // First the addition and subtraction: - add_twice4(y_3,x_3,w_3) - sub_twice4(z_3,x_3,w_3) + add_twice4(t1,x_3,w_3) + sub_twice4(t2,x_3,w_3) -// Prepare to call the modular inverse function to get x_3 = 1/z_3 +// Prepare to call the modular inverse function to get t0 = 1/t2 // Note that this works for the weakly normalized z_3 equally well. // The non-coprime case z_3 == 0 (mod p_25519) cannot arise anyway. 
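As in the two preceding files, the replacement inversion below is built from "divsteps" in the style of the Bernstein-Yang constant-time GCD: each repeated csel/ccmp/cneg/add/tst/asr run in the invloop/invmidloop code performs one conditional-swap, conditional-subtract, halve step on low-bit approximations of the working values, batches of roughly twenty steps are compressed into small transition matrices (the sbfx/asr extraction blocks), and "mov x21, #0xa" sets up ten such outer iterations over the full 256-bit state. For orientation, here is a minimal variable-time C model of the underlying divstep recurrence used to invert g0 modulo an odd m (inverse_mod and half_mod are illustrative names; the real code runs a fixed step count and keeps everything in signed, packed, constant-time form):

    #include <stdint.h>
    #include <stdio.h>

    // Halve x modulo an odd m, keeping the representative in [0, m).
    static uint64_t half_mod(uint64_t x, uint64_t m)
    {
        return (x & 1) ? (x + m) / 2 : x / 2;   // fine while m < 2^63
    }

    // Variable-time divstep-based inversion: returns g0^-1 mod m for
    // odd m with gcd(g0, m) = 1 (assumed, not checked).
    static uint64_t inverse_mod(uint64_t g0, uint64_t m)
    {
        int64_t delta = 1, f = (int64_t)m, g = (int64_t)g0;
        uint64_t d = 0, e = 1;   // invariants: d*g0 == f and e*g0 == g (mod m)
        while (g != 0) {
            if (delta > 0 && (g & 1)) {       // swap-and-subtract divstep
                int64_t tf = f; f = g; g = (g - tf) / 2;
                uint64_t td = d; d = e;
                e = half_mod((e + m - td) % m, m);
                delta = 1 - delta;
            } else {                          // make g even if needed, halve
                if (g & 1) { g = (g + f) / 2; e = half_mod((e + d) % m, m); }
                else       { g = g / 2;       e = half_mod(e, m); }
                delta = 1 + delta;
            }
        }
        return (f == 1) ? d : m - d;          // f == +/-1 once g reaches 0
    }

    int main(void)
    {
        uint64_t m = (1ULL << 31) - 1, a = 123456789;   // demo: m prime
        uint64_t i = inverse_mod(a, m);
        printf("a * 1/a mod m = %llu\n", (unsigned long long)(a * i % m));
        return 0;
    }

The fixed iteration count in the assembly plays the role of this loop's termination condition: enough divsteps are known to drive g to zero for 256-bit inputs, which is what lets the code run in constant time with no data-dependent exit.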
- mov x0, 4 - add x1, x_3 - add x2, z_3 - adr x3, curve25519_x25519base_p_25519 - add x4, tmpspace - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "arm/generic/bignum_modinv.S". - - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -curve25519_x25519base_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519base_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -curve25519_x25519base_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519base_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -curve25519_x25519base_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, curve25519_x25519base_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519base_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519base_wmontend -curve25519_x25519base_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, 
x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_wmontloop -curve25519_x25519base_wmontend: - adcs x16, x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - str x16, [x4, x15, lsl #3] - negs x10, xzr -curve25519_x25519base_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -curve25519_x25519base_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519base_zmontend -curve25519_x25519base_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_zmontloop -curve25519_x25519base_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -curve25519_x25519base_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -curve25519_x25519base_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_crossloop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - subs x15, x15, x16 - str x15, [x21, x10, lsl #3] - umulh x15, x7, x12 - sub x17, x15, x17 - sbcs x13, x13, x17 - csetm x17, cc - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - subs x15, x15, x16 - str x15, [x22, x10, lsl #3] - umulh x15, x9, x12 - sub x19, x15, x19 - sbcs x14, x14, x19 - csetm x19, cc - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519base_crossloop - cmn x17, x17 - ldr x15, [x21] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519base_negskip1 -curve25519_x25519base_negloop1: - add x11, x10, #0x8 - ldr x12, [x21, x11] - extr x15, x12, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519base_negloop1 -curve25519_x25519base_negskip1: - extr x15, x13, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - cmn x19, x19 - ldr x15, [x22] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519base_negskip2 -curve25519_x25519base_negloop2: - add x11, x10, #0x8 - ldr x12, [x22, x11] - extr x15, x12, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, 
curve25519_x25519base_negloop2 -curve25519_x25519base_negskip2: - extr x15, x14, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x10, xzr - cmn x17, x17 -curve25519_x25519base_wfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - and x11, x11, x17 - eor x12, x12, x17 - adcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_wfliploop - mvn x19, x19 - mov x10, xzr - cmn x19, x19 -curve25519_x25519base_zfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - and x11, x11, x19 - eor x12, x12, x19 - adcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_zfliploop - subs x2, x2, #0x3a - b.hi curve25519_x25519base_outerloop + add x0, t0 + add x1, t2 + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 128 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, t0, t1, t2. + + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b curve25519_x25519base_invmidloop +curve25519_x25519base_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, 
x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, #80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + 
adc x5, x5, x1 + extr x6, x5, x2, #63 + ldp x0, x1, [sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 + smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +curve25519_x25519base_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, 
x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, #21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr 
x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel 
x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne curve25519_x25519base_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + sbcs x1, x1, xzr 
+ sbcs x2, x2, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + mov x4, x20 + stp x0, x1, [x4] + stp x2, x5, [x4, #16] // The final result is (X + T) / (X - T) // This is the only operation in the whole computation that // fully reduces modulo p_25519 since now we want the canonical // answer as output. - mul_p25519(resx,y_3,x_3) + mul_p25519(resx,t1,t0) // Restore stack and registers @@ -1279,14 +1977,6 @@ curve25519_x25519base_zfliploop: // .section .rodata // **************************************************************************** -// The modulus p_25519 = 2^255 - 19, for the modular inverse - -curve25519_x25519base_p_25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. diff --git a/arm/curve25519/curve25519_x25519base_alt.S b/arm/curve25519/curve25519_x25519base_alt.S index 97d2e9c54f..22de69f4c3 100644 --- a/arm/curve25519/curve25519_x25519base_alt.S +++ b/arm/curve25519/curve25519_x25519base_alt.S @@ -749,360 +749,1058 @@ curve25519_x25519base_alt_scalarloop: // // First the addition and subtraction: - add_twice4(y_3,x_3,w_3) - sub_twice4(z_3,x_3,w_3) + add_twice4(t1,x_3,w_3) + sub_twice4(t2,x_3,w_3) -// Prepare to call the modular inverse function to get x_3 = 1/z_3 +// Prepare to call the modular inverse function to get t0 = 1/t2 // Note that this works for the weakly normalized z_3 equally well. // The non-coprime case z_3 == 0 (mod p_25519) cannot arise anyway. - mov x0, 4 - add x1, x_3 - add x2, z_3 - adr x3, curve25519_x25519base_alt_p_25519 - add x4, tmpspace - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "arm/generic/bignum_modinv.S". 
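+// For intuition: the inlined inverse used below follows a divstep-style
+// iteration. A minimal scalar C sketch of one signed divstep, assuming the
+// standard Bernstein-Yang formulation (hypothetical helper, not this code,
+// which processes batches of such steps on packed signed digits and
+// composes 2x2 transition matrices between batches):
+//
+//        // One divstep; f stays odd, and iterating drives g to zero
+//        // while accumulated transition matrices yield the inverse.
+//        void divstep(int64_t *delta, int64_t *f, int64_t *g)
+//        {
+//            if (*delta > 0 && (*g & 1)) {
+//                int64_t t = *f;
+//                *f = *g;
+//                *g = (*g - t) >> 1;    // exact: f and g both odd here
+//                *delta = 1 - *delta;
+//            } else {
+//                if (*g & 1) *g += *f;  // make g even (f is odd)
+//                *g >>= 1;
+//                *delta = 1 + *delta;
+//            }
+//        }
+//
+// The csel/ccmp/cneg chains below make the same selections branch-free.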
- - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -curve25519_x25519base_alt_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519base_alt_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -curve25519_x25519base_alt_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_alt_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519base_alt_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -curve25519_x25519base_alt_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, curve25519_x25519base_alt_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_alt_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519base_alt_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519base_alt_wmontend -curve25519_x25519base_alt_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_alt_wmontloop -curve25519_x25519base_alt_wmontend: - adcs x16, x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - str x16, [x4, 
x15, lsl #3] - negs x10, xzr -curve25519_x25519base_alt_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_alt_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -curve25519_x25519base_alt_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_alt_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519base_alt_zmontend -curve25519_x25519base_alt_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_alt_zmontloop -curve25519_x25519base_alt_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -curve25519_x25519base_alt_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_alt_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -curve25519_x25519base_alt_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_alt_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_alt_crossloop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - subs x15, x15, x16 - str x15, [x21, x10, lsl #3] - umulh x15, x7, x12 - sub x17, x15, x17 - sbcs x13, x13, x17 - csetm x17, cc - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - subs x15, x15, x16 - str x15, [x22, x10, lsl #3] - umulh x15, x9, x12 - sub x19, x15, x19 - sbcs x14, x14, x19 - csetm x19, cc - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519base_alt_crossloop - cmn x17, x17 - ldr x15, [x21] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519base_alt_negskip1 -curve25519_x25519base_alt_negloop1: - add x11, x10, #0x8 - ldr x12, [x21, x11] - extr x15, x12, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519base_alt_negloop1 -curve25519_x25519base_alt_negskip1: - extr x15, x13, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - cmn x19, x19 - ldr x15, [x22] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519base_alt_negskip2 -curve25519_x25519base_alt_negloop2: - add x11, x10, #0x8 - ldr x12, [x22, x11] - extr x15, x12, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519base_alt_negloop2 -curve25519_x25519base_alt_negskip2: - extr x15, x14, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x10, xzr - cmn x17, x17 -curve25519_x25519base_alt_wfliploop: - ldr x11, [x3, x10, lsl #3] - ldr 
x12, [x4, x10, lsl #3] - and x11, x11, x17 - eor x12, x12, x17 - adcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_alt_wfliploop - mvn x19, x19 - mov x10, xzr - cmn x19, x19 -curve25519_x25519base_alt_zfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - and x11, x11, x19 - eor x12, x12, x19 - adcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_alt_zfliploop - subs x2, x2, #0x3a - b.hi curve25519_x25519base_alt_outerloop + add x0, t0 + add x1, t2 + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 128 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, t0, t1, t2. + + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b curve25519_x25519base_alt_invmidloop +curve25519_x25519base_alt_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, 
[sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, #80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x6, x5, x2, #63 + ldp x0, x1, [sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 + smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr 
x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +curve25519_x25519base_alt_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + 
add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, #21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, 
#0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 
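+// Each tst/asr/csel/ccmp/cneg/cneg/csel/add/add group in this chain
+// performs one branch-free divstep on the packed low-order digits.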
+ tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne curve25519_x25519base_alt_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + mov x4, x20 + stp x0, x1, [x4] + stp x2, x5, [x4, #16] // The final result is (X + T) / (X - T) // This is the only operation in the whole 
computation that // fully reduces modulo p_25519 since now we want the canonical // answer as output. - mul_p25519(resx,y_3,x_3) + mul_p25519(resx,t1,t0) // Restore stack and registers @@ -1121,14 +1819,6 @@ curve25519_x25519base_alt_zfliploop: // .section .rodata // **************************************************************************** -// The modulus p_25519 = 2^255 - 19, for the modular inverse - -curve25519_x25519base_alt_p_25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. diff --git a/arm/curve25519/curve25519_x25519base_byte.S b/arm/curve25519/curve25519_x25519base_byte.S index b6d95f58c9..aecc693c66 100644 --- a/arm/curve25519/curve25519_x25519base_byte.S +++ b/arm/curve25519/curve25519_x25519base_byte.S @@ -966,360 +966,1058 @@ curve25519_x25519base_byte_scalarloop: // // First the addition and subtraction: - add_twice4(y_3,x_3,w_3) - sub_twice4(z_3,x_3,w_3) + add_twice4(t1,x_3,w_3) + sub_twice4(t2,x_3,w_3) -// Prepare to call the modular inverse function to get x_3 = 1/z_3 +// Prepare to call the modular inverse function to get t0 = 1/t2 // Note that this works for the weakly normalized z_3 equally well. // The non-coprime case z_3 == 0 (mod p_25519) cannot arise anyway. - mov x0, 4 - add x1, x_3 - add x2, z_3 - adr x3, curve25519_x25519base_byte_p_25519 - add x4, tmpspace - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "arm/generic/bignum_modinv.S". 
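+// For intuition: the strict reduction to the canonical residue that the
+// final mul_p25519 call performs can be sketched in C as follows
+// (hypothetical helper, assuming an input already below 2 * p_25519;
+// the real multiply folds this correction into its final carry chain):
+//
+//        // z = x mod p where p = 2^255 - 19 and x < 2*p. Adding 19
+//        // sets bit 255 exactly when x >= p, giving a quotient q in
+//        // {0,1}; then z = x + 19*q - q*2^255.
+//        void reduce_p25519(uint64_t z[4], const uint64_t x[4])
+//        {
+//            uint64_t t[4], c = 19;
+//            for (int i = 0; i < 4; i++) {
+//                t[i] = x[i] + c;
+//                c = (t[i] < c);              // carry out of this limb
+//            }
+//            uint64_t m = 0 - (t[3] >> 63);   // mask = -q
+//            for (int i = 0; i < 4; i++)
+//                z[i] = (x[i] & ~m) | (t[i] & m);
+//            z[3] &= 0x7FFFFFFFFFFFFFFF;      // drop the 2^255 bit
+//        }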
- - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -curve25519_x25519base_byte_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519base_byte_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -curve25519_x25519base_byte_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_byte_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519base_byte_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -curve25519_x25519base_byte_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, curve25519_x25519base_byte_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_byte_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519base_byte_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519base_byte_wmontend -curve25519_x25519base_byte_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_wmontloop -curve25519_x25519base_byte_wmontend: - adcs x16, x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - 
str x16, [x4, x15, lsl #3] - negs x10, xzr -curve25519_x25519base_byte_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -curve25519_x25519base_byte_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519base_byte_zmontend -curve25519_x25519base_byte_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_zmontloop -curve25519_x25519base_byte_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -curve25519_x25519base_byte_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -curve25519_x25519base_byte_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_byte_crossloop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - subs x15, x15, x16 - str x15, [x21, x10, lsl #3] - umulh x15, x7, x12 - sub x17, x15, x17 - sbcs x13, x13, x17 - csetm x17, cc - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - subs x15, x15, x16 - str x15, [x22, x10, lsl #3] - umulh x15, x9, x12 - sub x19, x15, x19 - sbcs x14, x14, x19 - csetm x19, cc - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519base_byte_crossloop - cmn x17, x17 - ldr x15, [x21] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519base_byte_negskip1 -curve25519_x25519base_byte_negloop1: - add x11, x10, #0x8 - ldr x12, [x21, x11] - extr x15, x12, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519base_byte_negloop1 -curve25519_x25519base_byte_negskip1: - extr x15, x13, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - cmn x19, x19 - ldr x15, [x22] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519base_byte_negskip2 -curve25519_x25519base_byte_negloop2: - add x11, x10, #0x8 - ldr x12, [x22, x11] - extr x15, x12, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519base_byte_negloop2 -curve25519_x25519base_byte_negskip2: - extr x15, x14, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x10, xzr - cmn x17, x17 
-curve25519_x25519base_byte_wfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - and x11, x11, x17 - eor x12, x12, x17 - adcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_wfliploop - mvn x19, x19 - mov x10, xzr - cmn x19, x19 -curve25519_x25519base_byte_zfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - and x11, x11, x19 - eor x12, x12, x19 - adcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_zfliploop - subs x2, x2, #0x3a - b.hi curve25519_x25519base_byte_outerloop + add x0, t0 + add x1, t2 + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 128 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, t0, t1, t2. + + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b curve25519_x25519base_byte_invmidloop +curve25519_x25519base_byte_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + 
mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, #80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x6, x5, x2, #63 + ldp x0, x1, [sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 
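+// x6 is a small signed quotient estimate by 2^255; the mul/smulh pair
+// trades its 2^255 multiple for 19*x6, since 2^255 == 19 (mod p_25519).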
+ smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +curve25519_x25519base_byte_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + 
ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, #21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add 
x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg 
x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne curve25519_x25519base_byte_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + mov x4, x20 + stp x0, x1, [x4] + stp x2, x5, [x4, #16] // 
The final result is (X + T) / (X - T) // This is the only operation in the whole computation that // fully reduces modulo p_25519 since now we want the canonical // answer as output. - mul_p25519(x_1,y_3,x_3) + mul_p25519(x_1,t1,t0) ldp x10, x11, [x_1] strb w10, [resx] @@ -1405,14 +2103,6 @@ curve25519_x25519base_byte_zfliploop: // .section .rodata // **************************************************************************** -// The modulus p_25519 = 2^255 - 19, for the modular inverse - -curve25519_x25519base_byte_p_25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. diff --git a/arm/curve25519/curve25519_x25519base_byte_alt.S b/arm/curve25519/curve25519_x25519base_byte_alt.S index 6e61199732..9c9dca518c 100644 --- a/arm/curve25519/curve25519_x25519base_byte_alt.S +++ b/arm/curve25519/curve25519_x25519base_byte_alt.S @@ -805,363 +805,1059 @@ curve25519_x25519base_byte_alt_scalarloop: // the Montgomery point at infinity, and Edwards (0,-1) which maps to // Montgomery (0,0) [this is the 2-torsion point] are both by definition // mapped to 0 by the X coordinate mapping used to define curve25519. -// -// First the addition and subtraction: - add_twice4(y_3,x_3,w_3) - sub_twice4(z_3,x_3,w_3) + add_twice4(t1,x_3,w_3) + sub_twice4(t2,x_3,w_3) -// Prepare to call the modular inverse function to get x_3 = 1/z_3 +// Prepare to call the modular inverse function to get t0 = 1/t2 // Note that this works for the weakly normalized z_3 equally well. // The non-coprime case z_3 == 0 (mod p_25519) cannot arise anyway. - mov x0, 4 - add x1, x_3 - add x2, z_3 - adr x3, curve25519_x25519base_byte_alt_p_25519 - add x4, tmpspace - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "arm/generic/bignum_modinv.S". 
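// The block removed below is the generic Montgomery-style bignum_modinv;
// the patch replaces it with the inlined divstep-based bignum_inv_p25519
// seen above. The core operation is the Bernstein-Yang "divstep" iterated
// on the low bits of f (kept odd) and g; a minimal C model of one textbook
// step (one common formulation; the register-level encoding above differs)
// is:
//
//   #include <stdint.h>
//
//   static void divstep_sketch(int64_t *delta, int64_t *f, int64_t *g)
//   {
//       if (*delta > 0 && (*g & 1)) {          /* swap-and-negate case */
//           *delta = -*delta;
//           int64_t t = *f; *f = *g; *g = -t;
//       }
//       *delta += 1;
//       if (*g & 1) *g += *f;                  /* make g even; f stays odd */
//       *g >>= 1;                              /* halve (arithmetic shift) */
//   }
//
// The assembly batches such steps on roughly 20-bit windows of f and g
// (the and/orr #0xfffff masking above) and then applies the accumulated
// 2x2 transition matrix to the full 256-bit f, g, u and v.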
- - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -curve25519_x25519base_byte_alt_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519base_byte_alt_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -curve25519_x25519base_byte_alt_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_byte_alt_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519base_byte_alt_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -curve25519_x25519base_byte_alt_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, curve25519_x25519base_byte_alt_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_byte_alt_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc curve25519_x25519base_byte_alt_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519base_byte_alt_wmontend -curve25519_x25519base_byte_alt_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_alt_wmontloop -curve25519_x25519base_byte_alt_wmontend: - adcs x16, 
x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - str x16, [x4, x15, lsl #3] - negs x10, xzr -curve25519_x25519base_byte_alt_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_alt_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -curve25519_x25519base_byte_alt_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_alt_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, curve25519_x25519base_byte_alt_zmontend -curve25519_x25519base_byte_alt_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_alt_zmontloop -curve25519_x25519base_byte_alt_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -curve25519_x25519base_byte_alt_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_alt_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -curve25519_x25519base_byte_alt_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_alt_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -curve25519_x25519base_byte_alt_crossloop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - subs x15, x15, x16 - str x15, [x21, x10, lsl #3] - umulh x15, x7, x12 - sub x17, x15, x17 - sbcs x13, x13, x17 - csetm x17, cc - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - subs x15, x15, x16 - str x15, [x22, x10, lsl #3] - umulh x15, x9, x12 - sub x19, x15, x19 - sbcs x14, x14, x19 - csetm x19, cc - add x10, x10, #0x1 - cmp x10, x5 - b.cc curve25519_x25519base_byte_alt_crossloop - cmn x17, x17 - ldr x15, [x21] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519base_byte_alt_negskip1 -curve25519_x25519base_byte_alt_negloop1: - add x11, x10, #0x8 - ldr x12, [x21, x11] - extr x15, x12, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519base_byte_alt_negloop1 -curve25519_x25519base_byte_alt_negskip1: - extr x15, x13, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - cmn x19, x19 - ldr x15, [x22] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, curve25519_x25519base_byte_alt_negskip2 -curve25519_x25519base_byte_alt_negloop2: - add x11, x10, #0x8 - ldr x12, [x22, x11] - extr x15, x12, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, curve25519_x25519base_byte_alt_negloop2 -curve25519_x25519base_byte_alt_negskip2: - extr x15, x14, 
x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x10, xzr - cmn x17, x17 -curve25519_x25519base_byte_alt_wfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - and x11, x11, x17 - eor x12, x12, x17 - adcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_alt_wfliploop - mvn x19, x19 - mov x10, xzr - cmn x19, x19 -curve25519_x25519base_byte_alt_zfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - and x11, x11, x19 - eor x12, x12, x19 - adcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, curve25519_x25519base_byte_alt_zfliploop - subs x2, x2, #0x3a - b.hi curve25519_x25519base_byte_alt_outerloop + add x0, t0 + add x1, t2 + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 128 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, t0, t1, t2. + + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b curve25519_x25519base_byte_alt_invmidloop +curve25519_x25519base_byte_alt_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh 
x1, x1, x13 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, #80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + 
extr x6, x5, x2, #63 + ldp x0, x1, [sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 + smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +curve25519_x25519base_byte_alt_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge 
+ cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, #21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, 
#1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, 
x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne curve25519_x25519base_byte_alt_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + sbcs x1, x1, xzr 
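// The subs/sbcs chain running through this point conditionally subtracts
// p_25519 so that the inverse is delivered in canonical form. A branch-free
// C model of an equivalent correction (illustrative name, not from the
// source; assumes the input is already below 2*p_25519) is:
//
//   #include <stdint.h>
//
//   static void canonicalize_sketch(uint64_t x[4])
//   {
//       uint64_t s[4], mask;
//       unsigned __int128 t = 19;              /* 19 = 2^255 - p_25519 */
//       for (int i = 0; i < 4; i++) {
//           t += x[i]; s[i] = (uint64_t)t; t >>= 64;
//       }
//       mask = 0 - (s[3] >> 63);               /* all-ones iff x >= p */
//       s[3] &= 0x7fffffffffffffffULL;         /* then s = x - p_25519 */
//       for (int i = 0; i < 4; i++)
//           x[i] = (mask & s[i]) | (~mask & x[i]);
//   }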
+ sbcs x2, x2, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + mov x4, x20 + stp x0, x1, [x4] + stp x2, x5, [x4, #16] // The final result is (X + T) / (X - T) // This is the only operation in the whole computation that // fully reduces modulo p_25519 since now we want the canonical // answer as output. - mul_p25519(x_1,y_3,x_3) + mul_p25519(x_1,t1,t0) ldp x10, x11, [x_1] strb w10, [resx] @@ -1229,6 +1925,7 @@ curve25519_x25519base_byte_alt_zfliploop: lsr x13, x13, #8 strb w13, [resx+31] + // Restore stack and registers add sp, sp, #NSPACE @@ -1246,14 +1943,6 @@ curve25519_x25519base_byte_alt_zfliploop: // .section .rodata // **************************************************************************** -// The modulus p_25519 = 2^255 - 19, for the modular inverse - -curve25519_x25519base_byte_alt_p_25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. diff --git a/x86_att/curve25519/curve25519_x25519.S b/x86_att/curve25519/curve25519_x25519.S index 2a97ee9407..4a8351eaa3 100644 --- a/x86_att/curve25519/curve25519_x25519.S +++ b/x86_att/curve25519/curve25519_x25519.S @@ -65,12 +65,12 @@ #define sn (4*NUMSIZE)(%rsp) -#define zn (5*NUMSIZE)(%rsp) #define dn (5*NUMSIZE)(%rsp) #define e (5*NUMSIZE)(%rsp) #define dmsn (6*NUMSIZE)(%rsp) #define p (6*NUMSIZE)(%rsp) +#define zn (7*NUMSIZE)(%rsp) #define xm (8*NUMSIZE)(%rsp) #define dnsm (8*NUMSIZE)(%rsp) @@ -790,430 +790,1372 @@ curve25519_x25519_scalarloop: mul_p25519(zn,p,e) // The projective result of the scalar multiplication is now (xn,zn). -// First set up the constant sn = 2^255 - 19 for the modular inverse. - - movq $-19, %rax - movq $-1, %rcx - movq $0x7fffffffffffffff, %rdx - movq %rax, 128(%rsp) - movq %rcx, 136(%rsp) - movq %rcx, 144(%rsp) - movq %rdx, 152(%rsp) - -// Prepare to call the modular inverse function to get zm = 1/zn - - movq $4, %rdi - leaq 96(%rsp), %rsi - leaq 160(%rsp), %rdx - leaq 128(%rsp), %rcx - leaq 192(%rsp), %r8 - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "x86/generic/bignum_modinv.S". Note -// that the stack it uses for its own temporaries is 80 bytes so it -// only overwrites pointx, scalar and dm, which are no longer needed. 
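// On the x86 side the patch makes the same replacement: the generic
// bignum_modinv removed below gives way to the inlined bignum_inv_p25519.
// Its inverseloop first splits each signed matrix entry into a magnitude
// and a sign mask (the sarq/xorq/subq pattern in the new code) so that
// plain unsigned mulq can be used throughout; a minimal C model of that
// split (illustrative name, not from the source) is:
//
//   #include <stdint.h>
//
//   static uint64_t abs_and_mask_sketch(int64_t m, uint64_t *mask)
//   {
//       *mask = (m < 0) ? ~(uint64_t)0 : 0;    /* the asm uses sarq $63 */
//       return ((uint64_t)m ^ *mask) - *mask;  /* (m ^ mask) - mask = |m| */
//   }
//
// Each digit operand is then xor-ed with the mask before multiplication,
// and the missing +1 of each two's-complement negation is folded in via a
// precomputed correction term (the and/add pairs before the main loop).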
- - movq %rsi, 0x40(%rsp) - movq %r8, 0x38(%rsp) - movq %rcx, 0x48(%rsp) - leaq (%r8,%rdi,8), %r10 - movq %r10, 0x30(%rsp) - leaq (%r10,%rdi,8), %r15 - xorq %r11, %r11 - xorq %r9, %r9 -curve25519_x25519_copyloop: - movq (%rdx,%r9,8), %rax - movq (%rcx,%r9,8), %rbx - movq %rax, (%r10,%r9,8) - movq %rbx, (%r15,%r9,8) - movq %rbx, (%r8,%r9,8) - movq %r11, (%rsi,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519_copyloop - movq (%r8), %rax - movq %rax, %rbx - decq %rbx - movq %rbx, (%r8) - movq %rax, %rbp - movq %rax, %r12 - shlq $0x2, %rbp - subq %rbp, %r12 - xorq $0x2, %r12 - movq %r12, %rbp - imulq %rax, %rbp - movl $0x2, %eax - addq %rbp, %rax - addq $0x1, %rbp - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp +// Prepare to call the modular inverse function to get xm = 1/zn + + leaq 256(%rsp), %rdi + leaq 224(%rsp), %rsi + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "x86/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 208 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, xm and zn. + + movq %rdi, 0xc0(%rsp) + xorl %eax, %eax + leaq -0x13(%rax), %rcx + notq %rax + movq %rcx, (%rsp) + movq %rax, 0x8(%rsp) + movq %rax, 0x10(%rsp) + btr $0x3f, %rax + movq %rax, 0x18(%rsp) + movq (%rsi), %rdx + movq 0x8(%rsi), %rcx + movq 0x10(%rsi), %r8 + movq 0x18(%rsi), %r9 movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - movq %r12, 0x28(%rsp) - movq %rdi, %rax - shlq $0x7, %rax - movq %rax, 0x20(%rsp) -curve25519_x25519_outerloop: - movq 0x20(%rsp), %r13 - addq $0x3f, %r13 - shrq $0x6, %r13 - cmpq %rdi, %r13 - cmovaeq %rdi, %r13 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi - xorq %r11, %r11 - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 -curve25519_x25519_toploop: - movq (%r8,%r9,8), %rbx - movq (%r15,%r9,8), %rcx - movq %r11, %r10 - andq %r12, %r10 - andq %rbp, %r11 - movq %rbx, %rax - orq %rcx, %rax - negq %rax - cmovbq %r10, %r14 - cmovbq %r11, %rsi - cmovbq %rbx, %r12 - cmovbq %rcx, %rbp - sbbq %r11, %r11 - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519_toploop - movq %r12, %rax - orq %rbp, %rax - bsrq %rax, %rcx - xorq $0x3f, %rcx - shldq %cl, %r14, %r12 - shldq %cl, %rsi, %rbp - movq (%r8), %rax - movq %rax, %r14 - movq (%r15), %rax - movq %rax, %rsi - movl $0x1, %r10d - movl $0x0, %r11d - movl $0x0, %ecx - movl $0x1, %edx - movl $0x3a, %r9d - movq %rdi, 0x8(%rsp) - movq %r13, 0x10(%rsp) - movq %r8, (%rsp) - movq %r15, 0x18(%rsp) -curve25519_x25519_innerloop: + xorl %r10d, %r10d + bts $0x3f, %r9 + adcq %r10, %rax + imulq $0x13, %rax, %rax + addq %rax, %rdx + adcq %r10, %rcx + adcq %r10, %r8 + adcq %r10, %r9 + movl $0x13, %eax + cmovbq %r10, %rax + subq %rax, %rdx + sbbq %r10, %rcx + sbbq %r10, %r8 + sbbq %r10, %r9 + btr $0x3f, %r9 + movq %rdx, 0x20(%rsp) + movq %rcx, 0x28(%rsp) + movq %r8, 0x30(%rsp) + movq %r9, 0x38(%rsp) xorl %eax, %eax + movq %rax, 0x40(%rsp) + movq %rax, 0x48(%rsp) + movq %rax, 0x50(%rsp) + movq %rax, 0x58(%rsp) + movabsq $0xa0f99e2375022099, %rax + movq %rax, 0x60(%rsp) + movabsq $0xa8c68f3f1d132595, %rax + movq %rax, 0x68(%rsp) + movabsq $0x6c6c893805ac5242, %rax + movq %rax, 0x70(%rsp) + movabsq 
$0x276508b241770615, %rax + movq %rax, 0x78(%rsp) + movq $0xa, 0x90(%rsp) + movq $0x1, 0x98(%rsp) + jmp curve25519_x25519_midloop +curve25519_x25519_inverseloop: + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + movq %r8, %rax + andq %r9, %rax + movq %r10, %rdi + andq %r11, %rdi + addq %rax, %rdi + movq %rdi, 0x80(%rsp) + movq %r12, %rax + andq %r13, %rax + movq %r14, %rsi + andq %r15, %rsi + addq %rax, %rsi + movq %rsi, 0x88(%rsp) xorl %ebx, %ebx - xorq %r8, %r8 - xorq %r15, %r15 - btq $0x0, %r14 - cmovbq %rbp, %rax - cmovbq %rsi, %rbx - cmovbq %rcx, %r8 - cmovbq %rdx, %r15 - movq %r14, %r13 - subq %rbx, %r14 - subq %r13, %rbx - movq %r12, %rdi - subq %rax, %rdi - cmovbq %r12, %rbp - leaq -0x1(%rdi), %r12 - cmovbq %rbx, %r14 - cmovbq %r13, %rsi - notq %r12 - cmovbq %r10, %rcx - cmovbq %r11, %rdx - cmovaeq %rdi, %r12 - shrq $1, %r14 - addq %r8, %r10 - addq %r15, %r11 - shrq $1, %r12 - addq %rcx, %rcx - addq %rdx, %rdx - decq %r9 - jne curve25519_x25519_innerloop - movq 0x8(%rsp), %rdi - movq 0x10(%rsp), %r13 - movq (%rsp), %r8 - movq 0x18(%rsp), %r15 - movq %r10, (%rsp) - movq %r11, 0x8(%rsp) - movq %rcx, 0x10(%rsp) - movq %rdx, 0x18(%rsp) - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - xorq %r14, %r14 - xorq %rsi, %rsi - xorq %r10, %r10 - xorq %r11, %r11 - xorq %r9, %r9 -curve25519_x25519_congloop: - movq (%r8,%r9,8), %rcx movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r12 - movq 0x10(%rsp), %rax - mulq %rcx + xorq %r9, %rax + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x20(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + xorl %ebp, %ebp + movq (%rsp), %rax + xorq %r13, %rax + mulq %r12 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %rbp - movq (%r15,%r9,8), %rcx + adcq %rdx, %rbp + movq 0x20(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + xorl %ecx, %ecx movq 0x8(%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq %rdx, %r12 - shrdq $0x3a, %r14, %r10 - movq %r10, (%r8,%r9,8) - movq %r14, %r10 - movq %r12, %r14 + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x28(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $0x3b, %rbx, %rdi + movq %rdi, (%rsp) + xorl %edi, %edi + movq 0x8(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbp + adcq %rdx, %rdi + movq 0x28(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $0x3b, %rbp, %rsi + movq %rsi, 0x20(%rsp) + xorl %esi, %esi + movq 0x10(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rsi + movq 0x30(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $0x3b, %rcx, %rbx + movq %rbx, 0x8(%rsp) + xorl %ebx, %ebx + movq 0x10(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x30(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $0x3b, %rdi, %rbp + movq %rbp, 0x28(%rsp) movq 0x18(%rsp), %rax - mulq %rcx + xorq %r9, %rax + movq %rax, %rbp + sarq $0x3f, %rbp + andq %r8, %rbp + negq %rbp + mulq %r8 addq %rax, %rsi adcq %rdx, %rbp - shrdq $0x3a, %rsi, %r11 - movq %r11, (%r15,%r9,8) - movq %rsi, %r11 - movq %rbp, %rsi - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519_congloop - shldq $0x6, %r10, 
%r14 - shldq $0x6, %r11, %rsi - movq 0x48(%rsp), %r15 - movq (%r8), %rbx - movq 0x28(%rsp), %r12 - imulq %rbx, %r12 - movq (%r15), %rax + movq 0x38(%rsp), %rax + xorq %r11, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r10, %rdx + subq %rdx, %rbp + mulq %r10 + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3b, %rsi, %rcx + movq %rcx, 0x10(%rsp) + shrdq $0x3b, %rbp, %rsi + movq 0x18(%rsp), %rax + movq %rsi, 0x18(%rsp) + xorq %r13, %rax + movq %rax, %rsi + sarq $0x3f, %rsi + andq %r12, %rsi + negq %rsi mulq %r12 - addq %rbx, %rax - movq %rdx, %r10 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je curve25519_x25519_wmontend -curve25519_x25519_wmontloop: - adcq (%r8,%r9,8), %r10 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax + addq %rax, %rbx + adcq %rdx, %rsi + movq 0x38(%rsp), %rax + xorq %r15, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r14, %rdx + subq %rdx, %rsi + mulq %r14 + addq %rax, %rbx + adcq %rdx, %rsi + shrdq $0x3b, %rbx, %rdi + movq %rdi, 0x30(%rsp) + shrdq $0x3b, %rsi, %rbx + movq %rbx, 0x38(%rsp) + movq 0x80(%rsp), %rbx + movq 0x88(%rsp), %rbp + xorl %ecx, %ecx + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x40(%rsp), %rax + xorq %r13, %rax mulq %r12 - subq %rbx, %rdx - addq %r10, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r10 - incq %r9 - decq %rcx - jne curve25519_x25519_wmontloop -curve25519_x25519_wmontend: - adcq %r14, %r10 - movq %r10, -0x8(%r8,%rdi,8) - sbbq %r10, %r10 - negq %r10 - movq %rdi, %rcx - xorq %r9, %r9 -curve25519_x25519_wcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne curve25519_x25519_wcmploop - sbbq $0x0, %r10 - sbbq %r10, %r10 - notq %r10 - xorq %rcx, %rcx - xorq %r9, %r9 -curve25519_x25519_wcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r10, %rbx - negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519_wcorrloop + movq %rbx, 0x40(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x60(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x60(%rsp) + xorl %ebx, %ebx + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + xorl %ebp, %ebp + movq 0x48(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rcx, 0x48(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x68(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, 0x68(%rsp) + xorl %ecx, %ecx + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x50(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, 0x50(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x70(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x70(%rsp) + movq 0x58(%rsp), %rax + xorq %r9, %rax + movq %r9, %rbx + andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rcx + adcq %rbx, %rdx + movq %rdx, %rbx + shldq $0x1, %rcx, %rdx + sarq $0x3f, %rbx + addq %rbx, %rdx + movl $0x13, %eax + imulq %rdx movq 0x40(%rsp), %r8 - movq 
(%r8), %rbx - movq 0x28(%rsp), %rbp - imulq %rbx, %rbp - movq (%r15), %rax - mulq %rbp - addq %rbx, %rax - movq %rdx, %r11 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je curve25519_x25519_zmontend -curve25519_x25519_zmontloop: - adcq (%r8,%r9,8), %r11 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax - mulq %rbp - subq %rbx, %rdx - addq %r11, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r11 - incq %r9 - decq %rcx - jne curve25519_x25519_zmontloop -curve25519_x25519_zmontend: - adcq %rsi, %r11 - movq %r11, -0x8(%r8,%rdi,8) - sbbq %r11, %r11 - negq %r11 - movq %rdi, %rcx - xorq %r9, %r9 -curve25519_x25519_zcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne curve25519_x25519_zcmploop - sbbq $0x0, %r11 - sbbq %r11, %r11 - notq %r11 - xorq %rcx, %rcx - xorq %r9, %r9 -curve25519_x25519_zcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r11, %rbx + addq %rax, %r8 + movq %r8, 0x40(%rsp) + movq 0x48(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x48(%rsp) + movq 0x50(%rsp), %r8 + adcq %rbx, %r8 + movq %r8, 0x50(%rsp) + adcq %rbx, %rcx + shlq $0x3f, %rax + addq %rax, %rcx + movq 0x58(%rsp), %rax + movq %rcx, 0x58(%rsp) + xorq %r13, %rax + movq %r13, %rcx + andq %r12, %rcx negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519_zcorrloop - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi -curve25519_x25519_crossloop: - movq (%r8,%r9,8), %rcx - movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r10 - movq 0x10(%rsp), %rax - mulq %rcx + mulq %r12 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %r11 - movq (%r15,%r9,8), %rcx - movq 0x8(%rsp), %rax - mulq %rcx - subq %r12, %rdx - subq %rax, %r14 - sbbq %rdx, %r10 - sbbq %r12, %r12 - movq %r14, (%r8,%r9,8) - movq %r10, %r14 - movq 0x18(%rsp), %rax - mulq %rcx - subq %rbp, %rdx + adcq %rdx, %rcx + movq 0x78(%rsp), %rax + xorq %r15, %rax + movq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rcx + mulq %r14 + addq %rax, %rsi + adcq %rcx, %rdx + movq %rdx, %rcx + shldq $0x1, %rsi, %rdx + sarq $0x3f, %rcx + movl $0x13, %eax + addq %rcx, %rdx + imulq %rdx + movq 0x60(%rsp), %r8 + addq %rax, %r8 + movq %r8, 0x60(%rsp) + movq 0x68(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x68(%rsp) + movq 0x70(%rsp), %r8 + adcq %rcx, %r8 + movq %r8, 0x70(%rsp) + adcq %rcx, %rsi + shlq $0x3f, %rax + addq %rax, %rsi + movq %rsi, 0x78(%rsp) +curve25519_x25519_midloop: + movq 0x98(%rsp), %rsi + movq (%rsp), %rdx + movq 0x20(%rsp), %rcx + movq %rdx, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + xorl %ebp, %ebp + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 subq %rax, %rsi - sbbq %rdx, %r11 - sbbq %rbp, %rbp - movq %rsi, (%r15,%r9,8) - movq %r11, %rsi - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519_crossloop - xorq %r9, %r9 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r12, %r14 - xorq %rbp, %rsi -curve25519_x25519_optnegloop: - movq (%r8,%r9,8), %rax - xorq %r12, %rax - negq %r10 - adcq $0x0, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rax - xorq %rbp, %rax - negq %r11 - adcq $0x0, %rax - sbbq %r11, %r11 - 
movq %rax, (%r15,%r9,8) - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519_optnegloop - subq %r10, %r14 - subq %r11, %rsi - movq %r13, %r9 -curve25519_x25519_shiftloop: - movq -0x8(%r8,%r9,8), %rax - movq %rax, %r10 - shrdq $0x3a, %r14, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %r10, %r14 - movq -0x8(%r15,%r9,8), %rax - movq %rax, %r11 - shrdq $0x3a, %rsi, %rax - movq %rax, -0x8(%r15,%r9,8) - movq %r11, %rsi - decq %r9 - jne curve25519_x25519_shiftloop - notq %rbp - movq 0x48(%rsp), %rcx - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r9, %r9 -curve25519_x25519_fliploop: - movq %rbp, %rdx - movq (%rcx,%r9,8), %rax - andq %rax, %rdx - andq %r12, %rax - movq (%r8,%r9,8), %rbx - xorq %r12, %rbx - negq %r10 - adcq %rbx, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rbx - xorq %rbp, %rbx - negq %r11 - adcq %rbx, %rdx - sbbq %r11, %r11 - movq %rdx, (%r15,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519_fliploop - subq $0x3a, 0x20(%rsp) - ja curve25519_x25519_outerloop + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq 
%rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %rdx + leaq (%rcx,%rax), %rdi + shlq $0x16, %rdx + shlq $0x16, %rdi + sarq $0x2b, %rdx + sarq $0x2b, %rdi + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %rbx + leaq (%rcx,%rax), %rcx + sarq $0x2a, %rbx + sarq $0x2a, %rcx + movq %rdx, 0xa0(%rsp) + movq %rbx, 0xa8(%rsp) + movq %rdi, 0xb0(%rsp) + movq %rcx, 0xb8(%rsp) + movq (%rsp), %r12 + imulq %r12, %rdi + imulq %rdx, %r12 + movq 0x20(%rsp), %r13 + imulq %r13, %rbx + imulq %rcx, %r13 + addq %rbx, %r12 + addq %rdi, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq 
%r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 
+ cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r10 + shlq $0x16, %r8 + shlq $0x16, %r10 + sarq $0x2b, %r8 + sarq $0x2b, %r10 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r15 + leaq (%rcx,%rax), %r11 + sarq $0x2a, %r15 + sarq $0x2a, %r11 + movq %r13, %rbx + movq %r12, %rcx + imulq %r8, %r12 + imulq %r15, %rbx + addq %rbx, %r12 + imulq %r11, %r13 + imulq %r10, %rcx + addq %rcx, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq 0xa0(%rsp), %rax + imulq %r8, %rax + movq 0xb0(%rsp), %rdx + imulq %r15, %rdx + imulq 0xa8(%rsp), %r8 + imulq 0xb8(%rsp), %r15 + addq %r8, %r15 + leaq (%rax,%rdx), %r9 + movq 0xa0(%rsp), %rax + imulq %r10, %rax + movq 0xb0(%rsp), %rdx + imulq %r11, %rdx + imulq 0xa8(%rsp), %r10 + imulq 0xb8(%rsp), %r11 + addq %r10, %r11 + leaq (%rax,%rdx), %r13 + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, 
%rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r12 + shlq $0x15, %r8 + shlq $0x15, %r12 + sarq $0x2b, %r8 + sarq $0x2b, %r12 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r10 + leaq (%rcx,%rax), %r14 + sarq $0x2b, %r10 + sarq $0x2b, %r14 + movq %r9, %rax + imulq %r8, %rax + movq %r13, %rdx + imulq %r10, %rdx + imulq %r15, %r8 + imulq %r11, %r10 + addq %r8, %r10 + leaq (%rax,%rdx), %r8 + movq %r9, %rax + imulq %r12, %rax + movq %r13, %rdx + imulq %r14, %rdx + imulq %r15, %r12 + imulq %r11, %r14 + addq %r12, %r14 + leaq (%rax,%rdx), %r12 + movq %rsi, 0x98(%rsp) + decq 0x90(%rsp) + jne curve25519_x25519_inverseloop + movq (%rsp), %rax + movq 0x20(%rsp), %rcx + imulq %r8, %rax + imulq %r10, %rcx + addq %rcx, %rax + sarq $0x3f, %rax + movq %r8, %r9 + 
sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + xorq %rax, %r9 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + xorq %rax, %r11 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + xorq %rax, %r13 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + xorq %rax, %r15 + movq %r8, %rax + andq %r9, %rax + movq %r10, %r12 + andq %r11, %r12 + addq %rax, %r12 + xorl %r13d, %r13d + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r13 + adcq %rdx, %r14 + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x58(%rsp), %rax + xorq %r9, %rax + andq %r8, %r9 + negq %r9 + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r9 + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %r9 + mulq %r10 + addq %rax, %r15 + adcq %rdx, %r9 + movq %r9, %rax + shldq $0x1, %r15, %rax + sarq $0x3f, %r9 + movl $0x13, %ebx + leaq 0x1(%rax,%r9,1), %rax + imulq %rbx + xorl %ebp, %ebp + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r9, %r14 + adcq %r9, %r15 + shlq $0x3f, %rax + addq %rax, %r15 + cmovns %rbp, %rbx + subq %rbx, %r12 + sbbq %rbp, %r13 + sbbq %rbp, %r14 + sbbq %rbp, %r15 + btr $0x3f, %r15 + movq 0xc0(%rsp), %rdi + movq %r12, (%rdi) + movq %r13, 0x8(%rdi) + movq %r14, 0x10(%rdi) + movq %r15, 0x18(%rdi) // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. This avoids building in a // dependency on the behavior of modular inverse in out-of-scope cases. - movq 160(%rsp), %rax - orq 168(%rsp), %rax - orq 176(%rsp), %rax - orq 184(%rsp), %rax + movq 224(%rsp), %rax + orq 232(%rsp), %rax + orq 240(%rsp), %rax + orq 248(%rsp), %rax movq 320(%rsp), %rcx cmovzq %rax, %rcx movq %rcx, 320(%rsp) @@ -1230,7 +2172,7 @@ curve25519_x25519_fliploop: // Now the result is xn * (1/zn), fully reduced modulo p. movq res, %rbp - mul_p25519(resx,xn,zm) + mul_p25519(resx,xn,xm) // Restore stack and registers diff --git a/x86_att/curve25519/curve25519_x25519_alt.S b/x86_att/curve25519/curve25519_x25519_alt.S index 241c4505af..a855478a6e 100644 --- a/x86_att/curve25519/curve25519_x25519_alt.S +++ b/x86_att/curve25519/curve25519_x25519_alt.S @@ -65,12 +65,12 @@ #define sn (4*NUMSIZE)(%rsp) -#define zn (5*NUMSIZE)(%rsp) #define dn (5*NUMSIZE)(%rsp) #define e (5*NUMSIZE)(%rsp) #define dmsn (6*NUMSIZE)(%rsp) #define p (6*NUMSIZE)(%rsp) +#define zn (7*NUMSIZE)(%rsp) #define xm (8*NUMSIZE)(%rsp) #define dnsm (8*NUMSIZE)(%rsp) @@ -951,430 +951,1372 @@ curve25519_x25519_alt_scalarloop: mul_p25519(zn,p,e) // The projective result of the scalar multiplication is now (xn,zn). -// First set up the constant sn = 2^255 - 19 for the modular inverse. 
- - movq $-19, %rax - movq $-1, %rcx - movq $0x7fffffffffffffff, %rdx - movq %rax, 128(%rsp) - movq %rcx, 136(%rsp) - movq %rcx, 144(%rsp) - movq %rdx, 152(%rsp) - -// Prepare to call the modular inverse function to get zm = 1/zn - - movq $4, %rdi - leaq 96(%rsp), %rsi - leaq 160(%rsp), %rdx - leaq 128(%rsp), %rcx - leaq 192(%rsp), %r8 - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "x86/generic/bignum_modinv.S". Note -// that the stack it uses for its own temporaries is 80 bytes so it -// only overwrites pointx, scalar and dm, which are no longer needed. - - movq %rsi, 0x40(%rsp) - movq %r8, 0x38(%rsp) - movq %rcx, 0x48(%rsp) - leaq (%r8,%rdi,8), %r10 - movq %r10, 0x30(%rsp) - leaq (%r10,%rdi,8), %r15 - xorq %r11, %r11 - xorq %r9, %r9 -curve25519_x25519_alt_copyloop: - movq (%rdx,%r9,8), %rax - movq (%rcx,%r9,8), %rbx - movq %rax, (%r10,%r9,8) - movq %rbx, (%r15,%r9,8) - movq %rbx, (%r8,%r9,8) - movq %r11, (%rsi,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519_alt_copyloop - movq (%r8), %rax - movq %rax, %rbx - decq %rbx - movq %rbx, (%r8) - movq %rax, %rbp - movq %rax, %r12 - shlq $0x2, %rbp - subq %rbp, %r12 - xorq $0x2, %r12 - movq %r12, %rbp - imulq %rax, %rbp - movl $0x2, %eax - addq %rbp, %rax - addq $0x1, %rbp - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp +// Prepare to call the modular inverse function to get xm = 1/zn + + leaq 256(%rsp), %rdi + leaq 224(%rsp), %rsi + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "x86/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 208 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, xm and zn. 
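
In C terms, the tail of the routine that this inline copy supports amounts to the
sketch below: invert zn, force xn to zero when zn is zero, then one modular
multiplication. The prototypes follow the register convention visible in the
inlined code (%rdi = output, %rsi = input); the helper name, the branchless
masking, and the exact signatures are illustrative assumptions, not code from
this patch.

    #include <stdint.h>

    /* Assumed s2n-bignum-style prototypes for the two primitives used
       here (the inline code above corresponds to the first; the second
       matches the mul_p25519 macro); treat exact signatures as assumptions. */
    extern void bignum_inv_p25519(uint64_t z[static 4], uint64_t x[static 4]);
    extern void bignum_mul_p25519(uint64_t z[static 4], uint64_t x[static 4],
                                  uint64_t y[static 4]);

    /* Map the projective result (xn,zn) to the affine x-coordinate
       xn/zn mod p_25519, sending the point at infinity (zn == 0) to 0.
       Note that xn is overwritten, as in the assembly. */
    static void projective_to_affine(uint64_t res[static 4],
                                     uint64_t xn[static 4],
                                     uint64_t zn[static 4])
    {
        uint64_t xm[4];
        bignum_inv_p25519(xm, zn);          /* xm = 1/zn mod p_25519 */

        /* Force xn = 0 whenever zn = 0, mirroring the OR/cmovz sequence
           further below, so the result never depends on what the inverse
           returns for the out-of-scope input 0. */
        uint64_t any = zn[0] | zn[1] | zn[2] | zn[3];
        uint64_t mask = -(uint64_t)(any != 0);   /* 0 or all-ones */
        for (int i = 0; i < 4; i++) xn[i] &= mask;

        bignum_mul_p25519(res, xn, xm);     /* res = xn * (1/zn) mod p */
    }

The assembly realizes the masking with conditional moves rather than an
explicit mask so that the whole sequence stays branch-free.
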
+ + movq %rdi, 0xc0(%rsp) + xorl %eax, %eax + leaq -0x13(%rax), %rcx + notq %rax + movq %rcx, (%rsp) + movq %rax, 0x8(%rsp) + movq %rax, 0x10(%rsp) + btr $0x3f, %rax + movq %rax, 0x18(%rsp) + movq (%rsi), %rdx + movq 0x8(%rsi), %rcx + movq 0x10(%rsi), %r8 + movq 0x18(%rsi), %r9 movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - movq %r12, 0x28(%rsp) - movq %rdi, %rax - shlq $0x7, %rax - movq %rax, 0x20(%rsp) -curve25519_x25519_alt_outerloop: - movq 0x20(%rsp), %r13 - addq $0x3f, %r13 - shrq $0x6, %r13 - cmpq %rdi, %r13 - cmovaeq %rdi, %r13 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi - xorq %r11, %r11 - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 -curve25519_x25519_alt_toploop: - movq (%r8,%r9,8), %rbx - movq (%r15,%r9,8), %rcx - movq %r11, %r10 - andq %r12, %r10 - andq %rbp, %r11 - movq %rbx, %rax - orq %rcx, %rax - negq %rax - cmovbq %r10, %r14 - cmovbq %r11, %rsi - cmovbq %rbx, %r12 - cmovbq %rcx, %rbp - sbbq %r11, %r11 - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519_alt_toploop - movq %r12, %rax - orq %rbp, %rax - bsrq %rax, %rcx - xorq $0x3f, %rcx - shldq %cl, %r14, %r12 - shldq %cl, %rsi, %rbp - movq (%r8), %rax - movq %rax, %r14 - movq (%r15), %rax - movq %rax, %rsi - movl $0x1, %r10d - movl $0x0, %r11d - movl $0x0, %ecx - movl $0x1, %edx - movl $0x3a, %r9d - movq %rdi, 0x8(%rsp) - movq %r13, 0x10(%rsp) - movq %r8, (%rsp) - movq %r15, 0x18(%rsp) -curve25519_x25519_alt_innerloop: + xorl %r10d, %r10d + bts $0x3f, %r9 + adcq %r10, %rax + imulq $0x13, %rax, %rax + addq %rax, %rdx + adcq %r10, %rcx + adcq %r10, %r8 + adcq %r10, %r9 + movl $0x13, %eax + cmovbq %r10, %rax + subq %rax, %rdx + sbbq %r10, %rcx + sbbq %r10, %r8 + sbbq %r10, %r9 + btr $0x3f, %r9 + movq %rdx, 0x20(%rsp) + movq %rcx, 0x28(%rsp) + movq %r8, 0x30(%rsp) + movq %r9, 0x38(%rsp) xorl %eax, %eax + movq %rax, 0x40(%rsp) + movq %rax, 0x48(%rsp) + movq %rax, 0x50(%rsp) + movq %rax, 0x58(%rsp) + movabsq $0xa0f99e2375022099, %rax + movq %rax, 0x60(%rsp) + movabsq $0xa8c68f3f1d132595, %rax + movq %rax, 0x68(%rsp) + movabsq $0x6c6c893805ac5242, %rax + movq %rax, 0x70(%rsp) + movabsq $0x276508b241770615, %rax + movq %rax, 0x78(%rsp) + movq $0xa, 0x90(%rsp) + movq $0x1, 0x98(%rsp) + jmp curve25519_x25519_alt_midloop +curve25519_x25519_alt_inverseloop: + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + movq %r8, %rax + andq %r9, %rax + movq %r10, %rdi + andq %r11, %rdi + addq %rax, %rdi + movq %rdi, 0x80(%rsp) + movq %r12, %rax + andq %r13, %rax + movq %r14, %rsi + andq %r15, %rsi + addq %rax, %rsi + movq %rsi, 0x88(%rsp) xorl %ebx, %ebx - xorq %r8, %r8 - xorq %r15, %r15 - btq $0x0, %r14 - cmovbq %rbp, %rax - cmovbq %rsi, %rbx - cmovbq %rcx, %r8 - cmovbq %rdx, %r15 - movq %r14, %r13 - subq %rbx, %r14 - subq %r13, %rbx - movq %r12, %rdi - subq %rax, %rdi - cmovbq %r12, %rbp - leaq -0x1(%rdi), %r12 - cmovbq %rbx, %r14 - cmovbq %r13, %rsi - notq %r12 - cmovbq %r10, %rcx - cmovbq %r11, %rdx - cmovaeq %rdi, %r12 - shrq $1, %r14 - addq %r8, %r10 - addq %r15, %r11 - shrq $1, %r12 - addq %rcx, %rcx - addq %rdx, %rdx - decq %r9 - jne curve25519_x25519_alt_innerloop - movq 0x8(%rsp), %rdi - movq 0x10(%rsp), %r13 - movq (%rsp), %r8 - movq 0x18(%rsp), %r15 - movq %r10, (%rsp) - movq %r11, 0x8(%rsp) - movq %rcx, 0x10(%rsp) - movq %rdx, 0x18(%rsp) - movq 
0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - xorq %r14, %r14 - xorq %rsi, %rsi - xorq %r10, %r10 - xorq %r11, %r11 - xorq %r9, %r9 -curve25519_x25519_alt_congloop: - movq (%r8,%r9,8), %rcx movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r12 - movq 0x10(%rsp), %rax - mulq %rcx + xorq %r9, %rax + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x20(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + xorl %ebp, %ebp + movq (%rsp), %rax + xorq %r13, %rax + mulq %r12 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %rbp - movq (%r15,%r9,8), %rcx + adcq %rdx, %rbp + movq 0x20(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + xorl %ecx, %ecx movq 0x8(%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq %rdx, %r12 - shrdq $0x3a, %r14, %r10 - movq %r10, (%r8,%r9,8) - movq %r14, %r10 - movq %r12, %r14 + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x28(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $0x3b, %rbx, %rdi + movq %rdi, (%rsp) + xorl %edi, %edi + movq 0x8(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbp + adcq %rdx, %rdi + movq 0x28(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $0x3b, %rbp, %rsi + movq %rsi, 0x20(%rsp) + xorl %esi, %esi + movq 0x10(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rsi + movq 0x30(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $0x3b, %rcx, %rbx + movq %rbx, 0x8(%rsp) + xorl %ebx, %ebx + movq 0x10(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x30(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $0x3b, %rdi, %rbp + movq %rbp, 0x28(%rsp) movq 0x18(%rsp), %rax - mulq %rcx + xorq %r9, %rax + movq %rax, %rbp + sarq $0x3f, %rbp + andq %r8, %rbp + negq %rbp + mulq %r8 addq %rax, %rsi adcq %rdx, %rbp - shrdq $0x3a, %rsi, %r11 - movq %r11, (%r15,%r9,8) - movq %rsi, %r11 - movq %rbp, %rsi - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519_alt_congloop - shldq $0x6, %r10, %r14 - shldq $0x6, %r11, %rsi - movq 0x48(%rsp), %r15 - movq (%r8), %rbx - movq 0x28(%rsp), %r12 - imulq %rbx, %r12 - movq (%r15), %rax + movq 0x38(%rsp), %rax + xorq %r11, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r10, %rdx + subq %rdx, %rbp + mulq %r10 + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3b, %rsi, %rcx + movq %rcx, 0x10(%rsp) + shrdq $0x3b, %rbp, %rsi + movq 0x18(%rsp), %rax + movq %rsi, 0x18(%rsp) + xorq %r13, %rax + movq %rax, %rsi + sarq $0x3f, %rsi + andq %r12, %rsi + negq %rsi mulq %r12 - addq %rbx, %rax - movq %rdx, %r10 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je curve25519_x25519_alt_wmontend -curve25519_x25519_alt_wmontloop: - adcq (%r8,%r9,8), %r10 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax + addq %rax, %rbx + adcq %rdx, %rsi + movq 0x38(%rsp), %rax + xorq %r15, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r14, %rdx + subq %rdx, %rsi + mulq %r14 + addq %rax, %rbx + adcq %rdx, %rsi + shrdq $0x3b, %rbx, %rdi + movq %rdi, 0x30(%rsp) + shrdq $0x3b, %rsi, %rbx + movq %rbx, 0x38(%rsp) + movq 0x80(%rsp), %rbx + movq 0x88(%rsp), %rbp + xorl %ecx, %ecx + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x40(%rsp), %rax + xorq %r13, %rax mulq %r12 - subq %rbx, %rdx - addq %r10, %rax - 
movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r10 - incq %r9 - decq %rcx - jne curve25519_x25519_alt_wmontloop -curve25519_x25519_alt_wmontend: - adcq %r14, %r10 - movq %r10, -0x8(%r8,%rdi,8) - sbbq %r10, %r10 - negq %r10 - movq %rdi, %rcx - xorq %r9, %r9 -curve25519_x25519_alt_wcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne curve25519_x25519_alt_wcmploop - sbbq $0x0, %r10 - sbbq %r10, %r10 - notq %r10 - xorq %rcx, %rcx - xorq %r9, %r9 -curve25519_x25519_alt_wcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r10, %rbx - negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519_alt_wcorrloop + movq %rbx, 0x40(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x60(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x60(%rsp) + xorl %ebx, %ebx + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + xorl %ebp, %ebp + movq 0x48(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rcx, 0x48(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x68(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, 0x68(%rsp) + xorl %ecx, %ecx + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x50(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, 0x50(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x70(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x70(%rsp) + movq 0x58(%rsp), %rax + xorq %r9, %rax + movq %r9, %rbx + andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rcx + adcq %rbx, %rdx + movq %rdx, %rbx + shldq $0x1, %rcx, %rdx + sarq $0x3f, %rbx + addq %rbx, %rdx + movl $0x13, %eax + imulq %rdx movq 0x40(%rsp), %r8 - movq (%r8), %rbx - movq 0x28(%rsp), %rbp - imulq %rbx, %rbp - movq (%r15), %rax - mulq %rbp - addq %rbx, %rax - movq %rdx, %r11 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je curve25519_x25519_alt_zmontend -curve25519_x25519_alt_zmontloop: - adcq (%r8,%r9,8), %r11 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax - mulq %rbp - subq %rbx, %rdx - addq %r11, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r11 - incq %r9 - decq %rcx - jne curve25519_x25519_alt_zmontloop -curve25519_x25519_alt_zmontend: - adcq %rsi, %r11 - movq %r11, -0x8(%r8,%rdi,8) - sbbq %r11, %r11 - negq %r11 - movq %rdi, %rcx - xorq %r9, %r9 -curve25519_x25519_alt_zcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne curve25519_x25519_alt_zcmploop - sbbq $0x0, %r11 - sbbq %r11, %r11 - notq %r11 - xorq %rcx, %rcx - xorq %r9, %r9 -curve25519_x25519_alt_zcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r11, %rbx + addq %rax, %r8 + movq %r8, 0x40(%rsp) + movq 0x48(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x48(%rsp) + movq 0x50(%rsp), %r8 + adcq %rbx, %r8 + movq %r8, 0x50(%rsp) + adcq %rbx, %rcx + shlq $0x3f, %rax + addq %rax, %rcx + movq 0x58(%rsp), %rax + movq %rcx, 0x58(%rsp) + xorq %r13, %rax + movq %r13, %rcx + andq %r12, %rcx negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, 
%r9 - jb curve25519_x25519_alt_zcorrloop - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi -curve25519_x25519_alt_crossloop: - movq (%r8,%r9,8), %rcx - movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r10 - movq 0x10(%rsp), %rax - mulq %rcx + mulq %r12 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %r11 - movq (%r15,%r9,8), %rcx - movq 0x8(%rsp), %rax - mulq %rcx - subq %r12, %rdx - subq %rax, %r14 - sbbq %rdx, %r10 - sbbq %r12, %r12 - movq %r14, (%r8,%r9,8) - movq %r10, %r14 - movq 0x18(%rsp), %rax - mulq %rcx - subq %rbp, %rdx + adcq %rdx, %rcx + movq 0x78(%rsp), %rax + xorq %r15, %rax + movq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rcx + mulq %r14 + addq %rax, %rsi + adcq %rcx, %rdx + movq %rdx, %rcx + shldq $0x1, %rsi, %rdx + sarq $0x3f, %rcx + movl $0x13, %eax + addq %rcx, %rdx + imulq %rdx + movq 0x60(%rsp), %r8 + addq %rax, %r8 + movq %r8, 0x60(%rsp) + movq 0x68(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x68(%rsp) + movq 0x70(%rsp), %r8 + adcq %rcx, %r8 + movq %r8, 0x70(%rsp) + adcq %rcx, %rsi + shlq $0x3f, %rax + addq %rax, %rsi + movq %rsi, 0x78(%rsp) +curve25519_x25519_alt_midloop: + movq 0x98(%rsp), %rsi + movq (%rsp), %rdx + movq 0x20(%rsp), %rcx + movq %rdx, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + xorl %ebp, %ebp + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 subq %rax, %rsi - sbbq %rdx, %r11 - sbbq %rbp, %rbp - movq %rsi, (%r15,%r9,8) - movq %r11, %rsi - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519_alt_crossloop - xorq %r9, %r9 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r12, %r14 - xorq %rbp, %rsi -curve25519_x25519_alt_optnegloop: - movq (%r8,%r9,8), %rax - xorq %r12, %rax - negq %r10 - adcq $0x0, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rax - xorq %rbp, %rax - negq %r11 - adcq $0x0, %rax - sbbq %r11, %r11 - movq %rax, (%r15,%r9,8) - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519_alt_optnegloop - subq %r10, %r14 - subq %r11, %rsi - movq %r13, %r9 -curve25519_x25519_alt_shiftloop: - movq -0x8(%r8,%r9,8), %rax - movq %rax, %r10 - shrdq $0x3a, %r14, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %r10, %r14 - movq -0x8(%r15,%r9,8), %rax - movq %rax, %r11 - shrdq $0x3a, %rsi, %rax - movq %rax, -0x8(%r15,%r9,8) - movq %r11, %rsi - decq %r9 - jne curve25519_x25519_alt_shiftloop - notq %rbp - movq 0x48(%rsp), %rcx - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r9, %r9 -curve25519_x25519_alt_fliploop: - movq %rbp, %rdx - movq (%rcx,%r9,8), %rax - andq %rax, %rdx - andq %r12, %rax - movq (%r8,%r9,8), %rbx - xorq %r12, %rbx - negq %r10 - adcq %rbx, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rbx - xorq %rbp, %rbx - negq %r11 - adcq %rbx, %rdx - sbbq %r11, %r11 - movq %rdx, (%r15,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519_alt_fliploop - subq $0x3a, 0x20(%rsp) - ja curve25519_x25519_alt_outerloop + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq 
%rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi 
+ btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %rdx + leaq (%rcx,%rax), %rdi + shlq $0x16, %rdx + shlq $0x16, %rdi + sarq $0x2b, %rdx + sarq $0x2b, %rdi + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %rbx + leaq (%rcx,%rax), %rcx + sarq $0x2a, %rbx + sarq $0x2a, %rcx + movq %rdx, 0xa0(%rsp) + movq %rbx, 0xa8(%rsp) + movq %rdi, 0xb0(%rsp) + movq %rcx, 0xb8(%rsp) + movq (%rsp), %r12 + imulq %r12, %rdi + imulq %rdx, %r12 + movq 0x20(%rsp), %r13 + imulq %r13, %rbx + imulq %rcx, %r13 + addq %rbx, %r12 + addq %rdi, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi 
+ leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r10 + shlq $0x16, %r8 + shlq $0x16, %r10 + sarq $0x2b, %r8 + sarq $0x2b, %r10 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r15 + leaq (%rcx,%rax), %r11 + sarq $0x2a, %r15 + sarq $0x2a, %r11 + movq %r13, %rbx + movq %r12, %rcx + imulq %r8, %r12 + imulq %r15, %rbx + addq %rbx, %r12 + imulq %r11, %r13 + imulq %r10, %rcx + addq %rcx, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + 
orq %rax, %rcx + movq 0xa0(%rsp), %rax + imulq %r8, %rax + movq 0xb0(%rsp), %rdx + imulq %r15, %rdx + imulq 0xa8(%rsp), %r8 + imulq 0xb8(%rsp), %r15 + addq %r8, %r15 + leaq (%rax,%rdx), %r9 + movq 0xa0(%rsp), %rax + imulq %r10, %rax + movq 0xb0(%rsp), %rdx + imulq %r11, %rdx + imulq 0xa8(%rsp), %r10 + imulq 0xb8(%rsp), %r11 + addq %r10, %r11 + leaq (%rax,%rdx), %r13 + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq 
(%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r12 + shlq $0x15, %r8 + shlq $0x15, %r12 + sarq $0x2b, %r8 + sarq $0x2b, %r12 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r10 + leaq (%rcx,%rax), %r14 + sarq $0x2b, %r10 + sarq $0x2b, %r14 + movq %r9, %rax + imulq %r8, %rax + movq %r13, %rdx + imulq %r10, %rdx + imulq %r15, %r8 + imulq %r11, %r10 + addq %r8, %r10 + leaq (%rax,%rdx), %r8 + movq %r9, %rax + imulq %r12, %rax + movq %r13, %rdx + imulq %r14, %rdx + imulq %r15, %r12 + imulq %r11, %r14 + addq %r12, %r14 + leaq (%rax,%rdx), %r12 + movq %rsi, 0x98(%rsp) + decq 0x90(%rsp) + jne curve25519_x25519_alt_inverseloop + movq (%rsp), %rax + movq 0x20(%rsp), %rcx + imulq %r8, %rax + imulq %r10, %rcx + addq %rcx, %rax + sarq $0x3f, %rax + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + xorq %rax, %r9 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + xorq %rax, %r11 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + xorq %rax, %r13 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + xorq %rax, %r15 + movq %r8, %rax + andq %r9, %rax + movq %r10, %r12 + andq %r11, %r12 + addq %rax, %r12 + xorl %r13d, %r13d + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r13 + adcq %rdx, %r14 + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x58(%rsp), %rax + xorq %r9, %rax + andq %r8, %r9 + negq %r9 + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r9 + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %r9 + mulq %r10 + addq %rax, %r15 + adcq %rdx, %r9 + movq %r9, 
%rax + shldq $0x1, %r15, %rax + sarq $0x3f, %r9 + movl $0x13, %ebx + leaq 0x1(%rax,%r9,1), %rax + imulq %rbx + xorl %ebp, %ebp + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r9, %r14 + adcq %r9, %r15 + shlq $0x3f, %rax + addq %rax, %r15 + cmovns %rbp, %rbx + subq %rbx, %r12 + sbbq %rbp, %r13 + sbbq %rbp, %r14 + sbbq %rbp, %r15 + btr $0x3f, %r15 + movq 0xc0(%rsp), %rdi + movq %r12, (%rdi) + movq %r13, 0x8(%rdi) + movq %r14, 0x10(%rdi) + movq %r15, 0x18(%rdi) // Since we eventually want to return 0 when the result is the point at // infinity, we force xn = 0 whenever zn = 0. This avoids building in a // dependency on the behavior of modular inverse in out-of-scope cases. - movq 160(%rsp), %rax - orq 168(%rsp), %rax - orq 176(%rsp), %rax - orq 184(%rsp), %rax + movq 224(%rsp), %rax + orq 232(%rsp), %rax + orq 240(%rsp), %rax + orq 248(%rsp), %rax movq 320(%rsp), %rcx cmovzq %rax, %rcx movq %rcx, 320(%rsp) @@ -1391,7 +2333,7 @@ curve25519_x25519_alt_fliploop: // Now the result is xn * (1/zn), fully reduced modulo p. movq res, %rbp - mul_p25519(resx,xn,zm) + mul_p25519(resx,xn,xm) // Restore stack and registers diff --git a/x86_att/curve25519/curve25519_x25519base.S b/x86_att/curve25519/curve25519_x25519base.S index 12a5cddd18..e450656861 100644 --- a/x86_att/curve25519/curve25519_x25519base.S +++ b/x86_att/curve25519/curve25519_x25519base.S @@ -874,416 +874,1368 @@ curve25519_x25519base_scalarloop: // // First the addition and subtraction: - add_twice4(y_3,x_3,w_3) - sub_twice4(z_3,x_3,w_3) + add_twice4(t1,x_3,w_3) + sub_twice4(t2,x_3,w_3) -// Prepare to call the modular inverse function to get x_3 = 1/z_3 +// Prepare to call the modular inverse function to get t0 = 1/t2 // Note that this works for the weakly normalized z_3 equally well. // The non-coprime case z_3 == 0 (mod p_25519) cannot arise anyway. - movq $4, %rdi - leaq 128(%rsp), %rsi - leaq 192(%rsp), %rdx - leaq curve25519_x25519base_p_25519(%rip), %rcx - leaq 256(%rsp), %r8 - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "x86/generic/bignum_modinv.S". Note -// that the stack it uses for its own temporaries is 80 bytes so it -// only overwrites local variables that are no longer needed. - - movq %rsi, 0x40(%rsp) - movq %r8, 0x38(%rsp) - movq %rcx, 0x48(%rsp) - leaq (%r8,%rdi,8), %r10 - movq %r10, 0x30(%rsp) - leaq (%r10,%rdi,8), %r15 - xorq %r11, %r11 - xorq %r9, %r9 -curve25519_x25519base_copyloop: - movq (%rdx,%r9,8), %rax - movq (%rcx,%r9,8), %rbx - movq %rax, (%r10,%r9,8) - movq %rbx, (%r15,%r9,8) - movq %rbx, (%r8,%r9,8) - movq %r11, (%rsi,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519base_copyloop - movq (%r8), %rax - movq %rax, %rbx - decq %rbx - movq %rbx, (%r8) - movq %rax, %rbp - movq %rax, %r12 - shlq $0x2, %rbp - subq %rbp, %r12 - xorq $0x2, %r12 - movq %r12, %rbp - imulq %rax, %rbp - movl $0x2, %eax - addq %rbp, %rax - addq $0x1, %rbp - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp + leaq 256(%rsp), %rdi + leaq 320(%rsp), %rsi + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. 
For more details and explanations see +// "x86/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 208 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, t0, t1, t2. + + movq %rdi, 0xc0(%rsp) + xorl %eax, %eax + leaq -0x13(%rax), %rcx + notq %rax + movq %rcx, (%rsp) + movq %rax, 0x8(%rsp) + movq %rax, 0x10(%rsp) + btr $0x3f, %rax + movq %rax, 0x18(%rsp) + movq (%rsi), %rdx + movq 0x8(%rsi), %rcx + movq 0x10(%rsi), %r8 + movq 0x18(%rsi), %r9 movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - movq %r12, 0x28(%rsp) - movq %rdi, %rax - shlq $0x7, %rax - movq %rax, 0x20(%rsp) -curve25519_x25519base_outerloop: - movq 0x20(%rsp), %r13 - addq $0x3f, %r13 - shrq $0x6, %r13 - cmpq %rdi, %r13 - cmovaeq %rdi, %r13 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi - xorq %r11, %r11 - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 -curve25519_x25519base_toploop: - movq (%r8,%r9,8), %rbx - movq (%r15,%r9,8), %rcx - movq %r11, %r10 - andq %r12, %r10 - andq %rbp, %r11 - movq %rbx, %rax - orq %rcx, %rax - negq %rax - cmovbq %r10, %r14 - cmovbq %r11, %rsi - cmovbq %rbx, %r12 - cmovbq %rcx, %rbp - sbbq %r11, %r11 - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519base_toploop - movq %r12, %rax - orq %rbp, %rax - bsrq %rax, %rcx - xorq $0x3f, %rcx - shldq %cl, %r14, %r12 - shldq %cl, %rsi, %rbp - movq (%r8), %rax - movq %rax, %r14 - movq (%r15), %rax - movq %rax, %rsi - movl $0x1, %r10d - movl $0x0, %r11d - movl $0x0, %ecx - movl $0x1, %edx - movl $0x3a, %r9d - movq %rdi, 0x8(%rsp) - movq %r13, 0x10(%rsp) - movq %r8, (%rsp) - movq %r15, 0x18(%rsp) -curve25519_x25519base_innerloop: + xorl %r10d, %r10d + bts $0x3f, %r9 + adcq %r10, %rax + imulq $0x13, %rax, %rax + addq %rax, %rdx + adcq %r10, %rcx + adcq %r10, %r8 + adcq %r10, %r9 + movl $0x13, %eax + cmovbq %r10, %rax + subq %rax, %rdx + sbbq %r10, %rcx + sbbq %r10, %r8 + sbbq %r10, %r9 + btr $0x3f, %r9 + movq %rdx, 0x20(%rsp) + movq %rcx, 0x28(%rsp) + movq %r8, 0x30(%rsp) + movq %r9, 0x38(%rsp) xorl %eax, %eax + movq %rax, 0x40(%rsp) + movq %rax, 0x48(%rsp) + movq %rax, 0x50(%rsp) + movq %rax, 0x58(%rsp) + movabsq $0xa0f99e2375022099, %rax + movq %rax, 0x60(%rsp) + movabsq $0xa8c68f3f1d132595, %rax + movq %rax, 0x68(%rsp) + movabsq $0x6c6c893805ac5242, %rax + movq %rax, 0x70(%rsp) + movabsq $0x276508b241770615, %rax + movq %rax, 0x78(%rsp) + movq $0xa, 0x90(%rsp) + movq $0x1, 0x98(%rsp) + jmp curve25519_x25519base_midloop +curve25519_x25519base_inverseloop: + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + movq %r8, %rax + andq %r9, %rax + movq %r10, %rdi + andq %r11, %rdi + addq %rax, %rdi + movq %rdi, 0x80(%rsp) + movq %r12, %rax + andq %r13, %rax + movq %r14, %rsi + andq %r15, %rsi + addq %rax, %rsi + movq %rsi, 0x88(%rsp) xorl %ebx, %ebx - xorq %r8, %r8 - xorq %r15, %r15 - btq $0x0, %r14 - cmovbq %rbp, %rax - cmovbq %rsi, %rbx - cmovbq %rcx, %r8 - cmovbq %rdx, %r15 - movq %r14, %r13 - subq %rbx, %r14 - subq %r13, %rbx - movq %r12, %rdi - subq %rax, %rdi - cmovbq %r12, %rbp - leaq -0x1(%rdi), %r12 - cmovbq %rbx, %r14 - cmovbq %r13, %rsi - notq %r12 - cmovbq %r10, %rcx - cmovbq %r11, %rdx - cmovaeq %rdi, %r12 - shrq $1, %r14 - addq %r8, %r10 - addq %r15, %r11 - shrq $1, %r12 - addq %rcx, %rcx - 
addq %rdx, %rdx - decq %r9 - jne curve25519_x25519base_innerloop - movq 0x8(%rsp), %rdi - movq 0x10(%rsp), %r13 - movq (%rsp), %r8 - movq 0x18(%rsp), %r15 - movq %r10, (%rsp) - movq %r11, 0x8(%rsp) - movq %rcx, 0x10(%rsp) - movq %rdx, 0x18(%rsp) - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - xorq %r14, %r14 - xorq %rsi, %rsi - xorq %r10, %r10 - xorq %r11, %r11 - xorq %r9, %r9 -curve25519_x25519base_congloop: - movq (%r8,%r9,8), %rcx movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r12 - movq 0x10(%rsp), %rax - mulq %rcx + xorq %r9, %rax + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x20(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + xorl %ebp, %ebp + movq (%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x20(%rsp), %rax + xorq %r15, %rax + mulq %r14 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %rbp - movq (%r15,%r9,8), %rcx + adcq %rdx, %rbp + xorl %ecx, %ecx movq 0x8(%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq %rdx, %r12 - shrdq $0x3a, %r14, %r10 - movq %r10, (%r8,%r9,8) - movq %r14, %r10 - movq %r12, %r14 + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x28(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $0x3b, %rbx, %rdi + movq %rdi, (%rsp) + xorl %edi, %edi + movq 0x8(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbp + adcq %rdx, %rdi + movq 0x28(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $0x3b, %rbp, %rsi + movq %rsi, 0x20(%rsp) + xorl %esi, %esi + movq 0x10(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rsi + movq 0x30(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $0x3b, %rcx, %rbx + movq %rbx, 0x8(%rsp) + xorl %ebx, %ebx + movq 0x10(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x30(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $0x3b, %rdi, %rbp + movq %rbp, 0x28(%rsp) movq 0x18(%rsp), %rax - mulq %rcx + xorq %r9, %rax + movq %rax, %rbp + sarq $0x3f, %rbp + andq %r8, %rbp + negq %rbp + mulq %r8 addq %rax, %rsi adcq %rdx, %rbp - shrdq $0x3a, %rsi, %r11 - movq %r11, (%r15,%r9,8) - movq %rsi, %r11 - movq %rbp, %rsi - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519base_congloop - shldq $0x6, %r10, %r14 - shldq $0x6, %r11, %rsi - movq 0x48(%rsp), %r15 - movq (%r8), %rbx - movq 0x28(%rsp), %r12 - imulq %rbx, %r12 - movq (%r15), %rax + movq 0x38(%rsp), %rax + xorq %r11, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r10, %rdx + subq %rdx, %rbp + mulq %r10 + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3b, %rsi, %rcx + movq %rcx, 0x10(%rsp) + shrdq $0x3b, %rbp, %rsi + movq 0x18(%rsp), %rax + movq %rsi, 0x18(%rsp) + xorq %r13, %rax + movq %rax, %rsi + sarq $0x3f, %rsi + andq %r12, %rsi + negq %rsi mulq %r12 - addq %rbx, %rax - movq %rdx, %r10 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je curve25519_x25519base_wmontend -curve25519_x25519base_wmontloop: - adcq (%r8,%r9,8), %r10 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax + addq %rax, %rbx + adcq %rdx, %rsi + movq 0x38(%rsp), %rax + xorq %r15, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r14, %rdx + subq %rdx, %rsi + mulq %r14 + addq %rax, %rbx + adcq %rdx, %rsi + shrdq $0x3b, %rbx, %rdi + movq %rdi, 0x30(%rsp) + shrdq $0x3b, %rsi, %rbx + movq %rbx, 0x38(%rsp) + movq 0x80(%rsp), %rbx + movq 0x88(%rsp), %rbp + xorl %ecx, %ecx + movq 0x40(%rsp), %rax + xorq %r9, 
%rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x40(%rsp), %rax + xorq %r13, %rax mulq %r12 - subq %rbx, %rdx - addq %r10, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r10 - incq %r9 - decq %rcx - jne curve25519_x25519base_wmontloop -curve25519_x25519base_wmontend: - adcq %r14, %r10 - movq %r10, -0x8(%r8,%rdi,8) - sbbq %r10, %r10 - negq %r10 - movq %rdi, %rcx - xorq %r9, %r9 -curve25519_x25519base_wcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne curve25519_x25519base_wcmploop - sbbq $0x0, %r10 - sbbq %r10, %r10 - notq %r10 - xorq %rcx, %rcx - xorq %r9, %r9 -curve25519_x25519base_wcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r10, %rbx - negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519base_wcorrloop + movq %rbx, 0x40(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x60(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x60(%rsp) + xorl %ebx, %ebx + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + xorl %ebp, %ebp + movq 0x48(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rcx, 0x48(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x68(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, 0x68(%rsp) + xorl %ecx, %ecx + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x50(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, 0x50(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x70(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x70(%rsp) + movq 0x58(%rsp), %rax + xorq %r9, %rax + movq %r9, %rbx + andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rcx + adcq %rbx, %rdx + movq %rdx, %rbx + shldq $0x1, %rcx, %rdx + sarq $0x3f, %rbx + addq %rbx, %rdx + movl $0x13, %eax + imulq %rdx movq 0x40(%rsp), %r8 - movq (%r8), %rbx - movq 0x28(%rsp), %rbp - imulq %rbx, %rbp - movq (%r15), %rax - mulq %rbp - addq %rbx, %rax - movq %rdx, %r11 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je curve25519_x25519base_zmontend -curve25519_x25519base_zmontloop: - adcq (%r8,%r9,8), %r11 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax - mulq %rbp - subq %rbx, %rdx - addq %r11, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r11 - incq %r9 - decq %rcx - jne curve25519_x25519base_zmontloop -curve25519_x25519base_zmontend: - adcq %rsi, %r11 - movq %r11, -0x8(%r8,%rdi,8) - sbbq %r11, %r11 - negq %r11 - movq %rdi, %rcx - xorq %r9, %r9 -curve25519_x25519base_zcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne curve25519_x25519base_zcmploop - sbbq $0x0, %r11 - sbbq %r11, %r11 - notq %r11 - xorq %rcx, %rcx - xorq %r9, %r9 -curve25519_x25519base_zcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r11, %rbx + addq %rax, %r8 + movq %r8, 0x40(%rsp) + movq 0x48(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x48(%rsp) + movq 0x50(%rsp), %r8 + adcq %rbx, %r8 + movq %r8, 0x50(%rsp) + 
adcq %rbx, %rcx + shlq $0x3f, %rax + addq %rax, %rcx + movq 0x58(%rsp), %rax + movq %rcx, 0x58(%rsp) + xorq %r13, %rax + movq %r13, %rcx + andq %r12, %rcx negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519base_zcorrloop - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi -curve25519_x25519base_crossloop: - movq (%r8,%r9,8), %rcx - movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r10 - movq 0x10(%rsp), %rax - mulq %rcx + mulq %r12 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %r11 - movq (%r15,%r9,8), %rcx - movq 0x8(%rsp), %rax - mulq %rcx - subq %r12, %rdx - subq %rax, %r14 - sbbq %rdx, %r10 - sbbq %r12, %r12 - movq %r14, (%r8,%r9,8) - movq %r10, %r14 - movq 0x18(%rsp), %rax - mulq %rcx - subq %rbp, %rdx + adcq %rdx, %rcx + movq 0x78(%rsp), %rax + xorq %r15, %rax + movq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rcx + mulq %r14 + addq %rax, %rsi + adcq %rcx, %rdx + movq %rdx, %rcx + shldq $0x1, %rsi, %rdx + sarq $0x3f, %rcx + movl $0x13, %eax + addq %rcx, %rdx + imulq %rdx + movq 0x60(%rsp), %r8 + addq %rax, %r8 + movq %r8, 0x60(%rsp) + movq 0x68(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x68(%rsp) + movq 0x70(%rsp), %r8 + adcq %rcx, %r8 + movq %r8, 0x70(%rsp) + adcq %rcx, %rsi + shlq $0x3f, %rax + addq %rax, %rsi + movq %rsi, 0x78(%rsp) +curve25519_x25519base_midloop: + movq 0x98(%rsp), %rsi + movq (%rsp), %rdx + movq 0x20(%rsp), %rcx + movq %rdx, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + xorl %ebp, %ebp + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 subq %rax, %rsi - sbbq %rdx, %r11 - sbbq %rbp, %rbp - movq %rsi, (%r15,%r9,8) - movq %r11, %rsi - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519base_crossloop - xorq %r9, %r9 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r12, %r14 - xorq %rbp, %rsi -curve25519_x25519base_optnegloop: - movq (%r8,%r9,8), %rax - xorq %r12, %rax - negq %r10 - adcq $0x0, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rax - xorq %rbp, %rax - negq %r11 - adcq $0x0, %rax - sbbq %r11, %r11 - movq %rax, (%r15,%r9,8) - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519base_optnegloop - subq %r10, %r14 - subq %r11, %rsi - movq %r13, %r9 -curve25519_x25519base_shiftloop: - movq -0x8(%r8,%r9,8), %rax - movq %rax, %r10 - shrdq $0x3a, %r14, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %r10, %r14 - movq -0x8(%r15,%r9,8), %rax - movq %rax, %r11 - shrdq $0x3a, %rsi, %rax - movq %rax, -0x8(%r15,%r9,8) - movq %r11, %rsi - decq %r9 - jne curve25519_x25519base_shiftloop - notq %rbp - movq 0x48(%rsp), %rcx - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r9, %r9 -curve25519_x25519base_fliploop: - movq %rbp, %rdx - movq (%rcx,%r9,8), %rax - andq %rax, %rdx - andq %r12, %rax - movq (%r8,%r9,8), %rbx - xorq %r12, %rbx - negq %r10 - adcq %rbx, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rbx - xorq %rbp, %rbx - negq %r11 - adcq %rbx, %rdx - sbbq %r11, %r11 - movq %rdx, (%r15,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519base_fliploop - subq $0x3a, 0x20(%rsp) - ja 
curve25519_x25519base_outerloop + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, 
%rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %rdx + leaq (%rcx,%rax), %rdi + shlq $0x16, %rdx + shlq $0x16, %rdi + sarq $0x2b, %rdx + sarq $0x2b, %rdi + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %rbx + leaq (%rcx,%rax), %rcx + sarq $0x2a, %rbx + sarq $0x2a, %rcx + movq %rdx, 0xa0(%rsp) + movq %rbx, 0xa8(%rsp) + movq %rdi, 0xb0(%rsp) + movq %rcx, 0xb8(%rsp) + movq (%rsp), %r12 + imulq %r12, %rdi + imulq %rdx, %r12 + movq 0x20(%rsp), %r13 + imulq %r13, %rbx + imulq %rcx, %r13 + addq %rbx, %r12 + addq %rdi, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, 
%r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r10 + shlq $0x16, %r8 + shlq $0x16, %r10 + sarq $0x2b, %r8 + sarq $0x2b, %r10 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r15 + leaq (%rcx,%rax), %r11 + sarq $0x2a, %r15 + sarq $0x2a, %r11 + movq %r13, %rbx + movq %r12, %rcx + imulq %r8, %r12 + imulq %r15, %rbx + addq %rbx, %r12 + 
imulq %r11, %r13 + imulq %r10, %rcx + addq %rcx, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq 0xa0(%rsp), %rax + imulq %r8, %rax + movq 0xb0(%rsp), %rdx + imulq %r15, %rdx + imulq 0xa8(%rsp), %r8 + imulq 0xb8(%rsp), %r15 + addq %r8, %r15 + leaq (%rax,%rdx), %r9 + movq 0xa0(%rsp), %rax + imulq %r10, %rax + movq 0xb0(%rsp), %rdx + imulq %r11, %rdx + imulq 0xa8(%rsp), %r10 + imulq 0xb8(%rsp), %r11 + addq %r10, %r11 + leaq (%rax,%rdx), %r13 + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + 
subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r12 + shlq $0x15, %r8 + shlq $0x15, %r12 + sarq $0x2b, %r8 + sarq $0x2b, %r12 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r10 + leaq (%rcx,%rax), %r14 + sarq $0x2b, %r10 + sarq $0x2b, %r14 + movq %r9, %rax + imulq %r8, %rax + movq %r13, %rdx + imulq %r10, %rdx + imulq %r15, %r8 + imulq %r11, %r10 + addq %r8, %r10 + leaq (%rax,%rdx), %r8 + movq %r9, %rax + imulq %r12, %rax + movq %r13, %rdx + imulq %r14, %rdx + imulq %r15, %r12 + imulq %r11, %r14 + addq %r12, %r14 + leaq (%rax,%rdx), %r12 + movq %rsi, 0x98(%rsp) + decq 0x90(%rsp) + jne curve25519_x25519base_inverseloop + movq (%rsp), %rax + movq 0x20(%rsp), %rcx + imulq %r8, %rax + imulq %r10, %rcx + addq %rcx, %rax + sarq $0x3f, %rax + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + xorq %rax, %r9 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + xorq %rax, %r11 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + xorq %rax, %r13 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + xorq %rax, %r15 + movq %r8, %rax + andq %r9, %rax + movq %r10, %r12 + andq %r11, %r12 + addq %rax, %r12 + xorl %r13d, %r13d + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r13 + adcq %rdx, %r14 + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + adcq %rdx, %r15 + 
movq 0x58(%rsp), %rax + xorq %r9, %rax + andq %r8, %r9 + negq %r9 + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r9 + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %r9 + mulq %r10 + addq %rax, %r15 + adcq %rdx, %r9 + movq %r9, %rax + shldq $0x1, %r15, %rax + sarq $0x3f, %r9 + movl $0x13, %ebx + leaq 0x1(%rax,%r9,1), %rax + imulq %rbx + xorl %ebp, %ebp + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r9, %r14 + adcq %r9, %r15 + shlq $0x3f, %rax + addq %rax, %r15 + cmovns %rbp, %rbx + subq %rbx, %r12 + sbbq %rbp, %r13 + sbbq %rbp, %r14 + sbbq %rbp, %r15 + btr $0x3f, %r15 + movq 0xc0(%rsp), %rdi + movq %r12, (%rdi) + movq %r13, 0x8(%rdi) + movq %r14, 0x10(%rdi) + movq %r15, 0x18(%rdi) // The final result is (X + T) / (X - T) // This is the only operation in the whole computation that @@ -1291,7 +2243,7 @@ curve25519_x25519base_fliploop: // answer as output. movq res, %rbp - mul_p25519(resx,y_3,x_3) + mul_p25519(resx,t1,t0) // Restore stack and registers @@ -1313,14 +2265,6 @@ curve25519_x25519base_fliploop: // .section .rodata // **************************************************************************** -// The modulus, for the modular inverse - -curve25519_x25519base_p_25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with z = 1 assumed and hence left out, so they are (X,Y,T) only. diff --git a/x86_att/curve25519/curve25519_x25519base_alt.S b/x86_att/curve25519/curve25519_x25519base_alt.S index 8a89b1f597..b1275e2084 100644 --- a/x86_att/curve25519/curve25519_x25519base_alt.S +++ b/x86_att/curve25519/curve25519_x25519base_alt.S @@ -950,414 +950,1368 @@ curve25519_x25519base_alt_scalarloop: // // First the addition and subtraction: - add_twice4(y_3,x_3,w_3) - sub_twice4(z_3,x_3,w_3) - -// Prepare to call the modular inverse function to get x_3 = 1/z_3 - - movq $4, %rdi - leaq 128(%rsp), %rsi - leaq 192(%rsp), %rdx - leaq curve25519_x25519base_alt_p_25519(%rip), %rcx - leaq 256(%rsp), %r8 - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "x86/generic/bignum_modinv.S". Note -// that the stack it uses for its own temporaries is 80 bytes so it -// only overwrites local variables that are no longer needed. 
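+// In rough C terms, the bignum_inv_p25519 body inlined below is a
+// constant-time divstep-based inverse replacing the generic word-array
+// bignum_modinv deleted here: the counter at 0x90(%rsp) starts at 10
+// and is decremented once per pass, and each pass folds the accumulated
+// transition back into the multiprecision state with 59-bit shrdq
+// shifts. Approximately (a sketch with hypothetical names, not the
+// exact packed update used by the code):
+//
+//     for (i = 10; i > 0; i--)            // decq 0x90(%rsp); jne ...
+//         divstep59(&d, &f, &g, &u, &v);  // one batch of 59 divsteps
+//
+// followed by a final reduction of the accumulated result into
+// [0, p_25519).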
- - movq %rsi, 0x40(%rsp) - movq %r8, 0x38(%rsp) - movq %rcx, 0x48(%rsp) - leaq (%r8,%rdi,8), %r10 - movq %r10, 0x30(%rsp) - leaq (%r10,%rdi,8), %r15 - xorq %r11, %r11 - xorq %r9, %r9 -curve25519_x25519base_alt_copyloop: - movq (%rdx,%r9,8), %rax - movq (%rcx,%r9,8), %rbx - movq %rax, (%r10,%r9,8) - movq %rbx, (%r15,%r9,8) - movq %rbx, (%r8,%r9,8) - movq %r11, (%rsi,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519base_alt_copyloop - movq (%r8), %rax - movq %rax, %rbx - decq %rbx - movq %rbx, (%r8) - movq %rax, %rbp - movq %rax, %r12 - shlq $0x2, %rbp - subq %rbp, %r12 - xorq $0x2, %r12 - movq %r12, %rbp - imulq %rax, %rbp - movl $0x2, %eax - addq %rbp, %rax - addq $0x1, %rbp - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp + add_twice4(t1,x_3,w_3) + sub_twice4(t2,x_3,w_3) + +// Prepare to call the modular inverse function to get t0 = 1/t2 +// Note that this works for the weakly normalized z_3 equally well. +// The non-coprime case z_3 == 0 (mod p_25519) cannot arise anyway. + + leaq 256(%rsp), %rdi + leaq 320(%rsp), %rsi + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "x86/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 208 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, t0, t1, t2. + + movq %rdi, 0xc0(%rsp) + xorl %eax, %eax + leaq -0x13(%rax), %rcx + notq %rax + movq %rcx, (%rsp) + movq %rax, 0x8(%rsp) + movq %rax, 0x10(%rsp) + btr $0x3f, %rax + movq %rax, 0x18(%rsp) + movq (%rsi), %rdx + movq 0x8(%rsi), %rcx + movq 0x10(%rsi), %r8 + movq 0x18(%rsi), %r9 movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - movq %r12, 0x28(%rsp) - movq %rdi, %rax - shlq $0x7, %rax - movq %rax, 0x20(%rsp) -curve25519_x25519base_alt_outerloop: - movq 0x20(%rsp), %r13 - addq $0x3f, %r13 - shrq $0x6, %r13 - cmpq %rdi, %r13 - cmovaeq %rdi, %r13 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi - xorq %r11, %r11 - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 -curve25519_x25519base_alt_toploop: - movq (%r8,%r9,8), %rbx - movq (%r15,%r9,8), %rcx - movq %r11, %r10 - andq %r12, %r10 - andq %rbp, %r11 - movq %rbx, %rax - orq %rcx, %rax - negq %rax - cmovbq %r10, %r14 - cmovbq %r11, %rsi - cmovbq %rbx, %r12 - cmovbq %rcx, %rbp - sbbq %r11, %r11 - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519base_alt_toploop - movq %r12, %rax - orq %rbp, %rax - bsrq %rax, %rcx - xorq $0x3f, %rcx - shldq %cl, %r14, %r12 - shldq %cl, %rsi, %rbp - movq (%r8), %rax - movq %rax, %r14 - movq (%r15), %rax - movq %rax, %rsi - movl $0x1, %r10d - movl $0x0, %r11d - movl $0x0, %ecx - movl $0x1, %edx - movl $0x3a, %r9d - movq %rdi, 0x8(%rsp) - movq %r13, 0x10(%rsp) - movq %r8, (%rsp) - movq %r15, 0x18(%rsp) -curve25519_x25519base_alt_innerloop: + xorl %r10d, %r10d + bts $0x3f, %r9 + adcq %r10, %rax + imulq $0x13, %rax, %rax + addq %rax, %rdx + adcq %r10, %rcx + adcq %r10, %r8 + adcq %r10, %r9 + movl $0x13, %eax + cmovbq %r10, %rax + subq %rax, %rdx + sbbq %r10, %rcx + sbbq %r10, %r8 + sbbq %r10, %r9 + btr $0x3f, %r9 + movq %rdx, 0x20(%rsp) + movq %rcx, 0x28(%rsp) + movq %r8, 0x30(%rsp) + movq %r9, 0x38(%rsp) xorl %eax, %eax + movq %rax, 0x40(%rsp) + movq %rax, 0x48(%rsp) + movq %rax, 
0x50(%rsp) + movq %rax, 0x58(%rsp) + movabsq $0xa0f99e2375022099, %rax + movq %rax, 0x60(%rsp) + movabsq $0xa8c68f3f1d132595, %rax + movq %rax, 0x68(%rsp) + movabsq $0x6c6c893805ac5242, %rax + movq %rax, 0x70(%rsp) + movabsq $0x276508b241770615, %rax + movq %rax, 0x78(%rsp) + movq $0xa, 0x90(%rsp) + movq $0x1, 0x98(%rsp) + jmp curve25519_x25519base_alt_midloop +curve25519_x25519base_alt_inverseloop: + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + movq %r8, %rax + andq %r9, %rax + movq %r10, %rdi + andq %r11, %rdi + addq %rax, %rdi + movq %rdi, 0x80(%rsp) + movq %r12, %rax + andq %r13, %rax + movq %r14, %rsi + andq %r15, %rsi + addq %rax, %rsi + movq %rsi, 0x88(%rsp) xorl %ebx, %ebx - xorq %r8, %r8 - xorq %r15, %r15 - btq $0x0, %r14 - cmovbq %rbp, %rax - cmovbq %rsi, %rbx - cmovbq %rcx, %r8 - cmovbq %rdx, %r15 - movq %r14, %r13 - subq %rbx, %r14 - subq %r13, %rbx - movq %r12, %rdi - subq %rax, %rdi - cmovbq %r12, %rbp - leaq -0x1(%rdi), %r12 - cmovbq %rbx, %r14 - cmovbq %r13, %rsi - notq %r12 - cmovbq %r10, %rcx - cmovbq %r11, %rdx - cmovaeq %rdi, %r12 - shrq $1, %r14 - addq %r8, %r10 - addq %r15, %r11 - shrq $1, %r12 - addq %rcx, %rcx - addq %rdx, %rdx - decq %r9 - jne curve25519_x25519base_alt_innerloop - movq 0x8(%rsp), %rdi - movq 0x10(%rsp), %r13 - movq (%rsp), %r8 - movq 0x18(%rsp), %r15 - movq %r10, (%rsp) - movq %r11, 0x8(%rsp) - movq %rcx, 0x10(%rsp) - movq %rdx, 0x18(%rsp) - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - xorq %r14, %r14 - xorq %rsi, %rsi - xorq %r10, %r10 - xorq %r11, %r11 - xorq %r9, %r9 -curve25519_x25519base_alt_congloop: - movq (%r8,%r9,8), %rcx movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r12 - movq 0x10(%rsp), %rax - mulq %rcx + xorq %r9, %rax + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x20(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + xorl %ebp, %ebp + movq (%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x20(%rsp), %rax + xorq %r15, %rax + mulq %r14 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %rbp - movq (%r15,%r9,8), %rcx + adcq %rdx, %rbp + xorl %ecx, %ecx movq 0x8(%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq %rdx, %r12 - shrdq $0x3a, %r14, %r10 - movq %r10, (%r8,%r9,8) - movq %r14, %r10 - movq %r12, %r14 + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x28(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $0x3b, %rbx, %rdi + movq %rdi, (%rsp) + xorl %edi, %edi + movq 0x8(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbp + adcq %rdx, %rdi + movq 0x28(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $0x3b, %rbp, %rsi + movq %rsi, 0x20(%rsp) + xorl %esi, %esi + movq 0x10(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rsi + movq 0x30(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $0x3b, %rcx, %rbx + movq %rbx, 0x8(%rsp) + xorl %ebx, %ebx + movq 0x10(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x30(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $0x3b, %rdi, %rbp + movq %rbp, 0x28(%rsp) movq 0x18(%rsp), %rax - mulq %rcx + xorq %r9, %rax + movq %rax, %rbp + 
sarq $0x3f, %rbp + andq %r8, %rbp + negq %rbp + mulq %r8 + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x38(%rsp), %rax + xorq %r11, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r10, %rdx + subq %rdx, %rbp + mulq %r10 addq %rax, %rsi adcq %rdx, %rbp - shrdq $0x3a, %rsi, %r11 - movq %r11, (%r15,%r9,8) - movq %rsi, %r11 - movq %rbp, %rsi - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519base_alt_congloop - shldq $0x6, %r10, %r14 - shldq $0x6, %r11, %rsi - movq 0x48(%rsp), %r15 - movq (%r8), %rbx - movq 0x28(%rsp), %r12 - imulq %rbx, %r12 - movq (%r15), %rax + shrdq $0x3b, %rsi, %rcx + movq %rcx, 0x10(%rsp) + shrdq $0x3b, %rbp, %rsi + movq 0x18(%rsp), %rax + movq %rsi, 0x18(%rsp) + xorq %r13, %rax + movq %rax, %rsi + sarq $0x3f, %rsi + andq %r12, %rsi + negq %rsi mulq %r12 - addq %rbx, %rax - movq %rdx, %r10 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je curve25519_x25519base_alt_wmontend -curve25519_x25519base_alt_wmontloop: - adcq (%r8,%r9,8), %r10 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax + addq %rax, %rbx + adcq %rdx, %rsi + movq 0x38(%rsp), %rax + xorq %r15, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r14, %rdx + subq %rdx, %rsi + mulq %r14 + addq %rax, %rbx + adcq %rdx, %rsi + shrdq $0x3b, %rbx, %rdi + movq %rdi, 0x30(%rsp) + shrdq $0x3b, %rsi, %rbx + movq %rbx, 0x38(%rsp) + movq 0x80(%rsp), %rbx + movq 0x88(%rsp), %rbp + xorl %ecx, %ecx + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x40(%rsp), %rax + xorq %r13, %rax mulq %r12 - subq %rbx, %rdx - addq %r10, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r10 - incq %r9 - decq %rcx - jne curve25519_x25519base_alt_wmontloop -curve25519_x25519base_alt_wmontend: - adcq %r14, %r10 - movq %r10, -0x8(%r8,%rdi,8) - sbbq %r10, %r10 - negq %r10 - movq %rdi, %rcx - xorq %r9, %r9 -curve25519_x25519base_alt_wcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne curve25519_x25519base_alt_wcmploop - sbbq $0x0, %r10 - sbbq %r10, %r10 - notq %r10 - xorq %rcx, %rcx - xorq %r9, %r9 -curve25519_x25519base_alt_wcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r10, %rbx - negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519base_alt_wcorrloop + movq %rbx, 0x40(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x60(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x60(%rsp) + xorl %ebx, %ebx + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + xorl %ebp, %ebp + movq 0x48(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rcx, 0x48(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x68(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, 0x68(%rsp) + xorl %ecx, %ecx + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x50(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, 0x50(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x70(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x70(%rsp) + movq 0x58(%rsp), %rax + xorq %r9, %rax + movq %r9, %rbx + andq %r8, %rbx + negq %rbx + 
mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rcx + adcq %rbx, %rdx + movq %rdx, %rbx + shldq $0x1, %rcx, %rdx + sarq $0x3f, %rbx + addq %rbx, %rdx + movl $0x13, %eax + imulq %rdx movq 0x40(%rsp), %r8 - movq (%r8), %rbx - movq 0x28(%rsp), %rbp - imulq %rbx, %rbp - movq (%r15), %rax - mulq %rbp - addq %rbx, %rax - movq %rdx, %r11 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je curve25519_x25519base_alt_zmontend -curve25519_x25519base_alt_zmontloop: - adcq (%r8,%r9,8), %r11 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax - mulq %rbp - subq %rbx, %rdx - addq %r11, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r11 - incq %r9 - decq %rcx - jne curve25519_x25519base_alt_zmontloop -curve25519_x25519base_alt_zmontend: - adcq %rsi, %r11 - movq %r11, -0x8(%r8,%rdi,8) - sbbq %r11, %r11 - negq %r11 - movq %rdi, %rcx - xorq %r9, %r9 -curve25519_x25519base_alt_zcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne curve25519_x25519base_alt_zcmploop - sbbq $0x0, %r11 - sbbq %r11, %r11 - notq %r11 - xorq %rcx, %rcx - xorq %r9, %r9 -curve25519_x25519base_alt_zcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r11, %rbx + addq %rax, %r8 + movq %r8, 0x40(%rsp) + movq 0x48(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x48(%rsp) + movq 0x50(%rsp), %r8 + adcq %rbx, %r8 + movq %r8, 0x50(%rsp) + adcq %rbx, %rcx + shlq $0x3f, %rax + addq %rax, %rcx + movq 0x58(%rsp), %rax + movq %rcx, 0x58(%rsp) + xorq %r13, %rax + movq %r13, %rcx + andq %r12, %rcx negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519base_alt_zcorrloop - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi -curve25519_x25519base_alt_crossloop: - movq (%r8,%r9,8), %rcx - movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r10 - movq 0x10(%rsp), %rax - mulq %rcx + mulq %r12 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %r11 - movq (%r15,%r9,8), %rcx - movq 0x8(%rsp), %rax - mulq %rcx - subq %r12, %rdx - subq %rax, %r14 - sbbq %rdx, %r10 - sbbq %r12, %r12 - movq %r14, (%r8,%r9,8) - movq %r10, %r14 - movq 0x18(%rsp), %rax - mulq %rcx - subq %rbp, %rdx + adcq %rdx, %rcx + movq 0x78(%rsp), %rax + xorq %r15, %rax + movq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rcx + mulq %r14 + addq %rax, %rsi + adcq %rcx, %rdx + movq %rdx, %rcx + shldq $0x1, %rsi, %rdx + sarq $0x3f, %rcx + movl $0x13, %eax + addq %rcx, %rdx + imulq %rdx + movq 0x60(%rsp), %r8 + addq %rax, %r8 + movq %r8, 0x60(%rsp) + movq 0x68(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x68(%rsp) + movq 0x70(%rsp), %r8 + adcq %rcx, %r8 + movq %r8, 0x70(%rsp) + adcq %rcx, %rsi + shlq $0x3f, %rax + addq %rax, %rsi + movq %rsi, 0x78(%rsp) +curve25519_x25519base_alt_midloop: + movq 0x98(%rsp), %rsi + movq (%rsp), %rdx + movq 0x20(%rsp), %rcx + movq %rdx, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + xorl %ebp, %ebp + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 subq %rax, %rsi - sbbq %rdx, %r11 - sbbq %rbp, %rbp - movq %rsi, (%r15,%r9,8) 
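+// Each repetition of the cmovs/testq/cmoveq/sarq/xorq/btq/cmovbq group
+// in this stretch is, in effect, one branch-free divstep on packed
+// values: the andq $0xfffff masks and the 0xfffffe0000000000 and
+// 0xc000000000000000 constants appear to pack 20-bit low windows of the
+// working values together with sign/marker bits into single registers,
+// letting a run of divsteps chain through registers before the full
+// multiprecision state is touched. A minimal C model of one textbook
+// divstep (a sketch only, not the packed register form used here):
+//
+//     if (delta > 0 && (g & 1))
+//         { delta = 1 - delta; t = f; f = g; g = (g - t) >> 1; }
+//     else
+//         { delta = 1 + delta; g = (g + ((g & 1) ? f : 0)) >> 1; }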
- movq %r11, %rsi - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519base_alt_crossloop - xorq %r9, %r9 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r12, %r14 - xorq %rbp, %rsi -curve25519_x25519base_alt_optnegloop: - movq (%r8,%r9,8), %rax - xorq %r12, %rax - negq %r10 - adcq $0x0, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rax - xorq %rbp, %rax - negq %r11 - adcq $0x0, %rax - sbbq %r11, %r11 - movq %rax, (%r15,%r9,8) - incq %r9 - cmpq %r13, %r9 - jb curve25519_x25519base_alt_optnegloop - subq %r10, %r14 - subq %r11, %rsi - movq %r13, %r9 -curve25519_x25519base_alt_shiftloop: - movq -0x8(%r8,%r9,8), %rax - movq %rax, %r10 - shrdq $0x3a, %r14, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %r10, %r14 - movq -0x8(%r15,%r9,8), %rax - movq %rax, %r11 - shrdq $0x3a, %rsi, %rax - movq %rax, -0x8(%r15,%r9,8) - movq %r11, %rsi - decq %r9 - jne curve25519_x25519base_alt_shiftloop - notq %rbp - movq 0x48(%rsp), %rcx - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r9, %r9 -curve25519_x25519base_alt_fliploop: - movq %rbp, %rdx - movq (%rcx,%r9,8), %rax - andq %rax, %rdx - andq %r12, %rax - movq (%r8,%r9,8), %rbx - xorq %r12, %rbx - negq %r10 - adcq %rbx, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rbx - xorq %rbp, %rbx - negq %r11 - adcq %rbx, %rdx - sbbq %r11, %r11 - movq %rdx, (%r15,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb curve25519_x25519base_alt_fliploop - subq $0x3a, 0x20(%rsp) - ja curve25519_x25519base_alt_outerloop + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + 
cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %rdx + leaq (%rcx,%rax), %rdi + shlq $0x16, %rdx + shlq $0x16, %rdi + sarq $0x2b, %rdx + sarq $0x2b, %rdi + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %rbx + leaq (%rcx,%rax), %rcx + sarq $0x2a, %rbx + sarq $0x2a, %rcx + movq %rdx, 0xa0(%rsp) + movq %rbx, 0xa8(%rsp) + movq %rdi, 0xb0(%rsp) + movq %rcx, 0xb8(%rsp) + movq (%rsp), %r12 + imulq %r12, %rdi + imulq %rdx, %r12 + movq 0x20(%rsp), %r13 + imulq %r13, %rbx + imulq %rcx, %r13 + addq %rbx, %r12 + addq %rdi, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + 
btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq 
%rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r10 + shlq $0x16, %r8 + shlq $0x16, %r10 + sarq $0x2b, %r8 + sarq $0x2b, %r10 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r15 + leaq (%rcx,%rax), %r11 + sarq $0x2a, %r15 + sarq $0x2a, %r11 + movq %r13, %rbx + movq %r12, %rcx + imulq %r8, %r12 + imulq %r15, %rbx + addq %rbx, %r12 + imulq %r11, %r13 + imulq %r10, %rcx + addq %rcx, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq 0xa0(%rsp), %rax + imulq %r8, %rax + movq 0xb0(%rsp), %rdx + imulq %r15, %rdx + imulq 0xa8(%rsp), %r8 + imulq 0xb8(%rsp), %r15 + addq %r8, %r15 + leaq (%rax,%rdx), %r9 + movq 0xa0(%rsp), %rax + imulq %r10, %rax + movq 0xb0(%rsp), %rdx + imulq %r11, %rdx + imulq 0xa8(%rsp), %r10 + imulq 0xb8(%rsp), %r11 + addq %r10, %r11 + leaq (%rax,%rdx), %r13 + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq 
$0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r12 + shlq $0x15, %r8 + shlq $0x15, %r12 + sarq $0x2b, %r8 + sarq $0x2b, %r12 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r10 + leaq (%rcx,%rax), %r14 + sarq $0x2b, %r10 + sarq $0x2b, %r14 + movq %r9, %rax + imulq %r8, %rax + movq %r13, 
%rdx + imulq %r10, %rdx + imulq %r15, %r8 + imulq %r11, %r10 + addq %r8, %r10 + leaq (%rax,%rdx), %r8 + movq %r9, %rax + imulq %r12, %rax + movq %r13, %rdx + imulq %r14, %rdx + imulq %r15, %r12 + imulq %r11, %r14 + addq %r12, %r14 + leaq (%rax,%rdx), %r12 + movq %rsi, 0x98(%rsp) + decq 0x90(%rsp) + jne curve25519_x25519base_alt_inverseloop + movq (%rsp), %rax + movq 0x20(%rsp), %rcx + imulq %r8, %rax + imulq %r10, %rcx + addq %rcx, %rax + sarq $0x3f, %rax + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + xorq %rax, %r9 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + xorq %rax, %r11 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + xorq %rax, %r13 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + xorq %rax, %r15 + movq %r8, %rax + andq %r9, %rax + movq %r10, %r12 + andq %r11, %r12 + addq %rax, %r12 + xorl %r13d, %r13d + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r13 + adcq %rdx, %r14 + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x58(%rsp), %rax + xorq %r9, %rax + andq %r8, %r9 + negq %r9 + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r9 + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %r9 + mulq %r10 + addq %rax, %r15 + adcq %rdx, %r9 + movq %r9, %rax + shldq $0x1, %r15, %rax + sarq $0x3f, %r9 + movl $0x13, %ebx + leaq 0x1(%rax,%r9,1), %rax + imulq %rbx + xorl %ebp, %ebp + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r9, %r14 + adcq %r9, %r15 + shlq $0x3f, %rax + addq %rax, %r15 + cmovns %rbp, %rbx + subq %rbx, %r12 + sbbq %rbp, %r13 + sbbq %rbp, %r14 + sbbq %rbp, %r15 + btr $0x3f, %r15 + movq 0xc0(%rsp), %rdi + movq %r12, (%rdi) + movq %r13, 0x8(%rdi) + movq %r14, 0x10(%rdi) + movq %r15, 0x18(%rdi) // The final result is (X + T) / (X - T) // This is the only operation in the whole computation that @@ -1365,7 +2319,7 @@ curve25519_x25519base_alt_fliploop: // answer as output. movq res, %rbp - mul_p25519(resx,y_3,x_3) + mul_p25519(resx,t1,t0) // Restore stack and registers @@ -1387,14 +2341,6 @@ curve25519_x25519base_alt_fliploop: // .section .rodata // **************************************************************************** -// The modulus, for the modular inverse - -curve25519_x25519base_alt_p_25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // 2^254 * G and (2^254 + 8) * G in extended-projective coordinates // but with z = 1 assumed and hence left out, so they are (X,Y,T) only. 
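The "(X + T) / (X - T)" comment closing the patch above compresses the endgame of the whole scalar multiplication: one modular inversion of X - T (the long inlined divstep code) followed by a single modular multiplication. As a reading aid only, here is the same dataflow in C. bignum_inv_p25519 is the real s2n-bignum routine whose body these patches inline; the add/sub/mul helpers are hypothetical stand-ins for the assembler macros (mul_p25519 above is an assembler macro, not a C entry point).

    #include <stdint.h>

    /* Real s2n-bignum routine (its body is what these patches inline). */
    extern void bignum_inv_p25519(uint64_t z[static 4], uint64_t x[static 4]);

    /* Hypothetical stand-ins for the inlined assembler macros. */
    extern void add_p25519(uint64_t z[4], uint64_t x[4], uint64_t y[4]);
    extern void sub_p25519(uint64_t z[4], uint64_t x[4], uint64_t y[4]);
    extern void mul_p25519(uint64_t z[4], uint64_t x[4], uint64_t y[4]);

    /* res := (X + T) / (X - T) mod p_25519: the final conversion from the
       ladder's projective output to the affine result. */
    static void ladder_finish(uint64_t res[4], uint64_t x[4], uint64_t t[4])
    {
        uint64_t num[4], den[4], deninv[4];
        add_p25519(num, x, t);           /* X + T */
        sub_p25519(den, x, t);           /* X - T */
        bignum_inv_p25519(deninv, den);  /* 1 / (X - T), the expensive step */
        mul_p25519(res, num, deninv);    /* the single multiply noted above */
    }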
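Patch 41, which follows, documents a deliberate caveat rather than changing any code: the x25519 functions leave the all-zero output check of RFC 7748, Section 6.1 to the caller. A minimal sketch of that caller-side check, done without branching on secret data, using the byte-level entry point the patch touches; the exact C prototype is an assumption on my part.

    #include <stdint.h>

    /* Byte-level X25519 from this series (prototype assumed). */
    extern void curve25519_x25519_byte(uint8_t res[static 32],
                                       uint8_t scalar[static 32],
                                       uint8_t point[static 32]);

    /* RFC 7748, Section 6.1: check the shared secret for the all-zero
       value (n * P at infinity) in constant time.  Returns 0 when the
       check fails and the key exchange must be aborted. */
    static int x25519_shared_secret(uint8_t out[32], uint8_t scalar[32],
                                    uint8_t peer_point[32])
    {
        uint8_t acc = 0;
        curve25519_x25519_byte(out, scalar, peer_point);
        for (int i = 0; i < 32; i++)
            acc |= out[i];               /* acc == 0 iff out is all zeros */
        return acc != 0;
    }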
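Patch 42, the last in the series, then makes the same swap for the edwards25519 functions that the code above makes for curve25519_x25519base_alt: the generic bignum_modinv is replaced by the divstep-based bignum_inv_p25519. For readers new to the technique, here is a word-sized C model of the divstep ("safegcd") recurrence of Bernstein and Yang that underlies it. This toy takes one step per loop iteration; the real routine runs a fixed number of steps in batches (59 per outer iteration, judging by the #59 extracts and the ten-count loop register in the inlined ARM code further below) and composes the batches through 2x2 signed transition matrices, none of which the toy shows.

    #include <stdint.h>
    #include <stdio.h>

    /* Exact halving of x modulo the odd modulus m: x/2 if x is even,
       (x + m)/2 otherwise (both divisions are exact integers). */
    static int64_t div2(int64_t m, int64_t x)
    {
        return ((x & 1) ? x + m : x) / 2;
    }

    /* Modular inverse via the divstep recurrence, maintaining the
       invariants f == d*x (mod m) and g == e*x (mod m).  Requires m
       odd and gcd(x, m) = 1; word-sized, illustrative only. */
    static int64_t divstep_modinv(int64_t m, int64_t x)
    {
        int64_t delta = 1, f = m, g = x, d = 0, e = 1, t;
        while (g != 0) {
            if (delta > 0 && (g & 1)) {        /* swap-and-subtract case */
                delta = 1 - delta;
                t = g; g = (g - f) / 2; f = t; /* g - f is even: both odd */
                t = e; e = div2(m, e - d); d = t;
            } else if (g & 1) {                /* add case */
                delta = 1 + delta;
                g = (g + f) / 2;
                e = div2(m, e + d);
            } else {                           /* even case: just halve g */
                delta = 1 + delta;
                g = g / 2;
                e = div2(m, e);
            }
        }
        /* Now f = +/-1 and d == f/x (mod m), so 1/x == f*d (mod m). */
        return (((f < 0 ? -d : d) % m) + m) % m;
    }

    int main(void)
    {
        int64_t m = 1000003, x = 12345;   /* small stand-in for p_25519 */
        int64_t inv = divstep_modinv(m, x);
        printf("inv = %lld, inv*x mod m = %lld\n",
               (long long)inv, (long long)(inv * x % m));  /* ... = 1 */
        return 0;
    }

Being branch-based, the toy is not constant-time; the assembly replaces every branch with csel/cmov selections, which is exactly what the long cmoveq and csel runs in the surrounding code are.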
From b8ee5c07c2e6a64f104b542218fccc3c0751ae3b Mon Sep 17 00:00:00 2001
From: Torben Hansen <50673096+torben-hansen@users.noreply.github.com>
Date: Tue, 31 Oct 2023 19:07:06 -0700
Subject: [PATCH 41/42] Document that x25519 function does not implement zero-check

s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/8d4c2e4156958ee04e3388548250377544685c9a
---
 arm/curve25519/curve25519_x25519.S          | 3 ++-
 arm/curve25519/curve25519_x25519_alt.S      | 3 ++-
 arm/curve25519/curve25519_x25519_byte.S     | 3 ++-
 arm/curve25519/curve25519_x25519_byte_alt.S | 3 ++-
 x86_att/curve25519/curve25519_x25519.S      | 3 ++-
 x86_att/curve25519/curve25519_x25519_alt.S  | 3 ++-
 6 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/arm/curve25519/curve25519_x25519.S b/arm/curve25519/curve25519_x25519.S
index d66884d5d4..0b7ec7a111 100644
--- a/arm/curve25519/curve25519_x25519.S
+++ b/arm/curve25519/curve25519_x25519.S
@@ -13,7 +13,8 @@
 // this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the
 // point at infinity. Both n and X inputs are first slightly modified/mangled
 // as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748);
-// in particular the lower three bits of n are set to zero.
+// in particular the lower three bits of n are set to zero. Does not implement
+// the zero-check specified in Section 6.1.
 //
 // Standard ARM ABI: X0 = res, X1 = scalar, X2 = point
 // ----------------------------------------------------------------------------
diff --git a/arm/curve25519/curve25519_x25519_alt.S b/arm/curve25519/curve25519_x25519_alt.S
index 4e9b91b48e..3a521a602f 100644
--- a/arm/curve25519/curve25519_x25519_alt.S
+++ b/arm/curve25519/curve25519_x25519_alt.S
@@ -13,7 +13,8 @@
 // this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the
 // point at infinity. Both n and X inputs are first slightly modified/mangled
 // as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748);
-// in particular the lower three bits of n are set to zero.
+// in particular the lower three bits of n are set to zero. Does not implement
+// the zero-check specified in Section 6.1.
 //
 // Standard ARM ABI: X0 = res, X1 = scalar, X2 = point
 // ----------------------------------------------------------------------------
diff --git a/arm/curve25519/curve25519_x25519_byte.S b/arm/curve25519/curve25519_x25519_byte.S
index d64eb73ed2..6162a38082 100644
--- a/arm/curve25519/curve25519_x25519_byte.S
+++ b/arm/curve25519/curve25519_x25519_byte.S
@@ -13,7 +13,8 @@
 // this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the
 // point at infinity. Both n and X inputs are first slightly modified/mangled
 // as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748);
-// in particular the lower three bits of n are set to zero.
+// in particular the lower three bits of n are set to zero. Does not implement
+// the zero-check specified in Section 6.1.
 //
 // Standard ARM ABI: X0 = res, X1 = scalar, X2 = point
 // ----------------------------------------------------------------------------
diff --git a/arm/curve25519/curve25519_x25519_byte_alt.S b/arm/curve25519/curve25519_x25519_byte_alt.S
index 7f79cfd803..f59e611467 100644
--- a/arm/curve25519/curve25519_x25519_byte_alt.S
+++ b/arm/curve25519/curve25519_x25519_byte_alt.S
@@ -13,7 +13,8 @@
 // this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the
 // point at infinity. Both n and X inputs are first slightly modified/mangled
 // as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748);
-// in particular the lower three bits of n are set to zero.
+// in particular the lower three bits of n are set to zero. Does not implement
+// the zero-check specified in Section 6.1.
 //
 // Standard ARM ABI: X0 = res, X1 = scalar, X2 = point
 // ----------------------------------------------------------------------------
diff --git a/x86_att/curve25519/curve25519_x25519.S b/x86_att/curve25519/curve25519_x25519.S
index 2a97ee9407..9914fdd01c 100644
--- a/x86_att/curve25519/curve25519_x25519.S
+++ b/x86_att/curve25519/curve25519_x25519.S
@@ -20,7 +20,8 @@
 // this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the
 // point at infinity. Both n and X inputs are first slightly modified/mangled
 // as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748);
-// in particular the lower three bits of n are set to zero.
+// in particular the lower three bits of n are set to zero. Does not implement
+// the zero-check specified in Section 6.1.
 //
 // Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point
 // Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point
diff --git a/x86_att/curve25519/curve25519_x25519_alt.S b/x86_att/curve25519/curve25519_x25519_alt.S
index 241c4505af..ca92a9206a 100644
--- a/x86_att/curve25519/curve25519_x25519_alt.S
+++ b/x86_att/curve25519/curve25519_x25519_alt.S
@@ -20,7 +20,8 @@
 // this returns the X coordinate of n * P = (X, Y), or 0 when n * P is the
 // point at infinity. Both n and X inputs are first slightly modified/mangled
 // as specified in the relevant RFC (https://www.rfc-editor.org/rfc/rfc7748);
-// in particular the lower three bits of n are set to zero.
+// in particular the lower three bits of n are set to zero. Does not implement
+// the zero-check specified in Section 6.1.
 //
 // Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point
 // Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point

From e618f26dc893ee780dfa01b2f08f93035b12e780 Mon Sep 17 00:00:00 2001
From: John Harrison
Date: Wed, 1 Nov 2023 18:52:23 -0700
Subject: [PATCH 42/42] Switch edwards25519 operations to divstep-based modular inverse

This replaces the inlined variant of "bignum_modinv" with code from
"bignum_inv_p25519" in all "edwards25519_scalarmul*" functions. Again,
there are consequential changes related to the slightly different
amount of temporary storage needed by bignum_inv_p25519.

s2n-bignum original commit: https://github.com/awslabs/s2n-bignum/commit/7e7b18e8fc83fa25131cfac1c94bd83fbf6cd243
---
 arm/curve25519/edwards25519_scalarmulbase.S   | 1387 +++++++++---
 .../edwards25519_scalarmulbase_alt.S          | 1387 +++++++++---
 arm/curve25519/edwards25519_scalarmuldouble.S | 1401 +++++++++---
 .../edwards25519_scalarmuldouble_alt.S        | 1401 +++++++++---
 .../curve25519/edwards25519_scalarmulbase.S   | 1877 +++++++++++----
 .../edwards25519_scalarmulbase_alt.S          | 1877 +++++++++++----
 .../curve25519/edwards25519_scalarmuldouble.S | 2005 ++++++++++++-----
 .../edwards25519_scalarmuldouble_alt.S        | 2005 ++++++++++++-----
 8 files changed, 9956 insertions(+), 3384 deletions(-)

diff --git a/arm/curve25519/edwards25519_scalarmulbase.S b/arm/curve25519/edwards25519_scalarmulbase.S
index 6ca092489f..8c9d0f9193 100644
--- a/arm/curve25519/edwards25519_scalarmulbase.S
+++ b/arm/curve25519/edwards25519_scalarmulbase.S
@@ -956,346 +956,1045 @@ edwards25519_scalarmulbase_scalarloop:
 // (X,Y,Z,W) back to the affine form (x,y) = (X/Z,Y/Z).
This means // first calling the modular inverse to get w_3 = 1/z_3. - mov x0, 4 - add x1, w_3 - add x2, z_3 - adr x3, edwards25519_scalarmulbase_p_25519 - add x4, tmpspace - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "arm/generic/bignum_modinv.S". - - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -edwards25519_scalarmulbase_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc edwards25519_scalarmulbase_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -edwards25519_scalarmulbase_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmulbase_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc edwards25519_scalarmulbase_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -edwards25519_scalarmulbase_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, edwards25519_scalarmulbase_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmulbase_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc edwards25519_scalarmulbase_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, 
edwards25519_scalarmulbase_wmontend -edwards25519_scalarmulbase_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_wmontloop -edwards25519_scalarmulbase_wmontend: - adcs x16, x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - str x16, [x4, x15, lsl #3] - negs x10, xzr -edwards25519_scalarmulbase_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -edwards25519_scalarmulbase_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, edwards25519_scalarmulbase_zmontend -edwards25519_scalarmulbase_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_zmontloop -edwards25519_scalarmulbase_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -edwards25519_scalarmulbase_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -edwards25519_scalarmulbase_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmulbase_crossloop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - subs x15, x15, x16 - str x15, [x21, x10, lsl #3] - umulh x15, x7, x12 - sub x17, x15, x17 - sbcs x13, x13, x17 - csetm x17, cc - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - subs x15, x15, x16 - str x15, [x22, x10, lsl #3] - umulh x15, x9, x12 - sub x19, x15, x19 - sbcs x14, x14, x19 - csetm x19, cc - add x10, x10, #0x1 - cmp x10, x5 - b.cc edwards25519_scalarmulbase_crossloop - cmn x17, x17 - ldr x15, [x21] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, edwards25519_scalarmulbase_negskip1 -edwards25519_scalarmulbase_negloop1: - add x11, x10, #0x8 - ldr x12, [x21, x11] - extr x15, x12, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, edwards25519_scalarmulbase_negloop1 -edwards25519_scalarmulbase_negskip1: - extr x15, x13, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - cmn x19, x19 - ldr x15, [x22] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, 
edwards25519_scalarmulbase_negskip2 -edwards25519_scalarmulbase_negloop2: - add x11, x10, #0x8 - ldr x12, [x22, x11] - extr x15, x12, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, edwards25519_scalarmulbase_negloop2 -edwards25519_scalarmulbase_negskip2: - extr x15, x14, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x10, xzr - cmn x17, x17 -edwards25519_scalarmulbase_wfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - and x11, x11, x17 - eor x12, x12, x17 - adcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_wfliploop - mvn x19, x19 - mov x10, xzr - cmn x19, x19 -edwards25519_scalarmulbase_zfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - and x11, x11, x19 - eor x12, x12, x19 - adcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_zfliploop - subs x2, x2, #0x3a - b.hi edwards25519_scalarmulbase_outerloop + add x0, w_3 + add x1, z_3 + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 128 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, w_3, x_3 +// and y_3. + + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b edwards25519_scalarmulbase_invmidloop +edwards25519_scalarmulbase_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, 
x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr 
x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, #80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x6, x5, x2, #63 + ldp x0, x1, [sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 + smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +edwards25519_scalarmulbase_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + 
cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, #21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 
+ csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + 
madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne edwards25519_scalarmulbase_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + 
adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + mov x4, x20 + stp x0, x1, [x4] + stp x2, x5, [x4, #16] // The final result is x = X * inv(Z), y = Y * inv(Z). // These are the only operations in the whole computation that @@ -1322,14 +2021,6 @@ edwards25519_scalarmulbase_zfliploop: // .section .rodata // **************************************************************************** -// The modulus p_25519 = 2^255 - 19, for the modular inverse - -edwards25519_scalarmulbase_p_25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // 0 * B = 0 and 2^251 * B in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. diff --git a/arm/curve25519/edwards25519_scalarmulbase_alt.S b/arm/curve25519/edwards25519_scalarmulbase_alt.S index e8dd9114a4..03e5598f2c 100644 --- a/arm/curve25519/edwards25519_scalarmulbase_alt.S +++ b/arm/curve25519/edwards25519_scalarmulbase_alt.S @@ -798,346 +798,1045 @@ edwards25519_scalarmulbase_alt_scalarloop: // (X,Y,Z,W) back to the affine form (x,y) = (X/Z,Y/Z). This means // first calling the modular inverse to get w_3 = 1/z_3. - mov x0, 4 - add x1, w_3 - add x2, z_3 - adr x3, edwards25519_scalarmulbase_alt_p_25519 - add x4, tmpspace - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "arm/generic/bignum_modinv.S". 
- - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -edwards25519_scalarmulbase_alt_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc edwards25519_scalarmulbase_alt_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -edwards25519_scalarmulbase_alt_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmulbase_alt_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc edwards25519_scalarmulbase_alt_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -edwards25519_scalarmulbase_alt_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, edwards25519_scalarmulbase_alt_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmulbase_alt_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc edwards25519_scalarmulbase_alt_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, edwards25519_scalarmulbase_alt_wmontend -edwards25519_scalarmulbase_alt_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_alt_wmontloop -edwards25519_scalarmulbase_alt_wmontend: - adcs x16, 
x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - str x16, [x4, x15, lsl #3] - negs x10, xzr -edwards25519_scalarmulbase_alt_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_alt_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -edwards25519_scalarmulbase_alt_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_alt_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, edwards25519_scalarmulbase_alt_zmontend -edwards25519_scalarmulbase_alt_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_alt_zmontloop -edwards25519_scalarmulbase_alt_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -edwards25519_scalarmulbase_alt_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_alt_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -edwards25519_scalarmulbase_alt_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_alt_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmulbase_alt_crossloop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - subs x15, x15, x16 - str x15, [x21, x10, lsl #3] - umulh x15, x7, x12 - sub x17, x15, x17 - sbcs x13, x13, x17 - csetm x17, cc - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - subs x15, x15, x16 - str x15, [x22, x10, lsl #3] - umulh x15, x9, x12 - sub x19, x15, x19 - sbcs x14, x14, x19 - csetm x19, cc - add x10, x10, #0x1 - cmp x10, x5 - b.cc edwards25519_scalarmulbase_alt_crossloop - cmn x17, x17 - ldr x15, [x21] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, edwards25519_scalarmulbase_alt_negskip1 -edwards25519_scalarmulbase_alt_negloop1: - add x11, x10, #0x8 - ldr x12, [x21, x11] - extr x15, x12, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, edwards25519_scalarmulbase_alt_negloop1 -edwards25519_scalarmulbase_alt_negskip1: - extr x15, x13, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - cmn x19, x19 - ldr x15, [x22] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, edwards25519_scalarmulbase_alt_negskip2 -edwards25519_scalarmulbase_alt_negloop2: - add x11, x10, #0x8 - ldr x12, [x22, x11] - extr x15, x12, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, edwards25519_scalarmulbase_alt_negloop2 -edwards25519_scalarmulbase_alt_negskip2: - extr x15, x14, 
x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x10, xzr - cmn x17, x17 -edwards25519_scalarmulbase_alt_wfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - and x11, x11, x17 - eor x12, x12, x17 - adcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_alt_wfliploop - mvn x19, x19 - mov x10, xzr - cmn x19, x19 -edwards25519_scalarmulbase_alt_zfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - and x11, x11, x19 - eor x12, x12, x19 - adcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmulbase_alt_zfliploop - subs x2, x2, #0x3a - b.hi edwards25519_scalarmulbase_alt_outerloop + add x0, w_3 + add x1, z_3 + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 128 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, w_3, x_3 +// and y_3. + + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b edwards25519_scalarmulbase_alt_invmidloop +edwards25519_scalarmulbase_alt_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, 
x13 + umulh x1, x1, x13 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, #80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc 
x5, x5, x1 + extr x6, x5, x2, #63 + ldp x0, x1, [sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 + smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +edwards25519_scalarmulbase_alt_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + 
cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, #21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 
+ asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + 
csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne edwards25519_scalarmulbase_alt_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + 
+        sbcs    x1, x1, xzr
+        sbcs    x2, x2, xzr
+        sbc     x5, x5, xzr
+        and     x5, x5, #0x7fffffffffffffff
+        mov     x4, x20
+        stp     x0, x1, [x4]
+        stp     x2, x5, [x4, #16]
 
 // The final result is x = X * inv(Z), y = Y * inv(Z).
 // These are the only operations in the whole computation that
@@ -1164,14 +1863,6 @@ edwards25519_scalarmulbase_alt_zfliploop:
 // .section .rodata
 // ****************************************************************************
 
-// The modulus p_25519 = 2^255 - 19, for the modular inverse
-
-edwards25519_scalarmulbase_alt_p_25519:
-        .quad   0xffffffffffffffed
-        .quad   0xffffffffffffffff
-        .quad   0xffffffffffffffff
-        .quad   0x7fffffffffffffff
-
 // 0 * B = 0 and 2^251 * B in extended-projective coordinates
 // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only.
 
diff --git a/arm/curve25519/edwards25519_scalarmuldouble.S b/arm/curve25519/edwards25519_scalarmuldouble.S
index cd760f1212..00ea37eaaf 100644
--- a/arm/curve25519/edwards25519_scalarmuldouble.S
+++ b/arm/curve25519/edwards25519_scalarmuldouble.S
@@ -57,14 +57,14 @@
 #define scalar sp, #(0*NUMSIZE)
 #define bscalar sp, #(1*NUMSIZE)
 
-#define acc sp, #(2*NUMSIZE)
-#define acc_x sp, #(2*NUMSIZE)
-#define acc_y sp, #(3*NUMSIZE)
-#define acc_z sp, #(4*NUMSIZE)
-#define acc_w sp, #(5*NUMSIZE)
+#define btabent sp, #(2*NUMSIZE)
+#define acc sp, #(5*NUMSIZE)
+#define acc_x sp, #(5*NUMSIZE)
+#define acc_y sp, #(6*NUMSIZE)
+#define acc_z sp, #(7*NUMSIZE)
+#define acc_w sp, #(8*NUMSIZE)
 
-#define tabent sp, #(6*NUMSIZE)
-#define btabent sp, #(10*NUMSIZE)
+#define tabent sp, #(9*NUMSIZE)
 
 #define tab sp, #(13*NUMSIZE)
 
@@ -1872,347 +1872,1044 @@ edwards25519_scalarmuldouble_loop:
 
 // Modular inverse setup
 
-        mov     x0, #4
-        add     x1, tabent
-        add     x2, acc+64
-        adr     x3, edwards25519_scalarmuldouble_p25519
-        add     x4, btabent
-
-// Inline copy of bignum_modinv, identical except for stripping out the
-// prologue and epilogue saving and restoring registers and the initial
-// test for k = 0 (which is trivially false here since k = 4). For more
-// details and explanations see "arm/generic/bignum_modinv.S".
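The comment above notes that the final affine result x = X * inv(Z), y = Y * inv(Z)
is the only place in the whole computation where a modular inverse is needed, which
is exactly what the removed bignum_modinv call used to supply. Purely as an
illustration of that projective-to-affine step, here is a minimal self-contained C
sketch; the 61-bit Mersenne prime and the helper names are placeholder assumptions,
not s2n-bignum APIs, and it uses simple Fermat inversion where the real code uses a
constant-time divstep-based inverse mod p_25519 = 2^255 - 19.

#include <stdint.h>

/* Toy modulus for illustration only: the Mersenne prime 2^61 - 1,
   standing in for p_25519 = 2^255 - 19. */
static const uint64_t P = 2305843009213693951ull;

/* (a * b) mod P, using the GCC/Clang unsigned __int128 extension. */
static uint64_t mulmod(uint64_t a, uint64_t b)
{
    return (uint64_t)(((unsigned __int128)a * b) % P);
}

/* Fermat inversion: inv(z) = z^(P-2) mod P for prime P and z != 0. */
static uint64_t invmod(uint64_t z)
{
    uint64_t e = P - 2, r = 1;
    while (e) {
        if (e & 1) r = mulmod(r, z);
        z = mulmod(z, z);
        e >>= 1;
    }
    return r;
}

/* Projective-to-affine: (X:Y:Z) -> (x, y) with x = X * inv(Z) and
   y = Y * inv(Z), mirroring the comment in the assembly above. */
static void to_affine(uint64_t X, uint64_t Y, uint64_t Z,
                      uint64_t *x, uint64_t *y)
{
    uint64_t zi = invmod(Z);
    *x = mulmod(X, zi);
    *y = mulmod(Y, zi);
}

Fermat inversion is just the shortest way to show the algebra; the inlined
bignum_inv_p25519 that replaces bignum_modinv in this patch computes the same
inverse in constant time via divsteps rather than exponentiation.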
- -edwards25519_scalarmuldouble_modinv: - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -edwards25519_scalarmuldouble_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc edwards25519_scalarmuldouble_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -edwards25519_scalarmuldouble_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmuldouble_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc edwards25519_scalarmuldouble_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -edwards25519_scalarmuldouble_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, edwards25519_scalarmuldouble_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmuldouble_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc edwards25519_scalarmuldouble_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, edwards25519_scalarmuldouble_wmontend -edwards25519_scalarmuldouble_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_wmontloop -edwards25519_scalarmuldouble_wmontend: - 
adcs x16, x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - str x16, [x4, x15, lsl #3] - negs x10, xzr -edwards25519_scalarmuldouble_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -edwards25519_scalarmuldouble_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, edwards25519_scalarmuldouble_zmontend -edwards25519_scalarmuldouble_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_zmontloop -edwards25519_scalarmuldouble_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -edwards25519_scalarmuldouble_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -edwards25519_scalarmuldouble_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmuldouble_crossloop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - subs x15, x15, x16 - str x15, [x21, x10, lsl #3] - umulh x15, x7, x12 - sub x17, x15, x17 - sbcs x13, x13, x17 - csetm x17, cc - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - subs x15, x15, x16 - str x15, [x22, x10, lsl #3] - umulh x15, x9, x12 - sub x19, x15, x19 - sbcs x14, x14, x19 - csetm x19, cc - add x10, x10, #0x1 - cmp x10, x5 - b.cc edwards25519_scalarmuldouble_crossloop - cmn x17, x17 - ldr x15, [x21] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, edwards25519_scalarmuldouble_negskip1 -edwards25519_scalarmuldouble_negloop1: - add x11, x10, #0x8 - ldr x12, [x21, x11] - extr x15, x12, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, edwards25519_scalarmuldouble_negloop1 -edwards25519_scalarmuldouble_negskip1: - extr x15, x13, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - cmn x19, x19 - ldr x15, [x22] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, edwards25519_scalarmuldouble_negskip2 -edwards25519_scalarmuldouble_negloop2: - add x11, x10, #0x8 - ldr x12, [x22, x11] - extr x15, x12, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, edwards25519_scalarmuldouble_negloop2 -edwards25519_scalarmuldouble_negskip2: - extr x15, x14, x15, #58 - eor x15, x15, x19 - 
adcs x15, x15, xzr - str x15, [x22, x10] - mov x10, xzr - cmn x17, x17 -edwards25519_scalarmuldouble_wfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - and x11, x11, x17 - eor x12, x12, x17 - adcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_wfliploop - mvn x19, x19 - mov x10, xzr - cmn x19, x19 -edwards25519_scalarmuldouble_zfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - and x11, x11, x19 - eor x12, x12, x19 - adcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_zfliploop - subs x2, x2, #0x3a - b.hi edwards25519_scalarmuldouble_outerloop + add x0, tabent + add x1, acc+64 + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 128 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, acc, tabent. + + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b edwards25519_scalarmuldouble_invmidloop +edwards25519_scalarmuldouble_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + adc 
x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, #80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x6, x5, x2, #63 + ldp x0, x1, 
[sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 + smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +edwards25519_scalarmuldouble_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, 
ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, #21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, 
xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, 
#0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne edwards25519_scalarmuldouble_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x5, x5, xzr + 
+        and     x5, x5, #0x7fffffffffffffff
+        mov     x4, x20
+        stp     x0, x1, [x4]
+        stp     x2, x5, [x4, #16]
 
 // Store result. Note that these are the only reductions mod 2^255-19
@@ -2330,14 +3027,6 @@ edwards25519_scalarmuldouble_pepadd:
 // .section .rodata
 // ****************************************************************************
 
-// The modulus p_25519 = 2^255 - 19, for the modular inverse
-
-edwards25519_scalarmuldouble_p25519:
-        .quad   0xffffffffffffffed
-        .quad   0xffffffffffffffff
-        .quad   0xffffffffffffffff
-        .quad   0x7fffffffffffffff
-
 // Precomputed table of multiples of generator for edwards25519
 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples.
 
diff --git a/arm/curve25519/edwards25519_scalarmuldouble_alt.S b/arm/curve25519/edwards25519_scalarmuldouble_alt.S
index c8fe77c31f..ad05eae1fb 100644
--- a/arm/curve25519/edwards25519_scalarmuldouble_alt.S
+++ b/arm/curve25519/edwards25519_scalarmuldouble_alt.S
@@ -57,14 +57,14 @@
 #define scalar sp, #(0*NUMSIZE)
 #define bscalar sp, #(1*NUMSIZE)
 
-#define acc sp, #(2*NUMSIZE)
-#define acc_x sp, #(2*NUMSIZE)
-#define acc_y sp, #(3*NUMSIZE)
-#define acc_z sp, #(4*NUMSIZE)
-#define acc_w sp, #(5*NUMSIZE)
+#define btabent sp, #(2*NUMSIZE)
+#define acc sp, #(5*NUMSIZE)
+#define acc_x sp, #(5*NUMSIZE)
+#define acc_y sp, #(6*NUMSIZE)
+#define acc_z sp, #(7*NUMSIZE)
+#define acc_w sp, #(8*NUMSIZE)
 
-#define tabent sp, #(6*NUMSIZE)
-#define btabent sp, #(10*NUMSIZE)
+#define tabent sp, #(9*NUMSIZE)
 
 #define tab sp, #(13*NUMSIZE)
 
@@ -1656,347 +1656,1044 @@ edwards25519_scalarmuldouble_alt_loop:
 
 // Modular inverse setup
 
-        mov     x0, #4
-        add     x1, tabent
-        add     x2, acc+64
-        adr     x3, edwards25519_scalarmuldouble_alt_p25519
-        add     x4, btabent
-
-// Inline copy of bignum_modinv, identical except for stripping out the
-// prologue and epilogue saving and restoring registers and the initial
-// test for k = 0 (which is trivially false here since k = 4). For more
-// details and explanations see "arm/generic/bignum_modinv.S".
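The "only reductions mod 2^255-19" comment above refers to the canonical reduction
performed just before the result is stored back. As a hedged illustration of what
such a reduction does, here is a variable-time C sketch over 4 little-endian 64-bit
limbs; the function names are placeholders rather than s2n-bignum APIs, and the
assembly performs the equivalent correction branch-free with csel so that it runs
in constant time.

#include <stdint.h>

/* z = x - p_25519 over 4 little-endian limbs; returns the borrow out,
   i.e. 1 exactly when x < p_25519. */
static uint64_t sub_p25519(uint64_t z[4], const uint64_t x[4])
{
    static const uint64_t P[4] = {
        0xffffffffffffffedull, 0xffffffffffffffffull,
        0xffffffffffffffffull, 0x7fffffffffffffffull
    };
    uint64_t borrow = 0;
    for (int i = 0; i < 4; i++) {
        z[i] = x[i] - P[i] - borrow;
        /* borrow out iff x[i] < P[i] + borrow, computed without overflow */
        borrow = (x[i] < P[i]) || (x[i] == P[i] && borrow != 0);
    }
    return borrow;
}

/* Reduce any 256-bit x to its canonical residue mod p_25519 = 2^255 - 19.
   Two conditional subtractions suffice since 2^256 < 3 * p_25519. */
static void mod_p25519(uint64_t z[4], const uint64_t x[4])
{
    uint64_t t[4];
    for (int i = 0; i < 4; i++) z[i] = x[i];
    for (int k = 0; k < 2; k++)
        if (sub_p25519(t, z) == 0)      /* no borrow: z >= p, take z - p */
            for (int i = 0; i < 4; i++) z[i] = t[i];
}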
- -edwards25519_scalarmuldouble_alt_modinv: - lsl x10, x0, #3 - add x21, x4, x10 - add x22, x21, x10 - mov x10, xzr -edwards25519_scalarmuldouble_alt_copyloop: - ldr x11, [x2, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - str x11, [x21, x10, lsl #3] - str x12, [x22, x10, lsl #3] - str x12, [x4, x10, lsl #3] - str xzr, [x1, x10, lsl #3] - add x10, x10, #0x1 - cmp x10, x0 - b.cc edwards25519_scalarmuldouble_alt_copyloop - ldr x11, [x4] - sub x12, x11, #0x1 - str x12, [x4] - lsl x20, x11, #2 - sub x20, x11, x20 - eor x20, x20, #0x2 - mov x12, #0x1 - madd x12, x11, x20, x12 - mul x11, x12, x12 - madd x20, x12, x20, x20 - mul x12, x11, x11 - madd x20, x11, x20, x20 - mul x11, x12, x12 - madd x20, x12, x20, x20 - madd x20, x11, x20, x20 - lsl x2, x0, #7 -edwards25519_scalarmuldouble_alt_outerloop: - add x10, x2, #0x3f - lsr x5, x10, #6 - cmp x5, x0 - csel x5, x0, x5, cs - mov x13, xzr - mov x15, xzr - mov x14, xzr - mov x16, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmuldouble_alt_toploop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - orr x17, x11, x12 - cmp x17, xzr - and x17, x19, x13 - csel x15, x17, x15, ne - and x17, x19, x14 - csel x16, x17, x16, ne - csel x13, x11, x13, ne - csel x14, x12, x14, ne - csetm x19, ne - add x10, x10, #0x1 - cmp x10, x5 - b.cc edwards25519_scalarmuldouble_alt_toploop - orr x11, x13, x14 - clz x12, x11 - negs x17, x12 - lsl x13, x13, x12 - csel x15, x15, xzr, ne - lsl x14, x14, x12 - csel x16, x16, xzr, ne - lsr x15, x15, x17 - lsr x16, x16, x17 - orr x13, x13, x15 - orr x14, x14, x16 - ldr x15, [x21] - ldr x16, [x22] - mov x6, #0x1 - mov x7, xzr - mov x8, xzr - mov x9, #0x1 - mov x10, #0x3a - tst x15, #0x1 -edwards25519_scalarmuldouble_alt_innerloop: - csel x11, x14, xzr, ne - csel x12, x16, xzr, ne - csel x17, x8, xzr, ne - csel x19, x9, xzr, ne - ccmp x13, x14, #0x2, ne - sub x11, x13, x11 - sub x12, x15, x12 - csel x14, x14, x13, cs - cneg x11, x11, cc - csel x16, x16, x15, cs - cneg x15, x12, cc - csel x8, x8, x6, cs - csel x9, x9, x7, cs - tst x12, #0x2 - add x6, x6, x17 - add x7, x7, x19 - lsr x13, x11, #1 - lsr x15, x15, #1 - add x8, x8, x8 - add x9, x9, x9 - sub x10, x10, #0x1 - cbnz x10, edwards25519_scalarmuldouble_alt_innerloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmuldouble_alt_congloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - adds x15, x15, x16 - extr x17, x15, x17, #58 - str x17, [x4, x10, lsl #3] - mov x17, x15 - umulh x15, x7, x12 - adc x13, x13, x15 - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - adds x15, x15, x16 - extr x19, x15, x19, #58 - str x19, [x1, x10, lsl #3] - mov x19, x15 - umulh x15, x9, x12 - adc x14, x14, x15 - add x10, x10, #0x1 - cmp x10, x0 - b.cc edwards25519_scalarmuldouble_alt_congloop - extr x13, x13, x17, #58 - extr x14, x14, x19, #58 - ldr x11, [x4] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, edwards25519_scalarmuldouble_alt_wmontend -edwards25519_scalarmuldouble_alt_wmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x4, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, 
edwards25519_scalarmuldouble_alt_wmontloop -edwards25519_scalarmuldouble_alt_wmontend: - adcs x16, x16, x13 - adc x13, xzr, xzr - sub x15, x10, #0x1 - str x16, [x4, x15, lsl #3] - negs x10, xzr -edwards25519_scalarmuldouble_alt_wcmploop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_alt_wcmploop - sbcs xzr, x13, xzr - csetm x13, cs - negs x10, xzr -edwards25519_scalarmuldouble_alt_wcorrloop: - ldr x11, [x4, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x13 - sbcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_alt_wcorrloop - ldr x11, [x1] - mul x17, x11, x20 - ldr x12, [x3] - mul x15, x17, x12 - umulh x16, x17, x12 - adds x11, x11, x15 - mov x10, #0x1 - sub x11, x0, #0x1 - cbz x11, edwards25519_scalarmuldouble_alt_zmontend -edwards25519_scalarmuldouble_alt_zmontloop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - mul x15, x17, x11 - adcs x12, x12, x16 - umulh x16, x17, x11 - adc x16, x16, xzr - adds x12, x12, x15 - sub x15, x10, #0x1 - str x12, [x1, x15, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_alt_zmontloop -edwards25519_scalarmuldouble_alt_zmontend: - adcs x16, x16, x14 - adc x14, xzr, xzr - sub x15, x10, #0x1 - str x16, [x1, x15, lsl #3] - negs x10, xzr -edwards25519_scalarmuldouble_alt_zcmploop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - sbcs xzr, x11, x12 - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_alt_zcmploop - sbcs xzr, x14, xzr - csetm x14, cs - negs x10, xzr -edwards25519_scalarmuldouble_alt_zcorrloop: - ldr x11, [x1, x10, lsl #3] - ldr x12, [x3, x10, lsl #3] - and x12, x12, x14 - sbcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_alt_zcorrloop - mov x13, xzr - mov x14, xzr - mov x17, xzr - mov x19, xzr - mov x10, xzr -edwards25519_scalarmuldouble_alt_crossloop: - ldr x11, [x21, x10, lsl #3] - ldr x12, [x22, x10, lsl #3] - mul x15, x6, x11 - mul x16, x7, x12 - adds x15, x15, x13 - umulh x13, x6, x11 - adc x13, x13, xzr - subs x15, x15, x16 - str x15, [x21, x10, lsl #3] - umulh x15, x7, x12 - sub x17, x15, x17 - sbcs x13, x13, x17 - csetm x17, cc - mul x15, x8, x11 - mul x16, x9, x12 - adds x15, x15, x14 - umulh x14, x8, x11 - adc x14, x14, xzr - subs x15, x15, x16 - str x15, [x22, x10, lsl #3] - umulh x15, x9, x12 - sub x19, x15, x19 - sbcs x14, x14, x19 - csetm x19, cc - add x10, x10, #0x1 - cmp x10, x5 - b.cc edwards25519_scalarmuldouble_alt_crossloop - cmn x17, x17 - ldr x15, [x21] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, edwards25519_scalarmuldouble_alt_negskip1 -edwards25519_scalarmuldouble_alt_negloop1: - add x11, x10, #0x8 - ldr x12, [x21, x11] - extr x15, x12, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - mov x15, x12 - add x10, x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, edwards25519_scalarmuldouble_alt_negloop1 -edwards25519_scalarmuldouble_alt_negskip1: - extr x15, x13, x15, #58 - eor x15, x15, x17 - adcs x15, x15, xzr - str x15, [x21, x10] - cmn x19, x19 - ldr x15, [x22] - mov x10, xzr - sub x6, x5, #0x1 - cbz x6, edwards25519_scalarmuldouble_alt_negskip2 -edwards25519_scalarmuldouble_alt_negloop2: - add x11, x10, #0x8 - ldr x12, [x22, x11] - extr x15, x12, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x15, x12 - add x10, 
x10, #0x8 - sub x6, x6, #0x1 - cbnz x6, edwards25519_scalarmuldouble_alt_negloop2 -edwards25519_scalarmuldouble_alt_negskip2: - extr x15, x14, x15, #58 - eor x15, x15, x19 - adcs x15, x15, xzr - str x15, [x22, x10] - mov x10, xzr - cmn x17, x17 -edwards25519_scalarmuldouble_alt_wfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x4, x10, lsl #3] - and x11, x11, x17 - eor x12, x12, x17 - adcs x11, x11, x12 - str x11, [x4, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_alt_wfliploop - mvn x19, x19 - mov x10, xzr - cmn x19, x19 -edwards25519_scalarmuldouble_alt_zfliploop: - ldr x11, [x3, x10, lsl #3] - ldr x12, [x1, x10, lsl #3] - and x11, x11, x19 - eor x12, x12, x19 - adcs x11, x11, x12 - str x11, [x1, x10, lsl #3] - add x10, x10, #0x1 - sub x11, x10, x0 - cbnz x11, edwards25519_scalarmuldouble_alt_zfliploop - subs x2, x2, #0x3a - b.hi edwards25519_scalarmuldouble_alt_outerloop + add x0, tabent + add x1, acc+64 + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 128 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, acc, tabent. + + mov x20, x0 + mov x10, #0xffffffffffffffed + mov x11, #0xffffffffffffffff + stp x10, x11, [sp] + mov x12, #0x7fffffffffffffff + stp x11, x12, [sp, #16] + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + mov x7, #0x13 + lsr x6, x5, #63 + madd x6, x7, x6, x7 + adds x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + orr x5, x5, #0x8000000000000000 + adcs x5, x5, xzr + csel x6, x7, xzr, cc + subs x2, x2, x6 + sbcs x3, x3, xzr + sbcs x4, x4, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + stp x2, x3, [sp, #32] + stp x4, x5, [sp, #48] + stp xzr, xzr, [sp, #64] + stp xzr, xzr, [sp, #80] + mov x10, #0x2099 + movk x10, #0x7502, lsl #16 + movk x10, #0x9e23, lsl #32 + movk x10, #0xa0f9, lsl #48 + mov x11, #0x2595 + movk x11, #0x1d13, lsl #16 + movk x11, #0x8f3f, lsl #32 + movk x11, #0xa8c6, lsl #48 + mov x12, #0x5242 + movk x12, #0x5ac, lsl #16 + movk x12, #0x8938, lsl #32 + movk x12, #0x6c6c, lsl #48 + mov x13, #0x615 + movk x13, #0x4177, lsl #16 + movk x13, #0x8b2, lsl #32 + movk x13, #0x2765, lsl #48 + stp x10, x11, [sp, #96] + stp x12, x13, [sp, #112] + mov x21, #0xa + mov x22, #0x1 + b edwards25519_scalarmuldouble_alt_invmidloop +edwards25519_scalarmuldouble_alt_invloop: + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + and x0, x12, x16 + and x1, x13, x17 + add x19, x0, x1 + ldr x7, [sp] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #32] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x7, [sp, #8] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #40] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + adc x6, x6, x1 + extr 
x4, x2, x4, #59 + str x4, [sp] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [sp, #32] + ldr x7, [sp, #16] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #48] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [sp, #8] + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [sp, #40] + ldr x7, [sp, #24] + eor x1, x7, x14 + asr x3, x1, #63 + and x3, x3, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #56] + eor x1, x8, x15 + asr x0, x1, #63 + and x0, x0, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [sp, #16] + extr x5, x3, x5, #59 + str x5, [sp, #24] + eor x1, x7, x16 + asr x5, x1, #63 + and x5, x5, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + asr x0, x1, #63 + and x0, x0, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [sp, #48] + extr x2, x5, x2, #59 + str x2, [sp, #56] + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x5, x19, x0 + adc x3, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x5, x5, x0 + str x5, [sp, #96] + adc x3, x3, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x3, x3, x0 + str x3, [sp, #104] + adc x4, x4, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + eor x1, x7, x16 + mul x0, x1, x12 + umulh x1, x1, x12 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, x17 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x4, x4, x0 + str x4, [sp, #112] + adc x2, x2, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + add x6, x6, x3, asr #63 + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x3, x6, x3 + ldr x6, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x3 + asr x3, x3, #63 + adcs x6, x6, x3 + adc x5, x5, x3 + stp x0, x1, [sp, #64] + stp x6, x5, [sp, #80] + eor x1, x7, x16 + and x5, x16, x12 + neg x5, x5 + mul x0, x1, x12 + umulh x1, x1, 
x12 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, x17 + and x0, x17, x13 + sub x5, x5, x0 + mul x0, x1, x13 + umulh x1, x1, x13 + adds x2, x2, x0 + adc x5, x5, x1 + extr x6, x5, x2, #63 + ldp x0, x1, [sp, #96] + add x6, x6, x5, asr #63 + mov x5, #0x13 + mul x4, x6, x5 + add x2, x2, x6, lsl #63 + smulh x5, x6, x5 + ldr x3, [sp, #112] + adds x0, x0, x4 + adcs x1, x1, x5 + asr x5, x5, #63 + adcs x3, x3, x5 + adc x2, x2, x5 + stp x0, x1, [sp, #96] + stp x3, x2, [sp, #112] +edwards25519_scalarmuldouble_alt_invmidloop: + mov x1, x22 + ldr x2, [sp] + ldr x3, [sp, #32] + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + 
cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x8, x4, #0x100, lsl #12 + sbfx x8, x8, #21, #21 + mov x11, #0x100000 + add x11, x11, x11, lsl #21 + add x9, x4, x11 + asr x9, x9, #42 + add x10, x5, #0x100, lsl #12 + sbfx x10, x10, #21, #21 + add x11, x5, x11 + asr x11, x11, #42 + mul x6, x8, x2 + mul x7, x9, x3 + mul x2, x10, x2 + mul x3, x11, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 
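Each repeated csel/ccmp/cneg/add/tst/asr group here performs one divstep, branch-free, on low-20-bit approximations of the working values f and g (the #0xfffff masks above). A non-constant-time Python sketch of the underlying recurrence, assuming the standard Bernstein-Yang formulation:

    def divstep(delta, f, g):
        # f stays odd throughout; each step retires one low bit of g.
        if delta > 0 and g & 1:
            return 1 - delta, g, (g - f) >> 1
        return 1 + delta, f, (g + (g & 1) * f) >> 1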
+ csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #21, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #42 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #21, #21 + add x15, x5, x15 + asr x15, x15, #42 + mul x6, x12, x2 + mul x7, x13, x3 + mul x2, x14, x2 + mul x3, x15, x3 + add x4, x6, x7 + add x5, x2, x3 + asr x2, x4, #20 + asr x3, x5, #20 + and x4, x2, #0xfffff + orr x4, x4, #0xfffffe0000000000 + and x5, x3, #0xfffff + orr x5, x5, #0xc000000000000000 + tst x5, #0x1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + mul x2, x12, x8 + mul x3, x12, x9 + mul x6, x14, x8 + mul x7, x14, x9 + madd x8, x13, x10, x2 + madd x9, x13, x11, x3 + madd x16, x15, x10, x6 + madd x17, x15, x11, x7 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, 
ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + tst x5, #0x2 + asr x5, x5, #1 + csel x6, x4, xzr, ne + ccmp x1, xzr, #0x8, ne + cneg x1, x1, ge + cneg x6, x6, ge + csel x4, x5, x4, ge + add x5, x5, x6 + add x1, x1, #0x2 + asr x5, x5, #1 + add x12, x4, #0x100, lsl #12 + sbfx x12, x12, #22, #21 + mov x15, #0x100000 + add x15, x15, x15, lsl #21 + add x13, x4, x15 + asr x13, x13, #43 + add x14, x5, #0x100, lsl #12 + sbfx x14, x14, #22, #21 + add x15, x5, x15 + asr x15, x15, #43 + mneg x2, x12, x8 + mneg x3, x12, x9 + mneg x4, x14, x8 + mneg x5, x14, x9 + msub x10, x13, x16, x2 + msub x11, x13, x17, x3 + msub x12, x15, x16, x4 + msub x13, x15, x17, x5 + mov x22, x1 + subs x21, x21, #0x1 + b.ne edwards25519_scalarmuldouble_alt_invloop + ldr x0, [sp] + ldr x1, [sp, #32] + mul x0, x0, x10 + madd x1, x1, x11, x0 + asr x0, x1, #63 + cmp x10, xzr + csetm x14, mi + cneg x10, x10, mi + eor x14, x14, x0 + cmp x11, xzr + csetm x15, mi + cneg x11, x11, mi + eor x15, x15, x0 + cmp x12, xzr + csetm x16, mi + cneg x12, x12, mi + eor x16, x16, x0 + cmp x13, xzr + csetm x17, mi + cneg x13, x13, mi + eor x17, x17, x0 + and x0, x10, x14 + and x1, x11, x15 + add x9, x0, x1 + ldr x7, [sp, #64] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x4, x9, x0 + adc x2, xzr, x1 + ldr x8, [sp, #96] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x4, x4, x0 + str x4, [sp, #64] + adc x2, x2, x1 + ldr x7, [sp, #72] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [sp, #104] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x2, x2, x0 + str x2, [sp, #72] + adc x6, x6, x1 + ldr x7, [sp, #80] + eor x1, x7, x14 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [sp, #112] + eor x1, x8, x15 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x6, x6, x0 + str x6, [sp, #80] + adc x5, x5, x1 + ldr x7, [sp, #88] + eor x1, x7, x14 + and x3, x14, x10 + neg x3, x3 + mul x0, x1, x10 + umulh x1, x1, x10 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [sp, #120] + eor x1, x8, x15 + and x0, x15, x11 + sub x3, x3, x0 + mul x0, x1, x11 + umulh x1, x1, x11 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x3, x5, #63 + ldp x0, x1, [sp, #64] + tst x3, x3 + cinc x6, x6, pl + mov x3, #0x13 + mul x4, x6, x3 + add x5, x5, x6, lsl #63 + smulh x6, x6, 
x3 + ldr x2, [sp, #80] + adds x0, x0, x4 + adcs x1, x1, x6 + asr x6, x6, #63 + adcs x2, x2, x6 + adcs x5, x5, x6 + csel x3, x3, xzr, mi + subs x0, x0, x3 + sbcs x1, x1, xzr + sbcs x2, x2, xzr + sbc x5, x5, xzr + and x5, x5, #0x7fffffffffffffff + mov x4, x20 + stp x0, x1, [x4] + stp x2, x5, [x4, #16] // Store result. Note that these are the only reductions mod 2^255-19 @@ -2114,14 +2811,6 @@ edwards25519_scalarmuldouble_alt_pepadd: // .section .rodata // **************************************************************************** -// The modulus p_25519 = 2^255 - 19, for the modular inverse - -edwards25519_scalarmuldouble_alt_p25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. diff --git a/x86_att/curve25519/edwards25519_scalarmulbase.S b/x86_att/curve25519/edwards25519_scalarmulbase.S index a024c9daa4..c44e31724c 100644 --- a/x86_att/curve25519/edwards25519_scalarmulbase.S +++ b/x86_att/curve25519/edwards25519_scalarmulbase.S @@ -38,23 +38,22 @@ #define xpy_2 (2*NUMSIZE)(%rsp) #define kxy_2 (3*NUMSIZE)(%rsp) -#define acc (4*NUMSIZE)(%rsp) -#define x_1 (4*NUMSIZE)(%rsp) -#define y_1 (5*NUMSIZE)(%rsp) -#define z_1 (6*NUMSIZE)(%rsp) -#define w_1 (7*NUMSIZE)(%rsp) -#define x_3 (4*NUMSIZE)(%rsp) -#define y_3 (5*NUMSIZE)(%rsp) -#define z_3 (6*NUMSIZE)(%rsp) -#define w_3 (7*NUMSIZE)(%rsp) - -#define tmpspace (8*NUMSIZE)(%rsp) -#define t0 (8*NUMSIZE)(%rsp) -#define t1 (9*NUMSIZE)(%rsp) -#define t2 (10*NUMSIZE)(%rsp) -#define t3 (11*NUMSIZE)(%rsp) -#define t4 (12*NUMSIZE)(%rsp) -#define t5 (13*NUMSIZE)(%rsp) +#define t0 (4*NUMSIZE)(%rsp) +#define t1 (5*NUMSIZE)(%rsp) +#define t2 (6*NUMSIZE)(%rsp) +#define t3 (7*NUMSIZE)(%rsp) +#define t4 (8*NUMSIZE)(%rsp) +#define t5 (9*NUMSIZE)(%rsp) + +#define acc (10*NUMSIZE)(%rsp) +#define x_1 (10*NUMSIZE)(%rsp) +#define y_1 (11*NUMSIZE)(%rsp) +#define z_1 (12*NUMSIZE)(%rsp) +#define w_1 (13*NUMSIZE)(%rsp) +#define x_3 (10*NUMSIZE)(%rsp) +#define y_3 (11*NUMSIZE)(%rsp) +#define z_3 (12*NUMSIZE)(%rsp) +#define w_3 (13*NUMSIZE)(%rsp) // Stable homes for the input result pointer, and other variables @@ -73,6 +72,15 @@ #define NSPACE (15*NUMSIZE+8) +// Syntactic variants to make x86_att version simpler to generate + +#define SCALAR 0 +#define TABENT (1*NUMSIZE) +#define ACC (10*NUMSIZE) +#define X3 (10*NUMSIZE) +#define Z3 (12*NUMSIZE) +#define W3 (13*NUMSIZE) + // Macro wrapping up the basic field multiplication, only trivially // different from a pure function call to bignum_mul_p25519. @@ -337,12 +345,12 @@ S2N_BN_SYMBOL(edwards25519_scalarmulbase): pushq %rsi movq %rcx, %rdi movq %rdx, %rsi - callq edwards25519_scalarmulbase_curve25519_x25519base_standard + callq edwards25519_scalarmulbase_standard popq %rsi popq %rdi ret -edwards25519_scalarmulbase_curve25519_x25519base_standard: +edwards25519_scalarmulbase_standard: #endif // Save registers, make room for temps, preserve input arguments. @@ -413,11 +421,11 @@ edwards25519_scalarmulbase_curve25519_x25519base_standard: // And before we store the scalar, test and reset bit 251 to // initialize the main loop just below. 
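In rough Python terms, the bit-251 handling below (the btr $59 on the top limb) amounts to the following; the recorded bit then selects between the _0g and _251g table entries via cmovcq:

    def split_bit251(scalar):
        # Record and clear bit 251 (bit 59 of the top 64-bit limb),
        # leaving bits 0..250 for the main loop to absorb.
        bit251 = (scalar >> 251) & 1
        return scalar & ~(1 << 251), bit251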
- movq %r8, (%rsp) - movq %r9, 8(%rsp) - movq %r10, 16(%rsp) + movq %r8, SCALAR(%rsp) + movq %r9, SCALAR+8(%rsp) + movq %r10, SCALAR+16(%rsp) btr $59, %r11 - movq %r11, 24(%rsp) + movq %r11, SCALAR+24(%rsp) // The main part of the computation is in extended-projective coordinates // (X,Y,Z,T), representing an affine point on the edwards25519 curve @@ -428,75 +436,75 @@ edwards25519_scalarmulbase_curve25519_x25519base_standard: // Initialize accumulator "acc" to either 0 or 2^251 * B depending on // bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle. - leaq edwards25519_scalarmulbase_edwards25519_0g(%rip), %r10 - leaq edwards25519_scalarmulbase_edwards25519_251g(%rip), %r11 + leaq edwards25519_scalarmulbase_0g(%rip), %r10 + leaq edwards25519_scalarmulbase_251g(%rip), %r11 movq (%r10), %rax movq (%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*16(%rsp) + movq %rax, ACC(%rsp) movq 8*1(%r10), %rax movq 8*1(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*17(%rsp) + movq %rax, ACC+8(%rsp) movq 8*2(%r10), %rax movq 8*2(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*18(%rsp) + movq %rax, ACC+16(%rsp) movq 8*3(%r10), %rax movq 8*3(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*19(%rsp) + movq %rax, ACC+24(%rsp) movq 8*4(%r10), %rax movq 8*4(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*20(%rsp) + movq %rax, ACC+32(%rsp) movq 8*5(%r10), %rax movq 8*5(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*21(%rsp) + movq %rax, ACC+40(%rsp) movq 8*6(%r10), %rax movq 8*6(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*22(%rsp) + movq %rax, ACC+48(%rsp) movq 8*7(%r10), %rax movq 8*7(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*23(%rsp) + movq %rax, ACC+56(%rsp) movl $1, %eax - movq %rax, 8*24(%rsp) + movq %rax, ACC+64(%rsp) movl $0, %eax - movq %rax, 8*25(%rsp) - movq %rax, 8*26(%rsp) - movq %rax, 8*27(%rsp) + movq %rax, ACC+72(%rsp) + movq %rax, ACC+80(%rsp) + movq %rax, ACC+88(%rsp) movq 8*8(%r10), %rax movq 8*8(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*28(%rsp) + movq %rax, ACC+96(%rsp) movq 8*9(%r10), %rax movq 8*9(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*29(%rsp) + movq %rax, ACC+104(%rsp) movq 8*10(%r10), %rax movq 8*10(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*30(%rsp) + movq %rax, ACC+112(%rsp) movq 8*11(%r10), %rax movq 8*11(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*31(%rsp) + movq %rax, ACC+120(%rsp) // The counter "i" tracks the bit position for which the scalar has // already been absorbed, starting at 0 and going up in chunks of 4. @@ -512,7 +520,7 @@ edwards25519_scalarmulbase_curve25519_x25519base_standard: // end because we made sure bit 251 is clear in the reduced scalar. 
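A rough Python sketch of the signed-window recoding that the loop and its "bias" variable implement (a reading of the logic rather than a line-by-line transcription): each 4-bit digit plus the incoming bias becomes a signed digit in -8..8, with values above 8 borrowing from the next window; clearing bit 251 guarantees the final borrow is zero.

    def recode_signed_windows(scalar):
        digits, bias = [], 0
        for i in range(0, 252, 4):
            d = ((scalar >> i) & 0xf) + bias
            bias = int(d > 8)            # borrow into the next window
            digits.append(d - 16 * bias)
        return digits                    # sum(d * 16**k) reconstructs scalar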
movq $0, i - leaq edwards25519_scalarmulbase_edwards25519_gtable(%rip), %rax + leaq edwards25519_scalarmulbase_gtable(%rip), %rax movq %rax, tab movq $0, bias @@ -804,26 +812,26 @@ edwards25519_scalarmulbase_scalarloop: movq %rax, %rsi cmovnzq %r8, %rsi cmovnzq %rax, %r8 - movq %rsi, 32(%rsp) - movq %r8, 64(%rsp) + movq %rsi, TABENT(%rsp) + movq %r8, TABENT+32(%rsp) movq %rbx, %rsi cmovnzq %r9, %rsi cmovnzq %rbx, %r9 - movq %rsi, 40(%rsp) - movq %r9, 72(%rsp) + movq %rsi, TABENT+8(%rsp) + movq %r9, TABENT+40(%rsp) movq %rcx, %rsi cmovnzq %r10, %rsi cmovnzq %rcx, %r10 - movq %rsi, 48(%rsp) - movq %r10, 80(%rsp) + movq %rsi, TABENT+16(%rsp) + movq %r10, TABENT+48(%rsp) movq %rdx, %rsi cmovnzq %r11, %rsi cmovnzq %rdx, %r11 - movq %rsi, 56(%rsp) - movq %r11, 88(%rsp) + movq %rsi, TABENT+24(%rsp) + movq %r11, TABENT+56(%rsp) movq $-19, %rax movq $-1, %rbx @@ -844,10 +852,10 @@ edwards25519_scalarmulbase_scalarloop: cmovzq %r13, %rbx cmovzq %r14, %rcx cmovzq %r15, %rdx - movq %rax, 96(%rsp) - movq %rbx, 104(%rsp) - movq %rcx, 112(%rsp) - movq %rdx, 120(%rsp) + movq %rax, TABENT+64(%rsp) + movq %rbx, TABENT+72(%rsp) + movq %rcx, TABENT+80(%rsp) + movq %rdx, TABENT+88(%rsp) // Extended-projective and precomputed mixed addition. // This is effectively the same as calling the standalone @@ -884,10 +892,10 @@ edwards25519_scalarmulbase_scalarloop: // point on we don't need any normalization of the coordinates // except for making sure that they fit in 4 digits. - movq 128(%rsp), %r8 - movq 136(%rsp), %r9 - movq 144(%rsp), %r10 - movq 152(%rsp), %r11 + movq X3(%rsp), %r8 + movq X3+8(%rsp), %r9 + movq X3+16(%rsp), %r10 + movq X3+24(%rsp), %r11 movq $0xffffffffffffffda, %r12 subq %r8, %r12 movq $0xffffffffffffffff, %r13 @@ -896,424 +904,1377 @@ edwards25519_scalarmulbase_scalarloop: sbbq %r10, %r14 movq $0xffffffffffffffff, %r15 sbbq %r11, %r15 - movq 24(%rsp), %rax + movq SCALAR+24(%rsp), %rax btq $63, %rax cmovcq %r12, %r8 cmovcq %r13, %r9 cmovcq %r14, %r10 cmovcq %r15, %r11 - movq %r8, 128(%rsp) - movq %r9, 136(%rsp) - movq %r10, 144(%rsp) - movq %r11, 152(%rsp) + movq %r8, X3(%rsp) + movq %r9, X3+8(%rsp) + movq %r10, X3+16(%rsp) + movq %r11, X3+24(%rsp) // Now we need to map out of the extended-projective representation // (X,Y,Z,W) back to the affine form (x,y) = (X/Z,Y/Z). This means // first calling the modular inverse to get w_3 = 1/z_3. - movq $4, %rdi - leaq 224(%rsp), %rsi - leaq 192(%rsp), %rdx - leaq edwards25519_scalarmulbase_p_25519(%rip), %rcx - leaq 256(%rsp), %r8 - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "x86/generic/bignum_modinv.S". Note -// that the stack it uses for its own temporaries is 80 bytes so it -// only overwrites local variables that are no longer needed. 
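The mathematical content of this step, as a short Python sketch (the inverse is written as an exponentiation only for brevity; the inlined code computes the same w_3 with a divstep iteration):

    P = 2**255 - 19

    def to_affine(X, Y, Z):
        w = pow(Z, P - 2, P)            # w_3 = 1/z_3
        return (X * w) % P, (Y * w) % P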
- - movq %rsi, 0x40(%rsp) - movq %r8, 0x38(%rsp) - movq %rcx, 0x48(%rsp) - leaq (%r8,%rdi,8), %r10 - movq %r10, 0x30(%rsp) - leaq (%r10,%rdi,8), %r15 - xorq %r11, %r11 - xorq %r9, %r9 -edwards25519_scalarmulbase_copyloop: - movq (%rdx,%r9,8), %rax - movq (%rcx,%r9,8), %rbx - movq %rax, (%r10,%r9,8) - movq %rbx, (%r15,%r9,8) - movq %rbx, (%r8,%r9,8) - movq %r11, (%rsi,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmulbase_copyloop - movq (%r8), %rax - movq %rax, %rbx - decq %rbx - movq %rbx, (%r8) - movq %rax, %rbp - movq %rax, %r12 - shlq $0x2, %rbp - subq %rbp, %r12 - xorq $0x2, %r12 - movq %r12, %rbp - imulq %rax, %rbp - movl $0x2, %eax - addq %rbp, %rax - addq $0x1, %rbp - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp + leaq W3(%rsp), %rdi + leaq Z3(%rsp), %rsi + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "x86/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 208 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, x_3, y_3, +// z_3 and w_3. + + movq %rdi, 0xc0(%rsp) + xorl %eax, %eax + leaq -0x13(%rax), %rcx + notq %rax + movq %rcx, (%rsp) + movq %rax, 0x8(%rsp) + movq %rax, 0x10(%rsp) + btr $0x3f, %rax + movq %rax, 0x18(%rsp) + movq (%rsi), %rdx + movq 0x8(%rsi), %rcx + movq 0x10(%rsi), %r8 + movq 0x18(%rsi), %r9 movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - movq %r12, 0x28(%rsp) - movq %rdi, %rax - shlq $0x7, %rax - movq %rax, 0x20(%rsp) -edwards25519_scalarmulbase_outerloop: - movq 0x20(%rsp), %r13 - addq $0x3f, %r13 - shrq $0x6, %r13 - cmpq %rdi, %r13 - cmovaeq %rdi, %r13 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi - xorq %r11, %r11 - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 -edwards25519_scalarmulbase_toploop: - movq (%r8,%r9,8), %rbx - movq (%r15,%r9,8), %rcx - movq %r11, %r10 - andq %r12, %r10 - andq %rbp, %r11 - movq %rbx, %rax - orq %rcx, %rax - negq %rax - cmovbq %r10, %r14 - cmovbq %r11, %rsi - cmovbq %rbx, %r12 - cmovbq %rcx, %rbp - sbbq %r11, %r11 - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmulbase_toploop - movq %r12, %rax - orq %rbp, %rax - bsrq %rax, %rcx - xorq $0x3f, %rcx - shldq %cl, %r14, %r12 - shldq %cl, %rsi, %rbp - movq (%r8), %rax - movq %rax, %r14 - movq (%r15), %rax - movq %rax, %rsi - movl $0x1, %r10d - movl $0x0, %r11d - movl $0x0, %ecx - movl $0x1, %edx - movl $0x3a, %r9d - movq %rdi, 0x8(%rsp) - movq %r13, 0x10(%rsp) - movq %r8, (%rsp) - movq %r15, 0x18(%rsp) -edwards25519_scalarmulbase_innerloop: + xorl %r10d, %r10d + bts $0x3f, %r9 + adcq %r10, %rax + imulq $0x13, %rax, %rax + addq %rax, %rdx + adcq %r10, %rcx + adcq %r10, %r8 + adcq %r10, %r9 + movl $0x13, %eax + cmovbq %r10, %rax + subq %rax, %rdx + sbbq %r10, %rcx + sbbq %r10, %r8 + sbbq %r10, %r9 + btr $0x3f, %r9 + movq %rdx, 0x20(%rsp) + movq %rcx, 0x28(%rsp) + movq %r8, 0x30(%rsp) + movq %r9, 0x38(%rsp) xorl %eax, %eax + movq %rax, 0x40(%rsp) + movq %rax, 0x48(%rsp) + movq %rax, 0x50(%rsp) + movq %rax, 0x58(%rsp) + movabsq $0xa0f99e2375022099, %rax + movq %rax, 0x60(%rsp) + movabsq $0xa8c68f3f1d132595, %rax + movq %rax, 0x68(%rsp) + movabsq $0x6c6c893805ac5242, %rax + movq %rax, 0x70(%rsp) + movabsq 
$0x276508b241770615, %rax + movq %rax, 0x78(%rsp) + movq $0xa, 0x90(%rsp) + movq $0x1, 0x98(%rsp) + jmp edwards25519_scalarmulbase_midloop +edwards25519_scalarmulbase_inverseloop: + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + movq %r8, %rax + andq %r9, %rax + movq %r10, %rdi + andq %r11, %rdi + addq %rax, %rdi + movq %rdi, 0x80(%rsp) + movq %r12, %rax + andq %r13, %rax + movq %r14, %rsi + andq %r15, %rsi + addq %rax, %rsi + movq %rsi, 0x88(%rsp) xorl %ebx, %ebx - xorq %r8, %r8 - xorq %r15, %r15 - btq $0x0, %r14 - cmovbq %rbp, %rax - cmovbq %rsi, %rbx - cmovbq %rcx, %r8 - cmovbq %rdx, %r15 - movq %r14, %r13 - subq %rbx, %r14 - subq %r13, %rbx - movq %r12, %rdi - subq %rax, %rdi - cmovbq %r12, %rbp - leaq -0x1(%rdi), %r12 - cmovbq %rbx, %r14 - cmovbq %r13, %rsi - notq %r12 - cmovbq %r10, %rcx - cmovbq %r11, %rdx - cmovaeq %rdi, %r12 - shrq $1, %r14 - addq %r8, %r10 - addq %r15, %r11 - shrq $1, %r12 - addq %rcx, %rcx - addq %rdx, %rdx - decq %r9 - jne edwards25519_scalarmulbase_innerloop - movq 0x8(%rsp), %rdi - movq 0x10(%rsp), %r13 - movq (%rsp), %r8 - movq 0x18(%rsp), %r15 - movq %r10, (%rsp) - movq %r11, 0x8(%rsp) - movq %rcx, 0x10(%rsp) - movq %rdx, 0x18(%rsp) - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - xorq %r14, %r14 - xorq %rsi, %rsi - xorq %r10, %r10 - xorq %r11, %r11 - xorq %r9, %r9 -edwards25519_scalarmulbase_congloop: - movq (%r8,%r9,8), %rcx movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r12 - movq 0x10(%rsp), %rax - mulq %rcx + xorq %r9, %rax + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x20(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + xorl %ebp, %ebp + movq (%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x20(%rsp), %rax + xorq %r15, %rax + mulq %r14 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %rbp - movq (%r15,%r9,8), %rcx + adcq %rdx, %rbp + xorl %ecx, %ecx movq 0x8(%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq %rdx, %r12 - shrdq $0x3a, %r14, %r10 - movq %r10, (%r8,%r9,8) - movq %r14, %r10 - movq %r12, %r14 + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x28(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $0x3b, %rbx, %rdi + movq %rdi, (%rsp) + xorl %edi, %edi + movq 0x8(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbp + adcq %rdx, %rdi + movq 0x28(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $0x3b, %rbp, %rsi + movq %rsi, 0x20(%rsp) + xorl %esi, %esi + movq 0x10(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rsi + movq 0x30(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $0x3b, %rcx, %rbx + movq %rbx, 0x8(%rsp) + xorl %ebx, %ebx + movq 0x10(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x30(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $0x3b, %rdi, %rbp + movq %rbp, 0x28(%rsp) movq 0x18(%rsp), %rax - mulq %rcx + xorq %r9, %rax + movq %rax, %rbp + sarq $0x3f, %rbp + andq %r8, %rbp + negq %rbp + mulq %r8 + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x38(%rsp), %rax + xorq %r11, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r10, %rdx + subq %rdx, %rbp + mulq %r10 
addq %rax, %rsi adcq %rdx, %rbp - shrdq $0x3a, %rsi, %r11 - movq %r11, (%r15,%r9,8) - movq %rsi, %r11 - movq %rbp, %rsi - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmulbase_congloop - shldq $0x6, %r10, %r14 - shldq $0x6, %r11, %rsi - movq 0x48(%rsp), %r15 - movq (%r8), %rbx - movq 0x28(%rsp), %r12 - imulq %rbx, %r12 - movq (%r15), %rax + shrdq $0x3b, %rsi, %rcx + movq %rcx, 0x10(%rsp) + shrdq $0x3b, %rbp, %rsi + movq 0x18(%rsp), %rax + movq %rsi, 0x18(%rsp) + xorq %r13, %rax + movq %rax, %rsi + sarq $0x3f, %rsi + andq %r12, %rsi + negq %rsi + mulq %r12 + addq %rax, %rbx + adcq %rdx, %rsi + movq 0x38(%rsp), %rax + xorq %r15, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r14, %rdx + subq %rdx, %rsi + mulq %r14 + addq %rax, %rbx + adcq %rdx, %rsi + shrdq $0x3b, %rbx, %rdi + movq %rdi, 0x30(%rsp) + shrdq $0x3b, %rsi, %rbx + movq %rbx, 0x38(%rsp) + movq 0x80(%rsp), %rbx + movq 0x88(%rsp), %rbp + xorl %ecx, %ecx + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x40(%rsp), %rax + xorq %r13, %rax mulq %r12 - addq %rbx, %rax - movq %rdx, %r10 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je edwards25519_scalarmulbase_wmontend -edwards25519_scalarmulbase_wmontloop: - adcq (%r8,%r9,8), %r10 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax + movq %rbx, 0x40(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x60(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x60(%rsp) + xorl %ebx, %ebx + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + xorl %ebp, %ebp + movq 0x48(%rsp), %rax + xorq %r13, %rax mulq %r12 - subq %rbx, %rdx - addq %r10, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r10 - incq %r9 - decq %rcx - jne edwards25519_scalarmulbase_wmontloop -edwards25519_scalarmulbase_wmontend: - adcq %r14, %r10 - movq %r10, -0x8(%r8,%rdi,8) - sbbq %r10, %r10 - negq %r10 - movq %rdi, %rcx - xorq %r9, %r9 -edwards25519_scalarmulbase_wcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne edwards25519_scalarmulbase_wcmploop - sbbq $0x0, %r10 - sbbq %r10, %r10 - notq %r10 - xorq %rcx, %rcx - xorq %r9, %r9 -edwards25519_scalarmulbase_wcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r10, %rbx - negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmulbase_wcorrloop + movq %rcx, 0x48(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x68(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, 0x68(%rsp) + xorl %ecx, %ecx + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x50(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, 0x50(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x70(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x70(%rsp) + movq 0x58(%rsp), %rax + xorq %r9, %rax + movq %r9, %rbx + andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rcx + adcq %rbx, %rdx + movq %rdx, %rbx + 
shldq $0x1, %rcx, %rdx + sarq $0x3f, %rbx + addq %rbx, %rdx + movl $0x13, %eax + imulq %rdx movq 0x40(%rsp), %r8 - movq (%r8), %rbx - movq 0x28(%rsp), %rbp - imulq %rbx, %rbp - movq (%r15), %rax - mulq %rbp - addq %rbx, %rax - movq %rdx, %r11 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je edwards25519_scalarmulbase_zmontend -edwards25519_scalarmulbase_zmontloop: - adcq (%r8,%r9,8), %r11 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax - mulq %rbp - subq %rbx, %rdx - addq %r11, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r11 - incq %r9 - decq %rcx - jne edwards25519_scalarmulbase_zmontloop -edwards25519_scalarmulbase_zmontend: - adcq %rsi, %r11 - movq %r11, -0x8(%r8,%rdi,8) - sbbq %r11, %r11 - negq %r11 - movq %rdi, %rcx - xorq %r9, %r9 -edwards25519_scalarmulbase_zcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne edwards25519_scalarmulbase_zcmploop - sbbq $0x0, %r11 - sbbq %r11, %r11 - notq %r11 - xorq %rcx, %rcx - xorq %r9, %r9 -edwards25519_scalarmulbase_zcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r11, %rbx + addq %rax, %r8 + movq %r8, 0x40(%rsp) + movq 0x48(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x48(%rsp) + movq 0x50(%rsp), %r8 + adcq %rbx, %r8 + movq %r8, 0x50(%rsp) + adcq %rbx, %rcx + shlq $0x3f, %rax + addq %rax, %rcx + movq 0x58(%rsp), %rax + movq %rcx, 0x58(%rsp) + xorq %r13, %rax + movq %r13, %rcx + andq %r12, %rcx negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmulbase_zcorrloop - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi -edwards25519_scalarmulbase_crossloop: - movq (%r8,%r9,8), %rcx - movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r10 - movq 0x10(%rsp), %rax - mulq %rcx + mulq %r12 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %r11 - movq (%r15,%r9,8), %rcx - movq 0x8(%rsp), %rax - mulq %rcx - subq %r12, %rdx - subq %rax, %r14 - sbbq %rdx, %r10 - sbbq %r12, %r12 - movq %r14, (%r8,%r9,8) - movq %r10, %r14 - movq 0x18(%rsp), %rax - mulq %rcx - subq %rbp, %rdx + adcq %rdx, %rcx + movq 0x78(%rsp), %rax + xorq %r15, %rax + movq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rcx + mulq %r14 + addq %rax, %rsi + adcq %rcx, %rdx + movq %rdx, %rcx + shldq $0x1, %rsi, %rdx + sarq $0x3f, %rcx + movl $0x13, %eax + addq %rcx, %rdx + imulq %rdx + movq 0x60(%rsp), %r8 + addq %rax, %r8 + movq %r8, 0x60(%rsp) + movq 0x68(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x68(%rsp) + movq 0x70(%rsp), %r8 + adcq %rcx, %r8 + movq %r8, 0x70(%rsp) + adcq %rcx, %rsi + shlq $0x3f, %rax + addq %rax, %rsi + movq %rsi, 0x78(%rsp) +edwards25519_scalarmulbase_midloop: + movq 0x98(%rsp), %rsi + movq (%rsp), %rdx + movq 0x20(%rsp), %rcx + movq %rdx, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + xorl %ebp, %ebp + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 subq %rax, %rsi - sbbq %rdx, %r11 - sbbq %rbp, %rbp - movq %rsi, (%r15,%r9,8) - movq %r11, %rsi - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmulbase_crossloop - xorq %r9, %r9 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r12, %r14 - xorq %rbp, %rsi 
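The new inlined inverse batches divsteps in chunks of roughly twenty (the 0xfffff masks and sarq $0x14 shifts operate on low-20-bit approximations of f and g), accumulating a 2x2 integer update matrix which the imulq blocks then apply to the full-width values. Schematically, with (m00, m01, m10, m11) standing for the accumulated coefficients kept at 0xa0(%rsp)..0xb8(%rsp):

    def apply_update_matrix(m00, m01, m10, m11, f, g):
        # Matrix-vector product; the low bits retired by the completed
        # divsteps are then shifted away (the shrdq $0x3b chains).
        return m00 * f + m01 * g, m10 * f + m11 * g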
-edwards25519_scalarmulbase_optnegloop: - movq (%r8,%r9,8), %rax - xorq %r12, %rax - negq %r10 - adcq $0x0, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rax - xorq %rbp, %rax - negq %r11 - adcq $0x0, %rax - sbbq %r11, %r11 - movq %rax, (%r15,%r9,8) - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmulbase_optnegloop - subq %r10, %r14 - subq %r11, %rsi - movq %r13, %r9 -edwards25519_scalarmulbase_shiftloop: - movq -0x8(%r8,%r9,8), %rax - movq %rax, %r10 - shrdq $0x3a, %r14, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %r10, %r14 - movq -0x8(%r15,%r9,8), %rax - movq %rax, %r11 - shrdq $0x3a, %rsi, %rax - movq %rax, -0x8(%r15,%r9,8) - movq %r11, %rsi - decq %r9 - jne edwards25519_scalarmulbase_shiftloop - notq %rbp - movq 0x48(%rsp), %rcx - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r9, %r9 -edwards25519_scalarmulbase_fliploop: - movq %rbp, %rdx - movq (%rcx,%r9,8), %rax - andq %rax, %rdx - andq %r12, %rax - movq (%r8,%r9,8), %rbx - xorq %r12, %rbx - negq %r10 - adcq %rbx, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rbx - xorq %rbp, %rbx - negq %r11 - adcq %rbx, %rdx - sbbq %r11, %r11 - movq %rdx, (%r15,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmulbase_fliploop - subq $0x3a, 0x20(%rsp) - ja edwards25519_scalarmulbase_outerloop + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + 
cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %rdx + leaq (%rcx,%rax), %rdi + shlq $0x16, %rdx + shlq $0x16, %rdi + sarq $0x2b, %rdx + sarq $0x2b, %rdi + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %rbx + leaq (%rcx,%rax), %rcx + sarq $0x2a, %rbx + sarq $0x2a, %rcx + movq %rdx, 0xa0(%rsp) + movq %rbx, 0xa8(%rsp) + movq %rdi, 0xb0(%rsp) + movq %rcx, 0xb8(%rsp) + movq (%rsp), %r12 + imulq %r12, %rdi + imulq %rdx, %r12 + movq 0x20(%rsp), %r13 + imulq %r13, %rbx + imulq %rcx, %r13 + addq %rbx, %r12 + addq %rdi, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 
+ cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, 
%r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r10 + shlq $0x16, %r8 + shlq $0x16, %r10 + sarq $0x2b, %r8 + sarq $0x2b, %r10 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r15 + leaq (%rcx,%rax), %r11 + sarq $0x2a, %r15 + sarq $0x2a, %r11 + movq %r13, %rbx + movq %r12, %rcx + imulq %r8, %r12 + imulq %r15, %rbx + addq %rbx, %r12 + imulq %r11, %r13 + imulq %r10, %rcx + addq %rcx, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq 0xa0(%rsp), %rax + imulq %r8, %rax + movq 0xb0(%rsp), %rdx + imulq %r15, %rdx + imulq 0xa8(%rsp), %r8 + imulq 0xb8(%rsp), %r15 + addq %r8, %r15 + leaq (%rax,%rdx), %r9 + movq 0xa0(%rsp), %rax + imulq %r10, %rax + movq 0xb0(%rsp), %rdx + imulq %r11, %rdx + imulq 0xa8(%rsp), %r10 + imulq 0xb8(%rsp), %r11 + addq %r10, %r11 + leaq (%rax,%rdx), %r13 + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + 
cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r12 + shlq $0x15, %r8 + shlq $0x15, %r12 + sarq $0x2b, %r8 + sarq $0x2b, %r12 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r10 + leaq (%rcx,%rax), %r14 + sarq $0x2b, %r10 + sarq $0x2b, %r14 + movq %r9, %rax + imulq %r8, %rax + movq %r13, %rdx + imulq %r10, %rdx + imulq %r15, %r8 + imulq %r11, %r10 + addq %r8, %r10 + leaq (%rax,%rdx), %r8 + movq %r9, %rax + imulq %r12, %rax + movq %r13, %rdx + imulq 
%r14, %rdx + imulq %r15, %r12 + imulq %r11, %r14 + addq %r12, %r14 + leaq (%rax,%rdx), %r12 + movq %rsi, 0x98(%rsp) + decq 0x90(%rsp) + jne edwards25519_scalarmulbase_inverseloop + movq (%rsp), %rax + movq 0x20(%rsp), %rcx + imulq %r8, %rax + imulq %r10, %rcx + addq %rcx, %rax + sarq $0x3f, %rax + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + xorq %rax, %r9 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + xorq %rax, %r11 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + xorq %rax, %r13 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + xorq %rax, %r15 + movq %r8, %rax + andq %r9, %rax + movq %r10, %r12 + andq %r11, %r12 + addq %rax, %r12 + xorl %r13d, %r13d + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r13 + adcq %rdx, %r14 + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x58(%rsp), %rax + xorq %r9, %rax + andq %r8, %r9 + negq %r9 + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r9 + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %r9 + mulq %r10 + addq %rax, %r15 + adcq %rdx, %r9 + movq %r9, %rax + shldq $0x1, %r15, %rax + sarq $0x3f, %r9 + movl $0x13, %ebx + leaq 0x1(%rax,%r9,1), %rax + imulq %rbx + xorl %ebp, %ebp + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r9, %r14 + adcq %r9, %r15 + shlq $0x3f, %rax + addq %rax, %r15 + cmovns %rbp, %rbx + subq %rbx, %r12 + sbbq %rbp, %r13 + sbbq %rbp, %r14 + sbbq %rbp, %r15 + btr $0x3f, %r15 + movq 0xc0(%rsp), %rdi + movq %r12, (%rdi) + movq %r13, 0x8(%rdi) + movq %r14, 0x10(%rdi) + movq %r15, 0x18(%rdi) // The final result is x = X * inv(Z), y = Y * inv(Z). // These are the only operations in the whole computation that @@ -1344,18 +2305,10 @@ edwards25519_scalarmulbase_fliploop: // .section .rodata // **************************************************************************** -// The modulus, for the modular inverse - -edwards25519_scalarmulbase_p_25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // 0 * B = 0 and 2^251 * B in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. -edwards25519_scalarmulbase_edwards25519_0g: +edwards25519_scalarmulbase_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 @@ -1372,7 +2325,7 @@ edwards25519_scalarmulbase_edwards25519_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 -edwards25519_scalarmulbase_edwards25519_251g: +edwards25519_scalarmulbase_251g: .quad 0x525f946d7c7220e7 .quad 0x4636b0b2f1e35444 @@ -1390,7 +2343,7 @@ edwards25519_scalarmulbase_edwards25519_251g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
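// ----------------------------------------------------------------------------
// Note on the table representation: storing each precomputed point as the
// triple (y - x, y + x, 2 * d * x * y), with Z = 1 left implicit, lets the
// mixed point addition consume the table entry directly, saving the two
// additions and the multiplication by 2 * d that a plain (x,y) table would
// force on every step. As a rough C-level sketch of the standard a = -1
// twisted Edwards mixed addition (illustrative only: fe4, fe_mul, fe_add and
// fe_sub are assumed field helpers mod p_25519 that tolerate aliasing; they
// are not functions provided by this patch):
//
//      typedef uint64_t fe4[4];
//
//      // (X3,Y3,Z3,T3) = (X1,Y1,Z1,T1) + precomputed triple (ymx,ypx,kxy)
//      static void pepadd(fe4 X3, fe4 Y3, fe4 Z3, fe4 T3,
//                         fe4 X1, fe4 Y1, fe4 Z1, fe4 T1,
//                         fe4 ymx, fe4 ypx, fe4 kxy)
//      {
//          fe4 a, b, c, d, e, f, g, h;
//          fe_sub(a, Y1, X1); fe_mul(a, a, ymx);   // A = (Y1 - X1)*(y - x)
//          fe_add(b, Y1, X1); fe_mul(b, b, ypx);   // B = (Y1 + X1)*(y + x)
//          fe_mul(c, T1, kxy);                     // C = T1 * 2*d*x*y
//          fe_add(d, Z1, Z1);                      // D = 2 * Z1
//          fe_sub(e, b, a);                        // E = B - A
//          fe_sub(f, d, c);                        // F = D - C
//          fe_add(g, d, c);                        // G = D + C
//          fe_add(h, b, a);                        // H = B + A
//          fe_mul(X3, e, f); fe_mul(Y3, g, h);
//          fe_mul(T3, e, h); fe_mul(Z3, f, g);
//      }
//
// A useful corollary of this format is that negating a table point is just
// swapping the first two components and negating the third modulo p_25519,
// which is exactly what the constant-time table selection code does with
// its conditional swaps and masked subtraction from the modulus.
// ----------------------------------------------------------------------------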
-edwards25519_scalarmulbase_edwards25519_gtable: +edwards25519_scalarmulbase_gtable: // 2^0 * 1 * G diff --git a/x86_att/curve25519/edwards25519_scalarmulbase_alt.S b/x86_att/curve25519/edwards25519_scalarmulbase_alt.S index e66492083f..00b91fe1aa 100644 --- a/x86_att/curve25519/edwards25519_scalarmulbase_alt.S +++ b/x86_att/curve25519/edwards25519_scalarmulbase_alt.S @@ -38,23 +38,22 @@ #define xpy_2 (2*NUMSIZE)(%rsp) #define kxy_2 (3*NUMSIZE)(%rsp) -#define acc (4*NUMSIZE)(%rsp) -#define x_1 (4*NUMSIZE)(%rsp) -#define y_1 (5*NUMSIZE)(%rsp) -#define z_1 (6*NUMSIZE)(%rsp) -#define w_1 (7*NUMSIZE)(%rsp) -#define x_3 (4*NUMSIZE)(%rsp) -#define y_3 (5*NUMSIZE)(%rsp) -#define z_3 (6*NUMSIZE)(%rsp) -#define w_3 (7*NUMSIZE)(%rsp) - -#define tmpspace (8*NUMSIZE)(%rsp) -#define t0 (8*NUMSIZE)(%rsp) -#define t1 (9*NUMSIZE)(%rsp) -#define t2 (10*NUMSIZE)(%rsp) -#define t3 (11*NUMSIZE)(%rsp) -#define t4 (12*NUMSIZE)(%rsp) -#define t5 (13*NUMSIZE)(%rsp) +#define t0 (4*NUMSIZE)(%rsp) +#define t1 (5*NUMSIZE)(%rsp) +#define t2 (6*NUMSIZE)(%rsp) +#define t3 (7*NUMSIZE)(%rsp) +#define t4 (8*NUMSIZE)(%rsp) +#define t5 (9*NUMSIZE)(%rsp) + +#define acc (10*NUMSIZE)(%rsp) +#define x_1 (10*NUMSIZE)(%rsp) +#define y_1 (11*NUMSIZE)(%rsp) +#define z_1 (12*NUMSIZE)(%rsp) +#define w_1 (13*NUMSIZE)(%rsp) +#define x_3 (10*NUMSIZE)(%rsp) +#define y_3 (11*NUMSIZE)(%rsp) +#define z_3 (12*NUMSIZE)(%rsp) +#define w_3 (13*NUMSIZE)(%rsp) // Stable homes for the input result pointer, and other variables @@ -73,6 +72,15 @@ #define NSPACE (15*NUMSIZE+8) +// Syntactic variants to make x86_att version simpler to generate + +#define SCALAR 0 +#define TABENT (1*NUMSIZE) +#define ACC (10*NUMSIZE) +#define X3 (10*NUMSIZE) +#define Z3 (12*NUMSIZE) +#define W3 (13*NUMSIZE) + // Macro wrapping up the basic field multiplication, only trivially // different from a pure function call to bignum_mul_p25519_alt. @@ -413,12 +421,12 @@ S2N_BN_SYMBOL(edwards25519_scalarmulbase_alt): pushq %rsi movq %rcx, %rdi movq %rdx, %rsi - callq edwards25519_scalarmulbase_alt_curve25519_x25519base_standard + callq edwards25519_scalarmulbase_alt_standard popq %rsi popq %rdi ret -edwards25519_scalarmulbase_alt_curve25519_x25519base_standard: +edwards25519_scalarmulbase_alt_standard: #endif // Save registers, make room for temps, preserve input arguments. @@ -489,11 +497,11 @@ edwards25519_scalarmulbase_alt_curve25519_x25519base_standard: // And before we store the scalar, test and reset bit 251 to // initialize the main loop just below. - movq %r8, (%rsp) - movq %r9, 8(%rsp) - movq %r10, 16(%rsp) + movq %r8, SCALAR(%rsp) + movq %r9, SCALAR+8(%rsp) + movq %r10, SCALAR+16(%rsp) btr $59, %r11 - movq %r11, 24(%rsp) + movq %r11, SCALAR+24(%rsp) // The main part of the computation is in extended-projective coordinates // (X,Y,Z,T), representing an affine point on the edwards25519 curve @@ -504,75 +512,75 @@ edwards25519_scalarmulbase_alt_curve25519_x25519base_standard: // Initialize accumulator "acc" to either 0 or 2^251 * B depending on // bit 251 of the (reduced) scalar. That leaves bits 0..250 to handle. 
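// ----------------------------------------------------------------------------
// The initialization just below picks between the stored points 0 * B and
// 2^251 * B with cmovcq on the carry flag produced by the preceding btr
// (movq and leaq leave the flags intact in between), so no branch ever
// depends on the secret scalar bit. A C-level analogue of such a
// constant-time select (select4 is a hypothetical helper, not part of this
// library):
//
//      #include <stdint.h>
//
//      // r = bit ? b : a, without branching on bit
//      static void select4(uint64_t r[4], uint64_t a[4],
//                          uint64_t b[4], uint64_t bit)
//      {
//          uint64_t mask = 0 - bit;          // all-ones iff bit == 1
//          for (int i = 0; i < 4; i++)
//              r[i] = a[i] ^ (mask & (a[i] ^ b[i]));
//      }
// ----------------------------------------------------------------------------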
- leaq edwards25519_scalarmulbase_alt_edwards25519_0g(%rip), %r10 - leaq edwards25519_scalarmulbase_alt_edwards25519_251g(%rip), %r11 + leaq edwards25519_scalarmulbase_alt_0g(%rip), %r10 + leaq edwards25519_scalarmulbase_alt_251g(%rip), %r11 movq (%r10), %rax movq (%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*16(%rsp) + movq %rax, ACC(%rsp) movq 8*1(%r10), %rax movq 8*1(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*17(%rsp) + movq %rax, ACC+8(%rsp) movq 8*2(%r10), %rax movq 8*2(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*18(%rsp) + movq %rax, ACC+16(%rsp) movq 8*3(%r10), %rax movq 8*3(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*19(%rsp) + movq %rax, ACC+24(%rsp) movq 8*4(%r10), %rax movq 8*4(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*20(%rsp) + movq %rax, ACC+32(%rsp) movq 8*5(%r10), %rax movq 8*5(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*21(%rsp) + movq %rax, ACC+40(%rsp) movq 8*6(%r10), %rax movq 8*6(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*22(%rsp) + movq %rax, ACC+48(%rsp) movq 8*7(%r10), %rax movq 8*7(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*23(%rsp) + movq %rax, ACC+56(%rsp) movl $1, %eax - movq %rax, 8*24(%rsp) + movq %rax, ACC+64(%rsp) movl $0, %eax - movq %rax, 8*25(%rsp) - movq %rax, 8*26(%rsp) - movq %rax, 8*27(%rsp) + movq %rax, ACC+72(%rsp) + movq %rax, ACC+80(%rsp) + movq %rax, ACC+88(%rsp) movq 8*8(%r10), %rax movq 8*8(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*28(%rsp) + movq %rax, ACC+96(%rsp) movq 8*9(%r10), %rax movq 8*9(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*29(%rsp) + movq %rax, ACC+104(%rsp) movq 8*10(%r10), %rax movq 8*10(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*30(%rsp) + movq %rax, ACC+112(%rsp) movq 8*11(%r10), %rax movq 8*11(%r11), %rcx cmovcq %rcx, %rax - movq %rax, 8*31(%rsp) + movq %rax, ACC+120(%rsp) // The counter "i" tracks the bit position for which the scalar has // already been absorbed, starting at 0 and going up in chunks of 4. @@ -588,7 +596,7 @@ edwards25519_scalarmulbase_alt_curve25519_x25519base_standard: // end because we made sure bit 251 is clear in the reduced scalar. movq $0, i - leaq edwards25519_scalarmulbase_alt_edwards25519_gtable(%rip), %rax + leaq edwards25519_scalarmulbase_alt_gtable(%rip), %rax movq %rax, tab movq $0, bias @@ -880,26 +888,26 @@ edwards25519_scalarmulbase_alt_scalarloop: movq %rax, %rsi cmovnzq %r8, %rsi cmovnzq %rax, %r8 - movq %rsi, 32(%rsp) - movq %r8, 64(%rsp) + movq %rsi, TABENT(%rsp) + movq %r8, TABENT+32(%rsp) movq %rbx, %rsi cmovnzq %r9, %rsi cmovnzq %rbx, %r9 - movq %rsi, 40(%rsp) - movq %r9, 72(%rsp) + movq %rsi, TABENT+8(%rsp) + movq %r9, TABENT+40(%rsp) movq %rcx, %rsi cmovnzq %r10, %rsi cmovnzq %rcx, %r10 - movq %rsi, 48(%rsp) - movq %r10, 80(%rsp) + movq %rsi, TABENT+16(%rsp) + movq %r10, TABENT+48(%rsp) movq %rdx, %rsi cmovnzq %r11, %rsi cmovnzq %rdx, %r11 - movq %rsi, 56(%rsp) - movq %r11, 88(%rsp) + movq %rsi, TABENT+24(%rsp) + movq %r11, TABENT+56(%rsp) movq $-19, %rax movq $-1, %rbx @@ -920,10 +928,10 @@ edwards25519_scalarmulbase_alt_scalarloop: cmovzq %r13, %rbx cmovzq %r14, %rcx cmovzq %r15, %rdx - movq %rax, 96(%rsp) - movq %rbx, 104(%rsp) - movq %rcx, 112(%rsp) - movq %rdx, 120(%rsp) + movq %rax, TABENT+64(%rsp) + movq %rbx, TABENT+72(%rsp) + movq %rcx, TABENT+80(%rsp) + movq %rdx, TABENT+88(%rsp) // Extended-projective and precomputed mixed addition. 
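// ----------------------------------------------------------------------------
// The long unrolled stretch that follows is the divstep iteration at the
// heart of bignum_inv_p25519: a Bernstein-Yang style gcd/inverse run as ten
// outer rounds (the counter at 0x90(%rsp) starts at 0xa) of 59 divsteps
// each (hence the shrdq $0x3b shifts), with every step implemented
// branch-free using cmov/xor, and the accumulated 2x2 matrix of signed
// coefficients applied to the full 4-word numbers in the imulq/mulq blocks
// between runs. For orientation, a scalar C sketch of one textbook signed
// divstep (divstep is an illustrative name, not a function in this patch):
//
//      #include <stdint.h>
//
//      // One divstep on (delta, f, g) with f odd: preserves gcd(f, g),
//      // and iterating it enough times drives g to 0, while the
//      // accumulated transition matrix yields the modular inverse.
//      static void divstep(int64_t *delta, int64_t *f, int64_t *g)
//      {
//          if (*delta > 0 && (*g & 1)) {     // swap and subtract
//              int64_t f0 = *f;
//              *f = *g;
//              *g = (*g - f0) >> 1;          // g - f is even here
//              *delta = 1 - *delta;
//          } else {                          // keep f, make g even, halve
//              if (*g & 1) *g += *f;
//              *g >>= 1;
//              *delta = 1 + *delta;
//          }
//      }
// ----------------------------------------------------------------------------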
// This is effectively the same as calling the standalone @@ -960,10 +968,10 @@ edwards25519_scalarmulbase_alt_scalarloop: // point on we don't need any normalization of the coordinates // except for making sure that they fit in 4 digits. - movq 128(%rsp), %r8 - movq 136(%rsp), %r9 - movq 144(%rsp), %r10 - movq 152(%rsp), %r11 + movq X3(%rsp), %r8 + movq X3+8(%rsp), %r9 + movq X3+16(%rsp), %r10 + movq X3+24(%rsp), %r11 movq $0xffffffffffffffda, %r12 subq %r8, %r12 movq $0xffffffffffffffff, %r13 @@ -972,424 +980,1377 @@ edwards25519_scalarmulbase_alt_scalarloop: sbbq %r10, %r14 movq $0xffffffffffffffff, %r15 sbbq %r11, %r15 - movq 24(%rsp), %rax + movq SCALAR+24(%rsp), %rax btq $63, %rax cmovcq %r12, %r8 cmovcq %r13, %r9 cmovcq %r14, %r10 cmovcq %r15, %r11 - movq %r8, 128(%rsp) - movq %r9, 136(%rsp) - movq %r10, 144(%rsp) - movq %r11, 152(%rsp) + movq %r8, X3(%rsp) + movq %r9, X3+8(%rsp) + movq %r10, X3+16(%rsp) + movq %r11, X3+24(%rsp) // Now we need to map out of the extended-projective representation // (X,Y,Z,W) back to the affine form (x,y) = (X/Z,Y/Z). This means // first calling the modular inverse to get w_3 = 1/z_3. - movq $4, %rdi - leaq 224(%rsp), %rsi - leaq 192(%rsp), %rdx - leaq edwards25519_scalarmulbase_alt_p_25519(%rip), %rcx - leaq 256(%rsp), %r8 - -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "x86/generic/bignum_modinv.S". Note -// that the stack it uses for its own temporaries is 80 bytes so it -// only overwrites local variables that are no longer needed. - - movq %rsi, 0x40(%rsp) - movq %r8, 0x38(%rsp) - movq %rcx, 0x48(%rsp) - leaq (%r8,%rdi,8), %r10 - movq %r10, 0x30(%rsp) - leaq (%r10,%rdi,8), %r15 - xorq %r11, %r11 - xorq %r9, %r9 -edwards25519_scalarmulbase_alt_copyloop: - movq (%rdx,%r9,8), %rax - movq (%rcx,%r9,8), %rbx - movq %rax, (%r10,%r9,8) - movq %rbx, (%r15,%r9,8) - movq %rbx, (%r8,%r9,8) - movq %r11, (%rsi,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmulbase_alt_copyloop - movq (%r8), %rax - movq %rax, %rbx - decq %rbx - movq %rbx, (%r8) - movq %rax, %rbp - movq %rax, %r12 - shlq $0x2, %rbp - subq %rbp, %r12 - xorq $0x2, %r12 - movq %r12, %rbp - imulq %rax, %rbp - movl $0x2, %eax - addq %rbp, %rax - addq $0x1, %rbp - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp + leaq W3(%rsp), %rdi + leaq Z3(%rsp), %rsi + +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "x86/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 208 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, x_3, y_3, +// z_3 and w_3. 
+ + movq %rdi, 0xc0(%rsp) + xorl %eax, %eax + leaq -0x13(%rax), %rcx + notq %rax + movq %rcx, (%rsp) + movq %rax, 0x8(%rsp) + movq %rax, 0x10(%rsp) + btr $0x3f, %rax + movq %rax, 0x18(%rsp) + movq (%rsi), %rdx + movq 0x8(%rsi), %rcx + movq 0x10(%rsi), %r8 + movq 0x18(%rsi), %r9 movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - movq %r12, 0x28(%rsp) - movq %rdi, %rax - shlq $0x7, %rax - movq %rax, 0x20(%rsp) -edwards25519_scalarmulbase_alt_outerloop: - movq 0x20(%rsp), %r13 - addq $0x3f, %r13 - shrq $0x6, %r13 - cmpq %rdi, %r13 - cmovaeq %rdi, %r13 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi - xorq %r11, %r11 - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 -edwards25519_scalarmulbase_alt_toploop: - movq (%r8,%r9,8), %rbx - movq (%r15,%r9,8), %rcx - movq %r11, %r10 - andq %r12, %r10 - andq %rbp, %r11 - movq %rbx, %rax - orq %rcx, %rax - negq %rax - cmovbq %r10, %r14 - cmovbq %r11, %rsi - cmovbq %rbx, %r12 - cmovbq %rcx, %rbp - sbbq %r11, %r11 - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmulbase_alt_toploop - movq %r12, %rax - orq %rbp, %rax - bsrq %rax, %rcx - xorq $0x3f, %rcx - shldq %cl, %r14, %r12 - shldq %cl, %rsi, %rbp - movq (%r8), %rax - movq %rax, %r14 - movq (%r15), %rax - movq %rax, %rsi - movl $0x1, %r10d - movl $0x0, %r11d - movl $0x0, %ecx - movl $0x1, %edx - movl $0x3a, %r9d - movq %rdi, 0x8(%rsp) - movq %r13, 0x10(%rsp) - movq %r8, (%rsp) - movq %r15, 0x18(%rsp) -edwards25519_scalarmulbase_alt_innerloop: + xorl %r10d, %r10d + bts $0x3f, %r9 + adcq %r10, %rax + imulq $0x13, %rax, %rax + addq %rax, %rdx + adcq %r10, %rcx + adcq %r10, %r8 + adcq %r10, %r9 + movl $0x13, %eax + cmovbq %r10, %rax + subq %rax, %rdx + sbbq %r10, %rcx + sbbq %r10, %r8 + sbbq %r10, %r9 + btr $0x3f, %r9 + movq %rdx, 0x20(%rsp) + movq %rcx, 0x28(%rsp) + movq %r8, 0x30(%rsp) + movq %r9, 0x38(%rsp) xorl %eax, %eax + movq %rax, 0x40(%rsp) + movq %rax, 0x48(%rsp) + movq %rax, 0x50(%rsp) + movq %rax, 0x58(%rsp) + movabsq $0xa0f99e2375022099, %rax + movq %rax, 0x60(%rsp) + movabsq $0xa8c68f3f1d132595, %rax + movq %rax, 0x68(%rsp) + movabsq $0x6c6c893805ac5242, %rax + movq %rax, 0x70(%rsp) + movabsq $0x276508b241770615, %rax + movq %rax, 0x78(%rsp) + movq $0xa, 0x90(%rsp) + movq $0x1, 0x98(%rsp) + jmp edwards25519_scalarmulbase_alt_midloop +edwards25519_scalarmulbase_alt_inverseloop: + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + movq %r8, %rax + andq %r9, %rax + movq %r10, %rdi + andq %r11, %rdi + addq %rax, %rdi + movq %rdi, 0x80(%rsp) + movq %r12, %rax + andq %r13, %rax + movq %r14, %rsi + andq %r15, %rsi + addq %rax, %rsi + movq %rsi, 0x88(%rsp) xorl %ebx, %ebx - xorq %r8, %r8 - xorq %r15, %r15 - btq $0x0, %r14 - cmovbq %rbp, %rax - cmovbq %rsi, %rbx - cmovbq %rcx, %r8 - cmovbq %rdx, %r15 - movq %r14, %r13 - subq %rbx, %r14 - subq %r13, %rbx - movq %r12, %rdi - subq %rax, %rdi - cmovbq %r12, %rbp - leaq -0x1(%rdi), %r12 - cmovbq %rbx, %r14 - cmovbq %r13, %rsi - notq %r12 - cmovbq %r10, %rcx - cmovbq %r11, %rdx - cmovaeq %rdi, %r12 - shrq $1, %r14 - addq %r8, %r10 - addq %r15, %r11 - shrq $1, %r12 - addq %rcx, %rcx - addq %rdx, %rdx - decq %r9 - jne edwards25519_scalarmulbase_alt_innerloop - movq 0x8(%rsp), %rdi - movq 0x10(%rsp), %r13 - movq (%rsp), %r8 - movq 0x18(%rsp), %r15 - movq %r10, (%rsp) - movq %r11, 
0x8(%rsp) - movq %rcx, 0x10(%rsp) - movq %rdx, 0x18(%rsp) - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - xorq %r14, %r14 - xorq %rsi, %rsi - xorq %r10, %r10 - xorq %r11, %r11 - xorq %r9, %r9 -edwards25519_scalarmulbase_alt_congloop: - movq (%r8,%r9,8), %rcx movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r12 - movq 0x10(%rsp), %rax - mulq %rcx + xorq %r9, %rax + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x20(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + xorl %ebp, %ebp + movq (%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x20(%rsp), %rax + xorq %r15, %rax + mulq %r14 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %rbp - movq (%r15,%r9,8), %rcx + adcq %rdx, %rbp + xorl %ecx, %ecx movq 0x8(%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq %rdx, %r12 - shrdq $0x3a, %r14, %r10 - movq %r10, (%r8,%r9,8) - movq %r14, %r10 - movq %r12, %r14 + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x28(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $0x3b, %rbx, %rdi + movq %rdi, (%rsp) + xorl %edi, %edi + movq 0x8(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbp + adcq %rdx, %rdi + movq 0x28(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $0x3b, %rbp, %rsi + movq %rsi, 0x20(%rsp) + xorl %esi, %esi + movq 0x10(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rsi + movq 0x30(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $0x3b, %rcx, %rbx + movq %rbx, 0x8(%rsp) + xorl %ebx, %ebx + movq 0x10(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x30(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $0x3b, %rdi, %rbp + movq %rbp, 0x28(%rsp) movq 0x18(%rsp), %rax - mulq %rcx + xorq %r9, %rax + movq %rax, %rbp + sarq $0x3f, %rbp + andq %r8, %rbp + negq %rbp + mulq %r8 + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x38(%rsp), %rax + xorq %r11, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r10, %rdx + subq %rdx, %rbp + mulq %r10 addq %rax, %rsi adcq %rdx, %rbp - shrdq $0x3a, %rsi, %r11 - movq %r11, (%r15,%r9,8) - movq %rsi, %r11 - movq %rbp, %rsi - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmulbase_alt_congloop - shldq $0x6, %r10, %r14 - shldq $0x6, %r11, %rsi - movq 0x48(%rsp), %r15 - movq (%r8), %rbx - movq 0x28(%rsp), %r12 - imulq %rbx, %r12 - movq (%r15), %rax + shrdq $0x3b, %rsi, %rcx + movq %rcx, 0x10(%rsp) + shrdq $0x3b, %rbp, %rsi + movq 0x18(%rsp), %rax + movq %rsi, 0x18(%rsp) + xorq %r13, %rax + movq %rax, %rsi + sarq $0x3f, %rsi + andq %r12, %rsi + negq %rsi + mulq %r12 + addq %rax, %rbx + adcq %rdx, %rsi + movq 0x38(%rsp), %rax + xorq %r15, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r14, %rdx + subq %rdx, %rsi + mulq %r14 + addq %rax, %rbx + adcq %rdx, %rsi + shrdq $0x3b, %rbx, %rdi + movq %rdi, 0x30(%rsp) + shrdq $0x3b, %rsi, %rbx + movq %rbx, 0x38(%rsp) + movq 0x80(%rsp), %rbx + movq 0x88(%rsp), %rbp + xorl %ecx, %ecx + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x40(%rsp), %rax + xorq %r13, %rax mulq %r12 - addq %rbx, %rax - movq %rdx, %r10 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je edwards25519_scalarmulbase_alt_wmontend -edwards25519_scalarmulbase_alt_wmontloop: - adcq 
(%r8,%r9,8), %r10 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax + movq %rbx, 0x40(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x60(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x60(%rsp) + xorl %ebx, %ebx + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + xorl %ebp, %ebp + movq 0x48(%rsp), %rax + xorq %r13, %rax mulq %r12 - subq %rbx, %rdx - addq %r10, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r10 - incq %r9 - decq %rcx - jne edwards25519_scalarmulbase_alt_wmontloop -edwards25519_scalarmulbase_alt_wmontend: - adcq %r14, %r10 - movq %r10, -0x8(%r8,%rdi,8) - sbbq %r10, %r10 - negq %r10 - movq %rdi, %rcx - xorq %r9, %r9 -edwards25519_scalarmulbase_alt_wcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne edwards25519_scalarmulbase_alt_wcmploop - sbbq $0x0, %r10 - sbbq %r10, %r10 - notq %r10 - xorq %rcx, %rcx - xorq %r9, %r9 -edwards25519_scalarmulbase_alt_wcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r10, %rbx - negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmulbase_alt_wcorrloop + movq %rcx, 0x48(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x68(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, 0x68(%rsp) + xorl %ecx, %ecx + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x50(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, 0x50(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x70(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x70(%rsp) + movq 0x58(%rsp), %rax + xorq %r9, %rax + movq %r9, %rbx + andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rcx + adcq %rbx, %rdx + movq %rdx, %rbx + shldq $0x1, %rcx, %rdx + sarq $0x3f, %rbx + addq %rbx, %rdx + movl $0x13, %eax + imulq %rdx movq 0x40(%rsp), %r8 - movq (%r8), %rbx - movq 0x28(%rsp), %rbp - imulq %rbx, %rbp - movq (%r15), %rax - mulq %rbp - addq %rbx, %rax - movq %rdx, %r11 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je edwards25519_scalarmulbase_alt_zmontend -edwards25519_scalarmulbase_alt_zmontloop: - adcq (%r8,%r9,8), %r11 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax - mulq %rbp - subq %rbx, %rdx - addq %r11, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r11 - incq %r9 - decq %rcx - jne edwards25519_scalarmulbase_alt_zmontloop -edwards25519_scalarmulbase_alt_zmontend: - adcq %rsi, %r11 - movq %r11, -0x8(%r8,%rdi,8) - sbbq %r11, %r11 - negq %r11 - movq %rdi, %rcx - xorq %r9, %r9 -edwards25519_scalarmulbase_alt_zcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne edwards25519_scalarmulbase_alt_zcmploop - sbbq $0x0, %r11 - sbbq %r11, %r11 - notq %r11 - xorq %rcx, %rcx - xorq %r9, %r9 -edwards25519_scalarmulbase_alt_zcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r11, %rbx + addq %rax, %r8 + movq %r8, 0x40(%rsp) + movq 0x48(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x48(%rsp) + movq 0x50(%rsp), %r8 + adcq %rbx, %r8 + movq %r8, 0x50(%rsp) + adcq %rbx, %rcx + shlq $0x3f, %rax + 
addq %rax, %rcx + movq 0x58(%rsp), %rax + movq %rcx, 0x58(%rsp) + xorq %r13, %rax + movq %r13, %rcx + andq %r12, %rcx negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmulbase_alt_zcorrloop - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi -edwards25519_scalarmulbase_alt_crossloop: - movq (%r8,%r9,8), %rcx - movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r10 - movq 0x10(%rsp), %rax - mulq %rcx + mulq %r12 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %r11 - movq (%r15,%r9,8), %rcx - movq 0x8(%rsp), %rax - mulq %rcx - subq %r12, %rdx - subq %rax, %r14 - sbbq %rdx, %r10 - sbbq %r12, %r12 - movq %r14, (%r8,%r9,8) - movq %r10, %r14 - movq 0x18(%rsp), %rax - mulq %rcx - subq %rbp, %rdx + adcq %rdx, %rcx + movq 0x78(%rsp), %rax + xorq %r15, %rax + movq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rcx + mulq %r14 + addq %rax, %rsi + adcq %rcx, %rdx + movq %rdx, %rcx + shldq $0x1, %rsi, %rdx + sarq $0x3f, %rcx + movl $0x13, %eax + addq %rcx, %rdx + imulq %rdx + movq 0x60(%rsp), %r8 + addq %rax, %r8 + movq %r8, 0x60(%rsp) + movq 0x68(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x68(%rsp) + movq 0x70(%rsp), %r8 + adcq %rcx, %r8 + movq %r8, 0x70(%rsp) + adcq %rcx, %rsi + shlq $0x3f, %rax + addq %rax, %rsi + movq %rsi, 0x78(%rsp) +edwards25519_scalarmulbase_alt_midloop: + movq 0x98(%rsp), %rsi + movq (%rsp), %rdx + movq 0x20(%rsp), %rcx + movq %rdx, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + xorl %ebp, %ebp + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 subq %rax, %rsi - sbbq %rdx, %r11 - sbbq %rbp, %rbp - movq %rsi, (%r15,%r9,8) - movq %r11, %rsi - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmulbase_alt_crossloop - xorq %r9, %r9 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r12, %r14 - xorq %rbp, %rsi -edwards25519_scalarmulbase_alt_optnegloop: - movq (%r8,%r9,8), %rax - xorq %r12, %rax - negq %r10 - adcq $0x0, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rax - xorq %rbp, %rax - negq %r11 - adcq $0x0, %rax - sbbq %r11, %r11 - movq %rax, (%r15,%r9,8) - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmulbase_alt_optnegloop - subq %r10, %r14 - subq %r11, %rsi - movq %r13, %r9 -edwards25519_scalarmulbase_alt_shiftloop: - movq -0x8(%r8,%r9,8), %rax - movq %rax, %r10 - shrdq $0x3a, %r14, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %r10, %r14 - movq -0x8(%r15,%r9,8), %rax - movq %rax, %r11 - shrdq $0x3a, %rsi, %rax - movq %rax, -0x8(%r15,%r9,8) - movq %r11, %rsi - decq %r9 - jne edwards25519_scalarmulbase_alt_shiftloop - notq %rbp - movq 0x48(%rsp), %rcx - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r9, %r9 -edwards25519_scalarmulbase_alt_fliploop: - movq %rbp, %rdx - movq (%rcx,%r9,8), %rax - andq %rax, %rdx - andq %r12, %rax - movq (%r8,%r9,8), %rbx - xorq %r12, %rbx - negq %r10 - adcq %rbx, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rbx - xorq %rbp, %rbx - negq %r11 - adcq %rbx, %rdx - sbbq %r11, %r11 - movq %rdx, (%r15,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb 
edwards25519_scalarmulbase_alt_fliploop - subq $0x3a, 0x20(%rsp) - ja edwards25519_scalarmulbase_alt_outerloop + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq 
%rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %rdx + leaq (%rcx,%rax), %rdi + shlq $0x16, %rdx + shlq $0x16, %rdi + sarq $0x2b, %rdx + sarq $0x2b, %rdi + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %rbx + leaq (%rcx,%rax), %rcx + sarq $0x2a, %rbx + sarq $0x2a, %rcx + movq %rdx, 0xa0(%rsp) + movq %rbx, 0xa8(%rsp) + movq %rdi, 0xb0(%rsp) + movq %rcx, 0xb8(%rsp) + movq (%rsp), %r12 + imulq %r12, %rdi + imulq %rdx, %r12 + movq 0x20(%rsp), %r13 + imulq %r13, %rbx + imulq %rcx, %r13 + addq %rbx, %r12 + addq %rdi, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq 
%r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r10 + shlq $0x16, %r8 + shlq $0x16, %r10 + sarq $0x2b, %r8 + sarq $0x2b, %r10 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r15 + leaq (%rcx,%rax), %r11 + sarq $0x2a, %r15 + sarq $0x2a, %r11 + movq %r13, 
%rbx + movq %r12, %rcx + imulq %r8, %r12 + imulq %r15, %rbx + addq %rbx, %r12 + imulq %r11, %r13 + imulq %r10, %rcx + addq %rcx, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq 0xa0(%rsp), %rax + imulq %r8, %rax + movq 0xb0(%rsp), %rdx + imulq %r15, %rdx + imulq 0xa8(%rsp), %r8 + imulq 0xb8(%rsp), %r15 + addq %r8, %r15 + leaq (%rax,%rdx), %r9 + movq 0xa0(%rsp), %rax + imulq %r10, %rax + movq 0xb0(%rsp), %rdx + imulq %r11, %rdx + imulq 0xa8(%rsp), %r10 + imulq 0xb8(%rsp), %r11 + addq %r10, %r11 + leaq (%rax,%rdx), %r13 + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq 
%r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r12 + shlq $0x15, %r8 + shlq $0x15, %r12 + sarq $0x2b, %r8 + sarq $0x2b, %r12 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r10 + leaq (%rcx,%rax), %r14 + sarq $0x2b, %r10 + sarq $0x2b, %r14 + movq %r9, %rax + imulq %r8, %rax + movq %r13, %rdx + imulq %r10, %rdx + imulq %r15, %r8 + imulq %r11, %r10 + addq %r8, %r10 + leaq (%rax,%rdx), %r8 + movq %r9, %rax + imulq %r12, %rax + movq %r13, %rdx + imulq %r14, %rdx + imulq %r15, %r12 + imulq %r11, %r14 + addq %r12, %r14 + leaq (%rax,%rdx), %r12 + movq %rsi, 0x98(%rsp) + decq 0x90(%rsp) + jne edwards25519_scalarmulbase_alt_inverseloop + movq (%rsp), %rax + movq 0x20(%rsp), %rcx + imulq %r8, %rax + imulq %r10, %rcx + addq %rcx, %rax + sarq $0x3f, %rax + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + xorq %rax, %r9 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + xorq %rax, %r11 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + xorq %rax, %r13 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + xorq %rax, %r15 + movq %r8, %rax + andq %r9, %rax + movq %r10, %r12 + andq %r11, %r12 + addq %rax, %r12 + xorl %r13d, %r13d + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r13 + adcq %rdx, %r14 + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + 
movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x58(%rsp), %rax + xorq %r9, %rax + andq %r8, %r9 + negq %r9 + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r9 + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %r9 + mulq %r10 + addq %rax, %r15 + adcq %rdx, %r9 + movq %r9, %rax + shldq $0x1, %r15, %rax + sarq $0x3f, %r9 + movl $0x13, %ebx + leaq 0x1(%rax,%r9,1), %rax + imulq %rbx + xorl %ebp, %ebp + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r9, %r14 + adcq %r9, %r15 + shlq $0x3f, %rax + addq %rax, %r15 + cmovns %rbp, %rbx + subq %rbx, %r12 + sbbq %rbp, %r13 + sbbq %rbp, %r14 + sbbq %rbp, %r15 + btr $0x3f, %r15 + movq 0xc0(%rsp), %rdi + movq %r12, (%rdi) + movq %r13, 0x8(%rdi) + movq %r14, 0x10(%rdi) + movq %r15, 0x18(%rdi) // The final result is x = X * inv(Z), y = Y * inv(Z). // These are the only operations in the whole computation that @@ -1420,18 +2381,10 @@ edwards25519_scalarmulbase_alt_fliploop: // .section .rodata // **************************************************************************** -// The modulus, for the modular inverse - -edwards25519_scalarmulbase_alt_p_25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // 0 * B = 0 and 2^251 * B in extended-projective coordinates // but with Z = 1 assumed and hence left out, so they are (X,Y,T) only. -edwards25519_scalarmulbase_alt_edwards25519_0g: +edwards25519_scalarmulbase_alt_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 @@ -1448,7 +2401,7 @@ edwards25519_scalarmulbase_alt_edwards25519_0g: .quad 0x0000000000000000 .quad 0x0000000000000000 -edwards25519_scalarmulbase_alt_edwards25519_251g: +edwards25519_scalarmulbase_alt_251g: .quad 0x525f946d7c7220e7 .quad 0x4636b0b2f1e35444 @@ -1466,7 +2419,7 @@ edwards25519_scalarmulbase_alt_edwards25519_251g: // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
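// ----------------------------------------------------------------------------
// A note on the closing arithmetic of the inlined inverse above: the result
// is brought back into [0, p_25519) by folding the top of the number in via
// 2^255 == 19 (mod p_25519) (the imulq by 0x13), followed by a masked
// subtraction and a btr of bit 63. A plain-C sketch of that reduction idea
// (reduce_p25519 is an illustrative name; the assembly fuses these steps
// and also copes with a signed intermediate):
//
//      #include <stdint.h>
//      typedef unsigned __int128 u128;
//
//      // Reduce a full 256-bit x modulo p = 2^255 - 19
//      static void reduce_p25519(uint64_t x[4])
//      {
//          // Fold out the top bit: if x = b*2^255 + L then x == L + 19*b
//          uint64_t b = x[3] >> 63;
//          x[3] &= 0x7fffffffffffffffULL;
//          u128 c = (u128)(19 * b);
//          for (int i = 0; i < 4; i++) { c += x[i]; x[i] = (uint64_t)c; c >>= 64; }
//          // Now x < 2^255 + 19; subtract p once more iff x >= p, which
//          // holds iff x + 19 reaches bit 255
//          uint64_t t[4];
//          c = 19;
//          for (int i = 0; i < 4; i++) { c += x[i]; t[i] = (uint64_t)c; c >>= 64; }
//          uint64_t mask = 0 - (t[3] >> 63);     // all-ones iff x >= p
//          t[3] &= 0x7fffffffffffffffULL;        // then t = x - p
//          for (int i = 0; i < 4; i++)
//              x[i] = (x[i] & ~mask) | (t[i] & mask);
//      }
// ----------------------------------------------------------------------------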
-edwards25519_scalarmulbase_alt_edwards25519_gtable:
+edwards25519_scalarmulbase_alt_gtable:
 
 // 2^0 * 1 * G
 
diff --git a/x86_att/curve25519/edwards25519_scalarmuldouble.S b/x86_att/curve25519/edwards25519_scalarmuldouble.S
index 0138d1a4b2..35fd7f4ffc 100644
--- a/x86_att/curve25519/edwards25519_scalarmuldouble.S
+++ b/x86_att/curve25519/edwards25519_scalarmuldouble.S
@@ -42,24 +42,33 @@
 #define scalar (0*NUMSIZE)(%rsp)
 #define bscalar (1*NUMSIZE)(%rsp)
 
-#define acc (3*NUMSIZE)(%rsp)
+#define tabent (2*NUMSIZE)(%rsp)
+#define btabent (6*NUMSIZE)(%rsp)
 
-#define tabent (7*NUMSIZE)(%rsp)
-#define btabent (11*NUMSIZE)(%rsp)
+#define acc (9*NUMSIZE)(%rsp)
 
-#define tab (14*NUMSIZE)(%rsp)
+#define tab (13*NUMSIZE)(%rsp)
 
 // Additional variables kept on the stack
 
-#define bf 2*NUMSIZE(%rsp)
-#define cf 2*NUMSIZE+8(%rsp)
-#define i 2*NUMSIZE+16(%rsp)
-#define res 2*NUMSIZE+24(%rsp)
+#define bf 45*NUMSIZE(%rsp)
+#define cf 45*NUMSIZE+8(%rsp)
+#define i 45*NUMSIZE+16(%rsp)
+#define res 45*NUMSIZE+24(%rsp)
 
 // Total size to reserve on the stack (excluding local subroutines)
 
 #define NSPACE (46*NUMSIZE)
 
+// Syntactic variants to make x86_att forms easier to generate
+
+#define SCALAR (0*NUMSIZE)
+#define BSCALAR (1*NUMSIZE)
+#define TABENT (2*NUMSIZE)
+#define BTABENT (6*NUMSIZE)
+#define ACC (9*NUMSIZE)
+#define TAB (13*NUMSIZE)
+
 // Sub-references used in local subroutines with local stack
 
 #define x_0 0(%rdi)
@@ -493,10 +502,10 @@ edwards25519_scalarmuldouble_standard:
         adcq    %r13, %r9
         adcq    %r14, %r10
         adcq    %r15, %r11
-        movq    %r8, 32(%rsp)
-        movq    %r9, 40(%rsp)
-        movq    %r10, 48(%rsp)
-        movq    %r11, 56(%rsp)
+        movq    %r8, BSCALAR(%rsp)
+        movq    %r9, BSCALAR+8(%rsp)
+        movq    %r10, BSCALAR+16(%rsp)
+        movq    %r11, BSCALAR+24(%rsp)
 
         movq    (%rsi), %r8
         movq    8(%rsi), %r9
@@ -517,10 +526,10 @@ edwards25519_scalarmuldouble_standard:
         adcq    %r13, %r9
         adcq    %r14, %r10
         adcq    %r15, %r11
-        movq    %r8, (%rsp)
-        movq    %r9, 8(%rsp)
-        movq    %r10, 16(%rsp)
-        movq    %r11, 24(%rsp)
+        movq    %r8, SCALAR(%rsp)
+        movq    %r9, SCALAR+8(%rsp)
+        movq    %r10, SCALAR+16(%rsp)
+        movq    %r11, SCALAR+24(%rsp)
 
 // Create table of multiples 1..8 of the general input point at "tab".
// Reduce the input coordinates x and y modulo 2^256 - 38 first, for the @@ -541,13 +550,13 @@ edwards25519_scalarmuldouble_standard: adcq %r10, %rcx adcq %r11, %rsi cmovncq %r8, %rax - movq %rax, 448(%rsp) + movq %rax, TAB(%rsp) cmovncq %r9, %rbx - movq %rbx, 456(%rsp) + movq %rbx, TAB+8(%rsp) cmovncq %r10, %rcx - movq %rcx, 464(%rsp) + movq %rcx, TAB+16(%rsp) cmovncq %r11, %rsi - movq %rsi, 472(%rsp) + movq %rsi, TAB+24(%rsp) movl $38, %eax movq 32(%rdx), %r8 @@ -562,69 +571,69 @@ edwards25519_scalarmuldouble_standard: adcq %r10, %rcx adcq %r11, %rsi cmovncq %r8, %rax - movq %rax, 480(%rsp) + movq %rax, TAB+32(%rsp) cmovncq %r9, %rbx - movq %rbx, 488(%rsp) + movq %rbx, TAB+40(%rsp) cmovncq %r10, %rcx - movq %rcx, 496(%rsp) + movq %rcx, TAB+48(%rsp) cmovncq %r11, %rsi - movq %rsi, 504(%rsp) + movq %rsi, TAB+56(%rsp) movl $1, %eax - movq %rax, 512(%rsp) + movq %rax, TAB+64(%rsp) xorl %eax, %eax - movq %rax, 520(%rsp) - movq %rax, 528(%rsp) - movq %rax, 536(%rsp) + movq %rax, TAB+72(%rsp) + movq %rax, TAB+80(%rsp) + movq %rax, TAB+88(%rsp) - leaq 544(%rsp), %rdi - leaq 448(%rsp), %rsi - leaq 480(%rsp), %rbp + leaq TAB+96(%rsp), %rdi + leaq TAB(%rsp), %rsi + leaq TAB+32(%rsp), %rbp mul_4(x_0,x_1,x_2) // Multiple 2 - leaq 576(%rsp), %rdi - leaq 448(%rsp), %rsi + leaq TAB+1*128(%rsp), %rdi + leaq TAB(%rsp), %rsi callq edwards25519_scalarmuldouble_epdouble // Multiple 3 - leaq 704(%rsp), %rdi - leaq 448(%rsp), %rsi - leaq 576(%rsp), %rbp + leaq TAB+2*128(%rsp), %rdi + leaq TAB(%rsp), %rsi + leaq TAB+1*128(%rsp), %rbp callq edwards25519_scalarmuldouble_epadd // Multiple 4 - leaq 832(%rsp), %rdi - leaq 576(%rsp), %rsi + leaq TAB+3*128(%rsp), %rdi + leaq TAB+1*128(%rsp), %rsi callq edwards25519_scalarmuldouble_epdouble // Multiple 5 - leaq 960(%rsp), %rdi - leaq 448(%rsp), %rsi - leaq 832(%rsp), %rbp + leaq TAB+4*128(%rsp), %rdi + leaq TAB(%rsp), %rsi + leaq TAB+3*128(%rsp), %rbp callq edwards25519_scalarmuldouble_epadd // Multiple 6 - leaq 1088(%rsp), %rdi - leaq 704(%rsp), %rsi + leaq TAB+5*128(%rsp), %rdi + leaq TAB+2*128(%rsp), %rsi callq edwards25519_scalarmuldouble_epdouble // Multiple 7 - leaq 1216(%rsp), %rdi - leaq 448(%rsp), %rsi - leaq 1088(%rsp), %rbp + leaq TAB+6*128(%rsp), %rdi + leaq TAB(%rsp), %rsi + leaq TAB+5*128(%rsp), %rbp callq edwards25519_scalarmuldouble_epadd // Multiple 8 - leaq 1344(%rsp), %rdi - leaq 832(%rsp), %rsi + leaq TAB+7*128(%rsp), %rdi + leaq TAB+3*128(%rsp), %rsi callq edwards25519_scalarmuldouble_epdouble // Handle the initialization, starting the loop counter at i = 252 @@ -636,7 +645,7 @@ edwards25519_scalarmuldouble_standard: // Index for btable entry... - movq 56(%rsp), %rax + movq BSCALAR+24(%rsp), %rax shrq $60, %rax movq %rax, bf @@ -872,22 +881,22 @@ edwards25519_scalarmuldouble_standard: movq 88(%rbp), %rsi cmovzq %rsi, %r15 - movq %rax, 352(%rsp) - movq %rbx, 360(%rsp) - movq %rcx, 368(%rsp) - movq %rdx, 376(%rsp) - movq %r8, 384(%rsp) - movq %r9, 392(%rsp) - movq %r10, 400(%rsp) - movq %r11, 408(%rsp) - movq %r12, 416(%rsp) - movq %r13, 424(%rsp) - movq %r14, 432(%rsp) - movq %r15, 440(%rsp) + movq %rax, BTABENT(%rsp) + movq %rbx, BTABENT+8(%rsp) + movq %rcx, BTABENT+16(%rsp) + movq %rdx, BTABENT+24(%rsp) + movq %r8, BTABENT+32(%rsp) + movq %r9, BTABENT+40(%rsp) + movq %r10, BTABENT+48(%rsp) + movq %r11, BTABENT+56(%rsp) + movq %r12, BTABENT+64(%rsp) + movq %r13, BTABENT+72(%rsp) + movq %r14, BTABENT+80(%rsp) + movq %r15, BTABENT+88(%rsp) // Index for table entry... 
- movq 24(%rsp), %rax + movq SCALAR+24(%rsp), %rax shrq $60, %rax movq %rax, bf @@ -903,7 +912,7 @@ edwards25519_scalarmuldouble_standard: xorl %r10d, %r10d xorl %r11d, %r11d - leaq 480(%rsp), %rbp + leaq TAB+32(%rsp), %rbp cmpq $1, bf movq (%rbp), %rsi @@ -1056,18 +1065,18 @@ edwards25519_scalarmuldouble_standard: movq 56(%rbp), %rsi cmovzq %rsi, %r11 - movq %rax, 256(%rsp) - movq %rbx, 264(%rsp) - movq %rcx, 272(%rsp) - movq %rdx, 280(%rsp) - movq %r8, 288(%rsp) - movq %r9, 296(%rsp) - movq %r10, 304(%rsp) - movq %r11, 312(%rsp) + movq %rax, TABENT+32(%rsp) + movq %rbx, TABENT+40(%rsp) + movq %rcx, TABENT+48(%rsp) + movq %rdx, TABENT+56(%rsp) + movq %r8, TABENT+64(%rsp) + movq %r9, TABENT+72(%rsp) + movq %r10, TABENT+80(%rsp) + movq %r11, TABENT+88(%rsp) // ...followed by the X and W fields - leaq 448(%rsp), %rbp + leaq TAB(%rsp), %rbp xorl %eax, %eax xorl %ebx, %ebx @@ -1229,20 +1238,20 @@ edwards25519_scalarmuldouble_standard: movq 120(%rbp), %rsi cmovzq %rsi, %r11 - movq %rax, 224(%rsp) - movq %rbx, 232(%rsp) - movq %rcx, 240(%rsp) - movq %rdx, 248(%rsp) - movq %r8, 320(%rsp) - movq %r9, 328(%rsp) - movq %r10, 336(%rsp) - movq %r11, 344(%rsp) + movq %rax, TABENT(%rsp) + movq %rbx, TABENT+8(%rsp) + movq %rcx, TABENT+16(%rsp) + movq %rdx, TABENT+24(%rsp) + movq %r8, TABENT+96(%rsp) + movq %r9, TABENT+104(%rsp) + movq %r10, TABENT+112(%rsp) + movq %r11, TABENT+120(%rsp) // Add those elements to initialize the accumulator for bit position 252 - leaq 96(%rsp), %rdi - leaq 224(%rsp), %rsi - leaq 352(%rsp), %rbp + leaq ACC(%rsp), %rdi + leaq TABENT(%rsp), %rsi + leaq BTABENT(%rsp), %rbp callq edwards25519_scalarmuldouble_pepadd // Main loop with acc = [scalar/2^i] * point + [bscalar/2^i] * basepoint @@ -1256,8 +1265,8 @@ edwards25519_scalarmuldouble_loop: // Double to acc' = 2 * acc - leaq 96(%rsp), %rdi - leaq 96(%rsp), %rsi + leaq ACC(%rsp), %rdi + leaq ACC(%rsp), %rsi callq edwards25519_scalarmuldouble_pdouble // Get btable entry, first getting the adjusted bitfield... @@ -1528,26 +1537,26 @@ edwards25519_scalarmuldouble_loop: movq %rax, %rsi cmovnzq %r8, %rsi cmovnzq %rax, %r8 - movq %rsi, 352(%rsp) - movq %r8, 384(%rsp) + movq %rsi, BTABENT(%rsp) + movq %r8, BTABENT+32(%rsp) movq %rbx, %rsi cmovnzq %r9, %rsi cmovnzq %rbx, %r9 - movq %rsi, 360(%rsp) - movq %r9, 392(%rsp) + movq %rsi, BTABENT+8(%rsp) + movq %r9, BTABENT+40(%rsp) movq %rcx, %rsi cmovnzq %r10, %rsi cmovnzq %rcx, %r10 - movq %rsi, 368(%rsp) - movq %r10, 400(%rsp) + movq %rsi, BTABENT+16(%rsp) + movq %r10, BTABENT+48(%rsp) movq %rdx, %rsi cmovnzq %r11, %rsi cmovnzq %rdx, %r11 - movq %rsi, 376(%rsp) - movq %r11, 408(%rsp) + movq %rsi, BTABENT+24(%rsp) + movq %r11, BTABENT+56(%rsp) xorq %rdi, %r12 xorq %rdi, %r13 @@ -1558,10 +1567,10 @@ edwards25519_scalarmuldouble_loop: sbbq $0, %r13 sbbq $0, %r14 sbbq $0, %r15 - movq %r12, 416(%rsp) - movq %r13, 424(%rsp) - movq %r14, 432(%rsp) - movq %r15, 440(%rsp) + movq %r12, BTABENT+64(%rsp) + movq %r13, BTABENT+72(%rsp) + movq %r14, BTABENT+80(%rsp) + movq %r15, BTABENT+88(%rsp) // Get table entry, first getting the adjusted bitfield... 
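The swap-and-negate just written into btabent is how a negative digit is applied to a precomputed (y-x, x+y, 2*d*x*y) triple: negating the point exchanges the first two fields and negates the third. The digits themselves come from the "adjusted bitfield" referred to below; a scalar C sketch of 4-bit signed-window recoding as commonly done (the helper is illustrative; the assembly keeps the running carry in cf):

#include <stdint.h>

/* Turn the next 4-bit window plus an incoming carry into a signed
   digit in -8..8; raw values 9..16 become negative and borrow 1 from
   the next window up. The table is indexed by the magnitude while the
   sign drives the swap/negate shown above. */
static int recode_window(uint64_t raw4, int *carry)
{
    int d = (int)raw4 + *carry;   /* 0..16 */
    *carry = d > 8;               /* propagate into the next window */
    return d - (*carry << 4);     /* d, or d - 16 when d > 8 */
}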
@@ -1592,7 +1601,7 @@ edwards25519_scalarmuldouble_loop: xorl %r10d, %r10d xorl %r11d, %r11d - leaq 480(%rsp), %rbp + leaq TAB+32(%rsp), %rbp cmpq $1, bf movq (%rbp), %rsi @@ -1745,18 +1754,18 @@ edwards25519_scalarmuldouble_loop: movq 56(%rbp), %rsi cmovzq %rsi, %r11 - movq %rax, 256(%rsp) - movq %rbx, 264(%rsp) - movq %rcx, 272(%rsp) - movq %rdx, 280(%rsp) - movq %r8, 288(%rsp) - movq %r9, 296(%rsp) - movq %r10, 304(%rsp) - movq %r11, 312(%rsp) + movq %rax, TABENT+32(%rsp) + movq %rbx, TABENT+40(%rsp) + movq %rcx, TABENT+48(%rsp) + movq %rdx, TABENT+56(%rsp) + movq %r8, TABENT+64(%rsp) + movq %r9, TABENT+72(%rsp) + movq %r10, TABENT+80(%rsp) + movq %r11, TABENT+88(%rsp) // Now do the X and W fields... - leaq 448(%rsp), %rbp + leaq TAB(%rsp), %rbp xorl %eax, %eax xorl %ebx, %ebx @@ -1950,51 +1959,51 @@ edwards25519_scalarmuldouble_loop: sbbq $0, %rcx sbbq $0, %rdx - movq %rax, 224(%rsp) - movq %rbx, 232(%rsp) - movq %rcx, 240(%rsp) - movq %rdx, 248(%rsp) + movq %rax, TABENT(%rsp) + movq %rbx, TABENT+8(%rsp) + movq %rcx, TABENT+16(%rsp) + movq %rdx, TABENT+24(%rsp) subq %rdi, %r8 sbbq $0, %r9 sbbq $0, %r10 sbbq $0, %r11 - movq %r8, 320(%rsp) - movq %r9, 328(%rsp) - movq %r10, 336(%rsp) - movq %r11, 344(%rsp) + movq %r8, TABENT+96(%rsp) + movq %r9, TABENT+104(%rsp) + movq %r10, TABENT+112(%rsp) + movq %r11, TABENT+120(%rsp) // Double to acc' = 4 * acc - leaq 96(%rsp), %rdi - leaq 96(%rsp), %rsi + leaq ACC(%rsp), %rdi + leaq ACC(%rsp), %rsi callq edwards25519_scalarmuldouble_pdouble // Add tabent := tabent + btabent - leaq 224(%rsp), %rdi - leaq 224(%rsp), %rsi - leaq 352(%rsp), %rbp + leaq TABENT(%rsp), %rdi + leaq TABENT(%rsp), %rsi + leaq BTABENT(%rsp), %rbp callq edwards25519_scalarmuldouble_pepadd // Double to acc' = 8 * acc - leaq 96(%rsp), %rdi - leaq 96(%rsp), %rsi + leaq ACC(%rsp), %rdi + leaq ACC(%rsp), %rsi callq edwards25519_scalarmuldouble_pdouble // Double to acc' = 16 * acc - leaq 96(%rsp), %rdi - leaq 96(%rsp), %rsi + leaq ACC(%rsp), %rdi + leaq ACC(%rsp), %rsi callq edwards25519_scalarmuldouble_epdouble // Add table entry, acc := acc + tabent - leaq 96(%rsp), %rdi - leaq 96(%rsp), %rsi - leaq 224(%rsp), %rbp + leaq ACC(%rsp), %rdi + leaq ACC(%rsp), %rsi + leaq TABENT(%rsp), %rbp callq edwards25519_scalarmuldouble_epadd // Loop down @@ -2003,423 +2012,1375 @@ edwards25519_scalarmuldouble_loop: testq %rax, %rax jnz edwards25519_scalarmuldouble_loop -// Modular inverse setup +// Prepare to call the modular inverse function to get tab = 1/z - movq $4, %rdi - leaq 224(%rsp), %rsi - leaq 160(%rsp), %rdx - leaq edwards25519_scalarmuldouble_p25519(%rip), %rcx - leaq 352(%rsp), %r8 + leaq TAB(%rsp), %rdi + leaq ACC+64(%rsp), %rsi -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "x86/generic/bignum_modinv.S". Note -// that the stack it uses for its own temporaries is 80 bytes so it -// only overwrites local variables that are no longer needed. +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "x86/curve25519/bignum_inv_p25519.S". Note that the stack it uses for +// its own temporaries is 208 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, tab and acc. 
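Where the removed bignum_modinv works digit-by-digit with Montgomery-reduction passes (the wmontloop/zmontloop code stripped out below), bignum_inv_p25519 is built on a Bernstein-Yang style division-step recurrence: the shrdq $0x3b shifts and the iteration count of 0xa stored at 0x90(%rsp) correspond to batches of 59 divsteps run 10 times. A scalar C sketch of one division step, for orientation only; the assembly never applies steps one at a time but accumulates them as 2x2 matrices of update coefficients:

#include <stdint.h>

/* One division step on (delta, f, g) with f odd; iterated enough
   times it drives g to 0 with f = +/-gcd(f,g), while the accumulated
   transition coefficients yield the modular inverse. */
static void divstep(int64_t *delta, int64_t *f, int64_t *g)
{
    if (*delta > 0 && (*g & 1)) {      /* swap and subtract */
        int64_t t = *g;
        *g = (*g - *f) >> 1;
        *f = t;
        *delta = 1 - *delta;
    } else if (*g & 1) {               /* keep f, average   */
        *g = (*g + *f) >> 1;
        *delta = 1 + *delta;
    } else {                           /* g even: just halve */
        *g >>= 1;
        *delta = 1 + *delta;
    }
}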
- movq %rsi, 0x40(%rsp) - movq %r8, 0x38(%rsp) - movq %rcx, 0x48(%rsp) - leaq (%r8,%rdi,8), %r10 - movq %r10, 0x30(%rsp) - leaq (%r10,%rdi,8), %r15 - xorq %r11, %r11 - xorq %r9, %r9 -edwards25519_scalarmuldouble_copyloop: - movq (%rdx,%r9,8), %rax - movq (%rcx,%r9,8), %rbx - movq %rax, (%r10,%r9,8) - movq %rbx, (%r15,%r9,8) - movq %rbx, (%r8,%r9,8) - movq %r11, (%rsi,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmuldouble_copyloop - movq (%r8), %rax - movq %rax, %rbx - decq %rbx - movq %rbx, (%r8) - movq %rax, %rbp - movq %rax, %r12 - shlq $0x2, %rbp - subq %rbp, %r12 - xorq $0x2, %r12 - movq %r12, %rbp - imulq %rax, %rbp - movl $0x2, %eax - addq %rbp, %rax - addq $0x1, %rbp - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp + movq %rdi, 0xc0(%rsp) + xorl %eax, %eax + leaq -0x13(%rax), %rcx + notq %rax + movq %rcx, (%rsp) + movq %rax, 0x8(%rsp) + movq %rax, 0x10(%rsp) + btr $0x3f, %rax + movq %rax, 0x18(%rsp) + movq (%rsi), %rdx + movq 0x8(%rsi), %rcx + movq 0x10(%rsi), %r8 + movq 0x18(%rsi), %r9 movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - movq %r12, 0x28(%rsp) - movq %rdi, %rax - shlq $0x7, %rax - movq %rax, 0x20(%rsp) -edwards25519_scalarmuldouble_outerloop: - movq 0x20(%rsp), %r13 - addq $0x3f, %r13 - shrq $0x6, %r13 - cmpq %rdi, %r13 - cmovaeq %rdi, %r13 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi - xorq %r11, %r11 - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 -edwards25519_scalarmuldouble_toploop: - movq (%r8,%r9,8), %rbx - movq (%r15,%r9,8), %rcx - movq %r11, %r10 - andq %r12, %r10 - andq %rbp, %r11 - movq %rbx, %rax - orq %rcx, %rax - negq %rax - cmovbq %r10, %r14 - cmovbq %r11, %rsi - cmovbq %rbx, %r12 - cmovbq %rcx, %rbp - sbbq %r11, %r11 - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmuldouble_toploop - movq %r12, %rax - orq %rbp, %rax - bsrq %rax, %rcx - xorq $0x3f, %rcx - shldq %cl, %r14, %r12 - shldq %cl, %rsi, %rbp - movq (%r8), %rax - movq %rax, %r14 - movq (%r15), %rax - movq %rax, %rsi - movl $0x1, %r10d - movl $0x0, %r11d - movl $0x0, %ecx - movl $0x1, %edx - movl $0x3a, %r9d - movq %rdi, 0x8(%rsp) - movq %r13, 0x10(%rsp) - movq %r8, (%rsp) - movq %r15, 0x18(%rsp) -edwards25519_scalarmuldouble_innerloop: + xorl %r10d, %r10d + bts $0x3f, %r9 + adcq %r10, %rax + imulq $0x13, %rax, %rax + addq %rax, %rdx + adcq %r10, %rcx + adcq %r10, %r8 + adcq %r10, %r9 + movl $0x13, %eax + cmovbq %r10, %rax + subq %rax, %rdx + sbbq %r10, %rcx + sbbq %r10, %r8 + sbbq %r10, %r9 + btr $0x3f, %r9 + movq %rdx, 0x20(%rsp) + movq %rcx, 0x28(%rsp) + movq %r8, 0x30(%rsp) + movq %r9, 0x38(%rsp) xorl %eax, %eax + movq %rax, 0x40(%rsp) + movq %rax, 0x48(%rsp) + movq %rax, 0x50(%rsp) + movq %rax, 0x58(%rsp) + movabsq $0xa0f99e2375022099, %rax + movq %rax, 0x60(%rsp) + movabsq $0xa8c68f3f1d132595, %rax + movq %rax, 0x68(%rsp) + movabsq $0x6c6c893805ac5242, %rax + movq %rax, 0x70(%rsp) + movabsq $0x276508b241770615, %rax + movq %rax, 0x78(%rsp) + movq $0xa, 0x90(%rsp) + movq $0x1, 0x98(%rsp) + jmp curve25519_x25519_midloop +curve25519_x25519_inverseloop: + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + movq %r8, %rax + andq %r9, %rax + movq %r10, %rdi + andq 
%r11, %rdi + addq %rax, %rdi + movq %rdi, 0x80(%rsp) + movq %r12, %rax + andq %r13, %rax + movq %r14, %rsi + andq %r15, %rsi + addq %rax, %rsi + movq %rsi, 0x88(%rsp) xorl %ebx, %ebx - xorq %r8, %r8 - xorq %r15, %r15 - btq $0x0, %r14 - cmovbq %rbp, %rax - cmovbq %rsi, %rbx - cmovbq %rcx, %r8 - cmovbq %rdx, %r15 - movq %r14, %r13 - subq %rbx, %r14 - subq %r13, %rbx - movq %r12, %rdi - subq %rax, %rdi - cmovbq %r12, %rbp - leaq -0x1(%rdi), %r12 - cmovbq %rbx, %r14 - cmovbq %r13, %rsi - notq %r12 - cmovbq %r10, %rcx - cmovbq %r11, %rdx - cmovaeq %rdi, %r12 - shrq $1, %r14 - addq %r8, %r10 - addq %r15, %r11 - shrq $1, %r12 - addq %rcx, %rcx - addq %rdx, %rdx - decq %r9 - jne edwards25519_scalarmuldouble_innerloop - movq 0x8(%rsp), %rdi - movq 0x10(%rsp), %r13 - movq (%rsp), %r8 - movq 0x18(%rsp), %r15 - movq %r10, (%rsp) - movq %r11, 0x8(%rsp) - movq %rcx, 0x10(%rsp) - movq %rdx, 0x18(%rsp) - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - xorq %r14, %r14 - xorq %rsi, %rsi - xorq %r10, %r10 - xorq %r11, %r11 - xorq %r9, %r9 -edwards25519_scalarmuldouble_congloop: - movq (%r8,%r9,8), %rcx movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r12 - movq 0x10(%rsp), %rax - mulq %rcx + xorq %r9, %rax + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x20(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + xorl %ebp, %ebp + movq (%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x20(%rsp), %rax + xorq %r15, %rax + mulq %r14 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %rbp - movq (%r15,%r9,8), %rcx + adcq %rdx, %rbp + xorl %ecx, %ecx movq 0x8(%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq %rdx, %r12 - shrdq $0x3a, %r14, %r10 - movq %r10, (%r8,%r9,8) - movq %r14, %r10 - movq %r12, %r14 + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x28(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $0x3b, %rbx, %rdi + movq %rdi, (%rsp) + xorl %edi, %edi + movq 0x8(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbp + adcq %rdx, %rdi + movq 0x28(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $0x3b, %rbp, %rsi + movq %rsi, 0x20(%rsp) + xorl %esi, %esi + movq 0x10(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rsi + movq 0x30(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $0x3b, %rcx, %rbx + movq %rbx, 0x8(%rsp) + xorl %ebx, %ebx + movq 0x10(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x30(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $0x3b, %rdi, %rbp + movq %rbp, 0x28(%rsp) movq 0x18(%rsp), %rax - mulq %rcx + xorq %r9, %rax + movq %rax, %rbp + sarq $0x3f, %rbp + andq %r8, %rbp + negq %rbp + mulq %r8 addq %rax, %rsi adcq %rdx, %rbp - shrdq $0x3a, %rsi, %r11 - movq %r11, (%r15,%r9,8) - movq %rsi, %r11 - movq %rbp, %rsi - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmuldouble_congloop - shldq $0x6, %r10, %r14 - shldq $0x6, %r11, %rsi - movq 0x48(%rsp), %r15 - movq (%r8), %rbx - movq 0x28(%rsp), %r12 - imulq %rbx, %r12 - movq (%r15), %rax + movq 0x38(%rsp), %rax + xorq %r11, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r10, %rdx + subq %rdx, %rbp + mulq %r10 + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3b, %rsi, %rcx + movq %rcx, 0x10(%rsp) + shrdq $0x3b, %rbp, %rsi + movq 0x18(%rsp), %rax + movq %rsi, 0x18(%rsp) + xorq %r13, %rax + movq %rax, %rsi + sarq $0x3f, 
%rsi + andq %r12, %rsi + negq %rsi mulq %r12 - addq %rbx, %rax - movq %rdx, %r10 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je edwards25519_scalarmuldouble_wmontend -edwards25519_scalarmuldouble_wmontloop: - adcq (%r8,%r9,8), %r10 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax + addq %rax, %rbx + adcq %rdx, %rsi + movq 0x38(%rsp), %rax + xorq %r15, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r14, %rdx + subq %rdx, %rsi + mulq %r14 + addq %rax, %rbx + adcq %rdx, %rsi + shrdq $0x3b, %rbx, %rdi + movq %rdi, 0x30(%rsp) + shrdq $0x3b, %rsi, %rbx + movq %rbx, 0x38(%rsp) + movq 0x80(%rsp), %rbx + movq 0x88(%rsp), %rbp + xorl %ecx, %ecx + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x40(%rsp), %rax + xorq %r13, %rax mulq %r12 - subq %rbx, %rdx - addq %r10, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r10 - incq %r9 - decq %rcx - jne edwards25519_scalarmuldouble_wmontloop -edwards25519_scalarmuldouble_wmontend: - adcq %r14, %r10 - movq %r10, -0x8(%r8,%rdi,8) - sbbq %r10, %r10 - negq %r10 - movq %rdi, %rcx - xorq %r9, %r9 -edwards25519_scalarmuldouble_wcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne edwards25519_scalarmuldouble_wcmploop - sbbq $0x0, %r10 - sbbq %r10, %r10 - notq %r10 - xorq %rcx, %rcx - xorq %r9, %r9 -edwards25519_scalarmuldouble_wcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r10, %rbx - negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmuldouble_wcorrloop + movq %rbx, 0x40(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x60(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x60(%rsp) + xorl %ebx, %ebx + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + xorl %ebp, %ebp + movq 0x48(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rcx, 0x48(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x68(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, 0x68(%rsp) + xorl %ecx, %ecx + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x50(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, 0x50(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x70(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x70(%rsp) + movq 0x58(%rsp), %rax + xorq %r9, %rax + movq %r9, %rbx + andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rcx + adcq %rbx, %rdx + movq %rdx, %rbx + shldq $0x1, %rcx, %rdx + sarq $0x3f, %rbx + addq %rbx, %rdx + movl $0x13, %eax + imulq %rdx movq 0x40(%rsp), %r8 - movq (%r8), %rbx - movq 0x28(%rsp), %rbp - imulq %rbx, %rbp - movq (%r15), %rax - mulq %rbp - addq %rbx, %rax - movq %rdx, %r11 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je edwards25519_scalarmuldouble_zmontend -edwards25519_scalarmuldouble_zmontloop: - adcq (%r8,%r9,8), %r11 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax - mulq %rbp - subq %rbx, %rdx - addq %r11, %rax - movq %rax, 
-0x8(%r8,%r9,8) - movq %rdx, %r11 - incq %r9 - decq %rcx - jne edwards25519_scalarmuldouble_zmontloop -edwards25519_scalarmuldouble_zmontend: - adcq %rsi, %r11 - movq %r11, -0x8(%r8,%rdi,8) - sbbq %r11, %r11 - negq %r11 - movq %rdi, %rcx - xorq %r9, %r9 -edwards25519_scalarmuldouble_zcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne edwards25519_scalarmuldouble_zcmploop - sbbq $0x0, %r11 - sbbq %r11, %r11 - notq %r11 - xorq %rcx, %rcx - xorq %r9, %r9 -edwards25519_scalarmuldouble_zcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r11, %rbx + addq %rax, %r8 + movq %r8, 0x40(%rsp) + movq 0x48(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x48(%rsp) + movq 0x50(%rsp), %r8 + adcq %rbx, %r8 + movq %r8, 0x50(%rsp) + adcq %rbx, %rcx + shlq $0x3f, %rax + addq %rax, %rcx + movq 0x58(%rsp), %rax + movq %rcx, 0x58(%rsp) + xorq %r13, %rax + movq %r13, %rcx + andq %r12, %rcx negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmuldouble_zcorrloop - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi -edwards25519_scalarmuldouble_crossloop: - movq (%r8,%r9,8), %rcx - movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r10 - movq 0x10(%rsp), %rax - mulq %rcx + mulq %r12 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %r11 - movq (%r15,%r9,8), %rcx - movq 0x8(%rsp), %rax - mulq %rcx - subq %r12, %rdx - subq %rax, %r14 - sbbq %rdx, %r10 - sbbq %r12, %r12 - movq %r14, (%r8,%r9,8) - movq %r10, %r14 - movq 0x18(%rsp), %rax - mulq %rcx - subq %rbp, %rdx + adcq %rdx, %rcx + movq 0x78(%rsp), %rax + xorq %r15, %rax + movq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rcx + mulq %r14 + addq %rax, %rsi + adcq %rcx, %rdx + movq %rdx, %rcx + shldq $0x1, %rsi, %rdx + sarq $0x3f, %rcx + movl $0x13, %eax + addq %rcx, %rdx + imulq %rdx + movq 0x60(%rsp), %r8 + addq %rax, %r8 + movq %r8, 0x60(%rsp) + movq 0x68(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x68(%rsp) + movq 0x70(%rsp), %r8 + adcq %rcx, %r8 + movq %r8, 0x70(%rsp) + adcq %rcx, %rsi + shlq $0x3f, %rax + addq %rax, %rsi + movq %rsi, 0x78(%rsp) +curve25519_x25519_midloop: + movq 0x98(%rsp), %rsi + movq (%rsp), %rdx + movq 0x20(%rsp), %rcx + movq %rdx, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + xorl %ebp, %ebp + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 subq %rax, %rsi - sbbq %rdx, %r11 - sbbq %rbp, %rbp - movq %rsi, (%r15,%r9,8) - movq %r11, %rsi - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmuldouble_crossloop - xorq %r9, %r9 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r12, %r14 - xorq %rbp, %rsi -edwards25519_scalarmuldouble_optnegloop: - movq (%r8,%r9,8), %rax - xorq %r12, %rax - negq %r10 - adcq $0x0, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rax - xorq %rbp, %rax - negq %r11 - adcq $0x0, %rax - sbbq %r11, %r11 - movq %rax, (%r15,%r9,8) - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmuldouble_optnegloop - subq %r10, %r14 - subq %r11, %rsi - movq %r13, %r9 -edwards25519_scalarmuldouble_shiftloop: - movq -0x8(%r8,%r9,8), %rax - movq %rax, %r10 - shrdq $0x3a, %r14, %rax 
- movq %rax, -0x8(%r8,%r9,8) - movq %r10, %r14 - movq -0x8(%r15,%r9,8), %rax - movq %rax, %r11 - shrdq $0x3a, %rsi, %rax - movq %rax, -0x8(%r15,%r9,8) - movq %r11, %rsi - decq %r9 - jne edwards25519_scalarmuldouble_shiftloop - notq %rbp - movq 0x48(%rsp), %rcx - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r9, %r9 -edwards25519_scalarmuldouble_fliploop: - movq %rbp, %rdx - movq (%rcx,%r9,8), %rax - andq %rax, %rdx - andq %r12, %rax - movq (%r8,%r9,8), %rbx - xorq %r12, %rbx - negq %r10 - adcq %rbx, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rbx - xorq %rbp, %rbx - negq %r11 - adcq %rbx, %rdx - sbbq %r11, %r11 - movq %rdx, (%r15,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmuldouble_fliploop - subq $0x3a, 0x20(%rsp) - ja edwards25519_scalarmuldouble_outerloop + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq 
%rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %rdx + leaq (%rcx,%rax), %rdi + shlq $0x16, %rdx + shlq $0x16, %rdi + sarq $0x2b, %rdx + sarq $0x2b, %rdi + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %rbx + leaq (%rcx,%rax), %rcx + sarq $0x2a, %rbx + sarq $0x2a, %rcx + movq %rdx, 0xa0(%rsp) + movq %rbx, 0xa8(%rsp) + movq %rdi, 0xb0(%rsp) + movq %rcx, 0xb8(%rsp) + movq (%rsp), %r12 + imulq %r12, %rdi + imulq %rdx, %r12 + movq 0x20(%rsp), %r13 + imulq %r13, %rbx + imulq %rcx, %r13 + addq %rbx, %r12 + addq %rdi, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + 
sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi 
+ testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r10 + shlq $0x16, %r8 + shlq $0x16, %r10 + sarq $0x2b, %r8 + sarq $0x2b, %r10 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r15 + leaq (%rcx,%rax), %r11 + sarq $0x2a, %r15 + sarq $0x2a, %r11 + movq %r13, %rbx + movq %r12, %rcx + imulq %r8, %r12 + imulq %r15, %rbx + addq %rbx, %r12 + imulq %r11, %r13 + imulq %r10, %rcx + addq %rcx, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq 0xa0(%rsp), %rax + imulq %r8, %rax + movq 0xb0(%rsp), %rdx + imulq %r15, %rdx + imulq 0xa8(%rsp), %r8 + imulq 0xb8(%rsp), %r15 + addq %r8, %r15 + leaq (%rax,%rdx), %r9 + movq 0xa0(%rsp), %rax + imulq %r10, %rax + movq 0xb0(%rsp), %rdx + imulq %r11, %rdx + imulq 0xa8(%rsp), %r10 + imulq 0xb8(%rsp), %r11 + addq %r10, %r11 + leaq (%rax,%rdx), %r13 + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq 
$1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r12 + shlq $0x15, %r8 + shlq $0x15, %r12 + sarq $0x2b, %r8 + sarq $0x2b, %r12 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r10 + leaq (%rcx,%rax), %r14 + sarq $0x2b, %r10 + sarq $0x2b, %r14 + movq %r9, %rax + imulq %r8, %rax + movq %r13, %rdx + imulq %r10, %rdx + imulq %r15, %r8 + imulq %r11, %r10 + addq %r8, %r10 + leaq (%rax,%rdx), %r8 + movq %r9, %rax + imulq %r12, %rax + movq %r13, %rdx + imulq %r14, %rdx + imulq %r15, %r12 + imulq %r11, %r14 + addq %r12, %r14 + leaq (%rax,%rdx), %r12 + movq %rsi, 0x98(%rsp) + decq 0x90(%rsp) + jne curve25519_x25519_inverseloop + movq (%rsp), %rax + movq 0x20(%rsp), %rcx + imulq %r8, %rax + imulq %r10, %rcx + addq %rcx, %rax + sarq $0x3f, %rax + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + xorq %rax, %r9 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + xorq %rax, %r11 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, 
%r12 + subq %r13, %r12 + xorq %rax, %r13 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + xorq %rax, %r15 + movq %r8, %rax + andq %r9, %rax + movq %r10, %r12 + andq %r11, %r12 + addq %rax, %r12 + xorl %r13d, %r13d + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r13 + adcq %rdx, %r14 + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x58(%rsp), %rax + xorq %r9, %rax + andq %r8, %r9 + negq %r9 + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r9 + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %r9 + mulq %r10 + addq %rax, %r15 + adcq %rdx, %r9 + movq %r9, %rax + shldq $0x1, %r15, %rax + sarq $0x3f, %r9 + movl $0x13, %ebx + leaq 0x1(%rax,%r9,1), %rax + imulq %rbx + xorl %ebp, %ebp + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r9, %r14 + adcq %r9, %r15 + shlq $0x3f, %rax + addq %rax, %r15 + cmovns %rbp, %rbx + subq %rbx, %r12 + sbbq %rbp, %r13 + sbbq %rbp, %r14 + sbbq %rbp, %r15 + btr $0x3f, %r15 + movq 0xc0(%rsp), %rdi + movq %r12, (%rdi) + movq %r13, 0x8(%rdi) + movq %r14, 0x10(%rdi) + movq %r15, 0x18(%rdi) // Store result movq res, %rdi - leaq 96(%rsp), %rsi - leaq 224(%rsp), %rbp + leaq ACC(%rsp), %rsi + leaq TAB(%rsp), %rbp mul_p25519(x_0,x_1,x_2) movq res, %rdi addq $32, %rdi - leaq 128(%rsp), %rsi - leaq 224(%rsp), %rbp + leaq ACC+32(%rsp), %rsi + leaq TAB(%rsp), %rbp mul_p25519(x_0,x_1,x_2) // Restore stack and registers @@ -2528,14 +3489,6 @@ edwards25519_scalarmuldouble_pepadd: // .section .rodata // **************************************************************************** -// The modulus p_25519 = 2^255 - 19, for the modular inverse - -edwards25519_scalarmuldouble_p25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples. 
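Both files in the patch adopt the same rearranged frame, so it is worth checking once that the renumbered macros tile NSPACE = 46*NUMSIZE bytes without overlap, leaving the low 208 bytes free for the inlined inverse to clobber. A throwaway C rendering of the layout (no such struct exists in the sources; NUMSIZE = 32):

#include <stdint.h>

#define NUMSIZE 32

struct frame {                 /* offset, in units of NUMSIZE  */
    uint64_t scalar[4];        /*  0                           */
    uint64_t bscalar[4];       /*  1                           */
    uint64_t tabent[16];       /*  2: X,Y,Z,W of one point     */
    uint64_t btabent[12];      /*  6: (y-x,x+y,2dxy) triple    */
    uint64_t acc[16];          /*  9                           */
    uint64_t tab[8][16];       /* 13: multiples 1..8 of point  */
    uint64_t bf, cf, i, res;   /* 45*NUMSIZE + 0/8/16/24       */
};

_Static_assert(sizeof(struct frame) == 46 * NUMSIZE,
               "macros exactly fill NSPACE");

The inverse's 208 temporary bytes reach only into scalar, bscalar, tabent and the start of btabent, all dead by that point; res, tab and acc sit above them, as the comment in the diff notes.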
diff --git a/x86_att/curve25519/edwards25519_scalarmuldouble_alt.S b/x86_att/curve25519/edwards25519_scalarmuldouble_alt.S index 7f3dffa395..e17d10b47a 100644 --- a/x86_att/curve25519/edwards25519_scalarmuldouble_alt.S +++ b/x86_att/curve25519/edwards25519_scalarmuldouble_alt.S @@ -42,24 +42,33 @@ #define scalar (0*NUMSIZE)(%rsp) #define bscalar (1*NUMSIZE)(%rsp) -#define acc (3*NUMSIZE)(%rsp) +#define tabent (2*NUMSIZE)(%rsp) +#define btabent (6*NUMSIZE)(%rsp) -#define tabent (7*NUMSIZE)(%rsp) -#define btabent (11*NUMSIZE)(%rsp) +#define acc (9*NUMSIZE)(%rsp) -#define tab (14*NUMSIZE)(%rsp) +#define tab (13*NUMSIZE)(%rsp) // Additional variables kept on the stack -#define bf 2*NUMSIZE(%rsp) -#define cf 2*NUMSIZE+8(%rsp) -#define i 2*NUMSIZE+16(%rsp) -#define res 2*NUMSIZE+24(%rsp) +#define bf 45*NUMSIZE(%rsp) +#define cf 45*NUMSIZE+8(%rsp) +#define i 45*NUMSIZE+16(%rsp) +#define res 45*NUMSIZE+24(%rsp) // Total size to reserve on the stack (excluding local subroutines) #define NSPACE (46*NUMSIZE) +// Syntactic variants to make x86_att forms easier to generate + +#define SCALAR (0*NUMSIZE) +#define BSCALAR (1*NUMSIZE) +#define TABENT (2*NUMSIZE) +#define BTABENT (6*NUMSIZE) +#define ACC (9*NUMSIZE) +#define TAB (13*NUMSIZE) + // Sub-references used in local subroutines with local stack #define x_0 0(%rdi) @@ -610,10 +619,10 @@ edwards25519_scalarmuldouble_alt_standard: adcq %r13, %r9 adcq %r14, %r10 adcq %r15, %r11 - movq %r8, 32(%rsp) - movq %r9, 40(%rsp) - movq %r10, 48(%rsp) - movq %r11, 56(%rsp) + movq %r8, BSCALAR(%rsp) + movq %r9, BSCALAR+8(%rsp) + movq %r10, BSCALAR+16(%rsp) + movq %r11, BSCALAR+24(%rsp) movq (%rsi), %r8 movq 8(%rsi), %r9 @@ -634,10 +643,10 @@ edwards25519_scalarmuldouble_alt_standard: adcq %r13, %r9 adcq %r14, %r10 adcq %r15, %r11 - movq %r8, (%rsp) - movq %r9, 8(%rsp) - movq %r10, 16(%rsp) - movq %r11, 24(%rsp) + movq %r8, SCALAR(%rsp) + movq %r9, SCALAR+8(%rsp) + movq %r10, SCALAR+16(%rsp) + movq %r11, SCALAR+24(%rsp) // Create table of multiples 1..8 of the general input point at "tab". 
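As in the first file, the table of multiples is filled with a fixed schedule of doublings for even multiples and mixed additions with the base entry for odd ones (the leaq/callq sites appear in the hunks below). A C transcription of that schedule, with hypothetical stand-ins for the local subroutines:

#include <stdint.h>

typedef struct { uint64_t w[16]; } point;   /* X,Y,Z,W, 4 words each */

extern void epdouble(point *out, const point *in);
extern void epadd(point *out, const point *a, const point *b);

static void build_table(point tab[8], const point *p)
{
    tab[0] = *p;                          /* 1*P from the reduced input */
    epdouble(&tab[1], &tab[0]);           /* 2*P = 2*(1*P)              */
    epadd(&tab[2], &tab[0], &tab[1]);     /* 3*P = 1*P + 2*P            */
    epdouble(&tab[3], &tab[1]);           /* 4*P = 2*(2*P)              */
    epadd(&tab[4], &tab[0], &tab[3]);     /* 5*P = 1*P + 4*P            */
    epdouble(&tab[5], &tab[2]);           /* 6*P = 2*(3*P)              */
    epadd(&tab[6], &tab[0], &tab[5]);     /* 7*P = 1*P + 6*P            */
    epdouble(&tab[7], &tab[3]);           /* 8*P = 2*(4*P)              */
}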
// Reduce the input coordinates x and y modulo 2^256 - 38 first, for the @@ -658,13 +667,13 @@ edwards25519_scalarmuldouble_alt_standard: adcq %r10, %rcx adcq %r11, %rsi cmovncq %r8, %rax - movq %rax, 448(%rsp) + movq %rax, TAB(%rsp) cmovncq %r9, %rbx - movq %rbx, 456(%rsp) + movq %rbx, TAB+8(%rsp) cmovncq %r10, %rcx - movq %rcx, 464(%rsp) + movq %rcx, TAB+16(%rsp) cmovncq %r11, %rsi - movq %rsi, 472(%rsp) + movq %rsi, TAB+24(%rsp) movl $38, %eax movq 32(%rdx), %r8 @@ -679,69 +688,69 @@ edwards25519_scalarmuldouble_alt_standard: adcq %r10, %rcx adcq %r11, %rsi cmovncq %r8, %rax - movq %rax, 480(%rsp) + movq %rax, TAB+32(%rsp) cmovncq %r9, %rbx - movq %rbx, 488(%rsp) + movq %rbx, TAB+40(%rsp) cmovncq %r10, %rcx - movq %rcx, 496(%rsp) + movq %rcx, TAB+48(%rsp) cmovncq %r11, %rsi - movq %rsi, 504(%rsp) + movq %rsi, TAB+56(%rsp) movl $1, %eax - movq %rax, 512(%rsp) + movq %rax, TAB+64(%rsp) xorl %eax, %eax - movq %rax, 520(%rsp) - movq %rax, 528(%rsp) - movq %rax, 536(%rsp) + movq %rax, TAB+72(%rsp) + movq %rax, TAB+80(%rsp) + movq %rax, TAB+88(%rsp) - leaq 544(%rsp), %rdi - leaq 448(%rsp), %rsi - leaq 480(%rsp), %rbp + leaq TAB+96(%rsp), %rdi + leaq TAB(%rsp), %rsi + leaq TAB+32(%rsp), %rbp mul_4(x_0,x_1,x_2) // Multiple 2 - leaq 576(%rsp), %rdi - leaq 448(%rsp), %rsi + leaq TAB+1*128(%rsp), %rdi + leaq TAB(%rsp), %rsi callq edwards25519_scalarmuldouble_alt_epdouble // Multiple 3 - leaq 704(%rsp), %rdi - leaq 448(%rsp), %rsi - leaq 576(%rsp), %rbp + leaq TAB+2*128(%rsp), %rdi + leaq TAB(%rsp), %rsi + leaq TAB+1*128(%rsp), %rbp callq edwards25519_scalarmuldouble_alt_epadd // Multiple 4 - leaq 832(%rsp), %rdi - leaq 576(%rsp), %rsi + leaq TAB+3*128(%rsp), %rdi + leaq TAB+1*128(%rsp), %rsi callq edwards25519_scalarmuldouble_alt_epdouble // Multiple 5 - leaq 960(%rsp), %rdi - leaq 448(%rsp), %rsi - leaq 832(%rsp), %rbp + leaq TAB+4*128(%rsp), %rdi + leaq TAB(%rsp), %rsi + leaq TAB+3*128(%rsp), %rbp callq edwards25519_scalarmuldouble_alt_epadd // Multiple 6 - leaq 1088(%rsp), %rdi - leaq 704(%rsp), %rsi + leaq TAB+5*128(%rsp), %rdi + leaq TAB+2*128(%rsp), %rsi callq edwards25519_scalarmuldouble_alt_epdouble // Multiple 7 - leaq 1216(%rsp), %rdi - leaq 448(%rsp), %rsi - leaq 1088(%rsp), %rbp + leaq TAB+6*128(%rsp), %rdi + leaq TAB(%rsp), %rsi + leaq TAB+5*128(%rsp), %rbp callq edwards25519_scalarmuldouble_alt_epadd // Multiple 8 - leaq 1344(%rsp), %rdi - leaq 832(%rsp), %rsi + leaq TAB+7*128(%rsp), %rdi + leaq TAB+3*128(%rsp), %rsi callq edwards25519_scalarmuldouble_alt_epdouble // Handle the initialization, starting the loop counter at i = 252 @@ -753,7 +762,7 @@ edwards25519_scalarmuldouble_alt_standard: // Index for btable entry... - movq 56(%rsp), %rax + movq BSCALAR+24(%rsp), %rax shrq $60, %rax movq %rax, bf @@ -989,22 +998,22 @@ edwards25519_scalarmuldouble_alt_standard: movq 88(%rbp), %rsi cmovzq %rsi, %r15 - movq %rax, 352(%rsp) - movq %rbx, 360(%rsp) - movq %rcx, 368(%rsp) - movq %rdx, 376(%rsp) - movq %r8, 384(%rsp) - movq %r9, 392(%rsp) - movq %r10, 400(%rsp) - movq %r11, 408(%rsp) - movq %r12, 416(%rsp) - movq %r13, 424(%rsp) - movq %r14, 432(%rsp) - movq %r15, 440(%rsp) + movq %rax, BTABENT(%rsp) + movq %rbx, BTABENT+8(%rsp) + movq %rcx, BTABENT+16(%rsp) + movq %rdx, BTABENT+24(%rsp) + movq %r8, BTABENT+32(%rsp) + movq %r9, BTABENT+40(%rsp) + movq %r10, BTABENT+48(%rsp) + movq %r11, BTABENT+56(%rsp) + movq %r12, BTABENT+64(%rsp) + movq %r13, BTABENT+72(%rsp) + movq %r14, BTABENT+80(%rsp) + movq %r15, BTABENT+88(%rsp) // Index for table entry... 
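Initialization pulls the top 4-bit window of each scalar straight off the high word, shrq $60 for bscalar above and for scalar just below, and seeds the accumulator with a single mixed addition rather than starting from the identity. A sketch with hypothetical helpers:

#include <stdint.h>

typedef struct { uint64_t w[16]; } point;

extern void select_tab(point *out, uint64_t idx);   /* from tab  */
extern void select_btab(point *out, uint64_t idx);  /* from btab */
extern void pepadd(point *out, const point *a, const point *b);

static void init_acc(point *acc, const uint64_t scalar[4],
                     const uint64_t bscalar[4])
{
    point t, bt;
    select_btab(&bt, bscalar[3] >> 60);  /* index for btable entry   */
    select_tab(&t, scalar[3] >> 60);     /* index for table entry    */
    pepadd(acc, &t, &bt);                /* acc for bit position 252 */
}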
- movq 24(%rsp), %rax + movq SCALAR+24(%rsp), %rax shrq $60, %rax movq %rax, bf @@ -1020,7 +1029,7 @@ edwards25519_scalarmuldouble_alt_standard: xorl %r10d, %r10d xorl %r11d, %r11d - leaq 480(%rsp), %rbp + leaq TAB+32(%rsp), %rbp cmpq $1, bf movq (%rbp), %rsi @@ -1173,18 +1182,18 @@ edwards25519_scalarmuldouble_alt_standard: movq 56(%rbp), %rsi cmovzq %rsi, %r11 - movq %rax, 256(%rsp) - movq %rbx, 264(%rsp) - movq %rcx, 272(%rsp) - movq %rdx, 280(%rsp) - movq %r8, 288(%rsp) - movq %r9, 296(%rsp) - movq %r10, 304(%rsp) - movq %r11, 312(%rsp) + movq %rax, TABENT+32(%rsp) + movq %rbx, TABENT+40(%rsp) + movq %rcx, TABENT+48(%rsp) + movq %rdx, TABENT+56(%rsp) + movq %r8, TABENT+64(%rsp) + movq %r9, TABENT+72(%rsp) + movq %r10, TABENT+80(%rsp) + movq %r11, TABENT+88(%rsp) // ...followed by the X and W fields - leaq 448(%rsp), %rbp + leaq TAB(%rsp), %rbp xorl %eax, %eax xorl %ebx, %ebx @@ -1346,20 +1355,20 @@ edwards25519_scalarmuldouble_alt_standard: movq 120(%rbp), %rsi cmovzq %rsi, %r11 - movq %rax, 224(%rsp) - movq %rbx, 232(%rsp) - movq %rcx, 240(%rsp) - movq %rdx, 248(%rsp) - movq %r8, 320(%rsp) - movq %r9, 328(%rsp) - movq %r10, 336(%rsp) - movq %r11, 344(%rsp) + movq %rax, TABENT(%rsp) + movq %rbx, TABENT+8(%rsp) + movq %rcx, TABENT+16(%rsp) + movq %rdx, TABENT+24(%rsp) + movq %r8, TABENT+96(%rsp) + movq %r9, TABENT+104(%rsp) + movq %r10, TABENT+112(%rsp) + movq %r11, TABENT+120(%rsp) // Add those elements to initialize the accumulator for bit position 252 - leaq 96(%rsp), %rdi - leaq 224(%rsp), %rsi - leaq 352(%rsp), %rbp + leaq ACC(%rsp), %rdi + leaq TABENT(%rsp), %rsi + leaq BTABENT(%rsp), %rbp callq edwards25519_scalarmuldouble_alt_pepadd // Main loop with acc = [scalar/2^i] * point + [bscalar/2^i] * basepoint @@ -1373,8 +1382,8 @@ edwards25519_scalarmuldouble_alt_loop: // Double to acc' = 2 * acc - leaq 96(%rsp), %rdi - leaq 96(%rsp), %rsi + leaq ACC(%rsp), %rdi + leaq ACC(%rsp), %rsi callq edwards25519_scalarmuldouble_alt_pdouble // Get btable entry, first getting the adjusted bitfield... @@ -1645,26 +1654,26 @@ edwards25519_scalarmuldouble_alt_loop: movq %rax, %rsi cmovnzq %r8, %rsi cmovnzq %rax, %r8 - movq %rsi, 352(%rsp) - movq %r8, 384(%rsp) + movq %rsi, BTABENT(%rsp) + movq %r8, BTABENT+32(%rsp) movq %rbx, %rsi cmovnzq %r9, %rsi cmovnzq %rbx, %r9 - movq %rsi, 360(%rsp) - movq %r9, 392(%rsp) + movq %rsi, BTABENT+8(%rsp) + movq %r9, BTABENT+40(%rsp) movq %rcx, %rsi cmovnzq %r10, %rsi cmovnzq %rcx, %r10 - movq %rsi, 368(%rsp) - movq %r10, 400(%rsp) + movq %rsi, BTABENT+16(%rsp) + movq %r10, BTABENT+48(%rsp) movq %rdx, %rsi cmovnzq %r11, %rsi cmovnzq %rdx, %r11 - movq %rsi, 376(%rsp) - movq %r11, 408(%rsp) + movq %rsi, BTABENT+24(%rsp) + movq %r11, BTABENT+56(%rsp) xorq %rdi, %r12 xorq %rdi, %r13 @@ -1675,10 +1684,10 @@ edwards25519_scalarmuldouble_alt_loop: sbbq $0, %r13 sbbq $0, %r14 sbbq $0, %r15 - movq %r12, 416(%rsp) - movq %r13, 424(%rsp) - movq %r14, 432(%rsp) - movq %r15, 440(%rsp) + movq %r12, BTABENT+64(%rsp) + movq %r13, BTABENT+72(%rsp) + movq %r14, BTABENT+80(%rsp) + movq %r15, BTABENT+88(%rsp) // Get table entry, first getting the adjusted bitfield... 
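For orientation in the hunks around this point: each main-loop iteration computes acc := 16*acc + tabent, with the btabent contribution folded into tabent by one pepadd between the doublings, and only the final doubling and addition carried out in full extended coordinates. A C sketch of one iteration, with helper names standing in for the call sites in the diff:

#include <stdint.h>

typedef struct { uint64_t w[16]; } point;

extern void pdouble(point *out, const point *in);   /* cheap double  */
extern void epdouble(point *out, const point *in);  /* extended dbl  */
extern void pepadd(point *out, const point *a, const point *b);
extern void epadd(point *out, const point *a, const point *b);
extern void select_entries(point *t, point *bt, int i); /* windows   */

static void loop_body(point *acc, int i)
{
    point tabent, btabent;
    pdouble(acc, acc);                    /* acc' = 2*acc             */
    select_entries(&tabent, &btabent, i); /* adjusted-bitfield lookup */
    pdouble(acc, acc);                    /* acc' = 4*acc             */
    pepadd(&tabent, &tabent, &btabent);   /* tabent := tabent+btabent */
    pdouble(acc, acc);                    /* acc' = 8*acc             */
    epdouble(acc, acc);                   /* acc' = 16*acc            */
    epadd(acc, acc, &tabent);             /* acc := acc + tabent      */
}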
@@ -1709,7 +1718,7 @@ edwards25519_scalarmuldouble_alt_loop: xorl %r10d, %r10d xorl %r11d, %r11d - leaq 480(%rsp), %rbp + leaq TAB+32(%rsp), %rbp cmpq $1, bf movq (%rbp), %rsi @@ -1862,18 +1871,18 @@ edwards25519_scalarmuldouble_alt_loop: movq 56(%rbp), %rsi cmovzq %rsi, %r11 - movq %rax, 256(%rsp) - movq %rbx, 264(%rsp) - movq %rcx, 272(%rsp) - movq %rdx, 280(%rsp) - movq %r8, 288(%rsp) - movq %r9, 296(%rsp) - movq %r10, 304(%rsp) - movq %r11, 312(%rsp) + movq %rax, TABENT+32(%rsp) + movq %rbx, TABENT+40(%rsp) + movq %rcx, TABENT+48(%rsp) + movq %rdx, TABENT+56(%rsp) + movq %r8, TABENT+64(%rsp) + movq %r9, TABENT+72(%rsp) + movq %r10, TABENT+80(%rsp) + movq %r11, TABENT+88(%rsp) // Now do the X and W fields... - leaq 448(%rsp), %rbp + leaq TAB(%rsp), %rbp xorl %eax, %eax xorl %ebx, %ebx @@ -2067,51 +2076,51 @@ edwards25519_scalarmuldouble_alt_loop: sbbq $0, %rcx sbbq $0, %rdx - movq %rax, 224(%rsp) - movq %rbx, 232(%rsp) - movq %rcx, 240(%rsp) - movq %rdx, 248(%rsp) + movq %rax, TABENT(%rsp) + movq %rbx, TABENT+8(%rsp) + movq %rcx, TABENT+16(%rsp) + movq %rdx, TABENT+24(%rsp) subq %rdi, %r8 sbbq $0, %r9 sbbq $0, %r10 sbbq $0, %r11 - movq %r8, 320(%rsp) - movq %r9, 328(%rsp) - movq %r10, 336(%rsp) - movq %r11, 344(%rsp) + movq %r8, TABENT+96(%rsp) + movq %r9, TABENT+104(%rsp) + movq %r10, TABENT+112(%rsp) + movq %r11, TABENT+120(%rsp) // Double to acc' = 4 * acc - leaq 96(%rsp), %rdi - leaq 96(%rsp), %rsi + leaq ACC(%rsp), %rdi + leaq ACC(%rsp), %rsi callq edwards25519_scalarmuldouble_alt_pdouble // Add tabent := tabent + btabent - leaq 224(%rsp), %rdi - leaq 224(%rsp), %rsi - leaq 352(%rsp), %rbp + leaq TABENT(%rsp), %rdi + leaq TABENT(%rsp), %rsi + leaq BTABENT(%rsp), %rbp callq edwards25519_scalarmuldouble_alt_pepadd // Double to acc' = 8 * acc - leaq 96(%rsp), %rdi - leaq 96(%rsp), %rsi + leaq ACC(%rsp), %rdi + leaq ACC(%rsp), %rsi callq edwards25519_scalarmuldouble_alt_pdouble // Double to acc' = 16 * acc - leaq 96(%rsp), %rdi - leaq 96(%rsp), %rsi + leaq ACC(%rsp), %rdi + leaq ACC(%rsp), %rsi callq edwards25519_scalarmuldouble_alt_epdouble // Add table entry, acc := acc + tabent - leaq 96(%rsp), %rdi - leaq 96(%rsp), %rsi - leaq 224(%rsp), %rbp + leaq ACC(%rsp), %rdi + leaq ACC(%rsp), %rsi + leaq TABENT(%rsp), %rbp callq edwards25519_scalarmuldouble_alt_epadd // Loop down @@ -2120,423 +2129,1375 @@ edwards25519_scalarmuldouble_alt_loop: testq %rax, %rax jnz edwards25519_scalarmuldouble_alt_loop -// Modular inverse setup +// Prepare to call the modular inverse function to get tab = 1/z - movq $4, %rdi - leaq 224(%rsp), %rsi - leaq 160(%rsp), %rdx - leaq edwards25519_scalarmuldouble_alt_p25519(%rip), %rcx - leaq 352(%rsp), %r8 + leaq TAB(%rsp), %rdi + leaq ACC+64(%rsp), %rsi -// Inline copy of bignum_modinv, identical except for stripping out the -// prologue and epilogue saving and restoring registers and the initial -// test for k = 0 (which is trivially false here since k = 4). For more -// details and explanations see "x86/generic/bignum_modinv.S". Note -// that the stack it uses for its own temporaries is 80 bytes so it -// only overwrites local variables that are no longer needed. +// Inline copy of bignum_inv_p25519, identical except for stripping out +// the prologue and epilogue saving and restoring registers and making +// and reclaiming room on the stack. For more details and explanations see +// "x86/curve25519/bignum_inv_p25519.S". 
Note that the stack it uses for +// its own temporaries is 208 bytes, so it has no effect on variables +// that are needed in the rest of our computation here: res, tab and acc. - movq %rsi, 0x40(%rsp) - movq %r8, 0x38(%rsp) - movq %rcx, 0x48(%rsp) - leaq (%r8,%rdi,8), %r10 - movq %r10, 0x30(%rsp) - leaq (%r10,%rdi,8), %r15 - xorq %r11, %r11 - xorq %r9, %r9 -edwards25519_scalarmuldouble_alt_copyloop: - movq (%rdx,%r9,8), %rax - movq (%rcx,%r9,8), %rbx - movq %rax, (%r10,%r9,8) - movq %rbx, (%r15,%r9,8) - movq %rbx, (%r8,%r9,8) - movq %r11, (%rsi,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmuldouble_alt_copyloop - movq (%r8), %rax - movq %rax, %rbx - decq %rbx - movq %rbx, (%r8) - movq %rax, %rbp - movq %rax, %r12 - shlq $0x2, %rbp - subq %rbp, %r12 - xorq $0x2, %r12 - movq %r12, %rbp - imulq %rax, %rbp - movl $0x2, %eax - addq %rbp, %rax - addq $0x1, %rbp - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp - movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - imulq %rbp, %rbp + movq %rdi, 0xc0(%rsp) + xorl %eax, %eax + leaq -0x13(%rax), %rcx + notq %rax + movq %rcx, (%rsp) + movq %rax, 0x8(%rsp) + movq %rax, 0x10(%rsp) + btr $0x3f, %rax + movq %rax, 0x18(%rsp) + movq (%rsi), %rdx + movq 0x8(%rsi), %rcx + movq 0x10(%rsi), %r8 + movq 0x18(%rsi), %r9 movl $0x1, %eax - addq %rbp, %rax - imulq %rax, %r12 - movq %r12, 0x28(%rsp) - movq %rdi, %rax - shlq $0x7, %rax - movq %rax, 0x20(%rsp) -edwards25519_scalarmuldouble_alt_outerloop: - movq 0x20(%rsp), %r13 - addq $0x3f, %r13 - shrq $0x6, %r13 - cmpq %rdi, %r13 - cmovaeq %rdi, %r13 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi - xorq %r11, %r11 - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 -edwards25519_scalarmuldouble_alt_toploop: - movq (%r8,%r9,8), %rbx - movq (%r15,%r9,8), %rcx - movq %r11, %r10 - andq %r12, %r10 - andq %rbp, %r11 - movq %rbx, %rax - orq %rcx, %rax - negq %rax - cmovbq %r10, %r14 - cmovbq %r11, %rsi - cmovbq %rbx, %r12 - cmovbq %rcx, %rbp - sbbq %r11, %r11 - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmuldouble_alt_toploop - movq %r12, %rax - orq %rbp, %rax - bsrq %rax, %rcx - xorq $0x3f, %rcx - shldq %cl, %r14, %r12 - shldq %cl, %rsi, %rbp - movq (%r8), %rax - movq %rax, %r14 - movq (%r15), %rax - movq %rax, %rsi - movl $0x1, %r10d - movl $0x0, %r11d - movl $0x0, %ecx - movl $0x1, %edx - movl $0x3a, %r9d - movq %rdi, 0x8(%rsp) - movq %r13, 0x10(%rsp) - movq %r8, (%rsp) - movq %r15, 0x18(%rsp) -edwards25519_scalarmuldouble_alt_innerloop: + xorl %r10d, %r10d + bts $0x3f, %r9 + adcq %r10, %rax + imulq $0x13, %rax, %rax + addq %rax, %rdx + adcq %r10, %rcx + adcq %r10, %r8 + adcq %r10, %r9 + movl $0x13, %eax + cmovbq %r10, %rax + subq %rax, %rdx + sbbq %r10, %rcx + sbbq %r10, %r8 + sbbq %r10, %r9 + btr $0x3f, %r9 + movq %rdx, 0x20(%rsp) + movq %rcx, 0x28(%rsp) + movq %r8, 0x30(%rsp) + movq %r9, 0x38(%rsp) xorl %eax, %eax + movq %rax, 0x40(%rsp) + movq %rax, 0x48(%rsp) + movq %rax, 0x50(%rsp) + movq %rax, 0x58(%rsp) + movabsq $0xa0f99e2375022099, %rax + movq %rax, 0x60(%rsp) + movabsq $0xa8c68f3f1d132595, %rax + movq %rax, 0x68(%rsp) + movabsq $0x6c6c893805ac5242, %rax + movq %rax, 0x70(%rsp) + movabsq $0x276508b241770615, %rax + movq %rax, 0x78(%rsp) + movq $0xa, 0x90(%rsp) + movq $0x1, 0x98(%rsp) + jmp curve25519_x25519_midloop +curve25519_x25519_inverseloop: + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + movq 
%r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + movq %r8, %rax + andq %r9, %rax + movq %r10, %rdi + andq %r11, %rdi + addq %rax, %rdi + movq %rdi, 0x80(%rsp) + movq %r12, %rax + andq %r13, %rax + movq %r14, %rsi + andq %r15, %rsi + addq %rax, %rsi + movq %rsi, 0x88(%rsp) xorl %ebx, %ebx - xorq %r8, %r8 - xorq %r15, %r15 - btq $0x0, %r14 - cmovbq %rbp, %rax - cmovbq %rsi, %rbx - cmovbq %rcx, %r8 - cmovbq %rdx, %r15 - movq %r14, %r13 - subq %rbx, %r14 - subq %r13, %rbx - movq %r12, %rdi - subq %rax, %rdi - cmovbq %r12, %rbp - leaq -0x1(%rdi), %r12 - cmovbq %rbx, %r14 - cmovbq %r13, %rsi - notq %r12 - cmovbq %r10, %rcx - cmovbq %r11, %rdx - cmovaeq %rdi, %r12 - shrq $1, %r14 - addq %r8, %r10 - addq %r15, %r11 - shrq $1, %r12 - addq %rcx, %rcx - addq %rdx, %rdx - decq %r9 - jne edwards25519_scalarmuldouble_alt_innerloop - movq 0x8(%rsp), %rdi - movq 0x10(%rsp), %r13 - movq (%rsp), %r8 - movq 0x18(%rsp), %r15 - movq %r10, (%rsp) - movq %r11, 0x8(%rsp) - movq %rcx, 0x10(%rsp) - movq %rdx, 0x18(%rsp) - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - xorq %r14, %r14 - xorq %rsi, %rsi - xorq %r10, %r10 - xorq %r11, %r11 - xorq %r9, %r9 -edwards25519_scalarmuldouble_alt_congloop: - movq (%r8,%r9,8), %rcx movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r12 - movq 0x10(%rsp), %rax - mulq %rcx + xorq %r9, %rax + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x20(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + xorl %ebp, %ebp + movq (%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x20(%rsp), %rax + xorq %r15, %rax + mulq %r14 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %rbp - movq (%r15,%r9,8), %rcx + adcq %rdx, %rbp + xorl %ecx, %ecx movq 0x8(%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq %rdx, %r12 - shrdq $0x3a, %r14, %r10 - movq %r10, (%r8,%r9,8) - movq %r14, %r10 - movq %r12, %r14 + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x28(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $0x3b, %rbx, %rdi + movq %rdi, (%rsp) + xorl %edi, %edi + movq 0x8(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbp + adcq %rdx, %rdi + movq 0x28(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $0x3b, %rbp, %rsi + movq %rsi, 0x20(%rsp) + xorl %esi, %esi + movq 0x10(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rsi + movq 0x30(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $0x3b, %rcx, %rbx + movq %rbx, 0x8(%rsp) + xorl %ebx, %ebx + movq 0x10(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rdi + adcq %rdx, %rbx + movq 0x30(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $0x3b, %rdi, %rbp + movq %rbp, 0x28(%rsp) movq 0x18(%rsp), %rax - mulq %rcx + xorq %r9, %rax + movq %rax, %rbp + sarq $0x3f, %rbp + andq %r8, %rbp + negq %rbp + mulq %r8 addq %rax, %rsi adcq %rdx, %rbp - shrdq $0x3a, %rsi, %r11 - movq %r11, (%r15,%r9,8) - movq %rsi, %r11 - movq %rbp, %rsi - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmuldouble_alt_congloop - shldq $0x6, %r10, %r14 - shldq $0x6, %r11, %rsi - movq 0x48(%rsp), %r15 - movq (%r8), %rbx - movq 0x28(%rsp), %r12 - imulq %rbx, %r12 - movq (%r15), %rax + movq 0x38(%rsp), %rax + xorq %r11, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r10, %rdx + subq %rdx, %rbp + mulq %r10 
+ addq %rax, %rsi + adcq %rdx, %rbp + shrdq $0x3b, %rsi, %rcx + movq %rcx, 0x10(%rsp) + shrdq $0x3b, %rbp, %rsi + movq 0x18(%rsp), %rax + movq %rsi, 0x18(%rsp) + xorq %r13, %rax + movq %rax, %rsi + sarq $0x3f, %rsi + andq %r12, %rsi + negq %rsi mulq %r12 - addq %rbx, %rax - movq %rdx, %r10 - movl $0x1, %r9d - movq %rdi, %rcx - decq %rcx - je edwards25519_scalarmuldouble_alt_wmontend -edwards25519_scalarmuldouble_alt_wmontloop: - adcq (%r8,%r9,8), %r10 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax + addq %rax, %rbx + adcq %rdx, %rsi + movq 0x38(%rsp), %rax + xorq %r15, %rax + movq %rax, %rdx + sarq $0x3f, %rdx + andq %r14, %rdx + subq %rdx, %rsi + mulq %r14 + addq %rax, %rbx + adcq %rdx, %rsi + shrdq $0x3b, %rbx, %rdi + movq %rdi, 0x30(%rsp) + shrdq $0x3b, %rsi, %rbx + movq %rbx, 0x38(%rsp) + movq 0x80(%rsp), %rbx + movq 0x88(%rsp), %rbp + xorl %ecx, %ecx + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x40(%rsp), %rax + xorq %r13, %rax mulq %r12 - subq %rbx, %rdx - addq %r10, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r10 - incq %r9 - decq %rcx - jne edwards25519_scalarmuldouble_alt_wmontloop -edwards25519_scalarmuldouble_alt_wmontend: - adcq %r14, %r10 - movq %r10, -0x8(%r8,%rdi,8) - sbbq %r10, %r10 - negq %r10 - movq %rdi, %rcx - xorq %r9, %r9 -edwards25519_scalarmuldouble_alt_wcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne edwards25519_scalarmuldouble_alt_wcmploop - sbbq $0x0, %r10 - sbbq %r10, %r10 - notq %r10 - xorq %rcx, %rcx - xorq %r9, %r9 -edwards25519_scalarmuldouble_alt_wcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r10, %rbx - negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmuldouble_alt_wcorrloop + movq %rbx, 0x40(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x60(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x60(%rsp) + xorl %ebx, %ebx + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + xorl %ebp, %ebp + movq 0x48(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rcx, 0x48(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq 0x68(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, 0x68(%rsp) + xorl %ecx, %ecx + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + xorl %esi, %esi + movq 0x50(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, 0x50(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq 0x70(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, 0x70(%rsp) + movq 0x58(%rsp), %rax + xorq %r9, %rax + movq %r9, %rbx + andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rcx + adcq %rbx, %rdx + movq %rdx, %rbx + shldq $0x1, %rcx, %rdx + sarq $0x3f, %rbx + addq %rbx, %rdx + movl $0x13, %eax + imulq %rdx movq 0x40(%rsp), %r8 - movq (%r8), %rbx - movq 0x28(%rsp), %rbp - imulq %rbx, %rbp - movq (%r15), %rax - mulq %rbp - addq %rbx, %rax - movq %rdx, %r11 - movl $0x1, %r9d - 
movq %rdi, %rcx - decq %rcx - je edwards25519_scalarmuldouble_alt_zmontend -edwards25519_scalarmuldouble_alt_zmontloop: - adcq (%r8,%r9,8), %r11 - sbbq %rbx, %rbx - movq (%r15,%r9,8), %rax - mulq %rbp - subq %rbx, %rdx - addq %r11, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %rdx, %r11 - incq %r9 - decq %rcx - jne edwards25519_scalarmuldouble_alt_zmontloop -edwards25519_scalarmuldouble_alt_zmontend: - adcq %rsi, %r11 - movq %r11, -0x8(%r8,%rdi,8) - sbbq %r11, %r11 - negq %r11 - movq %rdi, %rcx - xorq %r9, %r9 -edwards25519_scalarmuldouble_alt_zcmploop: - movq (%r8,%r9,8), %rax - sbbq (%r15,%r9,8), %rax - incq %r9 - decq %rcx - jne edwards25519_scalarmuldouble_alt_zcmploop - sbbq $0x0, %r11 - sbbq %r11, %r11 - notq %r11 - xorq %rcx, %rcx - xorq %r9, %r9 -edwards25519_scalarmuldouble_alt_zcorrloop: - movq (%r8,%r9,8), %rax - movq (%r15,%r9,8), %rbx - andq %r11, %rbx + addq %rax, %r8 + movq %r8, 0x40(%rsp) + movq 0x48(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x48(%rsp) + movq 0x50(%rsp), %r8 + adcq %rbx, %r8 + movq %r8, 0x50(%rsp) + adcq %rbx, %rcx + shlq $0x3f, %rax + addq %rax, %rcx + movq 0x58(%rsp), %rax + movq %rcx, 0x58(%rsp) + xorq %r13, %rax + movq %r13, %rcx + andq %r12, %rcx negq %rcx - sbbq %rbx, %rax - sbbq %rcx, %rcx - movq %rax, (%r8,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmuldouble_alt_zcorrloop - movq 0x30(%rsp), %r8 - leaq (%r8,%rdi,8), %r15 - xorq %r9, %r9 - xorq %r12, %r12 - xorq %r14, %r14 - xorq %rbp, %rbp - xorq %rsi, %rsi -edwards25519_scalarmuldouble_alt_crossloop: - movq (%r8,%r9,8), %rcx - movq (%rsp), %rax - mulq %rcx - addq %rax, %r14 - adcq $0x0, %rdx - movq %rdx, %r10 - movq 0x10(%rsp), %rax - mulq %rcx + mulq %r12 addq %rax, %rsi - adcq $0x0, %rdx - movq %rdx, %r11 - movq (%r15,%r9,8), %rcx - movq 0x8(%rsp), %rax - mulq %rcx - subq %r12, %rdx - subq %rax, %r14 - sbbq %rdx, %r10 - sbbq %r12, %r12 - movq %r14, (%r8,%r9,8) - movq %r10, %r14 - movq 0x18(%rsp), %rax - mulq %rcx - subq %rbp, %rdx + adcq %rdx, %rcx + movq 0x78(%rsp), %rax + xorq %r15, %rax + movq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rcx + mulq %r14 + addq %rax, %rsi + adcq %rcx, %rdx + movq %rdx, %rcx + shldq $0x1, %rsi, %rdx + sarq $0x3f, %rcx + movl $0x13, %eax + addq %rcx, %rdx + imulq %rdx + movq 0x60(%rsp), %r8 + addq %rax, %r8 + movq %r8, 0x60(%rsp) + movq 0x68(%rsp), %r8 + adcq %rdx, %r8 + movq %r8, 0x68(%rsp) + movq 0x70(%rsp), %r8 + adcq %rcx, %r8 + movq %r8, 0x70(%rsp) + adcq %rcx, %rsi + shlq $0x3f, %rax + addq %rax, %rsi + movq %rsi, 0x78(%rsp) +curve25519_x25519_midloop: + movq 0x98(%rsp), %rsi + movq (%rsp), %rdx + movq 0x20(%rsp), %rcx + movq %rdx, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + xorl %ebp, %ebp + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 subq %rax, %rsi - sbbq %rdx, %r11 - sbbq %rbp, %rbp - movq %rsi, (%r15,%r9,8) - movq %r11, %rsi - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmuldouble_alt_crossloop - xorq %r9, %r9 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r12, %r14 - xorq %rbp, %rsi -edwards25519_scalarmuldouble_alt_optnegloop: - movq (%r8,%r9,8), %rax - xorq %r12, %rax - negq %r10 - adcq $0x0, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rax - xorq %rbp, %rax - negq %r11 - adcq $0x0, 
%rax - sbbq %r11, %r11 - movq %rax, (%r15,%r9,8) - incq %r9 - cmpq %r13, %r9 - jb edwards25519_scalarmuldouble_alt_optnegloop - subq %r10, %r14 - subq %r11, %rsi - movq %r13, %r9 -edwards25519_scalarmuldouble_alt_shiftloop: - movq -0x8(%r8,%r9,8), %rax - movq %rax, %r10 - shrdq $0x3a, %r14, %rax - movq %rax, -0x8(%r8,%r9,8) - movq %r10, %r14 - movq -0x8(%r15,%r9,8), %rax - movq %rax, %r11 - shrdq $0x3a, %rsi, %rax - movq %rax, -0x8(%r15,%r9,8) - movq %r11, %rsi - decq %r9 - jne edwards25519_scalarmuldouble_alt_shiftloop - notq %rbp - movq 0x48(%rsp), %rcx - movq 0x38(%rsp), %r8 - movq 0x40(%rsp), %r15 - movq %r12, %r10 - movq %rbp, %r11 - xorq %r9, %r9 -edwards25519_scalarmuldouble_alt_fliploop: - movq %rbp, %rdx - movq (%rcx,%r9,8), %rax - andq %rax, %rdx - andq %r12, %rax - movq (%r8,%r9,8), %rbx - xorq %r12, %rbx - negq %r10 - adcq %rbx, %rax - sbbq %r10, %r10 - movq %rax, (%r8,%r9,8) - movq (%r15,%r9,8), %rbx - xorq %rbp, %rbx - negq %r11 - adcq %rbx, %rdx - sbbq %r11, %r11 - movq %rdx, (%r15,%r9,8) - incq %r9 - cmpq %rdi, %r9 - jb edwards25519_scalarmuldouble_alt_fliploop - subq $0x3a, 0x20(%rsp) - ja edwards25519_scalarmuldouble_alt_outerloop + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, 
%r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %rdx + leaq (%rcx,%rax), %rdi + shlq $0x16, %rdx + shlq $0x16, %rdi + sarq $0x2b, %rdx + sarq $0x2b, %rdi + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %rbx + leaq (%rcx,%rax), %rcx + sarq $0x2a, %rbx + sarq $0x2a, %rcx + movq %rdx, 0xa0(%rsp) + movq %rbx, 0xa8(%rsp) + movq %rdi, 0xb0(%rsp) + movq %rcx, 0xb8(%rsp) + movq (%rsp), %r12 + imulq %r12, %rdi + imulq %rdx, %r12 + movq 0x20(%rsp), %r13 + imulq %r13, %rbx + imulq %rcx, %r13 + addq %rbx, %r12 + addq %rdi, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq 
%rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq 
%rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r10 + shlq $0x16, %r8 + shlq $0x16, %r10 + sarq $0x2b, %r8 + sarq $0x2b, %r10 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r15 + leaq (%rcx,%rax), %r11 + sarq $0x2a, %r15 + sarq $0x2a, %r11 + movq %r13, %rbx + movq %r12, %rcx + imulq %r8, %r12 + imulq %r15, %rbx + addq %rbx, %r12 + imulq %r11, %r13 + imulq %r10, %rcx + addq %rcx, %r13 + sarq $0x14, %r12 + sarq $0x14, %r13 + movq %r12, %rbx + andq $0xfffff, %rbx + movabsq $0xfffffe0000000000, %rax + orq %rax, %rbx + movq %r13, %rcx + andq $0xfffff, %rcx + movabsq $0xc000000000000000, %rax + orq %rax, %rcx + movq 0xa0(%rsp), %rax + imulq %r8, %rax + movq 0xb0(%rsp), %rdx + imulq %r15, %rdx + imulq 0xa8(%rsp), %r8 + imulq 0xb8(%rsp), %r15 + addq %r8, %r15 + leaq (%rax,%rdx), %r9 + movq 0xa0(%rsp), %rax + imulq %r10, %rax + movq 0xb0(%rsp), %rdx + imulq %r11, %rdx + imulq 0xa8(%rsp), %r10 + imulq 0xb8(%rsp), %r11 + addq %r10, %r11 + leaq (%rax,%rdx), %r13 + movq $0xfffffffffffffffe, %rax + movl $0x2, %edx + movq %rbx, %rdi + movq %rax, %r8 + testq %rsi, %rsi + cmovs %rbp, %r8 + testq $0x1, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, 
%rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + cmovs %rbp, %r8 + movq %rbx, %rdi + testq %rdx, %rcx + cmoveq %rbp, %r8 + cmoveq %rbp, %rdi + sarq $1, %rcx + xorq %r8, %rdi + xorq %r8, %rsi + btq $0x3f, %r8 + cmovbq %rcx, %rbx + movq %rax, %r8 + subq %rax, %rsi + leaq (%rcx,%rdi), %rcx + sarq $1, %rcx + movl $0x100000, %eax + leaq (%rbx,%rax), %r8 + leaq (%rcx,%rax), %r12 + shlq $0x15, %r8 + shlq $0x15, %r12 + sarq $0x2b, %r8 + sarq $0x2b, %r12 + movabsq $0x20000100000, %rax + leaq (%rbx,%rax), %r10 + leaq (%rcx,%rax), %r14 + sarq $0x2b, %r10 + sarq $0x2b, %r14 + movq %r9, %rax + imulq %r8, %rax + movq %r13, %rdx + imulq %r10, %rdx + imulq %r15, %r8 + imulq %r11, %r10 + addq %r8, %r10 + leaq (%rax,%rdx), %r8 + movq %r9, %rax + imulq %r12, %rax + movq %r13, %rdx + imulq %r14, %rdx + imulq %r15, %r12 + imulq %r11, %r14 + addq %r12, %r14 + leaq (%rax,%rdx), %r12 + movq %rsi, 0x98(%rsp) + decq 0x90(%rsp) + jne curve25519_x25519_inverseloop + movq (%rsp), %rax + movq 
0x20(%rsp), %rcx + imulq %r8, %rax + imulq %r10, %rcx + addq %rcx, %rax + sarq $0x3f, %rax + movq %r8, %r9 + sarq $0x3f, %r9 + xorq %r9, %r8 + subq %r9, %r8 + xorq %rax, %r9 + movq %r10, %r11 + sarq $0x3f, %r11 + xorq %r11, %r10 + subq %r11, %r10 + xorq %rax, %r11 + movq %r12, %r13 + sarq $0x3f, %r13 + xorq %r13, %r12 + subq %r13, %r12 + xorq %rax, %r13 + movq %r14, %r15 + sarq $0x3f, %r15 + xorq %r15, %r14 + subq %r15, %r14 + xorq %rax, %r15 + movq %r8, %rax + andq %r9, %rax + movq %r10, %r12 + andq %r11, %r12 + addq %rax, %r12 + xorl %r13d, %r13d + movq 0x40(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x60(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movq 0x48(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r13 + adcq %rdx, %r14 + movq 0x68(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq 0x50(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x70(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + adcq %rdx, %r15 + movq 0x58(%rsp), %rax + xorq %r9, %rax + andq %r8, %r9 + negq %r9 + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r9 + movq 0x78(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %r9 + mulq %r10 + addq %rax, %r15 + adcq %rdx, %r9 + movq %r9, %rax + shldq $0x1, %r15, %rax + sarq $0x3f, %r9 + movl $0x13, %ebx + leaq 0x1(%rax,%r9,1), %rax + imulq %rbx + xorl %ebp, %ebp + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r9, %r14 + adcq %r9, %r15 + shlq $0x3f, %rax + addq %rax, %r15 + cmovns %rbp, %rbx + subq %rbx, %r12 + sbbq %rbp, %r13 + sbbq %rbp, %r14 + sbbq %rbp, %r15 + btr $0x3f, %r15 + movq 0xc0(%rsp), %rdi + movq %r12, (%rdi) + movq %r13, 0x8(%rdi) + movq %r14, 0x10(%rdi) + movq %r15, 0x18(%rdi) // Store result movq res, %rdi - leaq 96(%rsp), %rsi - leaq 224(%rsp), %rbp + leaq ACC(%rsp), %rsi + leaq TAB(%rsp), %rbp mul_p25519(x_0,x_1,x_2) movq res, %rdi addq $32, %rdi - leaq 128(%rsp), %rsi - leaq 224(%rsp), %rbp + leaq ACC+32(%rsp), %rsi + leaq TAB(%rsp), %rbp mul_p25519(x_0,x_1,x_2) // Restore stack and registers @@ -2645,14 +3606,6 @@ edwards25519_scalarmuldouble_alt_pepadd: // .section .rodata // **************************************************************************** -// The modulus p_25519 = 2^255 - 19, for the modular inverse - -edwards25519_scalarmuldouble_alt_p25519: - .quad 0xffffffffffffffed - .quad 0xffffffffffffffff - .quad 0xffffffffffffffff - .quad 0x7fffffffffffffff - // Precomputed table of multiples of generator for edwards25519 // all in precomputed extended-projective (y-x,x+y,2*d*x*y) triples.
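
[Editorial note, not part of the patch: the table entries above store each edwards25519 point in the extended-projective triple form (y-x, x+y, 2*d*x*y) named in the comment. A minimal Python sketch of how one such triple could be derived from an affine point (x, y), assuming the standard curve constant d = -121665/121666 mod p_25519; `precomputed_triple` is a hypothetical helper for illustration only:]

    # Minimal sketch: derive one (y - x, x + y, 2*d*x*y) table triple from an
    # affine edwards25519 point (x, y). P and D are the standard constants;
    # precomputed_triple is a hypothetical name, not part of this code base.
    P = 2**255 - 19                               # the field prime p_25519
    D = (-121665 * pow(121666, P - 2, P)) % P     # edwards25519 constant d

    def precomputed_triple(x, y):
        return ((y - x) % P, (x + y) % P, (2 * D * x * y) % P)

[This form lets the point-addition code trade multiplications for the precomputed sums and the 2*d*x*y product, which is why the table is stored this way rather than as plain affine coordinates.]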