Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SHA512 intrinsic #1671

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,10 @@ jobs:
shell: bash
if: startsWith(matrix.target.tuple, 'thumb') || matrix.target.tuple == 'nvptx64-nvidia-cuda'

- run: objdump --version
if: matrix.target.tuple == 'x86_64-apple-darwin'
shell: bash

# Windows & OSX go straight to `run.sh` ...
- run: ./ci/run.sh
shell: bash
Expand Down
74 changes: 74 additions & 0 deletions crates/core_arch/src/x86/sha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ extern "C" {
fn sha256msg2(a: i32x4, b: i32x4) -> i32x4;
#[link_name = "llvm.x86.sha256rnds2"]
fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4;
#[link_name = "llvm.x86.vsha512msg1"]
fn vsha512msg1(a: i64x4, b: i64x2) -> i64x4;
#[link_name = "llvm.x86.vsha512msg2"]
fn vsha512msg2(a: i64x4, b: i64x4) -> i64x4;
#[link_name = "llvm.x86.vsha512rnds2"]
fn vsha512rnds2(a: i64x4, b: i64x4, c: i64x2) -> i64x4;
}

#[cfg(test)]
Expand Down Expand Up @@ -118,6 +124,43 @@ pub unsafe fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m12
transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4()))
}

/// Performs an intermediate calculation for the next four SHA512 message qwords.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg1_epi64)
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512msg1))]
#[unstable(feature = "sha512", issue = "none")]
pub unsafe fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i {
transmute(vsha512msg1(a.as_i64x4(), b.as_i64x2()))
}

/// Performs the final calculation for the next four SHA512 message qwords.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg2_epi64)
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512msg2))]
#[unstable(feature = "sha512", issue = "none")]
pub unsafe fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i {
transmute(vsha512msg2(a.as_i64x4(), b.as_i64x4()))
}

/// Performs two rounds of SHA512 operation using initial SHA512 state (C,D,G,H) from `a`,
/// an initial SHA512 state (A,B,E,F) from `b`, and a pre-computed sum of the next two
/// round message qwords and the corresponding round constants from `c` (only the two
/// lower qwords of the third operand). The updated SHA512 state (A,B,E,F) is returned, and
/// can be used as the updated state (C,D,G,H) in later rounds.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512rnds2_epi64)
#[inline]
#[target_feature(enable = "sha512,avx")]
#[cfg_attr(test, assert_instr(vsha512rnds2))]
#[unstable(feature = "sha512", issue = "none")]
pub unsafe fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, c: __m128i) -> __m256i {
transmute(vsha512rnds2(a.as_i64x4(), b.as_i64x4(), c.as_i64x2()))
}

#[cfg(test)]
mod tests {
use std::{
Expand Down Expand Up @@ -215,4 +258,35 @@ mod tests {
let r = _mm_sha256rnds2_epu32(a, b, k);
assert_eq_m128i(r, expected);
}

#[simd_test(enable = "sha512,avx")]
#[allow(overflowing_literals)]
unsafe fn test_mm256_sha512msg1_epi64() {
let a = _mm256_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98, 0x0, 0x0);
let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
let expected = _mm256_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee, 0x0, 0x0);
let r = _mm256_sha512msg1_epi64(a, b);
assert_eq_m256i(r, expected);
}

#[simd_test(enable = "sha512,avx")]
#[allow(overflowing_literals)]
unsafe fn test_mm256_sha512msg2_epi64() {
let a = _mm256_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98, 0x0, 0x0);
let b = _mm256_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98, 0x0, 0x0);
let expected = _mm256_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35, 0x0, 0x0);
let r = _mm256_sha512msg2_epi64(a, b);
assert_eq_m256i(r, expected);
}

#[simd_test(enable = "sha512,avx")]
#[allow(overflowing_literals)]
unsafe fn test_mm256_sha512rnds2_epi64() {
let a = _mm256_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98, 0x0, 0x0);
let b = _mm256_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b, 0x0, 0x0);
let k = _mm_set_epi64x(0, 0x12835b01d807aa98);
let expected = _mm256_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19, 0x0, 0x0);
let r = _mm256_sha512rnds2_epi64(a, b, k);
assert_eq_m256i(r, expected);
}
}
Loading