rust-lang · JanBerktold · Nov 11, 2024 · Nov 12, 2024 · Nov 12, 2024 · Nov 12, 2024
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -221,6 +221,10 @@ jobs:
       shell: bash
       if: startsWith(matrix.target.tuple, 'thumb') || matrix.target.tuple == 'nvptx64-nvidia-cuda'
 
+    # Print the objdump version, but only on the x86_64 macOS job.
+    # NOTE(review): looks like a diagnostic step — confirm it is still needed before merging.
+    - run: objdump --version
+      if: matrix.target.tuple == 'x86_64-apple-darwin'
+      shell: bash
+
     # Windows & OSX go straight to `run.sh` ...
     - run: ./ci/run.sh
       shell: bash

diff --git a/crates/core_arch/src/x86/sha.rs b/crates/core_arch/src/x86/sha.rs
@@ -16,6 +16,12 @@ extern "C" {
     fn sha256msg2(a: i32x4, b: i32x4) -> i32x4;
     #[link_name = "llvm.x86.sha256rnds2"]
     fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4;
+    #[link_name = "llvm.x86.vsha512msg1"]
+    fn vsha512msg1(a: i64x4, b: i64x2) -> i64x4;
+    #[link_name = "llvm.x86.vsha512msg2"]
+    fn vsha512msg2(a: i64x4, b: i64x4) -> i64x4;
+    #[link_name = "llvm.x86.vsha512rnds2"]
+    fn vsha512rnds2(a: i64x4, b: i64x4, c: i64x2) -> i64x4;
 }
 
 #[cfg(test)]
@@ -118,6 +124,43 @@ pub unsafe fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m12
     transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4()))
 }
 
+/// Computes an intermediate result used to derive the next four SHA512 message qwords
+/// from `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg1_epi64)
+#[inline]
+#[target_feature(enable = "sha512,avx")]
+#[cfg_attr(test, assert_instr(vsha512msg1))]
+#[unstable(feature = "sha512", issue = "none")]
+pub unsafe fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i {
+    // View the operands as 64-bit lanes for the LLVM intrinsic, then reinterpret its
+    // four-lane result as `__m256i`.
+    let scheduled: i64x4 = vsha512msg1(a.as_i64x4(), b.as_i64x2());
+    transmute(scheduled)
+}
+
+/// Completes the calculation of the next four SHA512 message qwords from `a` and `b`.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512msg2_epi64)
+#[inline]
+#[target_feature(enable = "sha512,avx")]
+#[cfg_attr(test, assert_instr(vsha512msg2))]
+#[unstable(feature = "sha512", issue = "none")]
+pub unsafe fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i {
+    // View both operands as 64-bit lanes for the LLVM intrinsic, then reinterpret its
+    // four-lane result as `__m256i`.
+    let scheduled: i64x4 = vsha512msg2(a.as_i64x4(), b.as_i64x4());
+    transmute(scheduled)
+}
+
+/// Performs two rounds of the SHA512 operation.
+///
+/// `a` supplies the initial SHA512 state (C,D,G,H) and `b` the initial SHA512 state
+/// (A,B,E,F). The two qwords of `c` supply the pre-computed sum of the next two round
+/// message qwords and the corresponding round constants. The return value is the updated
+/// SHA512 state (A,B,E,F), which can be used as the updated state (C,D,G,H) in later rounds.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512rnds2_epi64)
+#[inline]
+#[target_feature(enable = "sha512,avx")]
+#[cfg_attr(test, assert_instr(vsha512rnds2))]
+#[unstable(feature = "sha512", issue = "none")]
+pub unsafe fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, c: __m128i) -> __m256i {
+    // Name each operand after the role the documentation above gives it before handing
+    // the 64-bit lane views to the LLVM intrinsic.
+    let (state_cdgh, state_abef, wk) = (a.as_i64x4(), b.as_i64x4(), c.as_i64x2());
+    transmute(vsha512rnds2(state_cdgh, state_abef, wk))
+}
+
 #[cfg(test)]
 mod tests {
     use std::{
@@ -215,4 +258,35 @@ mod tests {
         let r = _mm_sha256rnds2_epu32(a, b, k);
         assert_eq_m128i(r, expected);
     }
+
+    #[simd_test(enable = "sha512,avx")]
+    unsafe fn test_mm256_sha512msg1_epi64() {
+        // SHA-512 "small sigma 0", as given in the pseudocode of Intel's documentation for
+        // this intrinsic.
+        fn s0(w: u64) -> u64 {
+            w.rotate_right(1) ^ w.rotate_right(8) ^ (w >> 7)
+        }
+
+        // Operands as qword lanes, index 0 being the lowest lane. Every lane is non-zero so
+        // that each output lane is actually exercised.
+        let a_q: [u64; 4] = [
+            0x3956c25b59f111f1,
+            0x923f82a4ab1c5ed5,
+            0x71374491428a2f98,
+            0xe9b5dba5b5c0fbcf,
+        ];
+        let b_q: [u64; 2] = [0x59f111f13956c25b, 0xab1c5ed5923f82a4];
+
+        // `as i64` only reinterprets the bit pattern. The `set_epi64x` constructors take
+        // their lanes from the highest down to the lowest.
+        let a = _mm256_set_epi64x(a_q[3] as i64, a_q[2] as i64, a_q[1] as i64, a_q[0] as i64);
+        let b = _mm_set_epi64x(b_q[1] as i64, b_q[0] as i64);
+
+        // Scalar reference: with W[0..=3] = a and W[4] = b[0], output lane i is
+        // W[i] + s0(W[i + 1]) using wrapping addition.
+        let expected = _mm256_set_epi64x(
+            a_q[3].wrapping_add(s0(b_q[0])) as i64,
+            a_q[2].wrapping_add(s0(a_q[3])) as i64,
+            a_q[1].wrapping_add(s0(a_q[2])) as i64,
+            a_q[0].wrapping_add(s0(a_q[1])) as i64,
+        );
+
+        let r = _mm256_sha512msg1_epi64(a, b);
+        assert_eq_m256i(r, expected);
+    }
+
+    #[simd_test(enable = "sha512,avx")]
+    unsafe fn test_mm256_sha512msg2_epi64() {
+        // SHA-512 "small sigma 1", as given in the pseudocode of Intel's documentation for
+        // this intrinsic.
+        fn s1(w: u64) -> u64 {
+            w.rotate_right(19) ^ w.rotate_right(61) ^ (w >> 6)
+        }
+
+        // Operands as qword lanes, index 0 being the lowest lane. The two operands differ
+        // and every lane is non-zero so that each output lane is actually exercised.
+        let a_q: [u64; 4] = [
+            0x3956c25b59f111f1,
+            0x923f82a4ab1c5ed5,
+            0x71374491428a2f98,
+            0xe9b5dba5b5c0fbcf,
+        ];
+        let b_q: [u64; 4] = [
+            0x12835b01d807aa98,
+            0x243185be550c7dc3,
+            0x59f111f13956c25b,
+            0xab1c5ed5923f82a4,
+        ];
+
+        // `as i64` only reinterprets the bit pattern. `_mm256_set_epi64x` takes its lanes
+        // from the highest down to the lowest.
+        let a = _mm256_set_epi64x(a_q[3] as i64, a_q[2] as i64, a_q[1] as i64, a_q[0] as i64);
+        let b = _mm256_set_epi64x(b_q[3] as i64, b_q[2] as i64, b_q[1] as i64, b_q[0] as i64);
+
+        // Scalar reference: W[14] and W[15] come from the two upper lanes of `b`; the two
+        // upper output lanes depend on the two lower ones that were just computed.
+        let w16 = a_q[0].wrapping_add(s1(b_q[2]));
+        let w17 = a_q[1].wrapping_add(s1(b_q[3]));
+        let w18 = a_q[2].wrapping_add(s1(w16));
+        let w19 = a_q[3].wrapping_add(s1(w17));
+        let expected = _mm256_set_epi64x(w19 as i64, w18 as i64, w17 as i64, w16 as i64);
+
+        let r = _mm256_sha512msg2_epi64(a, b);
+        assert_eq_m256i(r, expected);
+    }
+
+    #[simd_test(enable = "sha512,avx")]
+    unsafe fn test_mm256_sha512rnds2_epi64() {
+        // SHA-512 round helper functions, as given in the pseudocode of Intel's
+        // documentation for this intrinsic.
+        fn cap_sigma0(w: u64) -> u64 {
+            w.rotate_right(28) ^ w.rotate_right(34) ^ w.rotate_right(39)
+        }
+        fn cap_sigma1(w: u64) -> u64 {
+            w.rotate_right(14) ^ w.rotate_right(18) ^ w.rotate_right(41)
+        }
+        fn maj(a: u64, b: u64, c: u64) -> u64 {
+            (a & b) ^ (a & c) ^ (b & c)
+        }
+        fn ch(e: u64, f: u64, g: u64) -> u64 {
+            (e & f) ^ (!e & g)
+        }
+
+        // Operands as qword lanes, index 0 being the lowest lane. Every lane is non-zero so
+        // that each part of the state contributes to the result.
+        let a_q: [u64; 4] = [
+            0x3956c25b59f111f1,
+            0x923f82a4ab1c5ed5,
+            0x71374491428a2f98,
+            0xe9b5dba5b5c0fbcf,
+        ];
+        let b_q: [u64; 4] = [
+            0x72be5d7480deb1fe,
+            0x9bdc06a7c19bf174,
+            0x59f111f13956c25b,
+            0xab1c5ed5923f82a4,
+        ];
+        let c_q: [u64; 2] = [0x12835b01d807aa98, 0x243185be550c7dc3];
+
+        // `as i64` only reinterprets the bit pattern. The `set_epi64x` constructors take
+        // their lanes from the highest down to the lowest.
+        let a = _mm256_set_epi64x(a_q[3] as i64, a_q[2] as i64, a_q[1] as i64, a_q[0] as i64);
+        let b = _mm256_set_epi64x(b_q[3] as i64, b_q[2] as i64, b_q[1] as i64, b_q[0] as i64);
+        let c = _mm_set_epi64x(c_q[1] as i64, c_q[0] as i64);
+
+        // Scalar reference. State order is [A, B, C, D, E, F, G, H]: (C, D, G, H) come from
+        // `a` and (A, B, E, F) from `b`, each listed from the highest lane downwards.
+        let mut state = [
+            b_q[3], b_q[2], a_q[3], a_q[2], b_q[1], b_q[0], a_q[1], a_q[0],
+        ];
+        for &wk in &c_q {
+            let [sa, sb, sc, sd, se, sf, sg, sh] = state;
+            let t1 = ch(se, sf, sg)
+                .wrapping_add(cap_sigma1(se))
+                .wrapping_add(wk)
+                .wrapping_add(sh);
+            let t2 = maj(sa, sb, sc).wrapping_add(cap_sigma0(sa));
+            state = [
+                t1.wrapping_add(t2),
+                sa,
+                sb,
+                sc,
+                t1.wrapping_add(sd),
+                se,
+                sf,
+                sg,
+            ];
+        }
+        // The intrinsic returns the updated (A, B, E, F), highest lane first.
+        let [new_a, new_b, _, _, new_e, new_f, _, _] = state;
+        let expected =
+            _mm256_set_epi64x(new_a as i64, new_b as i64, new_e as i64, new_f as i64);
+
+        let r = _mm256_sha512rnds2_epi64(a, b, c);
+        assert_eq_m256i(r, expected);
+    }
 }