diff --git a/src/External/RawIntrinsics/AVX.cs b/src/External/RawIntrinsics/AVX.cs
index c693c71..5f80d8f 100644
--- a/src/External/RawIntrinsics/AVX.cs
+++ b/src/External/RawIntrinsics/AVX.cs
@@ -322,6 +322,22 @@ public static unsafe partial class AVX
/// __m256 dst {FP32}
public static __m256 _mm256_broadcast_ss(float* mem_addr) => System.Runtime.Intrinsics.X86.Avx.BroadcastScalarToVector256(mem_addr);
+ /// <summary>
+ /// Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst".
+ /// </summary>
+ /// <remarks>VROUNDPD ymm, ymm, imm8</remarks>
+ /// <param name="a">__m256d {FP64}</param>
+ /// <returns>__m256d dst {FP64}</returns>
+ public static __m256d _mm256_ceil_pd(__m256d a) => System.Runtime.Intrinsics.X86.Avx.Ceiling(a.FP64);
+
+ /// <summary>
+ /// Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst".
+ /// </summary>
+ /// <remarks>VROUNDPS ymm, ymm, imm8</remarks>
+ /// <param name="a">__m256 {FP32}</param>
+ /// <returns>__m256 dst {FP32}</returns>
+ public static __m256 _mm256_ceil_ps(__m256 a) => System.Runtime.Intrinsics.X86.Avx.Ceiling(a.FP32);
+
///
/// Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst".
///
@@ -986,6 +1002,15 @@ public static unsafe partial class AVX
/// __m256d dst {FP64}
public static __m256d _mm256_setr_m128d(__m128d lo, __m128d hi) => System.Runtime.Intrinsics.Vector256.Create(lo.FP64, hi.FP64);
+ /// <summary>
+ /// Set packed __m256i vector "dst" with the supplied values; "lo" and "hi" are passed, in that order, to Vector256.Create.
+ /// </summary>
+ /// <remarks>VINSERTF128 ymm, ymm, xmm, imm8</remarks>
+ /// <param name="lo">__m128i {M128}</param>
+ /// <param name="hi">__m128i {M128}</param>
+ /// <returns>__m256i dst {M128}</returns>
+ public static __m256i _mm256_setr_m128i(__m128i lo, __m128i hi) => System.Runtime.Intrinsics.Vector256.Create(lo.SI32, hi.SI32);
+
///
/// Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order.
///
diff --git a/src/External/RawIntrinsics/Other.cs b/src/External/RawIntrinsics/Other.cs
index d8c0988..4ccc689 100644
--- a/src/External/RawIntrinsics/Other.cs
+++ b/src/External/RawIntrinsics/Other.cs
@@ -55,6 +55,16 @@ public static unsafe partial class Other
/// __m128i dst {M128}
public static __m128i _mm_aeskeygenassist_si128(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Aes.KeygenAssist(a.UI8, (byte)imm8);
+ /// <summary>
+ /// Perform a carry-less multiplication of two 64-bit integers, selected from "a" and "b" according to "imm8", and store the results in "dst".
+ /// </summary>
+ /// <remarks>PCLMULQDQ xmm, xmm, imm8</remarks>
+ /// <param name="a">__m128i {M128}</param>
+ /// <param name="b">__m128i {M128}</param>
+ /// <param name="imm8">int {IMM}; narrowed to a byte before being passed to CarrylessMultiply</param>
+ /// <returns>__m128i dst {M128}</returns>
+ public static __m128i _mm_clmulepi64_si128(__m128i a, __m128i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(a.SI64, b.SI64, (byte)imm8);
+
///
/// Count the number of bits set to 1 in unsigned 32-bit integer "a", and return that count in "dst".
///
diff --git a/src/External/RawIntrinsics/SSE2.cs b/src/External/RawIntrinsics/SSE2.cs
index 0279253..8f2a747 100644
--- a/src/External/RawIntrinsics/SSE2.cs
+++ b/src/External/RawIntrinsics/SSE2.cs
@@ -756,6 +756,14 @@ public static unsafe partial class SSE2
/// __m128d dst {FP64}
public static __m128d _mm_loadh_pd(__m128d a, double* mem_addr) => System.Runtime.Intrinsics.X86.Sse2.LoadHigh(a.FP64, mem_addr);
+ /// <summary>
+ /// Load 64-bit integer from memory into the first element of "dst".
+ /// </summary>
+ /// <remarks>MOVQ xmm, m64</remarks>
+ /// <param name="mem_addr">__m128i {UI64}; the pointer is reinterpreted as long* for the load</param>
+ /// <returns>__m128i dst {UI64}</returns>
+ public static __m128i _mm_loadl_epi64(__m128i* mem_addr) => System.Runtime.Intrinsics.X86.Sse2.LoadScalarVector128((long*)mem_addr);
+
///
/// Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst", and copy the upper element from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary.
///
@@ -781,6 +789,14 @@ public static unsafe partial class SSE2
/// __m128i dst {M128}
public static __m128i _mm_loadu_si128(__m128i* mem_addr) => System.Runtime.Intrinsics.X86.Sse2.LoadVector128((sbyte*)mem_addr);
+ /// <summary>
+ /// Load unaligned 32-bit integer from memory into the first element of "dst".
+ /// </summary>
+ /// <remarks>MOVD xmm, m32</remarks>
+ /// <param name="mem_addr">void {UI32}; the pointer is reinterpreted as int* for the load</param>
+ /// <returns>__m128i dst {UI32}</returns>
+ public static __m128i _mm_loadu_si32(void* mem_addr) => System.Runtime.Intrinsics.X86.Sse2.LoadScalarVector128((int*)mem_addr);
+
///
/// Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst".
///
@@ -1086,6 +1102,15 @@ public static unsafe partial class SSE2
/// __m128i dst {UI32}
public static __m128i _mm_setr_epi32(int e3, int e2, int e1, int e0) => System.Runtime.Intrinsics.Vector128.Create((uint)e3, (uint)e2, (uint)e1, (uint)e0);
+ /// <summary>
+ /// Set packed 64-bit integers in "dst" with the supplied values in reverse order.
+ /// </summary>
+ /// <remarks>No instruction mnemonic is recorded for this intrinsic.</remarks>
+ /// <param name="e1">__m64 {UI64}</param>
+ /// <param name="e0">__m64 {UI64}</param>
+ /// <returns>__m128i dst {UI64}</returns>
+ public static __m128i _mm_setr_epi64(__m64 e1, __m64 e0) => System.Runtime.Intrinsics.Vector128.Create(e1.SI32, e0.SI32);
+
///
/// Set packed 8-bit integers in "dst" with the supplied values in reverse order.
///
diff --git a/src/External/RawIntrinsicsGenerator/Generator.cs b/src/External/RawIntrinsicsGenerator/Generator.cs
index 7c6f33c..d593473 100644
--- a/src/External/RawIntrinsicsGenerator/Generator.cs
+++ b/src/External/RawIntrinsicsGenerator/Generator.cs
@@ -183,6 +183,12 @@ private static async Task Generate(string sriUrl, Regex cppIntrinsicNameMatcher,
throw new Exception("Unexpected error - couldnt find end of c method intrinsic");
var cMethod = xmlDoc[cMethodIndex..cMethodEndIndex];
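+ // Workaround: the XML doc on methods named Floor currently cites _mm256_ceil_ps/_mm256_ceil_pd, so remap those to the floor intrinsics. Remove when merged: https://github.com/dotnet/runtime/pull/88552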
+ if (cMethod == "_mm256_ceil_ps" && methodSymbol.Name == "Floor")
+ cMethod = "_mm256_floor_ps";
+ if (cMethod == "_mm256_ceil_pd" && methodSymbol.Name == "Floor")
+ cMethod = "_mm256_floor_pd";
+
var csMethod = new CsMethod
{
Name = methodDeclaration.Identifier.ToString(),
@@ -547,7 +553,11 @@ static string IntelTypeNameToSystemTypeName(string itn)
.Count(
cpt => !intelMethod.Parameters
.Select((ip, j) => (ip, j))
- .Any(ipt => cpt.i == ipt.j && ipt.ip.Type.CsType.Name == cpt.cp.Type.Name && ipt.ip.Type.CsType.TypeParameter == cpt.cp.Type.TypeParameter)
+ .Any(ipt =>
+ cpt.i == ipt.j &&
+ ipt.ip.Type.CsType.Name == cpt.cp.Type.Name &&
+ ipt.ip.Type.CsType.TypeParameter == cpt.cp.Type.TypeParameter
+ )
)
))
.OrderBy(m => m.s)
@@ -559,7 +569,8 @@ static string IntelTypeNameToSystemTypeName(string itn)
return null;
}
- if ((csMethodCand.Length == 8 || csMethodCand.Length == 4) && csMethodCand.Select(m => m.s).Distinct().Count() == 1)
+ var oneDistinctScore = csMethodCand.Select(m => m.s).Distinct().Count() == 1;
+ if ((csMethodCand.Length == 8 || csMethodCand.Length == 4 || csMethodCand.Length == 2) && oneDistinctScore)
return csMethodCand[0].m;
if (csMethodCand.Length > 1)