From 67862f08a8c22cc8e6091b9b386a57b150b0f7b7 Mon Sep 17 00:00:00 2001 From: Martin Othamar Date: Sun, 9 Jul 2023 13:45:26 +0200 Subject: [PATCH] Workarounds --- src/External/RawIntrinsics/AVX.cs | 25 +++++++++++++++++++ src/External/RawIntrinsics/Other.cs | 10 ++++++++ src/External/RawIntrinsics/SSE2.cs | 25 +++++++++++++++++++ .../RawIntrinsicsGenerator/Generator.cs | 15 +++++++++-- 4 files changed, 73 insertions(+), 2 deletions(-) diff --git a/src/External/RawIntrinsics/AVX.cs b/src/External/RawIntrinsics/AVX.cs index c693c71..5f80d8f 100644 --- a/src/External/RawIntrinsics/AVX.cs +++ b/src/External/RawIntrinsics/AVX.cs @@ -322,6 +322,22 @@ public static unsafe partial class AVX /// __m256 dst {FP32} public static __m256 _mm256_broadcast_ss(float* mem_addr) => System.Runtime.Intrinsics.X86.Avx.BroadcastScalarToVector256(mem_addr); + /// + /// Round the packed double-precision (64-bit) floating-point elements in "a" up to an integer value, and store the results as packed double-precision floating-point elements in "dst". + /// + /// VROUNDPD ymm, ymm, imm8 + /// __m256d {FP64} + /// __m256d dst {FP64} + public static __m256d _mm256_ceil_pd(__m256d a) => System.Runtime.Intrinsics.X86.Avx.Ceiling(a.FP64); + + /// + /// Round the packed single-precision (32-bit) floating-point elements in "a" up to an integer value, and store the results as packed single-precision floating-point elements in "dst". + /// + /// VROUNDPS ymm, ymm, imm8 + /// __m256 {FP32} + /// __m256 dst {FP32} + public static __m256 _mm256_ceil_ps(__m256 a) => System.Runtime.Intrinsics.X86.Avx.Ceiling(a.FP32); + /// /// Compare packed double-precision (64-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst". /// @@ -986,6 +1002,15 @@ public static unsafe partial class AVX /// __m256d dst {FP64} public static __m256d _mm256_setr_m128d(__m128d lo, __m128d hi) => System.Runtime.Intrinsics.Vector256.Create(lo.FP64, hi.FP64); + /// + /// Set packed __m256i vector "dst" with the supplied values. + /// + /// VINSERTF128 ymm, ymm, xmm, imm8 + /// __m128i {M128} + /// __m128i {M128} + /// __m256i dst {M128} + public static __m256i _mm256_setr_m128i(__m128i lo, __m128i hi) => System.Runtime.Intrinsics.Vector256.Create(lo.SI32, hi.SI32); + /// /// Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order. /// diff --git a/src/External/RawIntrinsics/Other.cs b/src/External/RawIntrinsics/Other.cs index d8c0988..4ccc689 100644 --- a/src/External/RawIntrinsics/Other.cs +++ b/src/External/RawIntrinsics/Other.cs @@ -55,6 +55,16 @@ public static unsafe partial class Other /// __m128i dst {M128} public static __m128i _mm_aeskeygenassist_si128(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Aes.KeygenAssist(a.UI8, (byte)imm8); + /// + /// Perform a carry-less multiplication of two 64-bit integers, selected from "a" and "b" according to "imm8", and store the results in "dst". + /// + /// PCLMULQDQ xmm, xmm, imm8 + /// __m128i {M128} + /// __m128i {M128} + /// int {IMM} + /// __m128i dst {M128} + public static __m128i _mm_clmulepi64_si128(__m128i a, __m128i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(a.SI64, b.SI64, (byte)imm8); + /// /// Count the number of bits set to 1 in unsigned 32-bit integer "a", and return that count in "dst". /// diff --git a/src/External/RawIntrinsics/SSE2.cs b/src/External/RawIntrinsics/SSE2.cs index 0279253..8f2a747 100644 --- a/src/External/RawIntrinsics/SSE2.cs +++ b/src/External/RawIntrinsics/SSE2.cs @@ -756,6 +756,14 @@ public static unsafe partial class SSE2 /// __m128d dst {FP64} public static __m128d _mm_loadh_pd(__m128d a, double* mem_addr) => System.Runtime.Intrinsics.X86.Sse2.LoadHigh(a.FP64, mem_addr); + /// + /// Load 64-bit integer from memory into the first element of "dst". + /// + /// MOVQ xmm, m64 + /// __m128i {UI64} + /// __m128i dst {UI64} + public static __m128i _mm_loadl_epi64(__m128i* mem_addr) => System.Runtime.Intrinsics.X86.Sse2.LoadScalarVector128((long*)mem_addr); + /// /// Load a double-precision (64-bit) floating-point element from memory into the lower element of "dst", and copy the upper element from "a" to "dst". "mem_addr" does not need to be aligned on any particular boundary. /// @@ -781,6 +789,14 @@ public static unsafe partial class SSE2 /// __m128i dst {M128} public static __m128i _mm_loadu_si128(__m128i* mem_addr) => System.Runtime.Intrinsics.X86.Sse2.LoadVector128((sbyte*)mem_addr); + /// + /// Load unaligned 32-bit integer from memory into the first element of "dst". + /// + /// MOVD xmm, m32 + /// void {UI32} + /// __m128i dst {UI32} + public static __m128i _mm_loadu_si32(void* mem_addr) => System.Runtime.Intrinsics.X86.Sse2.LoadScalarVector128((int*)mem_addr); + /// /// Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst". /// @@ -1086,6 +1102,15 @@ public static unsafe partial class SSE2 /// __m128i dst {UI32} public static __m128i _mm_setr_epi32(int e3, int e2, int e1, int e0) => System.Runtime.Intrinsics.Vector128.Create((uint)e3, (uint)e2, (uint)e1, (uint)e0); + /// + /// Set packed 64-bit integers in "dst" with the supplied values in reverse order. + /// + /// + /// __m64 {UI64} + /// __m64 {UI64} + /// __m128i dst {UI64} + public static __m128i _mm_setr_epi64(__m64 e1, __m64 e0) => System.Runtime.Intrinsics.Vector128.Create(e1.SI32, e0.SI32); + /// /// Set packed 8-bit integers in "dst" with the supplied values in reverse order. /// diff --git a/src/External/RawIntrinsicsGenerator/Generator.cs b/src/External/RawIntrinsicsGenerator/Generator.cs index 7c6f33c..d593473 100644 --- a/src/External/RawIntrinsicsGenerator/Generator.cs +++ b/src/External/RawIntrinsicsGenerator/Generator.cs @@ -183,6 +183,12 @@ private static async Task Generate(string sriUrl, Regex cppIntrinsicNameMatcher, throw new Exception("Unexpected error - couldnt find end of c method intrinsic"); var cMethod = xmlDoc[cMethodIndex..cMethodEndIndex]; + // Remove when merged: https://github.com/dotnet/runtime/pull/88552 + if (cMethod == "_mm256_ceil_ps" && methodSymbol.Name == "Floor") + cMethod = "_mm256_floor_ps"; + if (cMethod == "_mm256_ceil_pd" && methodSymbol.Name == "Floor") + cMethod = "_mm256_floor_pd"; + var csMethod = new CsMethod { Name = methodDeclaration.Identifier.ToString(), @@ -547,7 +553,11 @@ static string IntelTypeNameToSystemTypeName(string itn) .Count( cpt => !intelMethod.Parameters .Select((ip, j) => (ip, j)) - .Any(ipt => cpt.i == ipt.j && ipt.ip.Type.CsType.Name == cpt.cp.Type.Name && ipt.ip.Type.CsType.TypeParameter == cpt.cp.Type.TypeParameter) + .Any(ipt => + cpt.i == ipt.j && + ipt.ip.Type.CsType.Name == cpt.cp.Type.Name && + ipt.ip.Type.CsType.TypeParameter == cpt.cp.Type.TypeParameter + ) ) )) .OrderBy(m => m.s) @@ -559,7 +569,8 @@ static string IntelTypeNameToSystemTypeName(string itn) return null; } - if ((csMethodCand.Length == 8 || csMethodCand.Length == 4) && csMethodCand.Select(m => m.s).Distinct().Count() == 1) + var oneDistinctScore = csMethodCand.Select(m => m.s).Distinct().Count() == 1; + if ((csMethodCand.Length == 8 || csMethodCand.Length == 4 || csMethodCand.Length == 2) && oneDistinctScore) return csMethodCand[0].m; if (csMethodCand.Length > 1)