diff --git a/src/External/RawIntrinsics/AVX.ManuallyAdded.cs b/src/External/RawIntrinsics/AVX.ManuallyAdded.cs
index 0a7ee43..12d1290 100644
--- a/src/External/RawIntrinsics/AVX.ManuallyAdded.cs
+++ b/src/External/RawIntrinsics/AVX.ManuallyAdded.cs
@@ -1,4 +1,6 @@
-using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics;
+
+[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Performance", "CA1857:A constant is expected for the parameter", Justification = "These are thin 1:1 wrappers over hardware intrinsics; the immediate is forwarded verbatim and the ConstantExpected annotations on the wrapper parameters surface the constraint to callers.")]
namespace RawIntrinsics
{
@@ -22,4 +24,4 @@ public static partial class AVX
/// __m256i dst {M256}
public static __m256i _mm256_setzero_si256() => System.Runtime.Intrinsics.Vector256.Zero;
}
-}
\ No newline at end of file
+}
diff --git a/src/External/RawIntrinsics/AVX.cs b/src/External/RawIntrinsics/AVX.cs
index c10c3d8..683bce5 100644
--- a/src/External/RawIntrinsics/AVX.cs
+++ b/src/External/RawIntrinsics/AVX.cs
@@ -18,7 +18,7 @@ public static unsafe partial class AVX
/// __m128d {FP64}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm_cmp_pd(__m128d a, __m128d b, int imm8) => System.Runtime.Intrinsics.X86.Avx.Compare(a.FP64, b.FP64, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
+	public static __m128d _mm_cmp_pd(__m128d a, __m128d b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (int)System.Runtime.Intrinsics.X86.FloatComparisonMode.UnorderedTrueSignaling)] int imm8) => System.Runtime.Intrinsics.X86.Avx.Compare(a.FP64, b.FP64, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
///
/// Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst".
@@ -28,7 +28,7 @@ public static unsafe partial class AVX
/// __m128 {FP32}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_cmp_ps(__m128 a, __m128 b, int imm8) => System.Runtime.Intrinsics.X86.Avx.Compare(a.FP32, b.FP32, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
+	public static __m128 _mm_cmp_ps(__m128 a, __m128 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (int)System.Runtime.Intrinsics.X86.FloatComparisonMode.UnorderedTrueSignaling)] int imm8) => System.Runtime.Intrinsics.X86.Avx.Compare(a.FP32, b.FP32, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
///
/// Compare the lower double-precision (64-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
@@ -38,7 +38,7 @@ public static unsafe partial class AVX
/// __m128d {FP64}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm_cmp_sd(__m128d a, __m128d b, int imm8) => System.Runtime.Intrinsics.X86.Avx.CompareScalar(a.FP64, b.FP64, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
+	public static __m128d _mm_cmp_sd(__m128d a, __m128d b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (int)System.Runtime.Intrinsics.X86.FloatComparisonMode.UnorderedTrueSignaling)] int imm8) => System.Runtime.Intrinsics.X86.Avx.CompareScalar(a.FP64, b.FP64, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
///
/// Compare the lower single-precision (32-bit) floating-point element in "a" and "b" based on the comparison operand specified by "imm8", store the result in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst".
@@ -48,7 +48,7 @@ public static unsafe partial class AVX
/// __m128 {FP32}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_cmp_ss(__m128 a, __m128 b, int imm8) => System.Runtime.Intrinsics.X86.Avx.CompareScalar(a.FP32, b.FP32, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
+	public static __m128 _mm_cmp_ss(__m128 a, __m128 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (int)System.Runtime.Intrinsics.X86.FloatComparisonMode.UnorderedTrueSignaling)] int imm8) => System.Runtime.Intrinsics.X86.Avx.CompareScalar(a.FP32, b.FP32, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
///
/// Load packed double-precision (64-bit) floating-point elements from memory into "dst" using "mask" (elements are zeroed out when the high bit of the corresponding element is not set).
@@ -95,7 +95,7 @@ public static unsafe partial class AVX
/// __m128d {FP64}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm_permute_pd(__m128d a, int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute(a.FP64, (byte)imm8);
+ public static __m128d _mm_permute_pd(__m128d a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute(a.FP64, (byte)imm8);
///
/// Shuffle single-precision (32-bit) floating-point elements in "a" using the control in "imm8", and store the results in "dst".
@@ -104,7 +104,7 @@ public static unsafe partial class AVX
/// __m128 {FP32}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_permute_ps(__m128 a, int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute(a.FP32, (byte)imm8);
+ public static __m128 _mm_permute_ps(__m128 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute(a.FP32, (byte)imm8);
///
/// Shuffle double-precision (64-bit) floating-point elements in "a" using the control in "b", and store the results in "dst".
@@ -258,7 +258,7 @@ public static unsafe partial class AVX
/// __m256d {FP64}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_blend_pd(__m256d a, __m256d b, int imm8) => System.Runtime.Intrinsics.X86.Avx.Blend(a.FP64, b.FP64, (byte)imm8);
+ public static __m256d _mm256_blend_pd(__m256d a, __m256d b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.Blend(a.FP64, b.FP64, (byte)imm8);
///
/// Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst".
@@ -268,7 +268,7 @@ public static unsafe partial class AVX
/// __m256 {FP32}
/// int {IMM}
/// __m256 dst {FP32}
- public static __m256 _mm256_blend_ps(__m256 a, __m256 b, int imm8) => System.Runtime.Intrinsics.X86.Avx.Blend(a.FP32, b.FP32, (byte)imm8);
+ public static __m256 _mm256_blend_ps(__m256 a, __m256 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.Blend(a.FP32, b.FP32, (byte)imm8);
///
/// Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using "mask", and store the results in "dst".
@@ -346,7 +346,7 @@ public static unsafe partial class AVX
/// __m256d {FP64}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_cmp_pd(__m256d a, __m256d b, int imm8) => System.Runtime.Intrinsics.X86.Avx.Compare(a.FP64, b.FP64, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
+	public static __m256d _mm256_cmp_pd(__m256d a, __m256d b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (int)System.Runtime.Intrinsics.X86.FloatComparisonMode.UnorderedTrueSignaling)] int imm8) => System.Runtime.Intrinsics.X86.Avx.Compare(a.FP64, b.FP64, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
///
/// Compare packed single-precision (32-bit) floating-point elements in "a" and "b" based on the comparison operand specified by "imm8", and store the results in "dst".
@@ -356,7 +356,7 @@ public static unsafe partial class AVX
/// __m256 {FP32}
/// int {IMM}
/// __m256 dst {FP32}
- public static __m256 _mm256_cmp_ps(__m256 a, __m256 b, int imm8) => System.Runtime.Intrinsics.X86.Avx.Compare(a.FP32, b.FP32, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
+	public static __m256 _mm256_cmp_ps(__m256 a, __m256 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Max = (int)System.Runtime.Intrinsics.X86.FloatComparisonMode.UnorderedTrueSignaling)] int imm8) => System.Runtime.Intrinsics.X86.Avx.Compare(a.FP32, b.FP32, (System.Runtime.Intrinsics.X86.FloatComparisonMode)imm8);
///
/// Convert packed signed 32-bit integers in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst".
@@ -456,7 +456,7 @@ public static unsafe partial class AVX
/// __m256 {FP32}
/// int {IMM}
/// __m256 dst {FP32}
- public static __m256 _mm256_dp_ps(__m256 a, __m256 b, int imm8) => System.Runtime.Intrinsics.X86.Avx.DotProduct(a.FP32, b.FP32, (byte)imm8);
+ public static __m256 _mm256_dp_ps(__m256 a, __m256 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.DotProduct(a.FP32, b.FP32, (byte)imm8);
///
/// Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".
@@ -465,7 +465,7 @@ public static unsafe partial class AVX
/// __m256d {FP64}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm256_extractf128_pd(__m256d a, int imm8) => System.Runtime.Intrinsics.X86.Avx.ExtractVector128(a.FP64, (byte)imm8);
+ public static __m128d _mm256_extractf128_pd(__m256d a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.ExtractVector128(a.FP64, (byte)imm8);
///
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "a", selected with "imm8", and store the result in "dst".
@@ -474,7 +474,7 @@ public static unsafe partial class AVX
/// __m256 {FP32}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm256_extractf128_ps(__m256 a, int imm8) => System.Runtime.Intrinsics.X86.Avx.ExtractVector128(a.FP32, (byte)imm8);
+ public static __m128 _mm256_extractf128_ps(__m256 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.ExtractVector128(a.FP32, (byte)imm8);
///
/// Extract 128 bits (composed of integer data) from "a", selected with "imm8", and store the result in "dst".
@@ -483,23 +483,7 @@ public static unsafe partial class AVX
/// __m256i {M128}
/// int {IMM}
/// __m128i dst {M128}
- public static __m128i _mm256_extractf128_si256(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx.ExtractVector128(a.UI8, (byte)imm8);
-
- ///
- /// Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst".
- ///
- /// VROUNDPD ymm, ymm, imm8
- /// __m256d {FP64}
- /// __m256d dst {FP64}
- public static __m256d _mm256_floor_pd(__m256d a) => System.Runtime.Intrinsics.X86.Avx.Floor(a.FP64);
-
- ///
- /// Round the packed single-precision (32-bit) floating-point elements in "a" down to an integer value, and store the results as packed single-precision floating-point elements in "dst".
- ///
- /// VROUNDPS ymm, ymm, imm8
- /// __m256 {FP32}
- /// __m256 dst {FP32}
- public static __m256 _mm256_floor_ps(__m256 a) => System.Runtime.Intrinsics.X86.Avx.Floor(a.FP32);
+ public static __m128i _mm256_extractf128_si256(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.ExtractVector128(a.UI8, (byte)imm8);
///
/// Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in "a" and "b", and pack the results in "dst".
@@ -529,7 +513,7 @@ public static unsafe partial class AVX
public static __m256d _mm256_hsub_pd(__m256d a, __m256d b) => System.Runtime.Intrinsics.X86.Avx.HorizontalSubtract(a.FP64, b.FP64);
///
- /// Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst".
+ /// Horizontally subtract adjacent pairs of single-precision (32-bit) floating-point elements in "a" and "b", and pack the results in "dst".
///
/// VHSUBPS ymm, ymm, ymm
/// __m256 {FP32}
@@ -545,7 +529,7 @@ public static unsafe partial class AVX
/// __m128d {FP64}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_insertf128_pd(__m256d a, __m128d b, int imm8) => System.Runtime.Intrinsics.X86.Avx.InsertVector128(a.FP64, b.FP64, (byte)imm8);
+ public static __m256d _mm256_insertf128_pd(__m256d a, __m128d b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.InsertVector128(a.FP64, b.FP64, (byte)imm8);
///
/// Copy "a" to "dst", then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from "b" into "dst" at the location specified by "imm8".
@@ -555,7 +539,7 @@ public static unsafe partial class AVX
/// __m128 {FP32}
/// int {IMM}
/// __m256 dst {FP32}
- public static __m256 _mm256_insertf128_ps(__m256 a, __m128 b, int imm8) => System.Runtime.Intrinsics.X86.Avx.InsertVector128(a.FP32, b.FP32, (byte)imm8);
+ public static __m256 _mm256_insertf128_ps(__m256 a, __m128 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.InsertVector128(a.FP32, b.FP32, (byte)imm8);
///
/// Copy "a" to "dst", then insert 128 bits from "b" into "dst" at the location specified by "imm8".
@@ -565,7 +549,7 @@ public static unsafe partial class AVX
/// __m128i {M128}
/// int {IMM}
/// __m256i dst {M128}
- public static __m256i _mm256_insertf128_si256(__m256i a, __m128i b, int imm8) => System.Runtime.Intrinsics.X86.Avx.InsertVector128(a.UI8, b.UI8, (byte)imm8);
+ public static __m256i _mm256_insertf128_si256(__m256i a, __m128i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.InsertVector128(a.UI8, b.UI8, (byte)imm8);
///
/// Load 256-bits of integer data from unaligned memory into "dst". This intrinsic may perform better than "_mm256_loadu_si256" when the data crosses a cache line boundary.
@@ -662,7 +646,7 @@ public static unsafe partial class AVX
public static void _mm256_maskstore_ps(float* mem_addr, __m256i mask, __m256 a) => System.Runtime.Intrinsics.X86.Avx.MaskStore(mem_addr, mask.FP32, a.FP32);
///
- /// Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".
+ /// Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note]
///
/// VMAXPD ymm, ymm, ymm
/// __m256d {FP64}
@@ -671,7 +655,7 @@ public static unsafe partial class AVX
public static __m256d _mm256_max_pd(__m256d a, __m256d b) => System.Runtime.Intrinsics.X86.Avx.Max(a.FP64, b.FP64);
///
- /// Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".
+ /// Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note]
///
/// VMAXPS ymm, ymm, ymm
/// __m256 {FP32}
@@ -680,7 +664,7 @@ public static unsafe partial class AVX
public static __m256 _mm256_max_ps(__m256 a, __m256 b) => System.Runtime.Intrinsics.X86.Avx.Max(a.FP32, b.FP32);
///
- /// Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".
+ /// Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note]
///
/// VMINPD ymm, ymm, ymm
/// __m256d {FP64}
@@ -689,7 +673,7 @@ public static unsafe partial class AVX
public static __m256d _mm256_min_pd(__m256d a, __m256d b) => System.Runtime.Intrinsics.X86.Avx.Min(a.FP64, b.FP64);
///
- /// Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".
+ /// Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note]
///
/// VMINPS ymm, ymm, ymm
/// __m256 {FP32}
@@ -780,7 +764,7 @@ public static unsafe partial class AVX
/// __m256d {FP64}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_permute_pd(__m256d a, int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute(a.FP64, (byte)imm8);
+ public static __m256d _mm256_permute_pd(__m256d a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute(a.FP64, (byte)imm8);
///
/// Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst".
@@ -789,7 +773,7 @@ public static unsafe partial class AVX
/// __m256 {FP32}
/// int {IMM}
/// __m256 dst {FP32}
- public static __m256 _mm256_permute_ps(__m256 a, int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute(a.FP32, (byte)imm8);
+ public static __m256 _mm256_permute_ps(__m256 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute(a.FP32, (byte)imm8);
///
/// Shuffle 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst".
@@ -799,7 +783,7 @@ public static unsafe partial class AVX
/// __m256d {FP64}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_permute2f128_pd(__m256d a, __m256d b, int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute2x128(a.FP64, b.FP64, (byte)imm8);
+ public static __m256d _mm256_permute2f128_pd(__m256d a, __m256d b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute2x128(a.FP64, b.FP64, (byte)imm8);
///
/// Shuffle 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) selected by "imm8" from "a" and "b", and store the results in "dst".
@@ -809,7 +793,7 @@ public static unsafe partial class AVX
/// __m256 {FP32}
/// int {IMM}
/// __m256 dst {FP32}
- public static __m256 _mm256_permute2f128_ps(__m256 a, __m256 b, int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute2x128(a.FP32, b.FP32, (byte)imm8);
+ public static __m256 _mm256_permute2f128_ps(__m256 a, __m256 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute2x128(a.FP32, b.FP32, (byte)imm8);
///
/// Shuffle 128-bits (composed of integer data) selected by "imm8" from "a" and "b", and store the results in "dst".
@@ -819,7 +803,7 @@ public static unsafe partial class AVX
/// __m256i {M256}
/// int {IMM}
/// __m256i dst {M256}
- public static __m256i _mm256_permute2f128_si256(__m256i a, __m256i b, int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute2x128(a.UI8, b.UI8, (byte)imm8);
+ public static __m256i _mm256_permute2f128_si256(__m256i a, __m256i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.Permute2x128(a.UI8, b.UI8, (byte)imm8);
///
/// Shuffle double-precision (64-bit) floating-point elements in "a" within 128-bit lanes using the control in "b", and store the results in "dst".
@@ -854,7 +838,7 @@ public static unsafe partial class AVX
/// __m256d {FP64}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_round_pd(__m256d a, int rounding) => System.Runtime.Intrinsics.X86.Avx.RoundCurrentDirection(a.FP64);
+	public static __m256d _mm256_round_pd(__m256d a, int rounding) => (rounding & 0x7) switch { 0 => System.Runtime.Intrinsics.X86.Avx.RoundToNearestInteger(a.FP64), 1 => System.Runtime.Intrinsics.X86.Avx.RoundToNegativeInfinity(a.FP64), 2 => System.Runtime.Intrinsics.X86.Avx.RoundToPositiveInfinity(a.FP64), 3 => System.Runtime.Intrinsics.X86.Avx.RoundToZero(a.FP64), _ => System.Runtime.Intrinsics.X86.Avx.RoundCurrentDirection(a.FP64) };
///
/// Round the packed single-precision (32-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed single-precision floating-point elements in "dst". [round_note]
@@ -1070,7 +1054,7 @@ public static unsafe partial class AVX
/// __m256d {FP64}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_shuffle_pd(__m256d a, __m256d b, int imm8) => System.Runtime.Intrinsics.X86.Avx.Shuffle(a.FP64, b.FP64, (byte)imm8);
+ public static __m256d _mm256_shuffle_pd(__m256d a, __m256d b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.Shuffle(a.FP64, b.FP64, (byte)imm8);
///
/// Shuffle single-precision (32-bit) floating-point elements in "a" within 128-bit lanes using the control in "imm8", and store the results in "dst".
@@ -1080,7 +1064,7 @@ public static unsafe partial class AVX
/// __m256 {FP32}
/// int {IMM}
/// __m256 dst {FP32}
- public static __m256 _mm256_shuffle_ps(__m256 a, __m256 b, int imm8) => System.Runtime.Intrinsics.X86.Avx.Shuffle(a.FP32, b.FP32, (byte)imm8);
+ public static __m256 _mm256_shuffle_ps(__m256 a, __m256 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx.Shuffle(a.FP32, b.FP32, (byte)imm8);
///
/// Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".
diff --git a/src/External/RawIntrinsics/AVX2.cs b/src/External/RawIntrinsics/AVX2.cs
index 931839c..6ece5e0 100644
--- a/src/External/RawIntrinsics/AVX2.cs
+++ b/src/External/RawIntrinsics/AVX2.cs
@@ -10,7 +10,7 @@ public static unsafe partial class AVX2
/// __m128i {UI32}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm_blend_epi32(__m128i a, __m128i b, int imm8) => System.Runtime.Intrinsics.X86.Avx2.Blend(a.UI32, b.UI32, (byte)imm8);
+ public static __m128i _mm_blend_epi32(__m128i a, __m128i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.Blend(a.UI32, b.UI32, (byte)imm8);
///
/// Broadcast the low packed 8-bit integer from "a" to all elements of "dst".
@@ -68,7 +68,7 @@ public static unsafe partial class AVX2
/// __m128i {SI32}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm_i32gather_epi32(int* base_addr, __m128i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128((uint*)base_addr, vindex.SI32, (byte)scale);
+ public static __m128i _mm_i32gather_epi32(int* base_addr, __m128i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128((uint*)base_addr, vindex.SI32, (byte)scale);
///
/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -78,7 +78,7 @@ public static unsafe partial class AVX2
/// __m128i {SI32}
/// int {IMM}
/// __m128i dst {UI64}
- public static __m128i _mm_i32gather_epi64(long* base_addr, __m128i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128((ulong*)base_addr, vindex.SI32, (byte)scale);
+ public static __m128i _mm_i32gather_epi64(long* base_addr, __m128i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128((ulong*)base_addr, vindex.SI32, (byte)scale);
///
/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -88,7 +88,7 @@ public static unsafe partial class AVX2
/// __m128i {SI32}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm_i32gather_pd(double* base_addr, __m128i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128(base_addr, vindex.SI32, (byte)scale);
+ public static __m128d _mm_i32gather_pd(double* base_addr, __m128i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128(base_addr, vindex.SI32, (byte)scale);
///
/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -98,7 +98,7 @@ public static unsafe partial class AVX2
/// __m128i {SI32}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_i32gather_ps(float* base_addr, __m128i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128(base_addr, vindex.SI32, (byte)scale);
+ public static __m128 _mm_i32gather_ps(float* base_addr, __m128i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128(base_addr, vindex.SI32, (byte)scale);
///
/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -108,7 +108,7 @@ public static unsafe partial class AVX2
/// __m128i {SI64}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm_i64gather_epi32(int* base_addr, __m128i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128((uint*)base_addr, vindex.SI64, (byte)scale);
+ public static __m128i _mm_i64gather_epi32(int* base_addr, __m128i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128((uint*)base_addr, vindex.SI64, (byte)scale);
///
/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -118,7 +118,7 @@ public static unsafe partial class AVX2
/// __m128i {SI64}
/// int {IMM}
/// __m128i dst {UI64}
- public static __m128i _mm_i64gather_epi64(long* base_addr, __m128i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128((ulong*)base_addr, vindex.SI64, (byte)scale);
+ public static __m128i _mm_i64gather_epi64(long* base_addr, __m128i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128((ulong*)base_addr, vindex.SI64, (byte)scale);
///
/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -128,7 +128,7 @@ public static unsafe partial class AVX2
/// __m128i {SI64}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm_i64gather_pd(double* base_addr, __m128i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128(base_addr, vindex.SI64, (byte)scale);
+ public static __m128d _mm_i64gather_pd(double* base_addr, __m128i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128(base_addr, vindex.SI64, (byte)scale);
///
/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -138,7 +138,7 @@ public static unsafe partial class AVX2
/// __m128i {SI64}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_i64gather_ps(float* base_addr, __m128i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128(base_addr, vindex.SI64, (byte)scale);
+ public static __m128 _mm_i64gather_ps(float* base_addr, __m128i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128(base_addr, vindex.SI64, (byte)scale);
///
/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -150,7 +150,7 @@ public static unsafe partial class AVX2
/// __m128i {MASK}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm_mask_i32gather_epi32(__m128i src, int* base_addr, __m128i vindex, __m128i mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.UI32, (uint*)base_addr, vindex.SI32, mask.UI32, (byte)scale);
+ public static __m128i _mm_mask_i32gather_epi32(__m128i src, int* base_addr, __m128i vindex, __m128i mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.UI32, (uint*)base_addr, vindex.SI32, mask.UI32, (byte)scale);
///
/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -162,7 +162,7 @@ public static unsafe partial class AVX2
/// __m128i {MASK}
/// int {IMM}
/// __m128i dst {UI64}
- public static __m128i _mm_mask_i32gather_epi64(__m128i src, long* base_addr, __m128i vindex, __m128i mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.UI64, (ulong*)base_addr, vindex.SI32, mask.UI64, (byte)scale);
+ public static __m128i _mm_mask_i32gather_epi64(__m128i src, long* base_addr, __m128i vindex, __m128i mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.UI64, (ulong*)base_addr, vindex.SI32, mask.UI64, (byte)scale);
///
/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -174,7 +174,7 @@ public static unsafe partial class AVX2
/// __m128d {MASK}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm_mask_i32gather_pd(__m128d src, double* base_addr, __m128i vindex, __m128d mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.FP64, base_addr, vindex.SI32, mask.FP64, (byte)scale);
+ public static __m128d _mm_mask_i32gather_pd(__m128d src, double* base_addr, __m128i vindex, __m128d mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.FP64, base_addr, vindex.SI32, mask.FP64, (byte)scale);
///
/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -186,7 +186,7 @@ public static unsafe partial class AVX2
/// __m128 {MASK}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_mask_i32gather_ps(__m128 src, float* base_addr, __m128i vindex, __m128 mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.FP32, base_addr, vindex.SI32, mask.FP32, (byte)scale);
+ public static __m128 _mm_mask_i32gather_ps(__m128 src, float* base_addr, __m128i vindex, __m128 mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.FP32, base_addr, vindex.SI32, mask.FP32, (byte)scale);
///
/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -198,7 +198,7 @@ public static unsafe partial class AVX2
/// __m128i {MASK}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm_mask_i64gather_epi32(__m128i src, int* base_addr, __m128i vindex, __m128i mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.UI32, (uint*)base_addr, vindex.SI64, mask.UI32, (byte)scale);
+ public static __m128i _mm_mask_i64gather_epi32(__m128i src, int* base_addr, __m128i vindex, __m128i mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.UI32, (uint*)base_addr, vindex.SI64, mask.UI32, (byte)scale);
///
/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -210,7 +210,7 @@ public static unsafe partial class AVX2
/// __m128i {MASK}
/// int {IMM}
/// __m128i dst {UI64}
- public static __m128i _mm_mask_i64gather_epi64(__m128i src, long* base_addr, __m128i vindex, __m128i mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.UI64, (ulong*)base_addr, vindex.SI64, mask.UI64, (byte)scale);
+ public static __m128i _mm_mask_i64gather_epi64(__m128i src, long* base_addr, __m128i vindex, __m128i mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.UI64, (ulong*)base_addr, vindex.SI64, mask.UI64, (byte)scale);
///
/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -222,7 +222,7 @@ public static unsafe partial class AVX2
/// __m128d {MASK}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm_mask_i64gather_pd(__m128d src, double* base_addr, __m128i vindex, __m128d mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.FP64, base_addr, vindex.SI64, mask.FP64, (byte)scale);
+ public static __m128d _mm_mask_i64gather_pd(__m128d src, double* base_addr, __m128i vindex, __m128d mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.FP64, base_addr, vindex.SI64, mask.FP64, (byte)scale);
///
/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -234,7 +234,7 @@ public static unsafe partial class AVX2
/// __m128 {MASK}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_mask_i64gather_ps(__m128 src, float* base_addr, __m128i vindex, __m128 mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.FP32, base_addr, vindex.SI64, mask.FP32, (byte)scale);
+ public static __m128 _mm_mask_i64gather_ps(__m128 src, float* base_addr, __m128i vindex, __m128 mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.FP32, base_addr, vindex.SI64, mask.FP32, (byte)scale);
///
/// Load packed 32-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element).
@@ -423,7 +423,7 @@ public static unsafe partial class AVX2
/// __m256i {UI8}
/// int {IMM}
/// __m256i dst {UI8}
- public static __m256i _mm256_alignr_epi8(__m256i a, __m256i b, int imm8) => System.Runtime.Intrinsics.X86.Avx2.AlignRight(a.UI8, b.UI8, (byte)imm8);
+ public static __m256i _mm256_alignr_epi8(__m256i a, __m256i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.AlignRight(a.UI8, b.UI8, (byte)imm8);
///
/// Compute the bitwise AND of 256 bits (representing integer data) in "a" and "b", and store the result in "dst".
@@ -469,7 +469,7 @@ public static unsafe partial class AVX2
/// __m256i {UI16}
/// int {IMM}
/// __m256i dst {UI16}
- public static __m256i _mm256_blend_epi16(__m256i a, __m256i b, int imm8) => System.Runtime.Intrinsics.X86.Avx2.Blend(a.UI16, b.UI16, (byte)imm8);
+ public static __m256i _mm256_blend_epi16(__m256i a, __m256i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.Blend(a.UI16, b.UI16, (byte)imm8);
///
/// Blend packed 32-bit integers from "a" and "b" using control mask "imm8", and store the results in "dst".
@@ -479,7 +479,7 @@ public static unsafe partial class AVX2
/// __m256i {UI32}
/// int {IMM}
/// __m256i dst {UI32}
- public static __m256i _mm256_blend_epi32(__m256i a, __m256i b, int imm8) => System.Runtime.Intrinsics.X86.Avx2.Blend(a.UI32, b.UI32, (byte)imm8);
+ public static __m256i _mm256_blend_epi32(__m256i a, __m256i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.Blend(a.UI32, b.UI32, (byte)imm8);
///
/// Blend packed 8-bit integers from "a" and "b" using "mask", and store the results in "dst".
@@ -554,7 +554,7 @@ public static unsafe partial class AVX2
/// __m256i {M128}
/// int {IMM}
/// __m256i dst {M128}
- public static __m256i _mm256_bslli_epi128(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftLeftLogical128BitLane(a.SI8, (byte)imm8);
+ public static __m256i _mm256_bslli_epi128(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftLeftLogical128BitLane(a.SI8, (byte)imm8);
///
/// Shift 128-bit lanes in "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst".
@@ -563,7 +563,7 @@ public static unsafe partial class AVX2
/// __m256i {M128}
/// int {IMM}
/// __m256i dst {M128}
- public static __m256i _mm256_bsrli_epi128(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightLogical128BitLane(a.SI8, (byte)imm8);
+ public static __m256i _mm256_bsrli_epi128(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightLogical128BitLane(a.SI8, (byte)imm8);
///
/// Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst".
@@ -740,7 +740,7 @@ public static unsafe partial class AVX2
/// __m256i {M128}
/// int {IMM}
/// __m128i dst {M128}
- public static __m128i _mm256_extracti128_si256(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ExtractVector128(a.SI8, (byte)imm8);
+ public static __m128i _mm256_extracti128_si256(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ExtractVector128(a.SI8, (byte)imm8);
///
/// Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".
@@ -804,7 +804,7 @@ public static unsafe partial class AVX2
/// __m256i {SI32}
/// int {IMM}
/// __m256i dst {UI32}
- public static __m256i _mm256_i32gather_epi32(int* base_addr, __m256i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256((uint*)base_addr, vindex.SI32, (byte)scale);
+ public static __m256i _mm256_i32gather_epi32(int* base_addr, __m256i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256((uint*)base_addr, vindex.SI32, (byte)scale);
///
/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -814,7 +814,7 @@ public static unsafe partial class AVX2
/// __m128i {SI32}
/// int {IMM}
/// __m256i dst {UI64}
- public static __m256i _mm256_i32gather_epi64(long* base_addr, __m128i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256((ulong*)base_addr, vindex.SI32, (byte)scale);
+ public static __m256i _mm256_i32gather_epi64(long* base_addr, __m128i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256((ulong*)base_addr, vindex.SI32, (byte)scale);
///
/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -824,7 +824,7 @@ public static unsafe partial class AVX2
/// __m128i {SI32}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_i32gather_pd(double* base_addr, __m128i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256(base_addr, vindex.SI32, (byte)scale);
+ public static __m256d _mm256_i32gather_pd(double* base_addr, __m128i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256(base_addr, vindex.SI32, (byte)scale);
///
/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -834,7 +834,7 @@ public static unsafe partial class AVX2
/// __m256i {SI32}
/// int {IMM}
/// __m256 dst {FP32}
- public static __m256 _mm256_i32gather_ps(float* base_addr, __m256i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256(base_addr, vindex.SI32, (byte)scale);
+ public static __m256 _mm256_i32gather_ps(float* base_addr, __m256i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256(base_addr, vindex.SI32, (byte)scale);
///
/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -844,7 +844,7 @@ public static unsafe partial class AVX2
/// __m256i {SI64}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm256_i64gather_epi32(int* base_addr, __m256i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128((uint*)base_addr, vindex.SI64, (byte)scale);
+ public static __m128i _mm256_i64gather_epi32(int* base_addr, __m256i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128((uint*)base_addr, vindex.SI64, (byte)scale);
///
/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -854,7 +854,7 @@ public static unsafe partial class AVX2
/// __m256i {SI64}
/// int {IMM}
/// __m256i dst {UI64}
- public static __m256i _mm256_i64gather_epi64(long* base_addr, __m256i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256((ulong*)base_addr, vindex.SI64, (byte)scale);
+ public static __m256i _mm256_i64gather_epi64(long* base_addr, __m256i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256((ulong*)base_addr, vindex.SI64, (byte)scale);
///
/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -864,7 +864,7 @@ public static unsafe partial class AVX2
/// __m256i {SI64}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_i64gather_pd(double* base_addr, __m256i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256(base_addr, vindex.SI64, (byte)scale);
+ public static __m256d _mm256_i64gather_pd(double* base_addr, __m256i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector256(base_addr, vindex.SI64, (byte)scale);
///
/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst". "scale" should be 1, 2, 4 or 8.
@@ -874,7 +874,7 @@ public static unsafe partial class AVX2
/// __m256i {SI64}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm256_i64gather_ps(float* base_addr, __m256i vindex, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128(base_addr, vindex.SI64, (byte)scale);
+ public static __m128 _mm256_i64gather_ps(float* base_addr, __m256i vindex, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherVector128(base_addr, vindex.SI64, (byte)scale);
///
/// Copy "a" to "dst", then insert 128 bits (composed of integer data) from "b" into "dst" at the location specified by "imm8".
@@ -884,7 +884,7 @@ public static unsafe partial class AVX2
/// __m128i {M128}
/// int {IMM}
/// __m256i dst {M128}
- public static __m256i _mm256_inserti128_si256(__m256i a, __m128i b, int imm8) => System.Runtime.Intrinsics.X86.Avx2.InsertVector128(a.SI8, b.SI8, (byte)imm8);
+ public static __m256i _mm256_inserti128_si256(__m256i a, __m128i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.InsertVector128(a.SI8, b.SI8, (byte)imm8);
///
/// Multiply packed signed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in "dst".
@@ -914,7 +914,7 @@ public static unsafe partial class AVX2
/// __m256i {MASK}
/// int {IMM}
/// __m256i dst {UI32}
- public static __m256i _mm256_mask_i32gather_epi32(__m256i src, int* base_addr, __m256i vindex, __m256i mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.UI32, (uint*)base_addr, vindex.SI32, mask.UI32, (byte)scale);
+ public static __m256i _mm256_mask_i32gather_epi32(__m256i src, int* base_addr, __m256i vindex, __m256i mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.UI32, (uint*)base_addr, vindex.SI32, mask.UI32, (byte)scale);
///
/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -926,7 +926,7 @@ public static unsafe partial class AVX2
/// __m256i {MASK}
/// int {IMM}
/// __m256i dst {UI64}
- public static __m256i _mm256_mask_i32gather_epi64(__m256i src, long* base_addr, __m128i vindex, __m256i mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.UI64, (ulong*)base_addr, vindex.SI32, mask.UI64, (byte)scale);
+ public static __m256i _mm256_mask_i32gather_epi64(__m256i src, long* base_addr, __m128i vindex, __m256i mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.UI64, (ulong*)base_addr, vindex.SI32, mask.UI64, (byte)scale);
///
/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -938,7 +938,7 @@ public static unsafe partial class AVX2
/// __m256d {MASK}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_mask_i32gather_pd(__m256d src, double* base_addr, __m128i vindex, __m256d mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.FP64, base_addr, vindex.SI32, mask.FP64, (byte)scale);
+ public static __m256d _mm256_mask_i32gather_pd(__m256d src, double* base_addr, __m128i vindex, __m256d mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.FP64, base_addr, vindex.SI32, mask.FP64, (byte)scale);
///
/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 32-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -950,7 +950,7 @@ public static unsafe partial class AVX2
/// __m256 {MASK}
/// int {IMM}
/// __m256 dst {FP32}
- public static __m256 _mm256_mask_i32gather_ps(__m256 src, float* base_addr, __m256i vindex, __m256 mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.FP32, base_addr, vindex.SI32, mask.FP32, (byte)scale);
+ public static __m256 _mm256_mask_i32gather_ps(__m256 src, float* base_addr, __m256i vindex, __m256 mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.FP32, base_addr, vindex.SI32, mask.FP32, (byte)scale);
///
/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -962,7 +962,7 @@ public static unsafe partial class AVX2
/// __m128i {MASK}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm256_mask_i64gather_epi32(__m128i src, int* base_addr, __m256i vindex, __m128i mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.UI32, (uint*)base_addr, vindex.SI64, mask.UI32, (byte)scale);
+ public static __m128i _mm256_mask_i64gather_epi32(__m128i src, int* base_addr, __m256i vindex, __m128i mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.UI32, (uint*)base_addr, vindex.SI64, mask.UI32, (byte)scale);
///
/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -974,7 +974,7 @@ public static unsafe partial class AVX2
/// __m256i {MASK}
/// int {IMM}
/// __m256i dst {UI64}
- public static __m256i _mm256_mask_i64gather_epi64(__m256i src, long* base_addr, __m256i vindex, __m256i mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.UI64, (ulong*)base_addr, vindex.SI64, mask.UI64, (byte)scale);
+ public static __m256i _mm256_mask_i64gather_epi64(__m256i src, long* base_addr, __m256i vindex, __m256i mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.UI64, (ulong*)base_addr, vindex.SI64, mask.UI64, (byte)scale);
///
/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -986,7 +986,7 @@ public static unsafe partial class AVX2
/// __m256d {MASK}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_mask_i64gather_pd(__m256d src, double* base_addr, __m256i vindex, __m256d mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.FP64, base_addr, vindex.SI64, mask.FP64, (byte)scale);
+ public static __m256d _mm256_mask_i64gather_pd(__m256d src, double* base_addr, __m256i vindex, __m256d mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector256(src.FP64, base_addr, vindex.SI64, mask.FP64, (byte)scale);
///
/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at "base_addr" and offset by each 64-bit element in "vindex" (each index is scaled by the factor in "scale"). Gathered elements are merged into "dst" using "mask" (elements are copied from "src" when the highest bit is not set in the corresponding element). "scale" should be 1, 2, 4 or 8.
@@ -998,7 +998,7 @@ public static unsafe partial class AVX2
/// __m128 {MASK}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm256_mask_i64gather_ps(__m128 src, float* base_addr, __m256i vindex, __m128 mask, int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.FP32, base_addr, vindex.SI64, mask.FP32, (byte)scale);
+ public static __m128 _mm256_mask_i64gather_ps(__m128 src, float* base_addr, __m256i vindex, __m128 mask, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute(Min = 1, Max = 8)] int scale) => System.Runtime.Intrinsics.X86.Avx2.GatherMaskVector128(src.FP32, base_addr, vindex.SI64, mask.FP32, (byte)scale);
///
/// Load packed 32-bit integers from memory into "dst" using "mask" (elements are zeroed out when the highest bit is not set in the corresponding element).
@@ -1162,7 +1162,7 @@ public static unsafe partial class AVX2
/// __m256i {UI8}
/// int {IMM}
/// __m256i dst {UI8}
- public static __m256i _mm256_mpsadbw_epu8(__m256i a, __m256i b, int imm8) => System.Runtime.Intrinsics.X86.Avx2.MultipleSumAbsoluteDifferences(a.UI8, b.UI8, (byte)imm8);
+ public static __m256i _mm256_mpsadbw_epu8(__m256i a, __m256i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.MultipleSumAbsoluteDifferences(a.UI8, b.UI8, (byte)imm8);
///
/// Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst".
@@ -1280,7 +1280,7 @@ public static unsafe partial class AVX2
/// __m256i {M256}
/// int {IMM}
/// __m256i dst {M256}
- public static __m256i _mm256_permute2x128_si256(__m256i a, __m256i b, int imm8) => System.Runtime.Intrinsics.X86.Avx2.Permute2x128(a.SI8, b.SI8, (byte)imm8);
+ public static __m256i _mm256_permute2x128_si256(__m256i a, __m256i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.Permute2x128(a.SI8, b.SI8, (byte)imm8);
///
/// Shuffle 64-bit integers in "a" across lanes using the control in "imm8", and store the results in "dst".
@@ -1289,7 +1289,7 @@ public static unsafe partial class AVX2
/// __m256i {UI64}
/// int {IMM}
/// __m256i dst {UI64}
- public static __m256i _mm256_permute4x64_epi64(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.Permute4x64(a.UI64, (byte)imm8);
+ public static __m256i _mm256_permute4x64_epi64(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.Permute4x64(a.UI64, (byte)imm8);
///
/// Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst".
@@ -1298,7 +1298,7 @@ public static unsafe partial class AVX2
/// __m256d {FP64}
/// int {IMM}
/// __m256d dst {FP64}
- public static __m256d _mm256_permute4x64_pd(__m256d a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.Permute4x64(a.FP64, (byte)imm8);
+ public static __m256d _mm256_permute4x64_pd(__m256d a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.Permute4x64(a.FP64, (byte)imm8);
///
/// Shuffle 32-bit integers in "a" across lanes using the corresponding index in "idx", and store the results in "dst".
@@ -1334,7 +1334,7 @@ public static unsafe partial class AVX2
/// __m256i {UI32}
/// int {IMM}
/// __m256i dst {UI32}
- public static __m256i _mm256_shuffle_epi32(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.Shuffle(a.UI32, (byte)imm8);
+ public static __m256i _mm256_shuffle_epi32(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.Shuffle(a.UI32, (byte)imm8);
///
/// Shuffle 8-bit integers in "a" within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst".
@@ -1352,7 +1352,7 @@ public static unsafe partial class AVX2
/// __m256i {UI16}
/// int {IMM}
/// __m256i dst {UI16}
- public static __m256i _mm256_shufflehi_epi16(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShuffleHigh(a.UI16, (byte)imm8);
+ public static __m256i _mm256_shufflehi_epi16(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShuffleHigh(a.UI16, (byte)imm8);
///
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of "a" using the control in "imm8". Store the results in the low 64 bits of 128-bit lanes of "dst", with the high 64 bits of 128-bit lanes being copied from from "a" to "dst".
@@ -1361,7 +1361,7 @@ public static unsafe partial class AVX2
/// __m256i {UI16}
/// int {IMM}
/// __m256i dst {UI16}
- public static __m256i _mm256_shufflelo_epi16(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShuffleLow(a.UI16, (byte)imm8);
+ public static __m256i _mm256_shufflelo_epi16(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShuffleLow(a.UI16, (byte)imm8);
///
/// Negate packed signed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Element in "dst" are zeroed out when the corresponding element in "b" is zero.
@@ -1424,7 +1424,7 @@ public static unsafe partial class AVX2
/// __m256i {UI16}
/// int {IMM}
/// __m256i dst {UI16}
- public static __m256i _mm256_slli_epi16(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftLeftLogical(a.UI16, (byte)imm8);
+ public static __m256i _mm256_slli_epi16(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftLeftLogical(a.UI16, (byte)imm8);
///
/// Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".
@@ -1433,7 +1433,7 @@ public static unsafe partial class AVX2
/// __m256i {UI32}
/// int {IMM}
/// __m256i dst {UI32}
- public static __m256i _mm256_slli_epi32(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftLeftLogical(a.UI32, (byte)imm8);
+ public static __m256i _mm256_slli_epi32(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftLeftLogical(a.UI32, (byte)imm8);
///
/// Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".
@@ -1442,7 +1442,7 @@ public static unsafe partial class AVX2
/// __m256i {UI64}
/// int {IMM}
/// __m256i dst {UI64}
- public static __m256i _mm256_slli_epi64(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftLeftLogical(a.UI64, (byte)imm8);
+ public static __m256i _mm256_slli_epi64(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftLeftLogical(a.UI64, (byte)imm8);
///
/// Shift packed 32-bit integers in "a" left by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".
@@ -1469,7 +1469,7 @@ public static unsafe partial class AVX2
/// __m256i {SI16}
/// int {IMM}
/// __m256i dst {UI16}
- public static __m256i _mm256_srai_epi16(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightArithmetic(a.SI16, (byte)imm8);
+ public static __m256i _mm256_srai_epi16(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightArithmetic(a.SI16, (byte)imm8);
///
/// Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".
@@ -1478,7 +1478,7 @@ public static unsafe partial class AVX2
/// __m256i {SI32}
/// int {IMM}
/// __m256i dst {UI32}
- public static __m256i _mm256_srai_epi32(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightArithmetic(a.SI32, (byte)imm8);
+ public static __m256i _mm256_srai_epi32(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightArithmetic(a.SI32, (byte)imm8);
///
/// Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in sign bits, and store the results in "dst".
@@ -1523,7 +1523,7 @@ public static unsafe partial class AVX2
/// __m256i {UI16}
/// int {IMM}
/// __m256i dst {UI16}
- public static __m256i _mm256_srli_epi16(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightLogical(a.UI16, (byte)imm8);
+ public static __m256i _mm256_srli_epi16(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightLogical(a.UI16, (byte)imm8);
///
/// Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".
@@ -1532,7 +1532,7 @@ public static unsafe partial class AVX2
/// __m256i {UI32}
/// int {IMM}
/// __m256i dst {UI32}
- public static __m256i _mm256_srli_epi32(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightLogical(a.UI32, (byte)imm8);
+ public static __m256i _mm256_srli_epi32(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightLogical(a.UI32, (byte)imm8);
///
/// Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".
@@ -1541,7 +1541,7 @@ public static unsafe partial class AVX2
/// __m256i {UI64}
/// int {IMM}
/// __m256i dst {UI64}
- public static __m256i _mm256_srli_epi64(__m256i a, int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightLogical(a.UI64, (byte)imm8);
+ public static __m256i _mm256_srli_epi64(__m256i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Avx2.ShiftRightLogical(a.UI64, (byte)imm8);
///
/// Shift packed 32-bit integers in "a" right by the amount specified by the corresponding element in "count" while shifting in zeros, and store the results in "dst".
diff --git a/src/External/RawIntrinsics/Other.cs b/src/External/RawIntrinsics/Other.cs
index 2d53527..4ccc689 100644
--- a/src/External/RawIntrinsics/Other.cs
+++ b/src/External/RawIntrinsics/Other.cs
@@ -53,7 +53,7 @@ public static unsafe partial class Other
/// __m128i {M128}
/// int {IMM}
/// __m128i dst {M128}
- public static __m128i _mm_aeskeygenassist_si128(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Aes.KeygenAssist(a.UI8, (byte)imm8);
+ public static __m128i _mm_aeskeygenassist_si128(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Aes.KeygenAssist(a.UI8, (byte)imm8);
///
/// Perform a carry-less multiplication of two 64-bit integers, selected from "a" and "b" according to "imm8", and store the results in "dst".
@@ -63,7 +63,7 @@ public static unsafe partial class Other
/// __m128i {M128}
/// int {IMM}
/// __m128i dst {M128}
- public static __m128i _mm_clmulepi64_si128(__m128i a, __m128i b, int imm8) => System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(a.SI64, b.SI64, (byte)imm8);
+ public static __m128i _mm_clmulepi64_si128(__m128i a, __m128i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Pclmulqdq.CarrylessMultiply(a.SI64, b.SI64, (byte)imm8);
///
/// Count the number of bits set to 1 in unsigned 32-bit integer "a", and return that count in "dst".
diff --git a/src/External/RawIntrinsics/SSE.cs b/src/External/RawIntrinsics/SSE.cs
index e7c090c..e642ed0 100644
--- a/src/External/RawIntrinsics/SSE.cs
+++ b/src/External/RawIntrinsics/SSE.cs
@@ -419,7 +419,7 @@ public static unsafe partial class SSE
public static __m128 _mm_loadu_ps(float* mem_addr) => System.Runtime.Intrinsics.X86.Sse.LoadVector128(mem_addr);
///
- /// Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".
+ /// Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note]
///
/// MAXPS xmm, xmm
/// __m128 {FP32}
@@ -428,7 +428,7 @@ public static unsafe partial class SSE
public static __m128 _mm_max_ps(__m128 a, __m128 b) => System.Runtime.Intrinsics.X86.Sse.Max(a.FP32, b.FP32);
///
- /// Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper element of "dst".
+ /// Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper element of "dst". [max_float_note]
///
/// MAXSS xmm, xmm
/// __m128 {FP32}
@@ -437,7 +437,7 @@ public static unsafe partial class SSE
public static __m128 _mm_max_ss(__m128 a, __m128 b) => System.Runtime.Intrinsics.X86.Sse.MaxScalar(a.FP32, b.FP32);
///
- /// Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".
+ /// Compare packed single-precision (32-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note]
///
/// MINPS xmm, xmm
/// __m128 {FP32}
@@ -446,7 +446,7 @@ public static unsafe partial class SSE
public static __m128 _mm_min_ps(__m128 a, __m128 b) => System.Runtime.Intrinsics.X86.Sse.Min(a.FP32, b.FP32);
///
- /// Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper element of "dst".
+ /// Compare the lower single-precision (32-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper element of "dst". [min_float_note]
///
/// MINSS xmm, xmm
/// __m128 {FP32}
@@ -591,7 +591,7 @@ public static unsafe partial class SSE
/// __m128 {FP32}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_shuffle_ps(__m128 a, __m128 b, int imm8) => System.Runtime.Intrinsics.X86.Sse.Shuffle(a.FP32, b.FP32, (byte)imm8);
+ public static __m128 _mm_shuffle_ps(__m128 a, __m128 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse.Shuffle(a.FP32, b.FP32, (byte)imm8);
///
/// Compute the square root of packed single-precision (32-bit) floating-point elements in "a", and store the results in "dst".
diff --git a/src/External/RawIntrinsics/SSE2.cs b/src/External/RawIntrinsics/SSE2.cs
index e25c4fc..1c0f5b5 100644
--- a/src/External/RawIntrinsics/SSE2.cs
+++ b/src/External/RawIntrinsics/SSE2.cs
@@ -153,7 +153,7 @@ public static unsafe partial class SSE2
/// __m128i {M128}
/// int {IMM}
/// __m128i dst {M128}
- public static __m128i _mm_bslli_si128(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(a.SI8, (byte)imm8);
+ public static __m128i _mm_bslli_si128(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical128BitLane(a.SI8, (byte)imm8);
///
/// Shift "a" right by "imm8" bytes while shifting in zeros, and store the results in "dst".
@@ -162,7 +162,7 @@ public static unsafe partial class SSE2
/// __m128i {M128}
/// int {IMM}
/// __m128i dst {M128}
- public static __m128i _mm_bsrli_si128(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(a.SI8, (byte)imm8);
+ public static __m128i _mm_bsrli_si128(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical128BitLane(a.SI8, (byte)imm8);
///
/// Compare packed 16-bit integers in "a" and "b" for equality, and store the results in "dst".
@@ -704,7 +704,7 @@ public static unsafe partial class SSE2
/// __m128i {UI16}
/// int {IMM}
/// int dst {UI16}
- public static int _mm_extract_epi16(__m128i a, int imm8) => (int)System.Runtime.Intrinsics.X86.Sse2.Extract(a.UI16, (byte)imm8);
+ public static int _mm_extract_epi16(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => (int)System.Runtime.Intrinsics.X86.Sse2.Extract(a.UI16, (byte)imm8);
///
/// Copy "a" to "dst", and insert the 16-bit integer "i" into "dst" at the location specified by "imm8".
@@ -714,7 +714,7 @@ public static unsafe partial class SSE2
/// int {UI16}
/// int {IMM}
/// __m128i dst {UI16}
- public static __m128i _mm_insert_epi16(__m128i a, int i, int imm8) => System.Runtime.Intrinsics.X86.Sse2.Insert(a.UI16, (ushort)i, (byte)imm8);
+ public static __m128i _mm_insert_epi16(__m128i a, int i, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.Insert(a.UI16, (ushort)i, (byte)imm8);
///
/// Perform a serializing operation on all load-from-memory instructions that were issued prior to this instruction. Guarantees that every load instruction that precedes, in program order, is globally visible before any load instruction which follows the fence in program order.
@@ -835,7 +835,7 @@ public static unsafe partial class SSE2
public static __m128i _mm_max_epu8(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Sse2.Max(a.UI8, b.UI8);
///
- /// Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst".
+ /// Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". [max_float_note]
///
/// MAXPD xmm, xmm
/// __m128d {FP64}
@@ -844,7 +844,7 @@ public static unsafe partial class SSE2
public static __m128d _mm_max_pd(__m128d a, __m128d b) => System.Runtime.Intrinsics.X86.Sse2.Max(a.FP64, b.FP64);
///
- /// Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+ /// Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [max_float_note]
///
/// MAXSD xmm, xmm
/// __m128d {FP64}
@@ -878,7 +878,7 @@ public static unsafe partial class SSE2
public static __m128i _mm_min_epu8(__m128i a, __m128i b) => System.Runtime.Intrinsics.X86.Sse2.Min(a.UI8, b.UI8);
///
- /// Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst".
+ /// Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". [min_float_note]
///
/// MINPD xmm, xmm
/// __m128d {FP64}
@@ -887,7 +887,7 @@ public static unsafe partial class SSE2
public static __m128d _mm_min_pd(__m128d a, __m128d b) => System.Runtime.Intrinsics.X86.Sse2.Min(a.FP64, b.FP64);
///
- /// Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst".
+ /// Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [min_float_note]
///
/// MINSD xmm, xmm
/// __m128d {FP64}
@@ -1150,7 +1150,7 @@ public static unsafe partial class SSE2
/// __m128i {UI32}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm_shuffle_epi32(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.Shuffle(a.UI32, (byte)imm8);
+ public static __m128i _mm_shuffle_epi32(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.Shuffle(a.UI32, (byte)imm8);
///
/// Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst".
@@ -1160,7 +1160,7 @@ public static unsafe partial class SSE2
/// __m128d {FP64}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm_shuffle_pd(__m128d a, __m128d b, int imm8) => System.Runtime.Intrinsics.X86.Sse2.Shuffle(a.FP64, b.FP64, (byte)imm8);
+ public static __m128d _mm_shuffle_pd(__m128d a, __m128d b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.Shuffle(a.FP64, b.FP64, (byte)imm8);
///
/// Shuffle 16-bit integers in the high 64 bits of "a" using the control in "imm8". Store the results in the high 64 bits of "dst", with the low 64 bits being copied from from "a" to "dst".
@@ -1169,7 +1169,7 @@ public static unsafe partial class SSE2
/// __m128i {UI16}
/// int {IMM}
/// __m128i dst {UI16}
- public static __m128i _mm_shufflehi_epi16(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShuffleHigh(a.UI16, (byte)imm8);
+ public static __m128i _mm_shufflehi_epi16(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShuffleHigh(a.UI16, (byte)imm8);
///
/// Shuffle 16-bit integers in the low 64 bits of "a" using the control in "imm8". Store the results in the low 64 bits of "dst", with the high 64 bits being copied from from "a" to "dst".
@@ -1178,7 +1178,7 @@ public static unsafe partial class SSE2
/// __m128i {UI16}
/// int {IMM}
/// __m128i dst {UI16}
- public static __m128i _mm_shufflelo_epi16(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShuffleLow(a.UI16, (byte)imm8);
+ public static __m128i _mm_shufflelo_epi16(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShuffleLow(a.UI16, (byte)imm8);
///
/// Shift packed 16-bit integers in "a" left by "count" while shifting in zeros, and store the results in "dst".
@@ -1214,7 +1214,7 @@ public static unsafe partial class SSE2
/// __m128i {UI16}
/// int {IMM}
/// __m128i dst {UI16}
- public static __m128i _mm_slli_epi16(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(a.UI16, (byte)imm8);
+ public static __m128i _mm_slli_epi16(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(a.UI16, (byte)imm8);
///
/// Shift packed 32-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".
@@ -1223,7 +1223,7 @@ public static unsafe partial class SSE2
/// __m128i {UI32}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm_slli_epi32(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(a.UI32, (byte)imm8);
+ public static __m128i _mm_slli_epi32(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(a.UI32, (byte)imm8);
///
/// Shift packed 64-bit integers in "a" left by "imm8" while shifting in zeros, and store the results in "dst".
@@ -1232,7 +1232,7 @@ public static unsafe partial class SSE2
/// __m128i {UI64}
/// int {IMM}
/// __m128i dst {UI64}
- public static __m128i _mm_slli_epi64(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(a.UI64, (byte)imm8);
+ public static __m128i _mm_slli_epi64(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftLeftLogical(a.UI64, (byte)imm8);
///
/// Compute the square root of packed double-precision (64-bit) floating-point elements in "a", and store the results in "dst".
@@ -1276,7 +1276,7 @@ public static unsafe partial class SSE2
/// __m128i {SI16}
/// int {IMM}
/// __m128i dst {UI16}
- public static __m128i _mm_srai_epi16(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightArithmetic(a.SI16, (byte)imm8);
+ public static __m128i _mm_srai_epi16(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightArithmetic(a.SI16, (byte)imm8);
///
/// Shift packed 32-bit integers in "a" right by "imm8" while shifting in sign bits, and store the results in "dst".
@@ -1285,7 +1285,7 @@ public static unsafe partial class SSE2
/// __m128i {SI32}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm_srai_epi32(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightArithmetic(a.SI32, (byte)imm8);
+ public static __m128i _mm_srai_epi32(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightArithmetic(a.SI32, (byte)imm8);
///
/// Shift packed 16-bit integers in "a" right by "count" while shifting in zeros, and store the results in "dst".
@@ -1321,7 +1321,7 @@ public static unsafe partial class SSE2
/// __m128i {UI16}
/// int {IMM}
/// __m128i dst {UI16}
- public static __m128i _mm_srli_epi16(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(a.UI16, (byte)imm8);
+ public static __m128i _mm_srli_epi16(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(a.UI16, (byte)imm8);
///
/// Shift packed 32-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".
@@ -1330,7 +1330,7 @@ public static unsafe partial class SSE2
/// __m128i {UI32}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm_srli_epi32(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(a.UI32, (byte)imm8);
+ public static __m128i _mm_srli_epi32(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(a.UI32, (byte)imm8);
///
/// Shift packed 64-bit integers in "a" right by "imm8" while shifting in zeros, and store the results in "dst".
@@ -1339,7 +1339,7 @@ public static unsafe partial class SSE2
/// __m128i {UI64}
/// int {IMM}
/// __m128i dst {UI64}
- public static __m128i _mm_srli_epi64(__m128i a, int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(a.UI64, (byte)imm8);
+ public static __m128i _mm_srli_epi64(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse2.ShiftRightLogical(a.UI64, (byte)imm8);
///
/// Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from "a" into memory. "mem_addr" must be aligned on a 16-byte boundary or a general-protection exception may be generated.
diff --git a/src/External/RawIntrinsics/SSE41.cs b/src/External/RawIntrinsics/SSE41.cs
index 105b0a8..cfd529f 100644
--- a/src/External/RawIntrinsics/SSE41.cs
+++ b/src/External/RawIntrinsics/SSE41.cs
@@ -10,7 +10,7 @@ public static unsafe partial class SSE41
/// __m128i {UI16}
/// int {IMM}
/// __m128i dst {UI16}
- public static __m128i _mm_blend_epi16(__m128i a, __m128i b, int imm8) => System.Runtime.Intrinsics.X86.Sse41.Blend(a.UI16, b.UI16, (byte)imm8);
+ public static __m128i _mm_blend_epi16(__m128i a, __m128i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse41.Blend(a.UI16, b.UI16, (byte)imm8);
///
/// Blend packed double-precision (64-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst".
@@ -20,7 +20,7 @@ public static unsafe partial class SSE41
/// __m128d {FP64}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm_blend_pd(__m128d a, __m128d b, int imm8) => System.Runtime.Intrinsics.X86.Sse41.Blend(a.FP64, b.FP64, (byte)imm8);
+ public static __m128d _mm_blend_pd(__m128d a, __m128d b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse41.Blend(a.FP64, b.FP64, (byte)imm8);
///
/// Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst".
@@ -30,7 +30,7 @@ public static unsafe partial class SSE41
/// __m128 {FP32}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_blend_ps(__m128 a, __m128 b, int imm8) => System.Runtime.Intrinsics.X86.Sse41.Blend(a.FP32, b.FP32, (byte)imm8);
+ public static __m128 _mm_blend_ps(__m128 a, __m128 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse41.Blend(a.FP32, b.FP32, (byte)imm8);
///
/// Blend packed 8-bit integers from "a" and "b" using "mask", and store the results in "dst".
@@ -209,7 +209,7 @@ public static unsafe partial class SSE41
/// __m128d {FP64}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm_dp_pd(__m128d a, __m128d b, int imm8) => System.Runtime.Intrinsics.X86.Sse41.DotProduct(a.FP64, b.FP64, (byte)imm8);
+ public static __m128d _mm_dp_pd(__m128d a, __m128d b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse41.DotProduct(a.FP64, b.FP64, (byte)imm8);
///
/// Conditionally multiply the packed single-precision (32-bit) floating-point elements in "a" and "b" using the high 4 bits in "imm8", sum the four products, and conditionally store the sum in "dst" using the low 4 bits of "imm8".
@@ -219,7 +219,7 @@ public static unsafe partial class SSE41
/// __m128 {FP32}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_dp_ps(__m128 a, __m128 b, int imm8) => System.Runtime.Intrinsics.X86.Sse41.DotProduct(a.FP32, b.FP32, (byte)imm8);
+ public static __m128 _mm_dp_ps(__m128 a, __m128 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse41.DotProduct(a.FP32, b.FP32, (byte)imm8);
///
/// Extract a 32-bit integer from "a", selected with "imm8", and store the result in "dst".
@@ -228,7 +228,7 @@ public static unsafe partial class SSE41
/// __m128i {UI32}
/// int {IMM}
/// int dst {UI32}
- public static int _mm_extract_epi32(__m128i a, int imm8) => (int)System.Runtime.Intrinsics.X86.Sse41.Extract(a.UI32, (byte)imm8);
+ public static int _mm_extract_epi32(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => (int)System.Runtime.Intrinsics.X86.Sse41.Extract(a.UI32, (byte)imm8);
///
/// Extract a 64-bit integer from "a", selected with "imm8", and store the result in "dst".
@@ -237,7 +237,7 @@ public static unsafe partial class SSE41
/// __m128i {UI64}
/// int {IMM}
/// long dst {UI64}
- public static long _mm_extract_epi64(__m128i a, int imm8) => (long)System.Runtime.Intrinsics.X86.Sse41.X64.Extract(a.UI64, (byte)imm8);
+ public static long _mm_extract_epi64(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => (long)System.Runtime.Intrinsics.X86.Sse41.X64.Extract(a.UI64, (byte)imm8);
///
/// Extract an 8-bit integer from "a", selected with "imm8", and store the result in the lower element of "dst".
@@ -246,7 +246,7 @@ public static unsafe partial class SSE41
/// __m128i {UI8}
/// int {IMM}
/// int dst {UI8}
- public static int _mm_extract_epi8(__m128i a, int imm8) => (int)System.Runtime.Intrinsics.X86.Sse41.Extract(a.UI8, (byte)imm8);
+ public static int _mm_extract_epi8(__m128i a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => (int)System.Runtime.Intrinsics.X86.Sse41.Extract(a.UI8, (byte)imm8);
///
/// Extract a single-precision (32-bit) floating-point element from "a", selected with "imm8", and store the result in "dst".
@@ -255,7 +255,7 @@ public static unsafe partial class SSE41
/// __m128 {FP32}
/// int {IMM}
/// int dst {UI32}
- public static int _mm_extract_ps(__m128 a, int imm8) => (int)System.Runtime.Intrinsics.X86.Sse41.Extract(a.FP32, (byte)imm8);
+ public static int _mm_extract_ps(__m128 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => (int)System.Runtime.Intrinsics.X86.Sse41.Extract(a.FP32, (byte)imm8);
///
/// Round the packed double-precision (64-bit) floating-point elements in "a" down to an integer value, and store the results as packed double-precision floating-point elements in "dst".
@@ -299,7 +299,7 @@ public static unsafe partial class SSE41
/// int {UI32}
/// int {IMM}
/// __m128i dst {UI32}
- public static __m128i _mm_insert_epi32(__m128i a, int i, int imm8) => System.Runtime.Intrinsics.X86.Sse41.Insert(a.UI32, (uint)i, (byte)imm8);
+ public static __m128i _mm_insert_epi32(__m128i a, int i, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse41.Insert(a.UI32, (uint)i, (byte)imm8);
///
/// Copy "a" to "dst", and insert the 64-bit integer "i" into "dst" at the location specified by "imm8".
@@ -309,7 +309,7 @@ public static unsafe partial class SSE41
/// long {UI64}
/// int {IMM}
/// __m128i dst {UI64}
- public static __m128i _mm_insert_epi64(__m128i a, long i, int imm8) => System.Runtime.Intrinsics.X86.Sse41.X64.Insert(a.UI64, (ulong)i, (byte)imm8);
+ public static __m128i _mm_insert_epi64(__m128i a, long i, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse41.X64.Insert(a.UI64, (ulong)i, (byte)imm8);
///
/// Copy "a" to "dst", and insert the lower 8-bit integer from "i" into "dst" at the location specified by "imm8".
@@ -319,7 +319,7 @@ public static unsafe partial class SSE41
/// int {UI8}
/// int {IMM}
/// __m128i dst {UI8}
- public static __m128i _mm_insert_epi8(__m128i a, int i, int imm8) => System.Runtime.Intrinsics.X86.Sse41.Insert(a.UI8, (byte)i, (byte)imm8);
+ public static __m128i _mm_insert_epi8(__m128i a, int i, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse41.Insert(a.UI8, (byte)i, (byte)imm8);
///
/// Copy "a" to "tmp", then insert a single-precision (32-bit) floating-point element from "b" into "tmp" using the control in "imm8". Store "tmp" to "dst" using the mask in "imm8" (elements are zeroed out when the corresponding bit is set).
@@ -329,7 +329,7 @@ public static unsafe partial class SSE41
/// __m128 {FP32}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_insert_ps(__m128 a, __m128 b, int imm8) => System.Runtime.Intrinsics.X86.Sse41.Insert(a.FP32, b.FP32, (byte)imm8);
+ public static __m128 _mm_insert_ps(__m128 a, __m128 b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse41.Insert(a.FP32, b.FP32, (byte)imm8);
///
/// Compare packed signed 32-bit integers in "a" and "b", and store packed maximum values in "dst".
@@ -419,7 +419,7 @@ public static unsafe partial class SSE41
/// __m128i {UI8}
/// int {IMM}
/// __m128i dst {UI8}
- public static __m128i _mm_mpsadbw_epu8(__m128i a, __m128i b, int imm8) => System.Runtime.Intrinsics.X86.Sse41.MultipleSumAbsoluteDifferences(a.UI8, b.UI8, (byte)imm8);
+ public static __m128i _mm_mpsadbw_epu8(__m128i a, __m128i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Sse41.MultipleSumAbsoluteDifferences(a.UI8, b.UI8, (byte)imm8);
///
/// Multiply the low signed 32-bit integers from each packed 64-bit element in "a" and "b", and store the signed 64-bit results in "dst".
@@ -455,7 +455,7 @@ public static unsafe partial class SSE41
/// __m128d {FP64}
/// int {IMM}
/// __m128d dst {FP64}
- public static __m128d _mm_round_pd(__m128d a, int rounding) => System.Runtime.Intrinsics.X86.Sse41.RoundToNearestInteger(a.FP64);
+ public static __m128d _mm_round_pd(__m128d a, int rounding) => System.Runtime.Intrinsics.X86.Sse41.RoundCurrentDirection(a.FP64);
///
/// Round the packed single-precision (32-bit) floating-point elements in "a" using the "rounding" parameter, and store the results as packed single-precision floating-point elements in "dst". [round_note]
@@ -464,7 +464,7 @@ public static unsafe partial class SSE41
/// __m128 {FP32}
/// int {IMM}
/// __m128 dst {FP32}
- public static __m128 _mm_round_ps(__m128 a, int rounding) => System.Runtime.Intrinsics.X86.Sse41.RoundToNearestInteger(a.FP32);
+ public static __m128 _mm_round_ps(__m128 a, int rounding) => System.Runtime.Intrinsics.X86.Sse41.RoundCurrentDirection(a.FP32);
///
/// Round the lower double-precision (64-bit) floating-point element in "b" using the "rounding" parameter, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". [round_note]
diff --git a/src/External/RawIntrinsics/SSSE3.cs b/src/External/RawIntrinsics/SSSE3.cs
index 66c40b0..3c5e47d 100644
--- a/src/External/RawIntrinsics/SSSE3.cs
+++ b/src/External/RawIntrinsics/SSSE3.cs
@@ -34,7 +34,7 @@ public static unsafe partial class SSSE3
/// __m128i {UI8}
/// int {IMM}
/// __m128i dst {UI8}
- public static __m128i _mm_alignr_epi8(__m128i a, __m128i b, int imm8) => System.Runtime.Intrinsics.X86.Ssse3.AlignRight(a.UI8, b.UI8, (byte)imm8);
+ public static __m128i _mm_alignr_epi8(__m128i a, __m128i b, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] int imm8) => System.Runtime.Intrinsics.X86.Ssse3.AlignRight(a.UI8, b.UI8, (byte)imm8);
///
/// Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".
diff --git a/src/External/RawIntrinsicsGenerator/Generator.cs b/src/External/RawIntrinsicsGenerator/Generator.cs
index d4cc477..2320f98 100644
--- a/src/External/RawIntrinsicsGenerator/Generator.cs
+++ b/src/External/RawIntrinsicsGenerator/Generator.cs
@@ -15,7 +15,7 @@ public static class Generator
{
private const string SriDataUrl1 = @"https://raw.githubusercontent.com/dotnet/runtime/main/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/";
private const string SriDataUrl2 = @"https://raw.githubusercontent.com/dotnet/runtime/main/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/";
- private const string IntelDataUrl = @"https://www.intel.com/content/dam/develop/public/us/en/include/intrinsics-guide/data-3-6-5.xml";
+ private const string IntelDataFilePath = @"intel-intrinsics-guide-3.6.3.xml";
private static readonly Regex IntelMethodSignature = new(@"///\s+?(?