Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wasm] Add Vector128 and PackedSimd support to the jiterpreter; add PackedSimd to the interpreter #82773

Merged
merged 4 commits into from
May 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/mono/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ elseif(CLR_CMAKE_HOST_OS STREQUAL "emscripten")
add_compile_options(-Wno-strict-prototypes)
add_compile_options(-Wno-unused-but-set-variable)
add_compile_options(-Wno-single-bit-bitfield-constant-conversion)
add_compile_options(-msimd128)
set(DISABLE_EXECUTABLES 1)
# FIXME: Is there a cmake option for this ?
set(DISABLE_SHARED_LIBS 1)
Expand Down
8 changes: 7 additions & 1 deletion src/mono/mono/mini/interp/interp-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ typedef enum {

#define PROFILE_INTERP 0

#if !HOST_BROWSER && __GNUC__
#if __GNUC__
#define INTERP_ENABLE_SIMD
#endif

Expand Down Expand Up @@ -342,6 +342,12 @@ mono_jiterp_stackval_from_data (MonoType *type, stackval *result, const void *da
gpointer
mono_jiterp_frame_data_allocator_alloc (FrameDataAllocator *stack, InterpFrame *frame, int size);

// Returns the entry at `index` of the interpreter SIMD intrinsic table selected by
// `arity` (the number of pointer operands besides the result: 1, 2 or 3).
gpointer
mono_jiterp_get_simd_intrinsic (int arity, int index);

// Returns the entry at `index` of the wasm opcode table selected by `arity` (1, 2 or 3).
int
mono_jiterp_get_simd_opcode (int arity, int index);

#endif

static inline int
Expand Down
266 changes: 185 additions & 81 deletions src/mono/mono/mini/interp/interp-simd-intrins.def

Large diffs are not rendered by default.

137 changes: 128 additions & 9 deletions src/mono/mono/mini/interp/interp-simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
#include "interp-internals.h"
#include "interp-simd.h"

#if HOST_BROWSER
#include <wasm_simd128.h>
#endif

#ifdef INTERP_ENABLE_SIMD

typedef gint64 v128_i8 __attribute__ ((vector_size (SIZEOF_V128)));
Expand All @@ -12,6 +16,7 @@ typedef gint16 v128_i2 __attribute__ ((vector_size (SIZEOF_V128)));
typedef guint16 v128_u2 __attribute__ ((vector_size (SIZEOF_V128)));
typedef gint8 v128_i1 __attribute__ ((vector_size (SIZEOF_V128)));
typedef guint8 v128_u1 __attribute__ ((vector_size (SIZEOF_V128)));
typedef float v128_r4 __attribute__ ((vector_size (SIZEOF_V128)));

// get_AllBitsSet
static void
Expand Down Expand Up @@ -39,6 +44,12 @@ interp_v128_i4_op_addition (gpointer res, gpointer v1, gpointer v2)
*(v128_i4*)res = *(v128_i4*)v1 + *(v128_i4*)v2;
}

static void
interp_v128_r4_op_addition (gpointer res, gpointer v1, gpointer v2)
{
	// Element-wise sum of two float-lane vectors; both operands are read before res is written.
	v128_r4 lhs = *(v128_r4*)v1;
	v128_r4 rhs = *(v128_r4*)v2;
	*(v128_r4*)res = lhs + rhs;
}

// op_Subtraction
static void
interp_v128_i1_op_subtraction (gpointer res, gpointer v1, gpointer v2)
Expand All @@ -58,6 +69,12 @@ interp_v128_i4_op_subtraction (gpointer res, gpointer v1, gpointer v2)
*(v128_i4*)res = *(v128_i4*)v1 - *(v128_i4*)v2;
}

static void
interp_v128_r4_op_subtraction (gpointer res, gpointer v1, gpointer v2)
{
	// Element-wise difference (v1 - v2) of two float-lane vectors.
	v128_r4 minuend = *(v128_r4*)v1;
	v128_r4 subtrahend = *(v128_r4*)v2;
	*(v128_r4*)res = minuend - subtrahend;
}

// op_BitwiseAnd
static void
interp_v128_op_bitwise_and (gpointer res, gpointer v1, gpointer v2)
Expand Down Expand Up @@ -124,6 +141,18 @@ interp_v128_i4_op_multiply (gpointer res, gpointer v1, gpointer v2)
*(v128_i4*)res = *(v128_i4*)v1 * *(v128_i4*)v2;
}

static void
interp_v128_r4_op_multiply (gpointer res, gpointer v1, gpointer v2)
{
	// Element-wise product of two float-lane vectors.
	v128_r4 lhs = *(v128_r4*)v1;
	v128_r4 rhs = *(v128_r4*)v2;
	*(v128_r4*)res = lhs * rhs;
}

// op_Division
static void
interp_v128_r4_op_division (gpointer res, gpointer v1, gpointer v2)
{
	// Element-wise quotient (v1 / v2) of two float-lane vectors.
	v128_r4 dividend = *(v128_r4*)v1;
	v128_r4 divisor = *(v128_r4*)v2;
	*(v128_r4*)res = dividend / divisor;
}

// op_UnaryNegation
static void
interp_v128_i1_op_negation (gpointer res, gpointer v1)
Expand Down Expand Up @@ -535,32 +564,122 @@ interp_v128_i8_shuffle (gpointer res, gpointer v1, gpointer v2)
V128_SHUFFLE (gint64, guint64);
}

#define INTERP_SIMD_INTRINSIC_P_P(a,b)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)

// For the wasm packed simd intrinsics we want to automatically generate the C implementations from
// their corresponding clang intrinsics. See also:
// https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/wasm_simd128.h
// In the macro names below, V means a Vector128 (v128_t) value, P means a void* pointer and
// I means an integer; the letter before the underscore is the result, the letters after it are the operands.
#ifdef HOST_BROWSER

// Stand-in with the (v128_t, v128_t) -> v128_t shape that ignores both operands and
// immediately trips g_assert_not_reached.
// NOTE(review): presumably referenced from interp-simd-intrins.def for entries that have
// no usable C intrinsic - confirm against the .def file.
static v128_t
_interp_wasm_simd_assert_not_reached (v128_t lhs, v128_t rhs)
{
	g_assert_not_reached ();
}

// Each generator below defines a static function named _mono_interp_simd_<id> that forwards
// to c_intrinsic and stores the result through `res`. The wasm_opcode argument is not used
// by any of these expansions.

// V_P: c_intrinsic receives the v1 pointer itself (not the value it points to) and
// produces a v128_t.
#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
*((v128_t *)res) = c_intrinsic (v1); \
}

// V_V: one v128_t operand loaded from v1, v128_t result.
#define INTERP_WASM_SIMD_INTRINSIC_V_V(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
*((v128_t *)res) = c_intrinsic (*((v128_t *)v1)); \
}

// I_V: one v128_t operand loaded from v1; the result is stored as an int32_t.
#define INTERP_WASM_SIMD_INTRINSIC_I_V(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
*((int32_t *)res) = c_intrinsic (*((v128_t *)v1)); \
}

// V_VV: two v128_t operands loaded from v1 and v2, v128_t result.
#define INTERP_WASM_SIMD_INTRINSIC_V_VV(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2) { \
*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2)); \
}

// V_VI: a v128_t operand loaded from v1 and an int operand loaded from v2, v128_t result.
#define INTERP_WASM_SIMD_INTRINSIC_V_VI(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2) { \
*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((int *)v2)); \
}

// V_VVV: three v128_t operands loaded from v1, v2 and v3, v128_t result.
#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(id, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2, gpointer v3) { \
*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2), *((v128_t *)v3)); \
}

#include "interp-simd-intrins.def"

#undef INTERP_WASM_SIMD_INTRINSIC_V_P
#undef INTERP_WASM_SIMD_INTRINSIC_V_V
#undef INTERP_WASM_SIMD_INTRINSIC_I_V
#undef INTERP_WASM_SIMD_INTRINSIC_V_VV
#undef INTERP_WASM_SIMD_INTRINSIC_V_VI
#undef INTERP_WASM_SIMD_INTRINSIC_V_VVV

// Now generate the wasm opcode tables for the intrinsics
// Each table is built with the same three steps: redefine one arity's INTERP_SIMD_INTRINSIC_*
// macro so it expands to its third argument followed by a comma, include the .def file inside
// an array initializer, then restore the macro to an empty expansion so that later includes
// of the .def file only pick up the arity they select. Entries therefore appear in the order
// the .def file lists them.

// One-operand (P_P) intrinsics.
#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c) c,

int interp_simd_p_p_wasm_opcode_table [] = {
#include "interp-simd-intrins.def"
};

#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)

// Two-operand (P_PP) intrinsics.
#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c) c,

int interp_simd_p_pp_wasm_opcode_table [] = {
#include "interp-simd-intrins.def"
};

#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)

// Three-operand (P_PPP) intrinsics.
#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c) c,

int interp_simd_p_ppp_wasm_opcode_table [] = {
#include "interp-simd-intrins.def"
};

#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)

#endif // HOST_BROWSER

#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b) b,
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c) b,
PP_SIMD_Method interp_simd_p_p_table [] = {
#include "interp-simd-intrins.def"
};
#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b)
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)

#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b) b,
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c) b,
PPP_SIMD_Method interp_simd_p_pp_table [] = {
#include "interp-simd-intrins.def"
};
#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)

#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b) b,
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c) b,
PPPP_SIMD_Method interp_simd_p_ppp_table [] = {
#include "interp-simd-intrins.def"
};
#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)

#endif // INTERP_ENABLE_SIMD
6 changes: 6 additions & 0 deletions src/mono/mono/mini/interp/interp-simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ extern PP_SIMD_Method interp_simd_p_p_table [];
extern PPP_SIMD_Method interp_simd_p_pp_table [];
extern PPPP_SIMD_Method interp_simd_p_ppp_table [];

#if HOST_BROWSER
extern int interp_simd_p_p_wasm_opcode_table [];
extern int interp_simd_p_pp_wasm_opcode_table [];
extern int interp_simd_p_ppp_wasm_opcode_table [];
#endif

#endif /* __MONO_MINI_INTERP_SIMD_H__ */


38 changes: 38 additions & 0 deletions src/mono/mono/mini/interp/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -8907,4 +8907,42 @@ mono_jiterp_enum_hasflag (MonoClass *klass, gint32 *dest, stackval *sp1, stackva
*dest = mono_interp_enum_hasflag (sp1, sp2, klass);
}

// Looks up an interpreter SIMD intrinsic by operand count and table position.
//   arity: 1, 2 or 3, selecting interp_simd_p_p_table, interp_simd_p_pp_table or
//          interp_simd_p_ppp_table respectively.
//   index: position within the selected table; it is not range checked here.
// Any other arity, or a build without INTERP_ENABLE_SIMD, hits g_assert_not_reached.
EMSCRIPTEN_KEEPALIVE gpointer
mono_jiterp_get_simd_intrinsic (int arity, int index)
{
#ifdef INTERP_ENABLE_SIMD
	if (arity == 1)
		return interp_simd_p_p_table [index];
	if (arity == 2)
		return interp_simd_p_pp_table [index];
	if (arity == 3)
		return interp_simd_p_ppp_table [index];
#endif
	g_assert_not_reached();
}

// Looks up the wasm opcode recorded for a SIMD intrinsic by operand count and table position.
//   arity: 1, 2 or 3, selecting interp_simd_p_p_wasm_opcode_table,
//          interp_simd_p_pp_wasm_opcode_table or interp_simd_p_ppp_wasm_opcode_table.
//   index: position within the selected table; it is not range checked here.
// Any other arity, or a build without INTERP_ENABLE_SIMD, hits g_assert_not_reached.
EMSCRIPTEN_KEEPALIVE int
mono_jiterp_get_simd_opcode (int arity, int index)
{
#ifdef INTERP_ENABLE_SIMD
	if (arity == 1)
		return interp_simd_p_p_wasm_opcode_table [index];
	if (arity == 2)
		return interp_simd_p_pp_wasm_opcode_table [index];
	if (arity == 3)
		return interp_simd_p_ppp_wasm_opcode_table [index];
#endif
	g_assert_not_reached();
}

#endif
18 changes: 9 additions & 9 deletions src/mono/mono/mini/interp/mintops.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,35 +41,35 @@ typedef enum {

/* SIMD opcodes, grouped by signature */

#define INTERP_SIMD_INTRINSIC_P_P(a,b)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)

#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b) a,
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c) a,
typedef enum {
#include "interp-simd-intrins.def"
} MintSIMDOpsPP;
#undef INTERP_SIMD_INTRINSIC_P_P
#define INTERP_SIMD_INTRINSIC_P_P(a,b)
#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)

#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b) a,
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c) a,
typedef enum {
#include "interp-simd-intrins.def"
INTERP_SIMD_INTRINSIC_P_PP_LAST
} MintSIMDOpsPPP;
#undef INTERP_SIMD_INTRINSIC_P_PP
#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)

#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b) a,
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c) a,
typedef enum {
#include "interp-simd-intrins.def"
INTERP_SIMD_INTRINSIC_P_PPP_LAST
} MintSIMDOpsPPPP;
#undef INTERP_SIMD_INTRINSIC_P_PPP
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)

#if NO_UNALIGNED_ACCESS
# if G_BYTE_ORDER == G_LITTLE_ENDIAN
Expand Down
20 changes: 20 additions & 0 deletions src/mono/mono/mini/interp/simd-methods.def
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
SIMD_METHOD(get_Count)
SIMD_METHOD(get_AllBitsSet)
SIMD_METHOD(get_IsHardwareAccelerated)
SIMD_METHOD(get_IsSupported)
SIMD_METHOD(get_Item)
SIMD_METHOD(get_One)
SIMD_METHOD(get_Zero)
SIMD_METHOD(op_Addition)
SIMD_METHOD(op_BitwiseAnd)
SIMD_METHOD(op_BitwiseOr)
SIMD_METHOD(op_Division)
SIMD_METHOD(op_Equality)
SIMD_METHOD(op_ExclusiveOr)
SIMD_METHOD(op_Explicit)
Expand All @@ -24,6 +26,7 @@ SIMD_METHOD(ConditionalSelect)
SIMD_METHOD(Create)
SIMD_METHOD(CreateScalar)
SIMD_METHOD(CreateScalarUnsafe)

SIMD_METHOD(Equals)
SIMD_METHOD(ExtractMostSignificantBits)
SIMD_METHOD(GreaterThan)
Expand All @@ -36,3 +39,20 @@ SIMD_METHOD(ShiftRightLogical)
SIMD_METHOD(Shuffle)
SIMD_METHOD(WidenLower)
SIMD_METHOD(WidenUpper)

// PackedSimd
SIMD_METHOD(Splat)
SIMD_METHOD(ExtractLane)
SIMD_METHOD(ReplaceLane)
SIMD_METHOD(Swizzle)
SIMD_METHOD(Add)
SIMD_METHOD(Subtract)
SIMD_METHOD(Multiply)
SIMD_METHOD(Dot)
SIMD_METHOD(Negate)
SIMD_METHOD(And)
SIMD_METHOD(Bitmask)
SIMD_METHOD(CompareEqual)
SIMD_METHOD(CompareNotEqual)
SIMD_METHOD(ConvertNarrowingSignedSaturate)
SIMD_METHOD(ConvertNarrowingUnsignedSaturate)
Loading