Skip to content

Commit

Permalink
Ensure that the SSE fallback for Vector3.Dot masks off the unused element of op1 and op2 (#74932)
Browse files Browse the repository at this point in the history

* Ensure that the SSE fallback for Vector3.Dot masks off the unused element of op1 and op2

* Applying formatting patch

* Ensure we use TYP_SIMD16 for the simdType when generating the fallback Dot nodes
  • Loading branch information
tannergooding authored Sep 2, 2022
1 parent abe0208 commit ca82565
Showing 1 changed file with 49 additions and 17 deletions.
66 changes: 49 additions & 17 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3419,45 +3419,77 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
{
assert(simdBaseType == TYP_FLOAT);

// We need to mask off the most significant element to avoid the shuffle + add
// from including it in the computed result. We need to do this for both op1 and
// op2 in case one of them is `NaN` (because Zero * NaN == NaN)

simd16_t simd16Val = {};

simd16Val.i32[0] = -1;
simd16Val.i32[1] = -1;
simd16Val.i32[2] = -1;
simd16Val.i32[3] = +0;

simdType = TYP_SIMD16;
simdSize = 16;

// We will be constructing the following parts:
// ...
// +--* CNS_INT int -1
// +--* CNS_INT int -1
// +--* CNS_INT int -1
// +--* CNS_INT int 0
// tmp1 = * HWINTRINSIC simd16 T Create
// /--* op2 simd16
// /--* op1 simd16
// +--* tmp1 simd16
// op1 = * HWINTRINSIC simd16 T And
// ...

// This is roughly the following managed code:
// ...
// tmp1 = Vector128.Create(-1, -1, -1, 0);
// op1 = Sse.And(op1, tmp2);
// op1 = Sse.And(op1, tmp1);
// ...

GenTree* cns0 = comp->gtNewIconNode(-1, TYP_INT);
BlockRange().InsertAfter(op1, cns0);
GenTreeVecCon* vecCon1 = comp->gtNewVconNode(simdType, simdBaseJitType);
vecCon1->gtSimd16Val = simd16Val;

GenTree* cns1 = comp->gtNewIconNode(-1, TYP_INT);
BlockRange().InsertAfter(cns0, cns1);
BlockRange().InsertAfter(op1, vecCon1);

GenTree* cns2 = comp->gtNewIconNode(-1, TYP_INT);
BlockRange().InsertAfter(cns1, cns2);
op1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, vecCon1, NI_SSE_And, simdBaseJitType, simdSize);
BlockRange().InsertAfter(vecCon1, op1);

GenTree* cns3 = comp->gtNewIconNode(0, TYP_INT);
BlockRange().InsertAfter(cns2, cns3);
LowerNode(vecCon1);
LowerNode(op1);

tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, cns0, cns1, cns2, cns3, NI_Vector128_Create,
CORINFO_TYPE_INT, 16);
BlockRange().InsertAfter(cns3, tmp1);
// We will be constructing the following parts:
// ...
// +--* CNS_INT int -1
// +--* CNS_INT int -1
// +--* CNS_INT int -1
// +--* CNS_INT int 0
// tmp2 = * HWINTRINSIC simd16 T Create
// /--* op2 simd16
// +--* tmp2 simd16
// op2 = * HWINTRINSIC simd16 T And
// ...

op1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, tmp1, NI_SSE_And, simdBaseJitType, simdSize);
BlockRange().InsertAfter(tmp1, op1);
// This is roughly the following managed code:
// ...
// tmp2 = Vector128.Create(-1, -1, -1, 0);
// op2 = Sse.And(op2, tmp2);
// ...

LowerNode(tmp1);
LowerNode(op1);
GenTreeVecCon* vecCon2 = comp->gtNewVconNode(simdType, simdBaseJitType);
vecCon2->gtSimd16Val = simd16Val;

BlockRange().InsertAfter(op2, vecCon2);

op2 = comp->gtNewSimdHWIntrinsicNode(simdType, op2, vecCon2, NI_SSE_And, simdBaseJitType, simdSize);
BlockRange().InsertAfter(vecCon2, op2);

LowerNode(vecCon2);
LowerNode(op2);
}
}

Expand Down

0 comments on commit ca82565

Please sign in to comment.