Skip to content

Commit

Permalink
[X86] Correct the cdisp8 encoding for VSCATTER/VGATHER prefetch (#122…
Browse files Browse the repository at this point in the history
…051)

During differential fuzzing, I found 8 more instructions whose disp8
offset multipliers differ from those used by binutils. I am fairly sure
there is a bug in the X86 LLVM disp8 offset multipliers for this subset
of vector scatter and gather prefetch instructions. Please check, and
refer to the previous pull request: llvm/llvm-project#120340

These vector scatter and gather prefetch instructions also have an
unusual k-mask operand position, but I have not addressed that in this
patch because I am unsure how to change the Intel format in the TableGen
file.

```
hex:	62 f2 fd 49 c6 4c 51 01
llvm:	vgatherpf0dpd	{k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours:	vgatherpf0dpd	qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu:	vgatherpf0dpd 	QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex:	62 f2 7d 49 c7 4c 51 01
llvm:	vgatherpf0qps	{k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours:	vgatherpf0qps	dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu:	vgatherpf0qps	DWORD PTR [rcx+zmm2*2+0x4]{k1}

hex:	62 f2 fd 49 c6 54 51 01
llvm:	vgatherpf1dpd	{k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours:	vgatherpf1dpd	qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu:	vgatherpf1dpd	QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex:	62 f2 7d 49 c7 54 51 01
llvm:	vgatherpf1qps	{k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours:	vgatherpf1qps	dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu:	vgatherpf1qps	DWORD PTR [rcx+zmm2*2+0x4]{k1}

hex:	62 f2 fd 49 c6 6c 51 01
llvm:	vscatterpf0dpd	{k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours:	vscatterpf0dpd	qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu:	vscatterpf0dpd	QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex:	62 f2 7d 49 c7 6c 51 01
llvm:	vscatterpf0qps	{k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours:	vscatterpf0qps	dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu:	vscatterpf0qps	DWORD PTR [rcx+zmm2*2+0x4]{k1}

hex:	62 f2 fd 49 c6 74 51 01
llvm:	vscatterpf1dpd	{k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours:	vscatterpf1dpd	qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu:	vscatterpf1dpd QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex:	62 f2 7d 49 c7 74 51 01
llvm:	vscatterpf1qps	{k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours:	vscatterpf1qps	dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu:	vscatterpf1qps DWORD PTR [rcx+zmm2*2+0x4]{k1}
```
  • Loading branch information
michaeljclark authored Jan 11, 2025
1 parent 26d513d commit 212cba0
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 8 deletions.
16 changes: 8 additions & 8 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -10388,10 +10388,10 @@ defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Diff view of VGATHERPF0QPS: the first operand line (EVEX_CD8<64, ...>) is the
// pre-commit text and the second (EVEX_CD8<32, ...>) is its replacement. The
// test added by this commit encodes displacement 4 as disp8 byte 0x01 for
// vgatherpf0qps.
defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Diff view of VGATHERPF0DPD: the first operand line (EVEX_CD8<32, ...>) is the
// pre-commit text and the second (EVEX_CD8<64, ...>) is its replacement. The
// added test encodes displacement 8 as disp8 byte 0x01 for vgatherpf0dpd.
defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

// VGATHERPF0QPD: unchanged context line pair; it already uses EVEX_CD8<64, ...>.
defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
Expand All @@ -10400,10 +10400,10 @@ defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Diff view of VGATHERPF1QPS: the first operand line (EVEX_CD8<64, ...>) is the
// pre-commit text and the second (EVEX_CD8<32, ...>) is its replacement. The
// test added by this commit encodes displacement 4 as disp8 byte 0x01 for
// vgatherpf1qps.
defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Diff view of VGATHERPF1DPD: the first operand line (EVEX_CD8<32, ...>) is the
// pre-commit text and the second (EVEX_CD8<64, ...>) is its replacement. The
// added test encodes displacement 8 as disp8 byte 0x01 for vgatherpf1dpd.
defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

// VGATHERPF1QPD: unchanged context line pair; it already uses EVEX_CD8<64, ...>.
defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
Expand All @@ -10412,10 +10412,10 @@ defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Diff view of VSCATTERPF0QPS: the first operand line (EVEX_CD8<64, ...>) is
// the pre-commit text and the second (EVEX_CD8<32, ...>) is its replacement.
// The test added by this commit encodes displacement 4 as disp8 byte 0x01 for
// vscatterpf0qps.
defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Diff view of VSCATTERPF0DPD: the first operand line (EVEX_CD8<32, ...>) is
// the pre-commit text and the second (EVEX_CD8<64, ...>) is its replacement.
// The added test encodes displacement 8 as disp8 byte 0x01 for vscatterpf0dpd.
defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

// VSCATTERPF0QPD: unchanged context line pair; it already uses EVEX_CD8<64, ...>.
defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
Expand All @@ -10424,10 +10424,10 @@ defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Diff view of VSCATTERPF1QPS: the first operand line (EVEX_CD8<64, ...>) is
// the pre-commit text and the second (EVEX_CD8<32, ...>) is its replacement.
// The test added by this commit encodes displacement 4 as disp8 byte 0x01 for
// vscatterpf1qps.
defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Diff view of VSCATTERPF1DPD: the first operand line (EVEX_CD8<32, ...>) is
// the pre-commit text and the second (EVEX_CD8<64, ...>) is its replacement.
// The added test encodes displacement 8 as disp8 byte 0x01 for vscatterpf1dpd.
defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

// VSCATTERPF1QPD: unchanged context line pair; it already uses EVEX_CD8<64, ...>.
defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
Expand Down
64 changes: 64 additions & 0 deletions llvm/test/MC/X86/avx512pf-64-att.s
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,67 @@ vscatterpf0qpd (%r14,%zmm14){%k7}
// Existing case: memory operand with no displacement.
// CHECK: vscatterpf1qpd (%r15,%zmm13) {%k1}
// CHECK: encoding: [0x62,0x92,0xfd,0x49,0xc7,0x34,0x2f]
vscatterpf1qpd (%r15,%zmm13){%k1}

// Compressed-displacement cases added with the EVEX_CD8 correction.
// Every expected encoding below ends in the disp8 byte 0x01, while the
// assembly operand carries displacement 4 for the *ps mnemonics and 8 for the
// *pd mnemonics. Opcode byte 0xc6 appears with the d-index forms (dps/dpd) and
// 0xc7 with the q-index forms (qps/qpd).

// vgatherpf0 forms.
// CHECK: vgatherpf0dpd 8(%rcx,%ymm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x4c,0x51,0x01]
vgatherpf0dpd 8(%rcx,%ymm2,2){%k1}

// CHECK: vgatherpf0dps 4(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x4c,0x51,0x01]
vgatherpf0dps 4(%rcx,%zmm2,2){%k1}

// CHECK: vgatherpf0qpd 8(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x4c,0x51,0x01]
vgatherpf0qpd 8(%rcx,%zmm2,2){%k1}

// CHECK: vgatherpf0qps 4(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x4c,0x51,0x01]
vgatherpf0qps 4(%rcx,%zmm2,2){%k1}

// vgatherpf1 forms.
// CHECK: vgatherpf1dpd 8(%rcx,%ymm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x54,0x51,0x01]
vgatherpf1dpd 8(%rcx,%ymm2,2){%k1}

// CHECK: vgatherpf1dps 4(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x54,0x51,0x01]
vgatherpf1dps 4(%rcx,%zmm2,2){%k1}

// CHECK: vgatherpf1qpd 8(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x54,0x51,0x01]
vgatherpf1qpd 8(%rcx,%zmm2,2){%k1}

// CHECK: vgatherpf1qps 4(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x54,0x51,0x01]
vgatherpf1qps 4(%rcx,%zmm2,2){%k1}

// vscatterpf0 forms.
// CHECK: vscatterpf0dpd 8(%rcx,%ymm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x6c,0x51,0x01]
vscatterpf0dpd 8(%rcx,%ymm2,2){%k1}

// CHECK: vscatterpf0dps 4(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x6c,0x51,0x01]
vscatterpf0dps 4(%rcx,%zmm2,2){%k1}

// CHECK: vscatterpf0qpd 8(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x6c,0x51,0x01]
vscatterpf0qpd 8(%rcx,%zmm2,2){%k1}

// CHECK: vscatterpf0qps 4(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x6c,0x51,0x01]
vscatterpf0qps 4(%rcx,%zmm2,2){%k1}

// vscatterpf1 forms.
// CHECK: vscatterpf1dpd 8(%rcx,%ymm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x74,0x51,0x01]
vscatterpf1dpd 8(%rcx,%ymm2,2){%k1}

// CHECK: vscatterpf1dps 4(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x74,0x51,0x01]
vscatterpf1dps 4(%rcx,%zmm2,2){%k1}

// CHECK: vscatterpf1qpd 8(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x74,0x51,0x01]
vscatterpf1qpd 8(%rcx,%zmm2,2){%k1}

// CHECK: vscatterpf1qps 4(%rcx,%zmm2,2) {%k1}
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x74,0x51,0x01]
vscatterpf1qps 4(%rcx,%zmm2,2){%k1}

0 comments on commit 212cba0

Please sign in to comment.