Skip to content

Commit

Permalink
[SelectionDAG] Lower must_preserve_tags memset without capabilities
Browse files Browse the repository at this point in the history
When a must_preserve_tags memcpy is converted to a memset, we may end up
using a non-capability MVT as the copy type. This change allows us to use
inline integer memset lowering for copies from zero constants that are
not necessarily sufficiently aligned. This also fixes an assertion
found while compiling ICU4C for Morello where a copy from a large zero
constant was lowered using NEON registers instead of capability ones.
  • Loading branch information
arichardson committed Apr 30, 2024
1 parent 7abe1ac commit 0e82a82
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 77 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6887,7 +6887,7 @@ static SDValue getMemcpyLoadsAndStores(
// TODO: the frontend/optimization passes probably shouldn't emit
// must-preserve-tags for such small memcpys
auto CapTy = TLI.cheriCapabilityType();
if (CapTy.isValid()) {
if (CapTy.isValid() && !Op.isMemset()) {
const uint64_t CapSize = CapTy.getStoreSize();
if (PreserveTags == PreserveCheriTags::Required && !ReachedLimit &&
Size >= CapSize && (!FoundLowering || !MemOps[0].isFatPointer())) {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ bool TargetLowering::findOptimalMemOpLowering(

// XXXAR: (ab)use MVT::isVoid to indicate that a memcpy call must be made
if (VT == MVT::isVoid) {
assert(!Op.isMemset() && "MVT::isVoid should only be used for copies");
return false; // cannot lower as memops
}
// If the type is a fat pointer, then forcibly disable overlap.
Expand Down
30 changes: 4 additions & 26 deletions llvm/test/CodeGen/CHERI-Generic/MIPS/memcpy-from-constant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -142,20 +142,9 @@ do.body:
define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) {
; CHECK-LABEL: copy_from_underaligned_zero_constant:
; CHECK: # %bb.0: # %do.body
; CHECK-NEXT: cincoffset $c11, $c11, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csc $c17, $zero, 0($c11) # 16-byte Folded Spill
; CHECK-NEXT: .cfi_offset 89, -16
; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8)
; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4)
; CHECK-NEXT: cgetpccincoffset $c1, $1
; CHECK-NEXT: clcbi $c4, %captab20(zero_constant)($c1)
; CHECK-NEXT: clcbi $c12, %capcall20(memcpy)($c1)
; CHECK-NEXT: cjalr $c12, $c17
; CHECK-NEXT: daddiu $4, $zero, 16
; CHECK-NEXT: clc $c17, $zero, 0($c11) # 16-byte Folded Reload
; CHECK-NEXT: csd $zero, $zero, 0($c3)
; CHECK-NEXT: cjr $c17
; CHECK-NEXT: cincoffset $c11, $c11, 16
; CHECK-NEXT: csd $zero, $zero, 8($c3)
do.body:
call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1
ret void
Expand All @@ -164,20 +153,9 @@ do.body:
define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) {
; CHECK-LABEL: copy_from_underaligned_zero_constant_preserve:
; CHECK: # %bb.0: # %do.body
; CHECK-NEXT: cincoffset $c11, $c11, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csc $c17, $zero, 0($c11) # 16-byte Folded Spill
; CHECK-NEXT: .cfi_offset 89, -16
; CHECK-NEXT: lui $1, %pcrel_hi(_CHERI_CAPABILITY_TABLE_-8)
; CHECK-NEXT: daddiu $1, $1, %pcrel_lo(_CHERI_CAPABILITY_TABLE_-4)
; CHECK-NEXT: cgetpccincoffset $c1, $1
; CHECK-NEXT: clcbi $c4, %captab20(zero_constant)($c1)
; CHECK-NEXT: clcbi $c12, %capcall20(memcpy)($c1)
; CHECK-NEXT: cjalr $c12, $c17
; CHECK-NEXT: daddiu $4, $zero, 16
; CHECK-NEXT: clc $c17, $zero, 0($c11) # 16-byte Folded Reload
; CHECK-NEXT: csd $zero, $zero, 0($c3)
; CHECK-NEXT: cjr $c17
; CHECK-NEXT: cincoffset $c11, $c11, 16
; CHECK-NEXT: csd $zero, $zero, 8($c3)
do.body:
call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1
ret void
Expand Down
30 changes: 4 additions & 26 deletions llvm/test/CodeGen/CHERI-Generic/RISCV32/memcpy-from-constant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -142,19 +142,8 @@ do.body:
define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) {
; CHECK-LABEL: copy_from_underaligned_zero_constant:
; CHECK: # %bb.0: # %do.body
; CHECK-NEXT: cincoffset csp, csp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csc cra, 8(csp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .LBB10_1: # %do.body
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant)
; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB10_1)(ca1)
; CHECK-NEXT: li a2, 8
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: ccall memcpy
; CHECK-NEXT: clc cra, 8(csp) # 8-byte Folded Reload
; CHECK-NEXT: cincoffset csp, csp, 16
; CHECK-NEXT: csw zero, 4(ca0)
; CHECK-NEXT: csw zero, 0(ca0)
; CHECK-NEXT: cret
do.body:
call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 @zero_constant, i64 8, i1 false) #1
Expand All @@ -164,19 +153,8 @@ do.body:
define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) {
; CHECK-LABEL: copy_from_underaligned_zero_constant_preserve:
; CHECK: # %bb.0: # %do.body
; CHECK-NEXT: cincoffset csp, csp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csc cra, 8(csp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .LBB11_1: # %do.body
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant)
; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB11_1)(ca1)
; CHECK-NEXT: li a2, 8
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: ccall memcpy
; CHECK-NEXT: clc cra, 8(csp) # 8-byte Folded Reload
; CHECK-NEXT: cincoffset csp, csp, 16
; CHECK-NEXT: csw zero, 4(ca0)
; CHECK-NEXT: csw zero, 0(ca0)
; CHECK-NEXT: cret
do.body:
call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 @zero_constant, i64 8, i1 false) #1
Expand Down
28 changes: 4 additions & 24 deletions llvm/test/CodeGen/CHERI-Generic/RISCV64/memcpy-from-constant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -142,18 +142,8 @@ do.body:
define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) {
; CHECK-LABEL: copy_from_underaligned_zero_constant:
; CHECK: # %bb.0: # %do.body
; CHECK-NEXT: cincoffset csp, csp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csc cra, 0(csp) # 16-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -16
; CHECK-NEXT: .LBB10_1: # %do.body
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant)
; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB10_1)(ca1)
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: ccall memcpy
; CHECK-NEXT: clc cra, 0(csp) # 16-byte Folded Reload
; CHECK-NEXT: cincoffset csp, csp, 16
; CHECK-NEXT: csd zero, 8(ca0)
; CHECK-NEXT: csd zero, 0(ca0)
; CHECK-NEXT: cret
do.body:
call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1
Expand All @@ -163,18 +153,8 @@ do.body:
define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) {
; CHECK-LABEL: copy_from_underaligned_zero_constant_preserve:
; CHECK: # %bb.0: # %do.body
; CHECK-NEXT: cincoffset csp, csp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csc cra, 0(csp) # 16-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -16
; CHECK-NEXT: .LBB11_1: # %do.body
; CHECK-NEXT: # Label of block must be emitted
; CHECK-NEXT: auipcc ca1, %captab_pcrel_hi(zero_constant)
; CHECK-NEXT: clc ca1, %pcrel_lo(.LBB11_1)(ca1)
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: ccall memcpy
; CHECK-NEXT: clc cra, 0(csp) # 16-byte Folded Reload
; CHECK-NEXT: cincoffset csp, csp, 16
; CHECK-NEXT: csd zero, 8(ca0)
; CHECK-NEXT: csd zero, 0(ca0)
; CHECK-NEXT: cret
do.body:
call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1
Expand Down

0 comments on commit 0e82a82

Please sign in to comment.