Skip to content

Commit

Permalink
Clear the stack in AES-XTS AVX512 implementation (#1415)
Browse files Browse the repository at this point in the history
  • Loading branch information
pittma authored Jan 30, 2024
1 parent eaa19c7 commit 67d3e50
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 38 deletions.
136 changes: 104 additions & 32 deletions crypto/fipsmodule/aes/asm/aesni-xts-avx512.pl
Original file line number Diff line number Diff line change
Expand Up @@ -1832,29 +1832,65 @@
vmovdqu %xmm8,-0x10($output)
___
}
{
# Common exit label. Emit it exactly once: restore %rbx from its save slot,
# then overwrite that slot with zero. $tmp1 is left holding 0 and %zmm0 is
# cleared so the code that follows can use them as zero sources when it
# scrubs the remaining save slots and the stack.
$code .= <<___;

.L_ret_${rndsuffix}:
mov $GP_STORAGE($TW),%rbx
xor $tmp1,$tmp1
mov $tmp1,$GP_STORAGE($TW)
# Zero-out the whole of `%zmm0`.
vpxorq %zmm0,%zmm0,%zmm0
___
}

if ($win64) {
# Win64 only: %rdi, %rsi and %xmm6-%xmm15 are callee-saved under the
# Microsoft x64 ABI. Reload each of them once from its save slot and then
# overwrite the slot with zeros ($tmp1 and %zmm0 are already zero here).
#
# The XMM save area is scrubbed with *unaligned* stores. The frame also takes
# aligned 64-byte stores at 0x80(%rsp); in the generated Win64 code the XMM
# area sits at 368(%rsp), i.e. 240 bytes further on, so it cannot be 64-byte
# aligned as well (and its last 32 bytes are not 32-byte aligned). An aligned
# zmm/ymm store there would raise #GP. The 16-byte vmovdqa reloads are kept
# unchanged.
$code .= <<___;
mov $GP_STORAGE + 8*1($TW),%rdi
mov $tmp1,$GP_STORAGE + 8*1($TW)
mov $GP_STORAGE + 8*2($TW),%rsi
mov $tmp1,$GP_STORAGE + 8*2($TW)

vmovdqa $XMM_STORAGE + 16 * 0($TW), %xmm6
vmovdqa $XMM_STORAGE + 16 * 1($TW), %xmm7
vmovdqa $XMM_STORAGE + 16 * 2($TW), %xmm8
vmovdqa $XMM_STORAGE + 16 * 3($TW), %xmm9
# Zero the 64 bytes we just restored to %xmm6-%xmm9.
vmovdqu64 %zmm0,$XMM_STORAGE($TW)
vmovdqa $XMM_STORAGE + 16 * 4($TW), %xmm10
vmovdqa $XMM_STORAGE + 16 * 5($TW), %xmm11
vmovdqa $XMM_STORAGE + 16 * 6($TW), %xmm12
vmovdqa $XMM_STORAGE + 16 * 7($TW), %xmm13
# Zero the next 64 bytes (the %xmm10-%xmm13 slots).
vmovdqu64 %zmm0,$XMM_STORAGE + 16 * 4($TW)
vmovdqa $XMM_STORAGE + 16 * 8($TW), %xmm14
vmovdqa $XMM_STORAGE + 16 * 9($TW), %xmm15
# Last round is only 32 bytes (256-bits), so we use `%ymm` as the
# source operand.
vmovdqu %ymm0,$XMM_STORAGE + 16 * 8($TW)
___
}

{
$code .= <<___;
mov %rbp,%rsp
pop %rbp
# Zero-out the stack frames used for `key1`, 64 bytes at a time.
vmovdqa64 %zmm0,0x80(%rsp)
vmovdqa64 %zmm0,0xc0(%rsp)
vmovdqa64 %zmm0,0x100(%rsp)
# Stack usage is not divisible by 64, so we use a kmask register to
# only mov 48 of the bytes (6 quad-words).
mov \$0x3f,$tmp1
kmovq $tmp1,%k2
vmovdqa64 %zmm0,0x140(%rsp){%k2}
mov %rbp,%rsp
pop %rbp
vzeroupper
ret
Expand Down Expand Up @@ -2571,29 +2607,65 @@
vmovdqu %xmm8,-0x10($output)
___
}
{
# Common exit label. Emit it exactly once: restore %rbx from its save slot,
# then overwrite that slot with zero. $tmp1 is left holding 0 and %zmm0 is
# cleared so the code that follows can use them as zero sources when it
# scrubs the remaining save slots and the stack.
$code .= <<___;

.L_ret_${rndsuffix}:
mov $GP_STORAGE($TW),%rbx
xor $tmp1,$tmp1
mov $tmp1,$GP_STORAGE($TW)
# Zero-out the whole of `%zmm0`.
vpxorq %zmm0,%zmm0,%zmm0
___
}

if ($win64) {
# Win64 only: %rdi, %rsi and %xmm6-%xmm15 are callee-saved under the
# Microsoft x64 ABI. Reload each of them once from its save slot and then
# overwrite the slot with zeros ($tmp1 and %zmm0 are already zero here).
#
# The XMM save area is scrubbed with *unaligned* stores. The frame also takes
# aligned 64-byte stores at 0x80(%rsp); in the generated Win64 code the XMM
# area sits at 368(%rsp), i.e. 240 bytes further on, so it cannot be 64-byte
# aligned as well (and its last 32 bytes are not 32-byte aligned). An aligned
# zmm/ymm store there would raise #GP. The 16-byte vmovdqa reloads are kept
# unchanged.
$code .= <<___;
mov $GP_STORAGE + 8*1($TW),%rdi
mov $tmp1,$GP_STORAGE + 8*1($TW)
mov $GP_STORAGE + 8*2($TW),%rsi
mov $tmp1,$GP_STORAGE + 8*2($TW)

vmovdqa $XMM_STORAGE + 16 * 0($TW), %xmm6
vmovdqa $XMM_STORAGE + 16 * 1($TW), %xmm7
vmovdqa $XMM_STORAGE + 16 * 2($TW), %xmm8
vmovdqa $XMM_STORAGE + 16 * 3($TW), %xmm9
# Zero the 64 bytes we just restored to %xmm6-%xmm9.
vmovdqu64 %zmm0,$XMM_STORAGE($TW)
vmovdqa $XMM_STORAGE + 16 * 4($TW), %xmm10
vmovdqa $XMM_STORAGE + 16 * 5($TW), %xmm11
vmovdqa $XMM_STORAGE + 16 * 6($TW), %xmm12
vmovdqa $XMM_STORAGE + 16 * 7($TW), %xmm13
# Zero the next 64 bytes (the %xmm10-%xmm13 slots).
vmovdqu64 %zmm0,$XMM_STORAGE + 16 * 4($TW)
vmovdqa $XMM_STORAGE + 16 * 8($TW), %xmm14
vmovdqa $XMM_STORAGE + 16 * 9($TW), %xmm15
# Last round is only 32 bytes (256-bits), so we use `%ymm` as the
# source operand.
vmovdqu %ymm0,$XMM_STORAGE + 16 * 8($TW)
___
}

{
$code .= <<___;
mov %rbp,%rsp
pop %rbp
# Zero-out the stack frames used for `key1`, 64 bytes at a time.
vmovdqa64 %zmm0,0x80(%rsp)
vmovdqa64 %zmm0,0xc0(%rsp)
vmovdqa64 %zmm0,0x100(%rsp)
# Stack usage is not divisible by 64, so we use a kmask register to
# only mov 48 of the bytes (6 quad-words).
mov \$0x3f,$tmp1
kmovq $tmp1,%k2
vmovdqa64 %zmm0,0x140(%rsp){%k2}
mov %rbp,%rsp
pop %rbp
vzeroupper
ret
Expand Down
32 changes: 30 additions & 2 deletions generated-src/linux-x86_64/crypto/fipsmodule/aesni-xts-avx512.S
Original file line number Diff line number Diff line change
Expand Up @@ -1024,9 +1024,23 @@ aes_hw_xts_encrypt_avx512:
.byte 98,114,61,8,221,132,36,96,1,0,0
vpxor %xmm0,%xmm8,%xmm8
vmovdqu %xmm8,-16(%rsi)

.L_ret_hEgxyDlCngwrfFe:
movq 368(%rsp),%rbx
xorq %r8,%r8
movq %r8,368(%rsp)

vpxorq %zmm0,%zmm0,%zmm0

vmovdqa64 %zmm0,128(%rsp)
vmovdqa64 %zmm0,192(%rsp)
vmovdqa64 %zmm0,256(%rsp)



movq $0x3f,%r8
kmovq %r8,%k2
vmovdqa64 %zmm0,320(%rsp){%k2}

movq %rbp,%rsp
popq %rbp
vzeroupper
Expand Down Expand Up @@ -3550,9 +3564,23 @@ aes_hw_xts_decrypt_avx512:
.L_done_amivrujEyduiFoi:

vmovdqu %xmm8,-16(%rsi)

.L_ret_amivrujEyduiFoi:
movq 368(%rsp),%rbx
xorq %r8,%r8
movq %r8,368(%rsp)

vpxorq %zmm0,%zmm0,%zmm0

vmovdqa64 %zmm0,128(%rsp)
vmovdqa64 %zmm0,192(%rsp)
vmovdqa64 %zmm0,256(%rsp)



movq $0x3f,%r8
kmovq %r8,%k2
vmovdqa64 %zmm0,320(%rsp){%k2}

movq %rbp,%rsp
popq %rbp
vzeroupper
Expand Down
32 changes: 30 additions & 2 deletions generated-src/mac-x86_64/crypto/fipsmodule/aesni-xts-avx512.S
Original file line number Diff line number Diff line change
Expand Up @@ -1024,9 +1024,23 @@ L$_steal_cipher_hEgxyDlCngwrfFe:
.byte 98,114,61,8,221,132,36,96,1,0,0
vpxor %xmm0,%xmm8,%xmm8
vmovdqu %xmm8,-16(%rsi)

L$_ret_hEgxyDlCngwrfFe:
movq 368(%rsp),%rbx
xorq %r8,%r8
movq %r8,368(%rsp)

vpxorq %zmm0,%zmm0,%zmm0

vmovdqa64 %zmm0,128(%rsp)
vmovdqa64 %zmm0,192(%rsp)
vmovdqa64 %zmm0,256(%rsp)



movq $0x3f,%r8
kmovq %r8,%k2
vmovdqa64 %zmm0,320(%rsp){%k2}

movq %rbp,%rsp
popq %rbp
vzeroupper
Expand Down Expand Up @@ -3550,9 +3564,23 @@ L$_steal_cipher_amivrujEyduiFoi:
L$_done_amivrujEyduiFoi:

vmovdqu %xmm8,-16(%rsi)

L$_ret_amivrujEyduiFoi:
movq 368(%rsp),%rbx
xorq %r8,%r8
movq %r8,368(%rsp)

vpxorq %zmm0,%zmm0,%zmm0

vmovdqa64 %zmm0,128(%rsp)
vmovdqa64 %zmm0,192(%rsp)
vmovdqa64 %zmm0,256(%rsp)



movq $0x3f,%r8
kmovq %r8,%k2
vmovdqa64 %zmm0,320(%rsp){%k2}

movq %rbp,%rsp
popq %rbp
vzeroupper
Expand Down
62 changes: 60 additions & 2 deletions generated-src/win-x86_64/crypto/fipsmodule/aesni-xts-avx512.asm
Original file line number Diff line number Diff line change
Expand Up @@ -1042,21 +1042,50 @@ $L$_steal_cipher_hEgxyDlCngwrfFe:
DB 98,114,61,8,221,132,36,96,1,0,0
vpxor xmm8,xmm8,xmm0
vmovdqu XMMWORD[(-16)+rdx],xmm8

$L$_ret_hEgxyDlCngwrfFe:
mov rbx,QWORD[528+rsp]
xor r10,r10
mov QWORD[528+rsp],r10

vpxorq zmm0,zmm0,zmm0
mov rdi,QWORD[((528 + 8))+rsp]
mov QWORD[((528 + 8))+rsp],r10
mov rsi,QWORD[((528 + 16))+rsp]
mov QWORD[((528 + 16))+rsp],r10

vmovdqa xmm6,XMMWORD[((368 + 0))+rsp]
vmovdqa xmm7,XMMWORD[((368 + 16))+rsp]
vmovdqa xmm8,XMMWORD[((368 + 32))+rsp]
vmovdqa xmm9,XMMWORD[((368 + 48))+rsp]


vmovdqa64 ZMMWORD[368+rsp],zmm0

vmovdqa xmm10,XMMWORD[((368 + 64))+rsp]
vmovdqa xmm11,XMMWORD[((368 + 80))+rsp]
vmovdqa xmm12,XMMWORD[((368 + 96))+rsp]
vmovdqa xmm13,XMMWORD[((368 + 112))+rsp]


vmovdqa64 ZMMWORD[(368 + 64)+rsp],zmm0

vmovdqa xmm14,XMMWORD[((368 + 128))+rsp]
vmovdqa xmm15,XMMWORD[((368 + 144))+rsp]



vmovdqa YMMWORD[(368 + 128)+rsp],ymm0

vmovdqa64 ZMMWORD[128+rsp],zmm0
vmovdqa64 ZMMWORD[192+rsp],zmm0
vmovdqa64 ZMMWORD[256+rsp],zmm0



mov r10,0x3f
kmovq k2,r10
vmovdqa64 ZMMWORD[320+rsp]{k2},zmm0

mov rsp,rbp
pop rbp
vzeroupper
Expand Down Expand Up @@ -3593,21 +3622,50 @@ $L$_steal_cipher_amivrujEyduiFoi:
$L$_done_amivrujEyduiFoi:

vmovdqu XMMWORD[(-16)+rdx],xmm8

$L$_ret_amivrujEyduiFoi:
mov rbx,QWORD[528+rsp]
xor r10,r10
mov QWORD[528+rsp],r10

vpxorq zmm0,zmm0,zmm0
mov rdi,QWORD[((528 + 8))+rsp]
mov QWORD[((528 + 8))+rsp],r10
mov rsi,QWORD[((528 + 16))+rsp]
mov QWORD[((528 + 16))+rsp],r10

vmovdqa xmm6,XMMWORD[((368 + 0))+rsp]
vmovdqa xmm7,XMMWORD[((368 + 16))+rsp]
vmovdqa xmm8,XMMWORD[((368 + 32))+rsp]
vmovdqa xmm9,XMMWORD[((368 + 48))+rsp]


vmovdqa64 ZMMWORD[368+rsp],zmm0

vmovdqa xmm10,XMMWORD[((368 + 64))+rsp]
vmovdqa xmm11,XMMWORD[((368 + 80))+rsp]
vmovdqa xmm12,XMMWORD[((368 + 96))+rsp]
vmovdqa xmm13,XMMWORD[((368 + 112))+rsp]


vmovdqa64 ZMMWORD[(368 + 64)+rsp],zmm0

vmovdqa xmm14,XMMWORD[((368 + 128))+rsp]
vmovdqa xmm15,XMMWORD[((368 + 144))+rsp]



vmovdqa YMMWORD[(368 + 128)+rsp],ymm0

vmovdqa64 ZMMWORD[128+rsp],zmm0
vmovdqa64 ZMMWORD[192+rsp],zmm0
vmovdqa64 ZMMWORD[256+rsp],zmm0



mov r10,0x3f
kmovq k2,r10
vmovdqa64 ZMMWORD[320+rsp]{k2},zmm0

mov rsp,rbp
pop rbp
vzeroupper
Expand Down

0 comments on commit 67d3e50

Please sign in to comment.