Skip to content

Commit

Permalink
Extension of OPENSSL_ia32cap to accommodate additional CPUID bits
Browse files Browse the repository at this point in the history
bits 128 - 191 CPUID.(EAX=07H,ECX=0H).EDX and CPUID.(EAX=07H,ECX=1H).EAX
bits 192 - 255 CPUID.(EAX=07H,ECX=1H).EDX and CPUID.(EAX=07H,ECX=1H).EBX
bits 256 - 319 CPUID.(EAX=07H,ECX=1H).ECX and CPUID.(EAX=24H,ECX=0H).EBX

Reviewed-by: Matt Caswell <matt@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from openssl#25709)
  • Loading branch information
aelizaro authored and t8m committed Dec 13, 2024
1 parent 1b3b5a0 commit acc2655
Show file tree
Hide file tree
Showing 10 changed files with 264 additions and 87 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ OpenSSL 3.5

*Paul Dale*

* Extended `OPENSSL_ia32cap` support to accommodate additional `CPUID`
feature/capability bits in leaf `0x7` (Extended Feature Flags) as well
as leaf `0x24` (Converged Vector ISA).

*Dan Zimmerman, Alina Elizarova*

OpenSSL 3.4
-----------

Expand Down
55 changes: 35 additions & 20 deletions crypto/cpuid.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
defined(__x86_64) || defined(__x86_64__) || \
defined(_M_AMD64) || defined(_M_X64)

extern unsigned int OPENSSL_ia32cap_P[4];
extern unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES];

# if defined(OPENSSL_CPUID_OBJ)

Expand All @@ -29,18 +29,18 @@ extern unsigned int OPENSSL_ia32cap_P[4];
*/
# ifdef _WIN32
typedef WCHAR variant_char;

# define OPENSSL_IA32CAP_P_MAX_CHAR_SIZE 256
static variant_char *ossl_getenv(const char *name)
{
/*
* Since we pull only one environment variable, it's simpler to
* just ignore |name| and use equivalent wide-char L-literal.
* As well as to ignore excessively long values...
*/
static WCHAR value[48];
DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, 48);
static WCHAR value[OPENSSL_IA32CAP_P_MAX_CHAR_SIZE];
DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, OPENSSL_IA32CAP_P_MAX_CHAR_SIZE);

return (len > 0 && len < 48) ? value : NULL;
return (len > 0 && len < OPENSSL_IA32CAP_P_MAX_CHAR_SIZE) ? value : NULL;
}
# else
typedef char variant_char;
Expand Down Expand Up @@ -98,6 +98,7 @@ void OPENSSL_cpuid_setup(void)
IA32CAP OPENSSL_ia32_cpuid(unsigned int *);
IA32CAP vec;
const variant_char *env;
int index = 2;

if (trigger)
return;
Expand Down Expand Up @@ -126,23 +127,37 @@ void OPENSSL_cpuid_setup(void)
vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
}

if ((env = ossl_strchr(env, ':')) != NULL) {
IA32CAP vecx;

/* Processed indexes 0, 1 */
if ((env = ossl_strchr(env, ':')) != NULL)
env++;
off = (env[0] == '~') ? 1 : 0;
vecx = ossl_strtouint64(env + off);
if (off) {
OPENSSL_ia32cap_P[2] &= ~(unsigned int)vecx;
OPENSSL_ia32cap_P[3] &= ~(unsigned int)(vecx >> 32);
} else {
OPENSSL_ia32cap_P[2] = (unsigned int)vecx;
OPENSSL_ia32cap_P[3] = (unsigned int)(vecx >> 32);
for (; index < OPENSSL_IA32CAP_P_MAX_INDEXES; index += 2) {
if ((env != NULL) && (env[0] != '\0')) {
/* if env[0] == ':' current index is skipped */
if (env[0] != ':') {
IA32CAP vecx;

off = (env[0] == '~') ? 1 : 0;
vecx = ossl_strtouint64(env + off);
if (off) {
OPENSSL_ia32cap_P[index] &= ~(unsigned int)vecx;
OPENSSL_ia32cap_P[index + 1] &= ~(unsigned int)(vecx >> 32);
} else {
OPENSSL_ia32cap_P[index] = (unsigned int)vecx;
OPENSSL_ia32cap_P[index + 1] = (unsigned int)(vecx >> 32);
}
}
/* skip delimiter */
if ((env = ossl_strchr(env, ':')) != NULL)
env++;
} else { /* zeroize the next two indexes */
OPENSSL_ia32cap_P[index] = 0;
OPENSSL_ia32cap_P[index + 1] = 0;
}
} else {
OPENSSL_ia32cap_P[2] = 0;
OPENSSL_ia32cap_P[3] = 0;
}

/* If AVX10 is disabled, zero out its detailed cap bits */
if (!(OPENSSL_ia32cap_P[6] & (1 << 19)))
OPENSSL_ia32cap_P[9] = 0;
} else {
vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
}
Expand All @@ -156,7 +171,7 @@ void OPENSSL_cpuid_setup(void)
OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32);
}
# else
unsigned int OPENSSL_ia32cap_P[4];
unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES];
# endif
#endif

Expand Down
13 changes: 10 additions & 3 deletions crypto/info.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# include "crypto/riscv_arch.h"
# define CPU_INFO_STR_LEN 2048
#else
# define CPU_INFO_STR_LEN 128
# define CPU_INFO_STR_LEN 256
#endif

/* extern declaration to avoid warning */
Expand All @@ -52,11 +52,18 @@ DEFINE_RUN_ONCE_STATIC(init_info_strings)
const char *env;

BIO_snprintf(ossl_cpu_info_str, sizeof(ossl_cpu_info_str),
CPUINFO_PREFIX "OPENSSL_ia32cap=0x%llx:0x%llx",
CPUINFO_PREFIX "OPENSSL_ia32cap=0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx",
(unsigned long long)OPENSSL_ia32cap_P[0] |
(unsigned long long)OPENSSL_ia32cap_P[1] << 32,
(unsigned long long)OPENSSL_ia32cap_P[2] |
(unsigned long long)OPENSSL_ia32cap_P[3] << 32);
(unsigned long long)OPENSSL_ia32cap_P[3] << 32,
(unsigned long long)OPENSSL_ia32cap_P[4] |
(unsigned long long)OPENSSL_ia32cap_P[5] << 32,
(unsigned long long)OPENSSL_ia32cap_P[6] |
(unsigned long long)OPENSSL_ia32cap_P[7] << 32,
(unsigned long long)OPENSSL_ia32cap_P[8] |
(unsigned long long)OPENSSL_ia32cap_P[9] << 32);

if ((env = getenv("OPENSSL_ia32cap")) != NULL)
BIO_snprintf(ossl_cpu_info_str + strlen(ossl_cpu_info_str),
sizeof(ossl_cpu_info_str) - strlen(ossl_cpu_info_str),
Expand Down
3 changes: 2 additions & 1 deletion crypto/perlasm/x86gas.pl
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ sub ::file_end
}
}
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16";
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,40";
if ($::macosx) { push (@out,"$tmp,2\n"); }
elsif ($::elf) { push (@out,"$tmp,4\n"); }
else { push (@out,"$tmp\n"); }
Expand Down
3 changes: 2 additions & 1 deletion crypto/perlasm/x86masm.pl
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,10 @@ sub ::file_end
push(@out,"$segment ENDS\n");

if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
{ my $comm=<<___;
.bss SEGMENT 'BSS'
COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:4
COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:10
.bss ENDS
___
# comment out OPENSSL_ia32cap_P declarations
Expand Down
3 changes: 2 additions & 1 deletion crypto/perlasm/x86nasm.pl
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,10 @@ sub ::function_end_B

sub ::file_end
{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
{ my $comm=<<___;
${drdecor}segment .bss
${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 16
${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 40
___
# comment out OPENSSL_ia32cap_P declarations
grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
Expand Down
36 changes: 31 additions & 5 deletions crypto/x86_64cpuid.pl
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@
("%rdi","%rsi","%rdx","%rcx"); # Unix order

print<<___;
#include crypto/cryptlib.h
.extern OPENSSL_cpuid_setup
.hidden OPENSSL_cpuid_setup
.section .init
call OPENSSL_cpuid_setup
.hidden OPENSSL_ia32cap_P
.comm OPENSSL_ia32cap_P,16,4
.comm OPENSSL_ia32cap_P,40,4 # <--Should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
.text
.globl OPENSSL_atomic_add
Expand Down Expand Up @@ -192,6 +192,7 @@
mov \$7,%eax
xor %ecx,%ecx
cpuid
movd %eax,%xmm1 # put aside leaf 07H Max Sub-leaves
bt \$26,%r9d # check XSAVE bit, cleared on Knights
jc .Lnotknights
and \$0xfff7ffff,%ebx # clear ADCX/ADOX flag
Expand All @@ -202,9 +203,31 @@
jne .Lnotskylakex
and \$0xfffeffff,%ebx # ~(1<<16)
# suppress AVX512F flag on Skylake-X
.Lnotskylakex:
mov %ebx,8(%rdi) # save extended feature flags
mov %ecx,12(%rdi)
.Lnotskylakex: # save extended feature flags
mov %ebx,8(%rdi) # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
mov %ecx,12(%rdi) # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
mov %edx,16(%rdi) # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
movd %xmm1,%eax # Restore leaf 07H Max Sub-leaves
cmp \$0x1,%eax # Do we have cpuid(EAX=0x7, ECX=0x1)?
jb .Lno_extended_info
mov \$0x7,%eax
mov \$0x1,%ecx
cpuid # cpuid(EAX=0x7, ECX=0x1)
mov %eax,20(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
mov %edx,24(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
mov %ebx,28(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
mov %ecx,32(%rdi) # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]
and \$0x80000,%edx # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
cmp \$0x0,%edx
je .Lno_extended_info
mov \$0x24,%eax # Have AVX10 Support, query for details
mov \$0x0,%ecx
cpuid # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
mov %ebx,36(%rdi) # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]
.Lno_extended_info:
bt \$27,%r9d # check OSXSAVE bit
Expand All @@ -223,6 +246,9 @@
cmp \$6,%eax
je .Ldone
.Lclear_avx:
andl \$0xff7fffff,20(%rdi) # ~(1<<23)
# clear AVXIFMA, which is VEX-encoded
# and requires YMM state support
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
and %eax,%r9d # clear AVX, FMA and AMD XOP bits
mov \$0x3fdeffdf,%eax # ~(1<<31|1<<30|1<<21|1<<16|1<<5)
Expand Down
26 changes: 25 additions & 1 deletion crypto/x86cpuid.pl
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,28 @@
&mov ("eax",7);
&xor ("ecx","ecx");
&cpuid ();
&mov (&DWP(8,"edi"),"ebx"); # save extended feature flag
&mov (&DWP(8,"edi"),"ebx"); # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
&mov (&DWP(12,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
&mov (&DWP(16,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
&cmp ("eax",1); # Do we have cpuid(EAX=0x7, ECX=0x1)?
&jb (&label("no_extended_info"));
&mov ("eax",7);
&mov ("ecx",1);
&cpuid (); # cpuid(EAX=0x7, ECX=0x1)
&mov (&DWP(20,"edi"),"eax"); # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
&mov (&DWP(24,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
&mov (&DWP(28,"edi"),"ebx"); # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
&mov (&DWP(32,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]

&and ("edx",0x80000); # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
&cmp ("edx",0x0);
&je (&label("no_extended_info"));

&mov ("eax",0x24); # Have AVX10 Support, query for details
&mov ("ecx",0x0);
&cpuid (); # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
&mov (&DWP(36,"edi"),"ebx"); # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]

&set_label("no_extended_info");

&bt ("ebp",27); # check OSXSAVE bit
Expand All @@ -154,6 +175,9 @@
&and ("esi",0xfeffffff); # clear FXSR
&set_label("clear_avx");
&and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits
&and (&DWP(20,"edi"),0xff7fffff); # ~(1<<23) clear AVXIFMA,
# which is VEX-encoded
# and requires YMM state support
&and (&DWP(8,"edi"),0xffffffdf); # clear AVX2
&set_label("done");
&mov ("eax","esi");
Expand Down
Loading

0 comments on commit acc2655

Please sign in to comment.