Skip to content

Commit

Permalink
Fix illegal instruction usage in Xeon Phi x200 processors
Browse files Browse the repository at this point in the history
The Xeon Phi x200 family of processors (Knights Landing) supports
AVX512 (F, CD, ER, PF) but does not support AVX512 (VL, DQ, BW).

Because of processors like this, the Intel Software Developer's Manual
recommends that the AVX512 (DQ,BW,VL) bits in EBX be tested together with
the AVX512F bit before deciding to run AVX512 (DQ,BW,VL) instructions.

This also adds a new x86 feature called avx512_common, which indicates
that AVX512 (F,DQ,BW,VL) are all available, and starts using it for both
the adler32_avx512 and crc32_vpclmulqdq implementations because they are
both built with -mavx512dq -mavx512bw -mavx512vl.

This has been reported downstream as
https://bugzilla.redhat.com/show_bug.cgi?id=2280347 .
  • Loading branch information
tuliom authored and Dead2 committed May 19, 2024
1 parent 4104d81 commit 1a15c4b
Show file tree
Hide file tree
Showing 8 changed files with 23 additions and 10 deletions.
11 changes: 10 additions & 1 deletion arch/x86/x86_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,16 @@ void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) {

// check AVX512 bits if the OS supports saving ZMM registers
if (features->has_os_save_zmm) {
features->has_avx512 = ebx & 0x00010000;
features->has_avx512f = ebx & 0x00010000;
if (features->has_avx512f) {
// According to the Intel Software Developer's Manual, AVX512F must be enabled too in order to enable
// AVX512(DQ,BW,VL).
features->has_avx512dq = ebx & 0x00020000;
features->has_avx512bw = ebx & 0x40000000;
features->has_avx512vl = ebx & 0x80000000;
}
features->has_avx512_common = features->has_avx512f && features->has_avx512dq && features->has_avx512bw \
&& features->has_avx512vl;
features->has_avx512vnni = ecx & 0x800;
}
}
Expand Down
6 changes: 5 additions & 1 deletion arch/x86/x86_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@

struct x86_cpu_features {
int has_avx2;
int has_avx512;
int has_avx512f;
int has_avx512dq;
int has_avx512bw;
int has_avx512vl;
int has_avx512_common; // Enabled when AVX512(F,DQ,BW,VL) are all enabled.
int has_avx512vnni;
int has_sse2;
int has_ssse3;
Expand Down
4 changes: 2 additions & 2 deletions functable.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ static void init_functable(void) {
#endif
// X86 - AVX512 (F,DQ,BW,Vl)
#ifdef X86_AVX512
if (cf.x86.has_avx512) {
if (cf.x86.has_avx512_common) {
ft.adler32 = &adler32_avx512;
ft.adler32_fold_copy = &adler32_fold_copy_avx512;
}
Expand All @@ -138,7 +138,7 @@ static void init_functable(void) {
#endif
// X86 - VPCLMULQDQ
#ifdef X86_VPCLMULQDQ_CRC
if (cf.x86.has_pclmulqdq && cf.x86.has_avx512 && cf.x86.has_vpclmulqdq) {
if (cf.x86.has_pclmulqdq && cf.x86.has_avx512_common && cf.x86.has_vpclmulqdq) {
ft.crc32 = &crc32_vpclmulqdq;
ft.crc32_fold = &crc32_fold_vpclmulqdq;
ft.crc32_fold_copy = &crc32_fold_vpclmulqdq_copy;
Expand Down
2 changes: 1 addition & 1 deletion test/benchmarks/benchmark_adler32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ BENCHMARK_ADLER32(ssse3, adler32_ssse3, test_cpu_features.x86.has_ssse3);
BENCHMARK_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2);
#endif
#ifdef X86_AVX512
BENCHMARK_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512);
BENCHMARK_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512_common);
#endif
#ifdef X86_AVX512VNNI
BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
Expand Down
4 changes: 2 additions & 2 deletions test/benchmarks/benchmark_adler32_copy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, test_cpu_features.x
BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, test_cpu_features.x86.has_avx2);
#endif
#ifdef X86_AVX512
BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, test_cpu_features.x86.has_avx512);
BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, test_cpu_features.x86.has_avx512);
BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, test_cpu_features.x86.has_avx512_common);
BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, test_cpu_features.x86.has_avx512_common);
#endif
#ifdef X86_AVX512VNNI
BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
Expand Down
2 changes: 1 addition & 1 deletion test/benchmarks/benchmark_crc32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
#endif
#ifdef X86_VPCLMULQDQ_CRC
/* CRC32 fold does a memory copy while hashing */
BENCHMARK_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq));
BENCHMARK_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq));
#endif

#endif
2 changes: 1 addition & 1 deletion test/test_adler32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ TEST_ADLER32(ssse3, adler32_ssse3, test_cpu_features.x86.has_ssse3)
TEST_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2)
#endif
#ifdef X86_AVX512
TEST_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512)
TEST_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512_common)
#endif
#ifdef X86_AVX512VNNI
TEST_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni)
Expand Down
2 changes: 1 addition & 1 deletion test/test_crc32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ TEST_CRC32(vx, crc32_s390_vx, test_cpu_features.s390.has_vx)
TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
#endif
#ifdef X86_VPCLMULQDQ_CRC
TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq))
TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq))
#endif

#endif

0 comments on commit 1a15c4b

Please sign in to comment.