From 798a818f50a9bfc01e8b5943090de458863b897b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 30 Sep 2022 10:14:27 -0500 Subject: [PATCH 01/16] qapi, i386/sev: Change the reduced-phys-bits value from 5 to 1 A guest only ever experiences, at most, 1 bit of reduced physical addressing. Change the query-sev-capabilities json comment to use 1. Fixes: 31dd67f684 ("sev/i386: qmp: add query-sev-capabilities command") Signed-off-by: Tom Lendacky Reviewed-by: Dr. David Alan Gilbert Message-Id: Signed-off-by: Paolo Bonzini --- qapi/misc-target.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qapi/misc-target.json b/qapi/misc-target.json index de91054523..bf04042f45 100644 --- a/qapi/misc-target.json +++ b/qapi/misc-target.json @@ -172,7 +172,7 @@ # -> { "execute": "query-sev-capabilities" } # <- { "return": { "pdh": "8CCDD8DDD", "cert-chain": "888CCCDDDEE", # "cpu0-id": "2lvmGwo+...61iEinw==", -# "cbitpos": 47, "reduced-phys-bits": 5}} +# "cbitpos": 47, "reduced-phys-bits": 1}} # ## { 'command': 'query-sev-capabilities', 'returns': 'SevCapability', From 326e3015c4c6f3197157ea0bb00826ae740e2fad Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 30 Sep 2022 10:14:28 -0500 Subject: [PATCH 02/16] qemu-options.hx: Update the reduced-phys-bits documentation A guest only ever experiences, at most, 1 bit of reduced physical addressing. Update the documentation to reflect this as well as change the example value on the reduced-phys-bits option. Fixes: a9b4942f48 ("target/i386: add Secure Encrypted Virtualization (SEV) object") Signed-off-by: Tom Lendacky Reviewed-by: Dr. David Alan Gilbert Message-Id: <13a62ced1808546c1d398e2025cf85f4c94ae123.1664550870.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- qemu-options.hx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qemu-options.hx b/qemu-options.hx index b5efa648ba..42fc90aae4 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -5438,7 +5438,7 @@ SRST physical address space. The ``reduced-phys-bits`` is used to provide the number of bits we loose in physical address space. Similar to C-bit, the value is Host family dependent. On EPYC, - the value should be 5. + a guest will lose a maximum of 1 bit, so the value should be 1. The ``sev-device`` provides the device file to use for communicating with the SEV firmware running inside AMD Secure @@ -5473,7 +5473,7 @@ SRST # |qemu_system_x86| \\ ...... \\ - -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=5 \\ + -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 \\ -machine ...,memory-encryption=sev0 \\ ..... From 8168fed9f84e3128f7628969ae78af49433d5ce7 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 30 Sep 2022 10:14:29 -0500 Subject: [PATCH 03/16] i386/sev: Update checks and information related to reduced-phys-bits The value of the reduced-phys-bits parameter is propogated to the CPUID information exposed to the guest. Update the current validation check to account for the size of the CPUID field (6-bits), ensuring the value is in the range of 1 to 63. Maintain backward compatibility, to an extent, by allowing a value greater than 1 (so that the previously documented value of 5 still works), but not allowing anything over 63. Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context") Signed-off-by: Tom Lendacky Reviewed-by: Dr. David Alan Gilbert Message-Id: Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 859e06f6ad..fe2144c038 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -932,15 +932,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); host_cbitpos = ebx & 0x3f; + /* + * The cbitpos value will be placed in bit positions 5:0 of the EBX + * register of CPUID 0x8000001F. No need to verify the range as the + * comparison against the host value accomplishes that. + */ if (host_cbitpos != sev->cbitpos) { error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", __func__, host_cbitpos, sev->cbitpos); goto err; } - if (sev->reduced_phys_bits < 1) { - error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1," - " requested '%d'", __func__, sev->reduced_phys_bits); + /* + * The reduced-phys-bits value will be placed in bit positions 11:6 of + * the EBX register of CPUID 0x8000001F, so verify the supplied value + * is in the range of 1 to 63. + */ + if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { + error_setg(errp, "%s: reduced_phys_bits check failed," + " it should be in the range of 1 to 63, requested '%d'", + __func__, sev->reduced_phys_bits); goto err; } From fb6bbafc0f19385fb257ee073ed13dcaf613f2f8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 30 Sep 2022 10:14:30 -0500 Subject: [PATCH 04/16] i386/cpu: Update how the EBX register of CPUID 0x8000001F is set Update the setting of CPUID 0x8000001F EBX to clearly document the ranges associated with fields being set. Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active") Signed-off-by: Tom Lendacky Reviewed-by: Dr. David Alan Gilbert Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 2e30e348a1..73dd99374a 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -6000,8 +6000,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (sev_enabled()) { *eax = 0x2; *eax |= sev_es_enabled() ? 0x8 : 0; - *ebx = sev_get_cbit_position(); - *ebx |= sev_get_reduced_phys_bits() << 6; + *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ } break; default: From a9ce107fd0f2017af84255a9cf6542fa3eb3e214 Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 3 Mar 2023 14:59:08 +0800 Subject: [PATCH 05/16] target/i386: Add support for CMPCCXADD in CPUID enumeration CMPccXADD is a new set of instructions in the latest Intel platform Sierra Forest. This new instruction set includes a semaphore operation that can compare and add the operands if condition is met, which can improve database performance. The bit definition: CPUID.(EAX=7,ECX=1):EAX[bit 7] Add CPUID definition for CMPCCXADD. Signed-off-by: Jiaxi Chen Signed-off-by: Tao Su Reviewed-by: Xiaoyao Li Message-Id: <20230303065913.1246327-2-tao1.su@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 73dd99374a..67210ffd79 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -875,7 +875,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .type = CPUID_FEATURE_WORD, .feat_names = { NULL, NULL, NULL, NULL, - "avx-vnni", "avx512-bf16", NULL, NULL, + "avx-vnni", "avx512-bf16", NULL, "cmpccxadd", NULL, NULL, "fzrm", "fsrs", "fsrc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index d243e290d3..d5843c1555 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -907,6 +907,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) /* AVX512 BFloat16 Instruction */ #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) +/* CMPCCXADD Instructions */ +#define CPUID_7_1_EAX_CMPCCXADD (1U << 7) /* Fast Zero REP MOVS */ #define CPUID_7_1_EAX_FZRM (1U << 10) /* Fast Short REP STOS */ From 99ed8445ea27742a4df40f51a3a5fbd6f8e76fa5 Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 3 Mar 2023 14:59:09 +0800 Subject: [PATCH 06/16] target/i386: Add support for AMX-FP16 in CPUID enumeration Latest Intel platform Granite Rapids has introduced a new instruction - AMX-FP16, which performs dot-products of two FP16 tiles and accumulates the results into a packed single precision tile. AMX-FP16 adds FP16 capability and allows a FP16 GPU trained model to run faster without loss of accuracy or added SW overhead. The bit definition: CPUID.(EAX=7,ECX=1):EAX[bit 21] Add CPUID definition for AMX-FP16. Signed-off-by: Jiaxi Chen Signed-off-by: Tao Su Reviewed-by: Xiaoyao Li Message-Id: <20230303065913.1246327-3-tao1.su@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 67210ffd79..841c407d6d 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -879,7 +879,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, "fzrm", "fsrs", "fsrc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, "amx-fp16", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index d5843c1555..7deb37eca5 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -915,6 +915,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_7_1_EAX_FSRS (1U << 11) /* Fast Short REP CMPS/SCAS */ #define CPUID_7_1_EAX_FSRC (1U << 12) +/* Support Tile Computational Operations on FP16 Numbers */ +#define CPUID_7_1_EAX_AMX_FP16 (1U << 21) /* XFD Extend Feature Disabled */ #define CPUID_D_1_EAX_XFD (1U << 4) From a957a88416ecbec51e147cba9fe89b93f6646b3b Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 3 Mar 2023 14:59:10 +0800 Subject: [PATCH 07/16] target/i386: Add support for AVX-IFMA in CPUID enumeration AVX-IFMA is a new instruction in the latest Intel platform Sierra Forest. This instruction packed multiplies unsigned 52-bit integers and adds the low/high 52-bit products to Qword Accumulators. The bit definition: CPUID.(EAX=7,ECX=1):EAX[bit 23] Add CPUID definition for AVX-IFMA. Signed-off-by: Jiaxi Chen Signed-off-by: Tao Su Reviewed-by: Xiaoyao Li Message-Id: <20230303065913.1246327-4-tao1.su@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 841c407d6d..8eb2ee5045 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -879,7 +879,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, "fzrm", "fsrs", "fsrc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, "amx-fp16", NULL, NULL, + NULL, "amx-fp16", NULL, "avx-ifma", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 7deb37eca5..1f72d11e0c 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -917,6 +917,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_7_1_EAX_FSRC (1U << 12) /* Support Tile Computational Operations on FP16 Numbers */ #define CPUID_7_1_EAX_AMX_FP16 (1U << 21) +/* Support for VPMADD52[H,L]UQ */ +#define CPUID_7_1_EAX_AVX_IFMA (1U << 23) /* XFD Extend Feature Disabled */ #define CPUID_D_1_EAX_XFD (1U << 4) From eaaa197d5b112ea2758b54df58881a2626de3af5 Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 3 Mar 2023 14:59:11 +0800 Subject: [PATCH 08/16] target/i386: Add support for AVX-VNNI-INT8 in CPUID enumeration AVX-VNNI-INT8 is a new set of instructions in the latest Intel platform Sierra Forest, aims for the platform to have superior AI capabilities. This instruction multiplies the individual bytes of two unsigned or unsigned source operands, then adds and accumulates the results into the destination dword element size operand. The bit definition: CPUID.(EAX=7,ECX=1):EDX[bit 4] AVX-VNNI-INT8 is on a new feature bits leaf. Add a CPUID feature word FEAT_7_1_EDX for this leaf. Add CPUID definition for AVX-VNNI-INT8. Signed-off-by: Jiaxi Chen Signed-off-by: Tao Su Reviewed-by: Xiaoyao Li Message-Id: <20230303065913.1246327-5-tao1.su@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 22 +++++++++++++++++++++- target/i386/cpu.h | 4 ++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 8eb2ee5045..abceab2b69 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -667,6 +667,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_7_0_EDX_FEATURES CPUID_7_0_EDX_FSRM #define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \ CPUID_7_1_EAX_FSRC) +#define TCG_7_1_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1) @@ -890,6 +891,25 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .tcg_features = TCG_7_1_EAX_FEATURES, }, + [FEAT_7_1_EDX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + "avx-vnni-int8", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 1, + .reg = R_EDX, + }, + .tcg_features = TCG_7_1_EDX_FEATURES, + }, [FEAT_8000_0007_EDX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -5534,9 +5554,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } } else if (count == 1) { *eax = env->features[FEAT_7_1_EAX]; + *edx = env->features[FEAT_7_1_EDX]; *ebx = 0; *ecx = 0; - *edx = 0; } else { *eax = 0; *ebx = 0; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 1f72d11e0c..0b25d18075 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -626,6 +626,7 @@ typedef enum FeatureWord { FEAT_SGX_12_1_EAX, /* CPUID[EAX=0x12,ECX=1].EAX (SGX ATTRIBUTES[31:0]) */ FEAT_XSAVE_XSS_LO, /* CPUID[EAX=0xd,ECX=1].ECX */ FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ + FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */ FEATURE_WORDS, } FeatureWord; @@ -920,6 +921,9 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, /* Support for VPMADD52[H,L]UQ */ #define CPUID_7_1_EAX_AVX_IFMA (1U << 23) +/* Support for VPDPB[SU,UU,SS]D[,S] */ +#define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) + /* XFD Extend Feature Disabled */ #define CPUID_D_1_EAX_XFD (1U << 4) From ecd2e6ca037d7bf3673c5478590d686d5cd6135a Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 3 Mar 2023 14:59:12 +0800 Subject: [PATCH 09/16] target/i386: Add support for AVX-NE-CONVERT in CPUID enumeration AVX-NE-CONVERT is a new set of instructions which can convert low precision floating point like BF16/FP16 to high precision floating point FP32, as well as convert FP32 elements to BF16. This instruction allows the platform to have improved AI capabilities and better compatibility. The bit definition: CPUID.(EAX=7,ECX=1):EDX[bit 5] Add CPUID definition for AVX-NE-CONVERT. Signed-off-by: Jiaxi Chen Signed-off-by: Tao Su Reviewed-by: Xiaoyao Li Message-Id: <20230303065913.1246327-6-tao1.su@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index abceab2b69..0204a3ac80 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -895,7 +895,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .type = CPUID_FEATURE_WORD, .feat_names = { NULL, NULL, NULL, NULL, - "avx-vnni-int8", NULL, NULL, NULL, + "avx-vnni-int8", "avx-ne-convert", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 0b25d18075..b46d52f3fa 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -923,6 +923,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, /* Support for VPDPB[SU,UU,SS]D[,S] */ #define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) +/* AVX NE CONVERT Instructions */ +#define CPUID_7_1_EDX_AVX_NE_CONVERT (1U << 5) /* XFD Extend Feature Disabled */ #define CPUID_D_1_EAX_XFD (1U << 4) From d1a1111514333e46a98b136235f71eef90d610fa Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 3 Mar 2023 14:59:13 +0800 Subject: [PATCH 10/16] target/i386: Add support for PREFETCHIT0/1 in CPUID enumeration Latest Intel platform Granite Rapids has introduced a new instruction - PREFETCHIT0/1, which moves code to memory (cache) closer to the processor depending on specific hints. The bit definition: CPUID.(EAX=7,ECX=1):EDX[bit 14] Add CPUID definition for PREFETCHIT0/1. Signed-off-by: Jiaxi Chen Signed-off-by: Tao Su Reviewed-by: Xiaoyao Li Message-Id: <20230303065913.1246327-7-tao1.su@linux.intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 0204a3ac80..823320fe42 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -897,7 +897,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, "avx-vnni-int8", "avx-ne-convert", NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, NULL, "prefetchiti", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index b46d52f3fa..8504aaac68 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -925,6 +925,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) /* AVX NE CONVERT Instructions */ #define CPUID_7_1_EDX_AVX_NE_CONVERT (1U << 5) +/* PREFETCHIT0/1 Instructions */ +#define CPUID_7_1_EDX_PREFETCHITI (1U << 14) /* XFD Extend Feature Disabled */ #define CPUID_D_1_EAX_XFD (1U << 4) From 193ba660af14fc20cddaa04c20bb79ed05d1dbf0 Mon Sep 17 00:00:00 2001 From: David 'Digit' Turner Date: Wed, 5 Apr 2023 14:59:19 +0200 Subject: [PATCH 11/16] Fix libvhost-user.c compilation. The source file uses VIRTIO_F_VERSION_1 which is not defined by on Debian 10. The system-provided which does not include the macro definition is included through , so fix the issue by including the standard-headers version before that. Signed-off-by: David 'Digit' Turner Message-Id: <20230405125920.2951721-2-digit@google.com> Signed-off-by: Paolo Bonzini --- subprojects/libvhost-user/libvhost-user.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index 0abd898a52..8fb61e2df2 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -32,6 +32,12 @@ #include #include +/* Necessary to provide VIRTIO_F_VERSION_1 on system + * with older linux headers. Must appear before + * below. + */ +#include "standard-headers/linux/virtio_config.h" + #if defined(__linux__) #include #include From 9fc7dd234f527f411b08e6a266a5ab8e7b79f64f Mon Sep 17 00:00:00 2001 From: David 'Digit' Turner Date: Wed, 5 Apr 2023 19:21:08 +0200 Subject: [PATCH 12/16] update-linux-headers.sh: Add missing kernel headers. Add , used by hw/display/virtio-gpu-udmabuf.c Add , used by qga/commands-posix.c Add used by kvm-all.c, which requires the _BITUL() macro definition to be available. Without these, QEMU will not compile on Debian 10 systems. Signed-off-by: David 'Digit' Turner Message-Id: <20230405172109.3081788-3-digit@google.com> [Add for __DECLARE_FLEX_ARRAY. - Paolo] Signed-off-by: Paolo Bonzini --- scripts/update-linux-headers.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index b1ad99cba8..35a64bb501 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -160,8 +160,8 @@ done rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" -for header in kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ - psci.h psp-sev.h userfaultfd.h mman.h vduse.h; do +for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ + psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h vduse.h; do cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" done From c5c0fdbe39aa2f65fecd93ab3082b1b8a7e2a318 Mon Sep 17 00:00:00 2001 From: David 'Digit' Turner Date: Wed, 5 Apr 2023 19:21:09 +0200 Subject: [PATCH 13/16] Update linux headers to v6.3rc5 commit 7e364e56293bb98cae1b55fd835f5991c4e96e7d Signed-off-by: David 'Digit' Turner Message-Id: <20230405172109.3081788-4-digit@google.com> Signed-off-by: Paolo Bonzini --- include/standard-headers/drm/drm_fourcc.h | 12 ++ include/standard-headers/linux/ethtool.h | 48 +++++++- include/standard-headers/linux/fuse.h | 45 +++++++- include/standard-headers/linux/pci_regs.h | 1 + include/standard-headers/linux/vhost_types.h | 2 + include/standard-headers/linux/virtio_blk.h | 105 +++++++++++++++++ linux-headers/asm-arm64/kvm.h | 1 + linux-headers/asm-x86/kvm.h | 34 +++++- linux-headers/linux/const.h | 36 ++++++ linux-headers/linux/kvm.h | 9 ++ linux-headers/linux/memfd.h | 39 +++++++ linux-headers/linux/nvme_ioctl.h | 114 +++++++++++++++++++ linux-headers/linux/stddef.h | 47 ++++++++ linux-headers/linux/vfio.h | 15 ++- linux-headers/linux/vhost.h | 8 ++ 15 files changed, 506 insertions(+), 10 deletions(-) create mode 100644 linux-headers/linux/const.h create mode 100644 linux-headers/linux/memfd.h create mode 100644 linux-headers/linux/nvme_ioctl.h create mode 100644 linux-headers/linux/stddef.h diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index 69cab17b38..dc3e6112c1 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -87,6 +87,18 @@ extern "C" { * * The authoritative list of format modifier codes is found in * `include/uapi/drm/drm_fourcc.h` + * + * Open Source User Waiver + * ----------------------- + * + * Because this is the authoritative source for pixel formats and modifiers + * referenced by GL, Vulkan extensions and other standards and hence used both + * by open source and closed source driver stacks, the usual requirement for an + * upstream in-kernel or open source userspace user does not apply. + * + * To ensure, as much as feasible, compatibility across stacks and avoid + * confusion with incompatible enumerations stakeholders for all relevant driver + * stacks should approve additions. */ #define fourcc_code(a, b, c, d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | \ diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 87176ab075..99fcddf04f 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -711,6 +711,24 @@ enum ethtool_stringset { ETH_SS_COUNT }; +/** + * enum ethtool_mac_stats_src - source of ethtool MAC statistics + * @ETHTOOL_MAC_STATS_SRC_AGGREGATE: + * if device supports a MAC merge layer, this retrieves the aggregate + * statistics of the eMAC and pMAC. Otherwise, it retrieves just the + * statistics of the single (express) MAC. + * @ETHTOOL_MAC_STATS_SRC_EMAC: + * if device supports a MM layer, this retrieves the eMAC statistics. + * Otherwise, it retrieves the statistics of the single (express) MAC. + * @ETHTOOL_MAC_STATS_SRC_PMAC: + * if device supports a MM layer, this retrieves the pMAC statistics. + */ +enum ethtool_mac_stats_src { + ETHTOOL_MAC_STATS_SRC_AGGREGATE, + ETHTOOL_MAC_STATS_SRC_EMAC, + ETHTOOL_MAC_STATS_SRC_PMAC, +}; + /** * enum ethtool_module_power_mode_policy - plug-in module power mode policy * @ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH: Module is always in high power mode. @@ -779,6 +797,31 @@ enum ethtool_podl_pse_pw_d_status { ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR, }; +/** + * enum ethtool_mm_verify_status - status of MAC Merge Verify function + * @ETHTOOL_MM_VERIFY_STATUS_UNKNOWN: + * verification status is unknown + * @ETHTOOL_MM_VERIFY_STATUS_INITIAL: + * the 802.3 Verify State diagram is in the state INIT_VERIFICATION + * @ETHTOOL_MM_VERIFY_STATUS_VERIFYING: + * the Verify State diagram is in the state VERIFICATION_IDLE, + * SEND_VERIFY or WAIT_FOR_RESPONSE + * @ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED: + * indicates that the Verify State diagram is in the state VERIFIED + * @ETHTOOL_MM_VERIFY_STATUS_FAILED: + * the Verify State diagram is in the state VERIFY_FAIL + * @ETHTOOL_MM_VERIFY_STATUS_DISABLED: + * verification of preemption operation is disabled + */ +enum ethtool_mm_verify_status { + ETHTOOL_MM_VERIFY_STATUS_UNKNOWN, + ETHTOOL_MM_VERIFY_STATUS_INITIAL, + ETHTOOL_MM_VERIFY_STATUS_VERIFYING, + ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED, + ETHTOOL_MM_VERIFY_STATUS_FAILED, + ETHTOOL_MM_VERIFY_STATUS_DISABLED, +}; + /** * struct ethtool_gstrings - string set for data tagging * @cmd: Command number = %ETHTOOL_GSTRINGS @@ -1183,7 +1226,7 @@ struct ethtool_rxnfc { uint32_t rule_cnt; uint32_t rss_context; }; - uint32_t rule_locs[0]; + uint32_t rule_locs[]; }; @@ -1741,6 +1784,9 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT = 96, ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT = 97, ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT = 98, + ETHTOOL_LINK_MODE_10baseT1S_Full_BIT = 99, + ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100, + ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index a1af78d989..35c131a107 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -201,6 +201,11 @@ * 7.38 * - add FUSE_EXPIRE_ONLY flag to fuse_notify_inval_entry * - add FOPEN_PARALLEL_DIRECT_WRITES + * - add total_extlen to fuse_in_header + * - add FUSE_MAX_NR_SECCTX + * - add extension header + * - add FUSE_EXT_GROUPS + * - add FUSE_CREATE_SUPP_GROUP */ #ifndef _LINUX_FUSE_H @@ -358,6 +363,8 @@ struct fuse_file_lock { * FUSE_SECURITY_CTX: add security context to create, mkdir, symlink, and * mknod * FUSE_HAS_INODE_DAX: use per inode DAX + * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir, + * symlink and mknod (single group that matches parent) */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -394,6 +401,7 @@ struct fuse_file_lock { /* bits 32..63 get shifted down 32 bits into the flags2 field */ #define FUSE_SECURITY_CTX (1ULL << 32) #define FUSE_HAS_INODE_DAX (1ULL << 33) +#define FUSE_CREATE_SUPP_GROUP (1ULL << 34) /** * CUSE INIT request/reply flags @@ -499,6 +507,17 @@ struct fuse_file_lock { */ #define FUSE_EXPIRE_ONLY (1 << 0) +/** + * extension type + * FUSE_MAX_NR_SECCTX: maximum value of &fuse_secctx_header.nr_secctx + * FUSE_EXT_GROUPS: &fuse_supp_groups extension + */ +enum fuse_ext_type { + /* Types 0..31 are reserved for fuse_secctx_header */ + FUSE_MAX_NR_SECCTX = 31, + FUSE_EXT_GROUPS = 32, +}; + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -882,7 +901,8 @@ struct fuse_in_header { uint32_t uid; uint32_t gid; uint32_t pid; - uint32_t padding; + uint16_t total_extlen; /* length of extensions in 8byte units */ + uint16_t padding; }; struct fuse_out_header { @@ -1043,4 +1063,27 @@ struct fuse_secctx_header { uint32_t nr_secctx; }; +/** + * struct fuse_ext_header - extension header + * @size: total size of this extension including this header + * @type: type of extension + * + * This is made compatible with fuse_secctx_header by using type values > + * FUSE_MAX_NR_SECCTX + */ +struct fuse_ext_header { + uint32_t size; + uint32_t type; +}; + +/** + * struct fuse_supp_groups - Supplementary group extension + * @nr_groups: number of supplementary groups + * @groups: flexible array of group IDs + */ +struct fuse_supp_groups { + uint32_t nr_groups; + uint32_t groups[]; +}; + #endif /* _LINUX_FUSE_H */ diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index 85ab127881..dc2000e0fe 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -693,6 +693,7 @@ #define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */ #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ #define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ +#define PCI_EXP_LNKSTA2_FLIT 0x0400 /* Flit Mode Status */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ #define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h index c41a73fe36..88600e2d9f 100644 --- a/include/standard-headers/linux/vhost_types.h +++ b/include/standard-headers/linux/vhost_types.h @@ -163,5 +163,7 @@ struct vhost_vdpa_iova_range { #define VHOST_BACKEND_F_IOTLB_ASID 0x3 /* Device can be suspended */ #define VHOST_BACKEND_F_SUSPEND 0x4 +/* Device can be resumed */ +#define VHOST_BACKEND_F_RESUME 0x5 #endif diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h index e81715cd70..7155b1a470 100644 --- a/include/standard-headers/linux/virtio_blk.h +++ b/include/standard-headers/linux/virtio_blk.h @@ -41,6 +41,7 @@ #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ #define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ +#define VIRTIO_BLK_F_ZONED 17 /* Zoned block device */ /* Legacy feature bits */ #ifndef VIRTIO_BLK_NO_LEGACY @@ -135,6 +136,16 @@ struct virtio_blk_config { /* Secure erase commands must be aligned to this number of sectors. */ __virtio32 secure_erase_sector_alignment; + /* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */ + struct virtio_blk_zoned_characteristics { + uint32_t zone_sectors; + uint32_t max_open_zones; + uint32_t max_active_zones; + uint32_t max_append_sectors; + uint32_t write_granularity; + uint8_t model; + uint8_t unused2[3]; + } zoned; } QEMU_PACKED; /* @@ -172,6 +183,27 @@ struct virtio_blk_config { /* Secure erase command */ #define VIRTIO_BLK_T_SECURE_ERASE 14 +/* Zone append command */ +#define VIRTIO_BLK_T_ZONE_APPEND 15 + +/* Report zones command */ +#define VIRTIO_BLK_T_ZONE_REPORT 16 + +/* Open zone command */ +#define VIRTIO_BLK_T_ZONE_OPEN 18 + +/* Close zone command */ +#define VIRTIO_BLK_T_ZONE_CLOSE 20 + +/* Finish zone command */ +#define VIRTIO_BLK_T_ZONE_FINISH 22 + +/* Reset zone command */ +#define VIRTIO_BLK_T_ZONE_RESET 24 + +/* Reset All zones command */ +#define VIRTIO_BLK_T_ZONE_RESET_ALL 26 + #ifndef VIRTIO_BLK_NO_LEGACY /* Barrier before this op. */ #define VIRTIO_BLK_T_BARRIER 0x80000000 @@ -191,6 +223,72 @@ struct virtio_blk_outhdr { __virtio64 sector; }; +/* + * Supported zoned device models. + */ + +/* Regular block device */ +#define VIRTIO_BLK_Z_NONE 0 +/* Host-managed zoned device */ +#define VIRTIO_BLK_Z_HM 1 +/* Host-aware zoned device */ +#define VIRTIO_BLK_Z_HA 2 + +/* + * Zone descriptor. A part of VIRTIO_BLK_T_ZONE_REPORT command reply. + */ +struct virtio_blk_zone_descriptor { + /* Zone capacity */ + uint64_t z_cap; + /* The starting sector of the zone */ + uint64_t z_start; + /* Zone write pointer position in sectors */ + uint64_t z_wp; + /* Zone type */ + uint8_t z_type; + /* Zone state */ + uint8_t z_state; + uint8_t reserved[38]; +}; + +struct virtio_blk_zone_report { + uint64_t nr_zones; + uint8_t reserved[56]; + struct virtio_blk_zone_descriptor zones[]; +}; + +/* + * Supported zone types. + */ + +/* Conventional zone */ +#define VIRTIO_BLK_ZT_CONV 1 +/* Sequential Write Required zone */ +#define VIRTIO_BLK_ZT_SWR 2 +/* Sequential Write Preferred zone */ +#define VIRTIO_BLK_ZT_SWP 3 + +/* + * Zone states that are available for zones of all types. + */ + +/* Not a write pointer (conventional zones only) */ +#define VIRTIO_BLK_ZS_NOT_WP 0 +/* Empty */ +#define VIRTIO_BLK_ZS_EMPTY 1 +/* Implicitly Open */ +#define VIRTIO_BLK_ZS_IOPEN 2 +/* Explicitly Open */ +#define VIRTIO_BLK_ZS_EOPEN 3 +/* Closed */ +#define VIRTIO_BLK_ZS_CLOSED 4 +/* Read-Only */ +#define VIRTIO_BLK_ZS_RDONLY 13 +/* Full */ +#define VIRTIO_BLK_ZS_FULL 14 +/* Offline */ +#define VIRTIO_BLK_ZS_OFFLINE 15 + /* Unmap this range (only valid for write zeroes command) */ #define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001 @@ -217,4 +315,11 @@ struct virtio_scsi_inhdr { #define VIRTIO_BLK_S_OK 0 #define VIRTIO_BLK_S_IOERR 1 #define VIRTIO_BLK_S_UNSUPP 2 + +/* Error codes that are specific to zoned block devices */ +#define VIRTIO_BLK_S_ZONE_INVALID_CMD 3 +#define VIRTIO_BLK_S_ZONE_UNALIGNED_WP 4 +#define VIRTIO_BLK_S_ZONE_OPEN_RESOURCE 5 +#define VIRTIO_BLK_S_ZONE_ACTIVE_RESOURCE 6 + #endif /* _LINUX_VIRTIO_BLK_H */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index a7cfefb3a8..d7e7bb885e 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -109,6 +109,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */ #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ +#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ struct kvm_vcpu_init { __u32 target; diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 2747d2ce14..2937e7bf69 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -9,6 +9,7 @@ #include #include +#include #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 @@ -505,8 +506,8 @@ struct kvm_nested_state { * KVM_{GET,PUT}_NESTED_STATE ioctl values. */ union { - struct kvm_vmx_nested_state_data vmx[0]; - struct kvm_svm_nested_state_data svm[0]; + __DECLARE_FLEX_ARRAY(struct kvm_vmx_nested_state_data, vmx); + __DECLARE_FLEX_ARRAY(struct kvm_svm_nested_state_data, svm); } data; }; @@ -523,6 +524,35 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 +#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0) +#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) + +/* + * Masked event layout. + * Bits Description + * ---- ----------- + * 7:0 event select (low bits) + * 15:8 umask match + * 31:16 unused + * 35:32 event select (high bits) + * 36:54 unused + * 55 exclude bit + * 63:56 umask mask + */ + +#define KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, exclude) \ + (((event_select) & 0xFFULL) | (((event_select) & 0XF00ULL) << 24) | \ + (((mask) & 0xFFULL) << 56) | \ + (((match) & 0xFFULL) << 8) | \ + ((__u64)(!!(exclude)) << 55)) + +#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ + (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8)) +#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) + /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ #define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ diff --git a/linux-headers/linux/const.h b/linux-headers/linux/const.h new file mode 100644 index 0000000000..5e48987251 --- /dev/null +++ b/linux-headers/linux/const.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* const.h: Macros for dealing with constants. */ + +#ifndef _LINUX_CONST_H +#define _LINUX_CONST_H + +/* Some constant macros are used in both assembler and + * C code. Therefore we cannot annotate them always with + * 'UL' and other type specifiers unilaterally. We + * use the following macros to deal with this. + * + * Similarly, _AT() will cast an expression with a type in C, but + * leave it unchanged in asm. + */ + +#ifdef __ASSEMBLY__ +#define _AC(X,Y) X +#define _AT(T,X) X +#else +#define __AC(X,Y) (X##Y) +#define _AC(X,Y) __AC(X,Y) +#define _AT(T,X) ((T)(X)) +#endif + +#define _UL(x) (_AC(x, UL)) +#define _ULL(x) (_AC(x, ULL)) + +#define _BITUL(x) (_UL(1) << (x)) +#define _BITULL(x) (_ULL(1) << (x)) + +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) + +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + +#endif /* _LINUX_CONST_H */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 1e2c16cfe3..599de3c6e3 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -581,6 +581,8 @@ struct kvm_s390_mem_op { struct { __u8 ar; /* the access register number */ __u8 key; /* access key, ignored if flag unset */ + __u8 pad1[6]; /* ignored */ + __u64 old_addr; /* ignored if cmpxchg flag unset */ }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* ignored */ @@ -593,11 +595,17 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_SIDA_WRITE 3 #define KVM_S390_MEMOP_ABSOLUTE_READ 4 #define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 + /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) #define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) + /* for KVM_INTERRUPT */ struct kvm_interrupt { /* in */ @@ -1173,6 +1181,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225 +#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/linux-headers/linux/memfd.h b/linux-headers/linux/memfd.h new file mode 100644 index 0000000000..01c0324e77 --- /dev/null +++ b/linux-headers/linux/memfd.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_MEMFD_H +#define _LINUX_MEMFD_H + +#include + +/* flags for memfd_create(2) (unsigned int) */ +#define MFD_CLOEXEC 0x0001U +#define MFD_ALLOW_SEALING 0x0002U +#define MFD_HUGETLB 0x0004U +/* not executable and sealed to prevent changing to executable. */ +#define MFD_NOEXEC_SEAL 0x0008U +/* executable */ +#define MFD_EXEC 0x0010U + +/* + * Huge page size encoding when MFD_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h. + * All known huge page size encodings are provided here. It is the + * responsibility of the application to know which sizes are supported on + * the running system. See mmap(2) man page for details. + */ +#define MFD_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define MFD_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define MFD_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define MFD_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define MFD_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB +#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB +#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB + +#endif /* _LINUX_MEMFD_H */ diff --git a/linux-headers/linux/nvme_ioctl.h b/linux-headers/linux/nvme_ioctl.h new file mode 100644 index 0000000000..f8df31dbc4 --- /dev/null +++ b/linux-headers/linux/nvme_ioctl.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Definitions for the NVM Express ioctl interface + * Copyright (c) 2011-2014, Intel Corporation. + */ + +#ifndef _LINUX_NVME_IOCTL_H +#define _LINUX_NVME_IOCTL_H + +#include + +struct nvme_user_io { + __u8 opcode; + __u8 flags; + __u16 control; + __u16 nblocks; + __u16 rsvd; + __u64 metadata; + __u64 addr; + __u64 slba; + __u32 dsmgmt; + __u32 reftag; + __u16 apptag; + __u16 appmask; +}; + +struct nvme_passthru_cmd { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u32 result; +}; + +struct nvme_passthru_cmd64 { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + union { + __u32 data_len; /* for non-vectored io */ + __u32 vec_cnt; /* for vectored io */ + }; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u32 rsvd2; + __u64 result; +}; + +/* same as struct nvme_passthru_cmd64, minus the 8b result field */ +struct nvme_uring_cmd { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u32 rsvd2; +}; + +#define nvme_admin_cmd nvme_passthru_cmd + +#define NVME_IOCTL_ID _IO('N', 0x40) +#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) +#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) +#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) +#define NVME_IOCTL_RESET _IO('N', 0x44) +#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) +#define NVME_IOCTL_RESCAN _IO('N', 0x46) +#define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64) +#define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64) +#define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64) + +/* io_uring async commands: */ +#define NVME_URING_CMD_IO _IOWR('N', 0x80, struct nvme_uring_cmd) +#define NVME_URING_CMD_IO_VEC _IOWR('N', 0x81, struct nvme_uring_cmd) +#define NVME_URING_CMD_ADMIN _IOWR('N', 0x82, struct nvme_uring_cmd) +#define NVME_URING_CMD_ADMIN_VEC _IOWR('N', 0x83, struct nvme_uring_cmd) + +#endif /* _LINUX_NVME_IOCTL_H */ diff --git a/linux-headers/linux/stddef.h b/linux-headers/linux/stddef.h new file mode 100644 index 0000000000..bb6ea517ef --- /dev/null +++ b/linux-headers/linux/stddef.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_STDDEF_H +#define _LINUX_STDDEF_H + + + +#ifndef __always_inline +#define __always_inline __inline__ +#endif + +/** + * __struct_group() - Create a mirrored named and anonyomous struct + * + * @TAG: The tag name for the named sub-struct (usually empty) + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes (usually empty) + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical layout + * and size: one anonymous and one named. The former's members can be used + * normally without sub-struct naming, and the latter can be used to + * reason about the start, end, and size of the group of struct members. + * The named struct can also be explicitly tagged for layer reuse, as well + * as both having struct attributes appended. + */ +#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ + } + +/** + * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. + */ +#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \ + struct { \ + struct { } __empty_ ## NAME; \ + TYPE NAME[]; \ + } +#endif diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index c59692ce0b..4a534edbdc 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -49,7 +49,11 @@ /* Supports VFIO_DMA_UNMAP_FLAG_ALL */ #define VFIO_UNMAP_ALL 9 -/* Supports the vaddr flag for DMA map and unmap */ +/* + * Supports the vaddr flag for DMA map and unmap. Not supported for mediated + * devices, so this capability is subject to change as groups are added or + * removed. + */ #define VFIO_UPDATE_VADDR 10 /* @@ -1343,8 +1347,7 @@ struct vfio_iommu_type1_info_dma_avail { * Map process virtual addresses to IO virtual addresses using the * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required. * - * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and - * unblock translation of host virtual addresses in the iova range. The vaddr + * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova. The vaddr * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To * maintain memory consistency within the user application, the updated vaddr * must address the same memory object as originally mapped. Failure to do so @@ -1395,9 +1398,9 @@ struct vfio_bitmap { * must be 0. This cannot be combined with the get-dirty-bitmap flag. * * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host - * virtual addresses in the iova range. Tasks that attempt to translate an - * iova's vaddr will block. DMA to already-mapped pages continues. This - * cannot be combined with the get-dirty-bitmap flag. + * virtual addresses in the iova range. DMA to already-mapped pages continues. + * Groups may not be added to the container while any addresses are invalid. + * This cannot be combined with the get-dirty-bitmap flag. */ struct vfio_iommu_type1_dma_unmap { __u32 argsz; diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index f9f115a7c7..92e1b700b5 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -180,4 +180,12 @@ */ #define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D) +/* Resume a device so it can resume processing virtqueue requests + * + * After the return of this ioctl the device will have restored all the + * necessary states and it is fully operational to continue processing the + * virtqueue descriptors. + */ +#define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E) + #endif From 9260993e27cdbbd2e829d405cc63b1faefec6088 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Apr 2023 14:54:02 +0200 Subject: [PATCH 14/16] tests: vhost-user-test: release mutex on protocol violation chr_read() is printing an error message and returning with s->data_mutex taken. This can potentially cause a hang. Reported by Coverity. Signed-off-by: Paolo Bonzini --- tests/qtest/vhost-user-test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c index bf9f7c4248..e4f95b2858 100644 --- a/tests/qtest/vhost-user-test.c +++ b/tests/qtest/vhost-user-test.c @@ -351,7 +351,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) if (size != msg.size) { qos_printf("%s: Wrong message size received %d != %d\n", __func__, size, msg.size); - return; + goto out; } } @@ -509,6 +509,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) break; } +out: g_mutex_unlock(&s->data_mutex); } From d66ba6dc1cce914673bd8a89fca30a7715ea70d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 20 Apr 2023 22:29:39 +0200 Subject: [PATCH 15/16] async: Suppress GCC13 false positive in aio_bh_poll() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC13 reports an error : ../util/async.c: In function ‘aio_bh_poll’: include/qemu/queue.h:303:22: error: storing the address of local variable ‘slice’ in ‘*ctx.bh_slice_list.sqh_last’ [-Werror=dangling-pointer=] 303 | (head)->sqh_last = &(elm)->field.sqe_next; \ | ~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~ ../util/async.c:169:5: note: in expansion of macro ‘QSIMPLEQ_INSERT_TAIL’ 169 | QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next); | ^~~~~~~~~~~~~~~~~~~~ ../util/async.c:161:17: note: ‘slice’ declared here 161 | BHListSlice slice; | ^~~~~ ../util/async.c:161:17: note: ‘ctx’ declared here But the local variable 'slice' is removed from the global context list in following loop of the same routine. Add a pragma to silent GCC. Cc: Stefan Hajnoczi Cc: Paolo Bonzini Cc: Daniel P. Berrangé Signed-off-by: Cédric Le Goater Reviewed-by: Daniel Henrique Barboza Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Hajnoczi Reviewed-by: Thomas Huth Tested-by: Daniel Henrique Barboza Message-Id: <20230420202939.1982044-1-clg@kaod.org> Signed-off-by: Paolo Bonzini --- util/async.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/util/async.c b/util/async.c index 21016a1ac7..856e1a8a33 100644 --- a/util/async.c +++ b/util/async.c @@ -164,7 +164,21 @@ int aio_bh_poll(AioContext *ctx) /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */ QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list); + + /* + * GCC13 [-Werror=dangling-pointer=] complains that the local variable + * 'slice' is being stored in the global 'ctx->bh_slice_list' but the + * list is emptied before this function returns. + */ +#if !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wdangling-pointer=" +#endif QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next); +#if !defined(__clang__) +#pragma GCC diagnostic pop +#endif while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) { QEMUBH *bh; From 42abcc584174166342297421209932a87bdb85f1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Mar 2023 11:07:04 +0100 Subject: [PATCH 16/16] cpus-common: stop using mb_set/mb_read Use a store-release at the end of the work item, and a load-acquire when waiting for the item to be completed. This is the standard message passing pattern and is both enough and clearer than mb_read/mb_set. Signed-off-by: Paolo Bonzini --- cpus-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpus-common.c b/cpus-common.c index b0047e456f..a53716deb4 100644 --- a/cpus-common.c +++ b/cpus-common.c @@ -157,7 +157,7 @@ void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data, wi.exclusive = false; queue_work_on_cpu(cpu, &wi); - while (!qatomic_mb_read(&wi.done)) { + while (!qatomic_load_acquire(&wi.done)) { CPUState *self_cpu = current_cpu; qemu_cond_wait(&qemu_work_cond, mutex); @@ -363,7 +363,7 @@ void process_queued_cpu_work(CPUState *cpu) if (wi->free) { g_free(wi); } else { - qatomic_mb_set(&wi->done, true); + qatomic_store_release(&wi->done, true); } } qemu_mutex_unlock(&cpu->work_mutex);