Skip to content

Commit

Permalink
kvm: x86: only provide PV features if enabled in guest's CPUID
Browse files Browse the repository at this point in the history
KVM unconditionally provides PV features to the guest, regardless of the
configured CPUID. An unwitting guest that doesn't check
KVM_CPUID_FEATURES before use could access paravirt features that
userspace did not intend to provide. Fix this by checking the guest's
CPUID before performing any paravirtual operations.

Introduce a capability, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, to gate the
aforementioned enforcement. Migrating a VM from a host w/o this patch to
a host with this patch could silently change the ABI exposed to the
guest, warranting that we default to the old behavior and opt-in for
the new one.

Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Peter Shier <pshier@google.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Change-Id: I202a0926f65035b872bfe8ad15307c026de59a98
Message-Id: <20200818152429.1923996-4-oupton@google.com>
Reviewed-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  • Loading branch information
oupton authored and bonzini committed Oct 21, 2020
1 parent 210dfd9 commit 66570e9
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 5 deletions.
11 changes: 11 additions & 0 deletions Documentation/virt/kvm/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6380,3 +6380,14 @@ ranges that KVM should reject access to.
In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
trap and emulate MSRs that are outside of the scope of KVM as well as
limit the attack surface on KVM's MSR emulation code.


8.26 KVM_CAP_ENFORCE_PV_CPUID
-----------------------------

Architectures: x86

When enabled, KVM will disable paravirtual features provided to the
guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
(0x40000001). Otherwise, a guest may use the paravirtual features
regardless of what has actually been exposed through the CPUID leaf.
15 changes: 15 additions & 0 deletions arch/x86/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,21 @@ struct kvm_vcpu_arch {

/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;

/* pv related cpuid info */
struct {
/*
* value of the eax register in the KVM_CPUID_FEATURES CPUID
* leaf.
*/
u32 features;

/*
* indicates whether pv emulation should be disabled if features
* are not present in the guest's cpuid
*/
bool enforce;
} pv_cpuid;
};

struct kvm_lpage_info {
Expand Down
7 changes: 7 additions & 0 deletions arch/x86/kvm/cpuid.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);

/*
* save the feature bitmap to avoid cpuid lookup for every PV
* operation
*/
if (best)
vcpu->arch.pv_cpuid.features = best->eax;

if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
if (best)
Expand Down
10 changes: 10 additions & 0 deletions arch/x86/kvm/cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "x86.h"
#include <asm/cpu.h>
#include <asm/processor.h>
#include <uapi/asm/kvm_para.h>

extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
void kvm_set_cpu_caps(void);
Expand Down Expand Up @@ -313,4 +314,13 @@ static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
}

static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
unsigned int kvm_feature)
{
if (!vcpu->arch.pv_cpuid.enforce)
return true;

return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
}

#endif
67 changes: 62 additions & 5 deletions arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -2877,6 +2877,14 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
if (data & 0x30)
return 1;

if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
(data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
return 1;

if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
(data & KVM_ASYNC_PF_DELIVERY_AS_INT))
return 1;

if (!lapic_in_kernel(vcpu))
return data ? 1 : 0;

Expand Down Expand Up @@ -2954,10 +2962,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
* Doing a TLB flush here, on the guest's behalf, can avoid
* expensive IPIs.
*/
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
st->preempted & KVM_VCPU_FLUSH_TLB);
if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
kvm_vcpu_flush_tlb_guest(vcpu);
if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
st->preempted & KVM_VCPU_FLUSH_TLB);
if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
kvm_vcpu_flush_tlb_guest(vcpu);
}

vcpu->arch.st.preempted = 0;

Expand Down Expand Up @@ -3118,30 +3128,54 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.smi_count = data;
break;
case MSR_KVM_WALL_CLOCK_NEW:
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
return 1;

kvm_write_wall_clock(vcpu->kvm, data);
break;
case MSR_KVM_WALL_CLOCK:
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
return 1;

kvm_write_wall_clock(vcpu->kvm, data);
break;
case MSR_KVM_SYSTEM_TIME_NEW:
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
return 1;

kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
break;
case MSR_KVM_SYSTEM_TIME:
kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
return 1;

kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
break;
case MSR_KVM_ASYNC_PF_EN:
if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
return 1;

if (kvm_pv_enable_async_pf(vcpu, data))
return 1;
break;
case MSR_KVM_ASYNC_PF_INT:
if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
return 1;

if (kvm_pv_enable_async_pf_int(vcpu, data))
return 1;
break;
case MSR_KVM_ASYNC_PF_ACK:
if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
return 1;
if (data & 0x1) {
vcpu->arch.apf.pageready_pending = false;
kvm_check_async_pf_completion(vcpu);
}
break;
case MSR_KVM_STEAL_TIME:
if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
return 1;

if (unlikely(!sched_info_on()))
return 1;
Expand All @@ -3158,11 +3192,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)

break;
case MSR_KVM_PV_EOI_EN:
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
return 1;

if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
return 1;
break;

case MSR_KVM_POLL_CONTROL:
if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
return 1;

/* only enable bit supported */
if (data & (-1ULL << 1))
return 1;
Expand Down Expand Up @@ -3658,6 +3698,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_LAST_CPU:
case KVM_CAP_X86_USER_SPACE_MSR:
case KVM_CAP_X86_MSR_FILTER:
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
Expand Down Expand Up @@ -4528,6 +4569,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,

return kvm_x86_ops.enable_direct_tlbflush(vcpu);

case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
vcpu->arch.pv_cpuid.enforce = cap->args[0];

return 0;

default:
return -EINVAL;
}
Expand Down Expand Up @@ -8000,11 +8046,16 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
goto out;
}

ret = -KVM_ENOSYS;

switch (nr) {
case KVM_HC_VAPIC_POLL_IRQ:
ret = 0;
break;
case KVM_HC_KICK_CPU:
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
break;

kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
kvm_sched_yield(vcpu->kvm, a1);
ret = 0;
Expand All @@ -8015,9 +8066,15 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
break;
#endif
case KVM_HC_SEND_IPI:
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
break;

ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
case KVM_HC_SCHED_YIELD:
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
break;

kvm_sched_yield(vcpu->kvm, a0);
ret = 0;
break;
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1052,6 +1052,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_STEAL_TIME 187
#define KVM_CAP_X86_USER_SPACE_MSR 188
#define KVM_CAP_X86_MSR_FILTER 189
#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190

#ifdef KVM_CAP_IRQ_ROUTING

Expand Down

0 comments on commit 66570e9

Please sign in to comment.