Skip to content

Commit

Permalink
VMX: use a single, global APIC access page
Browse files Browse the repository at this point in the history
The address of this page is used by the CPU only to recognize when to
access the virtual APIC page instead. No accesses would ever go to this
page. It only needs to be present in the (CPU) page tables so that
address translation will produce its address as result for respective
accesses.

By making this page global, we also eliminate the need to refcount it,
or to assign it to any domain in the first place.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Acked-by: Tim Deegan <tim@xen.org>
  • Loading branch information
jbeulich committed Apr 27, 2021
1 parent 11e7f0f commit 58850b9
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 50 deletions.
71 changes: 23 additions & 48 deletions xen/arch/x86/hvm/vmx/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,7 @@ boolean_param("force-ept", opt_force_ept);
static void vmx_ctxt_switch_from(struct vcpu *v);
static void vmx_ctxt_switch_to(struct vcpu *v);

static int vmx_alloc_vlapic_mapping(struct domain *d);
static void vmx_free_vlapic_mapping(struct domain *d);
static int alloc_vlapic_mapping(void);
static void vmx_install_vlapic_mapping(struct vcpu *v);
static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
unsigned int flags);
Expand All @@ -78,6 +77,8 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content);
static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
static void vmx_invlpg(struct vcpu *v, unsigned long linear);

static mfn_t __read_mostly apic_access_mfn = INVALID_MFN_INITIALIZER;

/* Values for domain's ->arch.hvm_domain.pi_ops.flags. */
#define PI_CSW_FROM (1u << 0)
#define PI_CSW_TO (1u << 1)
Expand Down Expand Up @@ -401,7 +402,6 @@ static int vmx_domain_initialise(struct domain *d)
.to = vmx_ctxt_switch_to,
.tail = vmx_do_resume,
};
int rc;

d->arch.ctxt_switch = &csw;

Expand All @@ -411,28 +411,22 @@ static int vmx_domain_initialise(struct domain *d)
*/
d->arch.hvm.vmx.exec_sp = is_hardware_domain(d) || opt_ept_exec_sp;

if ( !has_vlapic(d) )
return 0;

if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
return rc;

return 0;
}

static void vmx_domain_relinquish_resources(struct domain *d)
static void domain_creation_finished(struct domain *d)
{
if ( !has_vlapic(d) )
gfn_t gfn = gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE);
uint8_t ipat;

if ( !has_vlapic(d) || mfn_eq(apic_access_mfn, INVALID_MFN) )
return;

vmx_free_vlapic_mapping(d);
}
ASSERT(epte_get_entry_emt(d, gfn_x(gfn), apic_access_mfn, 0, &ipat,
true) == MTRR_TYPE_WRBACK);
ASSERT(ipat);

static void domain_creation_finished(struct domain *d)
{
if ( has_vlapic(d) && !mfn_eq(d->arch.hvm.vmx.apic_access_mfn, _mfn(0)) &&
set_mmio_p2m_entry(d, gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE),
d->arch.hvm.vmx.apic_access_mfn, PAGE_ORDER_4K) )
if ( set_mmio_p2m_entry(d, gfn, apic_access_mfn, PAGE_ORDER_4K) )
domain_crash(d);
}

Expand Down Expand Up @@ -2415,7 +2409,6 @@ static struct hvm_function_table __initdata vmx_function_table = {
.cpu_up_prepare = vmx_cpu_up_prepare,
.cpu_dead = vmx_cpu_dead,
.domain_initialise = vmx_domain_initialise,
.domain_relinquish_resources = vmx_domain_relinquish_resources,
.domain_creation_finished = domain_creation_finished,
.vcpu_initialise = vmx_vcpu_initialise,
.vcpu_destroy = vmx_vcpu_destroy,
Expand Down Expand Up @@ -2662,7 +2655,7 @@ const struct hvm_function_table * __init start_vmx(void)
{
set_in_cr4(X86_CR4_VMXE);

if ( vmx_vmcs_init() )
if ( vmx_vmcs_init() || alloc_vlapic_mapping() )
{
printk("VMX: failed to initialise.\n");
return NULL;
Expand Down Expand Up @@ -3236,60 +3229,42 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
return X86EMUL_EXCEPTION;
}

static int vmx_alloc_vlapic_mapping(struct domain *d)
static int __init alloc_vlapic_mapping(void)
{
struct page_info *pg;
mfn_t mfn;

if ( !cpu_has_vmx_virtualize_apic_accesses )
return 0;

pg = alloc_domheap_page(d, MEMF_no_refcount);
pg = alloc_domheap_page(NULL, 0);
if ( !pg )
return -ENOMEM;

if ( !get_page_and_type(pg, d, PGT_writable_page) )
{
/*
* The domain can't possibly know about this page yet, so failure
* here is a clear indication of something fishy going on.
*/
domain_crash(d);
return -ENODATA;
}
/*
* Signal to shadow code that this page cannot be refcounted. This also
* makes epte_get_entry_emt() recognize this page as "special".
*/
page_suppress_refcounting(pg);

mfn = page_to_mfn(pg);
clear_domain_page(mfn);
d->arch.hvm.vmx.apic_access_mfn = mfn;
apic_access_mfn = mfn;

return 0;
}

static void vmx_free_vlapic_mapping(struct domain *d)
{
mfn_t mfn = d->arch.hvm.vmx.apic_access_mfn;

d->arch.hvm.vmx.apic_access_mfn = _mfn(0);
if ( !mfn_eq(mfn, _mfn(0)) )
{
struct page_info *pg = mfn_to_page(mfn);

put_page_alloc_ref(pg);
put_page_and_type(pg);
}
}

static void vmx_install_vlapic_mapping(struct vcpu *v)
{
paddr_t virt_page_ma, apic_page_ma;

if ( mfn_eq(v->domain->arch.hvm.vmx.apic_access_mfn, _mfn(0)) )
if ( !has_vlapic(v->domain) || mfn_eq(apic_access_mfn, INVALID_MFN) )
return;

ASSERT(cpu_has_vmx_virtualize_apic_accesses);

virt_page_ma = page_to_maddr(vcpu_vlapic(v)->regs_page);
apic_page_ma = mfn_to_maddr(v->domain->arch.hvm.vmx.apic_access_mfn);
apic_page_ma = mfn_to_maddr(apic_access_mfn);

vmx_vmcs_enter(v);
__vmwrite(VIRTUAL_APIC_PAGE_ADDR, virt_page_ma);
Expand Down
9 changes: 9 additions & 0 deletions xen/arch/x86/mm/shadow/set.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,15 @@ shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d, p2m_type_t type)
ASSERT(!sh_l1e_is_magic(sl1e));
ASSERT(shadow_mode_refcounts(d));

/*
* Check whether refcounting is suppressed on this page. For example,
* VMX'es APIC access MFN is just a surrogate page. It doesn't actually
* get accessed, and hence there's no need to refcount it.
*/
mfn = shadow_l1e_get_mfn(sl1e);
if ( mfn_valid(mfn) && page_refcounting_suppressed(mfn_to_page(mfn)) )
return 0;

res = get_page_from_l1e(sl1e, d, d);

/*
Expand Down
7 changes: 7 additions & 0 deletions xen/arch/x86/mm/shadow/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,16 @@ int shadow_set_l4e(struct domain *d, shadow_l4e_t *sl4e,
static void inline
shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
{
    mfn_t mfn = shadow_l1e_get_mfn(sl1e);

    /*
     * Drop the reference taken by shadow_get_page_from_l1e(), but only if
     * one was actually taken: shadow modes without refcounting never take
     * one, and neither do pages with refcounting suppressed (e.g. VMX'es
     * APIC access page) — see the respective comment in
     * shadow_get_page_from_l1e().
     */
    if ( shadow_mode_refcounts(d) &&
         !(mfn_valid(mfn) && page_refcounting_suppressed(mfn_to_page(mfn))) )
        put_page_from_l1e(sl1e, d);
}

Expand Down
1 change: 0 additions & 1 deletion xen/include/asm-x86/hvm/vmx/vmcs.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ struct ept_data {
#define _VMX_DOMAIN_PML_ENABLED 0
#define VMX_DOMAIN_PML_ENABLED (1ul << _VMX_DOMAIN_PML_ENABLED)
struct vmx_domain {
mfn_t apic_access_mfn;
/* VMX_DOMAIN_* */
unsigned int status;

Expand Down
20 changes: 19 additions & 1 deletion xen/include/asm-x86/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
#define PGC_state_offlined PG_mask(2, 9)
#define PGC_state_free PG_mask(3, 9)
#define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
/* Page is not reference counted */
/* Page is not reference counted (see below for caveats) */
#define _PGC_extra PG_shift(10)
#define PGC_extra PG_mask(1, 10)

Expand Down Expand Up @@ -374,6 +374,24 @@ void zap_ro_mpt(mfn_t mfn);

bool is_iomem_page(mfn_t mfn);

/*
* Pages with no owner which may get passed to functions wanting to
* refcount them can be marked PGC_extra to bypass this refcounting (which
* would fail due to the lack of an owner).
*
* (For pages with owner PGC_extra has different meaning.)
*/
static inline void page_suppress_refcounting(struct page_info *pg)
{
ASSERT(!page_get_owner(pg));
pg->count_info |= PGC_extra;
}

static inline bool page_refcounting_suppressed(const struct page_info *pg)
{
return !page_get_owner(pg) && (pg->count_info & PGC_extra);
}

struct platform_bad_page {
unsigned long mfn;
unsigned int order;
Expand Down

0 comments on commit 58850b9

Please sign in to comment.