Skip to content

Commit

Permalink
vtd: optimize CPU cache sync
Browse files Browse the repository at this point in the history
Some VT-d IOMMUs are non-coherent, so changes made by the CPU must be
written back from the cache before they become visible to the IOMMU.
This cache write back was done unconditionally using clflush, but more
efficient instructions exist for the purpose, hence implement support
for them using the alternative framework.

This is part of XSA-321.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
  • Loading branch information
royger authored and jbeulich committed Jul 7, 2020
1 parent 23570bc commit a64ea16
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 7 deletions.
1 change: 0 additions & 1 deletion xen/drivers/passthrough/vtd/extern.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ int __must_check qinval_device_iotlb_sync(struct vtd_iommu *iommu,
u16 did, u16 size, u64 addr);

unsigned int get_cache_line_size(void);
void cacheline_flush(char *);
void flush_all_cache(void);

uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
Expand Down
38 changes: 37 additions & 1 deletion xen/drivers/passthrough/vtd/iommu.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <xen/pci_regs.h>
#include <xen/keyhandler.h>
#include <asm/msi.h>
#include <asm/nops.h>
#include <asm/irq.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/p2m.h>
Expand Down Expand Up @@ -160,7 +161,42 @@ static void sync_cache(const void *addr, unsigned int size)

addr -= (unsigned long)addr & (clflush_size - 1);
for ( ; addr < end; addr += clflush_size )
cacheline_flush((char *)addr);
/*
* The arguments to a macro must not include preprocessor directives. Doing so
* results in undefined behavior, so we have to create some defines here in
* order to avoid it.
*/
#if defined(HAVE_AS_CLWB)
# define CLWB_ENCODING "clwb %[p]"
#elif defined(HAVE_AS_XSAVEOPT)
# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
#else
# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
#endif

#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
# define INPUT BASE_INPUT
#else
# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
#endif
/*
* Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
* + prefix than a clflush + nop, and hence the prefix is added instead
* of letting the alternative framework fill the gap by appending nops.
*/
alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
"data16 clflush %[p]", /* clflushopt */
X86_FEATURE_CLFLUSHOPT,
CLWB_ENCODING,
X86_FEATURE_CLWB, /* no outputs */,
INPUT(addr));
#undef INPUT
#undef BASE_INPUT
#undef CLWB_ENCODING

alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
"sfence", X86_FEATURE_CLWB);
}

/* Allocate page table, return its machine address */
Expand Down
5 changes: 0 additions & 5 deletions xen/drivers/passthrough/vtd/x86/vtd.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,6 @@ unsigned int get_cache_line_size(void)
return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
}

/*
 * Thin wrapper: hands addr straight to clflush() and does nothing else.
 * There is no return value.
 */
void cacheline_flush(char *addr)
{
    clflush(addr);
}

void flush_all_cache()
{
wbinvd();
Expand Down

0 comments on commit a64ea16

Please sign in to comment.