xen/events: defer eoi in case of excessive number of events
commit e99502f upstream.

In case rogue guests are sending events at high frequency, it might
happen that xen_evtchn_do_upcall() won't stop processing events in
dom0. As this is done in irq handling, a crash might be the result.

In order to avoid that, delay further inter-domain events after some
time in xen_evtchn_do_upcall() by forcing eoi processing into a
worker on the same cpu, thus inhibiting new events coming in.

The time after which eoi processing is to be delayed is configurable
via a new module parameter "event_loop_timeout", which specifies the
maximum event loop time in jiffies (default: 2; this value was chosen
after tests showing it was the lowest that caused only a slight drop
of dom0 network throughput while multiple guests performed an event
storm).

How long eoi processing will be delayed can be specified via another
parameter, "event_eoi_delay" (again in jiffies, default 10; again the
value was chosen after testing with different delay values).

This is part of XSA-332.

Cc: stable@vger.kernel.org
Reported-by: Julien Grall <julien@xen.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Wei Liu <wl@xen.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
jgross1 authored and gregkh committed Nov 5, 2020
1 parent 25c23f0 commit 1d628c3
Showing 5 changed files with 216 additions and 32 deletions.
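Before the diffs, it may help to see the shape of the mechanism the commit message describes: count processed events, and every 256 of them compare the clock against a deadline; once the deadline has passed, stop unmasking immediately and defer each EOI instead. The sketch below (standalone userspace C, illustration only; the struct, helper names and the 8 ms budget are stand-ins, the real logic is in handle_irq_for_port() in events_base.c further down) distills that pattern:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Stand-in for struct evtchn_loop_ctrl from the patch. */
struct loop_ctrl {
        int64_t timeout_ns;     /* 0 until armed after the first 256 events */
        unsigned count;
        bool defer_eoi;
};

static int64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
}

/* Mirrors the "check for timeout every 256 events" pattern. */
static void handle_one_event(struct loop_ctrl *ctrl)
{
        if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
                if (!ctrl->timeout_ns)
                        /* arm the deadline lazily; ~2 jiffies at HZ=250 */
                        ctrl->timeout_ns = now_ns() + 8 * 1000 * 1000;
                else if (now_ns() > ctrl->timeout_ns)
                        ctrl->defer_eoi = true;
        }
        /* ... dispatch the event; once defer_eoi is set, EOIs get queued ... */
}

int main(void)
{
        struct loop_ctrl ctrl = { 0 };

        while (!ctrl.defer_eoi)  /* a rogue guest would keep this loop spinning */
                handle_one_event(&ctrl);
        printf("started deferring EOIs after %u events\n", ctrl.count);
        return 0;
}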
8 changes: 8 additions & 0 deletions Documentation/admin-guide/kernel-parameters.txt
@@ -5462,6 +5462,14 @@
                        as generic guest with no PV drivers. Currently support
                        XEN HVM, KVM, HYPER_V and VMWARE guest.

        xen.event_eoi_delay=    [XEN]
                        How long to delay EOI handling in case of event
                        storms (jiffies). Default is 10.

        xen.event_loop_timeout= [XEN]
                        After which time (jiffies) the event handling loop
                        should start to delay EOI handling. Default is 2.

        xirc2ps_cs=     [NET,PCMCIA]
                        Format:
                        <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
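A quick sanity check of the documented defaults, since both parameters are in jiffies and their wall-clock meaning depends on CONFIG_HZ (standalone C; the HZ value is an assumption, not something the patch fixes):

#include <stdio.h>

int main(void)
{
        const unsigned hz = 250;                /* assumed CONFIG_HZ; 100, 250 and 1000 are all common */
        const unsigned event_loop_timeout = 2;  /* default, jiffies */
        const unsigned event_eoi_delay = 10;    /* default, jiffies */

        /* at HZ=250: 2 jiffies -> 8 ms loop budget, 10 jiffies -> 40 ms EOI delay */
        printf("loop timeout: %u ms\n", event_loop_timeout * 1000 / hz);
        printf("eoi delay:    %u ms\n", event_eoi_delay * 1000 / hz);
        return 0;
}

Since both parameters are declared with mode 0644, they should also be adjustable at runtime through /sys/module/xen/parameters/, in addition to the boot command line shown above.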
7 changes: 2 additions & 5 deletions drivers/xen/events/events_2l.c
@@ -161,7 +161,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu,
 * a bitset of words which contain pending event bits. The second
 * level is a bitset of pending events themselves.
 */
-static void evtchn_2l_handle_events(unsigned cpu)
+static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
{
        int irq;
        xen_ulong_t pending_words;
@@ -242,10 +242,7 @@ static void evtchn_2l_handle_events(unsigned cpu)

                /* Process port. */
                port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
-               irq = get_evtchn_to_irq(port);
-
-               if (irq != -1)
-                       generic_handle_irq(irq);
+               handle_irq_for_port(port, ctrl);

                bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;

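The events_2l.c change is a pure refactor of the dispatch site: instead of resolving the port to an IRQ and calling generic_handle_irq() inline, the 2-level scan now funnels every pending port through the common handle_irq_for_port(), which threads the shared loop-control state through the whole scan. A minimal illustration of that funneling (userspace C with invented names; note the real scan also rotates its start position for fairness rather than always taking the lowest bit):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct loop_ctrl {              /* stand-in for struct evtchn_loop_ctrl */
        unsigned count;
        bool defer_eoi;
};

/*
 * Single dispatch point, like handle_irq_for_port() in the patch: every
 * pending port goes through here, so the deferral decision is shared by
 * the whole event-handling loop rather than made per event channel.
 */
static void handle_port(unsigned port, struct loop_ctrl *ctrl)
{
        ctrl->count++;
        printf("port %u (event #%u%s)\n", port, ctrl->count,
               ctrl->defer_eoi ? ", EOI deferred" : "");
}

int main(void)
{
        uint64_t pending = 0x8000000400000011ULL;       /* made-up pending word */
        struct loop_ctrl ctrl = { 0 };

        while (pending) {
                unsigned bit = (unsigned)__builtin_ctzll(pending); /* gcc/clang builtin */
                pending &= pending - 1;         /* clear the bit just handled */
                handle_port(bit, &ctrl);
        }
        return 0;
}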
189 changes: 184 additions & 5 deletions drivers/xen/events/events_base.c
@@ -35,6 +35,8 @@
#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/cpuhotplug.h>
#include <linux/atomic.h>
#include <linux/ktime.h>

#ifdef CONFIG_X86
#include <asm/desc.h>
@@ -64,6 +66,15 @@

#include "events_internal.h"

#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "xen."

static uint __read_mostly event_loop_timeout = 2;
module_param(event_loop_timeout, uint, 0644);

static uint __read_mostly event_eoi_delay = 10;
module_param(event_eoi_delay, uint, 0644);

const struct evtchn_ops *evtchn_ops;

/*
@@ -87,6 +98,7 @@ static DEFINE_RWLOCK(evtchn_rwlock);
 * irq_mapping_update_lock
 *   evtchn_rwlock
 *     IRQ-desc lock
 *       percpu eoi_list_lock
 */

static LIST_HEAD(xen_irq_list_head);
@@ -119,6 +131,8 @@ static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);

static DEFINE_PER_CPU(unsigned int, irq_epoch);

static void clear_evtchn_to_irq_row(unsigned row)
{
        unsigned col;
@@ -398,17 +412,120 @@ void notify_remote_via_irq(int irq)
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);

struct lateeoi_work {
        struct delayed_work delayed;
        spinlock_t eoi_list_lock;
        struct list_head eoi_list;
};

static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);

static void lateeoi_list_del(struct irq_info *info)
{
        struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
        unsigned long flags;

        spin_lock_irqsave(&eoi->eoi_list_lock, flags);
        list_del_init(&info->eoi_list);
        spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
}

static void lateeoi_list_add(struct irq_info *info)
{
        struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
        struct irq_info *elem;
        u64 now = get_jiffies_64();
        unsigned long delay;
        unsigned long flags;

        if (now < info->eoi_time)
                delay = info->eoi_time - now;
        else
                delay = 1;

        spin_lock_irqsave(&eoi->eoi_list_lock, flags);

        if (list_empty(&eoi->eoi_list)) {
                list_add(&info->eoi_list, &eoi->eoi_list);
                mod_delayed_work_on(info->eoi_cpu, system_wq,
                                    &eoi->delayed, delay);
        } else {
                list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
                        if (elem->eoi_time <= info->eoi_time)
                                break;
                }
                list_add(&info->eoi_list, &elem->eoi_list);
        }

        spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
}

static void xen_irq_lateeoi_locked(struct irq_info *info)
{
        evtchn_port_t evtchn;
        unsigned int cpu;

        evtchn = info->evtchn;
-       if (!VALID_EVTCHN(evtchn))
+       if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
                return;

        cpu = info->eoi_cpu;
        if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) {
                lateeoi_list_add(info);
                return;
        }

        info->eoi_time = 0;
        unmask_evtchn(evtchn);
}

static void xen_irq_lateeoi_worker(struct work_struct *work)
{
        struct lateeoi_work *eoi;
        struct irq_info *info;
        u64 now = get_jiffies_64();
        unsigned long flags;

        eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);

        read_lock_irqsave(&evtchn_rwlock, flags);

        while (true) {
                spin_lock(&eoi->eoi_list_lock);

                info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
                                                eoi_list);

                if (info == NULL || now < info->eoi_time) {
                        spin_unlock(&eoi->eoi_list_lock);
                        break;
                }

                list_del_init(&info->eoi_list);

                spin_unlock(&eoi->eoi_list_lock);

                info->eoi_time = 0;

                xen_irq_lateeoi_locked(info);
        }

        if (info)
                mod_delayed_work_on(info->eoi_cpu, system_wq,
                                    &eoi->delayed, info->eoi_time - now);

        read_unlock_irqrestore(&evtchn_rwlock, flags);
}

static void xen_cpu_init_eoi(unsigned int cpu)
{
        struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);

        INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
        spin_lock_init(&eoi->eoi_list_lock);
        INIT_LIST_HEAD(&eoi->eoi_list);
}

void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
{
        struct irq_info *info;
@@ -428,6 +545,7 @@ EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
static void xen_irq_init(unsigned irq)
{
        struct irq_info *info;

#ifdef CONFIG_SMP
        /* By default all event channels notify CPU#0. */
        cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
@@ -442,6 +560,7 @@ static void xen_irq_init(unsigned irq)

        set_info_for_irq(irq, info);

        INIT_LIST_HEAD(&info->eoi_list);
        list_add_tail(&info->list, &xen_irq_list_head);
}

@@ -497,6 +616,9 @@ static void xen_free_irq(unsigned irq)

        write_lock_irqsave(&evtchn_rwlock, flags);

        if (!list_empty(&info->eoi_list))
                lateeoi_list_del(info);

        list_del(&info->list);

        set_info_for_irq(irq, NULL);
@@ -1356,13 +1478,62 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
        notify_remote_via_irq(irq);
}

struct evtchn_loop_ctrl {
        ktime_t timeout;
        unsigned count;
        bool defer_eoi;
};

void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
{
        int irq;
        struct irq_info *info;

        irq = get_evtchn_to_irq(port);
        if (irq == -1)
                return;

        /*
         * Check for timeout every 256 events.
         * We are setting the timeout value only after the first 256
         * events in order to not hurt the common case of few loop
         * iterations. The 256 is basically an arbitrary value.
         *
         * In case we are hitting the timeout we need to defer all further
         * EOIs in order to ensure to leave the event handling loop rather
         * sooner than later.
         */
        if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
                ktime_t kt = ktime_get();

                if (!ctrl->timeout) {
                        kt = ktime_add_ms(kt,
                                          jiffies_to_msecs(event_loop_timeout));
                        ctrl->timeout = kt;
                } else if (kt > ctrl->timeout) {
                        ctrl->defer_eoi = true;
                }
        }

        info = info_for_irq(irq);

        if (ctrl->defer_eoi) {
                info->eoi_cpu = smp_processor_id();
                info->irq_epoch = __this_cpu_read(irq_epoch);
                info->eoi_time = get_jiffies_64() + event_eoi_delay;
        }

        generic_handle_irq(irq);
}

static DEFINE_PER_CPU(unsigned, xed_nesting_count);

static void __xen_evtchn_do_upcall(void)
{
        struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
        int cpu = get_cpu();
        unsigned count;
        struct evtchn_loop_ctrl ctrl = { 0 };

        read_lock(&evtchn_rwlock);

@@ -1372,7 +1543,7 @@ static void __xen_evtchn_do_upcall(void)
        if (__this_cpu_inc_return(xed_nesting_count) - 1)
                goto out;

-       xen_evtchn_handle_events(cpu);
+       xen_evtchn_handle_events(cpu, &ctrl);

        BUG_ON(!irqs_disabled());

@@ -1383,6 +1554,13 @@ static void __xen_evtchn_do_upcall(void)
out:
        read_unlock(&evtchn_rwlock);

        /*
         * Increment irq_epoch only now to defer EOIs only for
         * xen_irq_lateeoi() invocations occurring from inside the loop
         * above.
         */
        __this_cpu_inc(irq_epoch);

        put_cpu();
}

@@ -1829,16 +2007,15 @@ void xen_callback_vector(void)
void xen_callback_vector(void) {}
#endif

-#undef MODULE_PARAM_PREFIX
-#define MODULE_PARAM_PREFIX "xen."
-
static bool fifo_events = true;
module_param(fifo_events, bool, 0);

static int xen_evtchn_cpu_prepare(unsigned int cpu)
{
        int ret = 0;

        xen_cpu_init_eoi(cpu);

        if (evtchn_ops->percpu_init)
                ret = evtchn_ops->percpu_init(cpu);

@@ -1865,6 +2042,8 @@ void __init xen_init_IRQ(void)
        if (ret < 0)
                xen_evtchn_2l_init();

        xen_cpu_init_eoi(smp_processor_id());

        cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
                                  "xen/evtchn:prepare",
                                  xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
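To see where the deferral bites, recall the calling convention of the lateeoi framework added earlier in this XSA-332 series: a driver binds its event channel with bind_evtchn_to_irqhandler_lateeoi() and acks it explicitly from the handler. A sketch of that convention follows (kernel-style C, not a complete driver; the handler body and helper are invented, while xen_irq_lateeoi() and XEN_EOI_FLAG_SPURIOUS come from the companion patches):

#include <linux/interrupt.h>
#include <xen/events.h>

/* Hypothetical per-device work; returns false if the wakeup was bogus. */
static bool process_event(void *dev_id)
{
        return true;
}

static irqreturn_t my_evtchn_handler(int irq, void *dev_id)
{
        bool did_work = process_event(dev_id);

        /*
         * The event channel stays masked until this call. Under an event
         * storm the patch above postpones the unmask: instead of happening
         * here synchronously, it is queued on the per-CPU lateeoi worker
         * until event_eoi_delay jiffies have passed.
         */
        xen_irq_lateeoi(irq, did_work ? 0 : XEN_EOI_FLAG_SPURIOUS);

        return IRQ_HANDLED;
}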
