Skip to content

Commit

Permalink
s390: convert to generic entry
Browse files Browse the repository at this point in the history
This patch converts s390 to use the generic entry infrastructure from
kernel/entry/*.

There are a few special things on s390:

- PIF_PER_TRAP is moved to TIF_PER_TRAP as the generic code doesn't
  know about our PIF flags in exit_to_user_mode_loop().

- The old code had several ways to restart syscalls:

  a) PIF_SYSCALL_RESTART, which was only set during execve to force a
     restart after upgrading a process (usually qemu-kvm) to pgste page
     table extensions.

  b) PIF_SYSCALL, which is set by do_signal() to indicate that the
     current syscall should be restarted. This is changed so that
     do_signal() now also uses PIF_SYSCALL_RESTART. Continuing to use
     PIF_SYSCALL doesn't work with the generic code, and changing it
     to PIF_SYSCALL_RESTART makes PIF_SYSCALL and PIF_SYSCALL_RESTART
     more unique.

- On s390 calling sys_sigreturn or sys_rt_sigreturn is implemented by
executing a svc instruction on the process stack which causes a fault.
While handling that fault the fault code sets PIF_SYSCALL to hand over
processing to the syscall code on exit to usermode.

The patch introduces PIF_SYSCALL_RET_SET, which is set if ptrace sets
a return value for a syscall. The s390x ptrace ABI uses r2 both for the
syscall number and return value, so ptrace cannot set the syscall number +
return value at the same time. The flag makes handling that a bit easier.
do_syscall() will just skip executing the syscall if PIF_SYSCALL_RET_SET
is set.

CONFIG_DEBUG_ASCE was removd in favour of the generic CONFIG_DEBUG_ENTRY.
CR1/7/13 will be checked both on kernel entry and exit to contain the
correct asces.

Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
  • Loading branch information
svens-s390 authored and Vasily Gorbik committed Jan 19, 2021
1 parent ac94a29 commit 56e62a7
Show file tree
Hide file tree
Showing 39 changed files with 652 additions and 920 deletions.
1 change: 1 addition & 0 deletions arch/s390/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ config S390
select GENERIC_ALLOCATOR
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_ENTRY
select GENERIC_FIND_FIRST_BIT
select GENERIC_GETTIMEOFDAY
select GENERIC_PTDUMP
Expand Down
10 changes: 6 additions & 4 deletions arch/s390/Kconfig.debug
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ config TRACE_IRQFLAGS_SUPPORT
config EARLY_PRINTK
def_bool y

config DEBUG_USER_ASCE
bool "Debug User ASCE"
config DEBUG_ENTRY
bool "Debug low-level entry code"
depends on DEBUG_KERNEL
help
Check on exit to user space that address space control
elements are setup correctly.
This option enables sanity checks in s390 low-level entry code.
Some of these sanity checks may slow down kernel entries and
exits or otherwise impact performance.

If unsure, say N.
2 changes: 1 addition & 1 deletion arch/s390/configs/debug_defconfig
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,6 @@ CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
CONFIG_FTRACE_STARTUP_TEST=y
# CONFIG_EVENT_TRACE_STARTUP_TEST is not set
CONFIG_DEBUG_USER_ASCE=y
CONFIG_NOTIFIER_ERROR_INJECTION=m
CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
CONFIG_FAULT_INJECTION=y
Expand All @@ -857,3 +856,4 @@ CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_TEST_BITOPS=m
CONFIG_TEST_BPF=m
CONFIG_DEBUG_ENTRY=y
1 change: 0 additions & 1 deletion arch/s390/configs/defconfig
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,6 @@ CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
CONFIG_DEBUG_USER_ASCE=y
CONFIG_LKDTM=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
Expand Down
2 changes: 2 additions & 0 deletions arch/s390/include/asm/cputime.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,6 @@ u64 arch_cpu_idle_time(int cpu);

#define arch_idle_time(cpu) arch_cpu_idle_time(cpu)

void account_idle_time_irq(void);

#endif /* _S390_CPUTIME_H */
7 changes: 3 additions & 4 deletions arch/s390/include/asm/elf.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,7 @@ extern char elf_platform[];
do { \
set_personality(PER_LINUX | \
(current->personality & (~PER_MASK))); \
current->thread.sys_call_table = \
(unsigned long) &sys_call_table; \
current->thread.sys_call_table = sys_call_table; \
} while (0)
#else /* CONFIG_COMPAT */
#define SET_PERSONALITY(ex) \
Expand All @@ -245,11 +244,11 @@ do { \
if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \
set_thread_flag(TIF_31BIT); \
current->thread.sys_call_table = \
(unsigned long) &sys_call_table_emu; \
sys_call_table_emu; \
} else { \
clear_thread_flag(TIF_31BIT); \
current->thread.sys_call_table = \
(unsigned long) &sys_call_table; \
sys_call_table; \
} \
} while (0)
#endif /* CONFIG_COMPAT */
Expand Down
60 changes: 60 additions & 0 deletions arch/s390/include/asm/entry-common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_S390_ENTRY_COMMON_H
#define ARCH_S390_ENTRY_COMMON_H

#include <linux/sched.h>
#include <linux/audit.h>
#include <linux/tracehook.h>
#include <linux/processor.h>
#include <linux/uaccess.h>
#include <asm/fpu/api.h>

#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)

void do_per_trap(struct pt_regs *regs);
void do_syscall(struct pt_regs *regs);

typedef void (*pgm_check_func)(struct pt_regs *regs);

extern pgm_check_func pgm_check_table[128];

#ifdef CONFIG_DEBUG_ENTRY
static __always_inline void arch_check_user_regs(struct pt_regs *regs)
{
debug_user_asce(0);
}

#define arch_check_user_regs arch_check_user_regs
#endif /* CONFIG_DEBUG_ENTRY */

static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
unsigned long ti_work)
{
if (ti_work & _TIF_PER_TRAP) {
clear_thread_flag(TIF_PER_TRAP);
do_per_trap(regs);
}

if (ti_work & _TIF_GUARDED_STORAGE)
gs_load_bc_cb(regs);
}

#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work

static __always_inline void arch_exit_to_user_mode(void)
{
if (test_cpu_flag(CIF_FPU))
__load_fpu_regs();

if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
debug_user_asce(1);
}

#define arch_exit_to_user_mode arch_exit_to_user_mode

static inline bool on_thread_stack(void)
{
return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1));
}

#endif
2 changes: 2 additions & 0 deletions arch/s390/include/asm/fpu/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
#include <linux/preempt.h>

void save_fpu_regs(void);
void load_fpu_regs(void);
void __load_fpu_regs(void);

static inline int test_fp_ctl(u32 fpc)
{
Expand Down
4 changes: 3 additions & 1 deletion arch/s390/include/asm/idle.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ struct s390_idle_data {
unsigned long long clock_idle_exit;
unsigned long long timer_idle_enter;
unsigned long long timer_idle_exit;
unsigned long mt_cycles_enter[8];
};

extern struct device_attribute dev_attr_idle_count;
extern struct device_attribute dev_attr_idle_time_us;

void psw_idle(struct s390_idle_data *, unsigned long);
void psw_idle(struct s390_idle_data *data, unsigned long psw_mask);
void psw_idle_exit(void);

#endif /* _S390_IDLE_H */
4 changes: 2 additions & 2 deletions arch/s390/include/asm/lowcore.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ struct lowcore {
psw_t return_mcck_psw; /* 0x02a0 */

/* CPU accounting and timing values. */
__u64 sync_enter_timer; /* 0x02b0 */
__u64 async_enter_timer; /* 0x02b8 */
__u64 sys_enter_timer; /* 0x02b0 */
__u8 pad_0x02b8[0x02c0-0x02b8]; /* 0x02b8 */
__u64 mcck_enter_timer; /* 0x02c0 */
__u64 exit_timer; /* 0x02c8 */
__u64 user_timer; /* 0x02d0 */
Expand Down
1 change: 1 addition & 0 deletions arch/s390/include/asm/nmi.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ int nmi_alloc_per_cpu(struct lowcore *lc);
void nmi_free_per_cpu(struct lowcore *lc);

void s390_handle_mcck(void);
void __s390_handle_mcck(void);
int s390_do_machine_check(struct pt_regs *regs);

#endif /* __ASSEMBLY__ */
Expand Down
52 changes: 32 additions & 20 deletions arch/s390/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
#include <asm/runtime_instr.h>
#include <asm/fpu/types.h>
#include <asm/fpu/internal.h>
#include <asm/irqflags.h>

typedef long (*sys_call_ptr_t)(unsigned long, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);

static inline void set_cpu_flag(int flag)
{
Expand Down Expand Up @@ -101,31 +106,32 @@ extern void __bpon(void);
*/
struct thread_struct {
unsigned int acrs[NUM_ACRS];
unsigned long ksp; /* kernel stack pointer */
unsigned long user_timer; /* task cputime in user space */
unsigned long guest_timer; /* task cputime in kvm guest */
unsigned long system_timer; /* task cputime in kernel space */
unsigned long hardirq_timer; /* task cputime in hardirq context */
unsigned long softirq_timer; /* task cputime in softirq context */
unsigned long sys_call_table; /* system call table address */
unsigned long gmap_addr; /* address of last gmap fault. */
unsigned int gmap_write_flag; /* gmap fault write indication */
unsigned int gmap_int_code; /* int code of last gmap fault */
unsigned int gmap_pfault; /* signal of a pending guest pfault */
unsigned long ksp; /* kernel stack pointer */
unsigned long user_timer; /* task cputime in user space */
unsigned long guest_timer; /* task cputime in kvm guest */
unsigned long system_timer; /* task cputime in kernel space */
unsigned long hardirq_timer; /* task cputime in hardirq context */
unsigned long softirq_timer; /* task cputime in softirq context */
const sys_call_ptr_t *sys_call_table; /* system call table address */
unsigned long gmap_addr; /* address of last gmap fault. */
unsigned int gmap_write_flag; /* gmap fault write indication */
unsigned int gmap_int_code; /* int code of last gmap fault */
unsigned int gmap_pfault; /* signal of a pending guest pfault */

/* Per-thread information related to debugging */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */
unsigned int system_call; /* system call number in signal */
unsigned long last_break; /* last breaking-event-address. */
/* pfault_wait is used to block the process on a pfault event */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */
unsigned int system_call; /* system call number in signal */
unsigned long last_break; /* last breaking-event-address. */
/* pfault_wait is used to block the process on a pfault event */
unsigned long pfault_wait;
struct list_head list;
/* cpu runtime instrumentation */
struct runtime_instr_cb *ri_cb;
struct gs_cb *gs_cb; /* Current guarded storage cb */
struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
struct gs_cb *gs_cb; /* Current guarded storage cb */
struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
/*
* Warning: 'fpu' is dynamically-sized. It *MUST* be at
* the end.
Expand Down Expand Up @@ -184,6 +190,7 @@ static inline void release_thread(struct task_struct *tsk) { }

/* Free guarded storage control block */
void guarded_storage_release(struct task_struct *tsk);
void gs_load_bc_cb(struct pt_regs *regs);

unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) \
Expand Down Expand Up @@ -324,6 +331,11 @@ extern void memcpy_absolute(void *, void *, size_t);
extern int s390_isolate_bp(void);
extern int s390_isolate_bp_guest(void);

static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
{
return arch_irqs_disabled_flags(regs->psw.mask);
}

#endif /* __ASSEMBLY__ */

#endif /* __ASM_S390_PROCESSOR_H */
9 changes: 6 additions & 3 deletions arch/s390/include/asm/ptrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
#include <uapi/asm/ptrace.h>

#define PIF_SYSCALL 0 /* inside a system call */
#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */
#define PIF_SYSCALL_RESTART 2 /* restart the current system call */
#define PIF_SYSCALL_RESTART 1 /* restart the current system call */
#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */
#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */

#define _PIF_SYSCALL BIT(PIF_SYSCALL)
#define _PIF_PER_TRAP BIT(PIF_PER_TRAP)
#define _PIF_SYSCALL_RESTART BIT(PIF_SYSCALL_RESTART)
#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET)
#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT)

#ifndef __ASSEMBLY__
Expand Down Expand Up @@ -68,6 +68,9 @@ enum {
&(*(struct psw_bits *)(&(__psw))); \
}))

#define PGM_INT_CODE_MASK 0x7f
#define PGM_INT_CODE_PER 0x80

/*
* The pt_regs struct defines the way the registers are stored on
* the stack during a system call.
Expand Down
11 changes: 9 additions & 2 deletions arch/s390/include/asm/syscall.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
#include <linux/err.h>
#include <asm/ptrace.h>

extern const unsigned long sys_call_table[];
extern const unsigned long sys_call_table_emu[];
extern const sys_call_ptr_t sys_call_table[];
extern const sys_call_ptr_t sys_call_table_emu[];

static inline long syscall_get_nr(struct task_struct *task,
struct pt_regs *regs)
Expand Down Expand Up @@ -56,6 +56,7 @@ static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
set_pt_regs_flag(regs, PIF_SYSCALL_RET_SET);
regs->gprs[2] = error ? error : val;
}

Expand Down Expand Up @@ -97,4 +98,10 @@ static inline int syscall_get_arch(struct task_struct *task)
#endif
return AUDIT_ARCH_S390X;
}

static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
{
return false;
}

#endif /* _ASM_SYSCALL_H */
3 changes: 3 additions & 0 deletions arch/s390/include/asm/thread_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
*/
struct thread_info {
unsigned long flags; /* low level flags */
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
};

/*
Expand Down Expand Up @@ -68,6 +69,7 @@ void arch_setup_new_exec(void);
#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
#define TIF_ISOLATE_BP 8 /* Run process with isolated BP */
#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
#define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */

#define TIF_31BIT 16 /* 32bit process */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
Expand All @@ -91,6 +93,7 @@ void arch_setup_new_exec(void);
#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
#define _TIF_ISOLATE_BP BIT(TIF_ISOLATE_BP)
#define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST)
#define _TIF_PER_TRAP BIT(TIF_PER_TRAP)

#define _TIF_31BIT BIT(TIF_31BIT)
#define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP)
Expand Down
2 changes: 1 addition & 1 deletion arch/s390/include/asm/uaccess.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include <asm/extable.h>
#include <asm/facility.h>

void debug_user_asce(void);
void debug_user_asce(int exit);

static inline int __range_ok(unsigned long addr, unsigned long size)
{
Expand Down
14 changes: 14 additions & 0 deletions arch/s390/include/asm/vtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,18 @@

#define __ARCH_HAS_VTIME_TASK_SWITCH

static inline void update_timer_sys(void)
{
S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.sys_enter_timer;
S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
}

static inline void update_timer_mcck(void)
{
S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.mcck_enter_timer;
S390_lowcore.last_update_timer = S390_lowcore.mcck_enter_timer;
}

#endif /* _S390_VTIME_H */
5 changes: 3 additions & 2 deletions arch/s390/include/uapi/asm/ptrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,9 @@
#define ACR_SIZE 4


#define PTRACE_OLDSETOPTIONS 21

#define PTRACE_OLDSETOPTIONS 21
#define PTRACE_SYSEMU 31
#define PTRACE_SYSEMU_SINGLESTEP 32
#ifndef __ASSEMBLY__
#include <linux/stddef.h>
#include <linux/types.h>
Expand Down
2 changes: 1 addition & 1 deletion arch/s390/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls

obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o
obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
Expand Down

0 comments on commit 56e62a7

Please sign in to comment.