Skip to content

Commit

Permalink
powerpc/perf: power10 Performance Monitoring support
Browse files Browse the repository at this point in the history
Base enablement patch to register performance monitoring hardware
support for power10. Patch introduce the raw event encoding format,
defines the supported list of events, config fields for the event
attributes and their corresponding bit values which are exported via
sysfs.

Patch also enhances the support function in isa207_common.c to include
power10 pmu hardware.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1594996707-3727-9-git-send-email-atrajeev@linux.vnet.ibm.com
  • Loading branch information
athira-rajeev authored and mpe committed Jul 22, 2020
1 parent 9908c82 commit a64e697
Show file tree
Hide file tree
Showing 7 changed files with 566 additions and 11 deletions.
2 changes: 1 addition & 1 deletion arch/powerpc/perf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
isa207-common.o power8-pmu.o power9-pmu.o \
generic-compat-pmu.o
generic-compat-pmu.o power10-pmu.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o

obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o
Expand Down
2 changes: 2 additions & 0 deletions arch/powerpc/perf/core-book3s.c
Original file line number Diff line number Diff line change
Expand Up @@ -2333,6 +2333,8 @@ static int __init init_ppc64_pmu(void)
return 0;
else if (!init_power9_pmu())
return 0;
else if (!init_power10_pmu())
return 0;
else if (!init_ppc970_pmu())
return 0;
else
Expand Down
1 change: 1 addition & 0 deletions arch/powerpc/perf/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ extern int init_power6_pmu(void);
extern int init_power7_pmu(void);
extern int init_power8_pmu(void);
extern int init_power9_pmu(void);
extern int init_power10_pmu(void);
extern int init_generic_compat_pmu(void);
59 changes: 50 additions & 9 deletions arch/powerpc/perf/isa207-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ static bool is_event_valid(u64 event)
{
u64 valid_mask = EVENT_VALID_MASK;

if (cpu_has_feature(CPU_FTR_ARCH_300))
if (cpu_has_feature(CPU_FTR_ARCH_31))
valid_mask = p10_EVENT_VALID_MASK;
else if (cpu_has_feature(CPU_FTR_ARCH_300))
valid_mask = p9_EVENT_VALID_MASK;

return !(event & ~valid_mask);
Expand All @@ -69,6 +71,14 @@ static inline bool is_event_marked(u64 event)
return false;
}

static unsigned long sdar_mod_val(u64 event)
{
if (cpu_has_feature(CPU_FTR_ARCH_31))
return p10_SDAR_MODE(event);

return p9_SDAR_MODE(event);
}

static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
{
/*
Expand All @@ -79,7 +89,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
* MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling
* mode and will be un-changed when setting MMCRA[63] (Marked events).
*
* Incase of Power9:
* Incase of Power9/power10:
* Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
* or if group already have any marked events.
* For rest
Expand All @@ -90,8 +100,8 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
else if (p9_SDAR_MODE(event))
*mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
else if (sdar_mod_val(event))
*mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT;
else
*mmcra |= MMCRA_SDAR_MODE_DCACHE;
} else
Expand Down Expand Up @@ -134,7 +144,11 @@ static bool is_thresh_cmp_valid(u64 event)
/*
* Check the mantissa upper two bits are not zero, unless the
* exponent is also zero. See the THRESH_CMP_MANTISSA doc.
* Power10: thresh_cmp is replaced by l2_l3 event select.
*/
if (cpu_has_feature(CPU_FTR_ARCH_31))
return false;

cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
exp = cmp >> 7;

Expand Down Expand Up @@ -251,7 +265,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)

pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK;
if (cpu_has_feature(CPU_FTR_ARCH_31))
cache = (event >> EVENT_CACHE_SEL_SHIFT) &
p10_EVENT_CACHE_SEL_MASK;
else
cache = (event >> EVENT_CACHE_SEL_SHIFT) &
EVENT_CACHE_SEL_MASK;
ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK;

if (pmc) {
Expand Down Expand Up @@ -283,7 +302,10 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
}

if (unit >= 6 && unit <= 9) {
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
mask |= CNST_L2L3_GROUP_MASK;
value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
mask |= CNST_CACHE_GROUP_MASK;
value |= CNST_CACHE_GROUP_VAL(event & 0xff);

Expand Down Expand Up @@ -367,6 +389,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
struct perf_event *pevents[])
{
unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
unsigned long mmcr3;
unsigned int pmc, pmc_inuse;
int i;

Expand All @@ -379,7 +402,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
pmc_inuse |= 1 << pmc;
}

mmcra = mmcr1 = mmcr2 = 0;
mmcra = mmcr1 = mmcr2 = mmcr3 = 0;

/* Second pass: assign PMCs, set all MMCR1 fields */
for (i = 0; i < n_ev; ++i) {
Expand Down Expand Up @@ -438,8 +461,17 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
mmcra |= val << MMCRA_THR_CTL_SHIFT;
val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
mmcra |= val << MMCRA_THR_SEL_SHIFT;
val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
mmcra |= thresh_cmp_val(val);
if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
val = (event[i] >> EVENT_THR_CMP_SHIFT) &
EVENT_THR_CMP_MASK;
mmcra |= thresh_cmp_val(val);
}
}

if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
val = (event[i] >> p10_L2L3_EVENT_SHIFT) &
p10_EVENT_L2L3_SEL_MASK;
mmcr2 |= val << p10_L2L3_SEL_SHIFT;
}

if (event[i] & EVENT_WANTS_BHRB) {
Expand All @@ -460,6 +492,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
mmcr2 |= MMCR2_FCS(pmc);
}

if (cpu_has_feature(CPU_FTR_ARCH_31)) {
if (pmc <= 4) {
val = (event[i] >> p10_EVENT_MMCR3_SHIFT) &
p10_EVENT_MMCR3_MASK;
mmcr3 |= val << MMCR3_SHIFT(pmc);
}
}

hwc[i] = pmc - 1;
}

Expand All @@ -480,6 +520,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
mmcr->mmcr1 = mmcr1;
mmcr->mmcra = mmcra;
mmcr->mmcr2 = mmcr2;
mmcr->mmcr3 = mmcr3;

return 0;
}
Expand Down
33 changes: 32 additions & 1 deletion arch/powerpc/perf/isa207-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,31 @@
EVENT_LINUX_MASK | \
EVENT_PSEL_MASK))

/* Contants to support power10 raw encoding format */
#define p10_SDAR_MODE_SHIFT 22
#define p10_SDAR_MODE_MASK 0x3ull
#define p10_SDAR_MODE(v) (((v) >> p10_SDAR_MODE_SHIFT) & \
p10_SDAR_MODE_MASK)
#define p10_EVENT_L2L3_SEL_MASK 0x1f
#define p10_L2L3_SEL_SHIFT 3
#define p10_L2L3_EVENT_SHIFT 40
#define p10_EVENT_THRESH_MASK 0xffffull
#define p10_EVENT_CACHE_SEL_MASK 0x3ull
#define p10_EVENT_MMCR3_MASK 0x7fffull
#define p10_EVENT_MMCR3_SHIFT 45

#define p10_EVENT_VALID_MASK \
((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \
(p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \
(EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \
(p10_EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \
(EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \
(EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \
(p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \
(p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \
(EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \
EVENT_LINUX_MASK | \
EVENT_PSEL_MASK))
/*
* Layout of constraint bits:
*
Expand Down Expand Up @@ -135,6 +160,9 @@
#define CNST_CACHE_PMC4_VAL (1ull << 54)
#define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL

#define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55)
#define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f)

/*
* For NC we are counting up to 4 events. This requires three bits, and we need
* the fifth event to overflow and set the 4th bit. To achieve that we bias the
Expand Down Expand Up @@ -191,7 +219,7 @@
#define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\
MMCRA_THR_CTR_EXP_MASK)

/* MMCR1 Threshold Compare bit constant for power9 */
/* MMCRA Threshold Compare bit constant for power9 */
#define p9_MMCRA_THR_CMP_SHIFT 45

/* Bits in MMCR2 for PowerISA v2.07 */
Expand All @@ -202,6 +230,9 @@
#define MAX_ALT 2
#define MAX_PMU_COUNTERS 6

/* Bits in MMCR3 for PowerISA v3.10 */
#define MMCR3_SHIFT(pmc) (49 - (15 * ((pmc) - 1)))

#define ISA207_SIER_TYPE_SHIFT 15
#define ISA207_SIER_TYPE_MASK (0x7ull << ISA207_SIER_TYPE_SHIFT)

Expand Down
70 changes: 70 additions & 0 deletions arch/powerpc/perf/power10-events-list.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Performance counter support for POWER10 processors.
*
* Copyright 2020 Madhavan Srinivasan, IBM Corporation.
* Copyright 2020 Athira Rajeev, IBM Corporation.
*/

/*
* Power10 event codes.
*/
EVENT(PM_RUN_CYC, 0x600f4);
EVENT(PM_DISP_STALL_CYC, 0x100f8);
EVENT(PM_EXEC_STALL, 0x30008);
EVENT(PM_RUN_INST_CMPL, 0x500fa);
EVENT(PM_BR_CMPL, 0x4d05e);
EVENT(PM_BR_MPRED_CMPL, 0x400f6);

/* All L1 D cache load references counted at finish, gated by reject */
EVENT(PM_LD_REF_L1, 0x100fc);
/* Load Missed L1 */
EVENT(PM_LD_MISS_L1, 0x3e054);
/* Store Missed L1 */
EVENT(PM_ST_MISS_L1, 0x300f0);
/* L1 cache data prefetches */
EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS, 0x1002c);
/* Demand iCache Miss */
EVENT(PM_L1_ICACHE_MISS, 0x200fc);
/* Instruction fetches from L1 */
EVENT(PM_INST_FROM_L1, 0x04080);
/* Instruction Demand sectors wriittent into IL1 */
EVENT(PM_INST_FROM_L1MISS, 0x03f00000001c040);
/* Instruction prefetch written into IL1 */
EVENT(PM_IC_PREF_REQ, 0x040a0);
/* The data cache was reloaded from local core's L3 due to a demand load */
EVENT(PM_DATA_FROM_L3, 0x01340000001c040);
/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
EVENT(PM_DATA_FROM_L3MISS, 0x300fe);
/* Data PTEG reload */
EVENT(PM_DTLB_MISS, 0x300fc);
/* ITLB Reloaded */
EVENT(PM_ITLB_MISS, 0x400fc);

EVENT(PM_RUN_CYC_ALT, 0x0001e);
EVENT(PM_RUN_INST_CMPL_ALT, 0x00002);

/*
* Memory Access Events
*
* Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0)
* To enable capturing of memory profiling, these MMCRA bits
* needs to be programmed and corresponding raw event format
* encoding.
*
* MMCRA bits encoding needed are
* SM (Sampling Mode)
* EM (Eligibility for Random Sampling)
* TECE (Threshold Event Counter Event)
* TS (Threshold Start Event)
* TE (Threshold End Event)
*
* Corresponding Raw Encoding bits:
* sample [EM,SM]
* thresh_sel (TECE)
* thresh start (TS)
* thresh end (TE)
*/

EVENT(MEM_LOADS, 0x34340401e0);
EVENT(MEM_STORES, 0x343c0401e0);

0 comments on commit a64e697

Please sign in to comment.