Skip to content

Commit

Permalink
powerpc/tau: Disable TAU between measurements
Browse files Browse the repository at this point in the history
[ Upstream commit e63d6fb ]

Enabling CONFIG_TAU_INT causes random crashes:

Unrecoverable exception 1700 at c0009414 (msr=1000)
Oops: Unrecoverable exception, sig: 6 [#1]
BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac
Modules linked in:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.7.0-pmac-00043-gd5f545e1a8593 #5
NIP:  c0009414 LR: c0009414 CTR: c00116fc
REGS: c0799eb8 TRAP: 1700   Not tainted  (5.7.0-pmac-00043-gd5f545e1a8593)
MSR:  00001000 <ME>  CR: 22000228  XER: 00000100

GPR00: 00000000 c0799f70 c076e300 00800000 0291c0ac 00e00000 c076e300 00049032
GPR08: 00000001 c00116fc 00000000 dfbd3200 ffffffff 007f80a8 00000000 00000000
GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 c075ce04
GPR24: c075ce04 dfff8880 c07b0000 c075ce04 00080000 00000001 c079ef98 c079ef5c
NIP [c0009414] arch_cpu_idle+0x24/0x6c
LR [c0009414] arch_cpu_idle+0x24/0x6c
Call Trace:
[c0799f70] [00000001] 0x1 (unreliable)
[c0799f80] [c0060990] do_idle+0xd8/0x17c
[c0799fa0] [c0060ba4] cpu_startup_entry+0x20/0x28
[c0799fb0] [c072d220] start_kernel+0x434/0x44c
[c0799ff0] [00003860] 0x3860
Instruction dump:
XXXXXXXX XXXXXXXX XXXXXXXX 3d20c07b XXXXXXXX XXXXXXXX XXXXXXXX 7c0802a6
XXXXXXXX XXXXXXXX XXXXXXXX 4e800421 XXXXXXXX XXXXXXXX XXXXXXXX 7d2000a6
---[ end trace 3a0c9b5cb216db6b ]---

Resolve this problem by disabling each THRMn comparator when handling
the associated THRMn interrupt and by disabling the TAU entirely when
updating THRMn thresholds.

Fixes: 1da177e ("Linux-2.6.12-rc2")
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/5a0ba3dc5612c7aac596727331284a3676c08472.1599260540.git.fthain@telegraphics.com.au
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
Finn Thain authored and gregkh committed Oct 29, 2020
1 parent 7a1a068 commit b4fde57
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 48 deletions.
65 changes: 24 additions & 41 deletions arch/powerpc/kernel/tau_6xx.c
Expand Up @@ -42,8 +42,6 @@ static struct tau_temp

static bool tau_int_enable;

#undef DEBUG

/* TODO: put these in a /proc interface, with some sanity checks, and maybe
* dynamic adjustment to minimize # of interrupts */
/* configurable values for step size and how much to expand the window when
Expand All @@ -67,42 +65,33 @@ static void set_thresholds(unsigned long cpu)

static void TAUupdate(int cpu)
{
unsigned thrm;

#ifdef DEBUG
printk("TAUupdate ");
#endif
u32 thrm;
u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;

/* if both thresholds are crossed, the step_sizes cancel out
* and the window winds up getting expanded twice. */
if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */
if(thrm & THRM1_TIN){ /* crossed low threshold */
if (tau[cpu].low >= step_size){
tau[cpu].low -= step_size;
tau[cpu].high -= (step_size - window_expand);
}
tau[cpu].grew = 1;
#ifdef DEBUG
printk("low threshold crossed ");
#endif
thrm = mfspr(SPRN_THRM1);
if ((thrm & bits) == bits) {
mtspr(SPRN_THRM1, 0);

if (tau[cpu].low >= step_size) {
tau[cpu].low -= step_size;
tau[cpu].high -= (step_size - window_expand);
}
tau[cpu].grew = 1;
pr_debug("%s: low threshold crossed\n", __func__);
}
if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */
if(thrm & THRM1_TIN){ /* crossed high threshold */
if (tau[cpu].high <= 127-step_size){
tau[cpu].low += (step_size - window_expand);
tau[cpu].high += step_size;
}
tau[cpu].grew = 1;
#ifdef DEBUG
printk("high threshold crossed ");
#endif
thrm = mfspr(SPRN_THRM2);
if ((thrm & bits) == bits) {
mtspr(SPRN_THRM2, 0);

if (tau[cpu].high <= 127 - step_size) {
tau[cpu].low += (step_size - window_expand);
tau[cpu].high += step_size;
}
tau[cpu].grew = 1;
pr_debug("%s: high threshold crossed\n", __func__);
}

#ifdef DEBUG
printk("grew = %d\n", tau[cpu].grew);
#endif
}

#ifdef CONFIG_TAU_INT
Expand All @@ -127,17 +116,17 @@ void TAUException(struct pt_regs * regs)
static void tau_timeout(void * info)
{
int cpu;
unsigned long flags;
int size;
int shrink;

/* disabling interrupts *should* be okay */
local_irq_save(flags);
cpu = smp_processor_id();

if (!tau_int_enable)
TAUupdate(cpu);

/* Stop thermal sensor comparisons and interrupts */
mtspr(SPRN_THRM3, 0);

size = tau[cpu].high - tau[cpu].low;
if (size > min_window && ! tau[cpu].grew) {
/* do an exponential shrink of half the amount currently over size */
Expand All @@ -159,18 +148,12 @@ static void tau_timeout(void * info)

set_thresholds(cpu);

/*
* Do the enable every time, since otherwise a bunch of (relatively)
* complex sleep code needs to be added. One mtspr every time
* tau_timeout is called is probably not a big deal.
*
/* Restart thermal sensor comparisons and interrupts.
* The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
* recommends that "the maximum value be set in THRM3 under all
* conditions."
*/
mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);

local_irq_restore(flags);
}

static struct workqueue_struct *tau_workq;
Expand Down
9 changes: 2 additions & 7 deletions arch/powerpc/platforms/Kconfig
Expand Up @@ -227,20 +227,15 @@ config TAU
don't assume the cpu temp is actually what /proc/cpuinfo says it is.

config TAU_INT
bool "Interrupt driven TAU driver (DANGEROUS)"
bool "Interrupt driven TAU driver (EXPERIMENTAL)"
depends on TAU
help
The TAU supports an interrupt driven mode which causes an interrupt
whenever the temperature goes out of range. This is the fastest way
to get notified the temp has exceeded a range. With this option off,
a timer is used to re-check the temperature periodically.

However, on some cpus it appears that the TAU interrupt hardware
is buggy and can cause a situation which would lead unexplained hard
lockups.

Unless you are extending the TAU driver, or enjoy kernel/hardware
debugging, leave this option off.
If in doubt, say N here.

config TAU_AVERAGE
bool "Average high and low temp"
Expand Down

0 comments on commit b4fde57

Please sign in to comment.