/
watchdog.go
224 lines (201 loc) · 6.42 KB
/
watchdog.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
// Copyright 2021 the u-root Authors. All rights reserved
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package watchdog provides functions for interacting with the Linux watchdog.
//
// The basic usage is:
//
//	wd, err := watchdog.Open(watchdog.Dev)
//	for running {
//		wd.KeepAlive()
//	}
//	wd.MagicClose()
//
// Open() arms the watchdog. MagicClose() disarms the watchdog.
//
// Note not every watchdog driver supports every function!
//
// For more, see:
// https://www.kernel.org/doc/Documentation/watchdog/watchdog-api.txt
package watchdog
import (
"fmt"
"os"
"time"
"unsafe"
"golang.org/x/sys/unix"
)
// Dev is the device path of the first watchdog and the usual argument to
// Open. If there are multiple watchdogs, they are named /dev/watchdog0,
// /dev/watchdog1, ...
const Dev = "/dev/watchdog"
// ioctl request numbers issued against the open watchdog device file.
// NOTE(review): the values look like the Linux WDIOC_* requests (type byte
// 0x57, 'W') — confirm against <linux/watchdog.h> before changing any of them.
const (
// Used by Support() to fill in a unix.WatchdogInfo.
wdiocGetSupport = 0x80285700
// Used by Status().
wdiocGetStatus = 0x80045701
// Used by BootStatus().
wdiocGetBootStatus = 0x80045702
// Not referenced in the visible part of this file.
wdiocGetTemp = 0x80045703
// Used by SetOptions().
wdiocSetOptions = 0x80045704
// Not referenced in the visible part of this file; KeepAlive() writes to
// the device instead of issuing this request.
wdiocKeepAlive = 0x80045705
// Used by SetTimeout().
wdiocSetTimeout = 0xc0045706
// Used by Timeout().
wdiocGetTimeout = 0x80045707
// Used by SetPreTimeout().
wdiocSetPreTimeout = 0xc0045708
// Used by PreTimeout().
wdiocGetPreTimeout = 0x80045709
// Used by TimeLeft().
wdiocGetTimeLeft = 0x8004570a
)
// Status is a bitset of flags returned by Status() and BootStatus(). These
// are the same flags used for Support()'s options field.
type Status int32
// Bitset of possible flags for the Status type. Apart from StatusUnknown,
// each constant is a single bit and may be combined with the others.
const (
// Unknown flag error. Status() and BootStatus() return this value alongside
// a non-nil error.
StatusUnknown Status = -1
// Reset due to CPU overheat
StatusOverheat Status = 0x0001
// Fan failed
StatusFanFault Status = 0x0002
// External relay 1
StatusExtern1 Status = 0x0004
// External relay 2
StatusExtern2 Status = 0x0008
// Power bad/power fault
StatusPowerUnder Status = 0x0010
// Card previously reset the CPU
StatusCardReset Status = 0x0020
// Power over voltage
StatusPowerOver Status = 0x0040
// Set timeout (in seconds)
StatusSetTimeout Status = 0x0080
// Supports magic close char
StatusMagicClose Status = 0x0100
// Pretimeout (in seconds), get/set
StatusPreTimeout Status = 0x0200
// Watchdog triggers a management or other external alarm, not a reboot
StatusAlarmOnly Status = 0x0400
// Keep alive ping reply
StatusKeepAlivePing Status = 0x8000
)
// Option is a bitset of flags passed to SetOptions().
type Option int32
// Bitset of possible flags for the Option type.
const (
// Unknown option error. Not referenced in the visible part of this file.
OptionUnknown Option = -1
// Turn off the watchdog timer
OptionDisableCard Option = 0x0001
// Turn on the watchdog timer
OptionEnableCard Option = 0x0002
// Kernel panic on temperature trip
OptionTempPanic Option = 0x0004
)
// Watchdog holds the descriptor of an open watchdog driver. Obtain one with
// Open; all methods operate on the wrapped device file.
type Watchdog struct {
// f is the open watchdog device file, opened read-write by Open.
f *os.File
}
// Open opens the watchdog device at path dev for reading and writing and
// wraps it in a Watchdog. As described in the package documentation, opening
// the device is what arms the watchdog.
//
// If the device cannot be opened, the error from os.OpenFile is returned
// unchanged and the Watchdog is nil.
func Open(dev string) (*Watchdog, error) {
	file, err := os.OpenFile(dev, os.O_RDWR, 0)
	if err != nil {
		return nil, err
	}
	wd := &Watchdog{f: file}
	return wd, nil
}
// Close releases the device file descriptor. Nothing is written to the
// device first, so the watchdog is not disarmed; use MagicClose for that.
func (w *Watchdog) Close() error {
	err := w.f.Close()
	return err
}
// MagicClose writes the magic character "V" to the device and then closes
// it, which disarms the watchdog. However, if the kernel is compiled with
// CONFIG_WATCHDOG_NOWAYOUT=y, there may be no way to disarm the watchdog.
//
// The device is closed whether or not the write succeeds. A write error takes
// precedence over any error from closing.
func (w *Watchdog) MagicClose() error {
	_, writeErr := w.f.Write([]byte("V"))
	closeErr := w.f.Close()
	if writeErr != nil {
		return writeErr
	}
	return closeErr
}
// Support asks the driver to describe itself and returns the resulting
// unix.WatchdogInfo. If the ioctl fails, the errno is returned and the info
// pointer is nil.
func (w *Watchdog) Support() (*unix.WatchdogInfo, error) {
	info := new(unix.WatchdogInfo)
	// The pointer-to-uintptr conversion must stay inside the call expression
	// so the struct is kept alive for the duration of the syscall.
	_, _, errno := unix.Syscall(unix.SYS_IOCTL, w.f.Fd(), wdiocGetSupport, uintptr(unsafe.Pointer(info)))
	if errno != 0 {
		return nil, errno
	}
	return info, nil
}
// Status queries the driver for its current status flags. On failure it
// returns StatusUnknown together with the ioctl error.
func (w *Watchdog) Status() (Status, error) {
	fd := int(w.f.Fd())
	raw, err := unix.IoctlGetUint32(fd, wdiocGetStatus)
	if err != nil {
		return StatusUnknown, err
	}
	return Status(raw), nil
}
// BootStatus queries the driver for the status flags recorded at the last
// reboot. On failure it returns StatusUnknown together with the ioctl error.
func (w *Watchdog) BootStatus() (Status, error) {
	fd := int(w.f.Fd())
	raw, err := unix.IoctlGetUint32(fd, wdiocGetBootStatus)
	if err != nil {
		return StatusUnknown, err
	}
	return Status(raw), nil
}
// SetOptions passes the given option flags to the driver, which can be used
// to control some aspects of the card's operation. It returns the errno from
// the ioctl on failure and nil on success.
func (w *Watchdog) SetOptions(options Option) error {
	// The pointer-to-uintptr conversion must stay inside the call expression.
	_, _, errno := unix.Syscall(unix.SYS_IOCTL, w.f.Fd(), wdiocSetOptions, uintptr(unsafe.Pointer(&options)))
	if errno != 0 {
		return errno
	}
	return nil
}
// KeepAlive pets the watchdog by writing a single character to the device.
// It returns the write error, if any; the device stays open either way.
func (w *Watchdog) KeepAlive() error {
	if _, err := w.f.WriteString("1"); err != nil {
		return err
	}
	return nil
}
// SetTimeout sets the watchdog timeout on the fly. timeout must be a
// multiple of seconds.
//
// It returns the errno if the ioctl fails. It also returns an error if the
// timeout reported back by the driver differs from the requested one, which
// covers both a driver that adjusted the value and a timeout that is not a
// whole number of seconds.
func (w *Watchdog) SetTimeout(timeout time.Duration) error {
	// The request takes a pointer to a 32-bit count of seconds. Passing a
	// pointer to the time.Duration itself (an int64 nanosecond count) would
	// make the kernel read the wrong value and write back over the argument.
	secs := int32(timeout / time.Second)
	if _, _, err := unix.Syscall(unix.SYS_IOCTL, w.f.Fd(), wdiocSetTimeout, uintptr(unsafe.Pointer(&secs))); err != 0 {
		return err
	}
	// The request is read/write: secs now holds the timeout actually applied.
	gotTimeout := time.Duration(secs) * time.Second
	if gotTimeout != timeout {
		return fmt.Errorf("Watchdog timeout set to %v, wanted %v", gotTimeout, timeout)
	}
	return nil
}
// Timeout reports the timeout currently configured on the watchdog. The
// driver's value is treated as a count of seconds. On failure it returns 0
// and the ioctl error.
func (w *Watchdog) Timeout() (time.Duration, error) {
	fd := int(w.f.Fd())
	secs, err := unix.IoctlGetUint32(fd, wdiocGetTimeout)
	if err != nil {
		return 0, err
	}
	return time.Second * time.Duration(secs), nil
}
// SetPreTimeout sets the watchdog pretimeout on the fly. The pretimeout is
// the duration before triggering the preaction (such as an NMI, interrupt,
// ...). timeout must be a multiple of seconds.
//
// It returns the errno if the ioctl fails. It also returns an error if the
// pretimeout reported back by the driver differs from the requested one,
// which covers both a driver that adjusted the value and a timeout that is
// not a whole number of seconds.
func (w *Watchdog) SetPreTimeout(timeout time.Duration) error {
	// The request takes a pointer to a 32-bit count of seconds. Passing a
	// pointer to the time.Duration itself (an int64 nanosecond count) would
	// make the kernel read the wrong value and write back over the argument.
	secs := int32(timeout / time.Second)
	if _, _, err := unix.Syscall(unix.SYS_IOCTL, w.f.Fd(), wdiocSetPreTimeout, uintptr(unsafe.Pointer(&secs))); err != 0 {
		return err
	}
	// The request is read/write: secs now holds the pretimeout actually applied.
	gotTimeout := time.Duration(secs) * time.Second
	if gotTimeout != timeout {
		return fmt.Errorf("Watchdog pretimeout set to %v, wanted %v", gotTimeout, timeout)
	}
	return nil
}
// PreTimeout reports the pretimeout currently configured on the watchdog.
// The driver's value is treated as a count of seconds. On failure it returns
// 0 and the ioctl error.
func (w *Watchdog) PreTimeout() (time.Duration, error) {
	fd := int(w.f.Fd())
	secs, err := unix.IoctlGetUint32(fd, wdiocGetPreTimeout)
	if err != nil {
		return 0, err
	}
	return time.Second * time.Duration(secs), nil
}
// TimeLeft reports how long remains before the watchdog triggers a reboot,
// to the nearest second. On failure it returns 0 and the ioctl error.
func (w *Watchdog) TimeLeft() (time.Duration, error) {
	fd := int(w.f.Fd())
	secs, err := unix.IoctlGetUint32(fd, wdiocGetTimeLeft)
	if err != nil {
		return 0, err
	}
	return time.Second * time.Duration(secs), nil
}