-
Notifications
You must be signed in to change notification settings - Fork 18k
/
Copy pathpidfd_linux.go
208 lines (180 loc) · 5.72 KB
/
pidfd_linux.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Support for pidfd was added during the course of a few Linux releases:
// v5.1: pidfd_send_signal syscall;
// v5.2: CLONE_PIDFD flag for clone syscall;
// v5.3: pidfd_open syscall, clone3 syscall;
// v5.4: P_PIDFD idtype support for waitid syscall;
// v5.6: pidfd_getfd syscall.
//
// N.B. Alternative Linux implementations may not follow this ordering. e.g.,
// QEMU user mode 7.2 added pidfd_open, but CLONE_PIDFD was not added until
// 8.0.
package os
import (
"errors"
"internal/syscall/unix"
"runtime"
"sync"
"syscall"
_ "unsafe" // for linkname
)
// ensurePidfd makes sure the returned SysProcAttr carries a non-nil PidFD
// pointer whenever pidfd is usable on this system.
//
// The caller's struct is never modified: a nil sysAttr yields a freshly
// allocated struct, and a non-nil sysAttr whose PidFD is unset yields a
// shallow copy with PidFD filled in. In both of those cases, and when pidfd
// does not work at all, the returned flag is false. The flag is true only
// when the caller supplied its own PidFD pointer, telling the caller that the
// descriptor should be duplicated before it is used.
func ensurePidfd(sysAttr *syscall.SysProcAttr) (*syscall.SysProcAttr, bool) {
	if !pidfdWorks() {
		return sysAttr, false
	}

	switch {
	case sysAttr == nil:
		// Nothing supplied by the caller: build a minimal attribute set.
		attr := new(syscall.SysProcAttr)
		attr.PidFD = new(int)
		return attr, false
	case sysAttr.PidFD == nil:
		// Work on a shallow copy so the caller's struct stays untouched.
		clone := *sysAttr
		clone.PidFD = new(int)
		return &clone, false
	}

	// The caller owns PidFD; ask for a duplicate.
	return sysAttr, true
}
// getPidfd reads the descriptor stored in sysAttr.PidFD and reports whether
// it may be used. When needDup is set, the descriptor is first duplicated
// with fcntl(F_DUPFD_CLOEXEC) and the duplicate is returned instead.
//
// The result is (0, false) when pidfd does not work on this system or when
// the duplication fails. sysAttr.PidFD is dereferenced without a nil check,
// so it must have been populated beforehand (ensurePidfd does that whenever
// pidfd works).
func getPidfd(sysAttr *syscall.SysProcAttr, needDup bool) (uintptr, bool) {
	if !pidfdWorks() {
		return 0, false
	}

	fd := *sysAttr.PidFD
	if !needDup {
		return uintptr(fd), true
	}

	dupFD, err := unix.Fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0)
	if err != nil {
		return 0, false
	}
	return uintptr(dupFD), true
}
// pidfdFind opens a pidfd for pid and returns it as the process handle.
//
// It returns syscall.ENOSYS when pidfd does not work on this system. An error
// from pidfd_open is passed through convertESRCH before being returned.
func pidfdFind(pid int) (uintptr, error) {
	if works := pidfdWorks(); !works {
		return 0, syscall.ENOSYS
	}

	fd, openErr := unix.PidFDOpen(pid, 0)
	if openErr == nil {
		return fd, nil
	}
	return 0, convertESRCH(openErr)
}
// pidfdWait blocks until the process exits, marks the Process as done and
// returns the resulting ProcessState.
//
// If the process status is already statusDone it returns an ECHILD
// SyscallError for "wait"; if the handle has been released it returns
// syscall.EINVAL. A waitid failure is wrapped in a "waitid" SyscallError.
func (p *Process) pidfdWait() (*ProcessState, error) {
	// A pidfd always refers to one particular process, so the PID recycling
	// problem behind the wait/kill race (described in CL 23967) cannot
	// occur here, and the blockUntilWaitable + sigMu workaround used by
	// pidWait is not needed.
	//
	// What still needs care is reuse of the pidfd FD number once the pidfd
	// is closed. See handle for more details.
	fd, st := p.handleTransientAcquire()
	if st == statusDone {
		// Either Wait already completed for this process or pidfdFind
		// did not find it. Report ECHILD, matching what the wait
		// syscall would return.
		return nil, NewSyscallError("wait", syscall.ECHILD)
	}
	if st == statusReleased {
		return nil, syscall.EINVAL
	}
	defer p.handleTransientRelease()

	var siginfo unix.SiginfoChild
	var usage syscall.Rusage
	waitErr := ignoringEINTR(func() error {
		return unix.Waitid(unix.P_PIDFD, int(fd), &siginfo, syscall.WEXITED, &usage)
	})
	if waitErr != nil {
		return nil, NewSyscallError("waitid", waitErr)
	}

	// Move the Process to statusDone; this also drops a reference to the
	// handle.
	p.doRelease(statusDone)

	state := &ProcessState{
		pid:    int(siginfo.Pid),
		status: siginfo.WaitStatus(),
		rusage: &usage,
	}
	return state, nil
}
// pidfdSendSignal delivers signal s to the process through its pidfd.
//
// It returns ErrProcessDone when the process status is statusDone and an
// "os: process already released" error when the handle has been released.
// Otherwise the result of pidfd_send_signal is passed through convertESRCH.
func (p *Process) pidfdSendSignal(s syscall.Signal) error {
	fd, st := p.handleTransientAcquire()
	if st == statusDone {
		return ErrProcessDone
	}
	if st == statusReleased {
		return errors.New("os: process already released")
	}
	// Hold the transient reference until the signal has been sent.
	defer p.handleTransientRelease()

	sigErr := unix.PidFDSendSignal(fd, s)
	return convertESRCH(sigErr)
}
// pidfdWorks reports whether pidfd can be used on this system, i.e. whether
// checkPidfdOnce returned a nil error.
func pidfdWorks() bool {
	probeErr := checkPidfdOnce()
	return probeErr == nil
}
// checkPidfdOnce wraps checkPidfd in sync.OnceValue, so the pidfd probe is
// executed only once; every call returns the error (or nil) produced by that
// single run. pidfdWorks is its caller in this file.
var checkPidfdOnce = sync.OnceValue(checkPidfd)
// checkPidfd probes every pidfd facility this package relies on and returns
// nil only if all of them work: the pidfd_open and pidfd_send_signal
// syscalls, the waitid syscall with an idtype of P_PIDFD, and
// clone(CLONE_PIDFD).
//
// A probe can fail because the kernel is too old, or because the execution
// environment restricts these system calls with seccomp or a similar
// technology. The first failing probe determines the returned error.
func checkPidfd() error {
	// On Android versions before 12, seccomp does not allow pidfd-related
	// system calls and answers them with SIGSYS. See issue #69065.
	if runtime.GOOS == "android" {
		ignoreSIGSYS()
		defer restoreSIGSYS()
	}

	// Obtain a pidfd for the current process. Opening "/proc/self" is not
	// an option because that would not work for waitid.
	self, openErr := unix.PidFDOpen(syscall.Getpid(), 0)
	if openErr != nil {
		return NewSyscallError("pidfd_open", openErr)
	}
	defer syscall.Close(int(self))

	// Probe waitid(P_PIDFD). We are not our own parent, so the expected
	// outcome is ECHILD; anything else counts as a failure.
	//
	// NOTE(review): os.NewSyscallError returns nil for a nil err, so an
	// unexpected nil from waitid would be reported as success here.
	// Presumably unreachable, since waiting on our own pidfd should never
	// succeed; confirm.
	probeWait := func() error {
		return unix.Waitid(unix.P_PIDFD, int(self), nil, syscall.WEXITED, nil)
	}
	if waitErr := ignoringEINTR(probeWait); waitErr != syscall.ECHILD {
		return NewSyscallError("pidfd_wait", waitErr)
	}

	// Probe pidfd_send_signal: sending signal 0 to ourselves should work.
	if sigErr := unix.PidFDSendSignal(self, 0); sigErr != nil {
		return NewSyscallError("pidfd_send_signal", sigErr)
	}

	// Finally, probe clone(CLONE_PIDFD).
	//
	// On mainline Linux this is implied by the checks above, because
	// pidfd_open (5.3) came after CLONE_PIDFD (5.2), but some alternative
	// Linux implementations may not adhere to this ordering.
	return checkClonePidfd()
}
// checkClonePidfd is declared without a body; its implementation is provided
// by package syscall and bound through the linkname directive below.
// checkPidfd calls it as the last step of its probe to verify that
// clone(CLONE_PIDFD) works, and returns its error unchanged.
//
//go:linkname checkClonePidfd
func checkClonePidfd() error
// ignoreSIGSYS is declared without a body; its implementation is provided by
// package runtime and bound through the linkname directive below.
// checkPidfd calls it on android before probing the pidfd system calls,
// which seccomp may answer with SIGSYS there (see issue #69065).
// NOTE(review): presumably makes the runtime ignore SIGSYS until
// restoreSIGSYS runs; confirm against the runtime implementation.
//
//go:linkname ignoreSIGSYS
func ignoreSIGSYS()
// restoreSIGSYS is declared without a body and bound through the linkname
// directive below. checkPidfd defers it on android immediately after calling
// ignoreSIGSYS.
// NOTE(review): presumably provided by runtime and undoes the effect of
// ignoreSIGSYS; confirm against the runtime implementation.
//
//go:linkname restoreSIGSYS
func restoreSIGSYS()