Skip to content

Commit d28451d

Browse files
committed
Avoid using two forks
One fork is enough, not only when eating. The problem with the two forks was that it destroyed the relationship between the supervisor and the process to be supervised: ┌─────────────┐ │Main process │ └┬───────────┬┘ ┌▽─────────┐┌▽─────────┐ │Supervisor││Supervised│ └──────────┘└──────────┘ This meant that the supervised process lost the predefined relationship with the supervisor, thus requiring a more loose /proc/sys/kernel/yama/ptrace_scope if the supervisor wants to read memory. With the new setup, the main process is the supervisor. ┌──────────┐ │Supervisor│ └┬─────────┘ ┌▽─────────┐ │Supervised│ └──────────┘ Thus this allows it to work with restricted ptrace permissions as well. Now the final annoying implementation detail is that ioctl(SECCOMP_IOCTL_NOTIF_ADDFD) might block forever when the child exits (see man seccomp_unotify(2)), so we still have to resort to some rather ugly exit() hack, instead of just letting the process clean up itself. There already has been some discussion [0] around improving this ioctl behaviour and even a suggested patch [1], but for now this is how we have to deal with the situation. [0] https://lore.kernel.org/linux-man/CAG48ez2xn+_KznEztJ-eVTsTzkbf9CVgPqaAk7TpRNAqbdaRoA@mail.gmail.com/ [1] https://lists.linuxfoundation.org/pipermail/containers/2020-November/042590.html
1 parent acb0f9e commit d28451d

File tree

1 file changed

+49
-45
lines changed

1 file changed

+49
-45
lines changed

src/bin/seccomp/seccomp_exec.c

+49-45
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@ static const int scalls[] = {
1717
__NR_openat2,
1818
};
1919

20+
void handle_child_exit(int) {
21+
// For now ok like this...
22+
// Ideally we want to perform the cleanup in a more elegant way, than just straight up exiting.
23+
// But there are bugs, that cause ioctl(SECCOMP_IOCTL_NOTIF_ADDFD) to block forever when the child exits,
24+
// so it is easier for now to do it this way.
25+
exit(0);
26+
}
27+
2028
int seccomp_child(const char *file, char *const argv[], struct seccomp_state *state) {
2129
// agree to not gain any new privs, see man 2 seccomp section SECCOMP_SET_MODE_FILTER
2230
prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
@@ -49,55 +57,51 @@ int seccomp_parent(struct seccomp_state *state) {
4957
return -1;
5058
}
5159

52-
// fork is not strictly necessary, but this way we can just wait for the tracee to exit and kill the tracer
53-
pid_t tracer = fork();
54-
if (tracer) {
55-
// parent
56-
int status, result = 0;
57-
close(state->listener);
58-
waitpid(state->task_pid, &status, 0);
59-
if (!WIFEXITED(status) || WEXITSTATUS(status)) {
60-
result = 1;
61-
}
62-
kill(tracer, SIGKILL);
63-
return result;
64-
} else {
65-
// child
66-
struct seccomp_notif *req;
67-
struct seccomp_notif_resp *resp;
68-
struct seccomp_notif_sizes sizes;
69-
70-
if (seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0) {
71-
perror("seccomp(GET_NOTIF_SIZES)");
72-
return -1;
60+
// install a signal handler so that we can quit the supervisor, once the target has terminated
61+
struct sigaction sa;
62+
sa.sa_handler = handle_child_exit;
63+
sa.sa_flags = 0;
64+
sigemptyset(&sa.sa_mask);
65+
if (sigaction(SIGCHLD, &sa, NULL) == -1) {
66+
perror("sigaction(SIGCHLD)");
67+
return -1;
68+
}
69+
70+
// setup supervisor
71+
struct seccomp_notif *req;
72+
struct seccomp_notif_resp *resp;
73+
struct seccomp_notif_sizes sizes;
74+
75+
if (seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0) {
76+
perror("seccomp(GET_NOTIF_SIZES)");
77+
return -1;
78+
}
79+
req = malloc(sizes.seccomp_notif);
80+
resp = malloc(sizes.seccomp_notif_resp);
81+
memset(resp, 0, sizes.seccomp_notif_resp);
82+
83+
while (1) {
84+
memset(req, 0, sizes.seccomp_notif);
85+
if (ioctl(state->listener, SECCOMP_IOCTL_NOTIF_RECV, req)) {
86+
perror("ioctl recv");
87+
break;
7388
}
74-
req = malloc(sizes.seccomp_notif);
75-
resp = malloc(sizes.seccomp_notif_resp);
76-
memset(resp, 0, sizes.seccomp_notif_resp);
77-
78-
while (1) {
79-
memset(req, 0, sizes.seccomp_notif);
80-
if (ioctl(state->listener, SECCOMP_IOCTL_NOTIF_RECV, req)) {
81-
perror("ioctl recv");
82-
break;
83-
}
84-
if (handle_req(req, resp, state->listener) < 0) {
85-
break;
86-
}
87-
88-
// task got a signal and restarted the syscall
89-
if (ioctl(state->listener, SECCOMP_IOCTL_NOTIF_SEND, resp) < 0 && errno != ENOENT) {
90-
perror("ioctl send");
91-
break;
92-
}
89+
if (handle_req(req, resp, state->listener) < 0) {
90+
break;
9391
}
9492

95-
// cleanup
96-
free(resp);
97-
free(req);
98-
close(state->listener);
99-
exit(EXIT_FAILURE);
93+
// task got a signal and restarted the syscall
94+
if (ioctl(state->listener, SECCOMP_IOCTL_NOTIF_SEND, resp) < 0 && errno != ENOENT) {
95+
perror("ioctl send");
96+
break;
97+
}
10098
}
99+
100+
// cleanup
101+
free(resp);
102+
free(req);
103+
close(state->listener);
104+
exit(EXIT_FAILURE);
101105
}
102106

103107
int seccomp_exec(const char *file, char *const argv[]) {

0 commit comments

Comments
 (0)