]>
Commit | Line | Data |
---|---|---|
d2c9b2a9 MF |
1 | /* Unshare daemonizer. |
2 | * Written by Mike Frysinger <vapier@gmail.com> | |
3 | * Released into the public domain. | |
4 | */ | |
5 | ||
6 | /* TODO: | |
d2c9b2a9 MF |
7 | * - Make pidns init optional. |
8 | * - Make setproctitle nicer and include program argv[0]. | |
c692d47f MF |
9 | * - Set up prctl(PR_SET_PDEATHSIG). |
10 | * - Set up prctl(PR_SET_CHILD_SUBREAPER). | |
d2c9b2a9 MF |
11 | */ |
12 | ||
13 | #define _GNU_SOURCE | |
14 | ||
15 | #include <err.h> | |
16 | #include <errno.h> | |
17 | #include <fcntl.h> | |
18 | #include <getopt.h> | |
19 | #include <sched.h> | |
20 | #include <signal.h> | |
21 | #include <stdbool.h> | |
22 | #include <stdint.h> | |
23 | #include <stdio.h> | |
24 | #include <stdlib.h> | |
25 | #include <string.h> | |
26 | #include <sysexits.h> | |
27 | #include <unistd.h> | |
28 | #include <net/if.h> | |
29 | #include <sys/ioctl.h> | |
30 | #include <sys/mount.h> | |
31 | #include <sys/prctl.h> | |
32 | #include <sys/socket.h> | |
33 | #include <sys/types.h> | |
34 | #include <sys/wait.h> | |
35 | ||
36 | #define PROG "vunshare" | |
37 | ||
/*
 * Thin wrapper around unshare(2).
 *
 * Returns true when the namespaces selected by @flags were unshared.
 * An EINVAL failure is reported as false so the caller can decide what to
 * do; every other failure terminates the program through err().
 */
static bool vunshare(int flags)
{
	if (unshare(flags) != -1)
		return true;

	/* Anything other than EINVAL is treated as fatal. */
	if (errno != EINVAL)
		err(1, "unshare failed");

	return false;
}
47 | ||
/*
 * Enter a new network namespace and bring its loopback interface up.
 *
 * Returns without doing anything else when vunshare() reports that the
 * namespace was not created.  Socket and ioctl failures are fatal (err()).
 */
static void unshare_net(void)
{
	struct ifreq req;
	int fd;

	if (!vunshare(CLONE_NEWNET))
		return;

	/* Any socket will do; it is only a handle for the interface ioctls. */
	fd = socket(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0);
	if (fd < 0)
		err(1, "socket(AF_LOCAL) failed");

	/* Equiv of `ip link set up lo`.  Kernel will assign 127.0.0.1 for us. */
	strcpy(req.ifr_name, "lo");
	if (ioctl(fd, SIOCGIFFLAGS, &req) < 0)
		err(1, "ioctl(SIOCGIFFLAGS) failed");

	/* The kernel preserves ifr_name, so the same request can be sent back. */
	req.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(fd, SIOCSIFFLAGS, &req) < 0)
		err(1, "ioctl(SIOCSIFFLAGS) failed");

	close(fd);
}
72 | ||
73 | static char **title_argv; | |
74 | static void setproctitle(const char *title) | |
75 | { | |
76 | /* Hopefully 1k is all we ever need. */ | |
77 | char newtitle[1024]; | |
78 | memset(newtitle, 0, sizeof(newtitle)); | |
79 | int len = sprintf(newtitle, "%s: %s [pid ns]", PROG, title); | |
80 | ||
81 | prctl(PR_SET_NAME, (uintptr_t)newtitle); | |
82 | ||
83 | /* Clobber argv to set the title. Need to figure out how much space though. */ | |
84 | int argc = 0; | |
85 | size_t i = 0; | |
86 | while (title_argv[argc]) | |
87 | i += strlen(title_argv[argc++]) + 1; | |
88 | /* Now scan the environ table. */ | |
89 | while (title_argv[argc]) | |
90 | i += strlen(title_argv[argc++]) + 1; | |
91 | ||
92 | if (i < len) | |
93 | newtitle[i] = '\0'; | |
94 | /* This will NUL pad the string for us too. */ | |
95 | strncpy(title_argv[0], newtitle, i); | |
96 | } | |
97 | ||
/*
 * Close file descriptors 3 through 9.  Descriptors 0-2 are left alone and
 * the result of each close() is ignored.
 */
static void close_fds(void)
{
	int fd = 3;

	while (fd < 10)
		close(fd++);
}
104 | ||
/*
 * Terminate the external init in a way that mirrors the wait status
 * @status collected from its child.
 *
 * If the child was killed by a signal, or exited with a code above 128,
 * the matching signal is re-raised against ourselves with its default
 * disposition.  Otherwise (or if that signal did not end the process) we
 * exit with the corresponding exit code.
 */
static void exit_as_status_ext(int status)
{
	int code = WEXITSTATUS(status);
	int signo;

	if (WIFSIGNALED(status)) {
		signo = WTERMSIG(status);
	} else if (code > 128) {
		/* For the external init, translate the signal status back.
		 * TODO: This gets it wrong when the child actually exited.
		 * We need to set up a pipe between the two inits so we can
		 * get back the proper details.
		 */
		signo = code - 128;
	} else {
		signo = 0;
	}

	if (signo == 0)
		exit(code);

	signal(signo, SIG_DFL);
	kill(getpid(), signo);

	/* Still here ?  Maybe the signal was masked.  Just exit. */
	exit(128 + signo);
}
131 | ||
/*
 * Terminate the internal init with an exit code derived from the wait
 * status @status: 128 + signal number when the child was signalled, the
 * child's own exit code otherwise.
 */
static void exit_as_status_int(int status)
{
	/* If we are the init for the pid ns, we can't kill ourselves --
	 * the kernel explicitly disallows this.  Just exit with a high
	 * status value instead.  Our parent will handle it themselves.
	 */
	exit(WIFSIGNALED(status) ? 128 + WTERMSIG(status) : WEXITSTATUS(status));
}
147 | ||
/*
 * Tentative definition: child_pid is also defined further down in this
 * file (C permits repeating it); it is repeated here only so that
 * reap_children() can see it.  setup_signal_handler() stores the pid of
 * the main child in it before either init starts reaping.
 */
static pid_t child_pid;

/*
 * Wait until this process has no children left and return a wait status.
 *
 * The status returned is the one belonging to child_pid when that child
 * is reaped; statuses of children reaped after it do not replace it.  If
 * child_pid is never seen (or was never set) the status of the last child
 * reaped is returned, and 1 if there was no child at all.
 */
static int reap_children(void)
{
	int result = 1;
	bool have_main = false;

	for (;;) {
		int status;
		pid_t pid = wait(&status);

		if (pid == -1) {
			/* An interrupted wait is not "no more children". */
			if (errno == EINTR)
				continue;
			break;
		}

		/* Once the main child's status is recorded, keep it. */
		if (have_main)
			continue;

		result = status;
		if (child_pid > 0 && pid == child_pid)
			have_main = true;
	}

	return result;
}
159 | ||
/* Pid of the child that forwarded signals are sent to (external init). */
static pid_t child_pid;

/*
 * Signal handler installed by setup_signal_handler() in both inits.
 *
 * As pid 1 (the internal init) a signal is re-sent to every process we
 * may signal, but only when siginfo->si_pid is 0; signals carrying a
 * sender pid are dropped.  In any other process (the external init) the
 * signal is forwarded to child_pid.  errno is preserved across the call
 * so the interrupted code never sees a value left behind by kill().
 */
static void signal_passthru(int sig, siginfo_t *siginfo, void *context)
{
	int saved_errno = errno;

	(void)context;

	if (getpid() == 1) {
		/* Internal init. */

		/* If the signal is coming from our children, ignore it.
		 * If it's coming from outside the pid ns, pass it along.
		 */
		if (siginfo->si_pid == 0) {
			/* Kill all the children! */
			kill(-1, sig);
		}
	} else {
		/* External init. */

		/* Just forward signal to the child. */
		kill(child_pid, sig);
	}

	errno = saved_errno;
}
181 | ||
182 | /* We want to forward some signals to the child process. Block the rest. | |
183 | * We don't actually exit as we wait for the child to die/process the signal | |
184 | * first, and then we'll kill/exit after that point. | |
185 | */ | |
186 | static void setup_signal_handler(pid_t pid) | |
187 | { | |
188 | int i; | |
189 | ||
190 | struct sigaction sa = { | |
191 | .sa_sigaction = signal_passthru, | |
192 | .sa_flags = SA_SIGINFO | SA_RESTART, | |
193 | }; | |
194 | ||
195 | child_pid = pid; | |
196 | ||
36408122 | 197 | for (i = 1; i < SIGRTMIN; ++i) |
d2c9b2a9 MF |
198 | if (sigaction(i, &sa, NULL) && errno != EINVAL) |
199 | fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno)); | |
200 | for (i = SIGRTMIN; i <= SIGRTMAX; ++i) | |
201 | if (sigaction(i, &sa, NULL) && errno != EINVAL) | |
202 | fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno)); | |
203 | ||
204 | /* As an init, we will reap the children via wait(). */ | |
205 | signal(SIGCHLD, SIG_DFL); | |
206 | } | |
207 | ||
/*
 * Enter a new pid namespace and set up the two "init" processes around
 * the program that will be exec'd.
 *
 * Returns false when vunshare(CLONE_NEWPID) reports failure.  Otherwise
 * forks twice: the first parent becomes the external init (or simply
 * exits when @daemonize is set), the second parent becomes the internal
 * init, and only the innermost child returns true to the caller.  When
 * @daemonize is set, the surviving child also starts a new session and
 * points stdin/stdout/stderr at /dev/null.  All failures are fatal.
 */
static bool unshare_pid(bool daemonize)
{
	pid_t child;

	if (!vunshare(CLONE_NEWPID))
		return false;

	/* Set up external init process. */
	child = fork();
	if (child == -1)
		err(1, "fork() failed");
	if (child != 0) {
		/* Parent side of the first fork. */
		if (daemonize)
			exit(0);
		setproctitle("ext init");
		setup_signal_handler(child);
		close_fds();
		exit_as_status_ext(reap_children());
	}

	if (daemonize) {
		int null_fd;

		if (setsid() == -1)
			err(1, "setsid() failed");

		null_fd = open("/dev/null", O_RDWR);
		if (null_fd == -1)
			err(1, "open(/dev/null) failed");

		/* Redirect stdin, stdout and stderr in that order. */
		for (int target = 0; target <= 2; ++target)
			if (dup2(null_fd, target) == -1)
				err(1, "dup2() failed");

		if (null_fd > 2)
			close(null_fd);
	}

	/* Set up fresh /proc. */
	if (mount("none", "/proc", NULL, MS_PRIVATE | MS_REC, ""))
		err(1, "mount(/proc, MS_PRIVATE) failed");
	if (mount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, ""))
		err(1, "mount(/proc) failed");

	/* Set up internal init process. */
	child = fork();
	if (child == -1)
		err(1, "fork() failed");
	if (child != 0) {
		/* Parent side of the second fork. */
		setproctitle("int init");
		setup_signal_handler(child);
		close_fds();
		exit_as_status_int(reap_children());
	}

	return true;
}
262 | ||
/*
 * Write the single-entry mapping "<inner> <outer> 1" to the id map file
 * at @path.
 *
 * Failing to open the file is fatal.  A failed write is reported with
 * warn() and execution continues.
 */
static void write_id_map(const char *path, unsigned int inner, unsigned int outer)
{
	FILE *fp = fopen(path, "w");
	int saved_errno = 0;

	if (fp == NULL)
		err(1, "fopen(%s) failed", path);

	if (fprintf(fp, "%u %u 1\n", inner, outer) < 0)
		saved_errno = errno;

	/* stdio buffers the text, so the real write(2) -- and any error from
	 * it -- usually only happens when the stream is closed.
	 */
	if (fclose(fp) != 0 && saved_errno == 0)
		saved_errno = errno;

	if (saved_errno != 0) {
		errno = saved_errno;
		warn("writing %s failed", path);
	}
}

/*
 * Map one uid and one gid for the current process.
 *
 * @iuid/@igid are written as the first field and @ouid/@ogid as the second
 * field of /proc/self/uid_map and /proc/self/gid_map respectively, each
 * with a range length of 1.  "deny" is written to /proc/self/setgroups
 * first on a best-effort basis: if that file cannot be opened it is
 * skipped.
 */
static void map_uid_gid(uid_t iuid, gid_t igid, uid_t ouid, gid_t ogid)
{
	FILE *fp;

	/* Best effort only; presumably absent on kernels without this file. */
	fp = fopen("/proc/self/setgroups", "w");
	if (fp) {
		fputs("deny\n", fp);
		fclose(fp);
	}

	write_id_map("/proc/self/uid_map", (unsigned int)iuid, (unsigned int)ouid);
	write_id_map("/proc/self/gid_map", (unsigned int)igid, (unsigned int)ogid);
}
281 | ||
/*
 * Long options understood by main().  "pid" takes an argument and makes
 * getopt_long() return 1; the all-zero entry terminates the table.
 */
#define a_argument required_argument
static const struct option opts[] = {
	{ .name = "pid", .has_arg = a_argument, .flag = NULL, .val = 1 },
	{ .name = NULL,  .has_arg = 0,          .flag = NULL, .val = 0 },
};
287 | ||
/*
 * Print the command line help to stdout and exit with EX_USAGE.
 * Lists every option main() accepts, including the long-only --pid.
 */
static void usage(void)
{
	puts(
		"Usage: unshare [options] <program>\n"
		"\n"
		"Options: [DimnpuU]\n"
		" -i Use IPC namespaces\n"
		" -m Use mount namespaces\n"
		" -n Use net namespaces\n"
		" -p Use pid namespaces\n"
		" -u Use UTS namespaces\n"
		" -U Use user namespaces\n"
		" -D Daemonize program\n"
		" --pid <file> Write the program's pid to <file>"
	);
	exit(EX_USAGE);
}
304 | ||
305 | int main(int argc, char *argv[]) | |
306 | { | |
307 | int c; | |
308 | FILE *pidfp; | |
309 | const char *pid = NULL; | |
310 | bool newipc = false; | |
311 | bool newmnt = false; | |
312 | bool newnet = false; | |
313 | bool newpid = false; | |
314 | bool newuts = false; | |
315 | bool newusr = false; | |
316 | bool daemonize = false; | |
317 | uid_t uid; | |
318 | gid_t gid; | |
319 | ||
320 | title_argv = argv; | |
321 | ||
322 | while ((c = getopt_long(argc, argv, "+DimnpuU", opts, NULL)) != -1) { | |
323 | switch (c) { | |
324 | case 1: | |
325 | pid = optarg; | |
326 | break; | |
327 | case 'i': newipc = true; break; | |
328 | case 'm': newmnt = true; break; | |
329 | case 'n': newnet = true; break; | |
330 | case 'p': newpid = true; break; | |
331 | case 'u': newuts = true; break; | |
332 | case 'U': newusr = true; break; | |
333 | case 'D': daemonize = true; break; | |
334 | default: | |
335 | usage(); | |
336 | } | |
337 | } | |
338 | argc -= optind; | |
339 | argv += optind; | |
340 | ||
341 | if (argc == 0) | |
342 | usage(); | |
343 | ||
344 | if (newusr) { | |
345 | uid = getuid(); | |
346 | gid = getgid(); | |
347 | if (vunshare(CLONE_NEWUSER)) | |
348 | map_uid_gid(0, 0, uid, gid); | |
349 | else | |
350 | newusr = false; | |
351 | } | |
352 | ||
353 | if (newmnt || newpid) | |
354 | vunshare(CLONE_NEWNS); | |
355 | if (newuts) | |
356 | vunshare(CLONE_NEWUTS); | |
357 | if (newipc) | |
358 | vunshare(CLONE_NEWIPC); | |
359 | if (newnet) | |
360 | unshare_net(); | |
361 | ||
362 | if (pid) { | |
363 | pidfp = fopen(pid, "we"); | |
364 | if (pidfp == NULL) | |
365 | err(1, "fopen(%s) failed", pid); | |
366 | } | |
367 | ||
368 | if (newpid && unshare_pid(daemonize)) { | |
369 | /* Nothing. */ | |
370 | } else if (daemonize) | |
371 | if (daemon(1, 0)) | |
372 | err(1, "daemon() failed"); | |
373 | ||
374 | if (pid) { | |
375 | fprintf(pidfp, "%u\n", getpid()); | |
376 | fclose(pidfp); | |
377 | } | |
378 | ||
379 | if (newusr) | |
380 | if (vunshare(CLONE_NEWUSER)) | |
381 | map_uid_gid(uid, gid, 0, 0); | |
382 | ||
383 | execvp(argv[0], argv); | |
384 | fprintf(stderr, "%s: %s\n", argv[0], strerror(errno)); | |
385 | return 127; | |
386 | } |