]>
Commit | Line | Data |
---|---|---|
d2c9b2a9 MF |
1 | /* Unshare daemonizer. |
2 | * Written by Mike Frysinger <vapier@gmail.com> | |
3 | * Released into the public domain. | |
4 | */ | |
5 | ||
6 | /* TODO: | |
7 | * - Add userns support. | |
8 | * - Make pidns init optional. | |
9 | * - Make setproctitle nicer and include program argv[0]. | |
c692d47f MF |
10 | * - Set up prctl(PR_SET_PDEATHSIG). |
11 | * - Set up prctl(PR_SET_CHILD_SUBREAPER). | |
d2c9b2a9 MF |
12 | */ |
13 | ||
14 | #define _GNU_SOURCE | |
15 | ||
16 | #include <err.h> | |
17 | #include <errno.h> | |
18 | #include <fcntl.h> | |
19 | #include <getopt.h> | |
20 | #include <sched.h> | |
21 | #include <signal.h> | |
22 | #include <stdbool.h> | |
23 | #include <stdint.h> | |
24 | #include <stdio.h> | |
25 | #include <stdlib.h> | |
26 | #include <string.h> | |
27 | #include <sysexits.h> | |
28 | #include <unistd.h> | |
29 | #include <net/if.h> | |
30 | #include <sys/ioctl.h> | |
31 | #include <sys/mount.h> | |
32 | #include <sys/prctl.h> | |
33 | #include <sys/socket.h> | |
34 | #include <sys/types.h> | |
35 | #include <sys/wait.h> | |
36 | ||
37 | #define PROG "vunshare" | |
38 | ||
/* Thin wrapper around unshare(2).
 *
 * Returns true when unshare(flags) succeeded and false when it failed with
 * EINVAL, so callers can carry on without that namespace.  Any other failure
 * is fatal: the process exits through err().
 */
static bool vunshare(int flags)
{
	if (unshare(flags) != -1)
		return true;

	/* Only EINVAL is treated as a soft failure. */
	if (errno != EINVAL)
		err(1, "unshare failed");

	return false;
}
48 | ||
/* Move into a new network namespace and bring its loopback device up.
 *
 * If the namespace cannot be unshared (vunshare() returned false) nothing
 * else is attempted.  Every later failure is fatal and exits through err().
 */
static void unshare_net(void)
{
	struct ifreq lo_req;
	int ctl_fd;

	if (!vunshare(CLONE_NEWNET))
		return;

	/* Any socket will do as a handle for the interface ioctls. */
	ctl_fd = socket(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0);
	if (ctl_fd < 0)
		err(1, "socket(AF_LOCAL) failed");

	/* Equiv of `ip link set up lo`.  Kernel will assign 127.0.0.1 for us. */
	strcpy(lo_req.ifr_name, "lo");
	if (ioctl(ctl_fd, SIOCGIFFLAGS, &lo_req) < 0)
		err(1, "ioctl(SIOCGIFFLAGS) failed");

	/* The kernel preserves ifr_name, so the same request can be reused
	 * to write the updated flags back.
	 */
	lo_req.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(ctl_fd, SIOCSIFFLAGS, &lo_req) < 0)
		err(1, "ioctl(SIOCSIFFLAGS) failed");

	close(ctl_fd);
}
73 | ||
/* The program's original argv vector; main() stores it here so that
 * setproctitle() can overwrite the argument strings in place.
 */
static char **title_argv;

/* Rename the current process to "<PROG>: <title> [pid ns]".
 *
 * The name is set twice: through prctl(PR_SET_NAME) and by overwriting the
 * original argv strings that title_argv points at.  Only the bytes occupied
 * by the argv strings themselves are used; the title is truncated to fit and
 * the remainder of that area is NUL filled.  The area always ends in a NUL
 * byte so that the command line stays properly terminated.
 *
 * Nothing is written to argv when title_argv is unset or empty.
 */
static void setproctitle(const char *title)
{
	/* Hopefully 1k is all we ever need; snprintf() truncates otherwise. */
	char newtitle[1024];
	int len = snprintf(newtitle, sizeof(newtitle), "%s: %s [pid ns]", PROG, title);
	if (len < 0)
		return;

	size_t title_len = (size_t)len;
	if (title_len >= sizeof(newtitle))
		title_len = sizeof(newtitle) - 1;

	prctl(PR_SET_NAME, (uintptr_t)newtitle);

	if (title_argv == NULL || title_argv[0] == NULL)
		return;

	/* Clobber argv to set the title.  Figure out how much space the
	 * argument strings (including their terminators) take up.  The
	 * environment strings are deliberately left alone.
	 */
	size_t space = 0;
	for (size_t n = 0; title_argv[n] != NULL; ++n)
		space += strlen(title_argv[n]) + 1;

	/* Keep the final byte of the area for the terminating NUL. */
	if (title_len >= space)
		title_len = space - 1;

	memset(title_argv[0], 0, space);
	memcpy(title_argv[0], newtitle, title_len);
}
98 | ||
/* Close file descriptors 3 through 9, leaving stdin/stdout/stderr open.
 * Errors from close() (e.g. for descriptors that are not open) are ignored.
 */
static void close_fds(void)
{
	int fd = 3;

	while (fd < 10)
		close(fd++);
}
105 | ||
/* Terminate the external init so that it mirrors the wait status |status|.
 *
 * If the status describes death by signal, or an exit code above 128 (the
 * encoding exit_as_status_int() uses for signals), the same signal is raised
 * against ourselves with its default disposition.  Otherwise, or if that
 * signal does not end the process, a plain exit() is used.  Never returns.
 */
static void exit_as_status_ext(int status)
{
	int exit_status = WEXITSTATUS(status);
	int sig_status;

	if (WIFSIGNALED(status)) {
		sig_status = WTERMSIG(status);
	} else if (exit_status > 128) {
		/* For the external init, translate the signal status back.
		 * TODO: This gets it wrong when the child actually exited.
		 * We need to set up a pipe between the two inits so we can
		 * get back the proper details.
		 */
		sig_status = exit_status - 128;
	} else {
		sig_status = 0;
	}

	if (sig_status == 0)
		exit(exit_status);

	signal(sig_status, SIG_DFL);
	kill(getpid(), sig_status);

	/* Still here ?  Maybe the signal was masked.  Just exit. */
	exit(128 + sig_status);
}
132 | ||
/* Terminate the internal init with an exit code derived from |status|.
 *
 * If we are the init for the pid ns, we can't kill ourselves -- the kernel
 * explicitly disallows this.  So a signal death is reported as exit code
 * 128 + signal number, and a normal exit passes its code through unchanged.
 * Our parent will handle it themselves.  Never returns.
 */
static void exit_as_status_int(int status)
{
	exit(WIFSIGNALED(status) ? 128 + WTERMSIG(status) : WEXITSTATUS(status));
}
148 | ||
/* Wait until this process has no children left.
 *
 * Returns the raw wait status (as filled in by wait()) of the last child
 * that was reaped, or the initial value 1 when no child was reaped at all.
 * Callers decode the value with the W*() macros.
 *
 * A wait() that was merely interrupted by a signal is retried rather than
 * being mistaken for "no more children".
 */
static int reap_children(void)
{
	int status = 1;

	for (;;) {
		if (wait(&status) != -1)
			continue;

		/* Interrupted before anything was reaped: try again. */
		if (errno == EINTR)
			continue;

		/* ECHILD (nothing left to wait for) or a hard error. */
		break;
	}

	return status;
}
160 | ||
/* The child that the external init forwards signals to; set by
 * setup_signal_handler().
 */
static pid_t child_pid;

/* SA_SIGINFO signal handler that relays |sig| towards the managed processes.
 *
 * When running as pid 1 (the internal init), signals whose sender pid is
 * non-zero are ignored and everything else is sent to every process we may
 * signal via kill(-1).  Otherwise (the external init) the signal is sent on
 * to child_pid.
 *
 * errno is saved and restored so that the interrupted code never observes
 * an error value left behind by kill().
 */
static void signal_passthru(int sig, siginfo_t *siginfo, void *context)
{
	int saved_errno = errno;

	(void)context;

	if (getpid() == 1) {
		/* Internal init. */

		/* If the signal is coming from our children, ignore it.
		 * If it's coming from outside the pid ns, pass it along.
		 */
		if (siginfo->si_pid == 0) {
			/* Kill all the children! */
			kill(-1, sig);
		}
	} else {
		/* External init. */

		/* Just forward signal to the child. */
		kill(child_pid, sig);
	}

	errno = saved_errno;
}
182 | ||
183 | /* We want to forward some signals to the child process. Block the rest. | |
184 | * We don't actually exit as we wait for the child to die/process the signal | |
185 | * first, and then we'll kill/exit after that point. | |
186 | */ | |
187 | static void setup_signal_handler(pid_t pid) | |
188 | { | |
189 | int i; | |
190 | ||
191 | struct sigaction sa = { | |
192 | .sa_sigaction = signal_passthru, | |
193 | .sa_flags = SA_SIGINFO | SA_RESTART, | |
194 | }; | |
195 | ||
196 | child_pid = pid; | |
197 | ||
198 | for (i = 1; i < SIGUNUSED; ++i) | |
199 | if (sigaction(i, &sa, NULL) && errno != EINVAL) | |
200 | fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno)); | |
201 | for (i = SIGRTMIN; i <= SIGRTMAX; ++i) | |
202 | if (sigaction(i, &sa, NULL) && errno != EINVAL) | |
203 | fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno)); | |
204 | ||
205 | /* As an init, we will reap the children via wait(). */ | |
206 | signal(SIGCHLD, SIG_DFL); | |
207 | } | |
208 | ||
/* Enter a new pid namespace and set up the two supervising "init" processes.
 *
 * Returns false (having done nothing else) when the pid namespace could not
 * be unshared.  Otherwise two forks take place and only the innermost child
 * returns, with true:
 *
 *  - The original process becomes the "external init": it exits right away
 *    when |daemonize| is set, else it forwards signals to its child, waits
 *    for it and exits via exit_as_status_ext().
 *  - The first child detaches from the session and points fds 0-2 at
 *    /dev/null when |daemonize| is set, mounts a fresh /proc, forks again and
 *    then acts as the "internal init", exiting via exit_as_status_int().
 *
 * Any failing fork/setsid/open/dup2/mount is fatal and exits through err().
 */
static bool unshare_pid(bool daemonize)
{
	pid_t kid;

	if (!vunshare(CLONE_NEWPID))
		return false;

	/* Set up external init process. */
	kid = fork();
	if (kid == -1)
		err(1, "fork() failed");
	if (kid != 0) {
		/* Parent side of the first fork. */
		if (daemonize)
			exit(0);
		setproctitle("ext init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_ext(reap_children());
	}

	if (daemonize) {
		int null_fd;
		int target;

		if (setsid() == -1)
			err(1, "setsid() failed");

		null_fd = open("/dev/null", O_RDWR);
		if (null_fd == -1)
			err(1, "open(/dev/null) failed");

		/* Redirect stdin, stdout and stderr, in that order. */
		for (target = 0; target <= 2; ++target)
			if (dup2(null_fd, target) == -1)
				err(1, "dup2() failed");

		if (null_fd > 2)
			close(null_fd);
	}

	/* Set up fresh /proc. */
	if (mount("none", "/proc", NULL, MS_PRIVATE | MS_REC, ""))
		err(1, "mount(/proc, MS_PRIVATE) failed");
	if (mount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, ""))
		err(1, "mount(/proc) failed");

	/* Set up internal init process. */
	kid = fork();
	if (kid == -1)
		err(1, "fork() failed");
	if (kid != 0) {
		/* Parent side of the second fork. */
		setproctitle("int init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_int(reap_children());
	}

	return true;
}
263 | ||
/* Write the single-entry mapping "<inner> <outer> 1" to the id map file
 * |path|.  Failing to open the file is fatal (exits through err()); a failed
 * write is reported with warn() and otherwise ignored.
 */
static void write_id_map(const char *path, unsigned int inner, unsigned int outer)
{
	FILE *fp = fopen(path, "w");
	bool ok;

	if (fp == NULL)
		err(1, "fopen(%s) failed", path);

	ok = fprintf(fp, "%u %u 1\n", inner, outer) >= 0;
	/* Buffered data only reaches the kernel on flush, so the result of
	 * fclose() is what really tells us whether the write was accepted.
	 */
	if (fclose(fp) != 0)
		ok = false;
	if (!ok)
		warn("writing %s failed", path);
}

/* Map one uid and one gid for the current process's user namespace.
 *
 * |iuid|/|igid| are written as the first field and |ouid|/|ogid| as the
 * second field of /proc/self/uid_map and /proc/self/gid_map respectively,
 * each with a range length of 1.  Before that, "deny" is written to
 * /proc/self/setgroups on a best-effort basis: the file is skipped when it
 * cannot be opened.
 */
static void map_uid_gid(uid_t iuid, gid_t igid, uid_t ouid, gid_t ogid)
{
	FILE *fp;

	fp = fopen("/proc/self/setgroups", "w");
	if (fp) {
		fputs("deny\n", fp);
		fclose(fp);
	}

	write_id_map("/proc/self/uid_map", (unsigned int)iuid, (unsigned int)ouid);
	write_id_map("/proc/self/gid_map", (unsigned int)igid, (unsigned int)ogid);
}
282 | ||
/* Long options understood by main().  "--pid" takes an argument and is
 * reported by getopt_long() as the value 1.
 */
#define a_argument required_argument
static const struct option opts[] = {
	{ .name = "pid", .has_arg = a_argument, .flag = NULL, .val = 1 },
	{ .name = NULL,  .has_arg = 0,          .flag = NULL, .val = 0 },
};
288 | ||
/* Print the one-line usage summary to stdout and exit with EX_USAGE.
 * Never returns.
 */
static void usage(void)
{
	static const char summary[] = "Usage: unshare [options] <program>";

	puts(summary);
	exit(EX_USAGE);
}
294 | ||
295 | int main(int argc, char *argv[]) | |
296 | { | |
297 | int c; | |
298 | FILE *pidfp; | |
299 | const char *pid = NULL; | |
300 | bool newipc = false; | |
301 | bool newmnt = false; | |
302 | bool newnet = false; | |
303 | bool newpid = false; | |
304 | bool newuts = false; | |
305 | bool newusr = false; | |
306 | bool daemonize = false; | |
307 | uid_t uid; | |
308 | gid_t gid; | |
309 | ||
310 | title_argv = argv; | |
311 | ||
312 | while ((c = getopt_long(argc, argv, "+DimnpuU", opts, NULL)) != -1) { | |
313 | switch (c) { | |
314 | case 1: | |
315 | pid = optarg; | |
316 | break; | |
317 | case 'i': newipc = true; break; | |
318 | case 'm': newmnt = true; break; | |
319 | case 'n': newnet = true; break; | |
320 | case 'p': newpid = true; break; | |
321 | case 'u': newuts = true; break; | |
322 | case 'U': newusr = true; break; | |
323 | case 'D': daemonize = true; break; | |
324 | default: | |
325 | usage(); | |
326 | } | |
327 | } | |
328 | argc -= optind; | |
329 | argv += optind; | |
330 | ||
331 | if (argc == 0) | |
332 | usage(); | |
333 | ||
334 | if (newusr) { | |
335 | uid = getuid(); | |
336 | gid = getgid(); | |
337 | if (vunshare(CLONE_NEWUSER)) | |
338 | map_uid_gid(0, 0, uid, gid); | |
339 | else | |
340 | newusr = false; | |
341 | } | |
342 | ||
343 | if (newmnt || newpid) | |
344 | vunshare(CLONE_NEWNS); | |
345 | if (newuts) | |
346 | vunshare(CLONE_NEWUTS); | |
347 | if (newipc) | |
348 | vunshare(CLONE_NEWIPC); | |
349 | if (newnet) | |
350 | unshare_net(); | |
351 | ||
352 | if (pid) { | |
353 | pidfp = fopen(pid, "we"); | |
354 | if (pidfp == NULL) | |
355 | err(1, "fopen(%s) failed", pid); | |
356 | } | |
357 | ||
358 | if (newpid && unshare_pid(daemonize)) { | |
359 | /* Nothing. */ | |
360 | } else if (daemonize) | |
361 | if (daemon(1, 0)) | |
362 | err(1, "daemon() failed"); | |
363 | ||
364 | if (pid) { | |
365 | fprintf(pidfp, "%u\n", getpid()); | |
366 | fclose(pidfp); | |
367 | } | |
368 | ||
369 | if (newusr) | |
370 | if (vunshare(CLONE_NEWUSER)) | |
371 | map_uid_gid(uid, gid, 0, 0); | |
372 | ||
373 | execvp(argv[0], argv); | |
374 | fprintf(stderr, "%s: %s\n", argv[0], strerror(errno)); | |
375 | return 127; | |
376 | } |