]>
Commit | Line | Data |
---|---|---|
d2c9b2a9 MF |
1 | /* Unshare daemonizer. |
2 | * Written by Mike Frysinger <vapier@gmail.com> | |
3 | * Released into the public domain. | |
4 | */ | |
5 | ||
6 | /* TODO: | |
7 | * - Add userns support. | |
8 | * - Make pidns init optional. | |
9 | * - Make setproctitle nicer and include program argv[0]. | |
c692d47f MF |
10 | * - Set up prctl(PR_SET_PDEATHSIG). |
11 | * - Set up prctl(PR_SET_CHILD_SUBREAPER). | |
d2c9b2a9 MF |
12 | */ |
13 | ||
14 | #define _GNU_SOURCE | |
15 | ||
16 | #include <err.h> | |
17 | #include <errno.h> | |
18 | #include <fcntl.h> | |
19 | #include <getopt.h> | |
20 | #include <sched.h> | |
21 | #include <signal.h> | |
22 | #include <stdbool.h> | |
23 | #include <stdint.h> | |
24 | #include <stdio.h> | |
25 | #include <stdlib.h> | |
26 | #include <string.h> | |
27 | #include <sysexits.h> | |
28 | #include <unistd.h> | |
29 | #include <net/if.h> | |
30 | #include <sys/ioctl.h> | |
31 | #include <sys/mount.h> | |
32 | #include <sys/prctl.h> | |
33 | #include <sys/socket.h> | |
34 | #include <sys/types.h> | |
35 | #include <sys/wait.h> | |
36 | ||
37 | #define PROG "vunshare" | |
38 | ||
/*
 * Detach the calling process from the namespaces selected by @flags.
 *
 * Returns true when unshare() succeeds.  Returns false when unshare() fails
 * with EINVAL, so the caller can carry on without that namespace.  Any other
 * failure terminates the program through err().
 */
static bool vunshare(int flags)
{
	if (unshare(flags) != -1)
		return true;

	/* Only EINVAL is tolerated; everything else is fatal. */
	if (errno != EINVAL)
		err(1, "unshare failed");

	return false;
}
48 | ||
/*
 * Move into a new network namespace and bring its loopback device up.
 *
 * Returns quietly when vunshare() reports that CLONE_NEWNET could not be
 * unshared.  Any failure while configuring "lo" is fatal via err().
 */
static void unshare_net(void)
{
	if (!vunshare(CLONE_NEWNET))
		return;

	/* The socket is only used as a handle for the interface ioctls below. */
	int sock = socket(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0);
	if (sock < 0)
		err(1, "socket(AF_LOCAL) failed");

	/* Zero the request so no uninitialized stack bytes are handed to ioctl(). */
	struct ifreq ifr;
	memset(&ifr, 0, sizeof(ifr));

	/* Equiv of `ip link set up lo`.  Kernel will assign 127.0.0.1 for us. */
	strcpy(ifr.ifr_name, "lo");
	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0)
		err(1, "ioctl(SIOCGIFFLAGS) failed");
	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		err(1, "ioctl(SIOCSIFFLAGS) failed");

	close(sock);
}
68 | ||
/* The argv vector as handed to main(); its string storage is reused for the title. */
static char **title_argv;

/*
 * Rename the current process to "<PROG>: <title> [pid ns]".
 *
 * The name is set twice: through prctl(PR_SET_NAME), and by overwriting the
 * memory that holds the argv strings.  The writable area starts at
 * title_argv[0] and covers every argv string that directly follows the
 * previous one; when all of argv is contiguous it is extended over the
 * environ strings that directly follow as well.  The title is truncated to
 * fit and is always NUL terminated; the remainder of the area is zeroed.
 *
 * NOTE: this can clobber the environment strings, so only call it from a
 * process that no longer needs them.
 */
static void setproctitle(const char *title)
{
	extern char **environ;

	/* Hopefully 1k is all we ever need. */
	char newtitle[1024];
	memset(newtitle, 0, sizeof(newtitle));
	int len = snprintf(newtitle, sizeof(newtitle), "%s: %s [pid ns]", PROG, title);
	if (len < 0)
		return;
	/* snprintf() reports the untruncated length; clamp to what is in the buffer. */
	size_t title_len = (size_t)len < sizeof(newtitle) ? (size_t)len : sizeof(newtitle) - 1;

	prctl(PR_SET_NAME, (uintptr_t)newtitle);

	if (title_argv == NULL || title_argv[0] == NULL)
		return;

	/* Clobber argv to set the title.  Need to figure out how much space though. */
	char *start = title_argv[0];
	char *end = start;
	char **strv = title_argv;
	while (*strv != NULL && *strv == end) {
		end += strlen(*strv) + 1;
		++strv;
	}
	/* Now scan the environ table, but only if it really continues the argv area. */
	if (*strv == NULL && environ != NULL) {
		for (strv = environ; *strv != NULL && *strv == end; ++strv)
			end += strlen(*strv) + 1;
	}

	/* argv[0] contributes at least its NUL byte, so space is never zero. */
	size_t space = (size_t)(end - start);
	if (title_len >= space)
		title_len = space - 1;

	/* Copy the (possibly truncated) title and NUL pad the rest of the area. */
	memcpy(start, newtitle, title_len);
	memset(start + title_len, 0, space - title_len);
}
93 | ||
/*
 * Close file descriptors 3 through 9, leaving stdin/stdout/stderr alone.
 * Results of close() are ignored, so descriptors that are not open are
 * simply skipped.
 */
static void close_fds(void)
{
	int fd = 3;

	while (fd < 10)
		close(fd++);
}
100 | ||
/*
 * Terminate the external init so that it mirrors the wait() status @status.
 *
 * If the status says the child was killed by a signal, or its exit code is
 * above 128, this process resets that signal to its default action and sends
 * it to itself.  Should that not end the process, it exits with
 * 128 + signal.  Otherwise it exits with the child's exit code.
 */
static void exit_as_status_ext(int status)
{
	int code = WEXITSTATUS(status);
	int signo;

	if (WIFSIGNALED(status)) {
		signo = WTERMSIG(status);
	} else {
		/* For the external init, translate the signal status back.
		 * TODO: This gets it wrong when the child actually exited.
		 * We need to set up a pipe between the two inits so we can
		 * get back the proper details.
		 */
		signo = code > 128 ? code - 128 : 0;
	}

	if (signo == 0)
		exit(code);

	signal(signo, SIG_DFL);
	kill(getpid(), signo);

	/* Still here ?  Maybe the signal was masked.  Just exit. */
	exit(128 + signo);
}
127 | ||
/*
 * Terminate the internal init with an exit code derived from the wait()
 * status @status: 128 + signal number when the child was killed by a signal,
 * the child's own exit code otherwise.
 *
 * If we are the init for the pid ns, we can't kill ourselves --
 * the kernel explicitly disallows this.  Just exit with a high
 * status value instead.  Our parent will handle it themselves.
 */
static void exit_as_status_int(int status)
{
	exit(WIFSIGNALED(status) ? 128 + WTERMSIG(status) : WEXITSTATUS(status));
}
143 | ||
/*
 * Keep calling wait() until it fails, then return the last status that was
 * stored.  The status starts out as 1, which is what the caller gets back
 * when wait() never stores anything.
 *
 * NOTE(review): the value returned belongs to whichever child was reaped
 * last, which is presumably -- but not necessarily -- the main child.
 */
static int reap_children(void)
{
	int status = 1;

	while (wait(&status) != -1)
		continue;

	return status;
}
155 | ||
/* The process that the external init forwards signals to. */
static pid_t child_pid;

/*
 * Signal handler (SA_SIGINFO style) shared by both init processes.
 *
 * As pid 1 (the internal init) it rebroadcasts @sig to every process it may
 * signal, but only when siginfo->si_pid is 0.  In any other process (the
 * external init) it forwards @sig to child_pid.
 *
 * errno is saved and restored so that a failing kill() cannot disturb the
 * code that was interrupted by the signal.  @context is unused.
 */
static void signal_passthru(int sig, siginfo_t *siginfo, void *context)
{
	int saved_errno = errno;

	(void)context;

	if (getpid() == 1) {
		/* Internal init. */

		/* If the signal is coming from our children, ignore it.
		 * If it's coming from outside the pid ns, pass it along.
		 */
		if (siginfo->si_pid == 0) {
			/* Kill all the children! */
			kill(-1, sig);
		}
	} else {
		/* External init. */

		/* Just forward signal to the child. */
		kill(child_pid, sig);
	}

	errno = saved_errno;
}
177 | ||
178 | /* We want to forward some signals to the child process. Block the rest. | |
179 | * We don't actually exit as we wait for the child to die/process the signal | |
180 | * first, and then we'll kill/exit after that point. | |
181 | */ | |
182 | static void setup_signal_handler(pid_t pid) | |
183 | { | |
184 | int i; | |
185 | ||
186 | struct sigaction sa = { | |
187 | .sa_sigaction = signal_passthru, | |
188 | .sa_flags = SA_SIGINFO | SA_RESTART, | |
189 | }; | |
190 | ||
191 | child_pid = pid; | |
192 | ||
193 | for (i = 1; i < SIGUNUSED; ++i) | |
194 | if (sigaction(i, &sa, NULL) && errno != EINVAL) | |
195 | fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno)); | |
196 | for (i = SIGRTMIN; i <= SIGRTMAX; ++i) | |
197 | if (sigaction(i, &sa, NULL) && errno != EINVAL) | |
198 | fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno)); | |
199 | ||
200 | /* As an init, we will reap the children via wait(). */ | |
201 | signal(SIGCHLD, SIG_DFL); | |
202 | } | |
203 | ||
/*
 * Enter a new pid namespace and set up the two supervising processes.
 *
 * Returns false right away when vunshare(CLONE_NEWPID) fails.  Otherwise
 * the function forks twice:
 *   - the first parent becomes the "ext init" (or simply exits when
 *     @daemonize is set); it never returns from here;
 *   - the first child optionally detaches (setsid + /dev/null stdio),
 *     remounts /proc, and forks again; that parent becomes the "int init"
 *     and never returns either;
 *   - the innermost child returns true to the caller.
 */
static bool unshare_pid(bool daemonize)
{
	if (!vunshare(CLONE_NEWPID))
		return false;

	/* Set up external init process. */
	pid_t kid = fork();
	if (kid == -1)
		err(1, "fork() failed");
	if (kid != 0) {
		if (daemonize)
			exit(0);
		setproctitle("ext init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_ext(reap_children());
	}

	if (daemonize) {
		if (setsid() == -1)
			err(1, "setsid() failed");

		int devnull = open("/dev/null", O_RDWR);
		if (devnull == -1)
			err(1, "open(/dev/null) failed");
		/* Point stdin, stdout and stderr at /dev/null, in that order. */
		for (int target = 0; target <= 2; ++target)
			if (dup2(devnull, target) == -1)
				err(1, "dup2() failed");
		if (devnull > 2)
			close(devnull);
	}

	/* Set up fresh /proc. */
	if (mount("none", "/proc", NULL, MS_PRIVATE | MS_REC, ""))
		err(1, "mount(/proc, MS_PRIVATE) failed");
	if (mount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, ""))
		err(1, "mount(/proc) failed");

	/* Set up internal init process. */
	kid = fork();
	if (kid == -1)
		err(1, "fork() failed");
	if (kid != 0) {
		setproctitle("int init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_int(reap_children());
	}

	return true;
}
258 | ||
/*
 * Write the single-entry mapping "<inside> <outside> 1" to the id map file
 * at @path.  Failing to open the file is fatal; a failed write is reported
 * with warn() and otherwise ignored.
 */
static void write_id_map(const char *path, unsigned int inside, unsigned int outside)
{
	FILE *fp = fopen(path, "w");
	if (fp == NULL)
		err(1, "fopen(%s) failed", path);

	/* The data may only reach the file at fclose(), so check both steps. */
	int saved_errno = 0;
	if (fprintf(fp, "%u %u 1\n", inside, outside) < 0)
		saved_errno = errno;
	if (fclose(fp) != 0 && saved_errno == 0)
		saved_errno = errno;

	if (saved_errno != 0) {
		errno = saved_errno;
		warn("writing %s failed", path);
	}
}

/*
 * Install one-to-one id mappings for the current process: @iuid and @igid
 * are the ids on the left-hand side of the map entry, @ouid and @ogid the
 * ids on the right-hand side.
 *
 * "deny" is first written to /proc/self/setgroups on a best-effort basis
 * (the file is skipped silently when it cannot be opened), then uid_map and
 * gid_map are written.
 */
static void map_uid_gid(uid_t iuid, gid_t igid, uid_t ouid, gid_t ogid)
{
	FILE *fp;

	fp = fopen("/proc/self/setgroups", "w");
	if (fp) {
		fputs("deny\n", fp);
		fclose(fp);
	}

	write_id_map("/proc/self/uid_map", (unsigned int)iuid, (unsigned int)ouid);
	write_id_map("/proc/self/gid_map", (unsigned int)igid, (unsigned int)ogid);
}
277 | ||
/* Shorthand for a long option that takes a mandatory argument. */
#define a_argument required_argument

/*
 * Long options understood by getopt_long().  "--pid <arg>" is reported to
 * the option loop as the value 1; the all-zero entry terminates the table.
 */
static const struct option opts[] = {
	{ .name = "pid", .has_arg = a_argument, .flag = NULL, .val = 1 },
	{ .name = NULL,  .has_arg = 0,          .flag = NULL, .val = 0 },
};
283 | ||
/*
 * Print the command line synopsis and the list of supported options to
 * stdout, then exit with EX_USAGE.  Never returns.
 */
static void usage(void)
{
	fputs(
		"Usage: " PROG " [options] <program> [args]\n"
		"\n"
		"Options:\n"
		"  -i            Unshare the IPC namespace\n"
		"  -m            Unshare the mount namespace\n"
		"  -n            Unshare the network namespace\n"
		"  -p            Unshare the pid namespace\n"
		"  -u            Unshare the UTS namespace\n"
		"  -U            Unshare the user namespace\n"
		"  -D            Daemonize\n"
		"  --pid <file>  Write the pid to <file>\n",
		stdout);
	exit(EX_USAGE);
}
289 | ||
290 | int main(int argc, char *argv[]) | |
291 | { | |
292 | int c; | |
293 | FILE *pidfp; | |
294 | const char *pid = NULL; | |
295 | bool newipc = false; | |
296 | bool newmnt = false; | |
297 | bool newnet = false; | |
298 | bool newpid = false; | |
299 | bool newuts = false; | |
300 | bool newusr = false; | |
301 | bool daemonize = false; | |
302 | uid_t uid; | |
303 | gid_t gid; | |
304 | ||
305 | title_argv = argv; | |
306 | ||
307 | while ((c = getopt_long(argc, argv, "+DimnpuU", opts, NULL)) != -1) { | |
308 | switch (c) { | |
309 | case 1: | |
310 | pid = optarg; | |
311 | break; | |
312 | case 'i': newipc = true; break; | |
313 | case 'm': newmnt = true; break; | |
314 | case 'n': newnet = true; break; | |
315 | case 'p': newpid = true; break; | |
316 | case 'u': newuts = true; break; | |
317 | case 'U': newusr = true; break; | |
318 | case 'D': daemonize = true; break; | |
319 | default: | |
320 | usage(); | |
321 | } | |
322 | } | |
323 | argc -= optind; | |
324 | argv += optind; | |
325 | ||
326 | if (argc == 0) | |
327 | usage(); | |
328 | ||
329 | if (newusr) { | |
330 | uid = getuid(); | |
331 | gid = getgid(); | |
332 | if (vunshare(CLONE_NEWUSER)) | |
333 | map_uid_gid(0, 0, uid, gid); | |
334 | else | |
335 | newusr = false; | |
336 | } | |
337 | ||
338 | if (newmnt || newpid) | |
339 | vunshare(CLONE_NEWNS); | |
340 | if (newuts) | |
341 | vunshare(CLONE_NEWUTS); | |
342 | if (newipc) | |
343 | vunshare(CLONE_NEWIPC); | |
344 | if (newnet) | |
345 | unshare_net(); | |
346 | ||
347 | if (pid) { | |
348 | pidfp = fopen(pid, "we"); | |
349 | if (pidfp == NULL) | |
350 | err(1, "fopen(%s) failed", pid); | |
351 | } | |
352 | ||
353 | if (newpid && unshare_pid(daemonize)) { | |
354 | /* Nothing. */ | |
355 | } else if (daemonize) | |
356 | if (daemon(1, 0)) | |
357 | err(1, "daemon() failed"); | |
358 | ||
359 | if (pid) { | |
360 | fprintf(pidfp, "%u\n", getpid()); | |
361 | fclose(pidfp); | |
362 | } | |
363 | ||
364 | if (newusr) | |
365 | if (vunshare(CLONE_NEWUSER)) | |
366 | map_uid_gid(uid, gid, 0, 0); | |
367 | ||
368 | execvp(argv[0], argv); | |
369 | fprintf(stderr, "%s: %s\n", argv[0], strerror(errno)); | |
370 | return 127; | |
371 | } |