]>
Commit | Line | Data |
---|---|---|
d2c9b2a9 MF |
1 | /* Unshare daemonizer. |
2 | * Written by Mike Frysinger <vapier@gmail.com> | |
3 | * Released into the public domain. | |
4 | */ | |
5 | ||
6 | /* TODO: | |
7 | * - Add userns support. | |
8 | * - Make pidns init optional. | |
9 | * - Make setproctitle nicer and include program argv[0]. | |
c692d47f MF |
10 | * - Set up prctl(PR_SET_PDEATHSIG). |
11 | * - Set up prctl(PR_SET_CHILD_SUBREAPER). | |
d2c9b2a9 MF |
12 | */ |
13 | ||
14 | #define _GNU_SOURCE | |
15 | ||
16 | #include <err.h> | |
17 | #include <errno.h> | |
18 | #include <fcntl.h> | |
19 | #include <getopt.h> | |
20 | #include <sched.h> | |
21 | #include <signal.h> | |
22 | #include <stdbool.h> | |
23 | #include <stdint.h> | |
24 | #include <stdio.h> | |
25 | #include <stdlib.h> | |
26 | #include <string.h> | |
27 | #include <sysexits.h> | |
28 | #include <unistd.h> | |
29 | #include <net/if.h> | |
30 | #include <sys/ioctl.h> | |
31 | #include <sys/mount.h> | |
32 | #include <sys/prctl.h> | |
33 | #include <sys/socket.h> | |
34 | #include <sys/types.h> | |
35 | #include <sys/wait.h> | |
36 | ||
37 | #define PROG "vunshare" | |
38 | ||
/*
 * Thin wrapper around unshare(2).
 *
 * Returns true when the kernel accepted @flags.  Returns false when the call
 * was rejected with EINVAL, leaving it to the caller to carry on without the
 * requested namespace.  Any other failure terminates the program via err().
 */
static bool vunshare(int flags)
{
	if (unshare(flags) != -1)
		return true;

	/* Only EINVAL is tolerated; everything else is fatal. */
	if (errno != EINVAL)
		err(1, "unshare failed");

	return false;
}
48 | ||
/*
 * Move into a new network namespace and bring its loopback device up.
 *
 * Returns without touching any interface when vunshare() reports that the
 * namespace could not be created.  Every failure while configuring "lo"
 * is fatal (err()).
 */
static void unshare_net(void)
{
	if (!vunshare(CLONE_NEWNET))
		return;

	/* Any socket will do; it is only a handle for the interface ioctls. */
	int sock = socket(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0);
	if (sock == -1)
		err(1, "socket(AF_LOCAL) failed");

	/* Equiv of `ip link set up lo`.  Kernel will assign 127.0.0.1 for us. */
	struct ifreq ifr;
	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, "lo");
	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0)
		err(1, "ioctl(SIOCGIFFLAGS) failed");

	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		err(1, "ioctl(SIOCSIFFLAGS) failed");

	/* Do not leak the helper socket into the long-lived init processes. */
	close(sock);
}
66 | ||
67 | static char **title_argv; | |
68 | static void setproctitle(const char *title) | |
69 | { | |
70 | /* Hopefully 1k is all we ever need. */ | |
71 | char newtitle[1024]; | |
72 | memset(newtitle, 0, sizeof(newtitle)); | |
73 | int len = sprintf(newtitle, "%s: %s [pid ns]", PROG, title); | |
74 | ||
75 | prctl(PR_SET_NAME, (uintptr_t)newtitle); | |
76 | ||
77 | /* Clobber argv to set the title. Need to figure out how much space though. */ | |
78 | int argc = 0; | |
79 | size_t i = 0; | |
80 | while (title_argv[argc]) | |
81 | i += strlen(title_argv[argc++]) + 1; | |
82 | /* Now scan the environ table. */ | |
83 | while (title_argv[argc]) | |
84 | i += strlen(title_argv[argc++]) + 1; | |
85 | ||
86 | if (i < len) | |
87 | newtitle[i] = '\0'; | |
88 | /* This will NUL pad the string for us too. */ | |
89 | strncpy(title_argv[0], newtitle, i); | |
90 | } | |
91 | ||
/*
 * Close file descriptors 3 through 9.  Descriptors 0-2 are left alone and
 * errors from close() (e.g. the descriptor was not open) are ignored.
 */
static void close_fds(void)
{
	int fd = 3;

	while (fd < 10)
		close(fd++);
}
98 | ||
/*
 * Terminate the calling process so that its own parent observes the same
 * outcome described by @status (a wait()-style status word).
 *
 *  - Child killed by a signal: re-raise that signal on ourselves.
 *  - Child exited with a code above 128: treat it as "128 + signal" and
 *    re-raise that signal.
 *    TODO: This gets it wrong when the child really exited with such a code.
 *    We need to set up a pipe between the two inits so we can get back the
 *    proper details.
 *  - Otherwise: exit with the child's exit code.
 *
 * Never returns.
 */
static void exit_as_status_ext(int status)
{
	int code = WEXITSTATUS(status);
	int signo;

	if (WIFSIGNALED(status))
		signo = WTERMSIG(status);
	else
		signo = code > 128 ? code - 128 : 0;

	if (signo == 0)
		exit(code);

	/* Make sure the default action applies, then signal ourselves. */
	signal(signo, SIG_DFL);
	kill(getpid(), signo);

	/* Still here ?  Maybe the signal was masked.  Just exit. */
	exit(128 + signo);
}
125 | ||
/*
 * Exit with a code derived from @status (a wait()-style status word):
 * 128 + signal number when the child was killed by a signal, otherwise the
 * child's own exit code.
 *
 * If we are the init for the pid ns, we can't kill ourselves -- the kernel
 * explicitly disallows this.  So a high exit value stands in for the signal
 * and our parent handles it.  Never returns.
 */
static void exit_as_status_int(int status)
{
	exit(WIFSIGNALED(status) ? 128 + WTERMSIG(status) : WEXITSTATUS(status));
}
141 | ||
/*
 * Wait for every child of the calling process to terminate.
 *
 * Returns the raw wait() status of the last child that was reaped, or 1 when
 * no child was reaped at all.  A wait() that is merely interrupted by a
 * signal (EINTR) is retried so that reaping is not cut short while children
 * are still running.
 */
static int reap_children(void)
{
	int status = 1;

	for (;;) {
		if (wait(&status) != -1)
			continue;
		/* Out of children (or a real error): we are done. */
		if (errno != EINTR)
			break;
	}

	return status;
}
153 | ||
/* Process that receives forwarded signals; set by setup_signal_handler(). */
static pid_t child_pid;

/*
 * SA_SIGINFO signal handler shared by both init processes.
 *
 * External init (our pid is not 1): forward @sig to child_pid.
 * Internal init (our pid is 1): forward @sig to every process we are allowed
 * to signal, but only when siginfo->si_pid is 0.  A non-zero sender pid is
 * taken to mean the signal came from one of our own children, and is ignored.
 *
 * @context is unused.
 */
static void signal_passthru(int sig, siginfo_t *siginfo, void *context)
{
	const bool internal_init = (getpid() == 1);

	if (!internal_init) {
		/* Just forward signal to the child. */
		kill(child_pid, sig);
		return;
	}

	/* Coming from outside the pid ns: kill all the children! */
	if (siginfo->si_pid == 0)
		kill(-1, sig);
}
175 | ||
176 | /* We want to forward some signals to the child process. Block the rest. | |
177 | * We don't actually exit as we wait for the child to die/process the signal | |
178 | * first, and then we'll kill/exit after that point. | |
179 | */ | |
180 | static void setup_signal_handler(pid_t pid) | |
181 | { | |
182 | int i; | |
183 | ||
184 | struct sigaction sa = { | |
185 | .sa_sigaction = signal_passthru, | |
186 | .sa_flags = SA_SIGINFO | SA_RESTART, | |
187 | }; | |
188 | ||
189 | child_pid = pid; | |
190 | ||
191 | for (i = 1; i < SIGUNUSED; ++i) | |
192 | if (sigaction(i, &sa, NULL) && errno != EINVAL) | |
193 | fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno)); | |
194 | for (i = SIGRTMIN; i <= SIGRTMAX; ++i) | |
195 | if (sigaction(i, &sa, NULL) && errno != EINVAL) | |
196 | fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno)); | |
197 | ||
198 | /* As an init, we will reap the children via wait(). */ | |
199 | signal(SIGCHLD, SIG_DFL); | |
200 | } | |
201 | ||
/*
 * Enter a new pid namespace and set up the two supervising "init" processes.
 *
 * Two forks take place:
 *   1. The original process stays behind as the external init.  With
 *      @daemonize it simply exits 0; otherwise it forwards signals to its
 *      child, reaps it and exits via exit_as_status_ext().
 *   2. The first child mounts a fresh /proc (after detaching from the
 *      terminal when @daemonize is set) and becomes the internal init,
 *      which reaps its children and exits via exit_as_status_int().
 *
 * Returns false, without forking, when the pid namespace could not be
 * created.  Returns true only in the innermost child, which is the process
 * that continues with the caller's work.  Setup failures are fatal (err()).
 */
static bool unshare_pid(bool daemonize)
{
	pid_t kid;

	if (!vunshare(CLONE_NEWPID))
		return false;

	/* Set up external init process. */
	kid = fork();
	if (kid == -1) {
		err(1, "fork() failed");
	} else if (kid != 0) {
		if (daemonize)
			exit(0);
		setproctitle("ext init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_ext(reap_children());
	}

	if (daemonize) {
		/* Start a new session and point stdin/stdout/stderr at /dev/null. */
		if (setsid() == -1)
			err(1, "setsid() failed");

		int null_fd = open("/dev/null", O_RDWR);
		if (null_fd == -1)
			err(1, "open(/dev/null) failed");
		for (int std_fd = 0; std_fd <= 2; ++std_fd)
			if (dup2(null_fd, std_fd) == -1)
				err(1, "dup2() failed");
		if (null_fd > 2)
			close(null_fd);
	}

	/* Set up fresh /proc. */
	if (mount("none", "/proc", NULL, MS_PRIVATE | MS_REC, ""))
		err(1, "mount(/proc, MS_PRIVATE) failed");
	if (mount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, ""))
		err(1, "mount(/proc) failed");

	/* Set up internal init process. */
	kid = fork();
	if (kid == -1) {
		err(1, "fork() failed");
	} else if (kid != 0) {
		setproctitle("int init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_int(reap_children());
	}

	return true;
}
256 | ||
/*
 * Write the single-entry mapping "<inside> <outside> 1" to the id map file
 * at @path.
 *
 * Failing to open the file is fatal.  A failed write is reported with warn()
 * but does not stop the program.
 */
static void write_id_map(const char *path, unsigned int inside, unsigned int outside)
{
	FILE *fp = fopen(path, "w");
	if (fp == NULL)
		err(1, "fopen(%s) failed", path);

	/* stdio buffers the line, so a rejected write may only show at fclose(). */
	int rc = fprintf(fp, "%u %u 1\n", inside, outside);
	if (fclose(fp) != 0 || rc < 0)
		warn("writing %s failed", path);
}

/*
 * Configure the id mappings of the current user namespace so that
 * @iuid/@igid inside the namespace correspond to @ouid/@ogid outside of it.
 *
 * "deny" is first written to /proc/self/setgroups on a best-effort basis;
 * the write is skipped when the file cannot be opened.
 */
static void map_uid_gid(uid_t iuid, gid_t igid, uid_t ouid, gid_t ogid)
{
	FILE *fp = fopen("/proc/self/setgroups", "w");
	if (fp) {
		fputs("deny\n", fp);
		fclose(fp);
	}

	write_id_map("/proc/self/uid_map", (unsigned int)iuid, (unsigned int)ouid);
	write_id_map("/proc/self/gid_map", (unsigned int)igid, (unsigned int)ogid);
}
275 | ||
/* Shorthand for options that take a mandatory argument. */
#define a_argument required_argument

/*
 * Long options understood by getopt_long().  "--pid <arg>" is reported to the
 * caller as the value 1; the table ends with an all-zero entry.
 */
static const struct option opts[] = {
	{ .name = "pid", .has_arg = a_argument, .flag = NULL, .val = 1 },
	{ .name = NULL,  .has_arg = 0,          .flag = NULL, .val = 0 },
};
281 | ||
/* Print the one-line usage summary on stdout and exit with EX_USAGE. */
static void usage(void)
{
	static const char summary[] = "Usage: unshare [options] <program>";

	printf("%s\n", summary);
	exit(EX_USAGE);
}
287 | ||
288 | int main(int argc, char *argv[]) | |
289 | { | |
290 | int c; | |
291 | FILE *pidfp; | |
292 | const char *pid = NULL; | |
293 | bool newipc = false; | |
294 | bool newmnt = false; | |
295 | bool newnet = false; | |
296 | bool newpid = false; | |
297 | bool newuts = false; | |
298 | bool newusr = false; | |
299 | bool daemonize = false; | |
300 | uid_t uid; | |
301 | gid_t gid; | |
302 | ||
303 | title_argv = argv; | |
304 | ||
305 | while ((c = getopt_long(argc, argv, "+DimnpuU", opts, NULL)) != -1) { | |
306 | switch (c) { | |
307 | case 1: | |
308 | pid = optarg; | |
309 | break; | |
310 | case 'i': newipc = true; break; | |
311 | case 'm': newmnt = true; break; | |
312 | case 'n': newnet = true; break; | |
313 | case 'p': newpid = true; break; | |
314 | case 'u': newuts = true; break; | |
315 | case 'U': newusr = true; break; | |
316 | case 'D': daemonize = true; break; | |
317 | default: | |
318 | usage(); | |
319 | } | |
320 | } | |
321 | argc -= optind; | |
322 | argv += optind; | |
323 | ||
324 | if (argc == 0) | |
325 | usage(); | |
326 | ||
327 | if (newusr) { | |
328 | uid = getuid(); | |
329 | gid = getgid(); | |
330 | if (vunshare(CLONE_NEWUSER)) | |
331 | map_uid_gid(0, 0, uid, gid); | |
332 | else | |
333 | newusr = false; | |
334 | } | |
335 | ||
336 | if (newmnt || newpid) | |
337 | vunshare(CLONE_NEWNS); | |
338 | if (newuts) | |
339 | vunshare(CLONE_NEWUTS); | |
340 | if (newipc) | |
341 | vunshare(CLONE_NEWIPC); | |
342 | if (newnet) | |
343 | unshare_net(); | |
344 | ||
345 | if (pid) { | |
346 | pidfp = fopen(pid, "we"); | |
347 | if (pidfp == NULL) | |
348 | err(1, "fopen(%s) failed", pid); | |
349 | } | |
350 | ||
351 | if (newpid && unshare_pid(daemonize)) { | |
352 | /* Nothing. */ | |
353 | } else if (daemonize) | |
354 | if (daemon(1, 0)) | |
355 | err(1, "daemon() failed"); | |
356 | ||
357 | if (pid) { | |
358 | fprintf(pidfp, "%u\n", getpid()); | |
359 | fclose(pidfp); | |
360 | } | |
361 | ||
362 | if (newusr) | |
363 | if (vunshare(CLONE_NEWUSER)) | |
364 | map_uid_gid(uid, gid, 0, 0); | |
365 | ||
366 | execvp(argv[0], argv); | |
367 | fprintf(stderr, "%s: %s\n", argv[0], strerror(errno)); | |
368 | return 127; | |
369 | } |