]>
Commit | Line | Data |
---|---|---|
d2c9b2a9 MF |
1 | /* Unshare daemonizer. |
2 | * Written by Mike Frysinger <vapier@gmail.com> | |
3 | * Released into the public domain. | |
4 | */ | |
5 | ||
6 | /* TODO: | |
7 | * - Add userns support. | |
8 | * - Make pidns init optional. | |
9 | * - Make setproctitle nicer and include program argv[0]. | |
10 | */ | |
11 | ||
12 | #define _GNU_SOURCE | |
13 | ||
14 | #include <err.h> | |
15 | #include <errno.h> | |
16 | #include <fcntl.h> | |
17 | #include <getopt.h> | |
18 | #include <sched.h> | |
19 | #include <signal.h> | |
20 | #include <stdbool.h> | |
21 | #include <stdint.h> | |
22 | #include <stdio.h> | |
23 | #include <stdlib.h> | |
24 | #include <string.h> | |
25 | #include <sysexits.h> | |
26 | #include <unistd.h> | |
27 | #include <net/if.h> | |
28 | #include <sys/ioctl.h> | |
29 | #include <sys/mount.h> | |
30 | #include <sys/prctl.h> | |
31 | #include <sys/socket.h> | |
32 | #include <sys/types.h> | |
33 | #include <sys/wait.h> | |
34 | ||
35 | #define PROG "vunshare" | |
36 | ||
/*
 * Detach from the namespaces selected by @flags using unshare(2).
 *
 * Returns true when the call succeeds and false when it fails with EINVAL.
 * Any other failure is fatal: err() prints a diagnostic and exits with 1.
 */
static bool vunshare(int flags)
{
	if (unshare(flags) != -1)
		return true;

	/* Only EINVAL is reported back to the caller; everything else aborts. */
	if (errno != EINVAL)
		err(1, "unshare failed");

	return false;
}
46 | ||
/*
 * Move into a new network namespace and bring its loopback device up.
 *
 * If vunshare(CLONE_NEWNET) reports failure (EINVAL from the kernel), the
 * function returns without doing anything else.  Failure of socket() or of
 * either ioctl() is fatal via err().
 */
static void unshare_net(void)
{
	if (!vunshare(CLONE_NEWNET))
		return;

	/* The socket is only used as a handle for the interface ioctl() calls. */
	int sock = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sock == -1)
		err(1, "socket(AF_LOCAL) failed");

	/* Start from a zeroed request so no stack garbage reaches the kernel. */
	struct ifreq ifr;
	memset(&ifr, 0, sizeof(ifr));

	/* Equiv of `ip link set up lo`.  Kernel will assign 127.0.0.1 for us. */
	strcpy(ifr.ifr_name, "lo");
	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0)
		err(1, "ioctl(SIOCGIFFLAGS) failed");
	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		err(1, "ioctl(SIOCSIFFLAGS) failed");

	/* Done with the helper socket; do not leak it into later processes. */
	close(sock);
}
64 | ||
65 | static char **title_argv; | |
66 | static void setproctitle(const char *title) | |
67 | { | |
68 | /* Hopefully 1k is all we ever need. */ | |
69 | char newtitle[1024]; | |
70 | memset(newtitle, 0, sizeof(newtitle)); | |
71 | int len = sprintf(newtitle, "%s: %s [pid ns]", PROG, title); | |
72 | ||
73 | prctl(PR_SET_NAME, (uintptr_t)newtitle); | |
74 | ||
75 | /* Clobber argv to set the title. Need to figure out how much space though. */ | |
76 | int argc = 0; | |
77 | size_t i = 0; | |
78 | while (title_argv[argc]) | |
79 | i += strlen(title_argv[argc++]) + 1; | |
80 | /* Now scan the environ table. */ | |
81 | while (title_argv[argc]) | |
82 | i += strlen(title_argv[argc++]) + 1; | |
83 | ||
84 | if (i < len) | |
85 | newtitle[i] = '\0'; | |
86 | /* This will NUL pad the string for us too. */ | |
87 | strncpy(title_argv[0], newtitle, i); | |
88 | } | |
89 | ||
/*
 * Close file descriptors 3 through 9.  Descriptors 0-2 are left alone and
 * the result of each close() is ignored.
 */
static void close_fds(void)
{
	int fd = 3;

	while (fd < 10)
		close(fd++);
}
96 | ||
/*
 * Terminate this process in the way described by @status, a value in the
 * format filled in by wait().
 *
 * A signal death -- or an exit code above 128, which is read as
 * "128 + signal" -- is replayed by resetting that signal to its default
 * action and sending it to ourselves.  If the process survives that, or the
 * status was an ordinary exit, exit() is called with the matching code.
 */
static void exit_as_status_ext(int status)
{
	int code = WEXITSTATUS(status);
	int signo;

	if (WIFSIGNALED(status)) {
		signo = WTERMSIG(status);
	} else if (code > 128) {
		/* For the external init, translate the signal status back.
		 * TODO: This gets it wrong when the child actually exited.
		 * We need to set up a pipe between the two inits so we can
		 * get back the proper details.
		 */
		signo = code - 128;
	} else {
		signo = 0;
	}

	if (signo != 0) {
		signal(signo, SIG_DFL);
		kill(getpid(), signo);

		/* Still here ?  Maybe the signal was masked.  Just exit. */
		code = 128 + signo;
	}

	exit(code);
}
123 | ||
/*
 * Exit with a code derived from @status, a value in the format filled in by
 * wait(): 128 + signal number for a signal death, the plain exit code
 * otherwise.
 */
static void exit_as_status_int(int status)
{
	/* If we are the init for the pid ns, we can't kill ourselves --
	 * the kernel explicitly disallows this.  Just exit with a high
	 * status value instead.  Our parent will handle it themselves.
	 */
	exit(WIFSIGNALED(status) ? 128 + WTERMSIG(status) : WEXITSTATUS(status));
}
139 | ||
/*
 * Collect children with wait() until it fails.
 *
 * Returns the status stored by the last successful wait(), or 1 when no
 * child could be collected at all.
 */
static int reap_children(void)
{
	int status = 1;

	while (wait(&status) != -1)
		continue;

	return status;
}
151 | ||
/* PID the external init forwards signals to; set by setup_signal_handler(). */
static pid_t child_pid;

/*
 * SA_SIGINFO-style signal handler that relays @sig onwards.
 *
 * When running as PID 1 (the internal init) the signal is sent to every
 * process we may signal (kill(-1, ...)), but only if the sender PID in
 * @siginfo is 0.  In any other process (the external init) the signal is
 * sent to child_pid.  @context is unused.
 */
static void signal_passthru(int sig, siginfo_t *siginfo, void *context)
{
	/* kill() may modify errno; keep the interrupted code's value intact. */
	const int saved_errno = errno;

	(void)context;

	if (getpid() == 1) {
		/* Internal init. */

		/* If the signal is coming from our children, ignore it.
		 * If it's coming from outside the pid ns, pass it along.
		 */
		if (siginfo->si_pid == 0) {
			/* Kill all the children! */
			kill(-1, sig);
		}
	} else {
		/* External init. */

		/* Just forward signal to the child. */
		kill(child_pid, sig);
	}

	errno = saved_errno;
}
173 | ||
174 | /* We want to forward some signals to the child process. Block the rest. | |
175 | * We don't actually exit as we wait for the child to die/process the signal | |
176 | * first, and then we'll kill/exit after that point. | |
177 | */ | |
178 | static void setup_signal_handler(pid_t pid) | |
179 | { | |
180 | int i; | |
181 | ||
182 | struct sigaction sa = { | |
183 | .sa_sigaction = signal_passthru, | |
184 | .sa_flags = SA_SIGINFO | SA_RESTART, | |
185 | }; | |
186 | ||
187 | child_pid = pid; | |
188 | ||
189 | for (i = 1; i < SIGUNUSED; ++i) | |
190 | if (sigaction(i, &sa, NULL) && errno != EINVAL) | |
191 | fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno)); | |
192 | for (i = SIGRTMIN; i <= SIGRTMAX; ++i) | |
193 | if (sigaction(i, &sa, NULL) && errno != EINVAL) | |
194 | fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno)); | |
195 | ||
196 | /* As an init, we will reap the children via wait(). */ | |
197 | signal(SIGCHLD, SIG_DFL); | |
198 | } | |
199 | ||
/*
 * Enter a new PID namespace and set up the two "init" helper processes.
 *
 * Returns false straight away if vunshare(CLONE_NEWPID) reports failure.
 * Otherwise the function forks twice:
 *   - the first parent becomes the external init (or simply exits with 0
 *     when @daemonize is set); it relays signals, reaps, and terminates via
 *     exit_as_status_ext();
 *   - the first child optionally detaches (setsid() plus /dev/null on fds
 *     0-2), mounts a fresh /proc, and forks again; that parent becomes the
 *     internal init and terminates via exit_as_status_int().
 * Only the innermost child returns, with true.  Any failing step is fatal
 * via err().
 */
static bool unshare_pid(bool daemonize)
{
	pid_t kid;

	if (!vunshare(CLONE_NEWPID))
		return false;

	/* Set up external init process. */
	kid = fork();
	if (kid == -1)
		err(1, "fork() failed");
	if (kid != 0) {
		if (daemonize)
			exit(0);
		setproctitle("ext init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_ext(reap_children());
	}

	if (daemonize) {
		if (setsid() == -1)
			err(1, "setsid() failed");

		int null_fd = open("/dev/null", O_RDWR);
		if (null_fd == -1)
			err(1, "open(/dev/null) failed");

		/* Point stdin, stdout and stderr at /dev/null, in that order. */
		for (int target = 0; target <= 2; ++target)
			if (dup2(null_fd, target) == -1)
				err(1, "dup2() failed");

		if (null_fd > 2)
			close(null_fd);
	}

	/* Set up fresh /proc. */
	if (mount("none", "/proc", 0, MS_PRIVATE | MS_REC, "") != 0)
		err(1, "mount(/proc, MS_PRIVATE) failed");
	if (mount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, "") != 0)
		err(1, "mount(/proc) failed");

	/* Set up internal init process. */
	kid = fork();
	if (kid == -1)
		err(1, "fork() failed");
	if (kid != 0) {
		setproctitle("int init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_int(reap_children());
	}

	return true;
}
254 | ||
/*
 * Write the single-entry mapping "<inner> <outer> 1" to the map file @path.
 * Failing to open the file is fatal via err(); a failed write is reported
 * with warn() and otherwise ignored.
 */
static void write_id_map(const char *path, unsigned int inner, unsigned int outer)
{
	FILE *fp = fopen(path, "w");
	if (fp == NULL)
		err(1, "fopen(%s) failed", path);

	bool ok = fprintf(fp, "%u %u 1\n", inner, outer) >= 0;
	/* Buffered output is flushed here, so this is where write errors show up. */
	if (fclose(fp) == EOF)
		ok = false;
	if (!ok)
		warn("writing %s failed", path);
}

/*
 * Install one-to-one user and group ID mappings for the current process.
 *
 * @iuid/@igid are written as the first field of /proc/self/uid_map and
 * /proc/self/gid_map, @ouid/@ogid as the second, each with a length of 1.
 * Before that, "deny" is written to /proc/self/setgroups when that file can
 * be opened; if it cannot, the step is skipped.
 */
static void map_uid_gid(uid_t iuid, gid_t igid, uid_t ouid, gid_t ogid)
{
	FILE *fp = fopen("/proc/self/setgroups", "w");
	if (fp) {
		fputs("deny\n", fp);
		fclose(fp);
	}

	write_id_map("/proc/self/uid_map", (unsigned int)iuid, (unsigned int)ouid);
	write_id_map("/proc/self/gid_map", (unsigned int)igid, (unsigned int)ogid);
}
273 | ||
#define a_argument required_argument
/* Long options for getopt_long(): "--pid <arg>" is reported as value 1. */
static const struct option opts[] = {
	{ .name = "pid", .has_arg = a_argument, .flag = NULL, .val = 1 },
	{ .name = NULL,  .has_arg = 0,          .flag = NULL, .val = 0 },
};
279 | ||
/* Print the one-line usage summary on stdout and exit with EX_USAGE. */
static void usage(void)
{
	fputs("Usage: unshare [options] <program>\n", stdout);
	exit(EX_USAGE);
}
285 | ||
286 | int main(int argc, char *argv[]) | |
287 | { | |
288 | int c; | |
289 | FILE *pidfp; | |
290 | const char *pid = NULL; | |
291 | bool newipc = false; | |
292 | bool newmnt = false; | |
293 | bool newnet = false; | |
294 | bool newpid = false; | |
295 | bool newuts = false; | |
296 | bool newusr = false; | |
297 | bool daemonize = false; | |
298 | uid_t uid; | |
299 | gid_t gid; | |
300 | ||
301 | title_argv = argv; | |
302 | ||
303 | while ((c = getopt_long(argc, argv, "+DimnpuU", opts, NULL)) != -1) { | |
304 | switch (c) { | |
305 | case 1: | |
306 | pid = optarg; | |
307 | break; | |
308 | case 'i': newipc = true; break; | |
309 | case 'm': newmnt = true; break; | |
310 | case 'n': newnet = true; break; | |
311 | case 'p': newpid = true; break; | |
312 | case 'u': newuts = true; break; | |
313 | case 'U': newusr = true; break; | |
314 | case 'D': daemonize = true; break; | |
315 | default: | |
316 | usage(); | |
317 | } | |
318 | } | |
319 | argc -= optind; | |
320 | argv += optind; | |
321 | ||
322 | if (argc == 0) | |
323 | usage(); | |
324 | ||
325 | if (newusr) { | |
326 | uid = getuid(); | |
327 | gid = getgid(); | |
328 | if (vunshare(CLONE_NEWUSER)) | |
329 | map_uid_gid(0, 0, uid, gid); | |
330 | else | |
331 | newusr = false; | |
332 | } | |
333 | ||
334 | if (newmnt || newpid) | |
335 | vunshare(CLONE_NEWNS); | |
336 | if (newuts) | |
337 | vunshare(CLONE_NEWUTS); | |
338 | if (newipc) | |
339 | vunshare(CLONE_NEWIPC); | |
340 | if (newnet) | |
341 | unshare_net(); | |
342 | ||
343 | if (pid) { | |
344 | pidfp = fopen(pid, "we"); | |
345 | if (pidfp == NULL) | |
346 | err(1, "fopen(%s) failed", pid); | |
347 | } | |
348 | ||
349 | if (newpid && unshare_pid(daemonize)) { | |
350 | /* Nothing. */ | |
351 | } else if (daemonize) | |
352 | if (daemon(1, 0)) | |
353 | err(1, "daemon() failed"); | |
354 | ||
355 | if (pid) { | |
356 | fprintf(pidfp, "%u\n", getpid()); | |
357 | fclose(pidfp); | |
358 | } | |
359 | ||
360 | if (newusr) | |
361 | if (vunshare(CLONE_NEWUSER)) | |
362 | map_uid_gid(uid, gid, 0, 0); | |
363 | ||
364 | execvp(argv[0], argv); | |
365 | fprintf(stderr, "%s: %s\n", argv[0], strerror(errno)); | |
366 | return 127; | |
367 | } |