]> git.wh0rd.org - home.git/blame - vunshare.c
cros-board: update
[home.git] / vunshare.c
CommitLineData
d2c9b2a9
MF
/* Unshare daemonizer.
 * Written by Mike Frysinger <vapier@gmail.com>
 * Released into the public domain.
 */

/* TODO:
 * - Make pidns init optional.
 * - Make setproctitle nicer and include program argv[0].
 * - Set up prctl(PR_SET_PDEATHSIG).
 * - Set up prctl(PR_SET_CHILD_SUBREAPER).
 */
12
13#define _GNU_SOURCE
14
15#include <err.h>
16#include <errno.h>
17#include <fcntl.h>
18#include <getopt.h>
19#include <sched.h>
20#include <signal.h>
21#include <stdbool.h>
22#include <stdint.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <sysexits.h>
27#include <unistd.h>
28#include <net/if.h>
29#include <sys/ioctl.h>
30#include <sys/mount.h>
31#include <sys/prctl.h>
32#include <sys/socket.h>
33#include <sys/types.h>
34#include <sys/wait.h>
35
36#define PROG "vunshare"
37
/*
 * Wrapper around unshare(2) that tolerates one specific failure.
 *
 * Returns true when unshare(flags) succeeded and false when it failed with
 * EINVAL (presumably a namespace type this kernel does not know about).
 * Every other failure is reported through err() and ends the process with
 * exit status 1.
 */
static bool vunshare(int flags)
{
	if (unshare(flags) != -1)
		return true;

	/* Only EINVAL is survivable; anything else is fatal. */
	if (errno != EINVAL)
		err(1, "unshare failed");

	return false;
}
47
/*
 * Enter a new network namespace and bring its loopback interface up.
 *
 * If vunshare() reports that the namespace could not be created, nothing
 * else is attempted.  Any failure after that point is fatal via err().
 */
static void unshare_net(void)
{
	struct ifreq lo_req;
	int ctl_fd;

	if (!vunshare(CLONE_NEWNET))
		return;

	/* Any socket will do as a handle for the interface ioctls below. */
	ctl_fd = socket(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0);
	if (ctl_fd < 0)
		err(1, "socket(AF_LOCAL) failed");

	/* Equiv of `ip link set up lo`.  Kernel will assign 127.0.0.1 for us. */
	memset(&lo_req, 0, sizeof(lo_req));
	strcpy(lo_req.ifr_name, "lo");
	if (ioctl(ctl_fd, SIOCGIFFLAGS, &lo_req) < 0)
		err(1, "ioctl(SIOCGIFFLAGS) failed");

	/* The kernel preserves ifr_name, so the same request is reused to set. */
	lo_req.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(ctl_fd, SIOCSIFFLAGS, &lo_req) < 0)
		err(1, "ioctl(SIOCSIFFLAGS) failed");

	close(ctl_fd);
}
72
/* The argv[] handed to main(); setproctitle() overwrites the strings it points at. */
static char **title_argv;

/*
 * Rename the current process to "<PROG>: <title> [pid ns]".
 *
 * The name is set twice: once through prctl(PR_SET_NAME) (the kernel keeps
 * only a short prefix of it), and once by overwriting the argv strings in
 * place so that tools reading the command line see it as well.
 *
 * Only the bytes occupied by the argv strings themselves are reused; the
 * environment strings are left untouched.  This assumes the argv strings
 * are laid out back-to-back in memory starting at title_argv[0], as they are
 * when the process starts.  A title longer than that area is truncated, and
 * the area always ends in a NUL byte.
 *
 * Does nothing to argv if title_argv has not been set up.
 */
static void setproctitle(const char *title)
{
	/* Hopefully 1k is all we ever need; longer titles are truncated. */
	char newtitle[1024];
	size_t space = 0;
	size_t used;
	size_t idx;

	memset(newtitle, 0, sizeof(newtitle));
	snprintf(newtitle, sizeof(newtitle), "%s: %s [pid ns]", PROG, title);

	prctl(PR_SET_NAME, (uintptr_t)newtitle);

	if (title_argv == NULL || title_argv[0] == NULL)
		return;

	/* Figure out how many bytes the argv strings occupy (NULs included). */
	for (idx = 0; title_argv[idx] != NULL; ++idx)
		space += strlen(title_argv[idx]) + 1;

	/* Reserve the last byte of the area for the terminator. */
	used = strlen(newtitle);
	if (used >= space)
		used = space - 1;

	memcpy(title_argv[0], newtitle, used);
	/* NUL pad the remainder so no stale argument text shows through. */
	memset(title_argv[0] + used, 0, space - used);
}
97
/*
 * Close file descriptors 3 through 9.  Descriptors 0-2 and anything from
 * 10 upwards are left alone; close() errors are ignored.
 */
static void close_fds(void)
{
	int fd = 3;

	while (fd < 10)
		close(fd++);
}
104
/*
 * Terminate the external init so that it mirrors a child's wait status.
 *
 * status is a wait()-style status word.  If it describes death by signal,
 * or an exit code above 128 (how the internal init encodes a signal), this
 * process re-raises that signal on itself with the default disposition.
 * If that does not kill us, or no signal is involved, exit() is called
 * with the corresponding exit code.  Never returns.
 */
static void exit_as_status_ext(int status)
{
	int code = WEXITSTATUS(status);
	int signo;

	if (WIFSIGNALED(status)) {
		signo = WTERMSIG(status);
	} else if (code > 128) {
		/* For the external init, translate the signal status back.
		 * TODO: This gets it wrong when the child actually exited.
		 * We need to set up a pipe between the two inits so we can
		 * get back the proper details.
		 */
		signo = code - 128;
	} else {
		signo = 0;
	}

	if (signo != 0) {
		signal(signo, SIG_DFL);
		kill(getpid(), signo);

		/* Still here ?  Maybe the signal was masked.  Just exit. */
		code = 128 + signo;
	}

	exit(code);
}
131
/*
 * Terminate the internal (pid namespace) init based on a child's wait status.
 *
 * If we are the init for the pid ns, we can't kill ourselves -- the kernel
 * explicitly disallows this.  So a child killed by signal N is reported as
 * exit code 128 + N, and a normal exit is passed through unchanged.  Our
 * parent will handle it themselves.  Never returns.
 */
static void exit_as_status_int(int status)
{
	exit(WIFSIGNALED(status) ? 128 + WTERMSIG(status) : WEXITSTATUS(status));
}
147
/* Pid of the child this init tracks; set by setup_signal_handler(). */
static pid_t child_pid;

/*
 * Reap every child of this process until none are left.
 *
 * Returns a wait()-style status word:
 *  - the status of child_pid if that child was reaped at any point;
 *  - otherwise the status of the last child that was reaped;
 *  - 1 if no child was reaped at all.
 *
 * Preferring child_pid keeps an unrelated child that happens to die later
 * from overriding the status of the child we actually care about.
 */
static int reap_children(void)
{
	int result = 1;
	bool saw_primary = false;

	for (;;) {
		int status;
		pid_t pid = wait(&status);

		if (pid == -1) {
			/* An interrupted wait is not "no more children". */
			if (errno == EINTR)
				continue;
			break;
		}

		if (pid == child_pid) {
			result = status;
			saw_primary = true;
		} else if (!saw_primary) {
			result = status;
		}
	}

	return result;
}
161static void signal_passthru(int sig, siginfo_t *siginfo, void *context)
162{
163 if (getpid() == 1) {
164 /* Internal init. */
165
166 /* If the signal is coming from our children, ignore it.
167 * If it's coming from outside the pid ns, pass it along.
168 */
169 if (siginfo->si_pid != 0)
170 return;
171
172 /* Kill all the children! */
173 kill(-1, sig);
174 } else {
175 /* External init. */
176
177 /* Just forward signal to the child. */
178 kill(child_pid, sig);
179 }
180}
181
182/* We want to forward some signals to the child process. Block the rest.
183 * We don't actually exit as we wait for the child to die/process the signal
184 * first, and then we'll kill/exit after that point.
185 */
186static void setup_signal_handler(pid_t pid)
187{
188 int i;
189
190 struct sigaction sa = {
191 .sa_sigaction = signal_passthru,
192 .sa_flags = SA_SIGINFO | SA_RESTART,
193 };
194
195 child_pid = pid;
196
36408122 197 for (i = 1; i < SIGRTMIN; ++i)
d2c9b2a9
MF
198 if (sigaction(i, &sa, NULL) && errno != EINVAL)
199 fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno));
200 for (i = SIGRTMIN; i <= SIGRTMAX; ++i)
201 if (sigaction(i, &sa, NULL) && errno != EINVAL)
202 fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno));
203
204 /* As an init, we will reap the children via wait(). */
205 signal(SIGCHLD, SIG_DFL);
206}
207
/*
 * Set up a new pid namespace with a two-level init arrangement.
 *
 * Returns false (having done nothing else) when vunshare(CLONE_NEWPID)
 * reports failure.  Otherwise it forks twice and returns true only in the
 * innermost child, which is the process that goes on to run the program:
 *
 *  - The original process becomes the "ext init": it forwards signals,
 *    reaps its children and exits mirroring their status.  When daemonize
 *    is set it simply exits 0 instead.
 *  - The first child optionally detaches (setsid + stdio on /dev/null),
 *    mounts a fresh /proc, forks again and becomes the "int init", which
 *    likewise forwards signals, reaps, and exits with the child's status.
 *
 * All failures after the unshare are fatal via err().
 */
static bool unshare_pid(bool daemonize)
{
	pid_t kid;

	if (!vunshare(CLONE_NEWPID))
		return false;

	/* Set up external init process. */
	kid = fork();
	if (kid == -1)
		err(1, "fork() failed");
	if (kid != 0) {
		if (daemonize)
			exit(0);
		setproctitle("ext init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_ext(reap_children());
	}

	if (daemonize) {
		int null_fd;

		if (setsid() == -1)
			err(1, "setsid() failed");

		null_fd = open("/dev/null", O_RDWR);
		if (null_fd == -1)
			err(1, "open(/dev/null) failed");
		if (dup2(null_fd, 0) == -1 ||
		    dup2(null_fd, 1) == -1 ||
		    dup2(null_fd, 2) == -1)
			err(1, "dup2() failed");
		/* Don't close it if it landed on one of the stdio slots. */
		if (null_fd > 2)
			close(null_fd);
	}

	/* Set up fresh /proc. */
	if (mount("none", "/proc", NULL, MS_PRIVATE | MS_REC, "") != 0)
		err(1, "mount(/proc, MS_PRIVATE) failed");
	if (mount("proc", "/proc", "proc",
	          MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, "") != 0)
		err(1, "mount(/proc) failed");

	/* Set up internal init process. */
	kid = fork();
	if (kid == -1)
		err(1, "fork() failed");
	if (kid != 0) {
		setproctitle("int init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_int(reap_children());
	}

	return true;
}
262
/*
 * Write the single-entry mapping "<inside> <outside> 1" to an id map file.
 *
 * Failing to open the file is fatal via err().  A failed write is only
 * reported with warn(), so the caller carries on.
 */
static void write_id_map(const char *path, unsigned int inside, unsigned int outside)
{
	FILE *fp;
	int rc;

	fp = fopen(path, "w");
	if (fp == NULL)
		err(1, "fopen(%s) failed", path);

	/* stdio buffers the data, so a rejected write may only surface at fclose(). */
	rc = fprintf(fp, "%u %u 1\n", inside, outside);
	if (fclose(fp) != 0 || rc < 0)
		warn("writing %s failed", path);
}

/*
 * Install one-to-one uid and gid mappings for the current process.
 *
 * iuid/igid are the ids written first on the map line and ouid/ogid the ids
 * written second, i.e. uid_map receives "iuid ouid 1" and gid_map receives
 * "igid ogid 1".
 *
 * "deny" is written to /proc/self/setgroups first when that file can be
 * opened; if it cannot, the step is skipped.
 */
static void map_uid_gid(uid_t iuid, gid_t igid, uid_t ouid, gid_t ogid)
{
	FILE *fp;

	/* Best effort: carry on if the file cannot be opened. */
	fp = fopen("/proc/self/setgroups", "w");
	if (fp) {
		fputs("deny\n", fp);
		fclose(fp);
	}

	write_id_map("/proc/self/uid_map", (unsigned int)iuid, (unsigned int)ouid);
	write_id_map("/proc/self/gid_map", (unsigned int)igid, (unsigned int)ogid);
}
281
/* Shorthand for long options that take a mandatory argument. */
#define a_argument required_argument

/*
 * Long options understood by getopt_long() in main().  "--pid <arg>" is
 * reported to the option loop as value 1; the all-zero entry ends the table.
 */
static const struct option opts[] = {
	{ .name = "pid", .has_arg = a_argument, .flag = NULL, .val = 1 },
	{ .name = NULL,  .has_arg = 0,          .flag = NULL, .val = 0 },
};
287
/*
 * Print the command line help to stdout and exit with EX_USAGE.
 * Never returns.
 */
static void usage(void)
{
	puts(
		"Usage: " PROG " [options] <program>\n"
		"\n"
		"Options: [DimnpuU]\n"
		"  -i            Use IPC namespaces\n"
		"  -m            Use mount namespaces\n"
		"  -n            Use net namespaces\n"
		"  -p            Use pid namespaces\n"
		"  -u            Use UTS namespaces\n"
		"  -U            Use user namespaces\n"
		"  -D            Daemonize program\n"
		"  --pid <file>  Write the pid of the program to <file>"
	);
	exit(EX_USAGE);
}
304
305int main(int argc, char *argv[])
306{
307 int c;
308 FILE *pidfp;
309 const char *pid = NULL;
310 bool newipc = false;
311 bool newmnt = false;
312 bool newnet = false;
313 bool newpid = false;
314 bool newuts = false;
315 bool newusr = false;
316 bool daemonize = false;
317 uid_t uid;
318 gid_t gid;
319
320 title_argv = argv;
321
322 while ((c = getopt_long(argc, argv, "+DimnpuU", opts, NULL)) != -1) {
323 switch (c) {
324 case 1:
325 pid = optarg;
326 break;
327 case 'i': newipc = true; break;
328 case 'm': newmnt = true; break;
329 case 'n': newnet = true; break;
330 case 'p': newpid = true; break;
331 case 'u': newuts = true; break;
332 case 'U': newusr = true; break;
333 case 'D': daemonize = true; break;
334 default:
335 usage();
336 }
337 }
338 argc -= optind;
339 argv += optind;
340
341 if (argc == 0)
342 usage();
343
344 if (newusr) {
345 uid = getuid();
346 gid = getgid();
347 if (vunshare(CLONE_NEWUSER))
348 map_uid_gid(0, 0, uid, gid);
349 else
350 newusr = false;
351 }
352
353 if (newmnt || newpid)
354 vunshare(CLONE_NEWNS);
355 if (newuts)
356 vunshare(CLONE_NEWUTS);
357 if (newipc)
358 vunshare(CLONE_NEWIPC);
359 if (newnet)
360 unshare_net();
361
362 if (pid) {
363 pidfp = fopen(pid, "we");
364 if (pidfp == NULL)
365 err(1, "fopen(%s) failed", pid);
366 }
367
368 if (newpid && unshare_pid(daemonize)) {
369 /* Nothing. */
370 } else if (daemonize)
371 if (daemon(1, 0))
372 err(1, "daemon() failed");
373
374 if (pid) {
375 fprintf(pidfp, "%u\n", getpid());
376 fclose(pidfp);
377 }
378
379 if (newusr)
380 if (vunshare(CLONE_NEWUSER))
381 map_uid_gid(uid, gid, 0, 0);
382
383 execvp(argv[0], argv);
384 fprintf(stderr, "%s: %s\n", argv[0], strerror(errno));
385 return 127;
386}