2 * Written by Mike Frysinger <vapier@gmail.com>
3 * Released into the public domain.
7 * - Add userns support.
8 * - Make pidns init optional.
9 * - Make setproctitle nicer and include program argv[0].
10 * - Set up prctl(PR_SET_PDEATHSIG).
11 * - Set up prctl(PR_SET_CHILD_SUBREAPER).
30 #include <sys/ioctl.h>
31 #include <sys/mount.h>
32 #include <sys/prctl.h>
33 #include <sys/socket.h>
34 #include <sys/types.h>
/* Program name; setproctitle() uses it as the leading part of the process title. */
37 #define PROG "vunshare"
/*
 * Thin wrapper around unshare(2) for the given CLONE_* flags.
 *
 * Returns a bool; callers in this file treat a true result as "the namespace
 * was unshared" and continue with follow-up setup only in that case.
 * A failing unshare() can end in err(1, "unshare failed").
 * NOTE(review): the condition guarding err() and the return statements are
 * not visible in this excerpt -- confirm which errno values are tolerated.
 */
39 static bool vunshare(int flags)
41 if (unshare(flags) == -1) {
43 err(1, "unshare failed");
/*
 * Move the process into a new network namespace (CLONE_NEWNET) and bring
 * the loopback interface "lo" up inside it.
 *
 * Any failing socket()/ioctl() call terminates the program through err(1, ...).
 * NOTE(review): the declarations of `sock` and `ifr`, and what happens when
 * vunshare() returns false, are not visible in this excerpt.
 */
49 static void unshare_net(void)
51 if (!vunshare(CLONE_NEWNET))
/* The socket is used only as a handle for the interface ioctl()s below. */
57 sock = socket(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0);
59 err(1, "socket(AF_LOCAL) failed");
61 /* Equiv of `ip link set up lo`. Kernel will assign 127.0.0.1 for us. */
62 strcpy(ifr.ifr_name, "lo");
/* Read the current flags first so that only the UP/RUNNING bits get added. */
63 if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0)
64 err(1, "ioctl(SIOCGIFFLAGS) failed");
66 /* The kernel preserves ifr.ifr_name for use. */
67 ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
68 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
69 err(1, "ioctl(SIOCSIFFLAGS) failed");
74 static char **title_argv;
75 static void setproctitle(const char *title)
77 /* Hopefully 1k is all we ever need. */
79 memset(newtitle, 0, sizeof(newtitle));
80 int len = sprintf(newtitle, "%s: %s [pid ns]", PROG, title);
82 prctl(PR_SET_NAME, (uintptr_t)newtitle);
84 /* Clobber argv to set the title. Need to figure out how much space though. */
87 while (title_argv[argc])
88 i += strlen(title_argv[argc++]) + 1;
89 /* Now scan the environ table. */
90 while (title_argv[argc])
91 i += strlen(title_argv[argc++]) + 1;
95 /* This will NUL pad the string for us too. */
96 strncpy(title_argv[0], newtitle, i);
/*
 * Walks file descriptors 3 through 9 (i.e. everything just above
 * stdin/stdout/stderr).
 * NOTE(review): the loop body is not visible in this excerpt -- judging by
 * the function name it presumably close()s each descriptor; confirm.
 */
99 static void close_fds(void)
102 for (i = 3; i < 10; ++i)
/*
 * Used by the external init: make this process end the same way the waited-for
 * process did, as described by the wait status `status`.
 *
 * - If `status` reports death by signal, that signal number is used.
 * - Otherwise an exit status above 128 is read as "128 + signal number".
 *   exit_as_status_int() produces that encoding, and this function decodes it.
 * The chosen signal is reset to its default action and sent to this process.
 * If the process survives that, the exit status becomes 128 + signal.
 *
 * NOTE(review): the declaration/initial value of sig_status, the guard around
 * the signal()/kill() calls and the final exit call are not visible in this
 * excerpt.
 */
106 static void exit_as_status_ext(int status)
109 int exit_status = WEXITSTATUS(status);
111 if (WIFSIGNALED(status)) {
112 sig_status = WTERMSIG(status);
113 } else if (exit_status > 128) {
114 /* For the external init, translate the signal status back.
115 * TODO: This gets it wrong when the child actually exited.
116 * We need to set up a pipe between the two inits so we can
117 * get back the proper details.
119 sig_status = exit_status - 128;
123 signal(sig_status, SIG_DFL);
124 kill(getpid(), sig_status);
126 /* Still here ? Maybe the signal was masked. Just exit. */
127 exit_status = 128 + sig_status;
/*
 * Used by the internal init: turn the wait status `status` into a plain exit
 * status. Death by signal becomes 128 + signal number; otherwise the waited-for
 * process's own exit status is used.
 * NOTE(review): the declaration of exit_status, the `else` joining the two
 * assignments and the final exit call are not visible in this excerpt.
 */
133 static void exit_as_status_int(int status)
135 /* If we are the init for the pid ns, we can't kill ourselves --
136 * the kernel explicitly disallows this. Just exit with a high
137 * status value instead. Our parent will handle it themselves.
141 if (WIFSIGNALED(status))
142 exit_status = 128 + WTERMSIG(status);
144 exit_status = WEXITSTATUS(status);
/*
 * Returns an int that both callers pass straight to exit_as_status_ext() /
 * exit_as_status_int(), which decode it with the WIFSIGNALED/WTERMSIG/
 * WEXITSTATUS macros -- i.e. it is consumed as a wait status.
 * NOTE(review): the body is not visible in this excerpt; which child's status
 * is returned needs to be confirmed there.
 */
149 static int reap_children(void)
/*
 * Process id that signal_passthru() forwards signals to.
 * NOTE(review): presumably set by setup_signal_handler() from its `pid`
 * argument; the assignment is not visible in this excerpt.
 *
 * signal_passthru() is the SA_SIGINFO-style handler installed by
 * setup_signal_handler(). It looks at the sender pid in siginfo->si_pid to
 * decide how to treat the signal, and its visible tail relays `sig` to
 * child_pid with kill().
 * NOTE(review): the statements controlled by the si_pid test and the
 * "kill all the children" branch are not visible in this excerpt.
 */
161 static pid_t child_pid;
162 static void signal_passthru(int sig, siginfo_t *siginfo, void *context)
167 /* If the signal is coming from our children, ignore it.
168 * If it's coming from outside the pid ns, pass it along.
170 if (siginfo->si_pid != 0)
173 /* Kill all the children! */
178 /* Just forward signal to the child. */
179 kill(child_pid, sig);
/*
 * setup_signal_handler(pid): install signal_passthru() as the handler for
 * signals 1 .. SIGUNUSED-1 and for every realtime signal SIGRTMIN .. SIGRTMAX,
 * using SA_SIGINFO (three-argument handler) and SA_RESTART. A sigaction()
 * failure is reported on stderr and otherwise ignored; EINVAL (which
 * sigaction(2) returns for signals that cannot be caught) is skipped silently.
 * Finally SIGCHLD is put back to its default action.
 *
 * NOTE(review): SIGUNUSED is reportedly no longer defined by newer glibc
 * headers -- confirm this still builds on the targeted toolchains.
 * NOTE(review): how `pid` is used (presumably stored in child_pid) is not
 * visible in this excerpt.
 */
183 /* We want to forward some signals to the child process. Block the rest.
184 * We don't actually exit as we wait for the child to die/process the signal
185 * first, and then we'll kill/exit after that point.
187 static void setup_signal_handler(pid_t pid)
191 struct sigaction sa = {
192 .sa_sigaction = signal_passthru,
193 .sa_flags = SA_SIGINFO | SA_RESTART,
198 for (i = 1; i < SIGUNUSED; ++i)
199 if (sigaction(i, &sa, NULL) && errno != EINVAL)
200 fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno));
201 for (i = SIGRTMIN; i <= SIGRTMAX; ++i)
202 if (sigaction(i, &sa, NULL) && errno != EINVAL)
203 fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno));
205 /* As an init, we will reap the children via wait(). */
206 signal(SIGCHLD, SIG_DFL);
/*
 * Enter a new pid namespace (CLONE_NEWPID) and set up the two supervising
 * processes around the eventual program:
 *  - an "ext init" that retitles itself, installs the signal forwarding
 *    handler, waits via reap_children() and then leaves through
 *    exit_as_status_ext();
 *  - an "int init" that does the same but leaves through
 *    exit_as_status_int().
 * Between the two, the visible code can start a new session (setsid), point
 * stdin/stdout/stderr at /dev/null, and mounts a fresh procfs on /proc.
 * Every failing system call ends the program through err(1, ...).
 *
 * Returns a bool that main() tests as `newpid && unshare_pid(daemonize)`.
 * NOTE(review): the fork()/switch statements, the return statements and the
 * condition around the setsid()/"/dev/null" section (presumably `daemonize`)
 * are not visible in this excerpt.
 */
209 static bool unshare_pid(bool daemonize)
211 if (!vunshare(CLONE_NEWPID))
216 /* Set up external init process. */
219 case -1: err(1, "fork() failed");
224 setproctitle("ext init");
225 setup_signal_handler(pid);
227 exit_as_status_ext(reap_children());
232 err(1, "setsid() failed");
/* Redirect the three standard descriptors to /dev/null. */
234 int fd = open("/dev/null", O_RDWR);
236 err(1, "open(/dev/null) failed");
237 if (dup2(fd, 0) == -1 || dup2(fd, 1) == -1 || dup2(fd, 2) == -1)
238 err(1, "dup2() failed");
243 /* Set up fresh /proc. */
/* First mark the existing /proc mount private (recursively), then mount a new procfs over it. */
244 if (mount("none", "/proc", 0, MS_PRIVATE | MS_REC, ""))
245 err(1, "mount(/proc, MS_PRIVATE) failed");
246 if (mount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, ""))
247 err(1, "mount(/proc) failed");
249 /* Set up internal init process. */
252 case -1: err(1, "fork() failed");
255 setproctitle("int init");
256 setup_signal_handler(pid);
258 exit_as_status_int(reap_children());
/*
 * Write the one-line mapping "<inside> <outside> 1" to an id map file
 * (/proc/self/uid_map or /proc/self/gid_map).
 *
 * Failing to open the file is fatal. A failed write is reported with warn()
 * and execution continues.
 */
static void write_id_map(const char *path, unsigned int inside, unsigned int outside)
{
	FILE *fp = fopen(path, "w");
	if (fp == NULL)
		err(1, "fopen(%s) failed", path);

	bool failed = fprintf(fp, "%u %u 1\n", inside, outside) < 0;
	/* The data reaches the kernel on flush, so the fclose() result matters. */
	if (fclose(fp) != 0)
		failed = true;
	if (failed)
		warn("writing %s failed", path);
}

/*
 * Install single-id uid and gid mappings for the current user namespace.
 *
 * iuid/igid: ids as seen inside the namespace.
 * ouid/ogid: the ids they correspond to outside the namespace.
 *
 * Exits via err() when a map file cannot be opened; a rejected write only
 * produces a warning.
 */
static void map_uid_gid(uid_t iuid, gid_t igid, uid_t ouid, gid_t ogid)
{
	FILE *fp;

	/*
	 * An unprivileged process has to write "deny" to setgroups before it is
	 * allowed to write gid_map (see user_namespaces(7)). The file does not
	 * exist on older kernels, so failing to open it is not an error.
	 */
	fp = fopen("/proc/self/setgroups", "w");
	if (fp != NULL) {
		fputs("deny", fp);
		fclose(fp);
	}

	write_id_map("/proc/self/uid_map", (unsigned int)iuid, (unsigned int)ouid);
	write_id_map("/proc/self/gid_map", (unsigned int)igid, (unsigned int)ogid);
}
/*
 * Long option table handed to getopt_long() in main().
 * `a_argument` is a local shorthand for required_argument.
 * "--pid <arg>" takes a mandatory argument and is reported to the option loop
 * as the value 1; the all-zero entry terminates the table.
 */
283 #define a_argument required_argument
284 static const struct option opts[] = {
285 { "pid", a_argument, NULL, 1 },
286 { NULL, 0, NULL, 0 },
/*
 * Print the usage summary to stdout.
 * NOTE(review): the text names the program "unshare" while PROG is
 * "vunshare" -- confirm which name is intended.
 * NOTE(review): anything after the puts() (e.g. an exit call) is not visible
 * in this excerpt.
 */
289 static void usage(void)
291 puts("Usage: unshare [options] <program>");
/*
 * Entry point: parse the options, unshare the requested namespaces,
 * optionally write a pid file, then exec the given program.
 *
 * Short options (getopt string "+DimnpuU"; the leading '+' makes glibc stop
 * at the first non-option so the program's own arguments are left alone):
 *   -i ipc, -m mount, -n net, -p pid, -u uts, -U user namespace, -D daemonize.
 * NOTE(review): several parts are not visible in this excerpt -- the handling
 * of the long "--pid" option (presumably it sets `pid`, the pid file path),
 * the conditions around most vunshare() calls, where uid/gid come from, and
 * how argv is advanced before execvp(). Confirm against the full file.
 */
295 int main(int argc, char *argv[])
299 const char *pid = NULL;
306 bool daemonize = false;
312 while ((c = getopt_long(argc, argv, "+DimnpuU", opts, NULL)) != -1) {
317 case 'i': newipc = true; break;
318 case 'm': newmnt = true; break;
319 case 'n': newnet = true; break;
320 case 'p': newpid = true; break;
321 case 'u': newuts = true; break;
322 case 'U': newusr = true; break;
323 case 'D': daemonize = true; break;
/* User namespace: id 0 inside the namespace maps to uid/gid outside. */
337 if (vunshare(CLONE_NEWUSER))
338 map_uid_gid(0, 0, uid, gid);
/* A new pid namespace also gets a new mount namespace: unshare_pid() mounts its own /proc. */
343 if (newmnt || newpid)
344 vunshare(CLONE_NEWNS);
346 vunshare(CLONE_NEWUTS);
348 vunshare(CLONE_NEWIPC);
/* Mode "we": write, with glibc's 'e' flag so the descriptor is close-on-exec. */
353 pidfp = fopen(pid, "we");
355 err(1, "fopen(%s) failed", pid);
358 if (newpid && unshare_pid(daemonize)) {
360 } else if (daemonize)
362 err(1, "daemon() failed");
/* NOTE(review): getpid() returns pid_t (signed) but is printed with %u. */
365 fprintf(pidfp, "%u\n", getpid());
/* Second user namespace: uid/gid inside map to id 0 of the enclosing namespace. */
370 if (vunshare(CLONE_NEWUSER))
371 map_uid_gid(uid, gid, 0, 0);
/* execvp() only returns on failure, in which case the reason is reported. */
373 execvp(argv[0], argv);
374 fprintf(stderr, "%s: %s\n", argv[0], strerror(errno));