/* Unshare daemonizer.
 * Written by Mike Frysinger
 * Released into the public domain.
 *
 * Provenance: reconstructed from the git patch "vunshare"
 * (commit d2c9b2a9351fc950cef09adb8e019b70c029aee2, dated 2016-06-15),
 * which added this file as vunshare.c.
 */

/* TODO:
 *  - Add userns support.
 *  - Make pidns init optional.
 *  - Make setproctitle nicer and include program argv[0].
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <sched.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>

#define PROG "vunshare"

/* Newer glibc releases no longer define SIGUNUSED.  Where it existed on Linux
 * it shared its number with SIGSYS, so fall back to that to keep the range of
 * forwarded signals the same.
 */
#ifndef SIGUNUSED
# define SIGUNUSED SIGSYS
#endif

/* Detach from the namespaces selected by |flags| (CLONE_NEW* bits).
 *
 * Returns true when unshare() succeeded and false when it failed with EINVAL
 * (treated as "this namespace is not available", so callers can carry on
 * without it).  Any other failure terminates the program.
 */
static bool vunshare(int flags)
{
	if (unshare(flags) == 0)
		return true;

	if (errno != EINVAL)
		err(1, "unshare failed");
	return false;
}

/* Enter a new network namespace and bring its loopback interface up.
 *
 * Does nothing further when the namespace could not be created.  Failures
 * while configuring "lo" are fatal.
 */
static void unshare_net(void)
{
	if (!vunshare(CLONE_NEWNET))
		return;

	/* Any socket will do; it is only a handle for the interface ioctls. */
	int sock = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sock == -1)
		err(1, "socket(AF_LOCAL) failed");

	/* Equiv of `ip link set up lo`.  Kernel will assign 127.0.0.1 for us. */
	struct ifreq ifr;
	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, "lo");
	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0)
		err(1, "ioctl(SIOCGIFFLAGS) failed");
	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		err(1, "ioctl(SIOCSIFFLAGS) failed");

	close(sock);
}

/* The argv vector handed to main(); setproctitle() overwrites its strings. */
static char **title_argv;

/* Rename the current process to "vunshare: <title> [pid ns]".
 *
 * The name is set twice: once through prctl(PR_SET_NAME), and once by
 * overwriting the original argv strings in place.  The writable area is the
 * run of strings that sit back to back in memory starting at argv[0]; it
 * covers argv and, when the environment strings follow on directly, those as
 * well.  The environment of the calling process is therefore destroyed, so
 * only call this from a process that will not exec or read its environment
 * again.
 */
static void setproctitle(const char *title)
{
	/* Hopefully 1k is all we ever need. */
	char newtitle[1024];
	int len = snprintf(newtitle, sizeof(newtitle), "%s: %s [pid ns]", PROG, title);
	if (len < 0)
		return;
	size_t want = (size_t)len;
	if (want >= sizeof(newtitle))
		want = sizeof(newtitle) - 1;

	prctl(PR_SET_NAME, (uintptr_t)newtitle);

	if (title_argv == NULL || title_argv[0] == NULL)
		return;

	/* Clobber argv to set the title.  Need to figure out how much space
	 * there is: walk the argv table, step over its NULL terminator, then
	 * walk the environ table that follows it.  Stop at the first string
	 * that does not start where the previous one ended so we never write
	 * over memory we have not measured.
	 */
	char *area = title_argv[0];
	char *end = area;
	size_t idx = 0;
	for (int table = 0; table < 2; ++table) {
		while (title_argv[idx] != NULL && title_argv[idx] == end) {
			end += strlen(end) + 1;
			++idx;
		}
		if (title_argv[idx] != NULL)
			break;	/* Found a gap; the area ends here. */
		++idx;		/* Step over the table's NULL terminator. */
	}

	/* argv[0] always matches on the first pass, so room is at least 1. */
	size_t room = (size_t)(end - area);
	if (want >= room)
		want = room - 1;

	/* Copy the (possibly truncated) title and NUL fill the rest, which
	 * also guarantees the result is terminated.
	 */
	memcpy(area, newtitle, want);
	memset(area + want, 0, room - want);
}

/* Close file descriptors 3 through 9; errors (e.g. unused slots) are ignored. */
static void close_fds(void)
{
	for (int fd = 3; fd < 10; ++fd)
		close(fd);
}

/* Terminate the external init so that it mirrors the wait status |status|.
 *
 * If the child was killed by a signal, or exited with a value above 128
 * (which is how the internal init reports a signal death), re-raise that
 * signal against ourselves with its default action.  Otherwise exit with the
 * child's exit status.  Does not return.
 */
static void exit_as_status_ext(int status)
{
	int exit_status = WEXITSTATUS(status);
	int sig_status = 0;

	if (WIFSIGNALED(status)) {
		sig_status = WTERMSIG(status);
	} else if (exit_status > 128) {
		/* For the external init, translate the signal status back.
		 * TODO: This gets it wrong when the child actually exited.
		 * We need to set up a pipe between the two inits so we can
		 * get back the proper details.
		 */
		sig_status = exit_status - 128;
	}

	if (sig_status) {
		signal(sig_status, SIG_DFL);
		kill(getpid(), sig_status);

		/* Still here ?  Maybe the signal was masked.  Just exit. */
		exit_status = 128 + sig_status;
	}

	exit(exit_status);
}

/* Terminate the internal init (pid 1 of the pid ns) according to |status|.
 *
 * If we are the init for the pid ns, we can't kill ourselves -- the kernel
 * explicitly disallows this.  Just exit with a high status value (128 + the
 * signal number) instead.  Our parent will handle it themselves.
 * Does not return.
 */
static void exit_as_status_int(int status)
{
	if (WIFSIGNALED(status))
		exit(128 + WTERMSIG(status));

	exit(WEXITSTATUS(status));
}

/* Wait until this process has no children left.
 *
 * Returns the raw wait status of the last child that was reaped.  An
 * interrupted wait() is retried rather than treated as "no more children".
 *
 * NOTE(review): if nothing is ever reaped the initial value 1 is returned
 * as is; the callers decode it with the W* macros, under which a raw 1 looks
 * like "killed by signal 1" rather than "exited with 1" -- confirm intent.
 */
static int reap_children(void)
{
	int status = 1;

	for (;;) {
		if (wait(&status) != -1)
			continue;
		if (errno != EINTR)
			break;
	}

	return status;
}

/* Pid that the external init forwards signals to; set before the handlers
 * are installed and read from signal context afterwards.
 */
static volatile pid_t child_pid;

/* Signal handler shared by both init processes: relay |sig| downwards. */
static void signal_passthru(int sig, siginfo_t *siginfo, void *context)
{
	int saved_errno = errno;	/* kill() must not disturb the main flow. */

	(void)context;

	if (getpid() == 1) {
		/* Internal init. */

		/* If the signal is coming from our children, ignore it.
		 * If it's coming from outside the pid ns (the sender pid is
		 * reported as 0), pass it along.
		 */
		if (siginfo->si_pid == 0) {
			/* Kill all the children! */
			kill(-1, sig);
		}
	} else {
		/* External init. */

		/* Just forward signal to the child. */
		kill(child_pid, sig);
	}

	errno = saved_errno;
}

/* Install signal_passthru() for signals 1 .. SIGUNUSED-1 and for all realtime
 * signals, remembering |pid| as the forwarding target.
 *
 * We want to forward some signals to the child process.  We don't actually
 * exit as we wait for the child to die/process the signal first, and then
 * we'll kill/exit after that point.  Signals whose disposition cannot be
 * changed make sigaction() fail with EINVAL, which is silently accepted; any
 * other failure is reported on stderr but is not fatal.
 */
static void setup_signal_handler(pid_t pid)
{
	struct sigaction sa = {
		.sa_sigaction = signal_passthru,
		.sa_flags = SA_SIGINFO | SA_RESTART,
	};
	sigemptyset(&sa.sa_mask);

	child_pid = pid;

	for (int i = 1; i < SIGUNUSED; ++i)
		if (sigaction(i, &sa, NULL) && errno != EINVAL)
			fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno));
	for (int i = SIGRTMIN; i <= SIGRTMAX; ++i)
		if (sigaction(i, &sa, NULL) && errno != EINVAL)
			fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno));

	/* As an init, we will reap the children via wait(). */
	signal(SIGCHLD, SIG_DFL);
}

/* Enter a new pid namespace and set up the two supervising processes.
 *
 * Forks twice.  The original process becomes the "ext init" outside the
 * namespace (or simply exits when |daemonize| is set); the first child mounts
 * a fresh /proc and becomes the "int init"; the second child is the only one
 * in which this function returns.  Both inits forward signals, reap their
 * children and exit with the resulting status.
 *
 * Returns true in the innermost child, or false (without forking) when the
 * pid namespace could not be created.
 */
static bool unshare_pid(bool daemonize)
{
	if (!vunshare(CLONE_NEWPID))
		return false;

	/* Set up external init process. */
	pid_t pid = fork();
	if (pid == -1)
		err(1, "fork() failed");
	if (pid != 0) {
		if (daemonize)
			exit(0);
		setproctitle("ext init");
		setup_signal_handler(pid);
		close_fds();
		exit_as_status_ext(reap_children());	/* Does not return. */
	}

	if (daemonize) {
		if (setsid() == -1)
			err(1, "setsid() failed");

		/* Point stdin/stdout/stderr at /dev/null. */
		int fd = open("/dev/null", O_RDWR);
		if (fd == -1)
			err(1, "open(/dev/null) failed");
		if (dup2(fd, 0) == -1 || dup2(fd, 1) == -1 || dup2(fd, 2) == -1)
			err(1, "dup2() failed");
		if (fd > 2)
			close(fd);
	}

	/* Set up fresh /proc: first make the existing mount private so the
	 * new one is not propagated, then mount a new procfs on top of it.
	 */
	if (mount("none", "/proc", NULL, MS_PRIVATE | MS_REC, NULL))
		err(1, "mount(/proc, MS_PRIVATE) failed");
	if (mount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, NULL))
		err(1, "mount(/proc) failed");

	/* Set up internal init process. */
	pid = fork();
	if (pid == -1)
		err(1, "fork() failed");
	if (pid != 0) {
		setproctitle("int init");
		setup_signal_handler(pid);
		close_fds();
		exit_as_status_int(reap_children());	/* Does not return. */
	}

	return true;
}

/* Write the single-entry mapping "<inside> <outside> 1" to the id map |path|.
 *
 * Failing to open the file is fatal; a failed write is reported on stderr.
 */
static void write_id_map(const char *path, unsigned int inside, unsigned int outside)
{
	FILE *fp = fopen(path, "w");
	if (fp == NULL)
		err(1, "fopen(%s) failed", path);

	/* The data is buffered, so the real write may only fail in fclose(). */
	int rc = fprintf(fp, "%u %u 1\n", inside, outside);
	if (fclose(fp) != 0 || rc < 0)
		warn("writing %s failed", path);
}

/* Map one uid and one gid into the current user namespace.
 *
 * |iuid|/|igid| are the ids as seen inside the namespace and |ouid|/|ogid|
 * the ids they correspond to outside.  "deny" is written to
 * /proc/self/setgroups first when that file can be opened; its absence is
 * not an error.
 */
static void map_uid_gid(uid_t iuid, gid_t igid, uid_t ouid, gid_t ogid)
{
	FILE *fp = fopen("/proc/self/setgroups", "w");
	if (fp) {
		fputs("deny\n", fp);
		fclose(fp);
	}

	write_id_map("/proc/self/uid_map", (unsigned int)iuid, (unsigned int)ouid);
	write_id_map("/proc/self/gid_map", (unsigned int)igid, (unsigned int)ogid);
}

#define a_argument required_argument
static const struct option opts[] = {
	{ "pid", a_argument, NULL, 1 },
	{ NULL,  0,          NULL, 0 },
};

/* Print the usage line and exit with EX_USAGE. */
static void usage(void)
{
	puts("Usage: " PROG " [options] <program>");
	exit(EX_USAGE);
}

/* Parse the options, unshare the requested namespaces, then exec the program.
 *
 * Options: -i ipc, -m mount, -n net, -p pid, -u uts, -U user namespace,
 * -D daemonize, --pid <file> write a pidfile.  Everything after the options
 * is the program to run and its arguments.
 *
 * Returns 127 if the program could not be executed.
 */
int main(int argc, char *argv[])
{
	const char *pid = NULL;
	FILE *pidfp = NULL;
	bool newipc = false;
	bool newmnt = false;
	bool newnet = false;
	bool newpid = false;
	bool newuts = false;
	bool newusr = false;
	bool daemonize = false;
	uid_t uid = 0;
	gid_t gid = 0;
	int c;

	/* Remember the original vector before it is advanced past the options. */
	title_argv = argv;

	/* The leading '+' stops parsing at the first non-option so that the
	 * program's own options are left alone.
	 */
	while ((c = getopt_long(argc, argv, "+DimnpuU", opts, NULL)) != -1) {
		switch (c) {
		case 1:   pid = optarg;     break;
		case 'i': newipc = true;    break;
		case 'm': newmnt = true;    break;
		case 'n': newnet = true;    break;
		case 'p': newpid = true;    break;
		case 'u': newuts = true;    break;
		case 'U': newusr = true;    break;
		case 'D': daemonize = true; break;
		default:
			usage();
		}
	}
	argc -= optind;
	argv += optind;

	if (argc == 0)
		usage();

	if (newusr) {
		/* Capture the ids before the namespace switch changes them. */
		uid = getuid();
		gid = getgid();
		if (vunshare(CLONE_NEWUSER))
			map_uid_gid(0, 0, uid, gid);
		else
			newusr = false;
	}

	/* A new pid ns gets its own /proc, which needs a private mount ns. */
	if (newmnt || newpid)
		vunshare(CLONE_NEWNS);
	if (newuts)
		vunshare(CLONE_NEWUTS);
	if (newipc)
		vunshare(CLONE_NEWIPC);
	if (newnet)
		unshare_net();

	/* Open the pidfile before forking/daemonizing so that a bad path is
	 * still reported to the caller.
	 */
	if (pid) {
		pidfp = fopen(pid, "we");
		if (pidfp == NULL)
			err(1, "fopen(%s) failed", pid);
	}

	if (newpid && unshare_pid(daemonize)) {
		/* Nothing. */
	} else if (daemonize) {
		if (daemon(1, 0))
			err(1, "daemon() failed");
	}

	if (pid) {
		/* NOTE(review): this runs in the process that will exec the
		 * program; after unshare_pid() that is a process inside the
		 * new pid ns, so the value written is its pid as seen from
		 * there -- confirm that is what pidfile readers expect.
		 */
		int rc = fprintf(pidfp, "%d\n", (int)getpid());
		if (fclose(pidfp) != 0 || rc < 0)
			err(1, "writing %s failed", pid);
	}

	/* Nest a second user ns that maps root back to the original ids. */
	if (newusr)
		if (vunshare(CLONE_NEWUSER))
			map_uid_gid(uid, gid, 0, 0);

	execvp(argv[0], argv);
	fprintf(stderr, "%s: %s\n", argv[0], strerror(errno));
	return 127;
}