]> git.wh0rd.org - home.git/blob - vunshare.c
vunshare: close socket after bringing up loopback
[home.git] / vunshare.c
1 /* Unshare daemonizer.
2 * Written by Mike Frysinger <vapier@gmail.com>
3 * Released into the public domain.
4 */
5
6 /* TODO:
7 * - Add userns support.
8 * - Make pidns init optional.
9 * - Make setproctitle nicer and include program argv[0].
10 * - Set up prctl(PR_SET_PDEATHSIG).
11 * - Set up prctl(PR_SET_CHILD_SUBREAPER).
12 */
13
14 #define _GNU_SOURCE
15
16 #include <err.h>
17 #include <errno.h>
18 #include <fcntl.h>
19 #include <getopt.h>
20 #include <sched.h>
21 #include <signal.h>
22 #include <stdbool.h>
23 #include <stdint.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <sysexits.h>
28 #include <unistd.h>
29 #include <net/if.h>
30 #include <sys/ioctl.h>
31 #include <sys/mount.h>
32 #include <sys/prctl.h>
33 #include <sys/socket.h>
34 #include <sys/types.h>
35 #include <sys/wait.h>
36
37 #define PROG "vunshare"
38
/* Thin wrapper around unshare(2).
 *
 * Returns true when unshare(flags) succeeded and false when it failed with
 * EINVAL (presumably a kernel without that namespace -- verify).  Every other
 * failure is fatal: err() prints a diagnostic and exits with status 1.
 */
static bool vunshare(int flags)
{
	if (unshare(flags) != -1)
		return true;

	/* EINVAL is the only failure handed back to the caller. */
	if (errno != EINVAL)
		err(1, "unshare failed");

	return false;
}
48
/* Enter a new network namespace and bring its loopback interface up.
 *
 * Returns quietly when vunshare() reports that CLONE_NEWNET could not be
 * used.  Any failure while configuring "lo" is fatal: err() prints a
 * diagnostic and exits with status 1.
 */
static void unshare_net(void)
{
	if (!vunshare(CLONE_NEWNET))
		return;

	/* The socket is only a handle for the interface ioctls below. */
	int sock = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sock < 0)
		err(1, "socket(AF_LOCAL) failed");

	/* Start from a zeroed request so no stack garbage reaches the kernel. */
	struct ifreq ifr;
	memset(&ifr, 0, sizeof(ifr));

	/* Equiv of `ip link set up lo`.  Kernel will assign 127.0.0.1 for us. */
	strcpy(ifr.ifr_name, "lo");
	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0)
		err(1, "ioctl(SIOCGIFFLAGS) failed");

	/* ifr_name is still "lo" here; only the flags need updating. */
	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
		err(1, "ioctl(SIOCSIFFLAGS) failed");

	close(sock);
}
68
69 static char **title_argv;
70 static void setproctitle(const char *title)
71 {
72 /* Hopefully 1k is all we ever need. */
73 char newtitle[1024];
74 memset(newtitle, 0, sizeof(newtitle));
75 int len = sprintf(newtitle, "%s: %s [pid ns]", PROG, title);
76
77 prctl(PR_SET_NAME, (uintptr_t)newtitle);
78
79 /* Clobber argv to set the title. Need to figure out how much space though. */
80 int argc = 0;
81 size_t i = 0;
82 while (title_argv[argc])
83 i += strlen(title_argv[argc++]) + 1;
84 /* Now scan the environ table. */
85 while (title_argv[argc])
86 i += strlen(title_argv[argc++]) + 1;
87
88 if (i < len)
89 newtitle[i] = '\0';
90 /* This will NUL pad the string for us too. */
91 strncpy(title_argv[0], newtitle, i);
92 }
93
/* Close file descriptors 3 through 9.  Descriptors 0-2 are left alone and
 * close() failures (e.g. descriptor not open) are ignored.
 */
static void close_fds(void)
{
	int fd = 3;

	while (fd < 10)
		close(fd++);
}
100
/* Terminate the external init so that it mirrors a wait() |status|.
 *
 * If |status| reports death by signal, or an exit code above 128 (which is
 * read back as 128 + signal number), the same signal is re-raised on this
 * process with its default disposition.  Otherwise the exit code is passed
 * straight to exit().  Never returns.
 */
static void exit_as_status_ext(int status)
{
	int code = WEXITSTATUS(status);
	int signo;

	if (WIFSIGNALED(status))
		signo = WTERMSIG(status);
	else if (code > 128)
		/* For the external init, translate the signal status back.
		 * TODO: This gets it wrong when the child actually exited.
		 * We need to set up a pipe between the two inits so we can
		 * get back the proper details.
		 */
		signo = code - 128;
	else
		signo = 0;

	/* Plain exit: nothing to translate. */
	if (signo == 0)
		exit(code);

	signal(signo, SIG_DFL);
	kill(getpid(), signo);

	/* Still here ?  Maybe the signal was masked.  Just exit. */
	exit(128 + signo);
}
127
/* Terminate the internal init with an exit code derived from a wait()
 * |status|: 128 + signal number when the child was killed by a signal,
 * otherwise the child's own exit code.  Never returns.
 *
 * If we are the init for the pid ns, we can't kill ourselves -- the kernel
 * explicitly disallows this.  So a high exit value stands in for the signal
 * and our parent handles it themselves.
 */
static void exit_as_status_int(int status)
{
	exit(WIFSIGNALED(status) ? 128 + WTERMSIG(status) : WEXITSTATUS(status));
}
143
/* Wait for children until wait() fails.
 *
 * Returns the raw status stored by the last successful wait(), or 1 when
 * wait() never succeeded.
 */
static int reap_children(void)
{
	int status = 1;

	while (wait(&status) != -1)
		continue;

	return status;
}
155
/* Process the external init forwards signals to; set by setup_signal_handler(). */
static pid_t child_pid;

/* SA_SIGINFO handler shared by both init processes.
 *
 * A process whose pid is not 1 is treated as the external init and relays
 * |sig| to child_pid.  Pid 1 is treated as the internal init: it relays
 * |sig| to every process it may signal (kill(-1)), but only when
 * siginfo->si_pid is 0.  |context| is unused.
 */
static void signal_passthru(int sig, siginfo_t *siginfo, void *context)
{
	if (getpid() != 1) {
		/* External init: just forward signal to the child. */
		kill(child_pid, sig);
		return;
	}

	/* Internal init.  If the signal is coming from our children, ignore
	 * it.  If it's coming from outside the pid ns (si_pid reads as 0),
	 * pass it along to all the children.
	 */
	if (siginfo->si_pid == 0)
		kill(-1, sig);
}
177
178 /* We want to forward some signals to the child process. Block the rest.
179 * We don't actually exit as we wait for the child to die/process the signal
180 * first, and then we'll kill/exit after that point.
181 */
182 static void setup_signal_handler(pid_t pid)
183 {
184 int i;
185
186 struct sigaction sa = {
187 .sa_sigaction = signal_passthru,
188 .sa_flags = SA_SIGINFO | SA_RESTART,
189 };
190
191 child_pid = pid;
192
193 for (i = 1; i < SIGUNUSED; ++i)
194 if (sigaction(i, &sa, NULL) && errno != EINVAL)
195 fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno));
196 for (i = SIGRTMIN; i <= SIGRTMAX; ++i)
197 if (sigaction(i, &sa, NULL) && errno != EINVAL)
198 fprintf(stderr, "sigaction(%i) failed: %s\n", i, strerror(errno));
199
200 /* As an init, we will reap the children via wait(). */
201 signal(SIGCHLD, SIG_DFL);
202 }
203
/* Enter a new pid namespace and build the two-level init chain around the
 * caller's eventual program.
 *
 * Returns false, without forking, when vunshare(CLONE_NEWPID) reports
 * failure.  Otherwise the process tree ends up as:
 *   - the original process: exits 0 at once when |daemonize| is set, else
 *     stays behind as the "ext init" (forwards signals, reaps, then exits
 *     mirroring the child's status);
 *   - its child: optionally detaches (setsid + stdio on /dev/null), mounts
 *     a fresh /proc, forks again and stays behind as the "int init";
 *   - the grandchild: the only process that returns from here, with true.
 * Any failing fork/setsid/open/dup2/mount is fatal via err().
 */
static bool unshare_pid(bool daemonize)
{
	if (!vunshare(CLONE_NEWPID))
		return false;

	/* Set up external init process. */
	pid_t kid = fork();
	if (kid == -1)
		err(1, "fork() failed");
	if (kid != 0) {
		/* Parent side. */
		if (daemonize)
			exit(0);
		setproctitle("ext init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_ext(reap_children());
	}

	if (daemonize) {
		if (setsid() == -1)
			err(1, "setsid() failed");

		int null_fd = open("/dev/null", O_RDWR);
		if (null_fd == -1)
			err(1, "open(/dev/null) failed");
		/* Point stdin, stdout and stderr at /dev/null, in that order. */
		for (int std_fd = 0; std_fd <= 2; ++std_fd)
			if (dup2(null_fd, std_fd) == -1)
				err(1, "dup2() failed");
		if (null_fd > 2)
			close(null_fd);
	}

	/* Set up fresh /proc: first mark the existing mount private
	 * (recursively), then mount a new proc instance over it.
	 */
	if (mount("none", "/proc", 0, MS_PRIVATE | MS_REC, ""))
		err(1, "mount(/proc, MS_PRIVATE) failed");
	if (mount("proc", "/proc", "proc", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, ""))
		err(1, "mount(/proc) failed");

	/* Set up internal init process. */
	kid = fork();
	if (kid == -1)
		err(1, "fork() failed");
	if (kid != 0) {
		/* Parent side. */
		setproctitle("int init");
		setup_signal_handler(kid);
		close_fds();
		exit_as_status_int(reap_children());
	}

	return true;
}
258
/* Write the one-line mapping "<inner> <outer> 1" to the id map file |path|.
 *
 * Failing to open the file is fatal (err() exits with status 1); a failed
 * write or close is only reported with warn().
 */
static void write_id_map(const char *path, unsigned int inner, unsigned int outer)
{
	FILE *fp = fopen(path, "w");
	if (fp == NULL)
		err(1, "fopen(%s) failed", path);

	/* The data is buffered, so a rejected write may only show at fclose(). */
	int wrote = fprintf(fp, "%u %u 1\n", inner, outer);
	if (fclose(fp) != 0 || wrote < 0)
		warn("write(%s) failed", path);
}

/* Map one uid and one gid for the current process' user namespace.
 *
 * |iuid|/|igid| are the ids as seen inside the namespace and |ouid|/|ogid|
 * the ids they correspond to outside of it.  "deny" is written to
 * /proc/self/setgroups first on a best-effort basis: the step is skipped
 * when the file cannot be opened.
 */
static void map_uid_gid(uid_t iuid, gid_t igid, uid_t ouid, gid_t ogid)
{
	FILE *fp;

	fp = fopen("/proc/self/setgroups", "w");
	if (fp) {
		fputs("deny\n", fp);
		fclose(fp);
	}

	write_id_map("/proc/self/uid_map", (unsigned int)iuid, (unsigned int)ouid);
	write_id_map("/proc/self/gid_map", (unsigned int)igid, (unsigned int)ogid);
}
277
/* Short alias so the option table rows stay compact. */
#define a_argument required_argument

/* Long options for getopt_long(): "--pid <arg>" is reported as value 1.
 * The all-zero row terminates the table.
 */
static const struct option opts[] = {
	{ .name = "pid", .has_arg = a_argument, .flag = NULL, .val = 1 },
	{ .name = NULL,  .has_arg = 0,          .flag = NULL, .val = 0 },
};
283
/* Print the one-line usage summary on stdout and exit with EX_USAGE.
 * Never returns.
 *
 * NOTE(review): the text names the program "unshare" while PROG is
 * "vunshare" -- confirm which one is intended.
 */
static void usage(void)
{
	fputs("Usage: unshare [options] <program>\n", stdout);
	exit(EX_USAGE);
}
289
290 int main(int argc, char *argv[])
291 {
292 int c;
293 FILE *pidfp;
294 const char *pid = NULL;
295 bool newipc = false;
296 bool newmnt = false;
297 bool newnet = false;
298 bool newpid = false;
299 bool newuts = false;
300 bool newusr = false;
301 bool daemonize = false;
302 uid_t uid;
303 gid_t gid;
304
305 title_argv = argv;
306
307 while ((c = getopt_long(argc, argv, "+DimnpuU", opts, NULL)) != -1) {
308 switch (c) {
309 case 1:
310 pid = optarg;
311 break;
312 case 'i': newipc = true; break;
313 case 'm': newmnt = true; break;
314 case 'n': newnet = true; break;
315 case 'p': newpid = true; break;
316 case 'u': newuts = true; break;
317 case 'U': newusr = true; break;
318 case 'D': daemonize = true; break;
319 default:
320 usage();
321 }
322 }
323 argc -= optind;
324 argv += optind;
325
326 if (argc == 0)
327 usage();
328
329 if (newusr) {
330 uid = getuid();
331 gid = getgid();
332 if (vunshare(CLONE_NEWUSER))
333 map_uid_gid(0, 0, uid, gid);
334 else
335 newusr = false;
336 }
337
338 if (newmnt || newpid)
339 vunshare(CLONE_NEWNS);
340 if (newuts)
341 vunshare(CLONE_NEWUTS);
342 if (newipc)
343 vunshare(CLONE_NEWIPC);
344 if (newnet)
345 unshare_net();
346
347 if (pid) {
348 pidfp = fopen(pid, "we");
349 if (pidfp == NULL)
350 err(1, "fopen(%s) failed", pid);
351 }
352
353 if (newpid && unshare_pid(daemonize)) {
354 /* Nothing. */
355 } else if (daemonize)
356 if (daemon(1, 0))
357 err(1, "daemon() failed");
358
359 if (pid) {
360 fprintf(pidfp, "%u\n", getpid());
361 fclose(pidfp);
362 }
363
364 if (newusr)
365 if (vunshare(CLONE_NEWUSER))
366 map_uid_gid(uid, gid, 0, 0);
367
368 execvp(argv[0], argv);
369 fprintf(stderr, "%s: %s\n", argv[0], strerror(errno));
370 return 127;
371 }