X-Git-Url: https://git.wh0rd.org/?p=dump.git;a=blobdiff_plain;f=dump%2Ftape.c;h=ec1b31117eb6e1092dcf68b43d576d3bb214bd88;hp=d615960b1dc05b3cedfde591d9ed50002e041dca;hb=bfff36e172f87dd9f159121ee48b995da4905ea6;hpb=e1e4be20b0192a2988eab70a31fb1428c4c81eec diff --git a/dump/tape.c b/dump/tape.c index d615960..ec1b311 100644 --- a/dump/tape.c +++ b/dump/tape.c @@ -37,7 +37,7 @@ #ifndef lint static const char rcsid[] = - "$Id: tape.c,v 1.85 2004/07/01 09:14:49 stelian Exp $"; + "$Id: tape.c,v 1.94 2011/06/10 12:41:54 stelian Exp $"; #endif /* not lint */ #include @@ -60,7 +60,11 @@ int write(), read(); #ifdef __linux__ #include #include -#include /* for definition of BLKFLSBUF */ +#include +#include /* for definition of BLKFLSBUF */ +#ifndef BLKFLSBUF /* last resort... */ +#define BLKFLSBUF _IO(0x12, 97) /* Flush buffer cache. */ +#endif #include #endif #include @@ -174,15 +178,64 @@ static int tapea_volume; /* value of spcl.c_tapea at volume start */ int master; /* pid of master, for sending error signals */ int tenths; /* length of tape overhead per block written */ -static int caught; /* have we caught the signal to proceed? */ -static int ready; /* have we reached the lock point without having */ +static int caught1; /* have we caught the signal to proceed? */ +static int ready1; /* have we reached the lock point without having */ + /* received the SIGUSR2 signal from the prev slave? */ +static sigjmp_buf jmpbuf1; /* where to jump to if we are ready when the */ + /* SIGUSR1 arrives from the previous slave */ +static int caught2; /* have we caught the signal to proceed? */ +static int ready2; /* have we reached the lock point without having */ /* received the SIGUSR2 signal from the prev slave? */ -static sigjmp_buf jmpbuf; /* where to jump to if we are ready when the */ +static sigjmp_buf jmpbuf2; /* where to jump to if we are ready when the */ /* SIGUSR2 arrives from the previous slave */ #ifdef USE_QFA static int gtperr = 0; #endif +/* + * Determine if we can use Linux' clone system call. If so, call it + * with the CLONE_IO flag so that all processes will share the same I/O + * context, allowing the I/O schedulers to make better scheduling decisions. + */ +#ifdef __linux__ +/* first, pull in the header files that define sys_clone and CLONE_IO */ +#include +#define _GNU_SOURCE +#include +#include +#undef _GNU_SOURCE + +/* If either is not present, fall back on the fork behaviour */ +#if ! defined(SYS_clone) || ! defined (CLONE_IO) +#define fork_clone_io fork +#else /* SYS_clone */ +/* CLONE_IO is available, determine which version of sys_clone to use */ +#include +/* + * Kernel 2.5.49 introduced two extra parameters to the clone system call. + * Neither is useful in our case, so this is easy to handle. + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,49) +/* + * Parameters of the sys_clone syscall are + * clone_flags, child_stack, parent_tidptr, child_tidptr + * on all architectures except s390 and s390x + * s390* have child_stack, clone_flags, parent_tidptr, child_tidptr + */ +#if defined(__s390__) || defined(__s390x__) +#define CLONE_ARGS 0, SIGCHLD|CLONE_IO, NULL, NULL +#else +#define CLONE_ARGS SIGCHLD|CLONE_IO, 0, NULL, NULL +#endif +#else +#define CLONE_ARGS SIGCHLD|CLONE_IO, 0 +#endif /* LINUX_VERSION_CODE */ +pid_t fork_clone_io(void); +#endif /* SYS_clone */ +#else /* __linux__ not defined */ +#define fork_clone_io fork +#endif /* __linux__ */ + int alloctape(void) { @@ -242,7 +295,7 @@ writerec(const void *dp, int isspcl) /* Need to write it to the archive file */ if (! AfileActive && isspcl && (spcl.c_type == TS_END)) AfileActive = 1; - if (AfileActive && Afile >= 0) { + if (AfileActive && Afile >= 0 && !(spcl.c_flags & DR_EXTATTRIBUTES)) { /* When we dump an inode which is not a directory, * it means we ended the archive contents */ if (isspcl && (spcl.c_type == TS_INODE) && @@ -416,6 +469,10 @@ flushtape(void) int siz = (char *)nextblock - (char *)slp->req; + /* make sure returned has sane values in case we don't read + * them from the slave in this pass */ + returned.unclen = returned.clen = writesize; + slp->req[trecno].count = 0; /* Sentinel */ if (dump_atomic_write( slp->fd, (char *)slp->req, siz) != siz) @@ -471,10 +528,14 @@ flushtape(void) } blks = 0; - if (spcl.c_type != TS_END) { - for (i = 0; i < spcl.c_count; i++) - if (spcl.c_addr[i] != 0) - blks++; + if (spcl.c_type == TS_CLRI || spcl.c_type == TS_BITS) + blks = spcl.c_count; + else { + if (spcl.c_type != TS_END) { + for (i = 0; i < spcl.c_count; i++) + if (spcl.c_addr[i] != 0) + blks++; + } } slp->count = lastspclrec + blks + 1 - spcl.c_tapea; slp->tapea = spcl.c_tapea; @@ -612,6 +673,10 @@ rollforward(void) tslp = &slaves[SLAVES]; ntb = (union u_spcl *)tslp->tblock[1]; + /* make sure returned has sane values in case we don't read + * them from the slave in this pass */ + returned.unclen = returned.clen = writesize; + /* * Each of the N slaves should have requests that need to * be replayed on the next tape. Use the extra slave buffers @@ -739,6 +804,16 @@ rollforward(void) #endif } +#ifdef __linux__ +#if defined(SYS_clone) && defined(CLONE_IO) +pid_t +fork_clone_io(void) +{ + return syscall(SYS_clone, CLONE_ARGS); +} +#endif +#endif + /* * We implement taking and restoring checkpoints on the tape level. * When each tape is opened, a new process is created by forking; this @@ -785,7 +860,7 @@ restore_check_point: /* * All signals are inherited... */ - childpid = fork(); + childpid = fork_clone_io(); if (childpid < 0) { msg("Context save fork fails in parent %d\n", parentpid); Exit(X_ABORT); @@ -958,15 +1033,26 @@ Exit(int status) exit(status); } +/* + * proceed - handler for SIGUSR1, used to synchronize IO between the slaves. + */ +static void +proceed1(UNUSED(int signo)) +{ + if (ready1) + siglongjmp(jmpbuf1, 1); + caught1++; +} + /* * proceed - handler for SIGUSR2, used to synchronize IO between the slaves. */ static void -proceed(UNUSED(int signo)) +proceed2(UNUSED(int signo)) { - if (ready) - siglongjmp(jmpbuf, 1); - caught++; + if (ready2) + siglongjmp(jmpbuf2, 1); + caught2++; } void @@ -988,20 +1074,25 @@ enslave(void) sigaction(SIGTERM, &sa, NULL); /* Slave sends SIGTERM on dumpabort() */ sa.sa_handler = sigpipe; sigaction(SIGPIPE, &sa, NULL); - sa.sa_handler = proceed; + sa.sa_handler = proceed1; + sa.sa_flags = SA_RESTART; + sigaction(SIGUSR1, &sa, NULL); /* Slave sends SIGUSR1 to next slave */ + sa.sa_handler = proceed2; sa.sa_flags = SA_RESTART; sigaction(SIGUSR2, &sa, NULL); /* Slave sends SIGUSR2 to next slave */ } for (i = 0; i < SLAVES; i++) { if (i == slp - &slaves[0]) { - caught = 1; + caught1 = 1; + caught2 = 1; } else { - caught = 0; + caught1 = 0; + caught2 = 0; } if (socketpair(AF_UNIX, SOCK_STREAM, 0, cmd) < 0 || - (slaves[i].pid = fork()) < 0) + (slaves[i].pid = fork_clone_io()) < 0) quit("too many slaves, %d (recompile smaller): %s\n", i, strerror(errno)); @@ -1070,7 +1161,9 @@ killall(void) * previous process before writing to the tape, and sends SIGUSR2 * to the next process when the tape write completes. On tape errors * a SIGUSR1 is sent to the master which then terminates all of the - * slaves. + * slaves. Each process sends SIGUSR1 to the next to signal that it + * is time to start reading from the disk, after it finishes reading + * and moves to the compression phase. */ static void doslave(int cmd, @@ -1107,6 +1200,7 @@ doslave(int cmd, sigset_t set; sigemptyset(&set); + sigaddset(&set, SIGUSR1); sigaddset(&set, SIGUSR2); sigprocmask(SIG_BLOCK, &set, NULL); sigemptyset(&set); @@ -1170,6 +1264,15 @@ doslave(int cmd, while ((nread = dump_atomic_read( cmd, (char *)slp->req, reqsiz)) == reqsiz) { struct req *p = slp->req; + /* wait for previous slave to finish reading */ + if (sigsetjmp(jmpbuf1, 1) == 0) { + ready1 = 1; + if (!caught1) + sigsuspend(&set); + } + ready1 = 0; + caught1 = 0; + for (trecno = 0; trecno < ntrec; trecno += p->count, p += p->count) { if (p->dblk) { /* read a disk block */ @@ -1182,6 +1285,8 @@ doslave(int cmd, quit("master/slave protocol botched.\n"); } } + /* signal next slave to start reading */ + (void) kill(nextslave, SIGUSR1); /* Try to write the data... */ wrote = 0; @@ -1275,13 +1380,13 @@ doslave(int cmd, do_compress = compressed; #endif /* HAVE_ZLIB || HAVE_BZLIB || HAVE_LZO */ - if (sigsetjmp(jmpbuf, 1) == 0) { - ready = 1; - if (!caught) + if (sigsetjmp(jmpbuf2, 1) == 0) { + ready2 = 1; + if (!caught2) sigsuspend(&set); } - ready = 0; - caught = 0; + ready2 = 0; + caught2 = 0; #ifdef USE_QFA if (gTapeposfd >= 0) { @@ -1294,7 +1399,8 @@ doslave(int cmd, if ((spclptr->c_magic == NFS_MAGIC) && (spclptr->c_type == TS_INODE) && (spclptr->c_date == gThisDumpDate) && - !(spclptr->c_dinode.di_mode & S_IFDIR) + !(spclptr->c_dinode.di_mode & S_IFDIR) && + !(spclptr->c_flags & DR_EXTATTRIBUTES) ) { foundone = 1; /* if (cntntrecs >= maxntrecs) { only write every maxntrecs amount of data */