X-Git-Url: https://git.wh0rd.org/?p=dump.git;a=blobdiff_plain;f=dump%2Ftape.c;h=ec1b31117eb6e1092dcf68b43d576d3bb214bd88;hp=b44f9e04309405f96f0724111d49774eb23f84d5;hb=bfff36e172f87dd9f159121ee48b995da4905ea6;hpb=cae88c2de27ac671cc2fea240fd5952a508cab3c diff --git a/dump/tape.c b/dump/tape.c index b44f9e0..ec1b311 100644 --- a/dump/tape.c +++ b/dump/tape.c @@ -37,7 +37,7 @@ #ifndef lint static const char rcsid[] = - "$Id: tape.c,v 1.81 2004/03/01 10:52:53 stelian Exp $"; + "$Id: tape.c,v 1.94 2011/06/10 12:41:54 stelian Exp $"; #endif /* not lint */ #include @@ -60,7 +60,11 @@ int write(), read(); #ifdef __linux__ #include #include -#include /* for definition of BLKFLSBUF */ +#include +#include /* for definition of BLKFLSBUF */ +#ifndef BLKFLSBUF /* last resort... */ +#define BLKFLSBUF _IO(0x12, 97) /* Flush buffer cache. */ +#endif #include #endif #include @@ -116,8 +120,8 @@ int eot_code = 1; long long tapea_bytes = 0; /* bytes_written at start of current volume */ static int magtapeout; /* output is really a tape */ -static ssize_t dump_atomic_read __P((int, void *, size_t)); -static ssize_t dump_atomic_write __P((int, const void *, size_t)); +static ssize_t dump_atomic_read __P((int, char *, size_t)); +static ssize_t dump_atomic_write __P((int, const char *, size_t)); #ifdef WRITEDEBUG static void doslave __P((int, int, int)); #else @@ -142,7 +146,7 @@ static int MkTapeString __P((struct s_spcl *, long long)); * The following structure defines the instruction packets sent to slaves. */ struct req { - daddr_t dblk; + ext2_loff_t dblk; int count; }; int reqsiz; @@ -174,15 +178,64 @@ static int tapea_volume; /* value of spcl.c_tapea at volume start */ int master; /* pid of master, for sending error signals */ int tenths; /* length of tape overhead per block written */ -static int caught; /* have we caught the signal to proceed? */ -static int ready; /* have we reached the lock point without having */ +static int caught1; /* have we caught the signal to proceed? */ +static int ready1; /* have we reached the lock point without having */ + /* received the SIGUSR2 signal from the prev slave? */ +static sigjmp_buf jmpbuf1; /* where to jump to if we are ready when the */ + /* SIGUSR1 arrives from the previous slave */ +static int caught2; /* have we caught the signal to proceed? */ +static int ready2; /* have we reached the lock point without having */ /* received the SIGUSR2 signal from the prev slave? */ -static sigjmp_buf jmpbuf; /* where to jump to if we are ready when the */ +static sigjmp_buf jmpbuf2; /* where to jump to if we are ready when the */ /* SIGUSR2 arrives from the previous slave */ #ifdef USE_QFA static int gtperr = 0; #endif +/* + * Determine if we can use Linux' clone system call. If so, call it + * with the CLONE_IO flag so that all processes will share the same I/O + * context, allowing the I/O schedulers to make better scheduling decisions. + */ +#ifdef __linux__ +/* first, pull in the header files that define sys_clone and CLONE_IO */ +#include +#define _GNU_SOURCE +#include +#include +#undef _GNU_SOURCE + +/* If either is not present, fall back on the fork behaviour */ +#if ! defined(SYS_clone) || ! defined (CLONE_IO) +#define fork_clone_io fork +#else /* SYS_clone */ +/* CLONE_IO is available, determine which version of sys_clone to use */ +#include +/* + * Kernel 2.5.49 introduced two extra parameters to the clone system call. + * Neither is useful in our case, so this is easy to handle. + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,49) +/* + * Parameters of the sys_clone syscall are + * clone_flags, child_stack, parent_tidptr, child_tidptr + * on all architectures except s390 and s390x + * s390* have child_stack, clone_flags, parent_tidptr, child_tidptr + */ +#if defined(__s390__) || defined(__s390x__) +#define CLONE_ARGS 0, SIGCHLD|CLONE_IO, NULL, NULL +#else +#define CLONE_ARGS SIGCHLD|CLONE_IO, 0, NULL, NULL +#endif +#else +#define CLONE_ARGS SIGCHLD|CLONE_IO, 0 +#endif /* LINUX_VERSION_CODE */ +pid_t fork_clone_io(void); +#endif /* SYS_clone */ +#else /* __linux__ not defined */ +#define fork_clone_io fork +#endif /* __linux__ */ + int alloctape(void) { @@ -234,7 +287,7 @@ void writerec(const void *dp, int isspcl) { - slp->req[trecno].dblk = (daddr_t)0; + slp->req[trecno].dblk = (ext2_loff_t)0; slp->req[trecno].count = 1; /* XXX post increment triggers an egcs-1.1.2-12 bug on alpha/sparc */ *(union u_spcl *)(*(nextblock)) = *(union u_spcl *)dp; @@ -242,7 +295,7 @@ writerec(const void *dp, int isspcl) /* Need to write it to the archive file */ if (! AfileActive && isspcl && (spcl.c_type == TS_END)) AfileActive = 1; - if (AfileActive && Afile >= 0) { + if (AfileActive && Afile >= 0 && !(spcl.c_flags & DR_EXTATTRIBUTES)) { /* When we dump an inode which is not a directory, * it means we ended the archive contents */ if (isspcl && (spcl.c_type == TS_INODE) && @@ -272,9 +325,10 @@ writerec(const void *dp, int isspcl) } void -dumpblock(daddr_t blkno, int size) +dumpblock(blk_t blkno, int size) { - int avail, tpblks, dblkno; + int avail, tpblks; + ext2_loff_t dblkno; dblkno = fsbtodb(sblock, blkno); tpblks = size >> tp_bshift; @@ -415,6 +469,10 @@ flushtape(void) int siz = (char *)nextblock - (char *)slp->req; + /* make sure returned has sane values in case we don't read + * them from the slave in this pass */ + returned.unclen = returned.clen = writesize; + slp->req[trecno].count = 0; /* Sentinel */ if (dump_atomic_write( slp->fd, (char *)slp->req, siz) != siz) @@ -470,10 +528,14 @@ flushtape(void) } blks = 0; - if (spcl.c_type != TS_END) { - for (i = 0; i < spcl.c_count; i++) - if (spcl.c_addr[i] != 0) - blks++; + if (spcl.c_type == TS_CLRI || spcl.c_type == TS_BITS) + blks = spcl.c_count; + else { + if (spcl.c_type != TS_END) { + for (i = 0; i < spcl.c_count; i++) + if (spcl.c_addr[i] != 0) + blks++; + } } slp->count = lastspclrec + blks + 1 - spcl.c_tapea; slp->tapea = spcl.c_tapea; @@ -573,8 +635,6 @@ close_rewind(void) { int eot_code = 1; (void)trewind(); - if (nexttape || Mflag) - return; if (eot_script) { msg("Launching %s\n", eot_script); eot_code = system_command(eot_script, tape, tapeno); @@ -585,6 +645,8 @@ close_rewind(void) } if (eot_code == 0) return; + if (nexttape || Mflag) + return; if (!nogripe) { msg("Change Volumes: Mount volume #%d\n", tapeno+1); broadcast("CHANGE DUMP VOLUMES!\7\7\n"); @@ -611,6 +673,10 @@ rollforward(void) tslp = &slaves[SLAVES]; ntb = (union u_spcl *)tslp->tblock[1]; + /* make sure returned has sane values in case we don't read + * them from the slave in this pass */ + returned.unclen = returned.clen = writesize; + /* * Each of the N slaves should have requests that need to * be replayed on the next tape. Use the extra slave buffers @@ -738,6 +804,16 @@ rollforward(void) #endif } +#ifdef __linux__ +#if defined(SYS_clone) && defined(CLONE_IO) +pid_t +fork_clone_io(void) +{ + return syscall(SYS_clone, CLONE_ARGS); +} +#endif +#endif + /* * We implement taking and restoring checkpoints on the tape level. * When each tape is opened, a new process is created by forking; this @@ -784,7 +860,7 @@ restore_check_point: /* * All signals are inherited... */ - childpid = fork(); + childpid = fork_clone_io(); if (childpid < 0) { msg("Context save fork fails in parent %d\n", parentpid); Exit(X_ABORT); @@ -957,15 +1033,26 @@ Exit(int status) exit(status); } +/* + * proceed - handler for SIGUSR1, used to synchronize IO between the slaves. + */ +static void +proceed1(UNUSED(int signo)) +{ + if (ready1) + siglongjmp(jmpbuf1, 1); + caught1++; +} + /* * proceed - handler for SIGUSR2, used to synchronize IO between the slaves. */ static void -proceed(UNUSED(int signo)) +proceed2(UNUSED(int signo)) { - if (ready) - siglongjmp(jmpbuf, 1); - caught++; + if (ready2) + siglongjmp(jmpbuf2, 1); + caught2++; } void @@ -987,20 +1074,25 @@ enslave(void) sigaction(SIGTERM, &sa, NULL); /* Slave sends SIGTERM on dumpabort() */ sa.sa_handler = sigpipe; sigaction(SIGPIPE, &sa, NULL); - sa.sa_handler = proceed; + sa.sa_handler = proceed1; + sa.sa_flags = SA_RESTART; + sigaction(SIGUSR1, &sa, NULL); /* Slave sends SIGUSR1 to next slave */ + sa.sa_handler = proceed2; sa.sa_flags = SA_RESTART; sigaction(SIGUSR2, &sa, NULL); /* Slave sends SIGUSR2 to next slave */ } for (i = 0; i < SLAVES; i++) { if (i == slp - &slaves[0]) { - caught = 1; + caught1 = 1; + caught2 = 1; } else { - caught = 0; + caught1 = 0; + caught2 = 0; } if (socketpair(AF_UNIX, SOCK_STREAM, 0, cmd) < 0 || - (slaves[i].pid = fork()) < 0) + (slaves[i].pid = fork_clone_io()) < 0) quit("too many slaves, %d (recompile smaller): %s\n", i, strerror(errno)); @@ -1069,7 +1161,9 @@ killall(void) * previous process before writing to the tape, and sends SIGUSR2 * to the next process when the tape write completes. On tape errors * a SIGUSR1 is sent to the master which then terminates all of the - * slaves. + * slaves. Each process sends SIGUSR1 to the next to signal that it + * is time to start reading from the disk, after it finishes reading + * and moves to the compression phase. */ static void doslave(int cmd, @@ -1087,9 +1181,6 @@ doslave(int cmd, int compresult; volatile int do_compress = !first; unsigned long worklen; -#ifdef HAVE_BZLIB - unsigned int worklen2; -#endif #ifdef HAVE_LZO lzo_align_t __LZO_MMODEL *LZO_WorkMem; #endif @@ -1109,6 +1200,7 @@ doslave(int cmd, sigset_t set; sigemptyset(&set); + sigaddset(&set, SIGUSR1); sigaddset(&set, SIGUSR2); sigprocmask(SIG_BLOCK, &set, NULL); sigemptyset(&set); @@ -1172,6 +1264,15 @@ doslave(int cmd, while ((nread = dump_atomic_read( cmd, (char *)slp->req, reqsiz)) == reqsiz) { struct req *p = slp->req; + /* wait for previous slave to finish reading */ + if (sigsetjmp(jmpbuf1, 1) == 0) { + ready1 = 1; + if (!caught1) + sigsuspend(&set); + } + ready1 = 0; + caught1 = 0; + for (trecno = 0; trecno < ntrec; trecno += p->count, p += p->count) { if (p->dblk) { /* read a disk block */ @@ -1184,6 +1285,8 @@ doslave(int cmd, quit("master/slave protocol botched.\n"); } } + /* signal next slave to start reading */ + (void) kill(nextslave, SIGUSR1); /* Try to write the data... */ wrote = 0; @@ -1226,7 +1329,7 @@ doslave(int cmd, #endif /* HAVE_ZLIB */ #ifdef HAVE_BZLIB if (zipflag == COMPRESS_BZLIB) { - worklen2 = worklen; + unsigned int worklen2 = worklen; compresult = BZ2_bzBuffToBuffCompress( comp_buf->buf, &worklen2, @@ -1244,10 +1347,12 @@ doslave(int cmd, #endif /* HAVE_BZLIB */ #ifdef HAVE_LZO if (zipflag == COMPRESS_LZO) { + lzo_uint worklen2 = worklen; compresult = lzo1x_1_compress((char *)slp->tblock[0],writesize, comp_buf->buf, - (lzo_uintp)&worklen, + &worklen2, LZO_WorkMem); + worklen = worklen2; if (compresult == LZO_E_OK) compresult = 1; else @@ -1275,13 +1380,13 @@ doslave(int cmd, do_compress = compressed; #endif /* HAVE_ZLIB || HAVE_BZLIB || HAVE_LZO */ - if (sigsetjmp(jmpbuf, 1) == 0) { - ready = 1; - if (!caught) + if (sigsetjmp(jmpbuf2, 1) == 0) { + ready2 = 1; + if (!caught2) sigsuspend(&set); } - ready = 0; - caught = 0; + ready2 = 0; + caught2 = 0; #ifdef USE_QFA if (gTapeposfd >= 0) { @@ -1294,7 +1399,8 @@ doslave(int cmd, if ((spclptr->c_magic == NFS_MAGIC) && (spclptr->c_type == TS_INODE) && (spclptr->c_date == gThisDumpDate) && - !(spclptr->c_dinode.di_mode & S_IFDIR) + !(spclptr->c_dinode.di_mode & S_IFDIR) && + !(spclptr->c_flags & DR_EXTATTRIBUTES) ) { foundone = 1; /* if (cntntrecs >= maxntrecs) { only write every maxntrecs amount of data */ @@ -1382,13 +1488,13 @@ doslave(int cmd, * loop until the count is satisfied (or error). */ static ssize_t -dump_atomic_read(int fd, void *buf, size_t count) +dump_atomic_read(int fd, char *buf, size_t count) { int got, need = count; do { while ((got = read(fd, buf, need)) > 0 && (need -= got) > 0) - (char *)buf += got; + buf += got; } while (got == -1 && errno == EINTR); return (got < 0 ? got : (ssize_t)count - need); } @@ -1399,13 +1505,13 @@ dump_atomic_read(int fd, void *buf, size_t count) * loop until the count is satisfied (or error). */ static ssize_t -dump_atomic_write(int fd, const void *buf, size_t count) +dump_atomic_write(int fd, const char *buf, size_t count) { int got, need = count; do { while ((got = write(fd, buf, need)) > 0 && (need -= got) > 0) - (char *)buf += got; + buf += got; } while (got == -1 && errno == EINTR); return (got < 0 ? got : (ssize_t)count - need); }