From 741eb2042c0ebc165d6aeadfd4396882d340adb2 Mon Sep 17 00:00:00 2001 From: Stelian Pop Date: Tue, 14 Dec 2004 14:07:56 +0000 Subject: [PATCH] Hashlist implementation for directory entries in restore. --- CHANGES | 8 ++- THANKS | 3 +- restore/restore.c | 52 +++++++++------ restore/restore.h | 6 +- restore/symtab.c | 152 ++++++++++++++++++++++++++++++++++++-------- restore/utilities.c | 22 +++++-- 6 files changed, 190 insertions(+), 53 deletions(-) diff --git a/CHANGES b/CHANGES index 726d0ee..27a8696 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,4 @@ -$Id: CHANGES,v 1.260 2004/12/10 13:31:20 stelian Exp $ +$Id: CHANGES,v 1.261 2004/12/14 14:07:56 stelian Exp $ Changes between versions 0.4b37 and 0.4b38 (released ????????????) ================================================================== @@ -27,6 +27,12 @@ Changes between versions 0.4b37 and 0.4b38 (released ????????????) Thanks to Kyle Wilson for reporting the bug. +7. Implemented a hash list for the directory names in restore. + The linear list used before caused problems in interactive + restores when dealing with directories having thousands of + entries. Thanks to Brian Ristuccia + for reporting the bug. + Changes between versions 0.4b36 and 0.4b37 (released July 7, 2004) ================================================================== diff --git a/THANKS b/THANKS index d1a6558..cb7596d 100644 --- a/THANKS +++ b/THANKS @@ -1,4 +1,4 @@ -$Id: THANKS,v 1.88 2004/12/10 13:31:20 stelian Exp $ +$Id: THANKS,v 1.89 2004/12/14 14:07:56 stelian Exp $ Dump and restore were written by the people of the CSRG at the University of California, Berkeley. @@ -100,6 +100,7 @@ Kenneth Porter shiva@well.com Eric S. Raymond esr@thyrsus.com Graham Reed greed@users.sourceforge.net Gunther Reiszig gunther@mit.edu +Brian Ristuccia bristuccia@starentnetworks.com David Ronis ronis@ronispc.chem.mcgill.ca Dietrich Rothe d-rothe@users.sourceforge.net Bernhard Sadlowski sadlowsk@Mathematik.Uni-Bielefeld.DE diff --git a/restore/restore.c b/restore/restore.c index b95e356..e8b9027 100644 --- a/restore/restore.c +++ b/restore/restore.c @@ -37,7 +37,7 @@ #ifndef lint static const char rcsid[] = - "$Id: restore.c,v 1.33 2003/11/22 16:52:16 stelian Exp $"; + "$Id: restore.c,v 1.34 2004/12/14 14:07:57 stelian Exp $"; #endif /* not lint */ #include @@ -572,19 +572,24 @@ findunreflinks(void) { struct entry *ep, *np; dump_ino_t i; + int j; Vprintf(stdout, "Find unreferenced names.\n"); for (i = ROOTINO; i < maxino; i++) { ep = lookupino(i); if (ep == NULL || ep->e_type == LEAF || TSTINO(i, dumpmap) == 0) continue; - for (np = ep->e_entries; np != NULL; np = np->e_sibling) { - if (np->e_flags == 0) { - Dprintf(stdout, - "%s: remove unreferenced name\n", - myname(np)); - removeleaf(np); - freeentry(np); + if (ep->e_entries == NULL) + continue; + for (j = 0; j < DIRHASH_SIZE; j++) { + for (np = ep->e_entries[j]; np != NULL; np = np->e_sibling) { + if (np->e_flags == 0) { + Dprintf(stdout, + "%s: remove unreferenced name\n", + myname(np)); + removeleaf(np); + freeentry(np); + } } } } @@ -592,15 +597,19 @@ findunreflinks(void) * Any leaves remaining in removed directories is unreferenced. */ for (ep = removelist; ep != NULL; ep = ep->e_next) { - for (np = ep->e_entries; np != NULL; np = np->e_sibling) { - if (np->e_type == LEAF) { - if (np->e_flags != 0) - badentry(np, "unreferenced with flags"); - Dprintf(stdout, - "%s: remove unreferenced name\n", - myname(np)); - removeleaf(np); - freeentry(np); + if (ep->e_entries == NULL) + continue; + for (j = 0; j < DIRHASH_SIZE; j++) { + for (np = ep->e_entries[j]; np != NULL; np = np->e_sibling) { + if (np->e_type == LEAF) { + if (np->e_flags != 0) + badentry(np, "unreferenced with flags"); + Dprintf(stdout, + "%s: remove unreferenced name\n", + myname(np)); + removeleaf(np); + freeentry(np); + } } } } @@ -627,8 +636,13 @@ removeoldnodes(void) prev = &removelist; for (ep = removelist; ep != NULL; ep = *prev) { if (ep->e_entries != NULL) { - prev = &ep->e_next; - continue; + int i; + for (i = 0; i < DIRHASH_SIZE; i++) { + if (ep->e_entries[i] != NULL) { + prev = &ep->e_next; + continue; + } + } } *prev = ep->e_next; removenode(ep); diff --git a/restore/restore.h b/restore/restore.h index bf6284b..36cb88a 100644 --- a/restore/restore.h +++ b/restore/restore.h @@ -5,7 +5,7 @@ * Stelian Pop , 1999-2000 * Stelian Pop - AlcĂ´ve , 2000-2002 * - * $Id: restore.h,v 1.29 2004/04/13 13:04:33 stelian Exp $ + * $Id: restore.h,v 1.30 2004/12/14 14:07:58 stelian Exp $ */ /* @@ -92,6 +92,8 @@ extern int compare_errors; /* did we encounter any compare errors? */ extern char filesys[NAMELEN];/* name of dumped filesystem */ extern dump_ino_t volinfo[]; /* which inode on which volume archive info */ +#define DIRHASH_SIZE 1024 + /* * Each file in the file system is described by one of these entries */ @@ -105,7 +107,7 @@ struct entry { struct entry *e_parent; /* pointer to parent directory (..) */ struct entry *e_sibling; /* next element in this directory (.) */ struct entry *e_links; /* hard links to this inode */ - struct entry *e_entries; /* for directories, their entries */ + struct entry **e_entries; /* for directories, their entries */ struct entry *e_next; /* hash chain list */ }; /* types */ diff --git a/restore/symtab.c b/restore/symtab.c index 3e6d3fc..8cd384c 100644 --- a/restore/symtab.c +++ b/restore/symtab.c @@ -37,7 +37,7 @@ #ifndef lint static const char rcsid[] = - "$Id: symtab.c,v 1.22 2003/10/26 16:05:48 stelian Exp $"; + "$Id: symtab.c,v 1.23 2004/12/14 14:07:58 stelian Exp $"; #endif /* not lint */ /* @@ -100,6 +100,25 @@ static long entrytblsize; static void addino __P((dump_ino_t, struct entry *)); static struct entry *lookupparent __P((char *)); static void removeentry __P((struct entry *)); +static int dir_hash __P((char *)); + +/* + * Returns a hash given a file name + */ +static int +dir_hash(char *name) +{ + unsigned long hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; + int len = strlen(name); + + while (len--) { + unsigned long hash = hash1 + (hash0 ^ (*name++ * 7152373)); + if (hash & 0x80000000) hash -= 0x7fffffff; + hash1 = hash0; + hash0 = hash; + } + return hash0 % DIRHASH_SIZE; +} /* * Look up an entry by inode number @@ -166,21 +185,45 @@ deleteino(dump_ino_t inum) struct entry * lookupname(char *name) { - struct entry *ep; + struct entry *ep, *oldep; char *np, *cp; char buf[MAXPATHLEN]; cp = name; - for (ep = lookupino(ROOTINO); ep != NULL; ep = ep->e_entries) { + + ep = lookupino(ROOTINO); + while (ep != NULL) { for (np = buf; *cp != '/' && *cp != '\0' && np < &buf[sizeof(buf)]; ) *np++ = *cp++; if (np == &buf[sizeof(buf)]) break; *np = '\0'; - for ( ; ep != NULL; ep = ep->e_sibling) - if (strcmp(ep->e_name, buf) == 0) - break; + + oldep = ep; + + if (strcmp(ep->e_name, buf) != 0 && + ep->e_entries != NULL) { + + ep = ep->e_entries[dir_hash(buf)]; + for ( ; ep != NULL; ep = ep->e_sibling) + if (strcmp(ep->e_name, buf) == 0) + break; + + /* search all hash lists for renamed inodes */ + if (ep == NULL) { + int j; + for (j = 0; j < DIRHASH_SIZE; j++) { + ep = oldep->e_entries[j]; + for ( ; ep != NULL; ep = ep->e_sibling) + if (strcmp(ep->e_name, buf) == 0) + break; + if (ep != NULL) + break; + } + } + } + if (ep == NULL) break; if (*cp++ == '\0') @@ -256,6 +299,11 @@ addentry(char *name, dump_ino_t inum, int type) errx(1, "no memory to extend symbol table"); } np->e_type = type & ~LINK; + if (type & NODE) { + np->e_entries = calloc(1, DIRHASH_SIZE * sizeof(struct entry *)); + if (np->e_entries == NULL) + panic("unable to allocate directory entries\n"); + } ep = lookupparent(name); if (ep == NULL) { if (inum != ROOTINO || lookupino(ROOTINO) != NULL) @@ -269,8 +317,8 @@ addentry(char *name, dump_ino_t inum, int type) np->e_name = savename(strrchr(name, '/') + 1); np->e_namlen = strlen(np->e_name); np->e_parent = ep; - np->e_sibling = ep->e_entries; - ep->e_entries = np; + np->e_sibling = ep->e_entries[dir_hash(np->e_name)]; + ep->e_entries[dir_hash(np->e_name)] = np; if (type & LINK) { ep = lookupino(inum); if (ep == NULL) @@ -300,8 +348,13 @@ freeentry(struct entry *ep) if (ep->e_type == NODE) { if (ep->e_links != NULL) badentry(ep, "freeing referenced directory"); - if (ep->e_entries != NULL) - badentry(ep, "freeing non-empty directory"); + if (ep->e_entries != NULL) { + int i; + for (i = 0; i < DIRHASH_SIZE; i++) { + if (ep->e_entries[i] != NULL) + badentry(ep, "freeing non-empty directory"); + } + } } if (ep->e_ino != 0) { np = lookupino(ep->e_ino); @@ -344,8 +397,8 @@ moveentry(struct entry *ep, char *newname) if (np != ep->e_parent) { removeentry(ep); ep->e_parent = np; - ep->e_sibling = np->e_entries; - np->e_entries = ep; + ep->e_sibling = np->e_entries[dir_hash(ep->e_name)]; + np->e_entries[dir_hash(ep->e_name)] = ep; } cp = strrchr(newname, '/') + 1; freename(ep->e_name); @@ -363,20 +416,40 @@ moveentry(struct entry *ep, char *newname) static void removeentry(struct entry *ep) { - struct entry *np; + struct entry *np = ep->e_parent; + struct entry *entry = np->e_entries[dir_hash(ep->e_name)]; - np = ep->e_parent; - if (np->e_entries == ep) { - np->e_entries = ep->e_sibling; + if (entry == ep) { + np->e_entries[dir_hash(ep->e_name)] = ep->e_sibling; } else { - for (np = np->e_entries; np != NULL; np = np->e_sibling) { + for (np = entry; np != NULL; np = np->e_sibling) { if (np->e_sibling == ep) { np->e_sibling = ep->e_sibling; break; } } - if (np == NULL) + if (np == NULL) { + + /* search all hash lists for renamed inodes */ + int j; + for (j = 0; j < DIRHASH_SIZE; j++) { + np = ep->e_parent; + entry = np->e_entries[j]; + if (entry == ep) { + np->e_entries[j] = ep->e_sibling; + return; + } + else { + for (np = entry; np != NULL; np = np->e_sibling) { + if (np->e_sibling == ep) { + np->e_sibling = ep->e_sibling; + return; + } + } + } + } badentry(ep, "cannot find entry in parent list"); + } } } @@ -449,6 +522,7 @@ freename(char *name) struct symtableheader { int32_t volno; int32_t stringsize; + int32_t hashsize; int32_t entrytblsize; time_t dumptime; time_t dumpdate; @@ -466,9 +540,10 @@ dumpsymtable(char *filename, long checkpt) struct entry *ep, *tep; dump_ino_t i; struct entry temp, *tentry; - long mynum = 1, stroff = 0; + long mynum = 1, stroff = 0, hashoff = 0; FILE *fd; struct symtableheader hdr; + struct entry *temphash[DIRHASH_SIZE]; Vprintf(stdout, "Check pointing the restore\n"); if (Nflag) @@ -490,11 +565,28 @@ dumpsymtable(char *filename, long checkpt) (int)allocsize(ep->e_namlen), fd); } } + /* + * Write out e_entries tables + */ + for (i = WINO; i <= maxino; i++) { + for (ep = lookupino(i); ep != NULL; ep = ep->e_links) { + if (ep->e_entries != NULL) { + int j; + memcpy(temphash, ep->e_entries, DIRHASH_SIZE * sizeof(struct entry *)); + for (j = 0; j < DIRHASH_SIZE; j++) { + if (temphash[j]) + temphash[j] = (struct entry *)ep->e_entries[j]->e_index; + } + fwrite(temphash, DIRHASH_SIZE, sizeof(struct entry *), fd); + } + } + } /* * Convert pointers to indexes, and output */ tep = &temp; stroff = 0; + hashoff = 0; for (i = WINO; i <= maxino; i++) { for (ep = lookupino(i); ep != NULL; ep = ep->e_links) { memmove(tep, ep, sizeof(struct entry)); @@ -507,9 +599,10 @@ dumpsymtable(char *filename, long checkpt) if (ep->e_sibling != NULL) tep->e_sibling = (struct entry *)ep->e_sibling->e_index; - if (ep->e_entries != NULL) - tep->e_entries = - (struct entry *)ep->e_entries->e_index; + if (ep->e_entries != NULL) { + tep->e_entries = (struct entry **)hashoff; + hashoff += DIRHASH_SIZE * sizeof(struct entry *); + } if (ep->e_next != NULL) tep->e_next = (struct entry *)ep->e_next->e_index; @@ -530,6 +623,7 @@ dumpsymtable(char *filename, long checkpt) hdr.maxino = maxino; hdr.entrytblsize = entrytblsize; hdr.stringsize = stroff; + hdr.hashsize = hashoff; hdr.dumptime = dumptime; hdr.dumpdate = dumpdate; hdr.ntrec = ntrec; @@ -617,7 +711,7 @@ initsymtable(char *filename) entrytblsize = hdr.entrytblsize; entry = (struct entry **) (base + tblsize - (entrytblsize * sizeof(struct entry *))); - baseep = (struct entry *)(base + hdr.stringsize - sizeof(struct entry)); + baseep = (struct entry *)(base + hdr.stringsize + hdr.hashsize - sizeof(struct entry)); lep = (struct entry *)entry; for (i = 0; i < entrytblsize; i++) { if (entry[i] == NULL) @@ -631,8 +725,16 @@ initsymtable(char *filename) ep->e_sibling = &baseep[(long)ep->e_sibling]; if (ep->e_links != NULL) ep->e_links = &baseep[(long)ep->e_links]; - if (ep->e_entries != NULL) - ep->e_entries = &baseep[(long)ep->e_entries]; + if (ep->e_type == NODE) { + int i; + ep->e_entries = (struct entry **)(base + hdr.stringsize + (long)ep->e_entries); + for (i = 0; i < DIRHASH_SIZE; i++) { + if (ep->e_entries[i]) + ep->e_entries[i] = &baseep[(long)ep->e_entries[i]]; + } + } + else + ep->e_entries = NULL; if (ep->e_next != NULL) ep->e_next = &baseep[(long)ep->e_next]; } diff --git a/restore/utilities.c b/restore/utilities.c index e0db9af..e96dcf3 100644 --- a/restore/utilities.c +++ b/restore/utilities.c @@ -37,7 +37,7 @@ #ifndef lint static const char rcsid[] = - "$Id: utilities.c,v 1.24 2003/11/22 16:52:16 stelian Exp $"; + "$Id: utilities.c,v 1.25 2004/12/14 14:07:58 stelian Exp $"; #endif /* not lint */ #include @@ -187,8 +187,13 @@ removenode(struct entry *ep) if (ep->e_type != NODE) badentry(ep, "removenode: not a node"); - if (ep->e_entries != NULL) - badentry(ep, "removenode: non-empty directory"); + if (ep->e_entries != NULL) { + int i; + for (i = 0; i < DIRHASH_SIZE; i++) { + if (ep->e_entries[i] != NULL) + badentry(ep, "removenode: non-empty directory"); + } + } ep->e_flags |= REMOVED; ep->e_flags &= ~TMPNAME; cp = myname(ep); @@ -371,8 +376,15 @@ badentry(struct entry *ep, const char *msg) fprintf(stderr, "parent name %s\n", myname(ep->e_parent)); if (ep->e_sibling != NULL) fprintf(stderr, "sibling name: %s\n", myname(ep->e_sibling)); - if (ep->e_entries != NULL) - fprintf(stderr, "next entry name: %s\n", myname(ep->e_entries)); + if (ep->e_entries != NULL) { + int i; + for (i = 0; i < DIRHASH_SIZE; i++) { + if (ep->e_entries[i] != NULL) { + fprintf(stderr, "next entry name: %s\n", myname(ep->e_entries[0])); + break; + } + } + } if (ep->e_links != NULL) fprintf(stderr, "next link name: %s\n", myname(ep->e_links)); if (ep->e_next != NULL) -- 2.39.2