Hashlist implementation for directory entries in restore.
authorStelian Pop <stelian@popies.net>
Tue, 14 Dec 2004 14:07:56 +0000 (14:07 +0000)
committerStelian Pop <stelian@popies.net>
Tue, 14 Dec 2004 14:07:56 +0000 (14:07 +0000)
CHANGES
THANKS
restore/restore.c
restore/restore.h
restore/symtab.c
restore/utilities.c

diff --git a/CHANGES b/CHANGES
index 726d0eec9c52e5513c5311d777af9c93809e784c..27a8696f9a2d144fcaee22591b527b227a8454f4 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -1,4 +1,4 @@
-$Id: CHANGES,v 1.260 2004/12/10 13:31:20 stelian Exp $
+$Id: CHANGES,v 1.261 2004/12/14 14:07:56 stelian Exp $
 
 Changes between versions 0.4b37 and 0.4b38 (released ????????????)
 ==================================================================
@@ -27,6 +27,12 @@ Changes between versions 0.4b37 and 0.4b38 (released ????????????)
        Thanks to Kyle Wilson <kyle.wilson@amd.com> for reporting the
        bug.
 
+7.     Implemented a hash list for the directory names in restore.
+       The linear list used before caused problems in interactive
+       restores when dealing with directories having thousands of
+       entries. Thanks to Brian Ristuccia <bristuccia@starentnetworks.com>
+       for reporting the bug.
+
 Changes between versions 0.4b36 and 0.4b37 (released July 7, 2004)
 ==================================================================
 
diff --git a/THANKS b/THANKS
index d1a6558eb625ffde278570dada84b44d28f9025c..cb7596d4555a7bcf5acb4be2571272ed5649b6a1 100644 (file)
--- a/THANKS
+++ b/THANKS
@@ -1,4 +1,4 @@
-$Id: THANKS,v 1.88 2004/12/10 13:31:20 stelian Exp $
+$Id: THANKS,v 1.89 2004/12/14 14:07:56 stelian Exp $
 
 Dump and restore were written by the people of the CSRG at the University
 of California, Berkeley.
@@ -100,6 +100,7 @@ Kenneth Porter              shiva@well.com
 Eric S. Raymond                esr@thyrsus.com
 Graham Reed            greed@users.sourceforge.net
 Gunther Reiszig                gunther@mit.edu
+Brian Ristuccia                bristuccia@starentnetworks.com
 David Ronis            ronis@ronispc.chem.mcgill.ca
 Dietrich Rothe         d-rothe@users.sourceforge.net
 Bernhard Sadlowski     sadlowsk@Mathematik.Uni-Bielefeld.DE
index b95e3569cd8b79e07c21ecc8c2f13c73fb5c8458..e8b9027bee6a4df93c1ddb1642d9030eb9422122 100644 (file)
@@ -37,7 +37,7 @@
 
 #ifndef lint
 static const char rcsid[] =
-       "$Id: restore.c,v 1.33 2003/11/22 16:52:16 stelian Exp $";
+       "$Id: restore.c,v 1.34 2004/12/14 14:07:57 stelian Exp $";
 #endif /* not lint */
 
 #include <config.h>
@@ -572,19 +572,24 @@ findunreflinks(void)
 {
        struct entry *ep, *np;
        dump_ino_t i;
+       int j;
 
        Vprintf(stdout, "Find unreferenced names.\n");
        for (i = ROOTINO; i < maxino; i++) {
                ep = lookupino(i);
                if (ep == NULL || ep->e_type == LEAF || TSTINO(i, dumpmap) == 0)
                        continue;
-               for (np = ep->e_entries; np != NULL; np = np->e_sibling) {
-                       if (np->e_flags == 0) {
-                               Dprintf(stdout,
-                                   "%s: remove unreferenced name\n",
-                                   myname(np));
-                               removeleaf(np);
-                               freeentry(np);
+               if (ep->e_entries == NULL)
+                       continue;
+               for (j = 0; j < DIRHASH_SIZE; j++) {
+                       for (np = ep->e_entries[j]; np != NULL; np = np->e_sibling) {
+                               if (np->e_flags == 0) {
+                                       Dprintf(stdout,
+                                           "%s: remove unreferenced name\n",
+                                           myname(np));
+                                       removeleaf(np);
+                                       freeentry(np);
+                               }
                        }
                }
        }
@@ -592,15 +597,19 @@ findunreflinks(void)
         * Any leaves remaining in removed directories is unreferenced.
         */
        for (ep = removelist; ep != NULL; ep = ep->e_next) {
-               for (np = ep->e_entries; np != NULL; np = np->e_sibling) {
-                       if (np->e_type == LEAF) {
-                               if (np->e_flags != 0)
-                                       badentry(np, "unreferenced with flags");
-                               Dprintf(stdout,
-                                   "%s: remove unreferenced name\n",
-                                   myname(np));
-                               removeleaf(np);
-                               freeentry(np);
+               if (ep->e_entries == NULL)
+                       continue;
+               for (j = 0; j < DIRHASH_SIZE; j++) {
+                       for (np = ep->e_entries[j]; np != NULL; np = np->e_sibling) {
+                               if (np->e_type == LEAF) {
+                                       if (np->e_flags != 0)
+                                               badentry(np, "unreferenced with flags");
+                                       Dprintf(stdout,
+                                           "%s: remove unreferenced name\n",
+                                           myname(np));
+                                       removeleaf(np);
+                                       freeentry(np);
+                               }
                        }
                }
        }
@@ -627,8 +636,13 @@ removeoldnodes(void)
                prev = &removelist;
                for (ep = removelist; ep != NULL; ep = *prev) {
                        if (ep->e_entries != NULL) {
-                               prev = &ep->e_next;
-                               continue;
+                               int i;
+                               for (i = 0; i < DIRHASH_SIZE; i++) {
+                                       if (ep->e_entries[i] != NULL) {
+                                               prev = &ep->e_next;
+                                               continue;
+                                       }
+                               }
                        }
                        *prev = ep->e_next;
                        removenode(ep);
index bf6284b126c986950fc50c75284b94fafad0ec6e..36cb88ae486b67c2649c8eddf2c99613334e248b 100644 (file)
@@ -5,7 +5,7 @@
  *     Stelian Pop <stelian@popies.net>, 1999-2000
  *     Stelian Pop <stelian@popies.net> - AlcĂ´ve <www.alcove.com>, 2000-2002
  *
- *     $Id: restore.h,v 1.29 2004/04/13 13:04:33 stelian Exp $
+ *     $Id: restore.h,v 1.30 2004/12/14 14:07:58 stelian Exp $
  */
 
 /*
@@ -92,6 +92,8 @@ extern int    compare_errors; /* did we encounter any compare errors? */
 extern char    filesys[NAMELEN];/* name of dumped filesystem */
 extern dump_ino_t volinfo[];   /* which inode on which volume archive info */
 
+#define DIRHASH_SIZE 1024
+
 /*
  * Each file in the file system is described by one of these entries
  */
@@ -105,7 +107,7 @@ struct entry {
        struct  entry *e_parent;        /* pointer to parent directory (..) */
        struct  entry *e_sibling;       /* next element in this directory (.) */
        struct  entry *e_links;         /* hard links to this inode */
-       struct  entry *e_entries;       /* for directories, their entries */
+       struct  entry **e_entries;      /* for directories, their entries */
        struct  entry *e_next;          /* hash chain list */
 };
 /* types */
index 3e6d3fc2b6674a31a076dcb2eb3204790e1159d3..8cd384c51dd36a7f2ba622f75f0214bdf93fd268 100644 (file)
@@ -37,7 +37,7 @@
 
 #ifndef lint
 static const char rcsid[] =
-       "$Id: symtab.c,v 1.22 2003/10/26 16:05:48 stelian Exp $";
+       "$Id: symtab.c,v 1.23 2004/12/14 14:07:58 stelian Exp $";
 #endif /* not lint */
 
 /*
@@ -100,6 +100,25 @@ static long entrytblsize;
 static void             addino __P((dump_ino_t, struct entry *));
 static struct entry    *lookupparent __P((char *));
 static void             removeentry __P((struct entry *));
+static int              dir_hash __P((char *));
+
+/*
+ * Returns a hash given a file name
+ */
+static int
+dir_hash(char *name)
+{
+       unsigned long hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
+       int len = strlen(name);
+       
+       while (len--) {
+               unsigned long hash = hash1 + (hash0 ^ (*name++ * 7152373));
+               if (hash & 0x80000000) hash -= 0x7fffffff;
+               hash1 = hash0;
+               hash0 = hash; 
+       }       
+       return hash0 % DIRHASH_SIZE;
+}
 
 /*
  * Look up an entry by inode number
@@ -166,21 +185,45 @@ deleteino(dump_ino_t inum)
 struct entry *
 lookupname(char *name)
 {
-       struct entry *ep;
+       struct entry *ep, *oldep;
        char *np, *cp;
        char buf[MAXPATHLEN];
 
        cp = name;
-       for (ep = lookupino(ROOTINO); ep != NULL; ep = ep->e_entries) {
+
+       ep = lookupino(ROOTINO);
+       while (ep != NULL) {
                for (np = buf; *cp != '/' && *cp != '\0' &&
                                np < &buf[sizeof(buf)]; )
                        *np++ = *cp++;
                if (np == &buf[sizeof(buf)])
                        break;
                *np = '\0';
-               for ( ; ep != NULL; ep = ep->e_sibling)
-                       if (strcmp(ep->e_name, buf) == 0)
-                               break;
+
+               oldep = ep;
+
+               if (strcmp(ep->e_name, buf) != 0 &&
+                   ep->e_entries != NULL) {
+
+                       ep = ep->e_entries[dir_hash(buf)];
+                       for ( ; ep != NULL; ep = ep->e_sibling)
+                               if (strcmp(ep->e_name, buf) == 0)
+                                       break;
+
+                       /* search all hash lists for renamed inodes */
+                       if (ep == NULL) {
+                               int j;
+                               for (j = 0; j < DIRHASH_SIZE; j++) {
+                                       ep = oldep->e_entries[j];
+                                       for ( ; ep != NULL; ep = ep->e_sibling)
+                                               if (strcmp(ep->e_name, buf) == 0)
+                                                       break;
+                                       if (ep != NULL)
+                                               break;
+                               }
+                       }
+               }
+
                if (ep == NULL)
                        break;
                if (*cp++ == '\0')
@@ -256,6 +299,11 @@ addentry(char *name, dump_ino_t inum, int type)
                        errx(1, "no memory to extend symbol table");
        }
        np->e_type = type & ~LINK;
+       if (type & NODE) {
+               np->e_entries = calloc(1, DIRHASH_SIZE * sizeof(struct entry *));
+               if (np->e_entries == NULL)
+                       panic("unable to allocate directory entries\n");
+       }
        ep = lookupparent(name);
        if (ep == NULL) {
                if (inum != ROOTINO || lookupino(ROOTINO) != NULL)
@@ -269,8 +317,8 @@ addentry(char *name, dump_ino_t inum, int type)
        np->e_name = savename(strrchr(name, '/') + 1);
        np->e_namlen = strlen(np->e_name);
        np->e_parent = ep;
-       np->e_sibling = ep->e_entries;
-       ep->e_entries = np;
+       np->e_sibling = ep->e_entries[dir_hash(np->e_name)];
+       ep->e_entries[dir_hash(np->e_name)] = np;
        if (type & LINK) {
                ep = lookupino(inum);
                if (ep == NULL)
@@ -300,8 +348,13 @@ freeentry(struct entry *ep)
        if (ep->e_type == NODE) {
                if (ep->e_links != NULL)
                        badentry(ep, "freeing referenced directory");
-               if (ep->e_entries != NULL)
-                       badentry(ep, "freeing non-empty directory");
+               if (ep->e_entries != NULL) {
+                       int i;
+                       for (i = 0; i < DIRHASH_SIZE; i++) {
+                               if (ep->e_entries[i] != NULL) 
+                                       badentry(ep, "freeing non-empty directory");
+                       }
+               }
        }
        if (ep->e_ino != 0) {
                np = lookupino(ep->e_ino);
@@ -344,8 +397,8 @@ moveentry(struct entry *ep, char *newname)
        if (np != ep->e_parent) {
                removeentry(ep);
                ep->e_parent = np;
-               ep->e_sibling = np->e_entries;
-               np->e_entries = ep;
+               ep->e_sibling = np->e_entries[dir_hash(ep->e_name)];
+               np->e_entries[dir_hash(ep->e_name)] = ep;
        }
        cp = strrchr(newname, '/') + 1;
        freename(ep->e_name);
@@ -363,20 +416,40 @@ moveentry(struct entry *ep, char *newname)
 static void
 removeentry(struct entry *ep)
 {
-       struct entry *np;
+       struct entry *np = ep->e_parent;
+       struct entry *entry = np->e_entries[dir_hash(ep->e_name)];
 
-       np = ep->e_parent;
-       if (np->e_entries == ep) {
-               np->e_entries = ep->e_sibling;
+       if (entry == ep) {
+               np->e_entries[dir_hash(ep->e_name)] = ep->e_sibling;
        } else {
-               for (np = np->e_entries; np != NULL; np = np->e_sibling) {
+               for (np = entry; np != NULL; np = np->e_sibling) {
                        if (np->e_sibling == ep) {
                                np->e_sibling = ep->e_sibling;
                                break;
                        }
                }
-               if (np == NULL)
+               if (np == NULL) {
+
+                       /* search all hash lists for renamed inodes */
+                       int j;
+                       for (j = 0; j < DIRHASH_SIZE; j++) {
+                               np = ep->e_parent;
+                               entry = np->e_entries[j];
+                               if (entry == ep) {
+                                       np->e_entries[j] = ep->e_sibling;
+                                       return;
+                               }
+                               else {
+                                       for (np = entry; np != NULL; np = np->e_sibling) {
+                                               if (np->e_sibling == ep) {
+                                                       np->e_sibling = ep->e_sibling;
+                                                       return;
+                                               }
+                                       }
+                               }
+                       }
                        badentry(ep, "cannot find entry in parent list");
+               }
        }
 }
 
@@ -449,6 +522,7 @@ freename(char *name)
 struct symtableheader {
        int32_t volno;
        int32_t stringsize;
+       int32_t hashsize;
        int32_t entrytblsize;
        time_t  dumptime;
        time_t  dumpdate;
@@ -466,9 +540,10 @@ dumpsymtable(char *filename, long checkpt)
        struct entry *ep, *tep;
        dump_ino_t i;
        struct entry temp, *tentry;
-       long mynum = 1, stroff = 0;
+       long mynum = 1, stroff = 0, hashoff = 0;
        FILE *fd;
        struct symtableheader hdr;
+       struct entry *temphash[DIRHASH_SIZE];
 
        Vprintf(stdout, "Check pointing the restore\n");
        if (Nflag)
@@ -490,11 +565,28 @@ dumpsymtable(char *filename, long checkpt)
                               (int)allocsize(ep->e_namlen), fd);
                }
        }
+       /*
+        * Write out e_entries tables
+        */
+       for (i = WINO; i <= maxino; i++) {
+               for (ep = lookupino(i); ep != NULL; ep = ep->e_links) {
+                       if (ep->e_entries != NULL) {
+                               int j;
+                               memcpy(temphash, ep->e_entries, DIRHASH_SIZE * sizeof(struct entry *));
+                               for (j = 0; j < DIRHASH_SIZE; j++) {
+                                       if (temphash[j])
+                                               temphash[j] = (struct entry *)ep->e_entries[j]->e_index;
+                               }
+                               fwrite(temphash, DIRHASH_SIZE, sizeof(struct entry *), fd);
+                       }
+               }
+       }
        /*
         * Convert pointers to indexes, and output
         */
        tep = &temp;
        stroff = 0;
+       hashoff = 0;
        for (i = WINO; i <= maxino; i++) {
                for (ep = lookupino(i); ep != NULL; ep = ep->e_links) {
                        memmove(tep, ep, sizeof(struct entry));
@@ -507,9 +599,10 @@ dumpsymtable(char *filename, long checkpt)
                        if (ep->e_sibling != NULL)
                                tep->e_sibling =
                                        (struct entry *)ep->e_sibling->e_index;
-                       if (ep->e_entries != NULL)
-                               tep->e_entries =
-                                       (struct entry *)ep->e_entries->e_index;
+                       if (ep->e_entries != NULL) {
+                               tep->e_entries = (struct entry **)hashoff;
+                               hashoff += DIRHASH_SIZE * sizeof(struct entry *);
+                       }
                        if (ep->e_next != NULL)
                                tep->e_next =
                                        (struct entry *)ep->e_next->e_index;
@@ -530,6 +623,7 @@ dumpsymtable(char *filename, long checkpt)
        hdr.maxino = maxino;
        hdr.entrytblsize = entrytblsize;
        hdr.stringsize = stroff;
+       hdr.hashsize = hashoff;
        hdr.dumptime = dumptime;
        hdr.dumpdate = dumpdate;
        hdr.ntrec = ntrec;
@@ -617,7 +711,7 @@ initsymtable(char *filename)
        entrytblsize = hdr.entrytblsize;
        entry = (struct entry **)
                (base + tblsize - (entrytblsize * sizeof(struct entry *)));
-       baseep = (struct entry *)(base + hdr.stringsize - sizeof(struct entry));
+       baseep = (struct entry *)(base + hdr.stringsize + hdr.hashsize - sizeof(struct entry));
        lep = (struct entry *)entry;
        for (i = 0; i < entrytblsize; i++) {
                if (entry[i] == NULL)
@@ -631,8 +725,16 @@ initsymtable(char *filename)
                        ep->e_sibling = &baseep[(long)ep->e_sibling];
                if (ep->e_links != NULL)
                        ep->e_links = &baseep[(long)ep->e_links];
-               if (ep->e_entries != NULL)
-                       ep->e_entries = &baseep[(long)ep->e_entries];
+               if (ep->e_type == NODE) {
+                       int i;
+                       ep->e_entries = (struct entry **)(base + hdr.stringsize + (long)ep->e_entries);
+                       for (i = 0; i < DIRHASH_SIZE; i++) {
+                               if (ep->e_entries[i])
+                                       ep->e_entries[i] = &baseep[(long)ep->e_entries[i]];
+                       }
+               }
+               else
+                       ep->e_entries = NULL;
                if (ep->e_next != NULL)
                        ep->e_next = &baseep[(long)ep->e_next];
        }
index e0db9afa83b5c68e9ed70d73e81450901f4a2123..e96dcf319fd97d70251005edc8a1539bba79edaf 100644 (file)
@@ -37,7 +37,7 @@
 
 #ifndef lint
 static const char rcsid[] =
-       "$Id: utilities.c,v 1.24 2003/11/22 16:52:16 stelian Exp $";
+       "$Id: utilities.c,v 1.25 2004/12/14 14:07:58 stelian Exp $";
 #endif /* not lint */
 
 #include <config.h>
@@ -187,8 +187,13 @@ removenode(struct entry *ep)
 
        if (ep->e_type != NODE)
                badentry(ep, "removenode: not a node");
-       if (ep->e_entries != NULL)
-               badentry(ep, "removenode: non-empty directory");
+       if (ep->e_entries != NULL) {
+               int i;
+               for (i = 0; i < DIRHASH_SIZE; i++) {
+                       if (ep->e_entries[i] != NULL)
+                               badentry(ep, "removenode: non-empty directory");
+               }
+       }
        ep->e_flags |= REMOVED;
        ep->e_flags &= ~TMPNAME;
        cp = myname(ep);
@@ -371,8 +376,15 @@ badentry(struct entry *ep, const char *msg)
        fprintf(stderr, "parent name %s\n", myname(ep->e_parent));
        if (ep->e_sibling != NULL)
                fprintf(stderr, "sibling name: %s\n", myname(ep->e_sibling));
-       if (ep->e_entries != NULL)
-               fprintf(stderr, "next entry name: %s\n", myname(ep->e_entries));
+       if (ep->e_entries != NULL) {
+               int i;
+               for (i = 0; i < DIRHASH_SIZE; i++) {
+                       if (ep->e_entries[i] != NULL) {
+                               fprintf(stderr, "next entry name: %s\n", myname(ep->e_entries[0]));
+                               break;
+                       }
+               }
+       }
        if (ep->e_links != NULL)
                fprintf(stderr, "next link name: %s\n", myname(ep->e_links));
        if (ep->e_next != NULL)