git.wh0rd.org - home.git/commitdiff
git-repack: new repack helper
author Mike Frysinger <vapier@gentoo.org>
Wed, 13 Jan 2016 21:06:53 +0000 (16:06 -0500)
committer Mike Frysinger <vapier@gentoo.org>
Wed, 13 Jan 2016 21:06:53 +0000 (16:06 -0500)
.bin/git-repack [new file with mode: 0755]

diff --git a/.bin/git-repack b/.bin/git-repack
new file mode 100755 (executable)
index 0000000..d218a99
--- /dev/null
+++ b/.bin/git-repack
@@ -0,0 +1,180 @@
+#!/usr/bin/python
+
+"""Repack git repos fully the way I like them."""
+
+from __future__ import print_function
+
+import argparse
+import glob
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+
+
+def mount_settings():
+    """Return a dict mapping each mount point to its filesystem type
+
+    The data is read from /proc/mounts; the second whitespace-separated field
+    of every line is used as the key and the third as the value.  If a mount
+    point is listed more than once, the last entry wins.
+    """
+    with open('/proc/mounts') as mounts_file:
+        entries = (line.split() for line in mounts_file)
+        return {fields[1]: fields[2] for fields in entries}
+
+
+def find_git_dir(path):
+    """Locate the git dir that |path| belongs to
+
+    Starting at the resolved |path|, walk up one directory at a time.  At each
+    level, a '.git' subdir is preferred if present, otherwise the directory
+    itself is tried (which covers bare repos and being handed a .git dir).
+    A candidate is accepted when it holds 'refs/', 'objects/' and 'config'.
+
+    Returns:
+      The path to the git dir.
+
+    Raises:
+      ValueError: if the walk reaches '/' without finding a git dir.
+    """
+    requested = path
+    resolved = os.path.realpath(path)
+    level = resolved
+    while True:
+        dot_git = os.path.join(level, '.git')
+        candidate = dot_git if os.path.isdir(dot_git) else level
+
+        looks_like_git = (
+            os.path.isdir(os.path.join(candidate, 'refs')) and
+            os.path.isdir(os.path.join(candidate, 'objects')) and
+            os.path.isfile(os.path.join(candidate, 'config')))
+        if looks_like_git:
+            return candidate
+
+        # Move up to the parent and give up once we hit the root.
+        level = os.path.dirname(level)
+        if level == '/':
+            raise ValueError('could not locate .git dir: %s (%s)' %
+                             (requested, resolved))
+
+
+def find_temp_dir():
+    """Pick a temp dir that is itself a tmpfs mount point
+
+    Returns:
+      The first candidate path whose mount type is 'tmpfs', or None when no
+      candidate qualifies.
+    """
+    # Listed in order of preference.
+    candidates = (
+        '/var/tmp/portage',
+        '/var/tmp',
+        '/tmp',
+        tempfile.gettempdir(),
+    )
+    fs_types = mount_settings()
+    return next((c for c in candidates if fs_types.get(c) == 'tmpfs'), None)
+
+
+def readfile(path):
+    """Read |path| and return its data
+
+    Returns:
+      The file contents as a string, or None if |path| is not an existing
+      file.
+    """
+    if not os.path.isfile(path):
+        return None
+    # Use a context manager so the handle is closed right away instead of
+    # whenever the object happens to be garbage collected.
+    with open(path) as fp:
+        return fp.read()
+
+
+def unlink(path):
+    """Remove the file at |path|; anything that is not a file is left alone"""
+    if not os.path.isfile(path):
+        return
+    os.unlink(path)
+
+
+def clean_hooks(path):
+    """Delete the *.sample files found in |path|/hooks/"""
+    pattern = os.path.join(path, 'hooks', '*.sample')
+    for sample in glob.glob(pattern):
+        print('Trimming hook: %s' % sample)
+        os.unlink(sample)
+
+
+def clean_packs(path):
+    """Strip out leftover temp files from objects/pack/
+
+    Args:
+      path: The git dir whose objects/pack/ subdir should be cleaned.
+    """
+    # Git keeps its packs in objects/pack/ (singular); globbing 'packs' would
+    # never match anything.
+    packs_path = os.path.join(path, 'objects', 'pack')
+    for pack in glob.glob(os.path.join(packs_path, 'tmp_pack_*')):
+        print('Trimming pack: %s' % pack)
+        os.unlink(pack)
+
+
+def is_packed(path):
+    """See if the git repo is already packed
+
+    Args:
+      path: The git dir (the one containing objects/), as passed by repack().
+
+    Returns:
+      True if objects/ holds nothing but 'info' and 'pack' (i.e. no loose
+      object dirs) and objects/pack/ holds exactly two entries; else False.
+    """
+    # Inspect the object store, not the git dir itself: the latter also holds
+    # HEAD, config, refs/, etc... and so could never match.
+    objects_path = os.path.join(path, 'objects')
+    if set(('info', 'pack')) != set(os.listdir(objects_path)):
+        return False
+    # Two entries is presumably one pack file plus its index.
+    packs = os.listdir(os.path.join(objects_path, 'pack'))
+    return len(packs) == 2
+
+
+def repack(path):
+    """Clean up and trim cruft and repack |path|
+
+    The grafts and alternates files are moved out of the way while working and
+    put back afterwards.  Sample hooks and temp packs are deleted, stale
+    origin remote refs are pruned, and then `git gc --aggressive --prune=all`
+    is run.  When a tmpfs backed temp dir is available, the repo is copied
+    there first, repacked, and then synced back with rsync.
+
+    Args:
+      path: A path inside (or pointing at) the git repo to repack.
+
+    Raises:
+      ValueError: if no git dir can be found for |path|.
+      subprocess.CalledProcessError: if one of the external commands fails.
+    """
+    path = find_git_dir(path)
+    print('Repacking %s' % path)
+
+    tmpdir = find_temp_dir()
+    if tmpdir:
+        # Reserve a unique name but remove the dir again: copytree() below
+        # wants to create the destination itself.
+        tmpdir = tempfile.mkdtemp(prefix='git-repack.', dir=tmpdir)
+        print('Using tempdir: %s' % tmpdir)
+        os.rmdir(tmpdir)
+
+    graft_file = os.path.join(path, 'info', 'grafts')
+    alt_file = os.path.join(path, 'objects', 'info', 'alternates')
+    grafts = alts = None
+    try:
+        # Push/pop the graft & alternate paths so we don't read them.
+        # XXX: In some cases, this is bad, but I don't use them that way ...
+        grafts = readfile(graft_file)
+        unlink(graft_file)
+
+        alts = readfile(alt_file)
+        unlink(alt_file)
+
+        clean_hooks(path)
+
+        origin_path = os.path.join(path, 'refs', 'remotes', 'origin')
+        # readfile() returns None when there is no packed-refs file; fall back
+        # to an empty string so the substring test below cannot blow up.
+        packed_refs = readfile(os.path.join(path, 'packed-refs')) or ''
+        if os.path.exists(origin_path) or 'refs/remotes/origin/' in packed_refs:
+            cmd = ['git', '--git-dir', path, 'remote', 'prune', 'origin']
+            subprocess.check_call(cmd, cwd='/')
+
+        clean_packs(path)
+
+        if is_packed(path):
+            print('Git repo is already packed; nothing to do')
+            return
+
+        if tmpdir:
+            print('Syncing git repo to tempdir')
+            shutil.copytree(path, tmpdir, symlinks=True)
+            rundir = tmpdir
+        else:
+            rundir = path
+
+        # This also packs refs/tags for us.
+        cmd = ['git', '--git-dir', rundir, 'gc', '--aggressive', '--prune=all']
+        print('Repacking git repo: %s' % ' '.join(cmd))
+        subprocess.check_call(cmd, cwd='/')
+
+        if tmpdir:
+            cmd = ['rsync', '-a', '--delete', tmpdir + '/', path + '/']
+            print('Syncing back git repo: %s' % ' '.join(cmd))
+            subprocess.check_call(cmd, cwd='/')
+            cmd = ['find', path + '/', '-exec', 'chmod', 'u+rw', '{}', '+']
+            subprocess.check_call(cmd, cwd='/')
+
+    finally:
+        # Compare against None so files that existed but were empty get
+        # restored too.
+        if grafts is not None:
+            with open(graft_file, 'w') as fp:
+                fp.write(grafts)
+        if alts is not None:
+            with open(alt_file, 'w') as fp:
+                fp.write(alts)
+        if tmpdir:
+            # The tempdir only exists once copytree() has run; do not let a
+            # missing dir here mask the real error (or an early return).
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+def get_parser():
+    """Build the argparse parser for this tool's command line
+
+    Returns:
+      An ArgumentParser that accepts a single positional 'dir' argument.
+    """
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument('dir', help='The git repo to process')
+    return cli
+
+
+def main(argv):
+    """Parse |argv| and repack the git repo it names"""
+    opts = get_parser().parse_args(argv)
+    repack(opts.dir)
+
+
+if __name__ == '__main__':
+    # Use sys.exit() rather than the bare exit() helper: the latter is only
+    # injected by the site module (for interactive use) and is not guaranteed
+    # to exist, e.g. when running with `python -S`.
+    sys.exit(main(sys.argv[1:]))