X-Git-Url: https://git.wh0rd.org/?a=blobdiff_plain;f=.bin%2Fgit-repack;h=865ca5e51cc18c06c42e99f8e7b2642df9c925c7;hb=2b31c45684ac321d19189f5f2d0bfe19509f05a3;hp=d218a99944482f651032c924596f3172e2b18708;hpb=c7e7c6e4a612c5de21f6ed0cbae3389c3b66f2f3;p=home.git diff --git a/.bin/git-repack b/.bin/git-repack index d218a99..865ca5e 100755 --- a/.bin/git-repack +++ b/.bin/git-repack @@ -1,50 +1,59 @@ -#!/usr/bin/python +#!/usr/bin/python3 + +# pylint: disable=fixme,invalid-name +# pylint: disable=too-many-branches,too-many-locals,too-many-statements """Repack git repos fully the way I like them.""" from __future__ import print_function import argparse -import glob import os +from pathlib import Path import shutil import subprocess import sys import tempfile +from typing import Dict, List, Optional -def mount_settings(): +def mount_settings() -> Dict[str, str]: """Return dict mapping path to its type""" ret = {} - with open('/proc/mounts') as fp: + with Path('/proc/mounts').open(encoding='utf-8') as fp: for line in fp: ele = line.split() ret[ele[1]] = ele[2] return ret -def find_git_dir(path): +def is_git_dir(path: Path) -> bool: + """Whether |path| is a .git dir""" + return ((path / 'refs').is_dir() and + (path / 'objects').is_dir() and + (path / 'config').is_file()) + + +def find_git_dir(path: Path) -> Path: """Try to find the .git dir to operate on""" orig_path = path - real_path = path = os.path.realpath(path) + real_path = path = path.resolve() while True: curr_path = path - if os.path.isdir(os.path.join(path, '.git')): - curr_path = os.path.join(path, '.git') + if (path / '.git').is_dir(): + curr_path = path / '.git' - if (os.path.isdir(os.path.join(curr_path, 'refs')) and - os.path.isdir(os.path.join(curr_path, 'objects')) and - os.path.isfile(os.path.join(curr_path, 'config'))): + if is_git_dir(curr_path): return curr_path - path = os.path.dirname(path) - - if path == '/': + parent = path.parent + if path == parent: raise ValueError('could not locate .git dir: %s (%s)' % (orig_path, real_path)) + path = parent -def find_temp_dir(): +def find_temp_dir() -> Optional[Path]: """Find a good temp dir (one backed by tmpfs)""" SEARCH_PATHS = ( '/var/tmp/portage', @@ -55,78 +64,90 @@ def find_temp_dir(): mounts = mount_settings() for path in SEARCH_PATHS: if mounts.get(path) == 'tmpfs': - return path + return Path(path) return None -def readfile(path): +def readfile(path: Path) -> str: """Read |path| and return its data""" - if os.path.isfile(path): - return open(path).read() - - -def unlink(path): - """Unlink |path| if it exists else do nothing""" - if os.path.isfile(path): - os.unlink(path) + if path.is_file(): + return path.read_text(encoding='utf-8') + return '' def clean_hooks(path): """Strip out sample files from hooks/""" - hooks_path = os.path.join(path, 'hooks') - for hook in glob.glob(os.path.join(hooks_path, '*.sample')): - print('Trimming hook: %s' % hook) - os.unlink(hook) + for hook in (path / 'hooks').glob('*.sample'): + print('Trimming hook:', hook) + hook.unlink() def clean_packs(path): """Strip out temp files from objects/packs/""" - packs_path = os.path.join(path, 'objects', 'packs') - for pack in glob.glob(os.path.join(packs_path, 'tmp_pack_*')): - print('Trimming pack: %s' % pack) - os.unlink(pack) + for pack in (path / 'objects' / 'packs').glob('tmp_pack_*'): + print('Trimming pack:', pack) + pack.unlink() def is_packed(path): """See if the git repo is already packed""" - if set(('info', 'pack')) != set(os.listdir(path)): + obj_path = path / 'objects' + paths = {x.name for x in obj_path.iterdir()} + if paths not in ({'info', 'pack'}, {'pack'}): return False - packs = os.listdir(os.path.join(path, 'pack')) + packs = tuple((obj_path / 'pack').iterdir()) if len(packs) != 2: return False return True -def repack(path): +def repack(path: Path): """Clean up and trim cruft and repack |path|""" path = find_git_dir(path) - print('Repacking %s' % path) + print('Repacking', path) + + # Repack any submodules this project might use. + modules_path = path / 'modules' + if modules_path.is_dir(): + for root, dirs, _ in os.walk(modules_path): + root = Path(root) + dirs.sort() + for d in dirs: + mod_path = root / d + if is_git_dir(mod_path): + repack(mod_path) tmpdir = find_temp_dir() if tmpdir: - tmpdir = tempfile.mkdtemp(prefix='git-repack.', dir=tmpdir) - print('Using tempdir: %s' % tmpdir) - os.rmdir(tmpdir) + tmpdir = Path(tempfile.mkdtemp(prefix='git-repack.', dir=tmpdir)) + print('Using tempdir:', tmpdir) + tmpdir.rmdir() + # Doesn't matter for these needs. + os.environ['GIT_WORK_TREE'] = str(tmpdir) grafts = alts = None try: # Push/pop the graft & alternate paths so we don't read them. # XXX: In some cases, this is bad, but I don't use them that way ... - graft_file = os.path.join(path, 'info', 'grafts') + graft_file = path / 'info' / 'grafts' grafts = readfile(graft_file) - unlink(graft_file) + graft_file.unlink(missing_ok=True) - alt_file = os.path.join(path, 'objects', 'info', 'alternates') + alt_file = path / 'objects' / 'info' / 'alternates' alts = readfile(alt_file) - unlink(alt_file) + alt_file.unlink(missing_ok=True) clean_hooks(path) - origin_path = os.path.join(path, 'refs', 'remotes', 'origin') - packed_refs = readfile(os.path.join(path, 'packed-refs')) - if os.path.exists(origin_path) or 'refs/remotes/origin/' in packed_refs: - cmd = ['git', '--git-dir', path, 'remote', 'prune', 'origin'] - subprocess.check_call(cmd, cwd='/') + # XXX: Should do this for all remotes? + origin_path = path / 'refs' / 'remotes' / 'origin' + # Delete remote HEAD as we don't need it, and it might be stale. + head = origin_path / 'HEAD' + head.unlink(missing_ok=True) + packed_refs = readfile(path / 'packed-refs') + if origin_path.exists() or 'refs/remotes/origin/' in packed_refs: + cmd = ['git', '--git-dir', str(path), 'remote', 'prune', 'origin'] + subprocess.run(cmd, cwd='/', check=True) clean_packs(path) @@ -141,35 +162,47 @@ def repack(path): else: rundir = path + cmd = ['git', '--git-dir', str(rundir), 'reflog', 'expire', '--all', '--stale-fix'] + print('Cleaning reflog:', ' '.join(cmd)) + subprocess.run(cmd, cwd='/', check=True) + # This also packs refs/tags for us. - cmd = ['git', '--git-dir', rundir, 'gc', '--aggressive', '--prune=all'] - print('Repacking git repo: %s' % ' '.join(cmd)) - subprocess.check_call(cmd, cwd='/') + cmd = ['git', '--git-dir', str(rundir), 'gc', '--aggressive', '--prune=all'] + print('Repacking git repo:', ' '.join(cmd)) + subprocess.run(cmd, cwd='/', check=True) + + # Clean empty dirs. + cmd = ['find', str(rundir), '-depth', '-type', 'd', '-exec', 'rmdir', '{}', '+'] + subprocess.run(cmd, stderr=subprocess.DEVNULL, check=False) + + # There's a few dirs we need to exist even if they're empty. + refdir = rundir / 'refs' + refdir.mkdir(exist_ok=True) if tmpdir: - cmd = ['rsync', '-a', '--delete', tmpdir + '/', path + '/'] - print('Syncing back git repo: %s' % ' '.join(cmd)) - subprocess.check_call(cmd, cwd='/') - cmd = ['find', path + '/', '-exec', 'chmod', 'u+rw', '{}', '+'] - subprocess.check_call(cmd, cwd='/') + cmd = ['rsync', '-a', '--delete', str(tmpdir) + '/', str(path) + '/'] + print('Syncing back git repo:', ' '.join(cmd)) + subprocess.run(cmd, cwd='/', check=True) + cmd = ['find', str(path) + '/', '-exec', 'chmod', 'u+rw', '{}', '+'] + subprocess.run(cmd, cwd='/', check=True) finally: if grafts: - open(graft_file, 'w').write(grafts) + graft_file.write_text(grafts, encoding='utf-8') if alts: - open(alt_file, 'w').write(alts) + alt_file.write_text(alts, encoding='utf-8') if tmpdir: - shutil.rmtree(tmpdir) + shutil.rmtree(tmpdir, ignore_errors=True) def get_parser(): """Get the command line parser""" parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('dir', help='The git repo to process') + parser.add_argument('dir', type=Path, help='The git repo to process') return parser -def main(argv): +def main(argv: List[str]): """The main script entry point""" parser = get_parser() opts = parser.parse_args(argv) @@ -177,4 +210,4 @@ def main(argv): if __name__ == '__main__': - exit(main(sys.argv[1:])) + sys.exit(main(sys.argv[1:]))