git.wh0rd.org - home.git/blame - .bin/git-repack
git-repack: rewrite with pathlib
[home.git] / .bin / git-repack
CommitLineData
f4cf6be5
MF
1#!/usr/bin/python3
2
3# pylint: disable=fixme,invalid-name
4# pylint: disable=too-many-branches,too-many-locals,too-many-statements
c7e7c6e4
MF
5
6"""Repack git repos fully the way I like them."""
7
8from __future__ import print_function
9
10import argparse
c7e7c6e4 11import os
2b31c456 12from pathlib import Path
c7e7c6e4
MF
13import shutil
14import subprocess
15import sys
16import tempfile
2b31c456 17from typing import Dict, List, Optional
c7e7c6e4
MF
18
19
def mount_settings() -> Dict[str, str]:
    """Map each mount point listed in /proc/mounts to its filesystem type.

    Every line is split on whitespace; the second field becomes the key and
    the third field the value.  If a mount point is listed more than once,
    the last entry wins.
    """
    with Path('/proc/mounts').open(encoding='utf-8') as mounts:
        return {fields[1]: fields[2] for fields in map(str.split, mounts)}
28
29
def is_git_dir(path: Path) -> bool:
    """Report whether |path| is laid out like a .git dir.

    That requires `refs` and `objects` directories plus a `config` file
    directly under |path|.
    """
    required_dirs = ('refs', 'objects')
    if not all((path / name).is_dir() for name in required_dirs):
        return False
    return (path / 'config').is_file()
c3fa90a1
MF
35
36
def find_git_dir(path: Path) -> Path:
    """Locate the .git dir that |path| belongs to.

    |path| is resolved, then it and each of its ancestors (up to and
    including the filesystem root) are probed in turn.  At every level a
    `.git` subdirectory, when present, is checked instead of the directory
    itself.

    Raises:
      ValueError: No probed level passed is_git_dir().
    """
    resolved = path.resolve()
    for level in (resolved, *resolved.parents):
        dotgit = level / '.git'
        candidate = dotgit if dotgit.is_dir() else level
        if is_git_dir(candidate):
            return candidate

    raise ValueError('could not locate .git dir: %s (%s)' %
                     (path, resolved))
c7e7c6e4
MF
54
55
def find_temp_dir() -> Optional[Path]:
    """Pick the first candidate temp dir that is itself a tmpfs mount point.

    Returns:
      The chosen directory, or None when no candidate is mounted as tmpfs.
    """
    candidates = (
        '/var/tmp/portage',
        '/var/tmp',
        '/tmp',
        tempfile.gettempdir(),
    )
    fs_types = mount_settings()
    tmpfs_dirs = (Path(c) for c in candidates if fs_types.get(c) == 'tmpfs')
    return next(tmpfs_dirs, None)
69
70
def readfile(path: Path) -> str:
    """Return the text of |path|, or '' when it is not a regular file"""
    return path.read_text(encoding='utf-8') if path.is_file() else ''
c7e7c6e4
MF
76
77
c7e7c6e4
MF
def clean_hooks(path):
    """Delete every `*.sample` file found directly under |path|/hooks/"""
    hooks_dir = path / 'hooks'
    for sample in hooks_dir.glob('*.sample'):
        print('Trimming hook:', sample)
        sample.unlink()
c7e7c6e4
MF
83
84
def clean_packs(path: Path) -> None:
    """Strip out temp files from objects/pack/

    git stores its packfiles -- and the `tmp_pack_*` leftovers of interrupted
    pack writes -- in `objects/pack` (singular), so that is the directory
    that has to be scanned.

    Args:
      path: The git dir to clean.
    """
    for pack in (path / 'objects' / 'pack').glob('tmp_pack_*'):
        print('Trimming pack:', pack)
        pack.unlink()
c7e7c6e4
MF
90
91
def is_packed(path):
    """Check whether the git repo at |path| is already packed.

    That is the case when objects/ holds nothing but `pack` (optionally
    alongside `info`) and objects/pack/ holds exactly two entries.
    """
    objects_dir = path / 'objects'
    entries = {entry.name for entry in objects_dir.iterdir()}
    if entries != {'pack'} and entries != {'info', 'pack'}:
        return False
    pack_entries = list((objects_dir / 'pack').iterdir())
    return len(pack_entries) == 2
102
103
def repack(path: Path) -> None:
    """Clean up and trim cruft and repack |path|

    Submodule git dirs found under `modules/` are repacked first (via
    recursion).  Then, for the repo itself: the grafts & alternates files are
    moved out of the way, sample hooks and temp packs are deleted, the
    `origin` remote is pruned, reflogs are expired, and `git gc --aggressive`
    is run.  When a tmpfs-backed temp dir is available, the git commands run
    on a copy there which is then rsynced back over the original.

    Args:
      path: A path at or below the git repo to process.

    Raises:
      ValueError: No git dir could be located from |path|.
      subprocess.CalledProcessError: A command run with check=True failed.
    """
    path = find_git_dir(path)
    print('Repacking', path)

    # Repack any submodules this project might use.
    modules_path = path / 'modules'
    if modules_path.is_dir():
        for root, dirs, _ in os.walk(modules_path):
            root = Path(root)
            dirs.sort()
            descend = []
            for d in dirs:
                mod_path = root / d
                if is_git_dir(mod_path):
                    repack(mod_path)
                else:
                    descend.append(d)
            # Don't walk into git dirs we just handled: the recursive call
            # already took care of their own modules/ tree, so descending
            # would only rescan objects/refs and repack nested submodules a
            # second time.
            dirs[:] = descend

    tmpdir = find_temp_dir()
    if tmpdir:
        tmpdir = Path(tempfile.mkdtemp(prefix='git-repack.', dir=tmpdir))
        print('Using tempdir:', tmpdir)
        # Only the unique name is wanted; copytree() below needs to create
        # the directory itself.
        tmpdir.rmdir()
        # Doesn't matter for these needs.
        os.environ['GIT_WORK_TREE'] = str(tmpdir)

    grafts = alts = None
    try:
        # Push/pop the graft & alternate paths so we don't read them.
        # XXX: In some cases, this is bad, but I don't use them that way ...
        graft_file = path / 'info' / 'grafts'
        grafts = readfile(graft_file)
        graft_file.unlink(missing_ok=True)

        alt_file = path / 'objects' / 'info' / 'alternates'
        alts = readfile(alt_file)
        alt_file.unlink(missing_ok=True)

        clean_hooks(path)

        # XXX: Should do this for all remotes?
        origin_path = path / 'refs' / 'remotes' / 'origin'
        # Delete remote HEAD as we don't need it, and it might be stale.
        head = origin_path / 'HEAD'
        head.unlink(missing_ok=True)
        packed_refs = readfile(path / 'packed-refs')
        if origin_path.exists() or 'refs/remotes/origin/' in packed_refs:
            cmd = ['git', '--git-dir', str(path), 'remote', 'prune', 'origin']
            subprocess.run(cmd, cwd='/', check=True)

        clean_packs(path)

        if is_packed(path):
            print('Git repo is already packed; nothing to do')
            return

        if tmpdir:
            print('Syncing git repo to tempdir')
            shutil.copytree(path, tmpdir, symlinks=True)
            rundir = tmpdir
        else:
            rundir = path

        cmd = ['git', '--git-dir', str(rundir), 'reflog', 'expire', '--all', '--stale-fix']
        print('Cleaning reflog:', ' '.join(cmd))
        subprocess.run(cmd, cwd='/', check=True)

        # This also packs refs/tags for us.
        cmd = ['git', '--git-dir', str(rundir), 'gc', '--aggressive', '--prune=all']
        print('Repacking git repo:', ' '.join(cmd))
        subprocess.run(cmd, cwd='/', check=True)

        # Clean empty dirs.  rmdir fails on every non-empty dir, so errors
        # are expected here: stderr is discarded and the status ignored.
        cmd = ['find', str(rundir), '-depth', '-type', 'd', '-exec', 'rmdir', '{}', '+']
        subprocess.run(cmd, stderr=subprocess.DEVNULL, check=False)

        # There's a few dirs we need to exist even if they're empty.
        refdir = rundir / 'refs'
        refdir.mkdir(exist_ok=True)

        if tmpdir:
            cmd = ['rsync', '-a', '--delete', str(tmpdir) + '/', str(path) + '/']
            print('Syncing back git repo:', ' '.join(cmd))
            subprocess.run(cmd, cwd='/', check=True)
            cmd = ['find', str(path) + '/', '-exec', 'chmod', 'u+rw', '{}', '+']
            subprocess.run(cmd, cwd='/', check=True)

    finally:
        # Put back whatever grafts/alternates content was set aside above.
        if grafts:
            graft_file.write_text(grafts, encoding='utf-8')
        if alts:
            alt_file.write_text(alts, encoding='utf-8')
        if tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)
c7e7c6e4
MF
196
197
def get_parser():
    """Build the argparse parser for the command line"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'dir',
        type=Path,
        help='The git repo to process',
    )
    return parser
203
204
def main(argv: List[str]):
    """Parse |argv| and repack the git repo it names"""
    opts = get_parser().parse_args(argv)
    repack(opts.dir)
210
211
# Run only when executed as a script; the value main() returns is handed to
# sys.exit() as the exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))