]> git.wh0rd.org - home.git/blob - .bin/git-repack
git-repack: rewrite with pathlib
[home.git] / .bin / git-repack
1 #!/usr/bin/python3
2
3 # pylint: disable=fixme,invalid-name
4 # pylint: disable=too-many-branches,too-many-locals,too-many-statements
5
6 """Repack git repos fully the way I like them."""
7
8 from __future__ import print_function
9
10 import argparse
11 import os
12 from pathlib import Path
13 import shutil
14 import subprocess
15 import sys
16 import tempfile
17 from typing import Dict, List, Optional
18
19
def mount_settings() -> Dict[str, str]:
    """Map every path listed in /proc/mounts to its filesystem type.

    Each line is split on whitespace; the second field becomes the key and
    the third field the value.  A path that is listed more than once keeps
    the type from its last entry.
    """
    with Path('/proc/mounts').open(encoding='utf-8') as mounts:
        fields_per_line = (entry.split() for entry in mounts)
        return {fields[1]: fields[2] for fields in fields_per_line}
28
29
def is_git_dir(path: Path) -> bool:
    """Report whether |path| has the layout of a git dir.

    That means |path| holds 'refs' and 'objects' directories as well as a
    regular 'config' file.
    """
    if not (path / 'refs').is_dir():
        return False
    if not (path / 'objects').is_dir():
        return False
    return (path / 'config').is_file()
35
36
def find_git_dir(path: Path) -> Path:
    """Locate the git dir that |path| belongs to.

    |path| is resolved, then it and each of its ancestors are checked in
    turn.  At every level a '.git' subdirectory is preferred when one exists;
    otherwise the directory itself is tested.

    Returns:
      The first candidate accepted by is_git_dir().

    Raises:
      ValueError: No candidate was accepted by the time the root was checked.
    """
    real_path = path.resolve()
    for directory in (real_path, *real_path.parents):
        dot_git = directory / '.git'
        candidate = dot_git if dot_git.is_dir() else directory
        if is_git_dir(candidate):
            return candidate

    raise ValueError('could not locate .git dir: %s (%s)' %
                     (path, real_path))
54
55
def find_temp_dir() -> Optional[Path]:
    """Pick the first candidate temp dir that is itself a tmpfs mount.

    Returns:
      The matching directory, or None when mount_settings() does not list any
      of the candidates with type 'tmpfs'.
    """
    candidates = (
        '/var/tmp/portage',
        '/var/tmp',
        '/tmp',
        tempfile.gettempdir(),
    )
    fs_types = mount_settings()
    tmpfs_dirs = (Path(c) for c in candidates if fs_types.get(c) == 'tmpfs')
    return next(tmpfs_dirs, None)
69
70
def readfile(path: Path) -> str:
    """Return the UTF-8 text of |path|, or '' when it is not a regular file"""
    return path.read_text(encoding='utf-8') if path.is_file() else ''
76
77
def clean_hooks(path):
    """Delete every '*.sample' entry found directly inside |path|/hooks"""
    hooks_dir = path / 'hooks'
    for sample in hooks_dir.glob('*.sample'):
        print('Trimming hook:', sample)
        sample.unlink()
83
84
def clean_packs(path: Path) -> None:
    """Strip out leftover temp packs from objects/pack/

    Files named tmp_pack_* in the pack directory are removed and each removal
    is reported on stdout.

    Args:
      path: The git dir to clean.
    """
    # NB: The directory is named "pack" (singular) -- the same one that
    # is_packed() inspects.  Globbing "packs" never matched anything.
    for pack in (path / 'objects' / 'pack').glob('tmp_pack_*'):
        print('Trimming pack:', pack)
        pack.unlink()
90
91
def is_packed(path: Path) -> bool:
    """See if the git repo is already fully packed

    A repo counts as packed when objects/ contains nothing but "pack" (and
    optionally "info"), and objects/pack/ contains exactly one *.pack file
    plus only files sharing that pack's base name.

    Args:
      path: The git dir to inspect.

    Returns:
      True if there is nothing left to repack, False otherwise.
    """
    obj_path = path / 'objects'
    names = {x.name for x in obj_path.iterdir()}
    # Anything else in here means loose objects (or other cruft) still exist.
    if names not in ({'info', 'pack'}, {'pack'}):
        return False

    entries = tuple((obj_path / 'pack').iterdir())
    packs = [x for x in entries if x.suffix == '.pack']
    if len(packs) != 1:
        return False

    # Every entry must belong to that single pack.  Do not insist on exactly
    # two files (.pack + .idx): newer git versions can also write a reverse
    # index (.rev) next to the pack, and a .bitmap may be present as well.
    stem = packs[0].stem
    return all(x.stem == stem for x in entries)
102
103
def _repack_submodules(modules_path: Path) -> None:
    """Repack every git dir found below |modules_path|"""
    for root, dirs, _ in os.walk(modules_path):
        dirs.sort()
        descend = []
        for d in dirs:
            mod_path = Path(root) / d
            if is_git_dir(mod_path):
                # repack() handles this module's own modules/ dir itself, so
                # there is no need to walk into it a second time.
                repack(mod_path)
            else:
                descend.append(d)
        # Prune in place so os.walk() skips the git dirs handled above.
        dirs[:] = descend


def _restore_file(path: Path, data: Optional[str]) -> None:
    """Write |data| back to |path| if anything was saved"""
    if data:
        # Cleaning out empty dirs may have removed the parent dir as well.
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(data, encoding='utf-8')


def repack(path: Path) -> None:
    """Clean up and trim cruft and repack |path|

    The git dir is located with find_git_dir() and any git dirs below its
    modules/ dir are repacked first.  Then the grafts & alternates files are
    moved out of the way, sample hooks, the origin remote HEAD and temp packs
    are deleted, and -- unless is_packed() says there is nothing to do -- the
    reflog is expired and `git gc --aggressive --prune=all` is run.  When
    find_temp_dir() offers a location, the work happens on a copy placed
    there which is synced back with rsync afterwards.

    The grafts & alternates contents are written back on every exit path.
    When a temp dir is used, GIT_WORK_TREE is set in os.environ and left set.

    Args:
      path: The git repo (or a path inside of it) to process.

    Raises:
      ValueError: No git dir could be located for |path|.
      subprocess.CalledProcessError: A git/rsync/chmod command failed.
    """
    path = find_git_dir(path)
    print('Repacking', path)

    # Repack any submodules this project might use.
    modules_path = path / 'modules'
    if modules_path.is_dir():
        _repack_submodules(modules_path)

    tmpdir = find_temp_dir()
    if tmpdir:
        tmpdir = Path(tempfile.mkdtemp(prefix='git-repack.', dir=tmpdir))
        print('Using tempdir:', tmpdir)
        # Only the unique name is wanted: copytree() below creates the dir.
        tmpdir.rmdir()
        # Doesn't matter for these needs.
        os.environ['GIT_WORK_TREE'] = str(tmpdir)

    graft_file = path / 'info' / 'grafts'
    alt_file = path / 'objects' / 'info' / 'alternates'
    grafts = alts = None
    try:
        # Push/pop the graft & alternate paths so we don't read them.
        # XXX: In some cases, this is bad, but I don't use them that way ...
        grafts = readfile(graft_file)
        graft_file.unlink(missing_ok=True)

        alts = readfile(alt_file)
        alt_file.unlink(missing_ok=True)

        clean_hooks(path)

        # XXX: Should do this for all remotes?
        origin_path = path / 'refs' / 'remotes' / 'origin'
        # Delete remote HEAD as we don't need it, and it might be stale.
        head = origin_path / 'HEAD'
        head.unlink(missing_ok=True)
        packed_refs = readfile(path / 'packed-refs')
        if origin_path.exists() or 'refs/remotes/origin/' in packed_refs:
            cmd = ['git', '--git-dir', str(path), 'remote', 'prune', 'origin']
            subprocess.run(cmd, cwd='/', check=True)

        clean_packs(path)

        if is_packed(path):
            print('Git repo is already packed; nothing to do')
            return

        if tmpdir:
            print('Syncing git repo to tempdir')
            shutil.copytree(path, tmpdir, symlinks=True)
            rundir = tmpdir
        else:
            rundir = path

        cmd = ['git', '--git-dir', str(rundir), 'reflog', 'expire', '--all', '--stale-fix']
        print('Cleaning reflog:', ' '.join(cmd))
        subprocess.run(cmd, cwd='/', check=True)

        # This also packs refs/tags for us.
        cmd = ['git', '--git-dir', str(rundir), 'gc', '--aggressive', '--prune=all']
        print('Repacking git repo:', ' '.join(cmd))
        subprocess.run(cmd, cwd='/', check=True)

        # Clean empty dirs.  rmdir fails on the non-empty ones, hence the
        # discarded stderr and the ignored exit status.
        cmd = ['find', str(rundir), '-depth', '-type', 'd', '-exec', 'rmdir', '{}', '+']
        subprocess.run(cmd, stderr=subprocess.DEVNULL, check=False)

        # There's a few dirs we need to exist even if they're empty.
        refdir = rundir / 'refs'
        refdir.mkdir(exist_ok=True)

        if tmpdir:
            cmd = ['rsync', '-a', '--delete', str(tmpdir) + '/', str(path) + '/']
            print('Syncing back git repo:', ' '.join(cmd))
            subprocess.run(cmd, cwd='/', check=True)
            cmd = ['find', str(path) + '/', '-exec', 'chmod', 'u+rw', '{}', '+']
            subprocess.run(cmd, cwd='/', check=True)

    finally:
        # This cannot raise, so do it first; that way a failure while
        # restoring files never leaks the temp copy.
        if tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)
        # Still try to restore the alternates even if the grafts fail.
        try:
            _restore_file(graft_file, grafts)
        finally:
            _restore_file(alt_file, alts)
196
197
def get_parser():
    """Build the argparse parser describing this script's command line"""
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('dir', help='The git repo to process', type=Path)
    return cli
203
204
def main(argv: List[str]):
    """Parse |argv| and repack the git repo it names"""
    args = get_parser().parse_args(argv)
    repack(args.dir)
210
211
if __name__ == '__main__':
    # Hand main() everything after the program name and exit with its result.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)