]>
Commit | Line | Data |
---|---|---|
f4cf6be5 MF |
1 | #!/usr/bin/python3 |
2 | ||
3 | # pylint: disable=fixme,invalid-name | |
4 | # pylint: disable=too-many-branches,too-many-locals,too-many-statements | |
c7e7c6e4 MF |
5 | |
6 | """Repack git repos fully the way I like them.""" | |
7 | ||
8 | from __future__ import print_function | |
9 | ||
10 | import argparse | |
c7e7c6e4 | 11 | import os |
2b31c456 | 12 | from pathlib import Path |
c7e7c6e4 MF |
13 | import shutil |
14 | import subprocess | |
15 | import sys | |
16 | import tempfile | |
2b31c456 | 17 | from typing import Dict, List, Optional |
c7e7c6e4 MF |
18 | |
19 | ||
def mount_settings() -> Dict[str, str]:
    """Return dict mapping each mount point to its filesystem type

    The data comes from /proc/mounts: every line is split on whitespace,
    the second field becomes the key and the third field the value.  When
    a mount point is listed more than once, the last line wins.
    """
    with Path('/proc/mounts').open(encoding='utf-8') as mounts:
        fields = (entry.split() for entry in mounts)
        # Built inside the |with| so the lazy generator is drained while the
        # file is still open.
        return {ele[1]: ele[2] for ele in fields}
28 | ||
29 | ||
def is_git_dir(path: Path) -> bool:
    """Check whether |path| has the layout of a .git dir

    A dir qualifies when it holds the refs/ and objects/ subdirs as well as
    a regular config file.
    """
    for subdir in ('refs', 'objects'):
        if not (path / subdir).is_dir():
            return False
    return (path / 'config').is_file()
c3fa90a1 MF |
35 | |
36 | ||
def find_git_dir(path: Path) -> Path:
    """Locate the .git dir that |path| belongs to

    |path| is resolved first, then it and each of its ancestors are checked
    in turn.  At every level a .git/ subdir is preferred when one exists;
    otherwise the level itself is tested (which covers bare repos and being
    pointed inside a .git dir).

    Raises:
      ValueError: the filesystem root was reached without finding a git dir.
    """
    real_path = path.resolve()
    for candidate in (real_path, *real_path.parents):
        dot_git = candidate / '.git'
        git_dir = dot_git if dot_git.is_dir() else candidate
        if is_git_dir(git_dir):
            return git_dir

    raise ValueError('could not locate .git dir: %s (%s)' %
                     (path, real_path))
c7e7c6e4 MF |
54 | |
55 | ||
def find_temp_dir() -> Optional[Path]:
    """Pick the first well-known temp dir that is a tmpfs mount point

    The candidates are tried in a fixed order, ending with whatever
    tempfile.gettempdir() reports.  A candidate only matches when it is
    itself listed as a mount point of type tmpfs.  Returns None when no
    candidate matches.
    """
    candidates = (
        '/var/tmp/portage',
        '/var/tmp',
        '/tmp',
        tempfile.gettempdir(),
    )
    fs_types = mount_settings()
    tmpfs_dirs = (Path(c) for c in candidates if fs_types.get(c) == 'tmpfs')
    return next(tmpfs_dirs, None)
69 | ||
70 | ||
def readfile(path: Path) -> str:
    """Return the UTF-8 text stored in |path|

    Anything that is not a regular file (missing paths and dirs included)
    yields the empty string instead of an error.
    """
    if not path.is_file():
        return ''
    return path.read_text(encoding='utf-8')
c7e7c6e4 MF |
76 | |
77 | ||
c7e7c6e4 MF |
def clean_hooks(path):
    """Delete the *.sample files found directly inside |path|/hooks/

    Every removed file is announced on stdout.  Other hooks are left alone.
    """
    hooks_dir = path / 'hooks'
    for sample in hooks_dir.glob('*.sample'):
        print('Trimming hook:', sample)
        sample.unlink()
c7e7c6e4 MF |
83 | |
84 | ||
def clean_packs(path: Path) -> None:
    """Strip out temp files from objects/pack/

    Deletes every tmp_pack_* entry found directly inside |path|/objects/pack
    and announces each one on stdout.  Finished packs are left alone.

    Git keeps its packs in objects/pack (singular), which is also the dir
    is_packed() inspects; globbing "objects/packs" never matches anything.
    """
    for pack in (path / 'objects' / 'pack').glob('tmp_pack_*'):
        print('Trimming pack:', pack)
        pack.unlink()
c7e7c6e4 MF |
90 | |
91 | ||
def is_packed(path):
    """Report whether the git repo at |path| looks fully packed already

    That is the case when objects/ contains nothing but pack/ (optionally
    alongside info/) and pack/ holds exactly two entries.
    """
    objects_dir = path / 'objects'
    entries = {entry.name for entry in objects_dir.iterdir()}
    if entries != {'pack'} and entries != {'info', 'pack'}:
        return False
    pack_count = sum(1 for _ in (objects_dir / 'pack').iterdir())
    return pack_count == 2
102 | ||
103 | ||
def repack(path: Path):
    """Clean up and trim cruft and repack |path|

    The git dir owning |path| is located first, and any git dirs found under
    its modules/ subdir are repacked recursively.  Sample hooks, the remote
    origin HEAD, and temp packs are trimmed, then the reflog is expired and
    an aggressive gc is run.  When a tmpfs-backed temp dir is available, the
    heavy lifting happens on a copy there which is rsynced back afterwards.

    info/grafts and objects/info/alternates are moved out of the way for
    the duration and restored on the way out, even if a step fails.

    Raises:
      ValueError: no git dir could be located from |path|.
      subprocess.CalledProcessError: a git/rsync/chmod command failed.
    """
    path = find_git_dir(path)
    print('Repacking', path)

    # Repack any submodules this project might use.
    modules_path = path / 'modules'
    if modules_path.is_dir():
        for root, dirs, _ in os.walk(modules_path):
            root = Path(root)
            # Sorted in place so the walk order is deterministic.
            dirs.sort()
            for d in dirs:
                mod_path = root / d
                if is_git_dir(mod_path):
                    repack(mod_path)

    tmpdir = find_temp_dir()
    if tmpdir:
        tmpdir = Path(tempfile.mkdtemp(prefix='git-repack.', dir=tmpdir))
        print('Using tempdir:', tmpdir)
        # Only the unique name is wanted: copytree() below has to create the
        # destination dir itself.
        tmpdir.rmdir()
        # Doesn't matter for these needs.
        os.environ['GIT_WORK_TREE'] = str(tmpdir)

    grafts = alts = None
    try:
        # Push/pop the graft & alternate paths so we don't read them.
        # XXX: In some cases, this is bad, but I don't use them that way ...
        graft_file = path / 'info' / 'grafts'
        grafts = readfile(graft_file)
        graft_file.unlink(missing_ok=True)

        alt_file = path / 'objects' / 'info' / 'alternates'
        alts = readfile(alt_file)
        alt_file.unlink(missing_ok=True)

        clean_hooks(path)

        # XXX: Should do this for all remotes?
        origin_path = path / 'refs' / 'remotes' / 'origin'
        # Delete remote HEAD as we don't need it, and it might be stale.
        head = origin_path / 'HEAD'
        head.unlink(missing_ok=True)
        packed_refs = readfile(path / 'packed-refs')
        if origin_path.exists() or 'refs/remotes/origin/' in packed_refs:
            cmd = ['git', '--git-dir', str(path), 'remote', 'prune', 'origin']
            subprocess.run(cmd, cwd='/', check=True)

        clean_packs(path)

        if is_packed(path):
            print('Git repo is already packed; nothing to do')
            return

        if tmpdir:
            print('Syncing git repo to tempdir')
            shutil.copytree(path, tmpdir, symlinks=True)
            rundir = tmpdir
        else:
            rundir = path

        cmd = ['git', '--git-dir', str(rundir), 'reflog', 'expire', '--all',
               '--stale-fix']
        print('Cleaning reflog:', ' '.join(cmd))
        subprocess.run(cmd, cwd='/', check=True)

        # This also packs refs/tags for us.
        cmd = ['git', '--git-dir', str(rundir), 'gc', '--aggressive',
               '--prune=all']
        print('Repacking git repo:', ' '.join(cmd))
        subprocess.run(cmd, cwd='/', check=True)

        # Clean empty dirs.  rmdir fails on every non-empty dir, so the
        # noise is dropped and the exit status ignored.
        cmd = ['find', str(rundir), '-depth', '-type', 'd', '-exec', 'rmdir',
               '{}', '+']
        subprocess.run(cmd, stderr=subprocess.DEVNULL, check=False)

        # There's a few dirs we need to exist even if they're empty.
        refdir = rundir / 'refs'
        refdir.mkdir(exist_ok=True)

        if tmpdir:
            cmd = ['rsync', '-a', '--delete', str(tmpdir) + '/', str(path) + '/']
            print('Syncing back git repo:', ' '.join(cmd))
            subprocess.run(cmd, cwd='/', check=True)
            cmd = ['find', str(path) + '/', '-exec', 'chmod', 'u+rw', '{}', '+']
            subprocess.run(cmd, cwd='/', check=True)

    finally:
        # The empty-dir sweep above (and the rsync --delete that mirrors it)
        # can remove info/ or objects/info/ once the files saved here were
        # unlinked, so recreate the parent before putting the data back.
        if grafts:
            graft_file.parent.mkdir(parents=True, exist_ok=True)
            graft_file.write_text(grafts, encoding='utf-8')
        if alts:
            alt_file.parent.mkdir(parents=True, exist_ok=True)
            alt_file.write_text(alts, encoding='utf-8')
        if tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)
c7e7c6e4 MF |
196 | |
197 | ||
def get_parser():
    """Build the command line parser

    The parser takes a single positional argument, |dir|, which is converted
    to a Path; the module docstring serves as the description.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('dir', type=Path, help='The git repo to process')
    return cli
203 | ||
204 | ||
def main(argv: List[str]):
    """Script entry point: parse |argv| and repack the requested repo"""
    opts = get_parser().parse_args(argv)
    repack(opts.dir)
210 | ||
211 | ||
if __name__ == '__main__':
    # Only runs when executed as a script; main()'s result is the exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)