]>
Commit | Line | Data |
---|---|---|
f4cf6be5 MF |
1 | #!/usr/bin/python3 |
2 | ||
3 | # pylint: disable=fixme,invalid-name | |
4 | # pylint: disable=too-many-branches,too-many-locals,too-many-statements | |
c7e7c6e4 MF |
5 | |
6 | """Repack git repos fully the way I like them.""" | |
7 | ||
8 | from __future__ import print_function | |
9 | ||
10 | import argparse | |
11 | import glob | |
12 | import os | |
13 | import shutil | |
14 | import subprocess | |
15 | import sys | |
16 | import tempfile | |
17 | ||
18 | ||
def mount_settings(mounts_file='/proc/mounts'):
    """Return dict mapping each mount point to its filesystem type.

    Args:
      mounts_file: Path of the mount table to parse.  Each useful line is
          expected to hold at least three whitespace separated fields, where
          the second is the mount point and the third is the filesystem type.

    Returns:
      A dict of mount point -> filesystem type.  When the same mount point is
      listed more than once, the last entry wins.  An empty dict is returned
      if the mount table cannot be read.
    """
    ret = {}
    try:
        with open(mounts_file) as fp:
            for line in fp:
                ele = line.split()
                # Ignore blank or truncated lines rather than crashing on them.
                if len(ele) < 3:
                    continue
                ret[ele[1]] = ele[2]
    except OSError:
        # No readable mount table (e.g. the file does not exist on this
        # system); report "nothing known" so callers can fall back.
        return {}
    return ret
27 | ||
28 | ||
c3fa90a1 MF |
def is_git_dir(path):
    """Report whether |path| is laid out like a .git dir

    That means it holds refs/ and objects/ subdirs plus a config file.
    """
    for subdir in ('refs', 'objects'):
        if not os.path.isdir(os.path.join(path, subdir)):
            return False
    return os.path.isfile(os.path.join(path, 'config'))
34 | ||
35 | ||
c7e7c6e4 MF |
def find_git_dir(path):
    """Try to find the .git dir to operate on

    Starting at |path| (after resolving symlinks), each directory is checked
    in turn while walking up towards the filesystem root.  At every level a
    .git subdir is preferred when present, otherwise the directory itself is
    tested (which covers bare repos).

    Args:
      path: Any path at or below the repo to locate.

    Returns:
      The path of the first directory found that is_git_dir() accepts.

    Raises:
      ValueError: No git dir was found anywhere up to the root.
    """
    orig_path = path
    real_path = path = os.path.realpath(path)
    while True:
        curr_path = path
        if os.path.isdir(os.path.join(path, '.git')):
            curr_path = os.path.join(path, '.git')

        if is_git_dir(curr_path):
            return curr_path

        # Stop only once the root itself has been checked: dirname() of the
        # root returns the root again, so that is the termination signal.
        parent = os.path.dirname(path)
        if parent == path:
            raise ValueError('could not locate .git dir: %s (%s)' %
                             (orig_path, real_path))
        path = parent
53 | ||
54 | ||
def find_temp_dir():
    """Pick the first candidate temp dir that is a tmpfs mount point

    Returns None when none of the candidates is mounted as tmpfs.
    """
    candidates = (
        '/var/tmp/portage',
        '/var/tmp',
        '/tmp',
        tempfile.gettempdir(),
    )
    fs_types = mount_settings()
    return next((p for p in candidates if fs_types.get(p) == 'tmpfs'), None)
68 | ||
69 | ||
def readfile(path):
    """Return the contents of |path|, or '' if it is not a regular file"""
    if not os.path.isfile(path):
        return ''
    with open(path) as handle:
        return handle.read()
c7e7c6e4 MF |
76 | |
77 | ||
def unlink(path):
    """Remove |path| when it is a regular file; otherwise leave things alone"""
    if not os.path.isfile(path):
        return
    os.unlink(path)
82 | ||
83 | ||
def clean_hooks(path):
    """Delete every *.sample file found directly in the hooks/ dir of |path|"""
    pattern = os.path.join(path, 'hooks', '*.sample')
    for sample in glob.glob(pattern):
        print('Trimming hook: %s' % sample)
        os.unlink(sample)
90 | ||
91 | ||
def clean_packs(path):
    """Strip out temp files from objects/pack/

    Removes every tmp_pack_* entry found directly in the objects/pack dir of
    the git dir |path|; all other files there are left untouched.
    """
    # The directory is named "pack" (singular), the same one is_packed()
    # inspects; globbing "packs" would never match anything.
    packs_path = os.path.join(path, 'objects', 'pack')
    for pack in glob.glob(os.path.join(packs_path, 'tmp_pack_*')):
        print('Trimming pack: %s' % pack)
        os.unlink(pack)
98 | ||
99 | ||
def is_packed(path):
    """Check whether the git dir |path| looks fully packed already

    True only when objects/ holds nothing but pack/ (optionally with info/)
    and pack/ itself contains exactly two entries.
    """
    objects_dir = os.path.join(path, 'objects')
    entries = set(os.listdir(objects_dir))
    if entries != {'pack'} and entries != {'info', 'pack'}:
        return False
    pack_entries = os.listdir(os.path.join(objects_dir, 'pack'))
    return len(pack_entries) == 2
110 | ||
111 | ||
def repack(path):
    """Clean up and trim cruft and repack |path|

    The git dir is located via find_git_dir(), any git dirs found under its
    modules/ subdir are repacked first (recursively), and then the repo
    itself is pruned, gc-ed and stripped of empty dirs.  When a tmpfs backed
    temp dir is available, the work happens on a copy there which is synced
    back with rsync afterwards.

    Args:
      path: Any path at or below the git repo to process.

    Raises:
      ValueError: No git dir could be located (from find_git_dir()).
      subprocess.CalledProcessError: One of the checked git/rsync/find
          commands exited non-zero.
    """
    path = find_git_dir(path)
    print('Repacking %s' % path)

    # Repack any submodules this project might use.
    modules_path = os.path.join(path, 'modules')
    if os.path.isdir(modules_path):
        for root, dirs, _ in os.walk(modules_path):
            dirs.sort()
            remaining = []
            for d in dirs:
                mod_path = os.path.join(root, d)
                if is_git_dir(mod_path):
                    repack(mod_path)
                else:
                    remaining.append(d)
            # Don't descend into git dirs we just handled: the recursive call
            # already took care of their own modules/, and walking into them
            # would repack nested submodules a second time.
            dirs[:] = remaining

    tmpdir = find_temp_dir()
    if tmpdir:
        tmpdir = tempfile.mkdtemp(prefix='git-repack.', dir=tmpdir)
        print('Using tempdir: %s' % tmpdir)
        # Only the unique name is wanted here; copytree() below creates the
        # directory itself.
        os.rmdir(tmpdir)
        # Doesn't matter for these needs.
        os.environ['GIT_WORK_TREE'] = tmpdir

    grafts = alts = None
    try:
        # Push/pop the graft & alternate paths so we don't read them.
        # XXX: In some cases, this is bad, but I don't use them that way ...
        graft_file = os.path.join(path, 'info', 'grafts')
        grafts = readfile(graft_file)
        unlink(graft_file)

        alt_file = os.path.join(path, 'objects', 'info', 'alternates')
        alts = readfile(alt_file)
        unlink(alt_file)

        clean_hooks(path)

        # XXX: Should do this for all remotes?
        origin_path = os.path.join(path, 'refs', 'remotes', 'origin')
        packed_refs = readfile(os.path.join(path, 'packed-refs'))
        if os.path.exists(origin_path) or 'refs/remotes/origin/' in packed_refs:
            cmd = ['git', '--git-dir', path, 'remote', 'prune', 'origin']
            subprocess.run(cmd, cwd='/', check=True)

        clean_packs(path)

        if is_packed(path):
            print('Git repo is already packed; nothing to do')
            return

        if tmpdir:
            print('Syncing git repo to tempdir')
            shutil.copytree(path, tmpdir, symlinks=True)
            rundir = tmpdir
        else:
            rundir = path

        cmd = ['git', '--git-dir', rundir, 'reflog', 'expire', '--all', '--stale-fix']
        print('Cleaning reflog: %s' % ' '.join(cmd))
        subprocess.run(cmd, cwd='/', check=True)

        # This also packs refs/tags for us.
        cmd = ['git', '--git-dir', rundir, 'gc', '--aggressive', '--prune=all']
        print('Repacking git repo: %s' % ' '.join(cmd))
        subprocess.run(cmd, cwd='/', check=True)

        # Clean empty dirs.  The exit status and stderr are ignored on
        # purpose: rmdir is attempted on every dir, including non-empty ones.
        cmd = ['find', rundir, '-depth', '-type', 'd', '-exec', 'rmdir', '{}', '+']
        subprocess.call(cmd, stderr=subprocess.DEVNULL)

        # There's a few dirs we need to exist even if they're empty.
        refdir = os.path.join(rundir, 'refs')
        os.makedirs(refdir, exist_ok=True)

        if tmpdir:
            cmd = ['rsync', '-a', '--delete', tmpdir + '/', path + '/']
            print('Syncing back git repo: %s' % ' '.join(cmd))
            subprocess.run(cmd, cwd='/', check=True)
            cmd = ['find', path + '/', '-exec', 'chmod', 'u+rw', '{}', '+']
            subprocess.run(cmd, cwd='/', check=True)

    finally:
        # Put back the grafts/alternates files that were removed above, and
        # drop the scratch copy, no matter how the work ended.
        if grafts:
            with open(graft_file, 'w') as fp:
                fp.write(grafts)
        if alts:
            with open(alt_file, 'w') as fp:
                fp.write(alts)
        if tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)
c7e7c6e4 MF |
202 | |
203 | ||
def get_parser():
    """Build the argparse parser used for the command line"""
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('dir', help='The git repo to process')
    return cli
209 | ||
210 | ||
def main(argv):
    """Parse |argv| and repack the repo it names"""
    opts = get_parser().parse_args(argv)
    repack(opts.dir)
216 | ||
217 | ||
if __name__ == '__main__':
    # Use sys.exit() rather than the exit() helper: the latter is injected by
    # the site module and is not guaranteed to exist (e.g. under python -S).
    sys.exit(main(sys.argv[1:]))