]>
Commit | Line | Data |
---|---|---|
c7e7c6e4 MF |
1 | #!/usr/bin/python |
2 | ||
3 | """Repack git repos fully the way I like them.""" | |
4 | ||
5 | from __future__ import print_function | |
6 | ||
7 | import argparse | |
8 | import glob | |
9 | import os | |
10 | import shutil | |
11 | import subprocess | |
12 | import sys | |
13 | import tempfile | |
14 | ||
15 | ||
def mount_settings():
    """Return dict mapping each mount point to its filesystem type

    The data comes from /proc/mounts: the second whitespace separated field
    of every line is used as the key and the third one as the value.  When
    a mount point is listed more than once, the last entry wins.
    """
    mounts = {}
    with open('/proc/mounts') as fp:
        for entry in fp:
            fields = entry.split()
            mount_point, fs_type = fields[1], fields[2]
            mounts[mount_point] = fs_type
    return mounts
24 | ||
25 | ||
c3fa90a1 MF |
def is_git_dir(path):
    """Whether |path| looks like a .git dir

    A dir qualifies when it holds 'refs' and 'objects' subdirs as well as
    a regular 'config' file.
    """
    for subdir in ('refs', 'objects'):
        if not os.path.isdir(os.path.join(path, subdir)):
            return False
    return os.path.isfile(os.path.join(path, 'config'))
31 | ||
32 | ||
c7e7c6e4 MF |
def find_git_dir(path):
    """Locate the .git dir that |path| belongs to

    Starting at the resolved |path|, every dir on the way up is checked:
    its '.git' subdir is preferred when present, otherwise the dir itself
    is tested.  The first candidate accepted by is_git_dir() is returned.

    Raises:
      ValueError: The walk reached '/' without finding a git dir.
    """
    orig_path = path
    real_path = os.path.realpath(path)

    search_dir = real_path
    while True:
        dot_git = os.path.join(search_dir, '.git')
        candidate = dot_git if os.path.isdir(dot_git) else search_dir

        if is_git_dir(candidate):
            return candidate

        search_dir = os.path.dirname(search_dir)

        if search_dir == '/':
            raise ValueError('could not locate .git dir: %s (%s)' %
                             (orig_path, real_path))
50 | ||
51 | ||
def find_temp_dir():
    """Pick the first well known temp dir that is a tmpfs mount point

    Returns:
      The path of the first candidate whose mount type is 'tmpfs', or None
      when none of the candidates is mounted that way.
    """
    candidates = (
        '/var/tmp/portage',
        '/var/tmp',
        '/tmp',
        tempfile.gettempdir(),
    )
    mounts = mount_settings()
    tmpfs_dirs = (path for path in candidates if mounts.get(path) == 'tmpfs')
    return next(tmpfs_dirs, None)
65 | ||
66 | ||
def readfile(path):
    """Read |path| and return its data

    Returns:
      The whole content of |path| as a string, or '' when |path| is not a
      regular file (missing, a directory, ...).
    """
    if not os.path.isfile(path):
        return ''
    # Close the handle deterministically instead of leaving it to the GC.
    with open(path) as fp:
        return fp.read()
c7e7c6e4 MF |
72 | |
73 | ||
def unlink(path):
    """Remove |path| when it is a regular file; otherwise do nothing"""
    if not os.path.isfile(path):
        return
    os.unlink(path)
78 | ||
79 | ||
def clean_hooks(path):
    """Delete the '*.sample' files found in the hooks/ subdir of |path|"""
    pattern = os.path.join(path, 'hooks', '*.sample')
    for sample in glob.glob(pattern):
        print('Trimming hook: %s' % sample)
        os.unlink(sample)
86 | ||
87 | ||
def clean_packs(path):
    """Strip out temp files from objects/pack/

    Only files matching 'tmp_pack_*' directly inside the pack dir of the
    git dir |path| are removed; real packs and indexes are left alone.
    """
    # The dir is named 'pack' (singular), same as is_packed() uses; globbing
    # 'packs' would never match anything.
    pack_dir = os.path.join(path, 'objects', 'pack')
    for pack in glob.glob(os.path.join(pack_dir, 'tmp_pack_*')):
        print('Trimming pack: %s' % pack)
        os.unlink(pack)
94 | ||
95 | ||
def is_packed(path):
    """See if the git repo is already packed

    The git dir |path| counts as packed when:
      * objects/ holds nothing but 'pack' (and optionally 'info'), i.e. there
        are no loose object dirs, and
      * objects/pack/ holds exactly one '*.pack' file together with its
        '.idx', and every other file in there shares that pack's basename.

    Returns:
      True when the repo is already fully packed, False otherwise.
    """
    obj_path = os.path.join(path, 'objects')
    entries = set(os.listdir(obj_path))
    if entries not in ({'info', 'pack'}, {'pack'}):
        return False

    names = os.listdir(os.path.join(obj_path, 'pack'))
    packs = [name for name in names if name.endswith('.pack')]
    if len(packs) != 1:
        return False

    stem = os.path.splitext(packs[0])[0]
    if stem + '.idx' not in names:
        return False

    # Git may store more per-pack files than just the .pack/.idx pair (e.g.
    # .rev or .bitmap), so do not insist on exactly two files.  Anything not
    # belonging to the one pack (temp files, leftovers) still means there is
    # work to do.
    return all(name.startswith(stem + '.') for name in names)
106 | ||
107 | ||
def _restore_file(path, data):
    """Write |data| back to |path|, recreating its parent dir if needed"""
    parent = os.path.dirname(path)
    if not os.path.isdir(parent):
        os.makedirs(parent)
    with open(path, 'w') as fp:
        fp.write(data)


def repack(path):
    """Clean up and trim cruft and repack |path|

    The steps, in order:
      * Locate the git dir for |path| and recursively repack every git dir
        found below its modules/ subdir.
      * Temporarily remove info/grafts and objects/info/alternates (they are
        written back at the end, even on failure).
      * Delete sample hooks, prune the 'origin' remote when it is present,
        and delete temp packs.
      * Stop early when the repo is already packed.
      * Otherwise expire the reflog, run an aggressive gc and remove empty
        dirs.  When a tmpfs backed temp dir is available, this is done on a
        copy in there which is then synced back with rsync.

    Args:
      path: A path inside (or pointing at) the git repo to process.

    Raises:
      ValueError: No git dir could be located for |path|.
      subprocess.CalledProcessError: One of the checked commands failed.
    """
    path = find_git_dir(path)
    print('Repacking %s' % path)

    # Repack any submodules this project might use.
    modules_path = os.path.join(path, 'modules')
    if os.path.isdir(modules_path):
        for root, dirs, _ in os.walk(modules_path):
            dirs.sort()
            for d in dirs:
                mod_path = os.path.join(root, d)
                if is_git_dir(mod_path):
                    repack(mod_path)

    tmpdir = find_temp_dir()
    if tmpdir:
        tmpdir = tempfile.mkdtemp(prefix='git-repack.', dir=tmpdir)
        print('Using tempdir: %s' % tmpdir)
        # Only the unique name is wanted here; shutil.copytree() below has to
        # create the dir itself.
        os.rmdir(tmpdir)
        # Doesn't matter for these needs.
        os.environ['GIT_WORK_TREE'] = tmpdir

    grafts = alts = None
    try:
        # Push/pop the graft & alternate paths so we don't read them.
        # XXX: In some cases, this is bad, but I don't use them that way ...
        graft_file = os.path.join(path, 'info', 'grafts')
        grafts = readfile(graft_file)
        unlink(graft_file)

        alt_file = os.path.join(path, 'objects', 'info', 'alternates')
        alts = readfile(alt_file)
        unlink(alt_file)

        clean_hooks(path)

        # XXX: Should do this for all remotes?
        origin_path = os.path.join(path, 'refs', 'remotes', 'origin')
        packed_refs = readfile(os.path.join(path, 'packed-refs'))
        if os.path.exists(origin_path) or 'refs/remotes/origin/' in packed_refs:
            cmd = ['git', '--git-dir', path, 'remote', 'prune', 'origin']
            subprocess.check_call(cmd, cwd='/')

        clean_packs(path)

        if is_packed(path):
            print('Git repo is already packed; nothing to do')
            return

        if tmpdir:
            print('Syncing git repo to tempdir')
            shutil.copytree(path, tmpdir, symlinks=True)
            rundir = tmpdir
        else:
            rundir = path

        cmd = ['git', '--git-dir', rundir, 'reflog', 'expire', '--all', '--stale-fix']
        print('Cleaning reflog: %s' % ' '.join(cmd))
        subprocess.check_call(cmd, cwd='/')

        # This also packs refs/tags for us.
        cmd = ['git', '--git-dir', rundir, 'gc', '--aggressive', '--prune=all']
        print('Repacking git repo: %s' % ' '.join(cmd))
        subprocess.check_call(cmd, cwd='/')

        # Clean empty dirs.  rmdir fails (noisily) on every non-empty dir, so
        # its stderr is discarded and the exit status is ignored on purpose.
        cmd = ['find', rundir, '-depth', '-type', 'd', '-exec', 'rmdir', '{}', '+']
        with open(os.devnull, 'w') as devnull:
            subprocess.call(cmd, stderr=devnull)

        # There's a few dirs we need to exist even if they're empty.
        refdir = os.path.join(rundir, 'refs')
        if not os.path.isdir(refdir):
            os.mkdir(refdir)

        if tmpdir:
            cmd = ['rsync', '-a', '--delete', tmpdir + '/', path + '/']
            print('Syncing back git repo: %s' % ' '.join(cmd))
            subprocess.check_call(cmd, cwd='/')
            cmd = ['find', path + '/', '-exec', 'chmod', 'u+rw', '{}', '+']
            subprocess.check_call(cmd, cwd='/')

    finally:
        # The empty dir cleanup above may have removed the dirs these files
        # lived in, so the restore recreates them as needed.
        if grafts:
            _restore_file(graft_file, grafts)
        if alts:
            _restore_file(alt_file, alts)
        if tmpdir and os.path.exists(tmpdir):
            shutil.rmtree(tmpdir)
197 | ||
198 | ||
def get_parser():
    """Build the argparse parser for the command line

    The module docstring is used as the description and a single positional
    'dir' argument is accepted.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('dir', help='The git repo to process')
    return cli
204 | ||
205 | ||
def main(argv):
    """Parse the command line |argv| and repack the requested repo"""
    opts = get_parser().parse_args(argv)
    repack(opts.dir)
211 | ||
212 | ||
if __name__ == '__main__':
    # The bare exit() helper is injected by the site module and is not
    # guaranteed to exist (e.g. under `python -S`); sys.exit() always is.
    sys.exit(main(sys.argv[1:]))