From 704e25878ce10de201629bc83f81975a4e808b9c Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 21 Oct 2024 09:25:24 -0700 Subject: [PATCH] filter-repo: limit searches for ref values to actual refs The FreeBSD repository has refs that look like ref expressions. For example: refs/tags/vendor/openzfs/2.0-rc3-gfc5966 This is a tag. However, if you rewrite the FreeBSD history to only include the libexec/ftpd directory, then this tag pre-dated any file within that directory and thus should be deleted. Once that tag is deleted, trying to pass it to `git cat-file --batch-check` will result in the value of refs/tags/vendor/openzfs/2.0-rc3-gfc5966 being printed as the value of some blob. This happens because `git cat-file` finds no ref named refs/tags/vendor/openzfs/2.0-rc3-gfc5966 but notices the `-gfc5966` extension and goes looking for any object whose name begins with `fc5966`. Since it turns out there is one object that starts with that name and there is only one, and it happens to be a blob, `git cat-file` will say, oh, you must have been asking for blob fc5966c9c467e0a7d460498b7581e845d33d89d7 This matters to code that runs after the new history has been written and refs have been updated, but before the old history has been pruned. Since we are specifically just wanting to know the values of refs, use show-ref instead. 
Signed-off-by: Elijah Newren --- git-filter-repo | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 573f5f33..e905f6ef 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -4194,8 +4194,8 @@ class RepoFilter(object): original_hash = old_commit_unrenames.get(old_hash, old_hash) old_ref_map[refname] = (original_hash, deleted_hash) - batch_check_process = None - batch_check_output_re = re.compile(b'^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$') + new_refs = {} + new_refs_initialized = False ref_maps = {} self._orig_graph._ensure_reverse_maps_populated() for refname, pair in old_ref_map.items(): @@ -4209,36 +4209,22 @@ class RepoFilter(object): else: new_hash = intermediate else: # Must be either an annotated tag, or a ref whose tip was pruned - if not batch_check_process: - cmd = 'git cat-file --batch-check'.split() + if not new_refs_initialized: target_working_dir = self._args.target or b'.' - batch_check_process = subproc.Popen(cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - cwd=target_working_dir) - batch_check_process.stdin.write(refname+b"\n") - batch_check_process.stdin.flush() - line = batch_check_process.stdout.readline() - m = batch_check_output_re.match(line) - if m and m.group(2) in (b'tag', b'commit'): - new_hash = m.group(1) - elif line.endswith(b' missing\n'): - new_hash = deleted_hash + new_refs = GitUtils.get_refs(target_working_dir) + if refname in new_refs: + new_hash = new_refs[refname] else: - raise SystemExit(_("Failed to find new id for %(refname)s " - "(old id was %(old_hash)s)") - % ({'refname': refname, 'old_hash': old_hash}) - ) # pragma: no cover + new_hash = deleted_hash ref_maps[refname] = (old_hash, new_hash) if self._args.source or self._args.target: - new_refs = GitUtils.get_refs(self._args.target or b'.') + if not new_refs_initialized: + target_working_dir = self._args.target or b'.' 
+ new_refs = GitUtils.get_refs(target_working_dir) for ref, new_hash in new_refs.items(): if ref not in orig_refs and not ref.startswith(b'refs/replace/'): old_hash = b'0'*len(new_hash) ref_maps[ref] = (old_hash, new_hash) - if batch_check_process: - batch_check_process.stdin.close() - batch_check_process.wait() # # Third, handle first_changes