Skip to content

Commit

Permalink
filter-repo: limit searches for ref values to actual refs
Browse files Browse the repository at this point in the history
The FreeBSD repository has refs that look like ref expressions.  For
example:

   refs/tags/vendor/openzfs/2.0-rc3-gfc5966

This is a tag.  However, if you rewrite the FreeBSD history to only
include the libexec/ftpd directory, then this tag pre-dated any file
within that directory and thus should be deleted.  Once that tag is
deleted, trying to pass it to `git cat-file --batch-check` will result
in the value of refs/tags/vendor/openzfs/2.0-rc3-gfc5966 being printed
as the value of some blob.  This happens because git can no longer find
a ref named

    refs/tags/vendor/openzfs/2.0-rc3-gfc5966

but notices the `-gfc5966` extension and goes looking for any object
whose name begins with `fc5966`.  Since exactly one object has a name
starting with that prefix, and it happens to be a blob, `git cat-file`
will say, oh, you must have been asking for blob

    fc5966c9c467e0a7d460498b7581e845d33d89d7

This matters to code that runs after the new history has been written
and refs have been updated, but before the old history has been pruned.
Since we are specifically just wanting to know the values of refs,
use `git show-ref` instead.

Signed-off-by: Elijah Newren <[email protected]>
  • Loading branch information
newren committed Oct 21, 2024
1 parent 912dbca commit 704e258
Showing 1 changed file with 10 additions and 24 deletions.
34 changes: 10 additions & 24 deletions git-filter-repo
Original file line number Diff line number Diff line change
Expand Up @@ -4194,8 +4194,8 @@ class RepoFilter(object):
original_hash = old_commit_unrenames.get(old_hash, old_hash)
old_ref_map[refname] = (original_hash, deleted_hash)

batch_check_process = None
batch_check_output_re = re.compile(b'^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$')
new_refs = {}
new_refs_initialized = False
ref_maps = {}
self._orig_graph._ensure_reverse_maps_populated()
for refname, pair in old_ref_map.items():
Expand All @@ -4209,36 +4209,22 @@ class RepoFilter(object):
else:
new_hash = intermediate
else: # Must be either an annotated tag, or a ref whose tip was pruned
if not batch_check_process:
cmd = 'git cat-file --batch-check'.split()
if not new_refs_initialized:
target_working_dir = self._args.target or b'.'
batch_check_process = subproc.Popen(cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
cwd=target_working_dir)
batch_check_process.stdin.write(refname+b"\n")
batch_check_process.stdin.flush()
line = batch_check_process.stdout.readline()
m = batch_check_output_re.match(line)
if m and m.group(2) in (b'tag', b'commit'):
new_hash = m.group(1)
elif line.endswith(b' missing\n'):
new_hash = deleted_hash
new_refs = GitUtils.get_refs(target_working_dir)
if refname in new_refs:
new_hash = new_refs[refname]
else:
raise SystemExit(_("Failed to find new id for %(refname)s "
"(old id was %(old_hash)s)")
% ({'refname': refname, 'old_hash': old_hash})
) # pragma: no cover
new_hash = deleted_hash
ref_maps[refname] = (old_hash, new_hash)
if self._args.source or self._args.target:
new_refs = GitUtils.get_refs(self._args.target or b'.')
if not new_refs_initialized:
target_working_dir = self._args.target or b'.'
new_refs = GitUtils.get_refs(target_working_dir)
for ref, new_hash in new_refs.items():
if ref not in orig_refs and not ref.startswith(b'refs/replace/'):
old_hash = b'0'*len(new_hash)
ref_maps[ref] = (old_hash, new_hash)
if batch_check_process:
batch_check_process.stdin.close()
batch_check_process.wait()

#
# Third, handle first_changes
Expand Down

0 comments on commit 704e258

Please sign in to comment.