Skip to content

Commit 78e6b21

Browse files
authored
Improve checkIfPRContentChanged (#22611)
The code for checking if a commit has caused a change in a PR is extremely inefficient and affects the head repository instead of using a temporary repository. This PR therefore makes several significant improvements: * A temporary repo like that used in merging. * The diff code is then significant improved to use a three-way diff instead of comparing diffs (possibly binary) line-by-line - in memory... Ref #22578 Signed-off-by: Andrew Thornton <[email protected]>
1 parent e9cd18b commit 78e6b21

File tree

2 files changed

+61
-53
lines changed

2 files changed

+61
-53
lines changed

modules/util/io.go

+22
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package util
55

66
import (
7+
"errors"
78
"io"
89
)
910

@@ -17,3 +18,24 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) {
1718
}
1819
return n, err
1920
}
21+
22+
// ErrNotEmpty is an error reported when there is a non-empty reader
23+
var ErrNotEmpty = errors.New("not-empty")
24+
25+
// IsEmptyReader reads a reader and ensures it is empty
26+
func IsEmptyReader(r io.Reader) (err error) {
27+
var buf [1]byte
28+
29+
for {
30+
n, err := r.Read(buf[:])
31+
if err != nil {
32+
if err == io.EOF {
33+
return nil
34+
}
35+
return err
36+
}
37+
if n > 0 {
38+
return ErrNotEmpty
39+
}
40+
}
41+
}

services/pull/pull.go

+39-53
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,12 @@
44
package pull
55

66
import (
7-
"bufio"
8-
"bytes"
97
"context"
108
"fmt"
119
"io"
10+
"os"
1211
"regexp"
1312
"strings"
14-
"time"
1513

1614
"code.gitea.io/gitea/models"
1715
"code.gitea.io/gitea/models/db"
@@ -29,6 +27,7 @@ import (
2927
repo_module "code.gitea.io/gitea/modules/repository"
3028
"code.gitea.io/gitea/modules/setting"
3129
"code.gitea.io/gitea/modules/sync"
30+
"code.gitea.io/gitea/modules/util"
3231
issue_service "code.gitea.io/gitea/services/issue"
3332
)
3433

@@ -351,69 +350,56 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string,
351350
// checkIfPRContentChanged checks if diff to target branch has changed by push
352351
// A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged
353352
func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) {
354-
if err = pr.LoadHeadRepo(ctx); err != nil {
355-
return false, fmt.Errorf("LoadHeadRepo: %w", err)
356-
} else if pr.HeadRepo == nil {
357-
// corrupt data assumed changed
358-
return true, nil
359-
}
360-
361-
if err = pr.LoadBaseRepo(ctx); err != nil {
362-
return false, fmt.Errorf("LoadBaseRepo: %w", err)
363-
}
364-
365-
headGitRepo, err := git.OpenRepository(ctx, pr.HeadRepo.RepoPath())
353+
tmpBasePath, err := createTemporaryRepo(ctx, pr)
366354
if err != nil {
367-
return false, fmt.Errorf("OpenRepository: %w", err)
368-
}
369-
defer headGitRepo.Close()
370-
371-
// Add a temporary remote.
372-
tmpRemote := "checkIfPRContentChanged-" + fmt.Sprint(time.Now().UnixNano())
373-
if err = headGitRepo.AddRemote(tmpRemote, pr.BaseRepo.RepoPath(), true); err != nil {
374-
return false, fmt.Errorf("AddRemote: %s/%s-%s: %w", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
355+
log.Error("CreateTemporaryRepo: %v", err)
356+
return false, err
375357
}
376358
defer func() {
377-
if err := headGitRepo.RemoveRemote(tmpRemote); err != nil {
378-
log.Error("checkIfPRContentChanged: RemoveRemote: %s/%s-%s: %v", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
359+
if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil {
360+
log.Error("checkIfPRContentChanged: RemoveTemporaryPath: %s", err)
379361
}
380362
}()
381-
// To synchronize repo and get a base ref
382-
_, base, err := headGitRepo.GetMergeBase(tmpRemote, pr.BaseBranch, pr.HeadBranch)
363+
364+
tmpRepo, err := git.OpenRepository(ctx, tmpBasePath)
383365
if err != nil {
384-
return false, fmt.Errorf("GetMergeBase: %w", err)
366+
return false, fmt.Errorf("OpenRepository: %w", err)
385367
}
368+
defer tmpRepo.Close()
386369

387-
diffBefore := &bytes.Buffer{}
388-
diffAfter := &bytes.Buffer{}
389-
if err := headGitRepo.GetDiffFromMergeBase(base, oldCommitID, diffBefore); err != nil {
390-
// If old commit not found, assume changed.
391-
log.Debug("GetDiffFromMergeBase: %v", err)
392-
return true, nil
393-
}
394-
if err := headGitRepo.GetDiffFromMergeBase(base, newCommitID, diffAfter); err != nil {
395-
// New commit should be found
396-
return false, fmt.Errorf("GetDiffFromMergeBase: %w", err)
370+
// Find the merge-base
371+
_, base, err := tmpRepo.GetMergeBase("", "base", "tracking")
372+
if err != nil {
373+
return false, fmt.Errorf("GetMergeBase: %w", err)
397374
}
398375

399-
diffBeforeLines := bufio.NewScanner(diffBefore)
400-
diffAfterLines := bufio.NewScanner(diffAfter)
401-
402-
for diffBeforeLines.Scan() && diffAfterLines.Scan() {
403-
if strings.HasPrefix(diffBeforeLines.Text(), "index") && strings.HasPrefix(diffAfterLines.Text(), "index") {
404-
// file hashes can change without the diff changing
405-
continue
406-
} else if strings.HasPrefix(diffBeforeLines.Text(), "@@") && strings.HasPrefix(diffAfterLines.Text(), "@@") {
407-
// the location of the difference may change
408-
continue
409-
} else if !bytes.Equal(diffBeforeLines.Bytes(), diffAfterLines.Bytes()) {
376+
cmd := git.NewCommand(ctx, "diff", "--name-only", "-z").AddDynamicArguments(newCommitID, oldCommitID, base)
377+
stdoutReader, stdoutWriter, err := os.Pipe()
378+
if err != nil {
379+
return false, fmt.Errorf("unable to open pipe for to run diff: %w", err)
380+
}
381+
382+
if err := cmd.Run(&git.RunOpts{
383+
Dir: tmpBasePath,
384+
Stdout: stdoutWriter,
385+
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
386+
_ = stdoutWriter.Close()
387+
defer func() {
388+
_ = stdoutReader.Close()
389+
}()
390+
return util.IsEmptyReader(stdoutReader)
391+
},
392+
}); err != nil {
393+
if err == util.ErrNotEmpty {
410394
return true, nil
411395
}
412-
}
413396

414-
if diffBeforeLines.Scan() || diffAfterLines.Scan() {
415-
// Diffs not of equal length
416-
return true, nil
397+
log.Error("Unable to run diff on %s %s %s in tempRepo for PR[%d]%s/%s...%s/%s: Error: %v",
398+
newCommitID, oldCommitID, base,
399+
pr.ID, pr.BaseRepo.FullName(), pr.BaseBranch, pr.HeadRepo.FullName(), pr.HeadBranch,
400+
err)
401+
402+
return false, fmt.Errorf("Unable to run git diff --name-only -z %s %s %s: %w", newCommitID, oldCommitID, base, err)
417403
}
418404

419405
return false, nil

0 commit comments

Comments
 (0)