Skip to content

Commit 40c7ea3

Browse files
committed
Improve checkIfPRContentChanged (go-gitea#22611)
Backport go-gitea#22611 The code for checking if a commit has caused a change in a PR is extremely inefficient and affects the head repository instead of using a temporary repository. This PR therefore makes several significant improvements: * A temporary repo like that used in merging. * The diff code is then significant improved to use a three-way diff instead of comparing diffs (possibly binary) line-by-line - in memory... Ref go-gitea#22578 Signed-off-by: Andrew Thornton <[email protected]>
1 parent 6be1d71 commit 40c7ea3

File tree

2 files changed

+61
-53
lines changed

2 files changed

+61
-53
lines changed

modules/util/io.go

+22
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package util
66

77
import (
8+
"errors"
89
"io"
910
)
1011

@@ -18,3 +19,24 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) {
1819
}
1920
return n, err
2021
}
22+
23+
// ErrNotEmpty is an error reported when there is a non-empty reader
24+
var ErrNotEmpty = errors.New("not-empty")
25+
26+
// IsEmptyReader reads a reader and ensures it is empty
27+
func IsEmptyReader(r io.Reader) (err error) {
28+
var buf [1]byte
29+
30+
for {
31+
n, err := r.Read(buf[:])
32+
if err != nil {
33+
if err == io.EOF {
34+
return nil
35+
}
36+
return err
37+
}
38+
if n > 0 {
39+
return ErrNotEmpty
40+
}
41+
}
42+
}

services/pull/pull.go

+39-53
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,12 @@
55
package pull
66

77
import (
8-
"bufio"
9-
"bytes"
108
"context"
119
"fmt"
1210
"io"
11+
"os"
1312
"regexp"
1413
"strings"
15-
"time"
1614

1715
"code.gitea.io/gitea/models"
1816
"code.gitea.io/gitea/models/db"
@@ -30,6 +28,7 @@ import (
3028
repo_module "code.gitea.io/gitea/modules/repository"
3129
"code.gitea.io/gitea/modules/setting"
3230
"code.gitea.io/gitea/modules/sync"
31+
"code.gitea.io/gitea/modules/util"
3332
issue_service "code.gitea.io/gitea/services/issue"
3433
)
3534

@@ -352,69 +351,56 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string,
352351
// checkIfPRContentChanged checks if diff to target branch has changed by push
353352
// A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged
354353
func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) {
355-
if err = pr.LoadHeadRepoCtx(ctx); err != nil {
356-
return false, fmt.Errorf("LoadHeadRepo: %w", err)
357-
} else if pr.HeadRepo == nil {
358-
// corrupt data assumed changed
359-
return true, nil
360-
}
361-
362-
if err = pr.LoadBaseRepoCtx(ctx); err != nil {
363-
return false, fmt.Errorf("LoadBaseRepo: %w", err)
364-
}
365-
366-
headGitRepo, err := git.OpenRepository(ctx, pr.HeadRepo.RepoPath())
354+
tmpBasePath, err := createTemporaryRepo(ctx, pr)
367355
if err != nil {
368-
return false, fmt.Errorf("OpenRepository: %w", err)
369-
}
370-
defer headGitRepo.Close()
371-
372-
// Add a temporary remote.
373-
tmpRemote := "checkIfPRContentChanged-" + fmt.Sprint(time.Now().UnixNano())
374-
if err = headGitRepo.AddRemote(tmpRemote, pr.BaseRepo.RepoPath(), true); err != nil {
375-
return false, fmt.Errorf("AddRemote: %s/%s-%s: %w", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
356+
log.Error("CreateTemporaryRepo: %v", err)
357+
return false, err
376358
}
377359
defer func() {
378-
if err := headGitRepo.RemoveRemote(tmpRemote); err != nil {
379-
log.Error("checkIfPRContentChanged: RemoveRemote: %s/%s-%s: %v", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
360+
if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil {
361+
log.Error("checkIfPRContentChanged: RemoveTemporaryPath: %s", err)
380362
}
381363
}()
382-
// To synchronize repo and get a base ref
383-
_, base, err := headGitRepo.GetMergeBase(tmpRemote, pr.BaseBranch, pr.HeadBranch)
364+
365+
tmpRepo, err := git.OpenRepository(ctx, tmpBasePath)
384366
if err != nil {
385-
return false, fmt.Errorf("GetMergeBase: %w", err)
367+
return false, fmt.Errorf("OpenRepository: %w", err)
386368
}
369+
defer tmpRepo.Close()
387370

388-
diffBefore := &bytes.Buffer{}
389-
diffAfter := &bytes.Buffer{}
390-
if err := headGitRepo.GetDiffFromMergeBase(base, oldCommitID, diffBefore); err != nil {
391-
// If old commit not found, assume changed.
392-
log.Debug("GetDiffFromMergeBase: %v", err)
393-
return true, nil
394-
}
395-
if err := headGitRepo.GetDiffFromMergeBase(base, newCommitID, diffAfter); err != nil {
396-
// New commit should be found
397-
return false, fmt.Errorf("GetDiffFromMergeBase: %w", err)
371+
// Find the merge-base
372+
_, base, err := tmpRepo.GetMergeBase("", "base", "tracking")
373+
if err != nil {
374+
return false, fmt.Errorf("GetMergeBase: %w", err)
398375
}
399376

400-
diffBeforeLines := bufio.NewScanner(diffBefore)
401-
diffAfterLines := bufio.NewScanner(diffAfter)
402-
403-
for diffBeforeLines.Scan() && diffAfterLines.Scan() {
404-
if strings.HasPrefix(diffBeforeLines.Text(), "index") && strings.HasPrefix(diffAfterLines.Text(), "index") {
405-
// file hashes can change without the diff changing
406-
continue
407-
} else if strings.HasPrefix(diffBeforeLines.Text(), "@@") && strings.HasPrefix(diffAfterLines.Text(), "@@") {
408-
// the location of the difference may change
409-
continue
410-
} else if !bytes.Equal(diffBeforeLines.Bytes(), diffAfterLines.Bytes()) {
377+
cmd := git.NewCommand(ctx, "diff", "--name-only", "-z").AddDynamicArguments(newCommitID, oldCommitID, base)
378+
stdoutReader, stdoutWriter, err := os.Pipe()
379+
if err != nil {
380+
return false, fmt.Errorf("unable to open pipe for to run diff: %w", err)
381+
}
382+
383+
if err := cmd.Run(&git.RunOpts{
384+
Dir: tmpBasePath,
385+
Stdout: stdoutWriter,
386+
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
387+
_ = stdoutWriter.Close()
388+
defer func() {
389+
_ = stdoutReader.Close()
390+
}()
391+
return util.IsEmptyReader(stdoutReader)
392+
},
393+
}); err != nil {
394+
if err == util.ErrNotEmpty {
411395
return true, nil
412396
}
413-
}
414397

415-
if diffBeforeLines.Scan() || diffAfterLines.Scan() {
416-
// Diffs not of equal length
417-
return true, nil
398+
log.Error("Unable to run diff on %s %s %s in tempRepo for PR[%d]%s/%s...%s/%s: Error: %v",
399+
newCommitID, oldCommitID, base,
400+
pr.ID, pr.BaseRepo.FullName(), pr.BaseBranch, pr.HeadRepo.FullName(), pr.HeadBranch,
401+
err)
402+
403+
return false, fmt.Errorf("Unable to run git diff --name-only -z %s %s %s: %w", newCommitID, oldCommitID, base, err)
418404
}
419405

420406
return false, nil

0 commit comments

Comments
 (0)