Skip to content

Commit

Permalink
[#2196] Use git blame line info for aggregate blame author modified a…
Browse files Browse the repository at this point in the history
…nd date info (#2232)

`aggregateBlameAuthorModifiedAndDateInfo` is hard to maintain, due to
complex string wrangling and seemingly magic numbers. Let's fix these using
`GitBlameLineInfo` introduced in #2140.

- Use blameLine to abstract away the string wrangling and improve
understandability. Magic numbers are also replaced to improve code quality.    

- In order to use blameLine however, it is also changed to use 
author-time instead of commit-time, as there is a discrepancy between the two,
causing some test cases to fail. 

 - The naming of the timestamp field in GitBlameLineInfo is changed to
reflect that it is in seconds, as author-time is in seconds.
  • Loading branch information
logical-1985516 authored Jan 6, 2025
1 parent 33df258 commit e53c52c
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 60 deletions.
63 changes: 20 additions & 43 deletions src/main/java/reposense/authorship/FileInfoAnalyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,19 @@
import reposense.authorship.model.LineInfo;
import reposense.git.GitBlame;
import reposense.git.GitLog;
import reposense.git.model.GitBlameLineInfo;
import reposense.model.Author;
import reposense.model.CommitHash;
import reposense.model.RepoConfiguration;
import reposense.system.LogsManager;
import reposense.util.FileUtil;
import reposense.util.StringsUtil;

/**
* Analyzes the target and information given in the {@link FileInfo}.
*/
public class FileInfoAnalyzer {
private static final Logger logger = LogsManager.getLogger(FileInfoAnalyzer.class);

private static final int AUTHOR_NAME_OFFSET = "author ".length();
private static final int AUTHOR_EMAIL_OFFSET = "author-mail ".length();
private static final int AUTHOR_TIME_OFFSET = "author-time ".length();
private static final int AUTHOR_TIMEZONE_OFFSET = "author-tz ".length();
private static final int FULL_COMMIT_HASH_LENGTH = 40;

private static final String MESSAGE_FILE_MISSING = "Unable to analyze the file located at \"%s\" "
+ "as the file is missing from your system. Skipping this file.";

Expand Down Expand Up @@ -148,31 +142,21 @@ private FileResult generateBinaryFileResult(RepoConfiguration config, FileInfo f
*/
private void aggregateBlameAuthorModifiedAndDateInfo(RepoConfiguration config, FileInfo fileInfo,
boolean shouldAnalyzeAuthorship, double originalityThreshold) {
String blameResults;

if (!config.isFindingPreviousAuthorsPerformed()) {
blameResults = getGitBlameResult(config, fileInfo.getPath());
} else {
blameResults = getGitBlameWithPreviousAuthorsResult(config, fileInfo.getPath());
}
List<GitBlameLineInfo> gitBlameLineInfos = getGitBlameFileResult(config, fileInfo.getPath(),
config.isFindingPreviousAuthorsPerformed());

String[] blameResultLines = StringsUtil.NEWLINE.split(blameResults);
Path filePath = Paths.get(fileInfo.getPath());
LocalDateTime sinceDate = config.getSinceDate();
LocalDateTime untilDate = config.getUntilDate();

for (int lineCount = 0; lineCount < blameResultLines.length; lineCount += 5) {
String commitHash = blameResultLines[lineCount].substring(0, FULL_COMMIT_HASH_LENGTH);
String authorName = blameResultLines[lineCount + 1].substring(AUTHOR_NAME_OFFSET);
String authorEmail = blameResultLines[lineCount + 2]
.substring(AUTHOR_EMAIL_OFFSET).replaceAll("<|>", "");
long commitDateInMs = Long.parseLong(blameResultLines[lineCount + 3].substring(AUTHOR_TIME_OFFSET)) * 1000;
LocalDateTime commitDate = LocalDateTime.ofInstant(Instant.ofEpochMilli(commitDateInMs),
config.getZoneId());
Author author = config.getAuthor(authorName, authorEmail);

int lineNumber = lineCount / 5;
if (!fileInfo.isFileLineTracked(lineNumber) || author.isIgnoringFile(filePath)
for (int lineCount = 0; lineCount < gitBlameLineInfos.size(); lineCount++) {
GitBlameLineInfo blameLineInfo = gitBlameLineInfos.get(lineCount);
String commitHash = blameLineInfo.getCommitHash();
LocalDateTime commitDate = LocalDateTime.ofInstant(
Instant.ofEpochSecond(blameLineInfo.getTimestampInSeconds()), config.getZoneId());
Author author = config.getAuthor(blameLineInfo.getAuthorName(), blameLineInfo.getAuthorEmail());

if (!fileInfo.isFileLineTracked(lineCount) || author.isIgnoringFile(filePath)
|| CommitHash.isInsideCommitList(commitHash, config.getIgnoreCommitList())
|| commitDate.isBefore(sinceDate) || commitDate.isAfter(untilDate)) {
author = Author.UNKNOWN_AUTHOR;
Expand All @@ -184,32 +168,25 @@ private void aggregateBlameAuthorModifiedAndDateInfo(RepoConfiguration config, F
MESSAGE_SHALLOW_CLONING_LAST_MODIFIED_DATE_CONFLICT, config.getRepoName()));
}

fileInfo.setLineLastModifiedDate(lineNumber, commitDate);
fileInfo.setLineLastModifiedDate(lineCount, commitDate);
}
fileInfo.setLineAuthor(lineNumber, author);
fileInfo.setLineAuthor(lineCount, author);

if (shouldAnalyzeAuthorship && !author.equals(Author.UNKNOWN_AUTHOR)) {
String lineContent = fileInfo.getLine(lineNumber + 1).getContent();
String lineContent = fileInfo.getLine(lineCount + 1).getContent();
boolean isFullCredit = AuthorshipAnalyzer.analyzeAuthorship(config, fileInfo.getPath(), lineContent,
commitHash, author, originalityThreshold);
fileInfo.setIsFullCredit(lineNumber, isFullCredit);
fileInfo.setIsFullCredit(lineCount, isFullCredit);
}
}
}

/**
* Returns the analysis result from running git blame on {@code filePath} with reference to the root directory
* given in {@code config}.
*/
private String getGitBlameResult(RepoConfiguration config, String filePath) {
return GitBlame.blame(config.getRepoRoot(), filePath);
}

/**
* Returns the analysis result from running git blame with finding previous authors enabled on {@code filePath}
* with reference to the root directory given in {@code config}.
* Returns the analysis result from running git blame file on {@code filePath} with reference to the root directory
* given in {@code config} and {@code withPreviousAuthors}.
*/
private String getGitBlameWithPreviousAuthorsResult(RepoConfiguration config, String filePath) {
return GitBlame.blameWithPreviousAuthors(config.getRepoRoot(), filePath);
private List<GitBlameLineInfo> getGitBlameFileResult(RepoConfiguration config, String filePath,
boolean withPreviousAuthors) {
return GitBlame.blameFile(config.getRepoRoot(), filePath, withPreviousAuthors);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ public static boolean analyzeAuthorship(RepoConfiguration config, String filePat
GitBlameLineInfo deletedLineInfo = GitBlame.blameLine(config.getRepoRoot(), deletedLine.getGitBlameCommitHash(),
deletedLine.getFilePath(), deletedLine.getLineNumber());
Author previousAuthor = config.getAuthor(deletedLineInfo.getAuthorName(), deletedLineInfo.getAuthorEmail());
long sinceDateInMilliseconds = ZonedDateTime.of(config.getSinceDate(), config.getZoneId()).toEpochSecond();
long sinceDateInSeconds = ZonedDateTime.of(config.getSinceDate(), config.getZoneId()).toEpochSecond();

// Give full credit if author is unknown, is before since date, is in ignored list, or is an ignored file
if (previousAuthor.equals(Author.UNKNOWN_AUTHOR)
|| deletedLineInfo.getTimestampMilliseconds() < sinceDateInMilliseconds
|| deletedLineInfo.getTimestampInSeconds() < sinceDateInSeconds
|| CommitHash.isInsideCommitList(deletedLineInfo.getCommitHash(), config.getIgnoreCommitList())
|| previousAuthor.isIgnoringFile(Paths.get(deletedLine.getFilePath()))) {
return true;
Expand Down
54 changes: 45 additions & 9 deletions src/main/java/reposense/git/GitBlame.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import reposense.git.model.GitBlameLineInfo;
import reposense.util.StringsUtil;
Expand All @@ -16,6 +19,8 @@
public class GitBlame {
public static final String IGNORE_COMMIT_LIST_FILE_NAME = ".git-blame-ignore-revs";

private static final int BLAME_LINE_INFO_ROW_COUNT = 5;

private static final String COMMIT_HASH_REGEX = "(^[0-9a-f]{40} .*)";
private static final String AUTHOR_NAME_REGEX = "(^author .*)";
private static final String AUTHOR_EMAIL_REGEX = "(^author-mail .*)";
Expand All @@ -28,8 +33,9 @@ public class GitBlame {

private static final int AUTHOR_NAME_OFFSET = "author ".length();
private static final int AUTHOR_EMAIL_OFFSET = "author-mail ".length();
private static final int FULL_COMMIT_HASH_LENGTH = 40;
private static final int AUTHOR_TIME_OFFSET = "author-time ".length();
private static final int COMMIT_TIME_OFFSET = "committer-time ".length();
private static final int FULL_COMMIT_HASH_LENGTH = 40;

/**
* Returns the raw git blame result for the {@code fileDirectory}, performed at the {@code root} directory.
Expand Down Expand Up @@ -57,6 +63,17 @@ public static String blameWithPreviousAuthors(String root, String fileDirectory)
return StringsUtil.filterText(runCommand(rootPath, blameCommandWithFindingPreviousAuthors), COMBINATION_REGEX);
}

/**
* Returns the processed git blame result for the {@code fileDirectory} performed at the {@code root} directory,
* with reference to {@code withPreviousAuthors}.
*/
public static List<GitBlameLineInfo> blameFile(String root, String fileDirectory, boolean withPreviousAuthors) {
String blameResults = withPreviousAuthors
? blameWithPreviousAuthors(root, fileDirectory)
: blame(root, fileDirectory);
return processGitBlameResultLines(blameResults);
}

/**
* Returns the git blame result for {@code lineNumber} of {@code fileDirectory} at {@code commitHash}.
*/
Expand All @@ -68,21 +85,40 @@ public static GitBlameLineInfo blameLine(String root, String commitHash, String

String blameResult = StringsUtil.filterText(runCommand(rootPath, blameCommand),
COMBINATION_WITH_COMMIT_TIME_REGEX);

return processGitBlameResultLine(blameResult);
String[] blameResultLines = StringsUtil.NEWLINE.split(blameResult);
return processGitBlameResultLine(blameResultLines, false);
}

/**
* Returns the processed result of {@code blameResult}.
* Returns the processed result of {@code blameResults}.
*/
private static GitBlameLineInfo processGitBlameResultLine(String blameResult) {
String[] blameResultLines = StringsUtil.NEWLINE.split(blameResult);
private static List<GitBlameLineInfo> processGitBlameResultLines(String blameResults) {
String[] blameResultsLines = StringsUtil.NEWLINE.split(blameResults);
List<GitBlameLineInfo> blameFileResult = new ArrayList<>();
for (int lineCount = 0; lineCount < blameResultsLines.length; lineCount += BLAME_LINE_INFO_ROW_COUNT) {
String[] blameResultLines = Arrays
.copyOfRange(blameResultsLines, lineCount, lineCount + BLAME_LINE_INFO_ROW_COUNT - 1);
GitBlameLineInfo blameLineInfo = processGitBlameResultLine(blameResultLines, true);
blameFileResult.add(blameLineInfo);
}
return blameFileResult;
}

/**
* Returns the processed result of {@code blameResultLines}, with reference to {@code useAuthorTime}.
* If {@code useAuthorTime} is false, committer-time will be used for the timestamp.
*/
private static GitBlameLineInfo processGitBlameResultLine(String[] blameResultLines, boolean useAuthorTime) {
String commitHash = blameResultLines[0].substring(0, FULL_COMMIT_HASH_LENGTH);
String authorName = blameResultLines[1].substring(AUTHOR_NAME_OFFSET);
String authorEmail = blameResultLines[2].substring(AUTHOR_EMAIL_OFFSET).replaceAll("[<>]", "");
long timestampMilliseconds = Long.parseLong(blameResultLines[5].substring(COMMIT_TIME_OFFSET));

return new GitBlameLineInfo(commitHash, authorName, authorEmail, timestampMilliseconds);
long timestampInSeconds;
if (useAuthorTime) {
timestampInSeconds = Long.parseLong(blameResultLines[3].substring(AUTHOR_TIME_OFFSET));
} else {
timestampInSeconds = Long.parseLong(blameResultLines[5].substring(COMMIT_TIME_OFFSET));
}

return new GitBlameLineInfo(commitHash, authorName, authorEmail, timestampInSeconds);
}
}
12 changes: 6 additions & 6 deletions src/main/java/reposense/git/model/GitBlameLineInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ public class GitBlameLineInfo {
private final String commitHash;
private final String authorName;
private final String authorEmail;
private final long timestampMilliseconds;
private final long timestampInSeconds;

public GitBlameLineInfo(String commitHash, String authorName, String authorEmail, long timestampMilliseconds) {
public GitBlameLineInfo(String commitHash, String authorName, String authorEmail, long timestampInSeconds) {
this.commitHash = commitHash;
this.authorName = authorName;
this.authorEmail = authorEmail;
this.timestampMilliseconds = timestampMilliseconds;
this.timestampInSeconds = timestampInSeconds;
}

public String getCommitHash() {
Expand All @@ -28,8 +28,8 @@ public String getAuthorEmail() {
return authorEmail;
}

public long getTimestampMilliseconds() {
return timestampMilliseconds;
public long getTimestampInSeconds() {
return timestampInSeconds;
}

@Override
Expand All @@ -46,6 +46,6 @@ public boolean equals(Object other) {
return commitHash.equals(otherLineInfo.commitHash)
&& authorName.equals(otherLineInfo.authorName)
&& authorEmail.equals(otherLineInfo.authorEmail)
&& timestampMilliseconds == otherLineInfo.timestampMilliseconds;
&& timestampInSeconds == otherLineInfo.timestampInSeconds;
}
}
48 changes: 48 additions & 0 deletions src/test/java/reposense/git/GitBlameTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package reposense.git;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -78,4 +80,50 @@ public void blameLine_nonExistentLine_throwsRunTimeException() {
Assertions.assertThrows(RuntimeException.class, () -> GitBlame.blameLine(config.getRepoRoot(),
FAKE_AUTHOR_BLAME_TEST_FILE_COMMIT_08022018_STRING, "blameTest.java", 5));
}

@Test
public void blameFile_validFile_success() {
List<GitBlameLineInfo> expectedLineInfos = new ArrayList<>();
expectedLineInfos.add(new GitBlameLineInfo("8d0ac2ee20f04dce8df0591caed460bffacb65a4",
MAIN_AUTHOR_NAME, "[email protected]", 1517863105));
expectedLineInfos.add(new GitBlameLineInfo("8d0ac2ee20f04dce8df0591caed460bffacb65a4",
MAIN_AUTHOR_NAME, "[email protected]", 1517863105));
expectedLineInfos.add(new GitBlameLineInfo("768015345e70f06add2a8b7d1f901dc07bf70582",
FAKE_AUTHOR_NAME, "[email protected]", 1518085550));
expectedLineInfos.add(new GitBlameLineInfo("8d0ac2ee20f04dce8df0591caed460bffacb65a4",
MAIN_AUTHOR_NAME, "[email protected]", 1517863105));
List<GitBlameLineInfo> actualLineInfos = GitBlame.blameFile(config.getRepoRoot(), "blameTest.java", false);
Assertions.assertEquals(expectedLineInfos, actualLineInfos);
}

@Test
public void blameFile_validFileWithPreviousAuthors_success() {
config.setBranch(TEST_REPO_BLAME_WITH_PREVIOUS_AUTHORS_BRANCH);
GitCheckout.checkoutBranch(config.getRepoRoot(), TEST_REPO_BLAME_WITH_PREVIOUS_AUTHORS_BRANCH);
createTestIgnoreRevsFile(AUTHOR_TO_IGNORE_BLAME_COMMIT_LIST_07082021);

List<GitBlameLineInfo> expectedLineInfos = new ArrayList<>();
expectedLineInfos.add(new GitBlameLineInfo("8d0ac2ee20f04dce8df0591caed460bffacb65a4",
MAIN_AUTHOR_NAME, "[email protected]", 1517863105));
expectedLineInfos.add(new GitBlameLineInfo("8d0ac2ee20f04dce8df0591caed460bffacb65a4",
MAIN_AUTHOR_NAME, "[email protected]", 1517863105));
expectedLineInfos.add(new GitBlameLineInfo("768015345e70f06add2a8b7d1f901dc07bf70582",
FAKE_AUTHOR_NAME, "[email protected]", 1518085550));
expectedLineInfos.add(new GitBlameLineInfo("8d0ac2ee20f04dce8df0591caed460bffacb65a4",
MAIN_AUTHOR_NAME, "[email protected]", 1517863105));
List<GitBlameLineInfo> actualLineInfos = GitBlame.blameFile(config.getRepoRoot(), "blameTest.java", true);
Assertions.assertEquals(expectedLineInfos, actualLineInfos);
}

@Test
public void blameFile_nonExistentFile_throwsRunTimeException() {
Assertions.assertThrows(RuntimeException.class, () -> GitBlame.blameFile(config.getRepoRoot(),
"nonExistentFile", false));
}

@Test
public void blameFile_nonExistentFileWithPreviousAuthors_throwsRunTimeException() {
Assertions.assertThrows(RuntimeException.class, () -> GitBlame.blameFile(config.getRepoRoot(),
"nonExistentFile", true));
}
}

0 comments on commit e53c52c

Please sign in to comment.