Skip to content

Commit

Permalink
chore: remove exclusion by file ext
Browse files Browse the repository at this point in the history
  • Loading branch information
Keyrxng committed Oct 31, 2024
1 parent f4332f9 commit a6ffb03
Showing 1 changed file with 1 addition and 158 deletions.
159 changes: 1 addition & 158 deletions src/helpers/pull-request-parsing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,8 @@ export async function processPullRequestDiff(diff: string, tokenLimits: TokenLim
// parse the diff into per-file diffs for quicker processing
const perFileDiffs = parsePerFileDiffs(diff);

// filter out obviously non-essential files; .png, .jpg, .pdf, etc.
const essentialFileDiffs = perFileDiffs.filter(({ filename }) => {
return isEssentialFile(filename);
});

// quick estimate using a simple heuristic; 3.5 characters per token
const estimatedFileDiffStats = essentialFileDiffs.map(({ filename, diffContent }) => {
const estimatedFileDiffStats = perFileDiffs.map(({ filename, diffContent }) => {
const estimatedTokenCount = Math.ceil(diffContent.length / 3.5);
return { filename, estimatedTokenCount, diffContent };
});
Expand Down Expand Up @@ -107,155 +102,3 @@ export function parsePerFileDiffs(diff: string): { filename: string; diffContent

return perFileDiffs;
}

/**
 * Lower-case filename suffixes that mark a file as non-essential.
 *
 * Every entry MUST be lower-case: `isEssentialFile` lower-cases the filename
 * before comparing, so a mixed-case entry could never match.
 */
const NON_ESSENTIAL_FILE_SUFFIXES: readonly string[] = [
  // Image files
  ".png",
  ".jpg",
  ".jpeg",
  ".gif",
  ".bmp",
  ".tiff",
  ".svg",
  ".ico",
  ".psd",
  ".ai",
  ".eps",

  // Video files
  ".mp4",
  ".avi",
  ".mov",
  ".wmv",
  ".flv",
  ".mkv",
  ".webm",
  ".mpeg",
  ".mpg",
  ".m4v",

  // Audio files
  ".mp3",
  ".wav",
  ".flac",
  ".aac",
  ".ogg",
  ".wma",
  ".m4a",
  ".aiff",
  ".ape",

  // Document files
  ".pdf",
  ".doc",
  ".docx",
  ".xls",
  ".xlsx",
  ".ppt",
  ".pptx",
  ".odt",
  ".ods",
  ".odp",

  // Archive files
  ".zip",
  ".rar",
  ".7z",
  ".tar",
  ".gz",
  ".bz2",
  ".xz",
  ".lz",
  ".z",

  // Executable and binary files
  ".exe",
  ".dll",
  ".so",
  ".dylib",
  ".bin",
  ".class",
  ".jar",
  ".war",
  ".ear",
  ".msi",
  ".apk",
  ".ipa",

  // Compiled object files
  ".o",
  ".obj",
  ".pyc",
  ".pyo",
  ".pyd",
  ".lib",
  ".a",
  ".dsym", // was ".dSYM", which could never match a lower-cased filename

  // System and temporary files
  ".sys",
  ".tmp",
  ".bak",
  ".old",
  ".swp",
  ".swo",
  ".lock",
  ".cfg",
  ".ini",

  // Database files
  ".db",
  ".sqlite",
  ".sqlite3",
  ".mdb",
  ".accdb",
  ".dbf",
  ".frm",
  ".myd",
  ".myi",

  // Font files
  ".ttf",
  ".otf",
  ".woff",
  ".woff2",
  ".eot",

  // Backup and miscellaneous files (".bak" is already listed above)
  ".log",
  ".orig",
  ".sav",
  ".save",
  ".dump",

  // Other non-essential files
  ".crt",
  ".pem",
  ".key",
  ".csr",
  ".der", // Certificate files
  ".plist",
  ".mobileprovision", // iOS specific files
  ".icns", // macOS icon files
  ".ds_store",
  "thumbs.db",
  "desktop.ini", // System files

  // Generated files
  ".map",
  ".min.js",
  ".d.ts",
  ".map.js",
  ".map.css",
  ".bundle.js",
  ".bundle.css",
  ".bundle.js.map",
  ".bundle.css.map",
  ".bundle.min.js",
];

/**
 * Decides whether a file should be kept for further processing, based only on
 * its name. Intended to skip non-readable/non-relevant files (binaries, media,
 * generated artifacts, etc.).
 *
 * The check is a case-insensitive suffix match: the filename is lower-cased and
 * compared against each entry of `NON_ESSENTIAL_FILE_SUFFIXES` with `endsWith`.
 *
 * @param filename - File name or path to classify.
 * @returns `false` when the name ends with a known non-essential suffix,
 *          `true` otherwise (including for an empty string).
 */
function isEssentialFile(filename: string): boolean {
  // Lower-case once instead of once per suffix.
  const lowerCasedName = filename.toLowerCase();
  return !NON_ESSENTIAL_FILE_SUFFIXES.some((suffix) => lowerCasedName.endsWith(suffix));
}

0 comments on commit a6ffb03

Please sign in to comment.