diff --git a/src/helpers/pull-request-parsing.ts b/src/helpers/pull-request-parsing.ts index efa307b..87a86e6 100644 --- a/src/helpers/pull-request-parsing.ts +++ b/src/helpers/pull-request-parsing.ts @@ -9,13 +9,8 @@ export async function processPullRequestDiff(diff: string, tokenLimits: TokenLim // parse the diff into per-file diffs for quicker processing const perFileDiffs = parsePerFileDiffs(diff); - // filter out obviously non-essential files; .png, .jpg, .pdf, etc. - const essentialFileDiffs = perFileDiffs.filter(({ filename }) => { - return isEssentialFile(filename); - }); - // quick estimate using a simple heuristic; 3.5 characters per token - const estimatedFileDiffStats = essentialFileDiffs.map(({ filename, diffContent }) => { + const estimatedFileDiffStats = perFileDiffs.map(({ filename, diffContent }) => { const estimatedTokenCount = Math.ceil(diffContent.length / 3.5); return { filename, estimatedTokenCount, diffContent }; }); @@ -107,155 +102,3 @@ export function parsePerFileDiffs(diff: string): { filename: string; diffContent return perFileDiffs; } - -// This speeds things up considerably by skipping non-readable/non-relevant files -function isEssentialFile(filename: string): boolean { - const nonEssentialExtensions = [ - // Image files - ".png", - ".jpg", - ".jpeg", - ".gif", - ".bmp", - ".tiff", - ".svg", - ".ico", - ".psd", - ".ai", - ".eps", - - // Video files - ".mp4", - ".avi", - ".mov", - ".wmv", - ".flv", - ".mkv", - ".webm", - ".mpeg", - ".mpg", - ".m4v", - - // Audio files - ".mp3", - ".wav", - ".flac", - ".aac", - ".ogg", - ".wma", - ".m4a", - ".aiff", - ".ape", - - // Document files - ".pdf", - ".doc", - ".docx", - ".xls", - ".xlsx", - ".ppt", - ".pptx", - ".odt", - ".ods", - ".odp", - - // Archive files - ".zip", - ".rar", - ".7z", - ".tar", - ".gz", - ".bz2", - ".xz", - ".lz", - ".z", - - // Executable and binary files - ".exe", - ".dll", - ".so", - ".dylib", - ".bin", - ".class", - ".jar", - ".war", - ".ear", - ".msi", - ".apk", - ".ipa", - - // Compiled object files - ".o", - ".obj", - ".pyc", - ".pyo", - ".pyd", - ".lib", - ".a", - ".dSYM", - - // System and temporary files - ".sys", - ".tmp", - ".bak", - ".old", - ".swp", - ".swo", - ".lock", - ".cfg", - ".ini", - - // Database files - ".db", - ".sqlite", - ".sqlite3", - ".mdb", - ".accdb", - ".dbf", - ".frm", - ".myd", - ".myi", - - // Font files - ".ttf", - ".otf", - ".woff", - ".woff2", - ".eot", - - // Backup and miscellaneous files - ".log", - ".bak", - ".orig", - ".sav", - ".save", - ".dump", - - // Other non-essential files - ".crt", - ".pem", - ".key", - ".csr", - ".der", // Certificate files - ".plist", - ".mobileprovision", // iOS specific files - ".icns", // macOS icon files - ".ds_store", - "thumbs.db", - "desktop.ini", // System files - - // Generated files - ".map", - ".min.js", - ".d.ts", - ".map.js", - ".map.css", - ".bundle.js", - ".bundle.css", - ".bundle.js.map", - ".bundle.css.map", - ".bundle.min.js", - ]; - - return !nonEssentialExtensions.some((ext) => filename.toLowerCase().endsWith(ext)); -}