Skip to content

Commit

Permalink
Add LOC checking + option calculateLines: #34
Browse files Browse the repository at this point in the history
  • Loading branch information
Nixinova committed Sep 14, 2024
1 parent 2403222 commit 49e5047
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 28 deletions.
2 changes: 2 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

## Next
- Added application of GitHub-Linguist override rule `linguist-detectable`.
- Added lines of code (LOC) calculation to the output.
- Added option `checkDetected` to control the application of `linguist-detectable` overrides.
- Added option `calculateLines` (defaults to true) to control whether LOC calculations are performed.

## 2.7.1
*2024-05-08*
Expand Down
27 changes: 23 additions & 4 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ Running LinguistJS on this folder will return the following JSON:
"files": {
"count": 5,
"bytes": 6020,
"lines": {
"total": 100,
"content": 90,
"code": 80,
},
"results": {
"/src/index.ts": "TypeScript",
"/src/cli.js": "JavaScript",
Expand All @@ -64,16 +69,26 @@ Running LinguistJS on this folder will return the following JSON:
"languages": {
"count": 3,
"bytes": 6010,
"lines": {
"total": 90,
"content": 80,
"code": 70,
},
"results": {
"JavaScript": { "type": "programming", "bytes": 1000, "color": "#f1e05a" },
"Markdown": { "type": "prose", "bytes": 3000, "color": "#083fa1" },
"Ruby": { "type": "programming", "bytes": 10, "color": "#701516" },
"TypeScript": { "type": "programming", "bytes": 2000, "color": "#2b7489" },
"JavaScript": { "type": "programming", "bytes": 1000, "lines": { "total": 49, "content": 49, "code": 44 }, "color": "#f1e05a" },
"Markdown": { "type": "prose", "bytes": 3000, "lines": { "total": 10, "content": 5, "code": 5 }, "color": "#083fa1" },
"Ruby": { "type": "programming", "bytes": 10, "lines": { "total": 1, "content": 1, "code": 1 }, "color": "#701516" },
"TypeScript": { "type": "programming", "bytes": 2000, "lines": { "total": 30, "content": 25, "code": 20 }, "color": "#2b7489" },
},
},
"unknown": {
"count": 1,
"bytes": 10,
"lines": {
"total": 10,
"content": 10,
"code": 10,
},
"filenames": {
"no-lang": 10,
},
Expand Down Expand Up @@ -130,6 +145,8 @@ const { files, languages, unknown } = await linguist(fileNames, { fileContent, .
Alias for `checkAttributes:false, checkIgnored:false, checkDetected:false, checkHeuristics:false, checkShebang:false, checkModeline:false`.
- `offline` (boolean):
Whether to use pre-packaged metadata files instead of fetching them from GitHub at runtime (defaults to `false`).
- `calculateLines` (boolean):
Whether to calculate lines of code totals (defaults to `true`).
- `keepVendored` (boolean):
Whether to keep vendored files (dependencies, etc) (defaults to `false`).
Does nothing when `fileContent` is set.
Expand Down Expand Up @@ -189,6 +206,8 @@ linguist --version
Alias for `--checkAttributes=false --checkIgnored=false --checkHeuristics=false --checkShebang=false --checkModeline=false`.
- `--offline`:
Use pre-packaged metadata files instead of fetching them from GitHub at runtime.
- `--calculateLines`:
Calculate lines of code totals from files (enabled by default; pass `--calculateLines=false` to disable).
- `--keepVendored`:
Include vendored files (auto-generated files, dependencies folder, etc) in the output.
- `--keepBinary`:
Expand Down
6 changes: 4 additions & 2 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ program
.option('-F|--listFiles [bool]', 'Whether to list every matching file under the language results', false)
.option('-q|--quick [bool]', 'Skip complex language analysis (alias for -{A|I|H|S}=false)', false)
.option('-o|--offline [bool]', 'Use packaged data files instead of fetching latest from GitHub', false)
.option('-L|--calculateLines [bool]', 'Calculate lines of code totals', true)
.option('-V|--keepVendored [bool]', 'Prevent skipping over vendored/generated files', false)
.option('-B|--keepBinary [bool]', 'Prevent skipping over binary files', false)
.option('-r|--relativePaths [bool]', 'Convert absolute file paths to relative', false)
Expand Down Expand Up @@ -78,16 +79,17 @@ if (args.analyze) (async () => {
}
}
// List parsed results
for (const [lang, { bytes, color }] of sortedEntries) {
for (const [lang, { bytes, lines, color }] of sortedEntries) {
const percent = (bytes: number) => bytes / (totalBytes || 1) * 100;
const fmtd = {
index: (++count).toString().padStart(2, ' '),
lang: lang.padEnd(24, ' '),
percent: percent(bytes).toFixed(2).padStart(5, ' '),
bytes: bytes.toLocaleString().padStart(10, ' '),
loc: lines.code.toLocaleString().padStart(10, ' '),
icon: colouredMsg(hexToRgb(color ?? '#ededed'), '\u2588'),
};
console.log(` ${fmtd.index}. ${fmtd.icon} ${fmtd.lang} ${fmtd.percent}% ${fmtd.bytes} B`);
console.log(` ${fmtd.index}. ${fmtd.icon} ${fmtd.lang} ${fmtd.percent}% ${fmtd.bytes} B ${fmtd.loc} LOC`);

// If using `listFiles` option, list all files tagged as this language
if (args.listFiles) {
Expand Down
2 changes: 1 addition & 1 deletion src/helpers/read-file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import fs from 'fs';
* Read part of a file on disc.
* @throws 'EPERM' if the file is not readable.
*/
export default async function readFile(filename: string, onlyFirstLine: boolean = false): Promise<string> {
export default async function readFileChunk(filename: string, onlyFirstLine: boolean = false): Promise<string> {
const chunkSize = 100;
const stream = fs.createReadStream(filename, { highWaterMark: chunkSize });
let content = '';
Expand Down
53 changes: 43 additions & 10 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { isBinaryFile } from 'isbinaryfile';

import walk from './helpers/walk-tree';
import loadFile, { parseGeneratedDataFile } from './helpers/load-data';
import readFile from './helpers/read-file';
import readFileChunk from './helpers/read-file';
import parseAttributes, { FlagAttributes } from './helpers/parse-gitattributes';
import pcre from './helpers/convert-pcre';
import { normPath } from './helpers/norm-path';
Expand All @@ -24,6 +24,7 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom

// Normalise input option arguments
opts = {
calculateLines: opts.calculateLines ?? true, // default to true if unset
checkIgnored: !opts.quick,
checkDetected: !opts.quick,
checkAttributes: !opts.quick,
Expand All @@ -46,9 +47,9 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom
const extensions: Record<T.AbsFile, string> = {};
const globOverrides: Record<T.AbsFile, T.LanguageResult> = {};
const results: T.Results = {
files: { count: 0, bytes: 0, results: {}, alternatives: {} },
languages: { count: 0, bytes: 0, results: {} },
unknown: { count: 0, bytes: 0, extensions: {}, filenames: {} },
files: { count: 0, bytes: 0, lines: { total: 0, content: 0, code: 0 }, results: {}, alternatives: {} },
languages: { count: 0, bytes: 0, lines: { total: 0, content: 0, code: 0 }, results: {} },
unknown: { count: 0, bytes: 0, lines: { total: 0, content: 0, code: 0 }, extensions: {}, filenames: {} },
};

// Set a common root path so that vendor paths do not incorrectly match parent folders
Expand Down Expand Up @@ -107,7 +108,7 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom
for (const attrFile of nestedAttrFiles) {
const relAttrFile = relPath(attrFile);
const relAttrFolder = paths.dirname(relAttrFile);
const contents = await readFile(attrFile);
const contents = await readFileChunk(attrFile);
const parsed = parseAttributes(contents, relAttrFolder);
for (const { glob, attrs } of parsed) {
manualAttributes[glob] = attrs;
Expand Down Expand Up @@ -231,7 +232,7 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom
firstLine = manualFileContent[files.indexOf(file)]?.split('\n')[0] ?? null;
}
else if (fs.existsSync(file) && !fs.lstatSync(file).isDirectory()) {
firstLine = await readFile(file, true).catch(() => null);
firstLine = await readFileChunk(file, true).catch(() => null);
}
else continue;

Expand Down Expand Up @@ -347,7 +348,7 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom
}

// Check file contents and apply heuristic patterns
const fileContent = opts.fileContent ? manualFileContent[files.indexOf(file)] : await readFile(file).catch(() => null);
const fileContent = opts.fileContent ? manualFileContent[files.indexOf(file)] : await readFileChunk(file).catch(() => null);

// Skip if file read errors
if (fileContent === null) continue;
Expand All @@ -373,7 +374,6 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom
}

// Skip specified categories
// todo linguist-detectable
if (opts.categories?.length) {
const categories: T.Category[] = ['data', 'markup', 'programming', 'prose'];
const hiddenCategories = categories.filter(cat => !opts.categories!.includes(cat));
Expand Down Expand Up @@ -417,8 +417,21 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom
// Load language bytes size
for (const [file, lang] of Object.entries(results.files.results)) {
if (lang && !langData[lang]) continue;
// Calculate file size
const fileSize = manualFileContent[files.indexOf(file)]?.length ?? fs.statSync(file).size;
results.files.bytes += fileSize;
// Calculate lines of code
const loc = { total: 0, content: 0, code: 0 };
if (opts.calculateLines) {
// TODO: catch error?
const fileContent = (manualFileContent[files.indexOf(file)] ?? fs.readFileSync(file).toString()) ?? '';
const allLines = fileContent.split(/\r?\n/gm);
loc.total = allLines.length;
loc.content = allLines.filter(line => line.trim().length > 0).length;
const codeLines = fileContent
.replace(/^\s*(\/\/|# |;|--).+/gm, '')
.replace(/\/\*.+\*\/|<!--.+-->/sg, '')
loc.code = codeLines.split(/\r?\n/gm).filter(line => line.trim().length > 0).length;
}
// If no language found, add extension in other section
if (!lang) {
const ext = paths.extname(file);
Expand All @@ -427,16 +440,36 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom
results.unknown[unknownType][name] ??= 0;
results.unknown[unknownType][name] += fileSize;
results.unknown.bytes += fileSize;
results.unknown.lines.total += loc.total;
results.unknown.lines.content += loc.content;
results.unknown.lines.code += loc.code;
continue;
}
// Add language and bytes data to corresponding section
const { type } = langData[lang];
results.languages.results[lang] ??= { type, bytes: 0, color: langData[lang].color };
results.languages.results[lang] ??= { type, bytes: 0, lines: { total: 0, content: 0, code: 0 }, color: langData[lang].color };
if (opts.childLanguages) {
results.languages.results[lang].parent = langData[lang].group;
}
// apply file sizes
results.files.bytes += fileSize;
results.languages.results[lang].bytes += fileSize;
results.languages.bytes += fileSize;
// apply LOC calculations
results.files.lines.total += loc.total;
results.files.lines.content += loc.content;
results.files.lines.code += loc.code;
results.languages.results[lang].lines.total += loc.total;
results.languages.results[lang].lines.content += loc.content;
results.languages.results[lang].lines.code += loc.code;
results.languages.lines.total += loc.total;
results.languages.lines.content += loc.content;
results.languages.lines.code += loc.code;
}

// Set every `lines` output to NaN when line calculation is disabled.
// All line counters are initialised to zero, so any counter left untouched here
// would report "0 lines", which is indistinguishable from a genuinely empty result.
if (opts.calculateLines === false) {
	// Build a fresh object per assignment so that no two results share one mutable reference
	const uncalculated = () => ({ total: NaN, content: NaN, code: NaN });
	results.files.lines = uncalculated();
	results.languages.lines = uncalculated();
	results.unknown.lines = uncalculated();
	// Per-language entries carry their own `lines` totals as well
	for (const langResult of Object.values(results.languages.results)) {
		langResult.lines = uncalculated();
	}
}

// Set counts
Expand Down
21 changes: 21 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export interface Options {
childLanguages?: boolean
quick?: boolean
offline?: boolean
calculateLines?: boolean
checkIgnored?: boolean
checkDetected?: boolean
checkAttributes?: boolean
Expand All @@ -33,15 +34,30 @@ export interface Results {
files: {
count: Integer
bytes: Bytes
lines: {
total: Integer
content: Integer
code: Integer
}
/** Note: Results use slashes as delimiters even on Windows. */
results: Record<FilePath, LanguageResult>
alternatives: Record<FilePath, LanguageResult[]>
}
languages: {
count: Integer
bytes: Bytes
lines: {
total: Integer
content: Integer
code: Integer
}
results: Record<Language, {
bytes: Bytes
lines: {
total: Integer
content: Integer
code: Integer
}
type: Category
parent?: Language
color?: `#${string}`
Expand All @@ -50,6 +66,11 @@ export interface Results {
unknown: {
count: Integer
bytes: Bytes
lines: {
total: Integer
content: Integer
code: Integer
}
extensions: Record<string, Bytes>
filenames: Record<string, Bytes>
}
Expand Down
26 changes: 15 additions & 11 deletions test/expected.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
{
"files": {
"count": 11,
"bytes": 69,
"count": 12,
"bytes": 190,
"lines": { "total": 25, "content": 15, "code": 10 },
"results": {
"~/al.al": "Perl",
"~/alternatives.asc": "AGS Script",
"~/file.txt": "JavaScript",
"~/folder/file.txt": "JavaScript",
"~/folder/sub.txt": "Text",
"~/hashbang": "JavaScript",
"~/loc.c": "C",
"~/modeline.txt": "C++",
"~/package-lock.json": "JSON",
"~/detected.json": "JSON",
Expand All @@ -20,21 +22,23 @@
}
},
"languages": {
"count": 7,
"bytes": 56,
"count": 8,
"bytes": 190,
"results": {
"Perl": { "type": "programming", "bytes": 0, "color": "#0298c3" },
"AGS Script": { "type": "programming", "bytes": 14, "color": "#B9D9FF" },
"JavaScript": { "type": "programming", "bytes": 23, "color": "#f1e05a" },
"JSON": { "type": "data", "bytes": 6, "color": "#292929"},
"Text": { "type": "prose", "bytes": 0 },
"C++": { "type": "programming", "bytes": 15, "color": "#f34b7d" },
"TOML": { "type": "data", "bytes": 0, "color": "#9c4221" }
"Perl": { "type": "programming", "bytes": 0, "lines": { "total": 1, "content": 0, "code": 0 },"color": "#0298c3" },
"AGS Script": { "type": "programming", "bytes": 14, "lines": { "total": 2, "content": 1, "code": 1 },"color": "#B9D9FF" },
"JSON": { "type": "data", "bytes": 8, "lines": { "total": 4, "content": 2, "code": 2 },"color": "#292929"},
"JavaScript": { "type": "programming", "bytes": 23, "lines": { "total": 4, "content": 3, "code": 3 },"color": "#f1e05a" },
"Text": { "type": "prose", "bytes": 0, "lines": { "total": 1, "content": 0, "code": 0 } },
"C": { "type": "programming", "bytes": 130, "lines": { "total": 10, "content": 8, "code": 4 }, "color": "#555555"},
"C++": { "type": "programming", "bytes": 15, "lines": { "total": 2, "content": 1, "code": 0 }, "color": "#f34b7d" },
"TOML": { "type": "data", "bytes": 0, "lines": { "total": 1, "content": 0, "code": 0 }, "color": "#9c4221" }
}
},
"unknown": {
"count": 1,
"bytes": 9,
"lines": { "total": 2, "content": 1, "code": 1 },
"extensions": {},
"filenames": {
"unknown": 9
Expand Down
9 changes: 9 additions & 0 deletions test/samples/loc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include <stdio.h>

// empty line above
int main() {
return 1;
}
/*
total lines: 10, content lines: 8, code lines: 4
*/

0 comments on commit 49e5047

Please sign in to comment.