Skip to content

Commit

Permalink
Merge pull request #9 from capnajax/v2.1
Browse files Browse the repository at this point in the history
V2.1
  • Loading branch information
capnajax authored Oct 26, 2020
2 parents 21a7d51 + 2645767 commit 84d87b8
Show file tree
Hide file tree
Showing 7 changed files with 462 additions and 80 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,13 @@ Password is not acceptable.

## Change history

### v2.0 (current release)
### v2.1 (current release)

The algorithm for detecting character ranges changed to allow for detecting
even more character sets and finer-grained classification with little
performance loss.

### v2.0

Version 2.0 is a breaking change in that some vocabulary will change for
semantic accuracy and to more closely match the Unicode Consortium's use of
Expand Down
117 changes: 40 additions & 77 deletions lib/entropy.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
const emojiRegex = require('emoji-regex');
const fs = require('fs');
const path = require('path');
const ranges = require('./ranges');

const TOKEN_CLASS_EMOJI = 'emoji';
const TOKEN_CLASS_COMMON = 'common-password';
Expand All @@ -13,17 +14,6 @@ const DEFAULT_CONFIGURATION = {
sets: 'all'
}

// Common Chinese characters in both Simplified and Traditional Chinese.
// The string lines below come in pairs: each pair lists the same run of
// characters, first in Simplified form and then in Traditional form.
// The "common-hanzi" entropy class builds its lookup set from this string
// via letterSet().
const COMMON_HANZI =
// 100 most common Chinese words, Simplified and Traditional
"的一是不了人我在有他这为之大来以个中上们到说国和地也子时道出而要于就下得可你年生自会那后能对着事其里" +
"的一是不了人我在有他這為之大來以個中上們到說國和地也子時道出而要於就下得可你年生自會那後能對著事其里" +
"所去行过家十用发天如然作方成者多日都三小军二无同么经法当起与好看学进种将还分此心前面又定见只主没公从" +
"所去行過家十用發天如然作方成者多日都三小軍二無同麼經法當起與好看學進種將還分此心前面又定見只主沒公從" +
// extra characters that are likely to show up in passwords
"爱你四死秘" +
"愛妳四死秘";

/**
* Do not process characters in these ranges. Ranges are inclusive.
*/
Expand All @@ -41,59 +31,28 @@ const SKIP_RANGES = [
];

/**
* @constant ENTROPY_CLASSES
* @constant SPECIAL_ENTROPY_CLASSES
* Entropy classes that are difficult to express as a character range. Each
* class has a test to determine if the token is part of that class. These
* are tested before the ranges are tested.
* Each character can only be a member of one class; the first matching class takes
* precedence. Each category has an entropy score (usually how many possible characters with
* some consideration for the randomness of their usage), a description, and either a
* range or a test to indicate if the character is in the class. Total entropy is ln(scores
* of all the unique entropy classes multiplied*length);
*/
const ENTROPY_CLASSES = [
const SPECIAL_ENTROPY_CLASSES = [
{ score: Math.log(100), name: "emoji", test: (c) => {
return codeToChar[c] && codeToChar[c].tokenClass === TOKEN_CLASS_EMOJI;
}},
{ score: Math.log(20), name: "common-passwords", test: (c) => {
return codeToChar[c] && codeToChar[c].tokenClass === TOKEN_CLASS_COMMON;
}},
{ score: Math.log(10), name: "number", range: [0x30, 0x39]},
{ score: Math.log(26), name: "latin-small", range: [0x61,0x7a]},
{ score: Math.log(26), name: "latin-capital", range: [0x41,0x5a]},
// rated "10" because most languages use only a small number of accented characters.
// includes letters from Latin1 Supplement, Latins Extended A to E, IPA extensions,
// and Latin Extended Additional
{ score: Math.log(10), name: "latin-extended",
range: [0xc0, 0xd6, 0xd8, 0xf6, 0xf8, 0x02af, 0x1e00, 0x1eff,
0x2c60, 0x2c7f, 0xa720, 0xa7ff, 0xab30, 0xab6f]},
{ score: Math.log(12), name: "special", range: [0x21, 0x2f, 0x3a, 0x3f, 0x5b, 0x60]},
{ score: Math.log(33), name: "cyrillic-capital", range: [0x410,0x42f]},
{ score: Math.log(33), name: "cyrillic-small", range: [0x430,0x44f]},
// includes letters from Cyrillic Supplements, Cyrillic Extended A, B, and C,
// and Cyrillic letters not included in upper-cyrillic or lower-cyrillic
{ score: Math.log(10), name: "cyrillic-extended",
range: [0x400, 0x40f, 0x450, 0x52f, 0x2de0, 0x2dff, 0xa640, 0xa69f, 0x1c80, 0x1c8f]},
{ score: Math.log(24), name: "greek-capital", range: [0x391,0x3a9]},
{ score: Math.log(24), name: "greek-small", range: [0x3b1,0x3c9]},
// includes a few unassigned code points so we only have to test one range
// instead of 16
{ score: Math.log(10), name: "greek-extended", range: [0x1f00, 0x1ff]},
{ score: Math.log(40), name: "hiragana", range: [0x3041,0x3096]},
{ score: Math.log(40), name: "katakana", range: [0x30A0,0x30fa]},
{ score: Math.log(40), name: "bopomofo", range: [0x3105,0x312c,0x31a0,0x31b7]},
{ score: Math.log(500), name: "hangul",
range: [0xac00,0xd7af,0x1100,0x11ff,0x3130,0x318f,0xa960,0xa97F,0xd7B0,0xd7FF]},
{ score: Math.log(100), name: "common-hanzi", test: (c) => {
if (null === commonHanziSet) {
commonHanziSet=letterSet(COMMON_HANZI);
}
return commonHanziSet.has(c);
}},
{ score: Math.log(1000), name: "hanzi", range: [0x4e00, 0x9fbf]},
// unknown can cover "burred" latin, cyrillic, etc, so the
// entropy is limited to 100.
{ score: Math.log(100), name: "unknown", test: (c) => {return true;}},
// reachable only if 'unknown' is not permitted
{ score: Math.log(100), name: "illegal", test: (c) => {return true;}}
}}
];
/**
 * @constant EXCEPTION_ENTROPY_CLASSES
 * Fallback entropy classes for characters that matched neither a special
 * class test nor an acceptable known range. `unknown` is tallied when the
 * 'unknown' class is acceptable; otherwise `illegal` is tallied, and any
 * 'illegal' tally marks the password as not legal.
 */
const EXCEPTION_ENTROPY_CLASSES = {
unknown: { score: Math.log(100), name: "unknown"},
illegal: { score: Math.log(20), name: "illegal"}
};

const ENTROPY_CLASS_ALIASES = {
western: ['common-passwords', 'latin-capital','latin-small', 'number', 'special']
Expand All @@ -118,9 +77,9 @@ defaults(_configs, DEFAULT_CONFIGURATION);
// converted to numbers. Read the file, then split into words and sanitize it.
let commonPasswords =
fs.readFileSync(path.join(__dirname, 'common-passwords.txt'))
.toString().split('\n')
.map(pw => { return pw.replace(/#.*/, '').replace(/\s/g,''); })
.filter(pw => pw.length > 4);
.toString().split('\n')
.map(pw => { return pw.replace(/#.*/, '').replace(/\s/g,''); })
.filter(pw => pw.length > 4);
commonPasswords.push('@#$%'); // because this would have messed with the filters

/**
Expand Down Expand Up @@ -348,37 +307,42 @@ function passwordEntropy(_password) {
if (classTally[ecj.name]) {
classTally[ecj.name]++;
} else {
entropyClassScores[ecj.name] = ecj.score;
classTally[ecj.name] = 1;
}
};
let entropyClassScores = {};

for(let pci of uniquePasswordLetters) {
// first find the right class
let found = false;
for(let j = 0; j < ENTROPY_CLASSES.length; j++) {
let ecj = ENTROPY_CLASSES[j];
for(let j = 0; j < SPECIAL_ENTROPY_CLASSES.length; j++) {
let ecj = SPECIAL_ENTROPY_CLASSES[j];
// only search permitted entropy classes
if (entropyClassAcceptable(ecj.name) || ecj.name === 'illegal') {
if (ecj.hasOwnProperty("range")) {
for (let k = 0; k < ecj.range.length; k += 2) {
if (inRange(pci, ecj.range[k], ecj.range[k+1])) {
found = true;
break;
}
}
if (found) {
foundClass(ecj);
break;
}
} else if(ecj.hasOwnProperty("test")) {
if (ecj.test(pci)) {
foundClass(ecj);
found = true;
break;
}
if (entropyClassAcceptable(ecj.name)) {
if (ecj.test(pci)) {
foundClass(ecj);
found = true;
break;
}
}
}
if (!found) {
let rs = ranges.search(pci);
if (rs.known) {
if (entropyClassAcceptable(rs.range.name)) {
foundClass(rs.range);
found = true;
}
}
}
if (!found) {
if (entropyClassAcceptable('unknown')) {
foundClass(EXCEPTION_ENTROPY_CLASSES.unknown);
} else {
foundClass(EXCEPTION_ENTROPY_CLASSES.illegal);
}
}
}

result.legal = !classTally['illegal'];
Expand All @@ -400,8 +364,7 @@ function passwordEntropy(_password) {
// now calculate the total entropy of the classes represented
result.sets = Object.keys(classTally);
for (i = 0; i < result.sets.length; i++) {
let tokenSet = ENTROPY_CLASSES.find(t => t.name === result.sets[i]);
result.entropy += tokenSet.score;
result.entropy += entropyClassScores[result.sets[i]];
}
// total class entropy * number of tokens gives us the score
result.entropy *= result.length;
Expand Down
Loading

0 comments on commit 84d87b8

Please sign in to comment.