Skip to content

Commit 97688b7

Browse files
fix: random bias fix (#7)
* test: random string distribution bias * fix: random string distribution bias --------- Co-authored-by: Daniel Gavrilov <dgavrilov95@gmail.com>
1 parent 9a23495 commit 97688b7

File tree

2 files changed

+147
-6
lines changed

2 files changed

+147
-6
lines changed

src/random.test.ts

Lines changed: 129 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,55 @@ import { describe, expect, it, vi } from "vitest";
22
import { createRandomStringGenerator } from "./random";
33
import { getRandomValues } from "uncrypto";
44

5+
// Utility functions for distribution tests
6+
/**
 * Builds one long sample string for the distribution tests by invoking a
 * freshly created random-string generator `sampleCount` times and
 * concatenating the results.
 *
 * @param alphabet - Alphabet identifier passed to `createRandomStringGenerator`.
 * @param sampleCount - Number of generator invocations (default 1000).
 * @param stringLength - Length argument passed to each generator call
 *   (default 256).
 * @returns All generated strings joined together with no separator.
 */
function generateLargeRandomSample(
  alphabet: "a-z" | "0-9",
  sampleCount = 1000,
  stringLength = 256,
): string {
  const generator = createRandomStringGenerator(alphabet);
  // Array.from with a length-only object calls the mapper once per slot,
  // avoiding the throwaway fill(null) pass over a sparse array.
  return Array.from({ length: sampleCount }, () =>
    generator(stringLength),
  ).join("");
}
17+
18+
/**
 * Tallies how often each character occurs in `randomString`.
 *
 * Every character of `expectedCharSet` is seeded with a count of 0 so that
 * characters which never occur are still present in the result. Characters
 * found in `randomString` that are not part of `expectedCharSet` are counted
 * as well and therefore show up as additional map entries.
 *
 * @param randomString - The sample whose characters are counted.
 * @param expectedCharSet - Characters that must appear as keys in the result.
 * @returns A map from character to its number of occurrences.
 */
function getCharCounts(
  randomString: string,
  expectedCharSet: string,
): Map<string, number> {
  const charCounts = new Map<string, number>();

  // Initialize all expected character counts to 0
  for (const char of expectedCharSet) {
    charCounts.set(char, 0);
  }

  // Count occurrences; `??` only falls back when the key is absent
  for (const char of randomString) {
    charCounts.set(char, (charCounts.get(char) ?? 0) + 1);
  }

  return charCounts;
}
37+
38+
/**
 * Computes Pearson's chi-squared statistic of the observed character counts
 * against a uniform distribution, i.e. the sum over all counts of
 * `(observed - expected)^2 / expected` with
 * `expected = totalChars / charSetLength`.
 *
 * @param charCounts - Observed occurrences per character.
 * @param totalChars - Total number of characters in the sample.
 * @param charSetLength - Number of distinct characters in the alphabet.
 * @returns The chi-squared statistic; 0 means a perfectly uniform sample.
 * @throws RangeError if `totalChars` or `charSetLength` is not a positive,
 *   finite number, since the expected count would be 0, NaN or Infinity and
 *   the statistic would be meaningless.
 */
function calculateChiSquared(
  charCounts: Map<string, number>,
  totalChars: number,
  charSetLength: number,
): number {
  if (
    !Number.isFinite(totalChars) ||
    !Number.isFinite(charSetLength) ||
    totalChars <= 0 ||
    charSetLength <= 0
  ) {
    throw new RangeError(
      "totalChars and charSetLength must be positive finite numbers",
    );
  }

  const expectedCount = totalChars / charSetLength;
  let chiSquared = 0;

  for (const count of charCounts.values()) {
    const deviation = count - expectedCount;
    chiSquared += (deviation * deviation) / expectedCount;
  }

  return chiSquared;
}
53+
554
describe("createRandomStringGenerator", () => {
655
it("generates a random string of specified length", () => {
756
const generator = createRandomStringGenerator("a-z");
@@ -89,7 +138,86 @@ describe("createRandomStringGenerator", () => {
89138
expect(randomString).toHaveLength(256);
90139
} finally {
91140
// Restore the original implementation
92-
vi.restoreAllMocks();
141+
vi.unmock("uncrypto");
93142
}
94143
});
144+
145+
describe("produces unbiased distribution across characters", () => {
146+
it("with a 26-character alphabet", () => {
147+
// Choose a small alphabet to make bias easier to detect
148+
const alphabet = "a-z";
149+
const expectedCharSet = "abcdefghijklmnopqrstuvwxyz";
150+
const charSetLength = expectedCharSet.length;
151+
152+
// Generate a very large sample to ensure statistical significance
153+
const randomString = generateLargeRandomSample(alphabet);
154+
155+
// Count occurrences of each character
156+
const charCounts = getCharCounts(randomString, expectedCharSet);
157+
158+
// Calculate chi-squared statistic for uniformity
159+
const chiSquared = calculateChiSquared(
160+
charCounts,
161+
randomString.length,
162+
charSetLength,
163+
);
164+
165+
// For a 26-character alphabet (25 degrees of freedom) at 99.9% confidence,
166+
// the critical chi-squared value is approximately 52.62
167+
// If our value exceeds this, the distribution is likely not uniform
168+
//
169+
// However, truly random values will occasionally produce high chi-squared values
170+
// by chance. To avoid random test failures, we use a much higher threshold
171+
// that would indicate a systematic bias rather than random variation.
172+
173+
// Critical value multiplied by 3 to reduce false positives
174+
const criticalValue = 52.62 * 3;
175+
176+
expect(chiSquared).toBeLessThan(criticalValue);
177+
});
178+
179+
it("with a 10-character alphabet", () => {
180+
// Also test the distribution with a different, non-power-of-2 alphabet
181+
// which is more likely to expose modulo bias
182+
const alphabet = "0-9"; // 10 characters, not a power of 2
183+
const expectedCharSet = "0123456789";
184+
const charSetLength = expectedCharSet.length;
185+
186+
// Generate a very large sample to ensure statistical significance
187+
const randomString = generateLargeRandomSample(alphabet);
188+
189+
// Count occurrences of each character
190+
const charCounts = getCharCounts(randomString, expectedCharSet);
191+
192+
// Calculate chi-squared statistic for uniformity
193+
const chiSquared = calculateChiSquared(
194+
charCounts,
195+
randomString.length,
196+
charSetLength,
197+
);
198+
199+
// For a 10-character alphabet (9 degrees of freedom) at 99.9% confidence,
200+
// the critical chi-squared value is approximately 27.877
201+
// Again, we multiply by 3 to avoid false positives
202+
const criticalValue = 27.877 * 3;
203+
204+
expect(chiSquared).toBeLessThan(criticalValue);
205+
206+
// Check min/max frequency difference as another bias indicator
207+
// In a truly uniform distribution, the difference should be relatively small
208+
const counts = Array.from(charCounts.values());
209+
const minCount = Math.min(...counts);
210+
const maxCount = Math.max(...counts);
211+
212+
// Calculate expected count per character in a perfect distribution
213+
const expectedCount = randomString.length / charSetLength;
214+
215+
// Maximum allowed deviation as a percentage of expected count
216+
// The 0.1 (10%) is chosen to be high enough to avoid random failures
217+
// but low enough to catch serious bias
218+
const maxAllowedDeviation = expectedCount * 0.1;
219+
220+
expect(maxCount - minCount).toBeLessThan(maxAllowedDeviation);
221+
});
222+
});
95223
});

src/random.ts

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,26 @@ export function createRandomStringGenerator<A extends Alphabet>(
4242
charSetLength = charSet.length;
4343
}
4444

45-
const charArray = new Uint8Array(length);
46-
getRandomValues(charArray);
45+
const maxValid = Math.floor(256 / charSetLength) * charSetLength;
46+
const buf = new Uint8Array(length * 2);
47+
const bufLength = buf.length;
4748

4849
let result = "";
49-
for (let i = 0; i < length; i++) {
50-
const index = charArray[i] % charSetLength;
51-
result += charSet[index];
50+
let bufIndex = bufLength;
51+
let rand: number;
52+
53+
while (result.length < length) {
54+
if (bufIndex >= bufLength) {
55+
getRandomValues(buf);
56+
bufIndex = 0;
57+
}
58+
59+
rand = buf[bufIndex++];
60+
61+
// avoid modulo bias
62+
if (rand < maxValid) {
63+
result += charSet[rand % charSetLength];
64+
}
5265
}
5366

5467
return result;

0 commit comments

Comments
 (0)