Skip to content

Commit 0495ab7

Browse files
committed
Fix count module FPR adjustment
1 parent 53be315 commit 0495ab7

File tree

3 files changed

+15
-11
lines changed

3 files changed

+15
-11
lines changed

src/modules/count/args.hpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ class ProgramArguments
1212
std::vector<uint64_t> histogram;
1313
std::string out_path;
1414
size_t out_size;
15-
float target_fpr;
15+
float target_err;
1616
unsigned kmer_length;
1717
unsigned num_threads;
1818
bool long_mode;
@@ -24,8 +24,8 @@ class ProgramArguments
2424
parser.add_argument("-s").help("path to spaced seeds file (one per line, if -k not specified)");
2525
parser.add_argument("-f").help("path to k-mer spectrum file (from ntCard)").required();
2626
parser.add_argument("-e")
27-
.help("target output false positive rate")
28-
.default_value(0.01F)
27+
.help("target output count error rate")
28+
.default_value(0.001F)
2929
.scan<'g', float>();
3030
parser.add_argument("-b").help("output CBF size (bytes)").scan<'u', size_t>();
3131
parser.add_argument("--long")
@@ -43,7 +43,7 @@ class ProgramArguments
4343
reads_paths = parser.get<std::vector<std::string>>("reads");
4444
out_path = parser.get("-o");
4545
out_size = parser.is_used("-b") ? parser.get<size_t>("-b") : 0;
46-
target_fpr = parser.get<float>("-e");
46+
target_err = parser.get<float>("-e");
4747
num_threads = parser.get<unsigned>("-t");
4848
long_mode = parser.get<bool>("--long");
4949

@@ -83,7 +83,7 @@ class ProgramArguments
8383
}
8484
}
8585
ss << "[-t] thread limit: " << num_threads << std::endl;
86-
ss << "[-e] target output false-positive rate: " << target_fpr << std::endl;
86+
ss << "[-e] target output false-positive rate: " << target_err << std::endl;
8787
return ss.str();
8888
}
8989

src/modules/count/main.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,13 +29,12 @@ run(std::vector<std::string> argv)
2929
size_t out_size;
3030
unsigned num_hashes;
3131
bool out_size_known = args.get()->out_size != 0;
32-
const auto cascade_fpr = 1 - std::cbrt(1 - args.get()->target_fpr);
3332
if (out_size_known) {
3433
out_size = args.get()->out_size;
3534
num_hashes = num_elements * log(2) / out_size;
3635
} else {
3736
num_hashes = 3;
38-
out_size = get_bf_size(num_elements, cascade_fpr, num_hashes);
37+
out_size = get_bf_size(num_elements, args.get()->target_err / 0.3, num_hashes);
3938
}
4039

4140
std::cout << "number of hash functions: " << num_hashes << std::endl;

tests/test_count.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -51,10 +51,15 @@ def test_counts(self):
5151
self.logger.info(f"cbf size (bytes) = {counts_cbf.get_bytes()}")
5252
self.logger.info(f"counts fpr = {counts_cbf.get_fpr()}")
5353
self.logger.info(f"depths fpr = {depths_cbf.get_fpr()}")
54-
self.assertLessEqual(counts_cbf.get_fpr(), 1e-3)
55-
self.assertLessEqual(depths_cbf.get_fpr(), 1e-3)
54+
self.assertLess(counts_cbf.get_fpr(), 0.04)
55+
self.assertLess(depths_cbf.get_fpr(), 0.04)
56+
num_err = 0
5657
for kmer, count, depth in zip(kmers, counts, depths):
57-
self.assertEqual(counts_cbf.contains(kmer), min(count, 255))
58-
self.assertEqual(depths_cbf.contains(kmer), min(depth, 255))
58+
count_err = counts_cbf.contains(kmer) != min(count, 255)
59+
depth_err = depths_cbf.contains(kmer) != min(depth, 255)
60+
num_err += 1 if count_err or depth_err else 0
61+
err_rate = num_err / len(kmers)
62+
self.logger.info(f"{num_err} errors ({err_rate})")
63+
self.assertLess(err_rate, 0.002)
5964
# remove temporary files
6065
out_dir.cleanup()

0 commit comments

Comments
 (0)