Skip to content

Commit 6ae84b1

Browse files
committed
feat: updated main pipeline printing/timers
1 parent c4d7139 commit 6ae84b1

File tree

6 files changed

+56
-48
lines changed

6 files changed

+56
-48
lines changed

src/cluster.cpp

-7
Original file line numberDiff line numberDiff line change
@@ -377,9 +377,6 @@ superclusterData::superclusterData(
377377
* during wf_swg_cluster().
378378
*/
379379
void superclusterData::supercluster() {
380-
if (g.verbosity >= 1) INFO(" ");
381-
if (g.verbosity >= 1) INFO("%s[%d/%d] Superclustering TRUTH and QUERY variants%s",
382-
COLOR_PURPLE, TIME_SUPCLUST, TIME_TOTAL-1, COLOR_WHITE);
383380

384381
// iterate over each contig
385382
int total_superclusters = 0;
@@ -529,10 +526,6 @@ void superclusterData::supercluster() {
529526
* to all `VariantData`
530527
* */
531528
void simple_cluster(std::shared_ptr<variantData> vcf, int callset) {
532-
if (g.verbosity >= 1) INFO(" ");
533-
if (g.verbosity >= 1) INFO("%sClustering %s VCF%s '%s'",
534-
COLOR_PURPLE, callset_strs[callset].data(),
535-
COLOR_WHITE, vcf->filename.data());
536529
bool print = false;
537530

538531
// cluster each contig

src/defs.h

+8-7
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,14 @@ class idx4;
8888
#define SWITCHTYPES 7
8989

9090
// runtime timers
91-
#define TIME_READ 0
92-
#define TIME_CLUST 1
93-
#define TIME_SUPCLUST 2
94-
#define TIME_PR_ALN 3
95-
#define TIME_PHASE 4
96-
#define TIME_WRITE 5
97-
#define TIME_TOTAL 6
91+
#define TIME_READ 0
92+
#define TIME_EXACT_CLUSTER 1
93+
#define TIME_ALIGN_EVAL_1 2
94+
#define TIME_SIMPLE_CLUSTER 3
95+
#define TIME_ALIGN_EVAL_2 4
96+
#define TIME_PHASE 5
97+
#define TIME_WRITE 6
98+
#define TIME_TOTAL 7
9899

99100
// alignment
100101
#define QUERY 0

src/dist.cpp

-3
Original file line numberDiff line numberDiff line change
@@ -738,9 +738,6 @@ void wf_swg_align(
738738
void precision_recall_threads_wrapper(
739739
std::shared_ptr<superclusterData> clusterdata_ptr,
740740
std::vector< std::vector< std::vector<int> > > sc_groups) {
741-
if (g.verbosity >= 1) INFO(" ");
742-
if (g.verbosity >= 1) INFO("%s[%d/%d] Calculating precision and recall%s",
743-
COLOR_PURPLE, TIME_PR_ALN, TIME_TOTAL-1, COLOR_WHITE);
744741

745742
if (g.verbosity >= 1 )
746743
for (int i = 0; i < g.thread_nsteps; i++) {

src/globals.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ std::vector<std::string> region_strs = {"OUTSIDE", "INSIDE ", "BORDER ", "OFF CT
1818
std::vector<std::string> switch_strs =
1919
{"FLIP", "SWITCH", "SWITCH+FLIP", "SWITCH_ERR", "FLIP_BEG", "FLIP_END", "NONE"};
2020
std::vector<std::string> timer_strs =
21-
{"reading", "clustering", "superclustering", "precision/recall",
22-
"phasing", "writing", "total"};
21+
{"reading", "exact clustering (small vars)", "alignment eval (small vars)",
22+
"simple clustering (large vars)", "alignment eval (large and FP/FN vars)", "phasing",
23+
"writing", "total"};
2324
std::vector<std::string> type_strs = {"REF", "SNP", "INS", "DEL", "CPX"};
2425
std::vector<std::string> type_strs2 = {"ALL", "SNP", "INS", "DEL", "INDEL"};
2526
std::vector<std::string> vartype_strs = {"SNP", "INDEL", "SV", "ALL"};

src/main.cpp

+44-28
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ int main(int argc, char **argv) {
4141
check_contigs(query_ptr, truth_ptr, ref_ptr);
4242

4343
// cluster query VCF
44-
g.timers[TIME_CLUST].start();
44+
g.timers[TIME_EXACT_CLUSTER].start();
4545
if (g.verbosity >= 1) INFO(" ");
46-
if (g.verbosity >= 1) INFO("%s[Q %d/%d] Clustering %s VCF%s '%s'",
47-
COLOR_PURPLE, TIME_CLUST, TIME_TOTAL-1, callset_strs[QUERY].data(),
46+
if (g.verbosity >= 1) INFO("%s[Q %d/%d] Exact clustering %s VCF (small vars)%s '%s'",
47+
COLOR_PURPLE, TIME_EXACT_CLUSTER, TIME_TOTAL-1, callset_strs[QUERY].data(),
4848
COLOR_WHITE, query_ptr->filename.data());
4949
std::vector<std::thread> threads;
5050
for (int t = 0; t < HAPS*int(query_ptr->contigs.size()); t++) {
@@ -57,13 +57,11 @@ int main(int argc, char **argv) {
5757
}
5858
}
5959
for (std::thread & thread : threads) thread.join();
60-
g.timers[TIME_CLUST].stop();
6160

6261
// cluster truth VCF
63-
g.timers[TIME_CLUST].start();
6462
if (g.verbosity >= 1) INFO(" ");
65-
if (g.verbosity >= 1) INFO("%s[T %d/%d] Clustering %s VCF%s '%s'",
66-
COLOR_PURPLE, TIME_CLUST, TIME_TOTAL-1, callset_strs[TRUTH].data(),
63+
if (g.verbosity >= 1) INFO("%s[T %d/%d] Exact clustering %s VCF (small vars)%s '%s'",
64+
COLOR_PURPLE, TIME_EXACT_CLUSTER, TIME_TOTAL-1, callset_strs[TRUTH].data(),
6765
COLOR_WHITE, truth_ptr->filename.data());
6866
threads.clear();
6967
for (int t = 0; t < HAPS*int(truth_ptr->contigs.size()); t++) {
@@ -76,22 +74,24 @@ int main(int argc, char **argv) {
7674
}
7775
}
7876
for (std::thread & thread : threads) thread.join();
79-
g.timers[TIME_CLUST].stop();
8077

81-
// calculate superclusters
82-
g.timers[TIME_SUPCLUST].start();
78+
// first-round superclustering
79+
if (g.verbosity >= 1) INFO(" ");
80+
if (g.verbosity >= 1) INFO("%s[%d/%d] Superclustering TRUTH and QUERY variants (small vars)%s",
81+
COLOR_PURPLE, TIME_EXACT_CLUSTER, TIME_TOTAL-1, COLOR_WHITE);
8382
std::shared_ptr<superclusterData> sc_data_ptr(
8483
new superclusterData(query_ptr, truth_ptr, ref_ptr));
85-
86-
// calculate supercluster sizes
8784
auto sc_groups = sort_superclusters(sc_data_ptr);
88-
g.timers[TIME_SUPCLUST].stop();
85+
g.timers[TIME_EXACT_CLUSTER].stop();
8986

90-
// calculate precision/recall and genotypes
91-
g.timers[TIME_PR_ALN].start();
87+
// first-round evaluation: precision/recall and genotypes
88+
g.timers[TIME_ALIGN_EVAL_1].start();
89+
if (g.verbosity >= 1) INFO(" ");
90+
if (g.verbosity >= 1) INFO("%s[%d/%d] Evaluating variant calls (small vars)%s",
91+
COLOR_PURPLE, TIME_ALIGN_EVAL_1, TIME_TOTAL-1, COLOR_WHITE);
9292
precision_recall_threads_wrapper(sc_data_ptr, sc_groups);
93-
INFO(" done with precision-recall (round #1)");
94-
g.timers[TIME_PR_ALN].stop();
93+
INFO(" done with variant call evaluation (round 1: small variants)");
94+
g.timers[TIME_ALIGN_EVAL_1].stop();
9595

9696
// pull out FP and FN variants from first-round evaluation
9797
std::shared_ptr<variantData> query_ptr_fp(new variantData());
@@ -102,35 +102,51 @@ int main(int argc, char **argv) {
102102
query_ptr2->merge(query_ptr_fp);
103103
truth_ptr2->merge(truth_ptr_fn);
104104

105-
// perform second-round evaluation
106-
g.timers[TIME_CLUST].start();
105+
// perform second-round clustering (simple cluster large and FP/FN vars)
106+
g.timers[TIME_SIMPLE_CLUSTER].start();
107+
if (g.verbosity >= 1) INFO(" ");
108+
if (g.verbosity >= 1) INFO("%s[Q %d/%d] Simple clustering %s VCF (large and FP/FN vars)%s '%s'",
109+
COLOR_PURPLE, TIME_SIMPLE_CLUSTER, TIME_TOTAL-1, callset_strs[QUERY].data(), COLOR_WHITE,
110+
query_ptr2->filename.data());
107111
simple_cluster(query_ptr2, QUERY);
112+
if (g.verbosity >= 1) INFO(" ");
113+
if (g.verbosity >= 1) INFO("%s[T %d/%d] Simple clustering %s VCF (large and FP/FN vars)%s '%s'",
114+
COLOR_PURPLE, TIME_SIMPLE_CLUSTER, TIME_TOTAL-1, callset_strs[TRUTH].data(), COLOR_WHITE,
115+
truth_ptr2->filename.data());
108116
simple_cluster(truth_ptr2, TRUTH);
109-
g.timers[TIME_CLUST].stop();
110-
g.timers[TIME_SUPCLUST].start();
117+
118+
// second-round superclustering
119+
if (g.verbosity >= 1) INFO(" ");
120+
if (g.verbosity >= 1) INFO("%s[%d/%d] Superclustering TRUTH and QUERY variants (large and FP/FN vars)%s",
121+
COLOR_PURPLE, TIME_SIMPLE_CLUSTER, TIME_TOTAL-1, COLOR_WHITE);
111122
std::shared_ptr<superclusterData> sc_data_ptr2(
112123
new superclusterData(query_ptr2, truth_ptr2, ref_ptr));
113124
sc_groups = sort_superclusters(sc_data_ptr2);
114-
g.timers[TIME_SUPCLUST].stop();
115-
g.timers[TIME_PR_ALN].start();
125+
g.timers[TIME_SIMPLE_CLUSTER].stop();
126+
127+
// second-round evaluation: precision/recall and genotypes
128+
g.timers[TIME_ALIGN_EVAL_2].start();
129+
if (g.verbosity >= 1) INFO(" ");
130+
if (g.verbosity >= 1) INFO("%s[%d/%d] Evaluating variant calls (large and FP/FN vars)%s",
131+
COLOR_PURPLE, TIME_ALIGN_EVAL_2, TIME_TOTAL-1, COLOR_WHITE);
116132
precision_recall_threads_wrapper(sc_data_ptr2, sc_groups);
117-
INFO(" done with precision-recall (round #2)");
118-
g.timers[TIME_PR_ALN].stop();
133+
INFO(" done with variant call evaluation (round 2: large variants and round 1 FP/FN variants)");
134+
g.timers[TIME_ALIGN_EVAL_2].stop();
119135

120136
// merge first-round and second-round results
121137
// TODO: re-use add_variant_data to merge superclusters?
122138

123-
// calculate global phasings
139+
// calculate phasing statistics
124140
g.timers[TIME_PHASE].start();
125141
std::unique_ptr<phaseblockData> phasedata_ptr(new phaseblockData(sc_data_ptr));
126142
g.timers[TIME_PHASE].stop();
127143

128-
// write supercluster/phaseblock results in CSV format
144+
// write phasing results
129145
g.timers[TIME_WRITE].start();
130146
if (g.write) phasedata_ptr->write_switchflips();
131147
write_results(phasedata_ptr);
132148

133-
// save new VCF
149+
// write summary VCF with per-variant evaluation info
134150
if (g.write) {
135151
query_ptr->write_vcf(g.out_prefix + "query.vcf");
136152
truth_ptr->write_vcf(g.out_prefix + "truth.vcf");

src/timer.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,5 @@ double timer::total() {
2727
}
2828

2929
void timer::print(int i) {
30-
INFO(" [%d] %-16s: %8.3fs", i, name.data(), total());
30+
INFO(" [%d] %-37s: %8.3fs", i, name.data(), total());
3131
}

0 commit comments

Comments
 (0)