@@ -41,10 +41,10 @@ int main(int argc, char **argv) {
41
41
check_contigs (query_ptr, truth_ptr, ref_ptr);
42
42
43
43
// cluster query VCF
44
- g.timers [TIME_CLUST ].start ();
44
+ g.timers [TIME_EXACT_CLUSTER ].start ();
45
45
if (g.verbosity >= 1 ) INFO (" " );
46
- if (g.verbosity >= 1 ) INFO (" %s[Q %d/%d] Clustering %s VCF%s '%s'" ,
47
- COLOR_PURPLE, TIME_CLUST , TIME_TOTAL-1 , callset_strs[QUERY].data (),
46
+ if (g.verbosity >= 1 ) INFO (" %s[Q %d/%d] Exact clustering %s VCF (small vars) %s '%s'" ,
47
+ COLOR_PURPLE, TIME_EXACT_CLUSTER , TIME_TOTAL-1 , callset_strs[QUERY].data (),
48
48
COLOR_WHITE, query_ptr->filename .data ());
49
49
std::vector<std::thread> threads;
50
50
for (int t = 0 ; t < HAPS*int (query_ptr->contigs .size ()); t++) {
@@ -57,13 +57,11 @@ int main(int argc, char **argv) {
57
57
}
58
58
}
59
59
for (std::thread & thread : threads) thread.join ();
60
- g.timers [TIME_CLUST].stop ();
61
60
62
61
// cluster truth VCF
63
- g.timers [TIME_CLUST].start ();
64
62
if (g.verbosity >= 1 ) INFO (" " );
65
- if (g.verbosity >= 1 ) INFO (" %s[T %d/%d] Clustering %s VCF%s '%s'" ,
66
- COLOR_PURPLE, TIME_CLUST , TIME_TOTAL-1 , callset_strs[TRUTH].data (),
63
+ if (g.verbosity >= 1 ) INFO (" %s[T %d/%d] Exact clustering %s VCF (small vars) %s '%s'" ,
64
+ COLOR_PURPLE, TIME_EXACT_CLUSTER , TIME_TOTAL-1 , callset_strs[TRUTH].data (),
67
65
COLOR_WHITE, truth_ptr->filename .data ());
68
66
threads.clear ();
69
67
for (int t = 0 ; t < HAPS*int (truth_ptr->contigs .size ()); t++) {
@@ -76,22 +74,24 @@ int main(int argc, char **argv) {
76
74
}
77
75
}
78
76
for (std::thread & thread : threads) thread.join ();
79
- g.timers [TIME_CLUST].stop ();
80
77
81
- // calculate superclusters
82
- g.timers [TIME_SUPCLUST].start ();
78
+ // first-round superclustering
79
+ if (g.verbosity >= 1 ) INFO (" " );
80
+ if (g.verbosity >= 1 ) INFO (" %s[%d/%d] Superclustering TRUTH and QUERY variants (small vars)%s" ,
81
+ COLOR_PURPLE, TIME_EXACT_CLUSTER, TIME_TOTAL-1 , COLOR_WHITE);
83
82
std::shared_ptr<superclusterData> sc_data_ptr (
84
83
new superclusterData (query_ptr, truth_ptr, ref_ptr));
85
-
86
- // calculate supercluster sizes
87
84
auto sc_groups = sort_superclusters (sc_data_ptr);
88
- g.timers [TIME_SUPCLUST ].stop ();
85
+ g.timers [TIME_EXACT_CLUSTER ].stop ();
89
86
90
- // calculate precision/recall and genotypes
91
- g.timers [TIME_PR_ALN].start ();
87
+ // first-round evaluation: precision/recall and genotypes
88
+ g.timers [TIME_ALIGN_EVAL_1].start ();
89
+ if (g.verbosity >= 1 ) INFO (" " );
90
+ if (g.verbosity >= 1 ) INFO (" %s[%d/%d] Evaluating variant calls (small vars)%s" ,
91
+ COLOR_PURPLE, TIME_ALIGN_EVAL_1, TIME_TOTAL-1 , COLOR_WHITE);
92
92
precision_recall_threads_wrapper (sc_data_ptr, sc_groups);
93
- INFO (" done with precision-recall (round #1 )" );
94
- g.timers [TIME_PR_ALN ].stop ();
93
+ INFO (" done with variant call evaluation (round 1: small variants )" );
94
+ g.timers [TIME_ALIGN_EVAL_1 ].stop ();
95
95
96
96
// pull out FP and FN variants from first-round evaluation
97
97
std::shared_ptr<variantData> query_ptr_fp (new variantData ());
@@ -102,35 +102,51 @@ int main(int argc, char **argv) {
102
102
query_ptr2->merge (query_ptr_fp);
103
103
truth_ptr2->merge (truth_ptr_fn);
104
104
105
- // perform second-round evaluation
106
- g.timers [TIME_CLUST].start ();
105
+ // perform second-round clustering (simple cluster large and FP/FN vars)
106
+ g.timers [TIME_SIMPLE_CLUSTER].start ();
107
+ if (g.verbosity >= 1 ) INFO (" " );
108
+ if (g.verbosity >= 1 ) INFO (" %s[Q %d/%d] Simple clustering %s VCF (large and FP/FN vars)%s '%s'" ,
109
+ COLOR_PURPLE, TIME_SIMPLE_CLUSTER, TIME_TOTAL-1 , callset_strs[QUERY].data (), COLOR_WHITE,
110
+ query_ptr2->filename .data ());
107
111
simple_cluster (query_ptr2, QUERY);
112
+ if (g.verbosity >= 1 ) INFO (" " );
113
+ if (g.verbosity >= 1 ) INFO (" %s[T %d/%d] Simple clustering %s VCF (large and FP/FN vars)%s '%s'" ,
114
+ COLOR_PURPLE, TIME_SIMPLE_CLUSTER, TIME_TOTAL-1 , callset_strs[TRUTH].data (), COLOR_WHITE,
115
+ truth_ptr2->filename .data ());
108
116
simple_cluster (truth_ptr2, TRUTH);
109
- g.timers [TIME_CLUST].stop ();
110
- g.timers [TIME_SUPCLUST].start ();
117
+
118
+ // second-round superclustering
119
+ if (g.verbosity >= 1 ) INFO (" " );
120
+ if (g.verbosity >= 1 ) INFO (" %s[%d/%d] Superclustering TRUTH and QUERY variants (large and FP/FN vars)%s" ,
121
+ COLOR_PURPLE, TIME_SIMPLE_CLUSTER, TIME_TOTAL-1 , COLOR_WHITE);
111
122
std::shared_ptr<superclusterData> sc_data_ptr2 (
112
123
new superclusterData (query_ptr2, truth_ptr2, ref_ptr));
113
124
sc_groups = sort_superclusters (sc_data_ptr2);
114
- g.timers [TIME_SUPCLUST].stop ();
115
- g.timers [TIME_PR_ALN].start ();
125
+ g.timers [TIME_SIMPLE_CLUSTER].stop ();
126
+
127
+ // second-round evaluation: precision/recall and genotypes
128
+ g.timers [TIME_ALIGN_EVAL_2].start ();
129
+ if (g.verbosity >= 1 ) INFO (" " );
130
+ if (g.verbosity >= 1 ) INFO (" %s[%d/%d] Evaluating variant calls (large and FP/FN vars)%s" ,
131
+ COLOR_PURPLE, TIME_ALIGN_EVAL_2, TIME_TOTAL-1 , COLOR_WHITE);
116
132
precision_recall_threads_wrapper (sc_data_ptr2, sc_groups);
117
- INFO (" done with precision-recall (round #2 )" );
118
- g.timers [TIME_PR_ALN ].stop ();
133
+ INFO (" done with variant call evaluation (round 2: large variants and round 1 FP/FN variants )" );
134
+ g.timers [TIME_ALIGN_EVAL_2 ].stop ();
119
135
120
136
// merge first-round and second-round results
121
137
// TODO: re-use add_variant_data to merge superclusters?
122
138
123
- // calculate global phasings
139
+ // calculate phasing statistics
124
140
g.timers [TIME_PHASE].start ();
125
141
std::unique_ptr<phaseblockData> phasedata_ptr (new phaseblockData (sc_data_ptr));
126
142
g.timers [TIME_PHASE].stop ();
127
143
128
- // write supercluster/phaseblock results in CSV format
144
+ // write phasing results
129
145
g.timers [TIME_WRITE].start ();
130
146
if (g.write ) phasedata_ptr->write_switchflips ();
131
147
write_results (phasedata_ptr);
132
148
133
- // save new VCF
149
+ // write summary VCF with per-variant evaluation info
134
150
if (g.write ) {
135
151
query_ptr->write_vcf (g.out_prefix + " query.vcf" );
136
152
truth_ptr->write_vcf (g.out_prefix + " truth.vcf" );
0 commit comments