@@ -146,8 +146,7 @@ int calc_prec_recall_aln(
146
146
147
147
// allow bottom-right corner to move diagonally into next truth and query nodes
148
148
// NOTE: separating this case out allows sync points between adjacent variants
149
- if (x.qi == int (graph->qseqs [x.qni ].length ())-1 && x.ti < int (graph->tseqs [x.tni ].length ()) &&
150
- x.ti == int (graph->tseqs [x.tni ].length ())-1 && x.qi < int (graph->qseqs [x.qni ].length ())) {
149
+ if (x.qi == int (graph->qseqs [x.qni ].length ())-1 && x.ti == int (graph->tseqs [x.tni ].length ())-1 ) {
151
150
for (int qni : graph->qnexts [x.qni ]) { // for all next nodes
152
151
for (int tni : graph->tnexts [x.tni ]) {
153
152
idx4 z (qni, tni, 0 , 0 );
@@ -200,7 +199,7 @@ int calc_prec_recall_aln(
200
199
queue.push (y); curr_wave.insert (y); ptrs[y] = x;
201
200
}
202
201
}
203
- if (x.ti +1 < int (graph->tseqs [x.tni ].length ())) { // INS
202
+ if (x.ti +1 < int (graph->tseqs [x.tni ].length ())) { // DEL
204
203
idx4 y (x.qni , x.tni , x.qi , x.ti +1 );
205
204
if (!contains (done, y) && !contains (curr_wave, y)) {
206
205
queue.push (y); curr_wave.insert (y); ptrs[y] = x;
@@ -230,6 +229,7 @@ int calc_prec_recall_aln(
230
229
231
230
/* *****************************************************************************/
232
231
232
+
233
233
void evaluate_variants (std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
234
234
std::shared_ptr<fastaData> ref, const std::string & ctg, int truth_hi, bool print) {
235
235
@@ -243,7 +243,9 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
243
243
if (print) graph->print ();
244
244
245
245
std::unordered_map<idx4, idx4> ptrs;
246
- bool aligned = calc_prec_recall_aln (graph, ptrs, false ) < g.max_dist ;
246
+ int aln_score = calc_prec_recall_aln (graph, ptrs, false );
247
+ if (print) printf (" alignment score: %d\n " , aln_score);
248
+ bool aligned = aln_score < g.max_dist ;
247
249
if (aligned) { // alignment succeeded
248
250
calc_prec_recall (graph, ptrs, truth_hi, false );
249
251
done = true ;
@@ -271,7 +273,13 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
271
273
}
272
274
// NOTE: sort FNs (if aligned) or all truth vars (if not aligned) by decreasing variant size
273
275
std::sort (exclude_sizes.rbegin (), exclude_sizes.rend ());
274
-
276
+ if (print) for (int i = 0 ; i < int (exclude_sizes.size ()); i++) {
277
+ int tvar_idx = exclude_sizes[i].second ;
278
+ printf (" exclude size %d, var %d = %s:%d (%s,%s)\n " ,
279
+ exclude_sizes[i].first , tvar_idx, ctg.data (),
280
+ tvars->poss [tvar_idx], tvars->refs [tvar_idx].data (),
281
+ tvars->alts [tvar_idx].data ());
282
+ }
275
283
// try a number of alignments without some of the largest variants, choose the best one
276
284
if (not done) { // there are potential variants to exclude
277
285
std::vector< std::pair<int , int > > exclude_dists;
@@ -299,12 +307,19 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
299
307
std::shared_ptr<Graph> retry_graph (new Graph (scs, sc_idx, ref, ctg, truth_hi));
300
308
if (print) retry_graph->print ();
301
309
std::unordered_map<idx4, idx4> retry_ptrs;
302
- int dist = calc_prec_recall_aln (retry_graph, retry_ptrs, print );
310
+ int dist = calc_prec_recall_aln (retry_graph, retry_ptrs, false );
303
311
exclude_dists.push_back (std::make_pair (dist, exclude_sizes[retry].second ));
304
312
}
305
313
306
314
// sort retried alignments in order of resulting edit distance
307
315
std::sort (exclude_dists.begin (), exclude_dists.end ());
316
+ if (print) for (int i = 0 ; i < int (exclude_dists.size ()); i++) {
317
+ int tvar_idx = exclude_dists[i].second ;
318
+ printf (" exclude dist %d, var %d = %s:%d (%s,%s)\n " ,
319
+ exclude_dists[i].first , tvar_idx, ctg.data (),
320
+ tvars->poss [tvar_idx], tvars->refs [tvar_idx].data (),
321
+ tvars->alts [tvar_idx].data ());
322
+ }
308
323
309
324
// the variant for which excluding resulted in lowest edit dist should be considered FN
310
325
// prepare variants for the next iteration
@@ -391,16 +406,16 @@ void calc_prec_recall(
391
406
// if we move out of a new query variant, mark it as included
392
407
if (prev.qni != curr.qni && // new query node
393
408
graph->qtypes [curr.qni ] != TYPE_REF) { // node is variant
394
- if (print) printf (" new query variant\n " );
395
409
int qvar_idx = graph->qidxs [curr.qni ];
396
410
sync_qvars.push_back (qvar_idx);
411
+ if (print) printf (" new query variant: %d\n " , qvar_idx);
397
412
}
398
413
// if we move out of a query variant, include it in sync group and ref dist calc
399
414
if (prev.tni != curr.tni && // new truth node
400
415
graph->ttypes [curr.tni ] != TYPE_REF) { // node is variant
401
- if (print) printf (" new truth variant\n " );
402
416
int tvar_idx = graph->tidxs [curr.tni ];
403
417
sync_tvars.push_back (tvar_idx);
418
+ if (print) printf (" new truth variant: %d\n " , tvar_idx);
404
419
}
405
420
// if the alignment is a substitution, insertion, or deletion
406
421
if (prev.qni == curr.qni && prev.tni == curr.tni && // same matrix
@@ -786,7 +801,7 @@ Graph::Graph(
786
801
// skip variants that we've already evaluated
787
802
if (qvars->errtypes [truth_hap][var_idx] != ERRTYPE_UN) { continue ; }
788
803
789
- // prior to adding the next variant, add as many reference substrings as necessary
804
+ // prior to adding the next variant, add as many reference nodes as necessary
790
805
// due to earlier variants that end before it starts
791
806
int var_pos = qvars->poss [var_idx];
792
807
while (ref_pos < var_pos) {
@@ -853,10 +868,10 @@ Graph::Graph(
853
868
for (int n2 = 0 ; n2 < this ->qnodes ; n2++) {
854
869
if (n2 == n1) continue ; // don't compare node to itself
855
870
// add predecessor of current node
856
- if (this ->qbegs [n1] == this ->qends [n2])
871
+ if (this ->qbegs [n1] == this ->qends [n2] && n2 < n1 )
857
872
this ->qprevs [n1].push_back (n2);
858
873
// add successor of current node
859
- if (this ->qends [n1] == this ->qbegs [n2])
874
+ if (this ->qends [n1] == this ->qbegs [n2] && n1 < n2 )
860
875
this ->qnexts [n1].push_back (n2);
861
876
}
862
877
}
0 commit comments