Skip to content

Commit aed0a1c

Browse files
committed
fix: loop bug in query graph
1 parent aa0ccea commit aed0a1c

File tree

1 file changed

+26
-11
lines changed

1 file changed

+26
-11
lines changed

src/dist.cpp

+26-11
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,7 @@ int calc_prec_recall_aln(
146146

147147
// allow bottom-right corner to move diagonally into next truth and query nodes
148148
// NOTE: separating this case out allows sync points between adjacent variants
149-
if (x.qi == int(graph->qseqs[x.qni].length())-1 && x.ti < int(graph->tseqs[x.tni].length()) &&
150-
x.ti == int(graph->tseqs[x.tni].length())-1 && x.qi < int(graph->qseqs[x.qni].length())) {
149+
if (x.qi == int(graph->qseqs[x.qni].length())-1 && x.ti == int(graph->tseqs[x.tni].length())-1) {
151150
for (int qni : graph->qnexts[x.qni]) { // for all next nodes
152151
for (int tni : graph->tnexts[x.tni]) {
153152
idx4 z(qni, tni, 0, 0);
@@ -200,7 +199,7 @@ int calc_prec_recall_aln(
200199
queue.push(y); curr_wave.insert(y); ptrs[y] = x;
201200
}
202201
}
203-
if (x.ti+1 < int(graph->tseqs[x.tni].length())) { // INS
202+
if (x.ti+1 < int(graph->tseqs[x.tni].length())) { // DEL
204203
idx4 y(x.qni, x.tni, x.qi, x.ti+1);
205204
if (!contains(done, y) && !contains(curr_wave, y)) {
206205
queue.push(y); curr_wave.insert(y); ptrs[y] = x;
@@ -230,6 +229,7 @@ int calc_prec_recall_aln(
230229

231230
/******************************************************************************/
232231

232+
233233
void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
234234
std::shared_ptr<fastaData> ref, const std::string & ctg, int truth_hi, bool print) {
235235

@@ -243,7 +243,9 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
243243
if (print) graph->print();
244244

245245
std::unordered_map<idx4, idx4> ptrs;
246-
bool aligned = calc_prec_recall_aln(graph, ptrs, false) < g.max_dist;
246+
int aln_score = calc_prec_recall_aln(graph, ptrs, false);
247+
if (print) printf("alignment score: %d\n", aln_score);
248+
bool aligned = aln_score < g.max_dist;
247249
if (aligned) { // alignment succeeded
248250
calc_prec_recall(graph, ptrs, truth_hi, false);
249251
done = true;
@@ -271,7 +273,13 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
271273
}
272274
// NOTE: sort FNs (if aligned) or all truth vars (if not aligned) by decreasing variant size
273275
std::sort(exclude_sizes.rbegin(), exclude_sizes.rend());
274-
276+
if (print) for (int i = 0; i < int(exclude_sizes.size()); i++) {
277+
int tvar_idx = exclude_sizes[i].second;
278+
printf(" exclude size %d, var %d = %s:%d (%s,%s)\n",
279+
exclude_sizes[i].first, tvar_idx, ctg.data(),
280+
tvars->poss[tvar_idx], tvars->refs[tvar_idx].data(),
281+
tvars->alts[tvar_idx].data());
282+
}
275283
// try a number of alignments without some of the largest variants, choose the best one
276284
if (not done) { // there are potential variants to exclude
277285
std::vector< std::pair<int, int> > exclude_dists;
@@ -299,12 +307,19 @@ void evaluate_variants(std::shared_ptr<ctgSuperclusters> scs, int sc_idx,
299307
std::shared_ptr<Graph> retry_graph(new Graph(scs, sc_idx, ref, ctg, truth_hi));
300308
if (print) retry_graph->print();
301309
std::unordered_map<idx4, idx4> retry_ptrs;
302-
int dist = calc_prec_recall_aln(retry_graph, retry_ptrs, print);
310+
int dist = calc_prec_recall_aln(retry_graph, retry_ptrs, false);
303311
exclude_dists.push_back(std::make_pair(dist, exclude_sizes[retry].second));
304312
}
305313

306314
// sort retried alignments in order of resulting edit distance
307315
std::sort(exclude_dists.begin(), exclude_dists.end());
316+
if (print) for (int i = 0; i < int(exclude_dists.size()); i++) {
317+
int tvar_idx = exclude_dists[i].second;
318+
printf(" exclude dist %d, var %d = %s:%d (%s,%s)\n",
319+
exclude_dists[i].first, tvar_idx, ctg.data(),
320+
tvars->poss[tvar_idx], tvars->refs[tvar_idx].data(),
321+
tvars->alts[tvar_idx].data());
322+
}
308323

309324
// the variant for which excluding resulted in lowest edit dist should be considered FN
310325
// prepare variants for the next iteration
@@ -391,16 +406,16 @@ void calc_prec_recall(
391406
// if we move out of a new query variant, mark it as included
392407
if (prev.qni != curr.qni && // new query node
393408
graph->qtypes[curr.qni] != TYPE_REF) { // node is variant
394-
if (print) printf("new query variant\n");
395409
int qvar_idx = graph->qidxs[curr.qni];
396410
sync_qvars.push_back(qvar_idx);
411+
if (print) printf("new query variant: %d\n", qvar_idx);
397412
}
398413
// if we move out of a query variant, include it in sync group and ref dist calc
399414
if (prev.tni != curr.tni && // new truth node
400415
graph->ttypes[curr.tni] != TYPE_REF) { // node is variant
401-
if (print) printf("new truth variant\n");
402416
int tvar_idx = graph->tidxs[curr.tni];
403417
sync_tvars.push_back(tvar_idx);
418+
if (print) printf("new truth variant: %d\n", tvar_idx);
404419
}
405420
// if the alignment is a substitution, insertion, or deletion
406421
if (prev.qni == curr.qni && prev.tni == curr.tni && // same matrix
@@ -786,7 +801,7 @@ Graph::Graph(
786801
// skip variants that we've already evaluated
787802
if (qvars->errtypes[truth_hap][var_idx] != ERRTYPE_UN) { continue; }
788803

789-
// prior to adding the next variant, add as many reference substrings as necessary
804+
// prior to adding the next variant, add as many reference nodes as necessary
790805
// due to earlier variants that end before it starts
791806
int var_pos = qvars->poss[var_idx];
792807
while (ref_pos < var_pos) {
@@ -853,10 +868,10 @@ Graph::Graph(
853868
for (int n2 = 0; n2 < this->qnodes; n2++) {
854869
if (n2 == n1) continue; // don't compare node to itself
855870
// add predecessor of current node
856-
if (this->qbegs[n1] == this->qends[n2])
871+
if (this->qbegs[n1] == this->qends[n2] && n2 < n1)
857872
this->qprevs[n1].push_back(n2);
858873
// add successor of current node
859-
if (this->qends[n1] == this->qbegs[n2])
874+
if (this->qends[n1] == this->qbegs[n2] && n1 < n2)
860875
this->qnexts[n1].push_back(n2);
861876
}
862877
}

0 commit comments

Comments
 (0)