@@ -180,28 +180,41 @@ void write_precision_recall(const std::unique_ptr<phaseblockData> & phasedata_pt
180
180
181
181
// add query
182
182
for (int vi = 0 ; vi < qvars->n ; vi++) {
183
+ int t = 0 ;
184
+ if (qvars->types [vi] == TYPE_SUB) { // SNP
185
+ t = VARTYPE_SNP;
186
+ } else if ((qvars->types [vi] == TYPE_INS && // small INDEL
187
+ int (qvars->alts [vi].size ()) < g.sv_threshold ) ||
188
+ (qvars->types [vi] == TYPE_DEL &&
189
+ int (qvars->refs [vi].size ()) < g.sv_threshold )) {
190
+ t = VARTYPE_INDEL;
191
+ } else { // SV
192
+ t = VARTYPE_SV;
193
+ }
183
194
for (int hi = 0 ; hi < HAPS; hi++) {
184
- if (!qvars->var_on_hap (vi, hi)) continue ;
185
195
int calc_hi = hi ^ qvars->calcgt_is_swapped (vi);
186
196
float q = qvars->callq [calc_hi][vi];
187
- int t = 0 ;
188
- if (qvars->types [vi] == TYPE_SUB) { // SNP
189
- t = VARTYPE_SNP;
190
- } else if ((qvars->types [vi] == TYPE_INS && // small INDEL
191
- int (qvars->alts [vi].size ()) < g.sv_threshold ) ||
192
- (qvars->types [vi] == TYPE_DEL &&
193
- int (qvars->refs [vi].size ()) < g.sv_threshold )) {
194
- t = VARTYPE_INDEL;
195
- } else { // SV
196
- t = VARTYPE_SV;
197
- }
198
- if (qvars->errtypes [calc_hi][vi] == ERRTYPE_UN) {
199
- WARN (" Unknown error type at QUERY %s:%d" , ctg.data (), qvars->poss [vi]);
200
- continue ;
201
- }
202
- for (int qual = g.min_qual ; qual <= q; qual++) {
203
- query_counts[t][ qvars->errtypes [calc_hi][vi] ][qual-g.min_qual ]++;
204
- query_counts[VARTYPE_ALL][ qvars->errtypes [calc_hi][vi] ][qual-g.min_qual ]++;
197
+ if (qvars->var_on_hap (vi, hi)) {
198
+ if (qvars->errtypes [calc_hi][vi] == ERRTYPE_UN) {
199
+ WARN (" Unknown error type at QUERY %s:%d" , ctg.data (), qvars->poss [vi]);
200
+ continue ;
201
+ }
202
+ for (int qual = g.min_qual ; qual <= q; qual++) {
203
+ query_counts[t][ qvars->errtypes [calc_hi][vi] ][qual-g.min_qual ]++;
204
+ query_counts[VARTYPE_ALL][ qvars->errtypes [calc_hi][vi] ][qual-g.min_qual ]++;
205
+ }
206
+ } else { // variant not present on this haplotype
207
+ // NOTE: custom logic for incorrect original allele count
208
+ // orig_gt is 1|0 (query), calc_gt (~truth) was 1|1, forced back to 0|1
209
+ // the extra 1 allele probably participated in a truth match, so decrement truth
210
+ if (qvars->ac_errtype [vi] == AC_ERR_2_TO_1) {
211
+ for (int qual = g.min_qual ; qual <= q; qual++) {
212
+ truth_counts[t][ERRTYPE_TP][qual-g.min_qual ]--;
213
+ truth_counts[VARTYPE_ALL][ERRTYPE_TP][qual-g.min_qual ]--;
214
+ truth_counts[t][ERRTYPE_FN][qual-g.min_qual ]++;
215
+ truth_counts[VARTYPE_ALL][ERRTYPE_FN][qual-g.min_qual ]++;
216
+ }
217
+ }
205
218
}
206
219
}
207
220
}
@@ -390,7 +403,7 @@ void write_results(std::unique_ptr<phaseblockData> & phasedata_ptr) {
390
403
FILE* out_phaseblocks = fopen (out_phaseblocks_fn.data (), " w" );
391
404
if (g.verbosity >= 1 ) INFO (" Writing phasing results to '%s'" , out_phaseblocks_fn.data ());
392
405
fprintf (out_phaseblocks, " CONTIG\t PHASE_BLOCK\t START\t STOP\t SIZE\t VARIANTS\t FLIP_ERRORS\t SWITCH_ERRORS\n " );
393
- for (std::string ctg : phasedata_ptr->contigs ) {
406
+ for (const std::string & ctg : phasedata_ptr->contigs ) {
394
407
std::shared_ptr<ctgPhaseblocks> ctg_pbs = phasedata_ptr->phase_blocks [ctg];
395
408
std::shared_ptr<ctgSuperclusters> ctg_scs = ctg_pbs->ctg_superclusters ;
396
409
for (int i = 0 ; i < ctg_pbs->n && ctg_scs->n > 0 ; i++) {
@@ -418,7 +431,7 @@ void write_results(std::unique_ptr<phaseblockData> & phasedata_ptr) {
418
431
if (g.verbosity >= 1 ) INFO (" Writing superclustering results to '%s'" , out_clusterings_fn.data ());
419
432
fprintf (out_clusterings, " CONTIG\t SUPERCLUSTER\t START\t STOP\t SIZE\t "
420
433
" QUERY_VARS\t TRUTH_VARS\n " );
421
- for (std::string ctg : phasedata_ptr->contigs ) {
434
+ for (const std::string & ctg : phasedata_ptr->contigs ) {
422
435
std::shared_ptr<ctgPhaseblocks> ctg_pbs = phasedata_ptr->phase_blocks [ctg];
423
436
std::shared_ptr<ctgSuperclusters> ctg_scs = ctg_pbs->ctg_superclusters ;
424
437
for (int i = 0 ; i < ctg_scs->n ; i++) {
0 commit comments