From 1bdd791539dcf69209580a3deaa34ec31bd11c42 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 16 Sep 2014 09:57:35 -0400 Subject: [PATCH] r837: removed BAM support This is not necessary. --- Makefile | 19 +++------------- bamlite.c | 2 -- bamlite.h | 2 -- bwa.c | 41 ---------------------------------- bwa.h | 32 --------------------------- bwamem.c | 45 +++++++------------------------------ bwamem.h | 6 +---- bwamem_extra.c | 12 ---------- bwamem_pair.c | 20 ++++++----------- bwaseqio.c | 44 ------------------------------------ fastmap.c | 60 ++++---------------------------------------------- main.c | 9 +------- 12 files changed, 24 insertions(+), 268 deletions(-) diff --git a/Makefile b/Makefile index b10cf81c..60c21049 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,6 @@ CC= gcc #CC= clang --analyze CFLAGS= -g -Wall -Wno-unused-function -O2 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS -HTSLIB_PATH= AR= ar DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o @@ -18,27 +17,15 @@ SUBDIRS= . .SUFFIXES:.c .o .cc .c.o: -ifneq ($(HTSLIB_PATH),) - $(CC) -c $(CFLAGS) $(DFLAGS) -DUSE_HTSLIB $(INCLUDES) -I$(HTSLIB_PATH) $< -o $@ -else - $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@ -endif + $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@ all:$(PROG) bwa:libbwa.a $(AOBJS) main.o -ifneq ($(HTSLIB_PATH),) - $(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ $(HTSLIB_PATH)/libhts.a -L. -lbwa $(LIBS) -else - $(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS) -endif + $(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS) bwamem-lite:libbwa.a example.o -ifneq ($(HTSLIB_PATH),) - $(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ $(HTSLIB_PATH)/libhts.a -L. -lbwa $(LIBS) -else - $(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ -L. -lbwa $(LIBS) -endif + $(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ -L. -lbwa $(LIBS) libbwa.a:$(LOBJS) $(AR) -csru $@ $(LOBJS) diff --git a/bamlite.c b/bamlite.c index f1c2e624..3704bebc 100644 --- a/bamlite.c +++ b/bamlite.c @@ -1,4 +1,3 @@ -#ifndef USE_HTSLIB #include #include #include @@ -209,4 +208,3 @@ int bamlite_gzclose(gzFile file) { return ret; } #endif /* USE_VERBOSE_ZLIB_WRAPPERS */ -#endif diff --git a/bamlite.h b/bamlite.h index 443926d3..efab7ac8 100644 --- a/bamlite.h +++ b/bamlite.h @@ -1,6 +1,5 @@ #ifndef BAMLITE_H_ #define BAMLITE_H_ -#ifndef USE_HTSLIB #include #include @@ -113,4 +112,3 @@ extern "C" { #endif #endif -#endif diff --git a/bwa.c b/bwa.c index d584385b..43d8a582 100644 --- a/bwa.c +++ b/bwa.c @@ -7,9 +7,6 @@ #include "ksw.h" #include "utils.h" #include "kstring.h" -#ifdef USE_HTSLIB -#include -#endif #ifdef USE_MALLOC_WRAPPERS # include "malloc_wrap.h" @@ -271,27 +268,12 @@ void bwa_print_sam_hdr(const bntseq_t *bns, const char *rg_line) { int i; extern char *bwa_pg; - err_printf("@HD\tVN:1.4\tSO:unknown\n"); for (i = 0; i < bns->n_seqs; ++i) err_printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[i].name, bns->anns[i].len); if (rg_line) err_printf("%s\n", rg_line); err_printf("%s\n", bwa_pg); } -#ifdef USE_HTSLIB -void bwa_format_sam_hdr(const bntseq_t *bns, const char *rg_line, kstring_t *str) -{ - int i; - extern char *bwa_pg; - str->l = 0; str->s = 0; - ksprintf(str, "@HD\tVN:1.4\tSO:unknown\n"); - for (i = 0; i < bns->n_seqs; ++i) - ksprintf(str, "@SQ\tSN:%s\tLN:%d\n", bns->anns[i].name, bns->anns[i].len); - if (rg_line) ksprintf(str, "%s\n", rg_line); - ksprintf(str, "%s\n", bwa_pg); -} -#endif - static char *bwa_escape(char *s) { char *p, *q; @@ -337,26 +319,3 @@ char *bwa_set_rg(const char *s) return 0; } -#ifdef USE_HTSLIB -bams_t *bams_init() { - return calloc(1, sizeof(bams_t)); -} - -void bams_add(bams_t *bams, bam1_t *b) { - if (bams->m == bams->l) { - bams->m = bams->m << 2; - bams->bams = realloc(bams->bams, sizeof(bam1_t) * bams->m); - } - bams->bams[bams->l] = b; - bams->l++; -} - -void bams_destroy(bams_t *bams) { - int i; - for (i = 0; i < bams->l; i++) { - bam_destroy1(bams->bams[i]); - } - free(bams->bams); - free(bams); -} -#endif diff --git a/bwa.h b/bwa.h index 3da86983..bbc25254 100644 --- a/bwa.h +++ b/bwa.h @@ -4,9 +4,6 @@ #include #include "bntseq.h" #include "bwt.h" -#ifdef USE_HTSLIB -#include -#endif #define BWA_IDX_BWT 0x1 #define BWA_IDX_BNS 0x2 @@ -19,31 +16,11 @@ typedef struct { uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base } bwaidx_t; -#ifdef USE_HTSLIB -typedef struct { - int l, m; - bam1_t **bams; -} bams_t; -#endif - typedef struct { int l_seq; -#ifdef USE_HTSLIB - char *name, *comment, *seq, *qual; - bams_t *bams; -#else char *name, *comment, *seq, *qual, *sam; -#endif } bseq1_t; -// This is here to faciliate passing around HTSLIB's bam_hdr_t structure when we are not compiling with HTSLIB -#ifndef USE_HTSLIB -typedef struct { - void *ptr; -} bam_hdr_t; // DO NOT USE -#endif - - extern int bwa_verbose; extern char bwa_rg_id[256]; @@ -64,17 +41,8 @@ extern "C" { void bwa_idx_destroy(bwaidx_t *idx); void bwa_print_sam_hdr(const bntseq_t *bns, const char *rg_line); -#ifdef USE_HTSLIB - void bwa_format_sam_hdr(const bntseq_t *bns, const char *rg_line, kstring_t *str); -#endif char *bwa_set_rg(const char *s); -#ifdef USE_HTSLIB - bams_t *bams_init(); - void bams_add(bams_t *bams, bam1_t *b); - void bams_destroy(bams_t *bams); -#endif - #ifdef __cplusplus } #endif diff --git a/bwamem.c b/bwamem.c index 95d7c2f8..64c26fab 100644 --- a/bwamem.c +++ b/bwamem.c @@ -16,10 +16,6 @@ #include "ksort.h" #include "utils.h" -#ifdef USE_HTSLIB -#include -#endif - #ifdef USE_MALLOC_WRAPPERS # include "malloc_wrap.h" #endif @@ -787,7 +783,7 @@ static inline int get_rlen(int n_cigar, const uint32_t *cigar) return l; } -void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_, bam_hdr_t *h) +void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_) { int i, l_name; mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert @@ -912,15 +908,6 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq if (str->s[i] == '\t') str->s[i] = ' '; } kputc('\n', str); - -#ifdef USE_HTSLIB - bam1_t *b = bam_init1(); - if (sam_parse1(str, h, b) < 0) { - // TODO: error! - } - bams_add(s->bams, b); - str->l = 0; str->s = 0; -#endif } /************************ @@ -954,7 +941,7 @@ int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a) } // TODO (future plan): group hits into a uint64_t[] array. This will be cleaner and more flexible -void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m, bam_hdr_t *h) +void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m) { extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query); kstring_t str; @@ -986,16 +973,14 @@ void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_aln_t t; t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0); t.flag |= extra_flag; - mem_aln2sam(opt, bns, &str, s, 1, &t, 0, m, h); + mem_aln2sam(opt, bns, &str, s, 1, &t, 0, m); } else { for (k = 0; k < aa.n; ++k) - mem_aln2sam(opt, bns, &str, s, aa.n, aa.a, k, m, h); + mem_aln2sam(opt, bns, &str, s, aa.n, aa.a, k, m); for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar); free(aa.a); } -#ifndef USE_HTSLIB s->sam = str.s; -#endif if (XA) { for (k = 0; k < a->n; ++k) free(XA[k]); free(XA); @@ -1123,7 +1108,6 @@ typedef struct { bseq1_t *seqs; mem_alnreg_v *regs; int64_t n_processed; - bam_hdr_t *h; } worker_t; static void worker1(void *data, int i, int tid) @@ -1142,33 +1126,26 @@ static void worker1(void *data, int i, int tid) static void worker2(void *data, int i, int tid) { - extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2], bam_hdr_t *h); + extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]); extern void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a); worker_t *w = (worker_t*)data; if (!(w->opt->flag&MEM_F_PE)) { -#ifdef USE_HTSLIB - w->seqs[i].bams = bams_init(); -#endif if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name); if (w->opt->flag & MEM_F_ALN_REG) { mem_reg2ovlp(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]); } else { mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i); - mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0, w->h); + mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0); } free(w->regs[i].a); } else { if (bwa_verbose >= 4) printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i<<1|0].name); -#ifdef USE_HTSLIB - w->seqs[i<<1].bams = bams_init(); - w->seqs[1+(i<<1)].bams = bams_init(); -#endif - mem_sam_pe(w->opt, w->bns, w->pac, w->pes, (w->n_processed>>1) + i, &w->seqs[i<<1], &w->regs[i<<1], w->h); + mem_sam_pe(w->opt, w->bns, w->pac, w->pes, (w->n_processed>>1) + i, &w->seqs[i<<1], &w->regs[i<<1]); free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a); } } -void mem_process_seqs2(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0, bam_hdr_t *h) +void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0) { extern void kt_for(int n_threads, void (*func)(void*,int,int), void *data, int n); worker_t w; @@ -1185,7 +1162,6 @@ void mem_process_seqs2(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *b w.aux = malloc(opt->n_threads * sizeof(smem_aux_t)); for (i = 0; i < opt->n_threads; ++i) w.aux[i] = smem_aux_init(); - w.h = h; kt_for(opt->n_threads, worker1, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions for (i = 0; i < opt->n_threads; ++i) smem_aux_destroy(w.aux[i]); @@ -1199,8 +1175,3 @@ void mem_process_seqs2(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *b if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime); } - -void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0) -{ - mem_process_seqs2(opt, bwt, bns, pac, n_processed, n, seqs, pes0, 0); -} diff --git a/bwamem.h b/bwamem.h index e65e3b26..8b8ec0d6 100644 --- a/bwamem.h +++ b/bwamem.h @@ -53,9 +53,6 @@ typedef struct { int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end int max_hits; // if there are max_hits or fewer, output them all int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset -#ifdef USE_HTSLIB - int bam_output; -#endif } mem_opt_t; typedef struct { @@ -130,10 +127,8 @@ extern "C" { * @param seqs query sequences; $seqs[i].seq/sam to be modified after the call * @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements, * corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info. - * @param h the BAM header, NULL if not using HTSLIB */ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0); - void mem_process_seqs2(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0, bam_hdr_t *h); /** * Find the aligned regions for one query sequence @@ -165,6 +160,7 @@ extern "C" { * @return CIGAR, strand, mapping quality and forward-strand position */ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar); + mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar, const char *name); /** * Infer the insert size distribution from interleaved alignment regions diff --git a/bwamem_extra.c b/bwamem_extra.c index b56a51cd..45289eab 100644 --- a/bwamem_extra.c +++ b/bwamem_extra.c @@ -93,11 +93,7 @@ mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t * return ar; } -#ifdef USE_HTSLIB -void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, bam_hdr_t *h) -#else void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a) -#endif { int i; kstring_t str = {0,0,0}; @@ -119,15 +115,7 @@ void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, ksprintf(&str, "%.3f", (double)p->truesc / opt->a / (qe - qb > re - rb? qe - qb : re - rb)); kputc('\n', &str); } -#ifdef USE_HTSLIB - bam1_t *b = bam_init1(); - if (sam_parse1(&str, h, b) < 0) { - // TODO: error! - } - bams_add(s->bams, b); -#else s->sam = str.s; -#endif } // Okay, returning strings is bad, but this has happened a lot elsewhere. If I have time, I need serious code cleanup. diff --git a/bwamem_pair.c b/bwamem_pair.c index 66dd8d17..f2124276 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -242,12 +242,12 @@ int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, cons #define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499)) -int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2], bam_hdr_t *header) +int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]) { extern int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id); extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a); - extern void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m, bam_hdr_t *h); - extern void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m, bam_hdr_t *h); + extern void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m); + extern void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m); extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query); int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1, n_pri[2]; @@ -327,14 +327,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co // write SAM h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[0].XA = XA[0]? XA[0][z[0]] : 0; h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; h[1].XA = XA[1]? XA[1][z[1]] : 0; - mem_aln2sam(opt, bns, &str, &s[0], 1, &h[0], 0, &h[1], header); -#ifndef USE_HTSLIB - s[0].sam = strdup(str.s); str.l = 0; -#endif - mem_aln2sam(opt, bns, &str, &s[1], 1, &h[1], 0, &h[0], header); -#ifndef USE_HTSLIB - s[1].sam = str.s; -#endif + mem_aln2sam(opt, bns, &str, &s[0], 1, &h[0], 0, &h[1]); s[0].sam = strdup(str.s); str.l = 0; + mem_aln2sam(opt, bns, &str, &s[1], 1, &h[1], 0, &h[0]); s[1].sam = str.s; if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name); free(h[0].cigar); // h[0].XA will be freed in the following block free(h[1].cigar); @@ -360,8 +354,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co d = mem_infer_dir(bns->l_pac, a[0].a[0].rb, a[1].a[0].rb, &dist); if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high) extra_flag |= 2; } - mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1], header); - mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0], header); + mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1]); + mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0]); if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name); free(h[0].cigar); free(h[1].cigar); return n; diff --git a/bwaseqio.c b/bwaseqio.c index 3512d7d8..d850307c 100644 --- a/bwaseqio.c +++ b/bwaseqio.c @@ -2,11 +2,7 @@ #include #include "bwtaln.h" #include "utils.h" -#ifdef USE_HTSLIB -#include -#else #include "bamlite.h" -#endif #include "kseq.h" KSEQ_DECLARE(gzFile) @@ -21,12 +17,7 @@ static char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4 struct __bwa_seqio_t { // for BAM input int is_bam, which; // 1st bit: read1, 2nd bit: read2, 3rd: SE -#ifdef USE_HTSLIB - samFile *fp; - bam_hdr_t *h; -#else bamFile fp; -#endif // for fastq input kseq_t *ks; }; @@ -34,24 +25,14 @@ struct __bwa_seqio_t { bwa_seqio_t *bwa_bam_open(const char *fn, int which) { bwa_seqio_t *bs; -#ifndef USE_HTSLIB bam_header_t *h; -#endif bs = (bwa_seqio_t*)calloc(1, sizeof(bwa_seqio_t)); bs->is_bam = 1; bs->which = which; -#ifdef USE_HTSLIB - bs->fp = sam_open(fn, "rb"); -#else bs->fp = bam_open(fn, "r"); -#endif if (0 == bs->fp) err_fatal_simple("Couldn't open bam file"); -#ifdef USE_HTSLIB - bs->h = sam_hdr_read(bs->fp); -#else h = bam_header_read(bs->fp); bam_header_destroy(h); -#endif return bs; } @@ -69,12 +50,7 @@ void bwa_seq_close(bwa_seqio_t *bs) { if (bs == 0) return; if (bs->is_bam) { -#ifdef USE_HTSLIB - if (0 != sam_close(bs->fp)) err_fatal_simple("Error closing sam/bam file"); - bam_hdr_destroy(bs->h); -#else if (0 != bam_close(bs->fp)) err_fatal_simple("Error closing bam file"); -#endif } else { err_gzclose(bs->ks->f->f); kseq_destroy(bs->ks); @@ -125,11 +101,7 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com b = bam_init1(); n_seqs = 0; seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t)); -#ifdef USE_HTSLIB - while ((res = sam_read1(bs->fp, bs->h, b)) >= 0) { -#else while ((res = bam_read1(bs->fp, b)) >= 0) { -#endif uint8_t *s, *q; int go = 0; if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1; @@ -142,26 +114,14 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com p->qual = 0; p->full_len = p->clip_len = p->len = l; n_tot += p->full_len; -#ifdef USE_HTSLIB - s = bam_get_seq(b); q = bam_get_qual(b); -#else s = bam1_seq(b); q = bam1_qual(b); -#endif p->seq = (ubyte_t*)calloc(p->len + 1, 1); p->qual = (ubyte_t*)calloc(p->len + 1, 1); for (i = 0; i != p->full_len; ++i) { -#ifdef USE_HTSLIB - p->seq[i] = bam_nt16_nt4_table[(int)bam_seqi(s, i)]; -#else p->seq[i] = bam_nt16_nt4_table[(int)bam1_seqi(s, i)]; -#endif p->qual[i] = q[i] + 33 < 126? q[i] + 33 : 126; } -#ifdef USE_HTSLIB - if (bam_is_rev(b)) { // then reverse -#else if (bam1_strand(b)) { // then reverse -#endif seq_reverse(p->len, p->seq, 1); seq_reverse(p->len, p->qual, 0); } @@ -170,11 +130,7 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com memcpy(p->rseq, p->seq, p->len); seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped() seq_reverse(p->len, p->rseq, is_comp); -#ifdef USE_HTSLIB - p->name = strdup((const char*)bam_get_qname(b)); -#else p->name = strdup((const char*)bam1_qname(b)); -#endif if (n_seqs == n_needed) break; } if (res < 0 && res != -1) err_fatal_simple("Error reading bam file"); diff --git a/fastmap.c b/fastmap.c index 25d2b96e..6cdad68a 100644 --- a/fastmap.c +++ b/fastmap.c @@ -55,12 +55,7 @@ int main_mem(int argc, char *argv[]) opt = mem_opt_init(); memset(&opt0, 0, sizeof(mem_opt_t)); -#ifdef USE_HTSLIB - while ((c = getopt(argc, argv, "epaFMCSPHVYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:o:")) >= 0) -#else - while ((c = getopt(argc, argv, "epaFMCSPHVYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:")) >= 0) -#endif - { + while ((c = getopt(argc, argv, "epaFMCSPHVYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; else if (c == 'x') mode = optarg; else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1; @@ -130,10 +125,6 @@ int main_mem(int argc, char *argv[]) if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] mean insert size: %.3f, stddev: %.3f, max: %d, min: %d\n", __func__, pes[1].avg, pes[1].std, pes[1].high, pes[1].low); -#ifdef USE_HTSLIB - } else if (c == 'o') { - opt->bam_output = atoi(optarg); -#endif } else return 1; } @@ -182,9 +173,6 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " specify the mean, standard deviation (10%% of the mean if absent), max\n"); fprintf(stderr, " (4 sigma from the mean if absent) and min of the insert size distribution.\n"); fprintf(stderr, " FR orientation only. [inferred]\n"); -#ifdef USE_HTSLIB - fprintf(stderr, " -o INT 0 - BAM (compressed), 1 - BAM (uncompressed), 2 - SAM [%d]\n", opt->bam_output); -#endif fprintf(stderr, "\n"); fprintf(stderr, "Note: Please read the man page for detailed description of the command line and options.\n"); fprintf(stderr, "\n"); @@ -245,33 +233,8 @@ int main_mem(int argc, char *argv[]) opt->flag |= MEM_F_PE; } } - bam_hdr_t *h = NULL; // TODO -#ifdef USE_HTSLIB - samFile *out = NULL; - char *modes[] = {"wb", "wbu", "w"}; - switch (opt->bam_output) { - case 0: // BAM compressed - case 1: // BAM uncompressed - case 2: // SAM - out = sam_open("-", modes[opt->bam_output]); - break; - default: - fprintf(stderr, "Error: output format was out of range [%d]\n", opt->bam_output); - return 1; - } -#endif - if (!(opt->flag & MEM_F_ALN_REG)) { -#ifdef USE_HTSLIB - kstring_t str; - str.l = str.m = 0; str.s = 0; - bwa_format_sam_hdr(idx->bns, rg_line, &str); - h = sam_hdr_parse(str.l, str.s); - h->l_text = str.l; h->text = str.s; - sam_hdr_write(out, h); -#else + if (!(opt->flag & MEM_F_ALN_REG)) bwa_print_sam_hdr(idx->bns, rg_line); -#endif - } actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads; while ((seqs = bseq_read(actual_chunk_size, &n, ks, ks2)) != 0) { int64_t size = 0; @@ -287,22 +250,11 @@ int main_mem(int argc, char *argv[]) for (i = 0; i < n; ++i) size += seqs[i].l_seq; if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, n, (long)size); - mem_process_seqs2(opt, idx->bwt, idx->bns, idx->pac, n_processed, n, seqs, pes0, h); + mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n_processed, n, seqs, pes0); n_processed += n; for (i = 0; i < n; ++i) { -#ifdef USE_HTSLIB - if (seqs[i].bams) { - int j; - for (j = 0; j < seqs[i].bams->l; j++) { - sam_write1(out, h, seqs[i].bams->bams[j]); - } - } - bams_destroy(seqs[i].bams); seqs[i].bams = NULL; -#else if (seqs[i].sam) err_fputs(seqs[i].sam, stdout); - free(seqs[i].sam); -#endif - free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); + free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam); } free(seqs); } @@ -311,10 +263,6 @@ int main_mem(int argc, char *argv[]) bwa_idx_destroy(idx); kseq_destroy(ks); err_gzclose(fp); kclose(ko); -#ifdef USE_HTSLIB - sam_close(out); - bam_hdr_destroy(h); -#endif if (ks2) { kseq_destroy(ks2); err_gzclose(fp2); kclose(ko2); diff --git a/main.c b/main.c index 057fe541..1505f9de 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.10-r836-dirty" +#define PACKAGE_VERSION "0.7.10-r837-dirty" #endif int bwa_fa2pac(int argc, char *argv[]); @@ -86,15 +86,8 @@ int main(int argc, char *argv[]) fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]); return 1; } -#ifdef USE_HTSLIB - if (strcmp(argv[1], "mem") != 0) { - err_fflush(stdout); - err_fclose(stdout); - } -#else err_fflush(stdout); err_fclose(stdout); -#endif if (ret == 0) { fprintf(stderr, "[%s] Version: %s\n", __func__, PACKAGE_VERSION); fprintf(stderr, "[%s] CMD:", __func__);