From 64d2ffb0f40bdd098cc0c716c2e3bfebaa943ee6 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 13 Feb 2019 16:16:54 +0100 Subject: [PATCH 1/3] added 2 options -tn5 and -ext --- .gitignore | 2 + src/genomeCoverageBed/genomeCoverageBed.cpp | 140 ++++++++++++++++--- src/genomeCoverageBed/genomeCoverageBed.h | 7 +- src/genomeCoverageBed/genomeCoverageMain.cpp | 42 +++++- test/genomecov/test-genomecov.sh | 22 +++ 5 files changed, 191 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index d510d7780..b558cbea9 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ src/utils/version/version_git.h .cproject nbproject sandbox +.directory +.vscode/* \ No newline at end of file diff --git a/src/genomeCoverageBed/genomeCoverageBed.cpp b/src/genomeCoverageBed/genomeCoverageBed.cpp index 0b58c4dfa..213a6a5be 100644 --- a/src/genomeCoverageBed/genomeCoverageBed.cpp +++ b/src/genomeCoverageBed/genomeCoverageBed.cpp @@ -22,7 +22,8 @@ BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile, bool only_5p_end, bool only_3p_end, bool pair_chip, bool haveSize, int fragmentSize, bool dUTP, bool eachBaseZeroBased, - bool add_gb_track_line, string gb_track_line_opts) { + bool add_gb_track_line, string gb_track_line_opts, + int extensionSize, bool tn5) { _bedFile = bedFile; _genomeFile = genomeFile; @@ -45,6 +46,8 @@ BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile, _dUTP = dUTP; _add_gb_track_line = add_gb_track_line; _gb_track_line_opts = gb_track_line_opts; + _extensionSize = extensionSize; + _tn5 = tn5; _currChromName = ""; _currChromSize = 0 ; @@ -138,14 +141,47 @@ void BedGenomeCoverage::AddCoverage(int start, int end) { } -void BedGenomeCoverage::AddBlockedCoverage(const vector &bedBlocks) { +void BedGenomeCoverage::AddBlockedCoverage(const vector &bedBlocks,string strand) { vector::const_iterator bedItr = bedBlocks.begin(); vector::const_iterator bedEnd = bedBlocks.end(); + bool isEmpty=(bedItr==bedEnd); + bool isFirst=true; + int pos_start=0; + int pos_end=0; for (; bedItr != bedEnd; ++bedItr) { + // I need to Add the Coverage of the previous step as the final step has + // additional modifications + if(!isFirst){ + if (pos_start<0) { + AddCoverage(0,pos_end); + } + else + AddCoverage(pos_start,pos_end); + } // the end - 1 must be done because BamAncillary::getBamBlocks // returns ends uncorrected for the genomeCoverageBed data structure. // ugly, but necessary. - AddCoverage(bedItr->start, bedItr->end - 1); + pos_start=bedItr->start; + pos_end=bedItr->end - 1; + if (isFirst) { + if (_tn5 && (strand=="+")){ + pos_start = pos_start+4; + } + pos_start = pos_start - _extensionSize; + isFirst=false; + } + } + if (!isEmpty){ + // I modify the last block + if (_tn5 && (strand=="-")){ + pos_end = pos_end-5; + } + pos_end = pos_end + _extensionSize; + if (pos_start<0) { + AddCoverage(0,pos_end); + } + else + AddCoverage(pos_start,pos_end); } } @@ -180,18 +216,42 @@ void BedGenomeCoverage::CoverageBed() { if (_obeySplits == true) { bedVector bedBlocks; // vec to store the discrete BED "blocks" GetBedBlocks(a, bedBlocks); - AddBlockedCoverage(bedBlocks); + AddBlockedCoverage(bedBlocks,a.strand); } else if (_only_5p_end) { int pos = ( a.strand=="+" ) ? a.start : a.end-1; - AddCoverage(pos,pos); + if (_tn5) { + pos = ( a.strand=="+" ) ? pos+4 : pos-5; + } + if ( pos<_extensionSize ) { //sometimes extensionSize is bigger :( + AddCoverage(0, pos+_extensionSize); + } + else { + AddCoverage(pos-_extensionSize, pos+_extensionSize ); + } } else if (_only_3p_end) { int pos = ( a.strand=="-" ) ? a.start : a.end-1; - AddCoverage(pos,pos); + if ( pos<_extensionSize ) { //sometimes extensionSize is bigger :( + AddCoverage(0, pos+_extensionSize); + } + else { + AddCoverage(pos-_extensionSize, pos+_extensionSize ); + } + } + else { + int pos_start=a.start; + int pos_end=a.end-1; + if (_tn5) { + pos_start = ( a.strand=="+" ) ? pos_start+4 : pos_start; + pos_end = ( a.strand=="-" ) ? pos_end-5 : pos_end; + } + if ( pos_start<_extensionSize ) { + AddCoverage(0,pos_end+_extensionSize); + } + else + AddCoverage(pos_start-_extensionSize,pos_end+_extensionSize); } - else - AddCoverage(a.start, a.end-1); } } _bed->Close(); @@ -314,20 +374,52 @@ void BedGenomeCoverage::CoverageBam(string bamFile) { } else */ if (bam.IsFirstMate() && bam.IsReverseStrand()) { //prolong to the mate to the left - AddCoverage(bam.MatePosition, end); + int pos_start=bam.MatePosition; + int pos_end=end; + if (_tn5) { + pos_start = pos_start+4; + pos_end = pos_end-5; + } + if ( pos_start<_extensionSize ) { + AddCoverage(0,pos_end+_extensionSize); + } + else + AddCoverage(pos_start-_extensionSize,pos_end+_extensionSize); } else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //prolong to the mate to the right - AddCoverage(start, start + abs(bam.InsertSize) - 1); + int pos_start=start; + int pos_end=start + abs(bam.InsertSize) - 1; + if (_tn5) { + pos_start = pos_start+4; + pos_end = pos_end-5; + } + if ( pos_start<_extensionSize ) { + AddCoverage(0,pos_end+_extensionSize); + } + else + AddCoverage(pos_start-_extensionSize,pos_end+_extensionSize); } } else if (_haveSize) { if(bam.IsReverseStrand()) { - if(end<_fragmentSize) { //sometimes fragmentSize is bigger :( - AddCoverage(0, end); + int pos=end; + if (_tn5){ + pos=pos-5; + } + if(pos<(_fragmentSize+_extensionSize)) { //sometimes fragmentSize is bigger :( + AddCoverage(0, pos); } else { - AddCoverage(end + 1 - _fragmentSize, end ); + AddCoverage(pos + 1 - _fragmentSize - _extensionSize, pos + _extensionSize); } } else { - AddCoverage(start,start+_fragmentSize - 1); + int pos=start; + if (_tn5){ + pos=pos+4; + } + if(pos<_extensionSize){ + AddCoverage(0,pos+_fragmentSize - 1+_extensionSize); + } + else + AddCoverage(pos-_extensionSize,pos+_fragmentSize - 1+_extensionSize); } } else // add coverage accordingly. @@ -341,15 +433,29 @@ void BedGenomeCoverage::CoverageBam(string bamFile) { else { // "D" true, "N" false GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false); } - AddBlockedCoverage(bedBlocks); + string readStrand = ( !bam.IsReverseStrand() ) ? "+" : "-"; + AddBlockedCoverage(bedBlocks, readStrand); } else if (_only_5p_end) { int pos = ( !bam.IsReverseStrand() ) ? start : end; - AddCoverage(pos,pos); + if (_tn5) { + pos = ( !bam.IsReverseStrand() ) ? pos+4 : pos-5; + } + if ( pos<_extensionSize ) { //sometimes extensionSize is bigger :( + AddCoverage(0, pos+_extensionSize); + } + else { + AddCoverage(pos-_extensionSize, pos+_extensionSize ); + } } else if (_only_3p_end) { int pos = ( bam.IsReverseStrand() ) ? start : end; - AddCoverage(pos,pos); + if ( pos<_extensionSize ) { //sometimes extensionSize is bigger :( + AddCoverage(0, pos+_extensionSize); + } + else { + AddCoverage(pos-_extensionSize, pos+_extensionSize ); + } } } // close the BAM diff --git a/src/genomeCoverageBed/genomeCoverageBed.h b/src/genomeCoverageBed/genomeCoverageBed.h index 656f31953..71e3df7b2 100644 --- a/src/genomeCoverageBed/genomeCoverageBed.h +++ b/src/genomeCoverageBed/genomeCoverageBed.h @@ -50,7 +50,8 @@ class BedGenomeCoverage { bool only_5p_end, bool only_3p_end, bool pair_chip,bool haveSize, int fragmentSize, bool dUTP, bool eachBaseZeroBased, - bool add_gb_track_line, string gb_track_line_opts); + bool add_gb_track_line, string gb_track_line_opts, + int extensionSize, bool tn5); // destructor ~BedGenomeCoverage(void); @@ -79,6 +80,8 @@ class BedGenomeCoverage { bool _add_gb_track_line; string _gb_track_line_opts; string _requestedStrand; + int _extensionSize; + bool _tn5; BedFile *_bed; GenomeFile *_genome; @@ -102,7 +105,7 @@ class BedGenomeCoverage { void ResetChromCoverage(); void StartNewChrom (const string& chrom); void AddCoverage (int start, int end); - void AddBlockedCoverage(const vector &bedBlocks); + void AddBlockedCoverage(const vector &bedBlocks, string strand); void PrintFinalCoverage(); void PrintEmptyChromosomes(); void PrintTrackDefinitionLine(); diff --git a/src/genomeCoverageBed/genomeCoverageMain.cpp b/src/genomeCoverageBed/genomeCoverageMain.cpp index 942d6ef59..81a1226bf 100644 --- a/src/genomeCoverageBed/genomeCoverageMain.cpp +++ b/src/genomeCoverageBed/genomeCoverageMain.cpp @@ -33,6 +33,7 @@ int genomecoverage_main(int argc, char* argv[]) { string bedFile; string genomeFile; int max = INT_MAX; + int extensionSize = 0; float scale = 1.0; float fragmentSize = 146; //Nucleosome :) @@ -53,6 +54,7 @@ int genomecoverage_main(int argc, char* argv[]) { bool only_5p_end = false; bool only_3p_end = false; bool add_gb_track_line = false; + bool tn5 = false; string gb_track_opts; string requestedStrand = "X"; @@ -173,6 +175,18 @@ int genomecoverage_main(int argc, char* argv[]) { showHelp = true; } } + else if(PARAMETER_CHECK("-ext", 4, parameterLength)) { + if ((i+1) < argc) { + extensionSize = atoi(argv[i+1]); + i++; + } else { + cerr << "*****ERROR: -ext options requires an integer value" << endl; + showHelp = true; + } + } + else if(PARAMETER_CHECK("-tn5", 4, parameterLength)) { + tn5 = true; + } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; showHelp = true; @@ -207,6 +221,16 @@ int genomecoverage_main(int argc, char* argv[]) { showHelp = true; } + /*if ( tn5 && obeySplits) { + cerr << endl << "*****" << endl << "*****ERROR: Use -split can't be used with -tn5." << endl << "*****" << endl; + showHelp = true; + } + + if ( (extensionSize>0) && obeySplits) { + cerr << endl << "*****" << endl << "*****ERROR: Use -split can't be used with -ext." << endl << "*****" << endl; + showHelp = true; + }*/ + if (add_gb_track_line && !(bedGraph||bedGraphAll)) { cerr << endl << "*****" << endl << "*****ERROR: Using -trackline requires bedGraph output (use -bg or -bga)." << endl << "*****" << endl; showHelp = true; @@ -225,7 +249,8 @@ int genomecoverage_main(int argc, char* argv[]) { only_5p_end, only_3p_end, pair_chip, haveSize, fragmentSize, dUTP, eachBaseZeroBased, - add_gb_track_line, gb_track_opts); + add_gb_track_line, gb_track_opts, + extensionSize, tn5); delete bc; } else { @@ -280,7 +305,7 @@ void genomecoverage_help(void) { cerr << "\t-fs\t\t" << "Force to use provided fragment size instead of read length" << endl; cerr << "\t\t\tWorks for BAM files only" << endl; - cerr << "\t-du\t\t" << "Change strand af the mate read (so both reads from the same strand) useful for strand specific" << endl; + cerr << "\t-du\t\t" << "Change strand of the mate read (so both reads from the same strand) useful for strand specific" << endl; cerr << "\t\t\tWorks for BAM files only" << endl; cerr << "\t-5\t\t" << "Calculate coverage of 5\" positions (instead of entire interval)." << endl << endl; @@ -303,7 +328,8 @@ void genomecoverage_help(void) { cerr <<"\t\t\t http://genome.ucsc.edu/goldenPath/help/bedgraph.html" << endl; cerr <<"\t\t\t- NOTE: When adding a trackline definition, the output BedGraph can be easily" << endl; cerr <<"\t\t\t uploaded to the Genome Browser as a custom track," << endl; - cerr <<"\t\t\t BUT CAN NOT be converted into a BigWig file (w/o removing the first line)." << endl << endl; + //cerr <<"\t\t\t BUT CAN NOT be converted into a BigWig file (w/o removing the first line)." << endl << endl; + // With v 4 there is no issue. cerr << "\t-trackopts\t"<<"Writes additional track line definition parameters in the first line." << endl; cerr <<"\t\t\t- Example:" << endl; @@ -311,6 +337,16 @@ void genomecoverage_help(void) { cerr <<"\t\t\t Note the use of single-quotes if you have spaces in your parameters." << endl; cerr <<"\t\t\t- (TEXT)" << endl << endl; + cerr << "\t-ext\t\t"<<"Extends the coverage in both directions of the desired number of bases." << endl; + cerr << "\t\t\tUseful when you have very sparse data like when you use -5 or -3." << endl; + cerr << "\t\t\t- Default is 0; i.e., no extension." << endl; + cerr << "\t\t\t- (INT)" << endl << endl; + + cerr << "\t-tn5\t\t"<<"Shifts the 5' to match the insertion site of the Tn5." << endl; + cerr << "\t\t\tIt will shift the 5' extremity of 5bp to the left for reverse strand" << endl; + cerr << "\t\t\tand 4bp to the right for forward strand." << endl; + cerr << "\t\t\tUseful when you are working with ATAC-seq data and you want to see the footprint." << endl; + cerr << "Notes: " << endl; cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl; cerr << "\t " << endl << endl; diff --git a/test/genomecov/test-genomecov.sh b/test/genomecov/test-genomecov.sh index 17f25d3a5..a134b714d 100755 --- a/test/genomecov/test-genomecov.sh +++ b/test/genomecov/test-genomecov.sh @@ -260,4 +260,26 @@ $BT genomecov -ibam chip.bam -bg -fs 100 > obs check obs exp rm obs exp +################################################################## +# Test chip with tn5 +################################################################## +echo -e " genomecov.t16...\c" +echo \ +"chr1 5 76 1 +chr1 225 295 1" > exp +$BT genomecov -ibam chip.bam -bg -tn5 > obs +check obs exp +rm obs exp + +################################################################## +# Test chip with ext +################################################################## +echo -e " genomecov.t17...\c" +echo \ +"chr1 0 86 1 +chr1 215 310 1" > exp +$BT genomecov -ibam chip.bam -bg -ext 10 > obs +check obs exp +rm obs exp + [[ $FAILURES -eq 0 ]] || exit 1; From 4d79b7f9081da9aa5e18d847d42e13b28fd55f50 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Tue, 7 May 2019 14:16:34 +0200 Subject: [PATCH 2/3] corrected position of remove intermediate files --- test/genomecov/test-genomecov.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/genomecov/test-genomecov.sh b/test/genomecov/test-genomecov.sh index fd04cf2bf..741d44801 100755 --- a/test/genomecov/test-genomecov.sh +++ b/test/genomecov/test-genomecov.sh @@ -294,7 +294,7 @@ echo \ chr1 225 295 1" > exp $BT genomecov -ibam chip.bam -bg -tn5 > obs check obs exp -rm one_block.bam two_blocks.bam three_blocks.bam sam-w-del.bam pair-chip.bam chip.bam +rm obs exp ################################################################## # Test chip with ext @@ -307,4 +307,7 @@ $BT genomecov -ibam chip.bam -bg -ext 10 > obs check obs exp rm obs exp + +rm one_block.bam two_blocks.bam three_blocks.bam sam-w-del.bam pair-chip.bam chip.bam + [[ $FAILURES -eq 0 ]] || exit 1; From 6bf6f19366d9b804b1f8615ad93998ced54f4c51 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 21 Aug 2019 11:29:45 +0200 Subject: [PATCH 3/3] corrected pull --- .../BamTools/include/api/BamAlgorithms.h | 21 -- src/utils/BamTools/include/api/BamConstants.h | 282 ------------------ src/utils/BamTools/include/api/BamIndex.h | 90 ------ src/utils/BamTools/include/api/IBamIODevice.h | 98 ------ src/utils/BamTools/include/api/SamHeader.h | 74 ----- src/utils/BamTools/include/api/SamProgram.h | 61 ---- .../BamTools/include/api/SamProgramChain.h | 85 ------ src/utils/BamTools/include/api/SamReadGroup.h | 68 ----- .../include/api/SamReadGroupDictionary.h | 85 ------ src/utils/BamTools/include/api/SamSequence.h | 60 ---- .../include/api/SamSequenceDictionary.h | 86 ------ src/utils/BamTools/include/api/api_global.h | 21 -- .../BamTools/include/shared/bamtools_global.h | 97 ------ 13 files changed, 1128 deletions(-) delete mode 100644 src/utils/BamTools/include/api/BamAlgorithms.h delete mode 100644 src/utils/BamTools/include/api/BamConstants.h delete mode 100644 src/utils/BamTools/include/api/BamIndex.h delete mode 100644 src/utils/BamTools/include/api/IBamIODevice.h delete mode 100644 src/utils/BamTools/include/api/SamHeader.h delete mode 100644 src/utils/BamTools/include/api/SamProgram.h delete mode 100644 src/utils/BamTools/include/api/SamProgramChain.h delete mode 100644 src/utils/BamTools/include/api/SamReadGroup.h delete mode 100644 src/utils/BamTools/include/api/SamReadGroupDictionary.h delete mode 100644 src/utils/BamTools/include/api/SamSequence.h delete mode 100644 src/utils/BamTools/include/api/SamSequenceDictionary.h delete mode 100644 src/utils/BamTools/include/api/api_global.h delete mode 100644 src/utils/BamTools/include/shared/bamtools_global.h diff --git a/src/utils/BamTools/include/api/BamAlgorithms.h b/src/utils/BamTools/include/api/BamAlgorithms.h deleted file mode 100644 index 61094123a..000000000 --- a/src/utils/BamTools/include/api/BamAlgorithms.h +++ /dev/null @@ -1,21 +0,0 @@ -// *************************************************************************** -// BamAlgorithms.h (c) 2009 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 10 October 2011 (DB) -// --------------------------------------------------------------------------- -// Provides generic algorithms that are intended to work with BamTools data -// structures. Where possible, these are intended to be STL-compatible. -// *************************************************************************** - -#ifndef BAMALGORITHMS_H -#define BAMALGORITHMS_H - -#include "api/algorithms/Sort.h" - -/*! \namespace BamTools::Algorithms - \brief Provides convenient classes & methods for working with BAM data -*/ - -#endif // BAM_ALGORITHMS_H diff --git a/src/utils/BamTools/include/api/BamConstants.h b/src/utils/BamTools/include/api/BamConstants.h deleted file mode 100644 index 88ab04643..000000000 --- a/src/utils/BamTools/include/api/BamConstants.h +++ /dev/null @@ -1,282 +0,0 @@ -// *************************************************************************** -// BamConstants.h (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 16 October 2011 (DB) -// --------------------------------------------------------------------------- -// Provides basic constants for handling BAM files. -// *************************************************************************** - -#ifndef BAM_CONSTANTS_H -#define BAM_CONSTANTS_H - -#include "api/api_global.h" -#include -#include - -/*! \namespace BamTools::Constants - \brief Provides basic constants for handling BAM files. -*/ - -namespace BamTools { -namespace Constants { - -const uint8_t BAM_SIZEOF_INT = 4; - -// header magic number -const char* const BAM_HEADER_MAGIC = "BAM\1"; -const uint8_t BAM_HEADER_MAGIC_LENGTH = 4; - -// BAM alignment core size -const uint8_t BAM_CORE_SIZE = 32; -const uint8_t BAM_CORE_BUFFER_SIZE = 8; - -// BAM alignment flags -const int BAM_ALIGNMENT_PAIRED = 0x0001; -const int BAM_ALIGNMENT_PROPER_PAIR = 0x0002; -const int BAM_ALIGNMENT_UNMAPPED = 0x0004; -const int BAM_ALIGNMENT_MATE_UNMAPPED = 0x0008; -const int BAM_ALIGNMENT_REVERSE_STRAND = 0x0010; -const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 0x0020; -const int BAM_ALIGNMENT_READ_1 = 0x0040; -const int BAM_ALIGNMENT_READ_2 = 0x0080; -const int BAM_ALIGNMENT_SECONDARY = 0x0100; -const int BAM_ALIGNMENT_QC_FAILED = 0x0200; -const int BAM_ALIGNMENT_DUPLICATE = 0x0400; - -// CIGAR constants -const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X"; -const uint8_t BAM_CIGAR_MATCH = 0; -const uint8_t BAM_CIGAR_INS = 1; -const uint8_t BAM_CIGAR_DEL = 2; -const uint8_t BAM_CIGAR_REFSKIP = 3; -const uint8_t BAM_CIGAR_SOFTCLIP = 4; -const uint8_t BAM_CIGAR_HARDCLIP = 5; -const uint8_t BAM_CIGAR_PAD = 6; -const uint8_t BAM_CIGAR_SEQMATCH = 7; -const uint8_t BAM_CIGAR_MISMATCH = 8; - -const char BAM_CIGAR_MATCH_CHAR = 'M'; -const char BAM_CIGAR_INS_CHAR = 'I'; -const char BAM_CIGAR_DEL_CHAR = 'D'; -const char BAM_CIGAR_REFSKIP_CHAR = 'N'; -const char BAM_CIGAR_SOFTCLIP_CHAR = 'S'; -const char BAM_CIGAR_HARDCLIP_CHAR = 'H'; -const char BAM_CIGAR_PAD_CHAR = 'P'; -const char BAM_CIGAR_SEQMATCH_CHAR = '='; -const char BAM_CIGAR_MISMATCH_CHAR = 'X'; - -const int BAM_CIGAR_SHIFT = 4; -const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1); - -// BAM tag types & sizes -const char BAM_TAG_TYPE_ASCII = 'A'; -const char BAM_TAG_TYPE_INT8 = 'c'; -const char BAM_TAG_TYPE_UINT8 = 'C'; -const char BAM_TAG_TYPE_INT16 = 's'; -const char BAM_TAG_TYPE_UINT16 = 'S'; -const char BAM_TAG_TYPE_INT32 = 'i'; -const char BAM_TAG_TYPE_UINT32 = 'I'; -const char BAM_TAG_TYPE_FLOAT = 'f'; -const char BAM_TAG_TYPE_STRING = 'Z'; -const char BAM_TAG_TYPE_HEX = 'H'; -const char BAM_TAG_TYPE_ARRAY = 'B'; - -const uint8_t BAM_TAG_TAGSIZE = 2; -const uint8_t BAM_TAG_TYPESIZE = 1; -const uint8_t BAM_TAG_ARRAYBASE_SIZE = 8; - -// DNA bases -const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN"; -const uint8_t BAM_BASECODE_EQUAL = 0; -const uint8_t BAM_BASECODE_A = 1; -const uint8_t BAM_BASECODE_C = 2; -const uint8_t BAM_BASECODE_M = 3; -const uint8_t BAM_BASECODE_G = 4; -const uint8_t BAM_BASECODE_R = 5; -const uint8_t BAM_BASECODE_S = 6; -const uint8_t BAM_BASECODE_V = 7; -const uint8_t BAM_BASECODE_T = 8; -const uint8_t BAM_BASECODE_W = 9; -const uint8_t BAM_BASECODE_Y = 10; -const uint8_t BAM_BASECODE_H = 11; -const uint8_t BAM_BASECODE_K = 12; -const uint8_t BAM_BASECODE_D = 13; -const uint8_t BAM_BASECODE_B = 14; -const uint8_t BAM_BASECODE_N = 15; - -const char BAM_DNA_EQUAL = '='; -const char BAM_DNA_A = 'A'; -const char BAM_DNA_C = 'C'; -const char BAM_DNA_M = 'M'; -const char BAM_DNA_G = 'G'; -const char BAM_DNA_R = 'R'; -const char BAM_DNA_S = 'S'; -const char BAM_DNA_V = 'V'; -const char BAM_DNA_T = 'T'; -const char BAM_DNA_W = 'W'; -const char BAM_DNA_Y = 'Y'; -const char BAM_DNA_H = 'H'; -const char BAM_DNA_K = 'K'; -const char BAM_DNA_D = 'D'; -const char BAM_DNA_B = 'B'; -const char BAM_DNA_N = 'N'; -const char BAM_DNA_DEL = '-'; -const char BAM_DNA_PAD = '*'; - -// zlib & BGZF constants -const char GZIP_ID1 = '\x1F'; -const char GZIP_ID2 = '\x8B'; -const char CM_DEFLATE = 8; -const char FLG_FEXTRA = 4; -const char OS_UNKNOWN = '\xFF'; -const char BGZF_XLEN = 6; -const char BGZF_ID1 = 66; -const char BGZF_ID2 = 67; -const char BGZF_LEN = 2; - -const int8_t GZIP_WINDOW_BITS = -15; -const int8_t Z_DEFAULT_MEM_LEVEL = 8; -const uint8_t BGZF_BLOCK_HEADER_LENGTH = 18; -const uint8_t BGZF_BLOCK_FOOTER_LENGTH = 8; -const uint32_t BGZF_MAX_BLOCK_SIZE = 65536; -const uint32_t BGZF_DEFAULT_BLOCK_SIZE = 65536; - -} // namespace Constants - -//! \cond -// ------------------------- -// tag-type helper structs -// ------------------------- - -// fail on any types not specified below -template -struct TagTypeHelper { - static bool CanConvertFrom(const char) { assert(false); return false; } - static bool CanConvertTo(const char) { assert(false); return false; } - static char TypeCode(void) { assert(false); return 0; } -}; - -template<> -struct TagTypeHelper { - static bool CanConvertFrom(const char c) { - return ( c == Constants::BAM_TAG_TYPE_ASCII || - c == Constants::BAM_TAG_TYPE_UINT8 ); - } - static bool CanConvertTo(const char c) { - return ( c == Constants::BAM_TAG_TYPE_ASCII || - c == Constants::BAM_TAG_TYPE_UINT8 || - c == Constants::BAM_TAG_TYPE_UINT16 || - c == Constants::BAM_TAG_TYPE_UINT32 ); - } - - static char TypeCode(void) { return Constants::BAM_TAG_TYPE_UINT8; } -}; - -template<> -struct TagTypeHelper { - static bool CanConvertFrom(const char c) { - return ( c == Constants::BAM_TAG_TYPE_ASCII || - c == Constants::BAM_TAG_TYPE_INT8 ); - } - static bool CanConvertTo(const char c) { - return ( c == Constants::BAM_TAG_TYPE_ASCII || - c == Constants::BAM_TAG_TYPE_INT8 || - c == Constants::BAM_TAG_TYPE_INT16 || - c == Constants::BAM_TAG_TYPE_INT32 ); - } - static char TypeCode(void) { return Constants::BAM_TAG_TYPE_INT8; } -}; - -template<> -struct TagTypeHelper { - static bool CanConvertFrom(const char c) { - return ( c == Constants::BAM_TAG_TYPE_ASCII || - c == Constants::BAM_TAG_TYPE_UINT8 || - c == Constants::BAM_TAG_TYPE_UINT16 ); - } - static bool CanConvertTo(const char c) { - return ( c == Constants::BAM_TAG_TYPE_UINT16 || - c == Constants::BAM_TAG_TYPE_UINT32); - } - static char TypeCode(void) { return Constants::BAM_TAG_TYPE_UINT16; } -}; - -template<> -struct TagTypeHelper { - static bool CanConvertFrom(const char c) { - return ( c == Constants::BAM_TAG_TYPE_ASCII || - c == Constants::BAM_TAG_TYPE_INT8 || - c == Constants::BAM_TAG_TYPE_INT16 ); - } - static bool CanConvertTo(const char c) { - return ( c == Constants::BAM_TAG_TYPE_INT16 || - c == Constants::BAM_TAG_TYPE_INT32); - } - static char TypeCode(void) { return Constants::BAM_TAG_TYPE_INT16; } -}; - -template<> -struct TagTypeHelper { - static bool CanConvertFrom(const char c) { - return ( c == Constants::BAM_TAG_TYPE_ASCII || - c == Constants::BAM_TAG_TYPE_UINT8 || - c == Constants::BAM_TAG_TYPE_UINT16 || - c == Constants::BAM_TAG_TYPE_UINT32 ); - } - static bool CanConvertTo(const char c) { - return ( c == Constants::BAM_TAG_TYPE_UINT32 ); - } - static char TypeCode(void) { return Constants::BAM_TAG_TYPE_UINT32; } -}; - -template<> -struct TagTypeHelper { - static bool CanConvertFrom(const char c) { - return ( c == Constants::BAM_TAG_TYPE_ASCII || - c == Constants::BAM_TAG_TYPE_INT8 || - c == Constants::BAM_TAG_TYPE_INT16 || - c == Constants::BAM_TAG_TYPE_INT32 ); - } - static bool CanConvertTo(const char c) { - return ( c == Constants::BAM_TAG_TYPE_INT32 ); - } - static char TypeCode(void) { return Constants::BAM_TAG_TYPE_INT32; } -}; - -template<> -struct TagTypeHelper { - static bool CanConvertFrom(const char c) { - return ( c == Constants::BAM_TAG_TYPE_ASCII || - c == Constants::BAM_TAG_TYPE_UINT8 || - c == Constants::BAM_TAG_TYPE_INT8 || - c == Constants::BAM_TAG_TYPE_UINT16 || - c == Constants::BAM_TAG_TYPE_INT16 || - c == Constants::BAM_TAG_TYPE_UINT32 || - c == Constants::BAM_TAG_TYPE_INT32 || - c == Constants::BAM_TAG_TYPE_FLOAT); - } - static bool CanConvertTo(const char c) { - return ( c == Constants::BAM_TAG_TYPE_FLOAT ); - } - static char TypeCode(void) { return Constants::BAM_TAG_TYPE_FLOAT; } -}; - -template<> -struct TagTypeHelper { - static bool CanConvertFrom(const char c) { - return ( c == Constants::BAM_TAG_TYPE_HEX || - c == Constants::BAM_TAG_TYPE_STRING ); - } - static bool CanConvertTo(const char c) { - return ( c == Constants::BAM_TAG_TYPE_HEX || - c == Constants::BAM_TAG_TYPE_STRING ); - } - static char TypeCode(void) { return Constants::BAM_TAG_TYPE_STRING; } -}; - -//! \endcond - -} // namespace BamTools - -#endif // BAM_CONSTANTS_H diff --git a/src/utils/BamTools/include/api/BamIndex.h b/src/utils/BamTools/include/api/BamIndex.h deleted file mode 100644 index fd41f6912..000000000 --- a/src/utils/BamTools/include/api/BamIndex.h +++ /dev/null @@ -1,90 +0,0 @@ -// *************************************************************************** -// BamIndex.h (c) 2009 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 10 October 2011 (DB) -// --------------------------------------------------------------------------- -// Provides basic BAM index interface -// *************************************************************************** - -#ifndef BAM_INDEX_H -#define BAM_INDEX_H - -#include "api/api_global.h" -#include "api/BamAux.h" -#include - -namespace BamTools { - -namespace Internal { - class BamReaderPrivate; -} // namespace Internal - -/*! \class BamTools::BamIndex - \brief Provides methods for generating & loading BAM index files. - - This class straddles the line between public API and internal - implementation detail. Most client code should never have to use this - class directly. - - It is exposed to the public API to allow advanced users to implement - their own custom indexing schemes. -*/ - -class API_EXPORT BamIndex { - - // enums - public: - - // list of supported BamIndex types - enum IndexType { BAMTOOLS = 0 - , STANDARD - }; - - // ctor & dtor - public: - BamIndex(Internal::BamReaderPrivate* reader) : m_reader(reader) { } - virtual ~BamIndex(void) { } - - // index interface - public: - // builds index from associated BAM file & writes out to index file - virtual bool Create(void) =0; - - // returns a human-readable description of the last error encountered - std::string GetErrorString(void) { return m_errorString; } - - // returns whether reference has alignments or no - virtual bool HasAlignments(const int& referenceID) const =0; - - // attempts to use index data to jump to @region, returns success/fail - // a "successful" jump indicates no error, but not whether this region has data - // * thus, the method sets a flag to indicate whether there are alignments - // available after the jump position - virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0; - - // loads existing data from file into memory - virtual bool Load(const std::string& filename) =0; - - // returns the 'type' enum for derived index format - virtual BamIndex::IndexType Type(void) const =0; - - //! \cond - - // internal methods - protected: - void SetErrorString(const std::string& where, const std::string& what) const { - m_errorString = where + ": " + what; - } - - // data members - protected: - Internal::BamReaderPrivate* m_reader; // copy, not owned - mutable std::string m_errorString; - - //! \endcond -}; - -} // namespace BamTools - -#endif // BAM_INDEX_H diff --git a/src/utils/BamTools/include/api/IBamIODevice.h b/src/utils/BamTools/include/api/IBamIODevice.h deleted file mode 100644 index cf641298a..000000000 --- a/src/utils/BamTools/include/api/IBamIODevice.h +++ /dev/null @@ -1,98 +0,0 @@ -// *************************************************************************** -// IBamIODevice.h (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 10 November 2011 (DB) -// --------------------------------------------------------------------------- -// Base class for all BAM I/O devices (e.g. local file, pipe, HTTP, FTP, etc.) -// -// Derived classes should provide protocol-specific implementations for -// reading/writing plain bytes, as well as other I/O-related behaviors. -// -// Since IBamIODevices may be defined in client code, the internal -// BamExceptions are NOT allowed to be thrown from devices, including the -// built-in ones. This keeps a consistent interface at the BgzfStream for -// handling any device type. Use the error string for relaying error messages. -// *************************************************************************** - -#ifndef IBAMIODEVICE_H -#define IBAMIODEVICE_H - -#include "api/api_global.h" -#include -#include - -namespace BamTools { - -class API_EXPORT IBamIODevice { - - // enums - public: enum OpenMode { NotOpen = 0x0000 - , ReadOnly = 0x0001 - , WriteOnly = 0x0002 - , ReadWrite = ReadOnly | WriteOnly - }; - - // ctor & dtor - public: - virtual ~IBamIODevice(void) { } - - // IBamIODevice interface - public: - - // TODO: add seek(pos, *from*) - - // pure virtuals - virtual void Close(void) =0; - virtual bool IsRandomAccess(void) const =0; - virtual bool Open(const OpenMode mode) =0; - virtual int64_t Read(char* data, const unsigned int numBytes) =0; - virtual bool Seek(const int64_t& position, const int origin = SEEK_SET) =0; - virtual int64_t Tell(void) const =0; - virtual int64_t Write(const char* data, const unsigned int numBytes) =0; - - // default implementation provided - virtual std::string GetErrorString(void); - virtual bool IsOpen(void) const; - virtual OpenMode Mode(void) const; - - // internal methods - protected: - IBamIODevice(void); // hidden ctor - void SetErrorString(const std::string& where, const std::string& what); - - // data members - protected: - OpenMode m_mode; - std::string m_errorString; -}; - -inline -IBamIODevice::IBamIODevice(void) - : m_mode(IBamIODevice::NotOpen) -{ } - -inline -std::string IBamIODevice::GetErrorString(void) { - return m_errorString; -} - -inline -bool IBamIODevice::IsOpen(void) const { - return ( m_mode != IBamIODevice::NotOpen ); -} - -inline -IBamIODevice::OpenMode IBamIODevice::Mode(void) const { - return m_mode; -} - -inline -void IBamIODevice::SetErrorString(const std::string& where, const std::string& what) { - static const std::string SEPARATOR = ": "; - m_errorString = where + SEPARATOR + what; -} - -} // namespace BamTools - -#endif // IBAMIODEVICE_H diff --git a/src/utils/BamTools/include/api/SamHeader.h b/src/utils/BamTools/include/api/SamHeader.h deleted file mode 100644 index 869a3478b..000000000 --- a/src/utils/BamTools/include/api/SamHeader.h +++ /dev/null @@ -1,74 +0,0 @@ -// *************************************************************************** -// SamHeader.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 10 October 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM header data fields. -// *************************************************************************** - -#ifndef SAM_HEADER_H -#define SAM_HEADER_H - -#include "api/api_global.h" -#include "api/SamProgramChain.h" -#include "api/SamReadGroupDictionary.h" -#include "api/SamSequenceDictionary.h" -#include -#include - -namespace BamTools { - -class API_EXPORT SamHeader { -public: - // ctor & dtor - SamHeader(const std::string& headerText = ""); - SamHeader(const SamHeader& other); - ~SamHeader(void); - - // query/modify entire SamHeader - void Clear(void); // clears all header contents - std::string GetErrorString(void) const; - bool HasError(void) const; - bool IsValid(bool verbose = false) const; // returns true if SAM header is well-formed - void SetHeaderText(const std::string& headerText); // replaces data fields with contents of SAM-formatted text - std::string ToString(void) const; // returns the printable, SAM-formatted header text - - // convenience query methods - bool HasVersion(void) const; // returns true if header contains format version entry - bool HasSortOrder(void) const; // returns true if header contains sort order entry - bool HasGroupOrder(void) const; // returns true if header contains group order entry - bool HasSequences(void) const; // returns true if header contains any sequence entries - bool HasReadGroups(void) const; // returns true if header contains any read group entries - bool HasPrograms(void) const; // returns true if header contains any program record entries - bool HasComments(void) const; // returns true if header contains comments - - // -------------- - // data members - // -------------- - - // header metadata (@HD line) - std::string Version; // VN: *Required, if @HD record is present* - std::string SortOrder; // SO: - std::string GroupOrder; // GO: - - // header sequences (@SQ entries) - SamSequenceDictionary Sequences; - - // header read groups (@RG entries) - SamReadGroupDictionary ReadGroups; - - // header program data (@PG entries) - SamProgramChain Programs; - - // header comments (@CO entries) - std::vector Comments; - - // internal data - private: - mutable std::string m_errorString; -}; - -} // namespace BamTools - -#endif // SAM_HEADER_H diff --git a/src/utils/BamTools/include/api/SamProgram.h b/src/utils/BamTools/include/api/SamProgram.h deleted file mode 100644 index 54da8723c..000000000 --- a/src/utils/BamTools/include/api/SamProgram.h +++ /dev/null @@ -1,61 +0,0 @@ -// *************************************************************************** -// SamProgram.h (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 10 October 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM header program records. -// *************************************************************************** - -#ifndef SAM_PROGRAM_H -#define SAM_PROGRAM_H - -#include "api/api_global.h" -#include - -namespace BamTools { - -class SamProgramChain; - -struct API_EXPORT SamProgram { - - // ctor & dtor - SamProgram(void); - SamProgram(const std::string& id); - SamProgram(const SamProgram& other); - ~SamProgram(void); - - // query/modify entire program record - void Clear(void); // clears all data fields - - // convenience query methods - bool HasCommandLine(void) const; // returns true if program record has a command line entry - bool HasID(void) const; // returns true if program record has an ID - bool HasName(void) const; // returns true if program record has a name - bool HasPreviousProgramID(void) const; // returns true if program record has a 'previous program ID' - bool HasVersion(void) const; // returns true if program record has a version - - // data members - std::string CommandLine; // CL: - std::string ID; // ID: *Required for valid SAM header* - std::string Name; // PN: - std::string PreviousProgramID; // PP: - std::string Version; // VN: - - // internal (non-standard) methods & fields - private: - bool HasNextProgramID(void) const; - std::string NextProgramID; - friend class BamTools::SamProgramChain; -}; - -/*! \fn bool operator==(const SamProgram& lhs, const SamProgram& rhs) - \brief tests equality by comparing program IDs -*/ -API_EXPORT inline bool operator==(const SamProgram& lhs, const SamProgram& rhs) { - return lhs.ID == rhs.ID; -} - -} // namespace BamTools - -#endif // SAM_PROGRAM_H diff --git a/src/utils/BamTools/include/api/SamProgramChain.h b/src/utils/BamTools/include/api/SamProgramChain.h deleted file mode 100644 index a2bd5322a..000000000 --- a/src/utils/BamTools/include/api/SamProgramChain.h +++ /dev/null @@ -1,85 +0,0 @@ -// *************************************************************************** -// SamProgramChain.h (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 10 October 2011 (DB) -// --------------------------------------------------------------------------- -// Provides methods for operating on a SamProgram record "chain" -// *************************************************************************** - -#ifndef SAM_PROGRAMCHAIN_H -#define SAM_PROGRAMCHAIN_H - -#include "api/api_global.h" -#include "api/SamProgram.h" -#include -#include - -namespace BamTools { - -// chain is *NOT* sorted in any order -// use First()/Last() to retrieve oldest/newest programs, respectively -typedef std::vector SamProgramContainer; -typedef SamProgramContainer::iterator SamProgramIterator; -typedef SamProgramContainer::const_iterator SamProgramConstIterator; - -class API_EXPORT SamProgramChain { - - // ctor & dtor - public: - SamProgramChain(void); - SamProgramChain(const SamProgramChain& other); - ~SamProgramChain(void); - - // query/modify program data - public: - // appends a program record to the chain - void Add(SamProgram& program); - void Add(std::vector& programs); - - // clears all read group entries - void Clear(void); - - // returns true if chain contains this program record (matches on ID) - bool Contains(const SamProgram& program) const; - bool Contains(const std::string& programId) const; - - // returns the first (oldest) program in the chain - SamProgram& First(void); - const SamProgram& First(void) const; - - // returns true if chain is empty - bool IsEmpty(void) const; - - // returns last (most recent) program in the chain - SamProgram& Last(void); - const SamProgram& Last(void) const; - - // returns number of program records in the chain - int Size(void) const; - - // retrieves a modifiable reference to the SamProgram object associated with this ID - SamProgram& operator[](const std::string& programId); - - // retrieve STL-compatible iterators - public: - SamProgramIterator Begin(void); // returns iterator to begin() - SamProgramConstIterator Begin(void) const; // returns const_iterator to begin() - SamProgramConstIterator ConstBegin(void) const; // returns const_iterator to begin() - SamProgramIterator End(void); // returns iterator to end() - SamProgramConstIterator End(void) const; // returns const_iterator to end() - SamProgramConstIterator ConstEnd(void) const; // returns const_iterator to end() - - // internal methods - private: - int IndexOf(const std::string& programId) const; - const std::string NextIdFor(const std::string& programId) const; - - // data members - private: - SamProgramContainer m_data; -}; - -} // namespace BamTools - -#endif // SAM_PROGRAMCHAIN_H diff --git a/src/utils/BamTools/include/api/SamReadGroup.h b/src/utils/BamTools/include/api/SamReadGroup.h deleted file mode 100644 index 093ce2d76..000000000 --- a/src/utils/BamTools/include/api/SamReadGroup.h +++ /dev/null @@ -1,68 +0,0 @@ -// *************************************************************************** -// SamReadGroup.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 10 October 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM read group data fields. -// *************************************************************************** - -#ifndef SAM_READGROUP_H -#define SAM_READGROUP_H - -#include "api/api_global.h" -#include - -namespace BamTools { - -struct API_EXPORT SamReadGroup { - - // ctor & dtor - SamReadGroup(void); - SamReadGroup(const std::string& id); - SamReadGroup(const SamReadGroup& other); - ~SamReadGroup(void); - - // query/modify entire read group - void Clear(void); // clears all data fields - - // convenience query methods - bool HasDescription(void) const; // returns true if read group has a description - bool HasFlowOrder(void) const; // returns true if read group has a flow order entry - bool HasID(void) const; // returns true if read group has a group ID - bool HasKeySequence(void) const; // returns true if read group has a key sequence - bool HasLibrary(void) const; // returns true if read group has a library name - bool HasPlatformUnit(void) const; // returns true if read group has a platform unit ID - bool HasPredictedInsertSize(void) const; // returns true if read group has a predicted insert size - bool HasProductionDate(void) const; // returns true if read group has a production date - bool HasProgram(void) const; // returns true if read group has a program entry - bool HasSample(void) const; // returns true if read group has a sample name - bool HasSequencingCenter(void) const; // returns true if read group has a sequencing center ID - bool HasSequencingTechnology(void) const; // returns true if read group has a sequencing technology ID - - - // data fields - std::string Description; // DS: - std::string FlowOrder; // FO: - std::string ID; // ID: *Required for valid SAM header* - std::string KeySequence; // KS: - std::string Library; // LB: - std::string PlatformUnit; // PU: - std::string PredictedInsertSize; // PI: - std::string ProductionDate; // DT: - std::string Program; // PG: - std::string Sample; // SM: - std::string SequencingCenter; // CN: - std::string SequencingTechnology; // PL: -}; - -/*! \fn bool operator==(const SamReadGroup& lhs, const SamReadGroup& rhs) - \brief tests equality by comparing read group IDs -*/ -API_EXPORT inline bool operator==(const SamReadGroup& lhs, const SamReadGroup& rhs) { - return lhs.ID == rhs.ID; -} - -} // namespace BamTools - -#endif // SAM_READGROUP_H diff --git a/src/utils/BamTools/include/api/SamReadGroupDictionary.h b/src/utils/BamTools/include/api/SamReadGroupDictionary.h deleted file mode 100644 index a4aeda951..000000000 --- a/src/utils/BamTools/include/api/SamReadGroupDictionary.h +++ /dev/null @@ -1,85 +0,0 @@ -// *************************************************************************** -// SamReadGroupDictionary.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 16 October 2011 (DB) -// --------------------------------------------------------------------------- -// Provides methods for operating on a collection of SamReadGroup entries. -// *************************************************************************** - -#ifndef SAM_READGROUP_DICTIONARY_H -#define SAM_READGROUP_DICTIONARY_H - -#include "api/api_global.h" -#include "api/SamReadGroup.h" -#include -#include -#include - -namespace BamTools { - -typedef std::vector SamReadGroupContainer; -typedef SamReadGroupContainer::iterator SamReadGroupIterator; -typedef SamReadGroupContainer::const_iterator SamReadGroupConstIterator; - -class API_EXPORT SamReadGroupDictionary { - - // ctor & dtor - public: - SamReadGroupDictionary(void); - SamReadGroupDictionary(const SamReadGroupDictionary& other); - ~SamReadGroupDictionary(void); - - // query/modify read group data - public: - // adds a read group - void Add(const SamReadGroup& readGroup); - void Add(const std::string& readGroupId); - - // adds multiple read groups - void Add(const SamReadGroupDictionary& readGroups); - void Add(const std::vector& readGroups); - void Add(const std::vector& readGroupIds); - - // clears all read group entries - void Clear(void); - - // returns true if dictionary contains this read group - bool Contains(const SamReadGroup& readGroup) const; - bool Contains(const std::string& readGroupId) const; - - // returns true if dictionary is empty - bool IsEmpty(void) const; - - // removes read group, if found - void Remove(const SamReadGroup& readGroup); - void Remove(const std::string& readGroupId); - - // removes multiple read groups - void Remove(const std::vector& readGroups); - void Remove(const std::vector& readGroupIds); - - // returns number of read groups in dictionary - int Size(void) const; - - // retrieves a modifiable reference to the SamReadGroup object associated with this ID - SamReadGroup& operator[](const std::string& readGroupId); - - // retrieve STL-compatible iterators - public: - SamReadGroupIterator Begin(void); // returns iterator to begin() - SamReadGroupConstIterator Begin(void) const; // returns const_iterator to begin() - SamReadGroupConstIterator ConstBegin(void) const; // returns const_iterator to begin() - SamReadGroupIterator End(void); // returns iterator to end() - SamReadGroupConstIterator End(void) const; // returns const_iterator to end() - SamReadGroupConstIterator ConstEnd(void) const; // returns const_iterator to end() - - // data members - private: - SamReadGroupContainer m_data; - std::map m_lookupData; -}; - -} // namespace BamTools - -#endif // SAM_READGROUP_DICTIONARY_H diff --git a/src/utils/BamTools/include/api/SamSequence.h b/src/utils/BamTools/include/api/SamSequence.h deleted file mode 100644 index c1a879206..000000000 --- a/src/utils/BamTools/include/api/SamSequence.h +++ /dev/null @@ -1,60 +0,0 @@ -// *************************************************************************** -// SamSequence.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 10 October 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM sequence data fields. -// *************************************************************************** - -#ifndef SAM_SEQUENCE_H -#define SAM_SEQUENCE_H - -#include "api/api_global.h" -#include - -namespace BamTools { - -struct API_EXPORT SamSequence { - - // ctor & dtor - SamSequence(void); - SamSequence(const std::string& name, const int& length); - SamSequence(const std::string& name, const std::string& length); - SamSequence(const SamSequence& other); - ~SamSequence(void); - - // query/modify entire sequence - void Clear(void); // clears all contents - - // convenience query methods - bool HasAssemblyID(void) const; // returns true if sequence has an assembly ID - bool HasChecksum(void) const; // returns true if sequence has an MD5 checksum - bool HasLength(void) const; // returns true if sequence has a length - bool HasName(void) const; // returns true if sequence has a name - bool HasSpecies(void) const; // returns true if sequence has a species ID - bool HasURI(void) const; // returns true if sequence has a URI - - // data members - std::string AssemblyID; // AS: - std::string Checksum; // M5: - std::string Length; // LN: *Required for valid SAM header* - std::string Name; // SN: *Required for valid SAM header* - std::string Species; // SP: - std::string URI; // UR: -}; - -/*! \fn bool operator==(const SamSequence& lhs, const SamSequence& rhs) - \brief tests equality by comparing sequence names, lengths, & checksums (if available) -*/ -API_EXPORT inline bool operator==(const SamSequence& lhs, const SamSequence& rhs) { - if ( lhs.Name != rhs.Name ) return false; - if ( lhs.Length != rhs.Length ) return false; - if ( lhs.HasChecksum() && rhs.HasChecksum() ) - return (lhs.Checksum == rhs.Checksum); - else return true; -} - -} // namespace BamTools - -#endif // SAM_SEQUENCE_H diff --git a/src/utils/BamTools/include/api/SamSequenceDictionary.h b/src/utils/BamTools/include/api/SamSequenceDictionary.h deleted file mode 100644 index d267dbdac..000000000 --- a/src/utils/BamTools/include/api/SamSequenceDictionary.h +++ /dev/null @@ -1,86 +0,0 @@ -// *************************************************************************** -// SamSequenceDictionary.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 16 October 2011 -// --------------------------------------------------------------------------- -// Provides methods for operating on a collection of SamSequence entries. -// *************************************************************************** - -#ifndef SAM_SEQUENCE_DICTIONARY_H -#define SAM_SEQUENCE_DICTIONARY_H - -#include "api/api_global.h" -#include "api/SamSequence.h" -#include -#include -#include - -namespace BamTools { - -typedef std::vector SamSequenceContainer; -typedef SamSequenceContainer::iterator SamSequenceIterator; -typedef SamSequenceContainer::const_iterator SamSequenceConstIterator; - -class API_EXPORT SamSequenceDictionary { - - // ctor & dtor - public: - SamSequenceDictionary(void); - SamSequenceDictionary(const SamSequenceDictionary& other); - ~SamSequenceDictionary(void); - - // query/modify sequence data - public: - // adds a sequence - void Add(const SamSequence& sequence); - void Add(const std::string& name, const int& length); - - // adds multiple sequences - void Add(const SamSequenceDictionary& sequences); - void Add(const std::vector& sequences); - void Add(const std::map& sequenceMap); - - // clears all sequence entries - void Clear(void); - - // returns true if dictionary contains this sequence - bool Contains(const SamSequence& sequence) const; - bool Contains(const std::string& sequenceName) const; - - // returns true if dictionary is empty - bool IsEmpty(void) const; - - // removes sequence, if found - void Remove(const SamSequence& sequence); - void Remove(const std::string& sequenceName); - - // removes multiple sequences - void Remove(const std::vector& sequences); - void Remove(const std::vector& sequenceNames); - - // returns number of sequences in dictionary - int Size(void) const; - - // retrieves a modifiable reference to the SamSequence object associated with this name - SamSequence& operator[](const std::string& sequenceName); - - // retrieve STL-compatible iterators - public: - SamSequenceIterator Begin(void); // returns iterator to begin() - SamSequenceConstIterator Begin(void) const; // returns const_iterator to begin() - SamSequenceConstIterator ConstBegin(void) const; // returns const_iterator to begin() - SamSequenceIterator End(void); // returns iterator to end() - SamSequenceConstIterator End(void) const; // returns const_iterator to end() - SamSequenceConstIterator ConstEnd(void) const; // returns const_iterator to end() - - // data members - private: - SamSequenceContainer m_data; - std::map m_lookupData; -}; - -} // namespace BamTools - -#endif // SAM_SEQUENCE_DICTIONARY_H - diff --git a/src/utils/BamTools/include/api/api_global.h b/src/utils/BamTools/include/api/api_global.h deleted file mode 100644 index f1c235348..000000000 --- a/src/utils/BamTools/include/api/api_global.h +++ /dev/null @@ -1,21 +0,0 @@ -// *************************************************************************** -// api_global.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 19 November 2010 (DB) -// --------------------------------------------------------------------------- -// Provides macros for exporting & importing BamTools API library symbols -// *************************************************************************** - -#ifndef API_GLOBAL_H -#define API_GLOBAL_H - -#include "shared/bamtools_global.h" - -#ifdef BAMTOOLS_API_LIBRARY -# define API_EXPORT BAMTOOLS_LIBRARY_EXPORT -#else -# define API_EXPORT BAMTOOLS_LIBRARY_IMPORT -#endif - -#endif // API_GLOBAL_H diff --git a/src/utils/BamTools/include/shared/bamtools_global.h b/src/utils/BamTools/include/shared/bamtools_global.h deleted file mode 100644 index e37bff6e2..000000000 --- a/src/utils/BamTools/include/shared/bamtools_global.h +++ /dev/null @@ -1,97 +0,0 @@ -// *************************************************************************** -// bamtools_global.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// --------------------------------------------------------------------------- -// Last modified: 10 October 2011 (DB) -// --------------------------------------------------------------------------- -// Provides the basic definitions for exporting & importing library symbols. -// Also provides some platform-specific rules for definitions. -// *************************************************************************** - -#ifndef BAMTOOLS_GLOBAL_H -#define BAMTOOLS_GLOBAL_H - -/*! \brief Library export macro - \internal -*/ -#ifndef BAMTOOLS_LIBRARY_EXPORT -# if defined(WIN32) -# define BAMTOOLS_LIBRARY_EXPORT __declspec(dllexport) -# else -# define BAMTOOLS_LIBRARY_EXPORT __attribute__((visibility("default"))) -# endif -#endif // BAMTOOLS_LIBRARY_EXPORT - -/*! \brief Library import macro - \internal -*/ -#ifndef BAMTOOLS_LIBRARY_IMPORT -# if defined(WIN32) -# define BAMTOOLS_LIBRARY_IMPORT __declspec(dllimport) -# else -# define BAMTOOLS_LIBRARY_IMPORT -# endif -#endif // BAMTOOLS_LIBRARY_IMPORT - -/*! \brief Platform-specific type definitions - \internal -*/ -#ifndef BAMTOOLS_LFS -#define BAMTOOLS_LFS -# ifdef WIN32 -# define ftell64(a) _ftelli64(a) -# define fseek64(a,b,c) _fseeki64(a,b,c) -# else -# define ftell64(a) ftello(a) -# define fseek64(a,b,c) fseeko(a,b,c) -# endif -#endif // BAMTOOLS_LFS - -/*! \def ftell64(a) - \brief Platform-independent tell() operation. - \internal -*/ -/*! \def fseek64(a,b,c) - \brief Platform-independent seek() operation. - \internal -*/ - -/*! \brief Platform-specific type definitions - \internal -*/ -#ifndef BAMTOOLS_TYPES -#define BAMTOOLS_TYPES -# ifdef _MSC_VER - typedef char int8_t; - typedef unsigned char uint8_t; - typedef short int16_t; - typedef unsigned short uint16_t; - typedef int int32_t; - typedef unsigned int uint32_t; - typedef long long int64_t; - typedef unsigned long long uint64_t; -# else -# include -# endif -#endif // BAMTOOLS_TYPES - -//! \internal -inline void bamtools_noop(void) { } - -/*! \brief Assert definitions - \internal -*/ -#ifndef BAMTOOLS_ASSERTS -#define BAMTOOLS_ASSERTS -# ifdef NDEBUG -# define BT_ASSERT_UNREACHABLE bamtools_noop() -# define BT_ASSERT_X( condition, message ) bamtools_noop() -# else -# include -# include -# define BT_ASSERT_UNREACHABLE assert( false ) -# define BT_ASSERT_X( condition, message ) if (!( condition )) { throw std::runtime_error( message ); } -# endif -#endif // BAMTOOLS_ASSERTS - -#endif // BAMTOOLS_GLOBAL_H