Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added 2 options -tn5 and -ext #685

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ src/utils/version/version_git.h
.cproject
nbproject
sandbox
.directory
.vscode/*
test/general/o
test/genomecov/chip.bam
test/genomecov/one_block.bam
Expand Down
137 changes: 121 additions & 16 deletions src/genomeCoverageBed/genomeCoverageBed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,
bool only_5p_end, bool only_3p_end,
bool pair_chip, bool haveSize, int fragmentSize, bool dUTP,
bool eachBaseZeroBased,
bool add_gb_track_line, string gb_track_line_opts) {
bool add_gb_track_line, string gb_track_line_opts,
int extensionSize, bool tn5) {

_bedFile = bedFile;
_genomeFile = genomeFile;
Expand All @@ -45,6 +46,8 @@ BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,
_dUTP = dUTP;
_add_gb_track_line = add_gb_track_line;
_gb_track_line_opts = gb_track_line_opts;
_extensionSize = extensionSize;
_tn5 = tn5;
_currChromName = "";
_currChromSize = 0 ;

Expand Down Expand Up @@ -141,14 +144,47 @@ void BedGenomeCoverage::AddCoverage(CHRPOS start, CHRPOS end) {
}


void BedGenomeCoverage::AddBlockedCoverage(const vector<BED> &bedBlocks) {
void BedGenomeCoverage::AddBlockedCoverage(const vector<BED> &bedBlocks,string strand) {
vector<BED>::const_iterator bedItr = bedBlocks.begin();
vector<BED>::const_iterator bedEnd = bedBlocks.end();
bool isEmpty=(bedItr==bedEnd);
bool isFirst=true;
int pos_start=0;
int pos_end=0;
for (; bedItr != bedEnd; ++bedItr) {
// I need to Add the Coverage of the previous step as the final step has
// additional modifications
if(!isFirst){
if (pos_start<0) {
AddCoverage(0,pos_end);
}
else
AddCoverage(pos_start,pos_end);
}
// the end - 1 must be done because BamAncillary::getBamBlocks
// returns ends uncorrected for the genomeCoverageBed data structure.
// ugly, but necessary.
AddCoverage(bedItr->start, bedItr->end - 1);
pos_start=bedItr->start;
pos_end=bedItr->end - 1;
if (isFirst) {
if (_tn5 && (strand=="+")){
pos_start = pos_start+4;
}
pos_start = pos_start - _extensionSize;
isFirst=false;
}
}
if (!isEmpty){
// I modify the last block
if (_tn5 && (strand=="-")){
pos_end = pos_end-5;
}
pos_end = pos_end + _extensionSize;
if (pos_start<0) {
AddCoverage(0,pos_end);
}
else
AddCoverage(pos_start,pos_end);
}
}

Expand Down Expand Up @@ -189,18 +225,41 @@ void BedGenomeCoverage::CoverageBed() {
if (_obeySplits == true) {
bedVector bedBlocks; // vec to store the discrete BED "blocks"
GetBedBlocks(a, bedBlocks);
AddBlockedCoverage(bedBlocks);
AddBlockedCoverage(bedBlocks,a.strand);
}
else if (_only_5p_end) {
CHRPOS pos = ( a.strand=="+" ) ? a.start : a.end-1;
AddCoverage(pos,pos);
if (_tn5) {
pos = ( a.strand=="+" ) ? pos+4 : pos-5;
}
if ( pos<_extensionSize ) { //sometimes extensionSize is bigger :(
AddCoverage(0, pos+_extensionSize);
}
else {
AddCoverage(pos-_extensionSize, pos+_extensionSize );
}
}
else if (_only_3p_end) {
CHRPOS pos = ( a.strand=="-" ) ? a.start : a.end-1;
AddCoverage(pos,pos);
if ( pos<_extensionSize ) { //sometimes extensionSize is bigger :(
AddCoverage(0, pos+_extensionSize);
}
else {
AddCoverage(pos-_extensionSize, pos+_extensionSize );
}
}
else {
AddCoverage(a.start, a.end-1);
CHRPOS pos_start=a.start;
CHRPOS pos_end=a.end-1;
if (_tn5) {
pos_start = ( a.strand=="+" ) ? pos_start+4 : pos_start;
pos_end = ( a.strand=="-" ) ? pos_end-5 : pos_end;
}
if ( pos_start<_extensionSize ) {
AddCoverage(0,pos_end+_extensionSize);
}
else
AddCoverage(pos_start-_extensionSize,pos_end+_extensionSize);
}
}
}
Expand Down Expand Up @@ -326,20 +385,52 @@ void BedGenomeCoverage::CoverageBam(string bamFile) {
} else */

if (bam.IsFirstMate() && bam.IsReverseStrand()) { //prolong to the mate to the left
AddCoverage(bam.MatePosition, end);
int pos_start=bam.MatePosition;
int pos_end=end;
if (_tn5) {
pos_start = pos_start+4;
pos_end = pos_end-5;
}
if ( pos_start<_extensionSize ) {
AddCoverage(0,pos_end+_extensionSize);
}
else
AddCoverage(pos_start-_extensionSize,pos_end+_extensionSize);
}
else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //prolong to the mate to the right
AddCoverage(start, start + abs(bam.InsertSize) - 1);
int pos_start=start;
int pos_end=start + abs(bam.InsertSize) - 1;
if (_tn5) {
pos_start = pos_start+4;
pos_end = pos_end-5;
}
if ( pos_start<_extensionSize ) {
AddCoverage(0,pos_end+_extensionSize);
}
else
AddCoverage(pos_start-_extensionSize,pos_end+_extensionSize);
}
} else if (_haveSize) {
if(bam.IsReverseStrand()) {
if(end<_fragmentSize) { //sometimes fragmentSize is bigger :(
AddCoverage(0, end);
int pos=end;
if (_tn5){
pos=pos-5;
}
if(pos<(_fragmentSize+_extensionSize)) { //sometimes fragmentSize is bigger :(
AddCoverage(0, pos);
} else {
AddCoverage(end + 1 - _fragmentSize, end );
AddCoverage(pos + 1 - _fragmentSize - _extensionSize, pos + _extensionSize);
}
} else {
AddCoverage(start,start+_fragmentSize - 1);
int pos=start;
if (_tn5){
pos=pos+4;
}
if(pos<_extensionSize){
AddCoverage(0,pos+_fragmentSize - 1+_extensionSize);
}
else
AddCoverage(pos-_extensionSize,pos+_fragmentSize - 1+_extensionSize);
}
} else
// add coverage accordingly.
Expand All @@ -353,15 +444,29 @@ void BedGenomeCoverage::CoverageBam(string bamFile) {
else { // "D" true, "N" false
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false);
}
AddBlockedCoverage(bedBlocks);
string readStrand = ( !bam.IsReverseStrand() ) ? "+" : "-";
AddBlockedCoverage(bedBlocks, readStrand);
}
else if (_only_5p_end) {
CHRPOS pos = ( !bam.IsReverseStrand() ) ? start : end;
AddCoverage(pos,pos);
if (_tn5) {
pos = ( !bam.IsReverseStrand() ) ? pos+4 : pos-5;
}
if ( pos<_extensionSize ) { //sometimes extensionSize is bigger :(
AddCoverage(0, pos+_extensionSize);
}
else {
AddCoverage(pos-_extensionSize, pos+_extensionSize );
}
}
else if (_only_3p_end) {
CHRPOS pos = ( bam.IsReverseStrand() ) ? start : end;
AddCoverage(pos,pos);
if ( pos<_extensionSize ) { //sometimes extensionSize is bigger :(
AddCoverage(0, pos+_extensionSize);
}
else {
AddCoverage(pos-_extensionSize, pos+_extensionSize );
}
}
}
// close the BAM
Expand Down
7 changes: 5 additions & 2 deletions src/genomeCoverageBed/genomeCoverageBed.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ class BedGenomeCoverage {
bool only_5p_end, bool only_3p_end,
bool pair_chip,bool haveSize, int fragmentSize, bool dUTP,
bool eachBaseZeroBased,
bool add_gb_track_line, string gb_track_line_opts);
bool add_gb_track_line, string gb_track_line_opts,
int extensionSize, bool tn5);

// destructor
~BedGenomeCoverage(void);
Expand Down Expand Up @@ -79,6 +80,8 @@ class BedGenomeCoverage {
bool _add_gb_track_line;
string _gb_track_line_opts;
string _requestedStrand;
int _extensionSize;
bool _tn5;

BedFile *_bed;
NewGenomeFile *_genome;
Expand All @@ -102,7 +105,7 @@ class BedGenomeCoverage {
void ResetChromCoverage();
void StartNewChrom (const string& chrom);
void AddCoverage (CHRPOS start, CHRPOS end);
void AddBlockedCoverage(const vector<BED> &bedBlocks);
void AddBlockedCoverage(const vector<BED> &bedBlocks, string strand);
void PrintFinalCoverage();
void PrintEmptyChromosomes();
void PrintTrackDefinitionLine();
Expand Down
42 changes: 39 additions & 3 deletions src/genomeCoverageBed/genomeCoverageMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ int genomecoverage_main(int argc, char* argv[]) {
string bedFile;
string genomeFile;
int max = INT_MAX;
int extensionSize = 0;
float scale = 1.0;
float fragmentSize = 146; //Nucleosome :)

Expand All @@ -53,6 +54,7 @@ int genomecoverage_main(int argc, char* argv[]) {
bool only_5p_end = false;
bool only_3p_end = false;
bool add_gb_track_line = false;
bool tn5 = false;
string gb_track_opts;
string requestedStrand = "X";

Expand Down Expand Up @@ -173,6 +175,18 @@ int genomecoverage_main(int argc, char* argv[]) {
showHelp = true;
}
}
else if(PARAMETER_CHECK("-ext", 4, parameterLength)) {
if ((i+1) < argc) {
extensionSize = atoi(argv[i+1]);
i++;
} else {
cerr << "*****ERROR: -ext options requires an integer value" << endl;
showHelp = true;
}
}
else if(PARAMETER_CHECK("-tn5", 4, parameterLength)) {
tn5 = true;
}
else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
Expand Down Expand Up @@ -207,6 +221,16 @@ int genomecoverage_main(int argc, char* argv[]) {
showHelp = true;
}

/*if ( tn5 && obeySplits) {
cerr << endl << "*****" << endl << "*****ERROR: Use -split can't be used with -tn5." << endl << "*****" << endl;
showHelp = true;
}

if ( (extensionSize>0) && obeySplits) {
cerr << endl << "*****" << endl << "*****ERROR: Use -split can't be used with -ext." << endl << "*****" << endl;
showHelp = true;
}*/

if (add_gb_track_line && !(bedGraph||bedGraphAll)) {
cerr << endl << "*****" << endl << "*****ERROR: Using -trackline requires bedGraph output (use -bg or -bga)." << endl << "*****" << endl;
showHelp = true;
Expand All @@ -225,7 +249,8 @@ int genomecoverage_main(int argc, char* argv[]) {
only_5p_end, only_3p_end,
pair_chip, haveSize, fragmentSize, dUTP,
eachBaseZeroBased,
add_gb_track_line, gb_track_opts);
add_gb_track_line, gb_track_opts,
extensionSize, tn5);
delete bc;
}
else {
Expand Down Expand Up @@ -280,7 +305,7 @@ void genomecoverage_help(void) {
cerr << "\t-fs\t\t" << "Force to use provided fragment size instead of read length" << endl;
cerr << "\t\t\tWorks for BAM files only" << endl;

cerr << "\t-du\t\t" << "Change strand af the mate read (so both reads from the same strand) useful for strand specific" << endl;
cerr << "\t-du\t\t" << "Change strand of the mate read (so both reads from the same strand) useful for strand specific" << endl;
cerr << "\t\t\tWorks for BAM files only" << endl;

cerr << "\t-5\t\t" << "Calculate coverage of 5\" positions (instead of entire interval)." << endl << endl;
Expand All @@ -303,14 +328,25 @@ void genomecoverage_help(void) {
cerr <<"\t\t\t http://genome.ucsc.edu/goldenPath/help/bedgraph.html" << endl;
cerr <<"\t\t\t- NOTE: When adding a trackline definition, the output BedGraph can be easily" << endl;
cerr <<"\t\t\t uploaded to the Genome Browser as a custom track," << endl;
cerr <<"\t\t\t BUT CAN NOT be converted into a BigWig file (w/o removing the first line)." << endl << endl;
//cerr <<"\t\t\t BUT CAN NOT be converted into a BigWig file (w/o removing the first line)." << endl << endl;
// With v 4 there is no issue.

cerr << "\t-trackopts\t"<<"Writes additional track line definition parameters in the first line." << endl;
cerr <<"\t\t\t- Example:" << endl;
cerr <<"\t\t\t -trackopts 'name=\"My Track\" visibility=2 color=255,30,30'" << endl;
cerr <<"\t\t\t Note the use of single-quotes if you have spaces in your parameters." << endl;
cerr <<"\t\t\t- (TEXT)" << endl << endl;

cerr << "\t-ext\t\t"<<"Extends the coverage in both directions of the desired number of bases." << endl;
cerr << "\t\t\tUseful when you have very sparse data like when you use -5 or -3." << endl;
cerr << "\t\t\t- Default is 0; i.e., no extension." << endl;
cerr << "\t\t\t- (INT)" << endl << endl;

cerr << "\t-tn5\t\t"<<"Shifts the 5' to match the insertion site of the Tn5." << endl;
cerr << "\t\t\tIt will shift the 5' extremity of 5bp to the left for reverse strand" << endl;
cerr << "\t\t\tand 4bp to the right for forward strand." << endl;
cerr << "\t\t\tUseful when you are working with ATAC-seq data and you want to see the footprint." << endl;

cerr << "Notes: " << endl;
cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl;
cerr << "\t <chromName><TAB><chromSize>" << endl << endl;
Expand Down
27 changes: 25 additions & 2 deletions test/genomecov/test-genomecov.sh
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,6 @@ $BT genomecov -ibam chip.bam -bg -fs 100 > obs
check obs exp
rm obs exp

rm one_block.bam two_blocks.bam three_blocks.bam sam-w-del.bam pair-chip.bam chip.bam

##################################################################
# Make sure empty bam doesn't cause failure
##################################################################
Expand All @@ -286,4 +284,29 @@ CRAM_REFERENCE=test_ref.fa $BT genomecov -ibam empty.cram > obs
check obs exp
rm obs exp

##################################################################
# Test chip with tn5
##################################################################
echo -e " genomecov.t18...\c"
echo \
"chr1 5 76 1
chr1 225 295 1" > exp
$BT genomecov -ibam chip.bam -bg -tn5 > obs
check obs exp
rm obs exp

##################################################################
# Test chip with ext
##################################################################
echo -e " genomecov.t19...\c"
echo \
"chr1 0 86 1
chr1 215 310 1" > exp
$BT genomecov -ibam chip.bam -bg -ext 10 > obs
check obs exp
rm obs exp


rm one_block.bam two_blocks.bam three_blocks.bam sam-w-del.bam pair-chip.bam chip.bam

[[ $FAILURES -eq 0 ]] || exit 1;