Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions src/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,6 @@ typedef unsigned char uint8;

const char ATCG_BASES[] = {'A', 'T', 'C', 'G'};

#pragma pack(2)


#pragma pack()


// how many reads one pack has
static const int PACK_SIZE = 256;

Expand All @@ -50,6 +44,7 @@ static const int FAIL_LENGTH = 16;
static const int FAIL_TOO_LONG = 17;
static const int FAIL_QUALITY = 20;
static const int FAIL_COMPLEXITY = 24;
static const int FAIL_ADAPTER_DIMER = 28;

// how many types in total we support
static const int FILTER_RESULT_TYPES = 32;
Expand All @@ -62,7 +57,7 @@ const static char* FAILED_TYPES[FILTER_RESULT_TYPES] = {
"failed_too_short", "failed_too_long", "", "",
"failed_quality_filter", "", "", "",
"failed_low_complexity", "", "", "",
"", "", "", ""
"failed_adapter_dimer", "", "", ""
};

#endif /* COMMON_H */
12 changes: 12 additions & 0 deletions src/filterresult.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ FilterResult::FilterResult(Options* opt, bool paired){
mTrimmedAdapterRead = 0;
mTrimmedAdapterBases = 0;
mMergedPairs = 0;
mAdapterDimerReads = 0;
for(int i=0; i<FILTER_RESULT_TYPES; i++) {
mFilterReadStats[i] = 0;
}
Expand Down Expand Up @@ -50,6 +51,7 @@ FilterResult* FilterResult::merge(vector<FilterResult*>& list) {
result->mTrimmedAdapterRead += list[i]->mTrimmedAdapterRead;
result->mTrimmedAdapterBases += list[i]->mTrimmedAdapterBases;
result->mMergedPairs += list[i]->mMergedPairs;
result->mAdapterDimerReads += list[i]->mAdapterDimerReads;

for(int b=0; b<4; b++) {
result->mTrimmedPolyXReads[b] += list[i]->mTrimmedPolyXReads[b];
Expand Down Expand Up @@ -201,6 +203,11 @@ long FilterResult::getTotalPolyXTrimmedBases() {
return sum_bases;
}

void FilterResult::addAdapterDimer(int readNum) {
mAdapterDimerReads += readNum;
addFilterResult(FAIL_ADAPTER_DIMER, readNum);
}


void FilterResult::print() {
cerr << "reads passed filter: " << mFilterReadStats[PASS_FILTER] << endl;
Expand All @@ -215,6 +222,7 @@ void FilterResult::print() {
cerr << "reads failed due to low complexity: " << mFilterReadStats[FAIL_COMPLEXITY] << endl;
}
if(mOptions->adapter.enabled) {
cerr << "reads failed due to adapter dimer: " << mFilterReadStats[FAIL_ADAPTER_DIMER] << endl;
cerr << "reads with adapter trimmed: " << mTrimmedAdapterRead << endl;
cerr << "bases trimmed due to adapters: " << mTrimmedAdapterBases << endl;
}
Expand All @@ -240,6 +248,8 @@ void FilterResult::reportJson(ofstream& ofs, string padding) {
ofs << padding << "\t" << "\"too_many_N_reads\": " << mFilterReadStats[FAIL_N_BASE] << "," << endl;
if(mOptions->complexityFilter.enabled)
ofs << padding << "\t" << "\"low_complexity_reads\": " << mFilterReadStats[FAIL_COMPLEXITY] << "," << endl;
if(mOptions->adapter.enabled)
ofs << padding << "\t" << "\"adapter_dimer_reads\": " << mFilterReadStats[FAIL_ADAPTER_DIMER] << "," << endl;
ofs << padding << "\t" << "\"too_short_reads\": " << mFilterReadStats[FAIL_LENGTH] << "," << endl;
ofs << padding << "\t" << "\"too_long_reads\": " << mFilterReadStats[FAIL_TOO_LONG] << endl;

Expand Down Expand Up @@ -366,6 +376,8 @@ void FilterResult::reportHtml(ofstream& ofs, long totalReads, long totalBases) {
}
if(mOptions->complexityFilter.enabled)
HtmlReporter::outputRow(ofs, "reads with low complexity:", HtmlReporter::formatNumber(mFilterReadStats[FAIL_COMPLEXITY]) + " (" + to_string(mFilterReadStats[FAIL_COMPLEXITY] * 100.0 / total) + "%)");
if(mOptions->adapter.enabled)
HtmlReporter::outputRow(ofs, "reads with adapter dimer:", HtmlReporter::formatNumber(mFilterReadStats[FAIL_ADAPTER_DIMER]) + " (" + to_string(mFilterReadStats[FAIL_ADAPTER_DIMER] * 100.0 / total) + "%)");
ofs << "</table>\n";
}

Expand Down
2 changes: 2 additions & 0 deletions src/filterresult.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class FilterResult{
void addPolyXTrimmed(int base, int length);
long getTotalPolyXTrimmedReads();
long getTotalPolyXTrimmedBases();
void addAdapterDimer(int readNum=1);
// a part of JSON report
void reportJson(ofstream& ofs, string padding);
// a part of JSON report for adapters
Expand Down Expand Up @@ -67,6 +68,7 @@ class FilterResult{
bool mPaired;
long mCorrectedReads;
long mMergedPairs;
long mAdapterDimerReads;
private:
long mFilterReadStats[FILTER_RESULT_TYPES];
long mTrimmedAdapterRead;
Expand Down
2 changes: 2 additions & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ int main(int argc, char* argv[]){
cmd.add<string>("adapter_fasta", 0, "specify a FASTA file to trim both read1 and read2 (if PE) by all the sequences in this FASTA file", false, "");
cmd.add("detect_adapter_for_pe", '2', "enable adapter detection for PE data to get ultra-clean data. It takes more time to find just a little bit more adapters.");
cmd.add("allow_gap_overlap_trimming", 0, "allow up to one gap when trim adapters by overlap analysis for PE data. By default no gap is allowed.");
cmd.add<int>("dimer_max_len", 0, "if the read length is less than or equal to this value after adapter trimming, it is considered an adapter dimer. Requires adapter evidence.", false, 2);

// trimming
cmd.add<int>("trim_front1", 'f', "trimming how many bases in front for read1, default is 0", false, 0);
Expand Down Expand Up @@ -217,6 +218,7 @@ int main(int argc, char* argv[]){
opt.adapter.enabled = !cmd.exist("disable_adapter_trimming");
opt.adapter.detectAdapterForPE = cmd.exist("detect_adapter_for_pe");
opt.adapter.allowGapOverlapTrimming = cmd.exist("allow_gap_overlap_trimming");
opt.adapter.dimerMaxLen = cmd.get<int>("dimer_max_len");
opt.adapter.sequence = cmd.get<string>("adapter_sequence");
opt.adapter.sequenceR2 = cmd.get<string>("adapter_sequence_r2");
opt.adapter.fastaFile = cmd.get<string>("adapter_fasta");
Expand Down
2 changes: 2 additions & 0 deletions src/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ class AdapterOptions {
hasSeqR2 = false;
detectAdapterForPE = false;
allowGapOverlapTrimming = false;
dimerMaxLen = 2;
}
public:
bool enabled;
Expand All @@ -216,6 +217,7 @@ class AdapterOptions {
bool hasFasta;
bool detectAdapterForPE;
bool allowGapOverlapTrimming;
int dimerMaxLen;
};

class TrimmingOptions {
Expand Down
27 changes: 24 additions & 3 deletions src/peprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ bool PairEndProcessor::processPairEnd(ReadPack* leftPack, ReadPack* rightPack, T
PolyX::trimPolyG(r1, r2, config->getFilterResult(), mOptions->polyGTrim.minLen);
}
bool isizeEvaluated = false;
bool isAdapterDimer = false;
if(r1 != NULL && r2!=NULL && (mOptions->adapter.enabled || mOptions->correction.enabled)){
OverlapResult ov = OverlapAnalysis::analyze(r1, r2, mOptions->overlapDiffLimit, mOptions->overlapRequire, mOptions->overlapDiffPercentLimit/100.0, mOptions->adapter.allowGapOverlapTrimming);
// we only use thread 0 to evaluae ISIZE
Expand All @@ -452,8 +453,17 @@ bool PairEndProcessor::processPairEnd(ReadPack* leftPack, ReadPack* rightPack, T
trimmed2 = AdapterTrimmer::trimBySequence(r2, config->getFilterResult(), mOptions->adapter.sequenceR2, true);
}
if(mOptions->adapter.hasFasta) {
AdapterTrimmer::trimByMultiSequences(r1, config->getFilterResult(), mOptions->adapter.seqsInFasta, false, !trimmed1);
AdapterTrimmer::trimByMultiSequences(r2, config->getFilterResult(), mOptions->adapter.seqsInFasta, true, !trimmed2);
trimmed1 |= AdapterTrimmer::trimByMultiSequences(r1, config->getFilterResult(), mOptions->adapter.seqsInFasta, false, !trimmed1);
trimmed2 |= AdapterTrimmer::trimByMultiSequences(r2, config->getFilterResult(), mOptions->adapter.seqsInFasta, true, !trimmed2);
}

// Check for adapter dimer: both reads shorter than threshold after adapter trimming
// AND adapters were detected in at least one of the reads (requires evidence)
if(r1 != NULL && r2 != NULL && (trimmed1 || trimmed2) &&
r1->length() <= mOptions->adapter.dimerMaxLen &&
r2->length() <= mOptions->adapter.dimerMaxLen) {
isAdapterDimer = true;
config->getFilterResult()->addAdapterDimer(2);
}
}
}
Expand Down Expand Up @@ -504,13 +514,19 @@ bool PairEndProcessor::processPairEnd(ReadPack* leftPack, ReadPack* rightPack, T
mergeProcessed = true;
} else if(mOptions->merge.includeUnmerged){
int result1 = mFilter->passFilter(r1);
int result2 = mFilter->passFilter(r2);

if(isAdapterDimer) {
result1 = FAIL_ADAPTER_DIMER;
result2 = FAIL_ADAPTER_DIMER;
}

config->addFilterResult(result1, 1);
if(result1 == PASS_FILTER && !dedupOut) {
r1->appendToString(mergedOutput);
config->getPostStats1()->statRead(r1);
}

int result2 = mFilter->passFilter(r2);
config->addFilterResult(result2, 1);
if(result2 == PASS_FILTER && !dedupOut) {
r2->appendToString(mergedOutput);
Expand All @@ -527,6 +543,11 @@ bool PairEndProcessor::processPairEnd(ReadPack* leftPack, ReadPack* rightPack, T
int result1 = mFilter->passFilter(r1);
int result2 = mFilter->passFilter(r2);

if(isAdapterDimer) {
result1 = FAIL_ADAPTER_DIMER;
result2 = FAIL_ADAPTER_DIMER;
}

config->addFilterResult(max(result1, result2), 2);

if(!dedupOut) {
Expand Down
19 changes: 15 additions & 4 deletions src/seprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ bool SingleEndProcessor::processSingleEnd(ReadPack* pack, ThreadConfig* config){
if(mOptions->fixMGI) {
or1->fixMGI();
}

// umi processing
if(mOptions->umi.enabled)
mUmiProcessor->process(or1);
Expand All @@ -239,13 +239,21 @@ bool SingleEndProcessor::processSingleEnd(ReadPack* pack, ThreadConfig* config){
PolyX::trimPolyG(r1, config->getFilterResult(), mOptions->polyGTrim.minLen);
}

bool isAdapterDimer = false;
if(r1 != NULL && mOptions->adapter.enabled){
bool trimmed = false;
if(mOptions->adapter.hasSeqR1)
trimmed = AdapterTrimmer::trimBySequence(r1, config->getFilterResult(), mOptions->adapter.sequence, false);
bool incTrimmedCounter = !trimmed;
if(mOptions->adapter.hasFasta) {
AdapterTrimmer::trimByMultiSequences(r1, config->getFilterResult(), mOptions->adapter.seqsInFasta, false, incTrimmedCounter);
trimmed |= AdapterTrimmer::trimByMultiSequences(r1, config->getFilterResult(), mOptions->adapter.seqsInFasta, false, incTrimmedCounter);
}

// Check for adapter dimer: read shorter than threshold after adapter trimming
// AND adapter was detected (requires evidence)
if(r1 != NULL && trimmed && r1->length() <= mOptions->adapter.dimerMaxLen) {
isAdapterDimer = true;
config->getFilterResult()->addAdapterDimer(1);
}
}

Expand All @@ -261,6 +269,9 @@ bool SingleEndProcessor::processSingleEnd(ReadPack* pack, ThreadConfig* config){

int result = mFilter->passFilter(r1);

if(isAdapterDimer)
result = FAIL_ADAPTER_DIMER;

config->addFilterResult(result, 1);

if(!dedupOut) {
Expand All @@ -285,7 +296,7 @@ bool SingleEndProcessor::processSingleEnd(ReadPack* pack, ThreadConfig* config){
// split output by each worker thread
if(!mOptions->out1.empty())
config->getWriter1()->writeString(outstr);
}
}

if(mLeftWriter) {
mLeftWriter->input(tid, outstr);
Expand Down Expand Up @@ -438,7 +449,7 @@ void SingleEndProcessor::processorTask(ThreadConfig* config)
usleep(100);
}
}
input->setConsumerFinished();
input->setConsumerFinished();

mFinishedThreads++;
if(mFinishedThreads == mOptions->thread) {
Expand Down