-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBowtieEntry.cpp
131 lines (100 loc) · 2.91 KB
/
BowtieEntry.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
* BowtieEntry.cpp
*
* Created on: Mar 3, 2010
* Author: marcus
*/
#include "BowtieEntry.h"
BowtieEntry::BowtieEntry(int offs) {
offset = offs;
_mapped_transcript = "";
_mapped_gene = "";
read = NULL;
position = -1;
read_id = "";
base_read_id = "";
}
BowtieEntry::BowtieEntry(string read_name, string map, string orient, int pos, string seq,
string qual, string mismatches, int off) {
read_id = read_name;
base_read_id = read_id.substr(0, read_id.size() - 2);
mapping = map;
strand = orient;
position = pos;
offset = off;
read = new Read(read_id, seq, qual, offset);
mismatch_indices = get_indices(mismatches);
_mapped_transcript = "";
_mapped_gene = "";
}
BowtieEntry::~BowtieEntry() {
delete read;
}
istream& operator>>(istream &stream, BowtieEntry& bt) {
char btline[1500];
vector<string> lineparts;
string mms;
stream.getline(btline, (streamsize)1500);
myTokenize(string(btline), lineparts, "\t");
if (lineparts.size() == 8){
mms = lineparts.at(7);
} else {
mms = "";
}
if(lineparts.size()>6 && !stream.eof()){
bt.read_id = lineparts[0];
bt.base_read_id = bt.read_id.substr(0, bt.read_id.size() - 2);
bt.mapping = lineparts[2];
bt.strand = lineparts[1];
bt.position = (int)atoi(lineparts.at(3).c_str());
delete bt.read;
bt.read = new Read(lineparts[0], lineparts[4], lineparts[5], bt.offset);
bt.mismatch_indices = bt.get_indices(mms);
}
return stream;
}
vector<int> BowtieEntry::get_indices(string mismatchstring) {
vector<string> parts;
vector<int> output;
size_t coloni;
myTokenize(mismatchstring, parts, ",");
for(unsigned int i=0; i<parts.size(); i++){
coloni = parts[i].find_first_of(":");
parts[i].erase(coloni, parts[i].length()-coloni);
output.push_back((int)atoi(parts.at(i).c_str()));
}
return output;
}
long double BowtieEntry::mapping_probability() {
long double P;
P = 1;
for(unsigned int i=0; i<read->quality->quality_str.size(); i++){
if(find(mismatch_indices.begin(), mismatch_indices.end(), i) == mismatch_indices.end()){ //if match
// cout << "error_prob at " << i << " " << read->quality->error_probabilities[i] <<endl;
P *= (1 - read->quality->error_probabilities[i]);
} else { //if mismatch
// cout << "error_prob at " << i << " " << read->quality->error_probabilities[i] <<endl;
P *= read->quality->error_probabilities[i]/3;
}
}
return P;
}
string BowtieEntry::mapped_gene() {
if(_mapped_gene.length() > 0) {return _mapped_gene;}
else {parse_mapping(); return _mapped_gene;}
}
string BowtieEntry::mapped_transcript() {
if(_mapped_transcript.length() > 0) {return _mapped_transcript;}
else {parse_mapping(); return _mapped_transcript;}
}
void BowtieEntry::parse_mapping() {
char * maptok;
char * cstr;
cstr = new char [mapping.size() + 1];
strcpy(cstr, mapping.c_str());
maptok = strtok(cstr, "|");
_mapped_transcript = string(maptok);
maptok = strtok(NULL, "|");
_mapped_gene = string(maptok);
delete[] cstr;
}