# analyze_accuracy.R
# (130 lines, 108 loc, 4.42 KB in the original listing; the web-page chrome
# and line-number gutter that preceded the script were not R code.)
library(tidyverse)
# LATEST
###########
# precision
###########
# parser: tidy one raw, header-less, four-column accuracy table.
#
# Argument:
#   raw  a table with exactly four columns, in the order
#        class, hit, isomir, counts.
# Returns an ungrouped table in which
#   * the columns carry the names listed above;
#   * `isomir` is split on "-" into `isomir`, `type` and `size` (pieces that
#     are absent are filled with NA from the right, and an absent `size`
#     then becomes "1");
#   * `pct` is each row's share (0-100) of the summed counts inside its
#     isomir/type group;
#   * a `hit` of "None" is relabelled "Missed".
parser <- function(raw) {
  named <- set_names(raw, c("class", "hit", "isomir", "counts"))

  # Relabelling steps that were already disabled in the pipeline:
  # mutate(isomir=ifelse(isomir=="", "5prime_non", isomir)) %>%
  # mutate(isomir=ifelse(isomir=="3prime_addition", "3prime_non", isomir)) %>%

  pieces <- separate(
    named, isomir,
    into = c("isomir", "type", "size"), sep = "-", fill = "right"
  )
  pieces <- mutate(pieces, size = ifelse(is.na(size), "1", size))

  # Percentages are relative to the isomir/type group total.
  by_variant <- group_by(pieces, isomir, type)
  by_variant <- mutate(
    by_variant,
    pct = as.numeric(counts) / sum(as.numeric(counts)) * 100
  )
  by_variant <- mutate(by_variant, hit = ifelse(hit == "None", "Missed", hit))

  ungroup(by_variant)
}
###########
# rounds 0-4
###########
# read_accuracy_round: load every accuracy table of one round.
#
# Arguments:
#   stats_dir    directory searched for files whose name matches "accuracy"
#   round_label  value written to the `round` column of every returned row
#
# Each matching file is read as a header-less TSV, passed through `parser`,
# and tagged with `tool` (the file's base name with "_accuracy.tsv" removed).
# The per-file tables are stacked into a single table.
read_accuracy_round <- function(stats_dir, round_label) {
  files <- list.files(stats_dir, "accuracy", full.names = TRUE)
  if (length(files) == 0) {
    # Without this, a round with no input simply vanishes from the plots.
    warning("no accuracy files found in ", stats_dir, call. = FALSE)
  }

  per_tool <- lapply(files, function(fn) {
    name <- gsub("_accuracy.tsv", "", basename(fn))
    read_tsv(fn, col_names = FALSE) %>%
      parser %>%
      mutate(tool = name)
  })

  per_tool %>%
    bind_rows() %>%
    mutate(round = round_label)
}

# round0
r0 <- read_accuracy_round("data/output.S0/stats/", "cannonical")
# round1
r1 <- read_accuracy_round("data/output.S1/stats/", "isomirs")
# round2
r2 <- read_accuracy_round("data/output.S2/stats/", "isomirs Comb")
# round3
r3 <- read_accuracy_round("data/output.S3/stats/", "sRNA")
# round4
r4 <- read_accuracy_round("data/output.S4/stats/", "isomirs Comb + sRNA")
#############################################################################
# full: all rounds stacked, plus the columns the plots below rely on.
#   round       factor over the five round labels
#   class       "<hit> <class>", used as the fill category
#   tool_round  "<tool> <round>", levels in reverse order of first appearance
#   is_comb     "yes" when `isomir` contains ":", otherwise "no"
full <- bind_rows(r0, r1, r2, r3, r4) %>%
  # filter(tool!="bowtie1_default", isomir!="other", tool!="gsnap_m10") %>%
  # filter(isomir!="NA") %>%
  mutate(
    # "sRNA" has to be listed as a level: values absent from `levels` become
    # NA, which turned every r3 row's round into NA and its tool_round into
    # "<tool> NA".
    round = factor(
      round,
      levels = c(
        "cannonical", "isomirs", "isomirs Comb", "sRNA", "isomirs Comb + sRNA"
      )
    ),
    class = paste(hit, class),
    tool_round = paste(tool, round),
    tool_round = factor(tool_round, levels = rev(unique(tool_round))),
    is_comb = ifelse(grepl(":", isomir), "yes", "no")
  )
# Round "isomirs" only: stacked bars per tool, faceted by size (rows) and
# "<isomir> <type>" (columns). `pct` is rescaled so that the bars of each
# tool / isomir / size combination sum to 100.
s1_by_size_plot <- full %>%
  filter(round == "isomirs") %>%
  # filter(!(isomir %in% c("snp", "indel"))) %>%
  mutate(isomir = paste(isomir, type)) %>%
  group_by(tool, isomir, size) %>%
  mutate(total = sum(pct),
         pct = pct / total * 100) %>%
  ungroup() %>%
  ggplot(aes(tool, pct, fill = class)) +
  geom_bar(stat = "identity") +
  theme(legend.position = "bottom") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  facet_grid(size ~ isomir) +
  scale_fill_manual(values = c("grey", "orange", "blue", "orange4", "blue4"))

# ggsave() is a separate call with an explicit `plot`: it is not a layer, and
# adding it with `+` is rejected by current ggplot2 releases. The plot is
# written to disk only, not printed.
ggsave("results/S1-accuracy-nocomb-bysize.pdf", plot = s1_by_size_plot,
       width = 12, height = 6)
# Non-combined isomirs, every round except "sRNA": horizontal stacked bars
# per tool/round, one facet per "<isomir> <type>".
nocomb_plot <- full %>%
  filter(is_comb == "no", round != "sRNA") %>%
  mutate(isomir = paste(isomir, type)) %>%
  ggplot(aes(tool_round, pct, fill = class)) +
  geom_bar(stat = "identity") +
  theme(legend.position = "bottom") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  coord_flip() +
  facet_wrap(~isomir, nrow = 1) +
  scale_fill_manual(values = c("grey", "orange", "blue", "orange4", "blue4"))

# ggsave() is a separate call with an explicit `plot`: it is not a layer, and
# adding it with `+` is rejected by current ggplot2 releases. The plot is
# written to disk only, not printed.
ggsave("results/accuracy-nocomb.pdf", plot = nocomb_plot,
       width = 12, height = 6)
# Combined isomirs (those whose name contains ":"): horizontal stacked bars
# per tool/round, one facet per "<isomir> <type>". Each ":" in the facet
# label is replaced by a line break.
comb_plot <- full %>%
  filter(is_comb == "yes") %>%
  mutate(isomir = paste(isomir, type)) %>%
  mutate(isomir = gsub(":", "\n", isomir)) %>%
  ggplot(aes(tool_round, pct, fill = class)) +
  geom_bar(stat = "identity") +
  theme(legend.position = "bottom") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  coord_flip() +
  facet_wrap(~isomir, nrow = 1) +
  scale_fill_manual(values = c("grey", "orange", "blue", "orange4", "blue4"))

# ggsave() is a separate call with an explicit `plot`: it is not a layer, and
# adding it with `+` is rejected by current ggplot2 releases. The plot is
# written to disk only, not printed.
ggsave("results/accuracy-comb.pdf", plot = comb_plot,
       width = 12, height = 6)