-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbayescanPlot.R
167 lines (140 loc) · 7.22 KB
/
bayescanPlot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# This file is used to plot figures for the software Bayescan in R.
# This program, BayeScan, aims at detecting genetics markers under selection,
# based on allele frequency differences between population.
# Copyright (C) 2010 Matthieu Foll
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Arguments:
# - file is the name of your file ex: "output_fst.txt"
# - the q-value threshold corresponding to the target False Discovery Rate (FDR)
# - size is the size of the points and text labels for outliers
# - pos is the distance between the points and the labels
# - highlight is a optional list of marker indices to display in red.
# - name_highlighted alows to write the indices of highlighted markers instead of using a point like the other markers
# - add_text adds the indices of the outlier markers
# Output:
# This function returns different paremeters in a list
# - outliers: the list of outliers
# - nb_outliers: the number of outliers
# Typical usage:
# - load this file into R (file/source R code)
# - in R, go to the directory where "output_fst.txt" is (file/change current dir)
# - at the R prompt, type
# > plot_bayescan("output_fst.txt",0,FDR=0.05)
# if you save the output in a variable, you can recall the different results:
# results<-plot_bayescan("output_fst.txt",0,FDR=0.05)
# results$outliers
# results$nb_outliers
#
# plotting posterior distribution is very easy in R with the output of BayeScan:
# first load the output file *.sel produced by BayeScan
# > mydata=read.table("bi.sel",colClasses="numeric")
# choose the parameter you want to plot by setting for example:
# parameter="Fst1"
# then this line will make the plot for:
# > plot(density(mydata[[parameter]]),xlab=parameter,main=paste(parameter,"posterior distribution"))
# you can plot population specific Fst coefficient by setting
# parameter="Fst1"
# if you have non-codominant data you can plot posterior for Fis coefficients in each population:
# parameter="Fis1"
# if you test for selection, you can plot the posterior for alpha coefficient for selection:
# parameter="alpha1"
# you also have access to the likelihood with:
# parameter="logL"
# if you have the package "boa" installed, you can very easily obtain Highest Probability
# Density Interval (HPDI) for your parameter of interest (example for the 95% interval):
# > boa.hpd(mydata[[parameter]],0.05)
plot_bayescan<-function(res,FDR=0.05,size=1,pos=0.35,highlight=NULL,name_highlighted=F,add_text=T)
{
if (is.character(res))
res=read.table(res)
colfstat=5
colq=colfstat-2
highlight_rows=which(is.element(as.numeric(row.names(res)),highlight))
non_highlight_rows=setdiff(1:nrow(res),highlight_rows)
outliers=as.integer(row.names(res[res[,colq]<=FDR,]))
ok_outliers=TRUE
if (sum(res[,colq]<=FDR)==0)
ok_outliers=FALSE;
res[res[,colq]<=0.0001,colq]=0.0001
# plot
plot(log10(res[,colq]),res[,colfstat],xlim=rev(range(log10(res[,colq]))),xlab="log10(q value)",ylab=names(res[colfstat]),type="n")
points(log10(res[non_highlight_rows,colq]),res[non_highlight_rows,colfstat],pch=19,cex=size)
if (name_highlighted) {
if (length(highlight_rows)>0) {
text(log10(res[highlight_rows,colq]),res[highlight_rows,colfstat],row.names(res[highlight_rows,]),col="red",cex=size*1.2,font=2)
}
}
else {
points(log10(res[highlight_rows,colq]),res[highlight_rows,colfstat],col="red",pch=19,cex=size)
# add names of loci over p and vertical line
if (ok_outliers & add_text) {
text(log10(res[res[,colq]<=FDR,][,colq])+pos*(round(runif(nrow(res[res[,colq]<=FDR,]),1,2))*2-3),res[res[,colq]<=FDR,][,colfstat],row.names(res[res[,colq]<=FDR,]),cex=size)
}
}
lines(c(log10(FDR),log10(FDR)),c(-1,1),lwd=2)
return(list("outliers"=outliers,"nb_outliers"=length(outliers)))
}
par(mfrow=c(1,1))
plot_bayescan('/home/jonas/Documents/Masterthesis/data/Analyses/Bayescan/BayescanOutput/alcon/alcon_fst.txt', FDR = 0.05)#+title('Alcon')
plot_bayescan('/home/jonas/Documents/Masterthesis/data/Analyses/Bayescan/BayescanOutput/gentian/gentian_fst.txt', FDR = 0.05)#+title('Gentian')
#Make own nice GGplot with clear labels
##ALCON
bayes.alc <- read.delim('/home/jonas/Documents/Masterthesis/data/Analyses/Bayescan/BayescanOutput/alcon/alcon_fst.txt', h = T, sep = ' ')
bayes.alc <- bayes.alc[,3:7]
colnames(bayes.alc) <- c('prob','log10(PO)','qval','alpha','fst')
signSNP.bayes.alc <- bayes.alc[bayes.alc$qval <= 0.05,]
bayes.alc[bayes.alc$qval < 0.001, 3] <- 0.001 #set q-value to at least 0.001 (to show on plot)
bayes.alc$balancing <- as.factor(ifelse(test = bayes.alc$alpha<0 & bayes.alc$qval<0.05, 1, 0))
library(ggplot2)
library(ggthemes)
library(dplyr)
library(tibble)
library(ggrepel)
ggplot(data = bayes.alc, aes(x= log10(qval), y= fst, color = balancing))+
geom_point()+
geom_vline(xintercept = log10(0.05), color = 'tomato1')+
scale_x_reverse()+
xlim(0, -3) +
theme_classic()+
ggtitle('Alcon')+
xlab('Log10(Q-value)')+ylab('Fst')+
geom_label_repel(data = bayes.alc %>% filter(qval < 0.05),
aes(label=rownames(signSNP.bayes.alc)), size = 3)+
scale_color_manual(values=c('black', '#116E8A'))+
theme(legend.position = "none")
bayes.gen <- read.delim('/home/jonas/Documents/Masterthesis/data/Analyses/Bayescan/BayescanOutput/gentian/gentian_fst.txt', h = T, sep = ' ')
bayes.gen <- bayes.gen[,3:7]
colnames(bayes.gen) <- c('prob','log10(PO)','qval','alpha','fst')
signSNP.bayes.gen <- bayes.gen[bayes.gen$qval <= 0.05,]
bayes.gen[bayes.gen$qval < 0.001, 3] <- 0.001 #set q-value to at least 0.001 (to show on plot)
bayes.gen$balancing <- as.factor(ifelse(test = bayes.gen$alpha<0 & bayes.gen$qval<0.05, 1, 0))
ggplot(data = bayes.gen, aes(x= log10(qval), y= fst, color = balancing))+
geom_point()+
geom_vline(xintercept = log10(0.05), color = 'tomato1')+
scale_x_reverse()+
xlim(0, -3) +
theme_classic()+
ggtitle('Gentian')+
xlab('Log10(Q-value)')+ylab('Fst')+
geom_label_repel(data = bayes.gen %>% filter(qval < 0.05),
aes(label=rownames(signSNP.bayes.gen)), size = 3)+
scale_color_manual(values=c('black', '#116E8A'))+
theme(legend.position = "none")
alcondata=read.table('/home/jonas/Documents/Masterthesis/data/Analyses/Bayescan/BayescanOutput/alcon/alcon.sel',colClasses="numeric")
gentiandata=read.table('/home/jonas/Documents/Masterthesis/data/Analyses/Bayescan/BayescanOutput/gentian/gentian.sel',colClasses="numeric")
# choose the parameter you want to plot by setting for example:
parameter="Fst1"
# then this line will make the plot for:
plot(density(alcondata[[parameter]]),xlab=parameter,main=paste(parameter,"posterior distribution"))
plot(density(gentiandata[[parameter]]),xlab=parameter,main=paste(parameter,"posterior distribution"))