-
Notifications
You must be signed in to change notification settings - Fork 18
/
picrust2-aldex-analysis.R
174 lines (141 loc) · 8.59 KB
/
picrust2-aldex-analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#
# Run picrust2
#######################################################
# Run below commands outside R in Linux environment
# If you have picrust2 installed on a conda environment, activate the required environment, e.g.
# $ conda activate picrust2
#
# Requires fasta file "expr.asv.fasta" and biom file "expr.biom" generated during phyloseq analysis
# The `--output` argument to specify the output folder for final files
# The `--processes N` argument to specify the number of CPUs to run picrust2 in parallel
#
#----- START: Run in terminal/console -----#
# Run picrust2_pipeline.py
/path-to-picrust2_pipeline.py --study_fasta expr.asv.fasta --input expr.biom \
--output picrust2_out_stratified --processes 2 --stratified --remove_intermediate --verbose
# Locate and take note the directory that keeps the picrust2 mapfiles
locate description_mapfiles
#----- END: Run in terminal/console -----#
#
# Example location:
# /home/user/miniconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/description_mapfiles
# /home/user/miniconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/description_mapfiles/KEGG_modules_info.tsv.gz
# /home/user/miniconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/description_mapfiles/KEGG_pathways_info.tsv.gz
# ...
# Deactivate the environment if required
# $ conda deactivate
#######################################################
library("data.table") # Also requires R.utils to read gz and bz2 files
library("phyloseq")
library("ALDEx2")
library("tidyverse")
options(width=190)
# Continued from phyloseq workflow (phyloseq-analysis.R)
# Load saved workspace
# load("image3.RData")
# Note to change the PATH to the "picrust2" output folder accordingly
# Full path is not necessary if `R` is executed in the directory one-level above "picrust2_out_stratified"
picrust2 = "picrust2_out_stratified"
# Note to change the PATH to the picrust2 mapfiles accordingly
# Use the "description_mapfiles" PATH you located with the `locate` command above
mapfile = "/home/user/miniconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/description_mapfiles"
# Set picrust2 output file paths
p2_EC = paste0(picrust2, "/EC_metagenome_out/pred_metagenome_unstrat.tsv.gz")
p2_KO = paste0(picrust2, "/KO_metagenome_out/pred_metagenome_unstrat.tsv.gz")
p2_PW = paste0(picrust2, "/pathways_out/path_abun_unstrat.tsv.gz")
# Set picrust2 map file paths
mapfile_EC = paste0(mapfile, "/ec_level4_info.tsv.gz")
mapfile_KO = paste0(mapfile, "/ko_info.tsv.gz")
mapfile_PW = paste0(mapfile, "/metacyc_pathways_info.txt.gz")
# Load map files
mapEC = as.data.frame(fread(mapfile_EC, header = FALSE))
colnames(mapEC) = c("function","description")
mapKO = as.data.frame(fread(mapfile_KO, header = FALSE, sep = "\t"))
colnames(mapKO) = c("function","description")
mapPW = as.data.frame(fread(mapfile_PW, header = FALSE))
colnames(mapPW) = c("pathway","description")
# Load picrust2 output files
p2EC = as.data.frame(fread(p2_EC))
rownames(p2EC) = p2EC$"function"
p2EC = as.matrix(p2EC[,-1])
p2EC = round(p2EC)
p2KO = as.data.frame(fread(p2_KO))
rownames(p2KO) = p2KO$"function"
p2KO = as.matrix(p2KO[,-1])
p2KO = round(p2KO)
p2PW = as.data.frame(fread(p2_PW))
rownames(p2PW) = p2PW$"pathway"
p2PW = as.matrix(p2PW[,-1])
p2PW = round(p2PW)
# Perform compositional data analysis (CoDA)
# Allow statistical analysis between 2 groups/conditions
# The default "mc.samples" option is 128
set.seed(12345)
aldex2_EC = aldex(p2EC, sample_data(ps1)$Group, mc.samples = 500, test = "t", effect = TRUE, denom = "iqlr", verbose = TRUE)
head(aldex2_EC)
set.seed(12345)
aldex2_KO = aldex(p2KO, sample_data(ps1)$Group, mc.samples = 500, test = "t", effect = TRUE, denom = "iqlr", verbose = TRUE)
head(aldex2_KO)
set.seed(12345)
aldex2_PW = aldex(p2PW, sample_data(ps1)$Group, mc.samples = 500, test = "t", effect = TRUE, denom = "iqlr", verbose = TRUE)
head(aldex2_PW)
# Check estimated effect size range
# ALDEx2 authors suggest that an effect size of 1 or greater can be used as significance cutoff
png("images/ALDEx2_picrust2_effect.png", width = 6, height = 6, units = "in", res = 300)
par(mfrow = c(2,2))
hist(aldex2_EC$effect, breaks = 20, xlab = "effect size", col = "yellow", main = "EC")
hist(aldex2_KO$effect, breaks = 20, xlab = "effect size", col = "yellow", main = "KO")
hist(aldex2_PW$effect, breaks = 20, xlab = "effect size", col = "yellow", main = "Pathway")
dev.off()
# Create MW (fold-change to variance/effect) and MA (Bland-Altman) plots
# The "test" option can be "welch" or "wilcox"
png("ALDEx2_picrust2_MW_MA.png", width = 6, height = 8, units = "in", res = 300)
par(mfrow = c(3,2))
aldex.plot(aldex2_EC, type = "MW", test = "wilcox", all.cex = 0.4, rare.cex = 0.4, called.cex = 0.6, cutoff = 0.05, xlab = "Dispersion", ylab = "Difference")
title(main = "(EC) MW Plot")
aldex.plot(aldex2_EC, type = "MA", test = "wilcox", all.cex = 0.4, rare.cex = 0.4, called.cex = 0.6, cutoff = 0.05, xlab = "Log-ratio abundance", ylab = "Difference")
title(main = "(EC) MA Plot")
aldex.plot(aldex2_KO, type = "MW", test = "wilcox", all.cex = 0.4, rare.cex = 0.4, called.cex = 0.6, cutoff = 0.05, xlab = "Dispersion", ylab = "Difference")
title(main = "(KO) MW Plot")
aldex.plot(aldex2_KO, type = "MA", test = "wilcox", all.cex = 0.4, rare.cex = 0.4, called.cex = 0.6, cutoff = 0.05, xlab = "Relative abundance", ylab = "Difference")
title(main = "(KO) MA Plot")
aldex.plot(aldex2_PW, type = "MW", test = "wilcox", all.cex = 0.4, rare.cex = 0.4, called.cex = 0.6, cutoff = 0.05, xlab = "Dispersion", ylab = "Difference")
title(main = "(PW) MW Plot")
aldex.plot(aldex2_PW, type = "MA", test = "wilcox", all.cex = 0.4, rare.cex = 0.4, called.cex = 0.6, cutoff = 0.05, xlab = "Relative abundance", ylab = "Difference")
title(main = "(PW) MA Plot")
dev.off()
# Observe relationship between effect, difference, and P values
png("ALDEx2_picrust2_P_adjP.png", width = 6, height = 8, units = "in", res = 300)
par(mfrow = c(3,2))
plot(aldex2_EC$effect, aldex2_EC$we.ep, log = "y", cex = 0.4, col = "blue", pch = 19, xlab = "Effect size", ylab = "P value", main = "(EC) Effect size plot")
points(aldex2_EC$effect, aldex2_EC$we.eBH, cex = 0.5, col = "red", pch = 19)
abline(h = 0.05, lty = 2, col = "grey")
legend("bottom", legend = c("P value", "BH-adjusted"), pch = 19, col = c("blue", "red"))
plot(aldex2_EC$diff.btw, aldex2_EC$we.ep, log = "y", cex = 0.4, col = "blue", pch = 19, xlab = "Difference", ylab = "P value", main = "(EC) Volcano plot")
points(aldex2_EC$diff.btw, aldex2_EC$we.eBH, cex = 0.5, col = "red", pch = 19)
abline(h = 0.05, lty = 2, col = "grey")
plot(aldex2_KO$effect, aldex2_KO$we.ep, log = "y", cex = 0.4, col = "blue", pch = 19, xlab = "Effect size", ylab = "P value", main = "(KO) Effect size plot")
points(aldex2_KO$effect, aldex2_KO$we.eBH, cex = 0.5, col = "red", pch = 19)
abline(h = 0.05, lty = 2, col = "grey")
legend("bottom", legend = c("P value", "BH-adjusted"), pch = 19, col = c("blue", "red"))
plot(aldex2_KO$diff.btw, aldex2_KO$we.ep, log = "y", cex = 0.4, col = "blue", pch = 19, xlab = "Difference", ylab = "P value", main = "(KO) Volcano plot")
points(aldex2_KO$diff.btw, aldex2_KO$we.eBH, cex = 0.5, col = "red", pch = 19)
abline(h = 0.05, lty = 2, col = "grey")
plot(aldex2_PW$effect, aldex2_PW$we.ep, log = "y", cex = 0.4, col = "blue", pch = 19, xlab = "Effect size", ylab = "P value", main = "(PW) Effect size plot")
points(aldex2_PW$effect, aldex2_PW$we.eBH, cex = 0.5, col = "red", pch = 19)
abline(h = 0.05, lty = 2, col = "grey")
legend("bottom", legend = c("P value", "BH-adjusted"), pch = 19, col = c("blue", "red"))
plot(aldex2_PW$diff.btw, aldex2_PW$we.ep, log = "y", cex = 0.4, col = "blue", pch = 19, xlab = "Difference", ylab = "P value", main = "(PW) Volcano plot")
points(aldex2_PW$diff.btw, aldex2_PW$we.eBH, cex = 0.5, col = "red", pch = 19)
abline(h = 0.05, lty = 2, col = "grey")
dev.off()
# Merge with map file data
df_EC = aldex2_EC %>% tibble::rownames_to_column(var = "EC") %>% inner_join(mapEC, by = c("EC" = "function")) %>% arrange(EC)
df_KO = aldex2_KO %>% tibble::rownames_to_column(var = "KO") %>% inner_join(mapKO, by = c("KO" = "function")) %>% arrange(KO)
df_PW = aldex2_PW %>% tibble::rownames_to_column(var = "Pathway") %>% inner_join(mapPW, by = c("Pathway" = "pathway")) %>% arrange(Pathway)
# Output to file
write.table(df_EC, file = "ALDEx2_picrust2_EC_results.txt", sep = "\t", quote = F, row.names = F, col.names = T)
write.table(df_KO, file = "ALDEx2_picrust2_KO_results.txt", sep = "\t", quote = F, row.names = F, col.names = T)
write.table(df_PW, file = "ALDEx2_picrust2_Pathway_results.txt", sep = "\t", quote = F, row.names = F, col.names = T)
# Save current workspace
# save.image(file = "image4.RData")