Skip to content

Commit c94c07d

Browse files
committed
For #15: add secondary ID/DIV plotting method
Adds a --plotmethod option to the get_island script, and matching code in plot_all, so that points can be binned into tiles and plotted as counts instead of being smoothed with KDE.
1 parent 53a65c7 commit c94c07d

File tree

2 files changed

+68
-43
lines changed

2 files changed

+68
-43
lines changed

lineage/2.2-get_island_interactive.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env Rscript
22

33

4-
"Usage: 2.2-get_island_interactive.R <idDivFile> [ --plot <ids.list> ] [ --mab <abID>... ] [ --outdir <output/tables> --output <islandSeqs.txt> --reference <idDivFile> ]
4+
"Usage: 2.2-get_island_interactive.R <idDivFile> [ --plot <ids.list> ] [ --mab <abID>... ] [ --outdir <output/tables> --output <islandSeqs.txt> --reference <idDivFile> --plotmethod <plotmethod>]
55
66
Options:
77
-h --help Show this documentation.
@@ -18,6 +18,10 @@ Options:
1818
[default: islandSeqs]
1919
--reference <idDivFile> Other data points to display on plots, eg other members of
2020
the antibody lineage of interest.
21+
--plotmethod <plotmethod> Plotting method to use. 'original' uses kernel density
22+
estimation to show smoothed distribution of sequences.
23+
'binned' directly plots counts of sequences
24+
for tiled ID/DIV regions [default: original]
2125
2226
2327
Created by Chaim A Schramm 2016-08-30.
@@ -200,7 +204,7 @@ getIsland <- function (dataFile, subsetFile, natAbList, outDir, outFile, refPoin
200204

201205
#generate initial plot; suppress color bar and increase size of plot title from default
202206
# keep title separate, because we'll want to use different titles at different stages
203-
pp <- plot_all(smalldata, mab.R, mab, "germline V") + guides(fill=F) + theme( plot.title=element_text(size = 18) )
207+
pp <- plot_all(smalldata, mab.R, mab, "germline V", plotmethod=opts$plotmethod) + guides(fill=F) + theme( plot.title=element_text(size = 18) )
204208

205209
#want referents to look different on interactive and final figure (mostly about size)
206210
# so save this first, then add

plottingFunctions.R

Lines changed: 62 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
#split from 4.3 by Chaim A Schramm 2016-08-30.
32

43
library(ggplot2)
@@ -10,46 +9,68 @@ library(MASS)
109
# PLOTTING FUNCTION
1110
####################################
1211

13-
plot_all <- function (data, native, pretty, heavy, plot_title=NULL, color=TRUE, guide=TRUE, xlabel=TRUE, ylabel=TRUE, contour=TRUE, conCol = 'black') {
14-
15-
my_x<-paste("% divergence from", heavy)
16-
my_y<-paste("% ID to", pretty)
17-
18-
if (color) {
19-
my_colors=rev(rainbow(15,end=4/6))
20-
} else {
21-
my_colors=rev(gray.colors(5))
22-
}
23-
24-
g <- kde2d(data$germ_div,data[[native]],n=100,h=1,lim=c(0,60,40,100))
25-
densf <- data.frame(expand.grid(x=g$x, y=g$y), z=as.vector(g$z))
26-
b <- (sum(g$z) / length(data$germ_div))/2
27-
t <- b * 10^4
28-
if ( max(g$z) > t ) {
29-
t <- b * 10^ceiling( log10( max(g$z)/b ) )
30-
}
31-
r <- 10^seq(log10(b), log10(t), 1)
32-
33-
p<-ggplot(densf,aes(x,y,z=z)) +
34-
geom_tile(aes(fill = z)) +
35-
scale_fill_gradientn(colours=rev(rainbow(15,end=4/6)), trans="log10", limits=c(b,t),
36-
na.value="white", breaks=r, labels=signif(r/b,1),
37-
guide = guide_colorbar( title="number of\nsequences", title.theme=element_text(size=4,angle=0),
38-
barheight = unit(.5,"in"), barwidth = unit(.1,"in"), label.theme=element_text(size=3,angle=0,) ) )+
39-
theme_bw() + scale_x_continuous(expand=c(0,0),limits=c(-1,50)) +
40-
scale_y_continuous(expand=c(0,1),limits=c(50,101)) +
41-
theme(plot.background = element_blank(),panel.grid.major = element_blank(),
42-
axis.ticks.length = unit(.02,"in"), axis.ticks = element_line(size = .5),
43-
panel.grid.minor = element_blank(), axis.text = element_text(size = 6),
44-
axis.title = element_text(size = 8), plot.margin = unit(c(.1,.1,.1,.1),"in"),
45-
plot.title = element_text(size = 8) )
46-
47-
if ( contour ) { p <- p + stat_contour(colour=conCol, size=.25, breaks=10*r) }
48-
if ( ! is.null(plot_title) ) { p <- p + labs( title=plot_title ) }
49-
if ( xlabel ) { p <- p + labs( x=my_x ) } else { p <- p + labs( x="" ) }
50-
if ( ylabel ) { p <- p + labs( y=my_y ) } else { p <- p + labs( y="" ) }
51-
52-
p
12+
# plot_all: build a ggplot2 tile plot of sequence density on the
# (germline divergence, identity) plane and return the plot object.
#
# Arguments:
#   data        data frame; the column `germ_div` supplies x and the column
#               named by `native` supplies y.
#   native      name of the column in `data` to use for the y axis.
#   pretty      text appended to "% ID to" for the y-axis title.
#   heavy       text appended to "% divergence from" for the x-axis title.
#   plot_title  optional plot title; only applied when it is not NULL.
#   plotmethod  "binned" tabulates raw counts on a fixed grid; any other
#               value (default "original") takes the kde2d branch.
#   color       chooses between a rainbow and a gray palette for my_colors.
#               NOTE(review): my_colors is never referenced again below (the
#               fill scale hard-codes the rainbow palette), so color=FALSE
#               currently has no visible effect -- confirm intent.
#   guide       NOTE(review): not referenced anywhere in the body.
#   xlabel      TRUE uses the x-axis title built above, FALSE sets it to "".
#   ylabel      TRUE uses the y-axis title built above, FALSE sets it to "".
#   contour     TRUE adds contour lines (stat_contour) at breaks 10*r.
#   conCol      colour of the contour lines.
#
# Returns: the ggplot object `p`.
plot_all <- function (data, native, pretty, heavy, plot_title=NULL, plotmethod="original", color=TRUE, guide=TRUE, xlabel=TRUE, ylabel=TRUE, contour=TRUE, conCol = 'black') {
13+
14+
my_x<-paste("% divergence from", heavy)
15+
my_y<-paste("% ID to", pretty)
16+
17+
if (color) {
18+
my_colors=rev(rainbow(15,end=4/6))
19+
} else {
20+
my_colors=rev(gray.colors(5))
21+
}
22+
23+
# Both branches below must define: densf (columns x, y, z), b and t (lower and
# upper limits of the fill scale) and r (the colour-bar breaks, one per decade).
if (plotmethod == "binned") {
24+
# Count sequences by ID/DIV location by binning them into discrete tiles
25+
# (100 evenly spaced break points, i.e. 99 bins, from 0 to 60 for divergence
26+
# on x, and likewise from 40 to 100 for identity on y).
27+
xbreaks <- seq(0, 60, length.out = 100)
28+
ybreaks <- seq(40, 100, length.out = 100)
29+
densf <- as.data.frame(table(
30+
cut(data$germ_div, breaks = xbreaks, include.lowest = TRUE),
31+
cut(data[[native]], breaks = ybreaks, include.lowest = TRUE)))
32+
# Set up the same variables as the original method uses.
# Indexing the break vectors by the factor columns uses each factor's integer
# level code, so x and y hold the lower break of each bin (not its centre).
33+
densf$x <- xbreaks[densf$Var1]
34+
densf$y <- ybreaks[densf$Var2]
35+
densf$z <- densf$Freq
# z is a raw count here, so the scale starts at one sequence per tile.
36+
b <- 1
# Default upper limit is 10^4 * b; if the largest count exceeds that, round
# the limit up to the next power of ten instead.
37+
t <- b * 10^4
38+
if ( max(densf$z) > t ) {
39+
t <- b * 10^ceiling( log10( max(densf$z) ) )
40+
}
41+
r <- 10^seq(log10(b), log10(t), 1)
42+
} else {
43+
# The original counting method: kernel density estimate via kde2d, called
# with n=100, h=1 and lim=c(0,60,40,100) (same x/y ranges as the binned grid).
44+
g <- kde2d(data$germ_div,data[[native]],n=100,h=1,lim=c(0,60,40,100))
45+
densf <- data.frame(expand.grid(x=g$x, y=g$y), z=as.vector(g$z))
# b is half of the summed density divided by the number of input rows; the
# colour-bar labels below are expressed as multiples of b.
# NOTE(review): presumably b stands for "half a sequence" of density -- confirm.
46+
b <- (sum(g$z) / length(data$germ_div))/2
47+
t <- b * 10^4
48+
if ( max(g$z) > t ) {
49+
t <- b * 10^ceiling( log10( max(g$z)/b ) )
50+
}
51+
r <- 10^seq(log10(b), log10(t), 1)
52+
}
53+
54+
# Assemble the plot: tiles filled by z on a log10 scale limited to [b, t], with
# breaks at r labelled as r/b. The axis limits (x: -1..50, y: 50..101) are
# narrower than the 0..60 / 40..100 grid built above.
# NOTE(review): tiles whose z falls outside [b, t] (e.g. zero counts) are
# expected to take na.value ("white") under ggplot2's default out-of-bounds
# handling -- confirm against the ggplot2 version in use.
p<-ggplot(densf,aes(x,y,z=z)) +
55+
geom_tile(aes(fill = z)) +
56+
scale_fill_gradientn(colours=rev(rainbow(15,end=4/6)), trans="log10", limits=c(b,t),
57+
na.value="white", breaks=r, labels=signif(r/b,1),
58+
guide = guide_colorbar( title="number of\nsequences", title.theme=element_text(size=4,angle=0),
59+
barheight = unit(.5,"in"), barwidth = unit(.1,"in"), label.theme=element_text(size=3,angle=0,) ) )+
60+
theme_bw() + scale_x_continuous(expand=c(0,0),limits=c(-1,50)) +
61+
scale_y_continuous(expand=c(0,1),limits=c(50,101)) +
62+
theme(plot.background = element_blank(),panel.grid.major = element_blank(),
63+
axis.ticks.length = unit(.02,"in"), axis.ticks = element_line(size = .5),
64+
panel.grid.minor = element_blank(), axis.text = element_text(size = 6),
65+
axis.title = element_text(size = 8), plot.margin = unit(c(.1,.1,.1,.1),"in"),
66+
plot.title = element_text(size = 8) )
67+
68+
# Optional layers: contour lines at ten times each colour-bar break, the
# title, and the axis titles (an empty string hides a title).
if ( contour ) { p <- p + stat_contour(colour=conCol, size=.25, breaks=10*r) }
69+
if ( ! is.null(plot_title) ) { p <- p + labs( title=plot_title ) }
70+
if ( xlabel ) { p <- p + labs( x=my_x ) } else { p <- p + labs( x="" ) }
71+
if ( ylabel ) { p <- p + labs( y=my_y ) } else { p <- p + labs( y="" ) }
72+
73+
p
5374
}
5475

5576

0 commit comments

Comments
 (0)