Skip to content

Commit b98f1bb

Browse files
committed
Style code (GHA)
1 parent 4d7e781 commit b98f1bb

File tree

1 file changed

+84
-72
lines changed

1 file changed

+84
-72
lines changed

03_differential_expression/scRNA_pseudobulk.Rmd

Lines changed: 84 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ library(viridis)
6464
library(caTools)
6565
library(shiny)
6666
library(bcbioR)
67-
#library(future)
67+
# library(future)
6868
6969
ggplot2::theme_set(theme_prism(base_size = 12))
7070
# https://grafify-vignettes.netlify.app/colour_palettes.html
@@ -143,14 +143,15 @@ if (isUrl(seurat_obj)) {
143143
}
144144
145145
DefaultAssay(seurat) <- "RNA"
146-
147146
```
148147

149148
After filtering, each sample contributed the following number of cells to the analysis:
150149

151150
```{r meta pre doub}
152-
[email protected] %>% group_by(orig.ident) %>% summarize(n_bins = n()) %>% sanitize_datatable
153-
151+
152+
group_by(orig.ident) %>%
153+
summarize(n_bins = n()) %>%
154+
sanitize_datatable()
154155
```
155156

156157
# Aggregate counts
@@ -175,21 +176,21 @@ To aggregate the counts, we will use the AggregateExpression() function from Seu
175176
seurat$sample <- seurat$orig.ident
176177
177178
bulk <- AggregateExpression(
178-
seurat,
179-
return.seurat = T,
180-
assays = "RNA",
181-
group.by = c("sample",column )
179+
seurat,
180+
return.seurat = T,
181+
assays = "RNA",
182+
group.by = c("sample", column)
182183
)
183184
```
184185

185186
## Add number of cells per sample per cluster to the metadata
186187

187188
```{r}
188189
# Number of cells by sample and celltype
189-
n_cells <- [email protected] %>%
190-
dplyr::count(sample, .data[[column]]) %>%
191-
dplyr::rename("n_cells"="n")
192-
#n_cells$sample <- str_replace(n_cells$sample, "_", "-")
190+
n_cells <- [email protected] %>%
191+
dplyr::count(sample, .data[[column]]) %>%
192+
dplyr::rename("n_cells" = "n")
193+
# n_cells$sample <- str_replace(n_cells$sample, "_", "-")
193194
194195
## extra check if aggregated sample names start with numbers
195196
n_cells$sample <- ifelse(grepl("^\\d", n_cells$sample), paste0("g", n_cells$sample), n_cells$sample)
@@ -198,7 +199,7 @@ n_cells$sample <- ifelse(grepl("^\\d", n_cells$sample), paste0("g", n_cells$samp
198199
meta_bulk <- [email protected]
199200
meta_bulk$sample <- str_replace_all(meta_bulk$sample, "-", "_")
200201
201-
meta_bulk <- meta_bulk %>% left_join(n_cells, by=c("sample", column))
202+
meta_bulk <- meta_bulk %>% left_join(n_cells, by = c("sample", column))
202203
203204
rownames(meta_bulk) <- meta_bulk$orig.ident
204205
[email protected] <- meta_bulk
@@ -209,7 +210,9 @@ [email protected] <- meta_bulk
209210
210211
stopifnot(all(Cells(bulk) == row.names([email protected])))
211212
212-
[email protected] %>% head() %>% sanitize_datatable()
213+
214+
head() %>%
215+
sanitize_datatable()
213216
```
214217

215218
# DE analysis with DESeq2
@@ -225,12 +228,12 @@ Idents(object = bulk) <- column
225228
Before moving on to a pseudobulk DGE analysis, it is important to identify how many cells we aggregated for each sample. We need to make sure that we have enough cells per sample after subsetting to one celltype. We recommend 50 cells per sample to move forward with confidence.
226229

227230
```{r}
228-
ggplot([email protected], aes(x=orig.ident, y=n_cells)) +
229-
geom_bar(stat="identity", color="black", aes(fill=.data[[column]])) +
230-
theme_classic() +
231-
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
232-
labs(x="Sample name", y="Number of cells") +
233-
geom_text(aes(label=n_cells), vjust=-0.5)
231+
ggplot([email protected], aes(x = orig.ident, y = n_cells)) +
232+
geom_bar(stat = "identity", color = "black", aes(fill = .data[[column]])) +
233+
theme_classic() +
234+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
235+
labs(x = "Sample name", y = "Number of cells") +
236+
geom_text(aes(label = n_cells), vjust = -0.5)
234237
```
235238

236239
## Run DE analysis
@@ -242,7 +245,7 @@ Before fitting the model, we often look at a metric called dispersion, which is
242245
We use the below dispersion plot, which should show an inverse relationship between dispersion and mean expression, to get an idea of whether our data is a good fit for the model.
243246

244247
```{r}
245-
cluster_counts <- t(FetchData(bulk, layer="counts", vars=rownames(bulk)))
248+
cluster_counts <- t(FetchData(bulk, layer = "counts", vars = rownames(bulk)))
246249
formula <- as.formula(paste0("~ ", " + ", column))
247250
248251
@@ -351,21 +354,22 @@ for (contrast in names(de_list)) {
351354
### Run pheatmap using the metadata data frame for the annotation
352355
for (contrast in names(de_list)) {
353356
cat("### ", contrast, "\n\n")
354-
res_sig = de_list[[contrast]][["sig"]]
355-
ma=norm_matrix[res_sig$gene_id,]
356-
if (length(res_sig$gene_id) > 2){
357-
p1=pheatmap(ma,
358-
color = inferno(10),
359-
cluster_rows = T,
360-
show_rownames = F,
361-
border_color = NA,
362-
fontsize = 10,
363-
scale = "row",
364-
fontsize_row = 10,
365-
height = 20)
357+
res_sig <- de_list[[contrast]][["sig"]]
358+
ma <- norm_matrix[res_sig$gene_id, ]
359+
if (length(res_sig$gene_id) > 2) {
360+
p1 <- pheatmap(ma,
361+
color = inferno(10),
362+
cluster_rows = T,
363+
show_rownames = F,
364+
border_color = NA,
365+
fontsize = 10,
366+
scale = "row",
367+
fontsize_row = 10,
368+
height = 20
369+
)
366370
print(p1)
367371
} else {
368-
print('Need >2 DEGs to make heatmap')
372+
print("Need >2 DEGs to make heatmap")
369373
}
370374
cat("\n\n")
371375
}
@@ -374,12 +378,14 @@ for (contrast in names(de_list)) {
374378
## Differentially Expressed Genes {.tabset}
375379

376380
```{r sig_genes_table, results='asis'}
377-
dt_list=list()
378-
for (contrast in names(de_list)){
379-
res_sig=de_list[[contrast]][["sig"]]
380-
dt_list=c(dt_list,
381-
list(h3(contrast)),
382-
list(DT::datatable(res_sig)))
381+
dt_list <- list()
382+
for (contrast in names(de_list)) {
383+
res_sig <- de_list[[contrast]][["sig"]]
384+
dt_list <- c(
385+
dt_list,
386+
list(h3(contrast)),
387+
list(DT::datatable(res_sig))
388+
)
383389
}
384390
tagList(dt_list)
385391
```
@@ -391,28 +397,30 @@ n <- 16
391397
392398
for (contrast in names(de_list)) {
393399
cat("### ", contrast, "\n\n")
394-
res_sig = de_list[[contrast]][["sig"]]
395-
396-
if(nrow(res_sig) > 0){
397-
top_n <- res_sig %>% slice_min(order_by = padj, n = n, with_ties = F) %>%
400+
res_sig <- de_list[[contrast]][["sig"]]
401+
402+
if (nrow(res_sig) > 0) {
403+
top_n <- res_sig %>%
404+
slice_min(order_by = padj, n = n, with_ties = F) %>%
398405
dplyr::select(gene_id)
399-
top_n_exp <- vsd_matrix %>% as.data.frame() %>%
400-
rownames_to_column('gene_id') %>%
401-
# dplyr::select(-group, -group_name) %>%
402-
pivot_longer(!gene_id, names_to = 'sample', values_to = 'normalized_counts') %>%
406+
top_n_exp <- vsd_matrix %>%
407+
as.data.frame() %>%
408+
rownames_to_column("gene_id") %>%
409+
# dplyr::select(-group, -group_name) %>%
410+
pivot_longer(!gene_id, names_to = "sample", values_to = "normalized_counts") %>%
403411
right_join(top_n, relationship = "many-to-many") %>%
404-
left_join(meta_bulk, by = c("sample"="orig.ident")) #%>%
405-
# filter(.data[[column]] %in% contrasts[[contrast]][2:3]) # can uncomment this line if you want to include only the groups being compared
406-
407-
p1=ggplot(top_n_exp, aes(x = .data[[column]], y = normalized_counts)) +
408-
geom_boxplot(outlier.shape = NA, linewidth=0.5, color="grey") +
412+
left_join(meta_bulk, by = c("sample" = "orig.ident")) # %>%
413+
# filter(.data[[column]] %in% contrasts[[contrast]][2:3]) # can uncomment this line if you want to include only the groups being compared
414+
415+
p1 <- ggplot(top_n_exp, aes(x = .data[[column]], y = normalized_counts)) +
416+
geom_boxplot(outlier.shape = NA, linewidth = 0.5, color = "grey") +
409417
geom_point() +
410-
facet_wrap(~gene_id, scales = 'free_y') +
411-
ggtitle(str_interp('Expression of Top ${n} DEGs - Pseudobulk')) +
412-
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
418+
facet_wrap(~gene_id, scales = "free_y") +
419+
ggtitle(str_interp("Expression of Top ${n} DEGs - Pseudobulk")) +
420+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
413421
print(p1)
414422
} else {
415-
print('Need at least 1 DEG to make plot')
423+
print("Need at least 1 DEG to make plot")
416424
}
417425
cat("\n\n")
418426
}
@@ -424,31 +432,35 @@ for (contrast in names(de_list)) {
424432
n <- 16
425433
426434
# subset seurat object to include only sig DE genes, helps with speed
427-
top_n_all <- lapply(names(de_list), function(contrast){
428-
res_sig = de_list[[contrast]][["sig"]]
429-
res_sig %>% slice_min(order_by = padj, n = n, with_ties = F) %>%
430-
dplyr::select(gene_id) %>% mutate(contrast = contrast)
435+
top_n_all <- lapply(names(de_list), function(contrast) {
436+
res_sig <- de_list[[contrast]][["sig"]]
437+
res_sig %>%
438+
slice_min(order_by = padj, n = n, with_ties = F) %>%
439+
dplyr::select(gene_id) %>%
440+
mutate(contrast = contrast)
431441
}) %>% bind_rows()
432442
433443
seurat_topn_all <- subset(seurat, features = unique(top_n_all$gene_id))
434444
Idents(seurat_topn_all) <- column
435445
436-
for (contrast in names(de_list)){
446+
for (contrast in names(de_list)) {
437447
cat("### ", contrast, "{.tabset} \n\n")
438-
res_sig = de_list[[contrast]][["sig"]]
439-
if(nrow(res_sig) > 0){
440-
top_n <- res_sig %>% slice_min(order_by = padj, n = n, with_ties = F) %>%
448+
res_sig <- de_list[[contrast]][["sig"]]
449+
if (nrow(res_sig) > 0) {
450+
top_n <- res_sig %>%
451+
slice_min(order_by = padj, n = n, with_ties = F) %>%
441452
dplyr::select(gene_id)
442-
443-
p <- DotPlot(seurat_topn_all,
444-
# idents = contrasts[[contrast]][2:3], # can uncomment this line if you want to include only the groups being compared
445-
features = top_n$gene_id) &
446-
scale_color_cb_friendly(discrete = F, palette = 'heatmap') &
453+
454+
p <- DotPlot(seurat_topn_all,
455+
# idents = contrasts[[contrast]][2:3], # can uncomment this line if you want to include only the groups being compared
456+
features = top_n$gene_id
457+
) &
458+
scale_color_cb_friendly(discrete = F, palette = "heatmap") &
447459
scale_x_discrete(guide = guide_axis(angle = 45))
448-
460+
449461
print(p)
450462
} else {
451-
print('Need at least 1 DEG to make plot')
463+
print("Need at least 1 DEG to make plot")
452464
}
453465
454466
cat("\n\n")

0 commit comments

Comments
 (0)