@@ -64,7 +64,7 @@ library(viridis)
64
64
library(caTools)
65
65
library(shiny)
66
66
library(bcbioR)
67
- #library(future)
67
+ # library(future)
68
68
69
69
ggplot2::theme_set(theme_prism(base_size = 12))
70
70
# https://grafify-vignettes.netlify.app/colour_palettes.html
@@ -143,14 +143,15 @@ if (isUrl(seurat_obj)) {
143
143
}
144
144
145
145
DefaultAssay(seurat) <- "RNA"
146
-
147
146
```
148
147
149
148
After filtering, each sample contributed the following number of cells to the analysis:
150
149
151
150
``` {r meta pre doub}
152
- [email protected] %>% group_by(orig.ident) %>% summarize(n_bins = n()) %>% sanitize_datatable
153
-
151
+
152
+ group_by(orig.ident) %>%
153
+ summarize(n_bins = n()) %>%
154
+ sanitize_datatable()
154
155
```
155
156
156
157
# Aggregate counts
@@ -175,21 +176,21 @@ To aggregate the counts, we will use the AggregateExpression() function from Seu
175
176
seurat$sample <- seurat$orig.ident
176
177
177
178
bulk <- AggregateExpression(
178
- seurat,
179
- return.seurat = T,
180
- assays = "RNA",
181
- group.by = c("sample",column )
179
+ seurat,
180
+ return.seurat = T,
181
+ assays = "RNA",
182
+ group.by = c("sample", column )
182
183
)
183
184
```
184
185
185
186
## Add number of cells per sample per cluster to the metadata
186
187
187
188
``` {r}
188
189
# Number of cells by sample and celltype
189
-
190
- dplyr::count(sample, .data[[column]]) %>%
191
- dplyr::rename("n_cells"= "n")
192
- #n_cells$sample <- str_replace(n_cells$sample, "_", "-")
190
+
191
+ dplyr::count(sample, .data[[column]]) %>%
192
+ dplyr::rename("n_cells" = "n")
193
+ # n_cells$sample <- str_replace(n_cells$sample, "_", "-")
193
194
194
195
## extra check if aggregated sample names start with numbers
195
196
n_cells$sample <- ifelse(grepl("^\\d", n_cells$sample), paste0("g", n_cells$sample), n_cells$sample)
@@ -198,7 +199,7 @@ n_cells$sample <- ifelse(grepl("^\\d", n_cells$sample), paste0("g", n_cells$samp
198
199
199
200
meta_bulk$sample <- str_replace_all(meta_bulk$sample, "-", "_")
200
201
201
- meta_bulk <- meta_bulk %>% left_join(n_cells, by= c("sample", column))
202
+ meta_bulk <- meta_bulk %>% left_join(n_cells, by = c("sample", column))
202
203
203
204
rownames(meta_bulk) <- meta_bulk$orig.ident
204
205
209
210
210
211
stopifnot(all(Cells(bulk) == row.names([email protected] )))
211
212
212
- [email protected] %>% head() %>% sanitize_datatable()
213
+
214
+ head() %>%
215
+ sanitize_datatable()
213
216
```
214
217
215
218
# DE analysis with DESeq2
@@ -225,12 +228,12 @@ Idents(object = bulk) <- column
225
228
Before moving on to a pseudobulk DGE analysis, it is important to identify how many cells we aggregated for each sample. We need to make sure that we have enough cells per sample after subsetting to one celltype. We recommend 50 cells per sample to move forward with confidence.
226
229
227
230
``` {r}
228
- ggplot([email protected] , aes(x= orig.ident, y= n_cells)) +
229
- geom_bar(stat= "identity", color= "black", aes(fill= .data[[column]])) +
230
- theme_classic() +
231
- theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
232
- labs(x= "Sample name", y= "Number of cells") +
233
- geom_text(aes(label= n_cells), vjust= -0.5)
231
+ ggplot([email protected] , aes(x = orig.ident, y = n_cells)) +
232
+ geom_bar(stat = "identity", color = "black", aes(fill = .data[[column]])) +
233
+ theme_classic() +
234
+ theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
235
+ labs(x = "Sample name", y = "Number of cells") +
236
+ geom_text(aes(label = n_cells), vjust = -0.5)
234
237
```
235
238
236
239
## Run DE analysis
@@ -242,7 +245,7 @@ Before fitting the model, we often look at a metric called dispersion, which is
242
245
We use the below dispersion plot, which should show an inverse relationship between dispersion and mean expression, to get an idea of whether our data is a good fit for the model.
243
246
244
247
``` {r}
245
- cluster_counts <- t(FetchData(bulk, layer= "counts", vars= rownames(bulk)))
248
+ cluster_counts <- t(FetchData(bulk, layer = "counts", vars = rownames(bulk)))
246
249
formula <- as.formula(paste0("~ ", " + ", column))
247
250
248
251
@@ -351,21 +354,22 @@ for (contrast in names(de_list)) {
351
354
### Run pheatmap using the metadata data frame for the annotation
352
355
for (contrast in names(de_list)) {
353
356
cat("### ", contrast, "\n\n")
354
- res_sig = de_list[[contrast]][["sig"]]
355
- ma=norm_matrix[res_sig$gene_id,]
356
- if (length(res_sig$gene_id) > 2){
357
- p1=pheatmap(ma,
358
- color = inferno(10),
359
- cluster_rows = T,
360
- show_rownames = F,
361
- border_color = NA,
362
- fontsize = 10,
363
- scale = "row",
364
- fontsize_row = 10,
365
- height = 20)
357
+ res_sig <- de_list[[contrast]][["sig"]]
358
+ ma <- norm_matrix[res_sig$gene_id, ]
359
+ if (length(res_sig$gene_id) > 2) {
360
+ p1 <- pheatmap(ma,
361
+ color = inferno(10),
362
+ cluster_rows = T,
363
+ show_rownames = F,
364
+ border_color = NA,
365
+ fontsize = 10,
366
+ scale = "row",
367
+ fontsize_row = 10,
368
+ height = 20
369
+ )
366
370
print(p1)
367
371
} else {
368
- print(' Need >2 DEGs to make heatmap' )
372
+ print(" Need >2 DEGs to make heatmap" )
369
373
}
370
374
cat("\n\n")
371
375
}
@@ -374,12 +378,14 @@ for (contrast in names(de_list)) {
374
378
## Differentially Expressed Genes {.tabset}
375
379
376
380
``` {r sig_genes_table, results='asis'}
377
- dt_list=list()
378
- for (contrast in names(de_list)){
379
- res_sig=de_list[[contrast]][["sig"]]
380
- dt_list=c(dt_list,
381
- list(h3(contrast)),
382
- list(DT::datatable(res_sig)))
381
+ dt_list <- list()
382
+ for (contrast in names(de_list)) {
383
+ res_sig <- de_list[[contrast]][["sig"]]
384
+ dt_list <- c(
385
+ dt_list,
386
+ list(h3(contrast)),
387
+ list(DT::datatable(res_sig))
388
+ )
383
389
}
384
390
tagList(dt_list)
385
391
```
@@ -391,28 +397,30 @@ n <- 16
391
397
392
398
for (contrast in names(de_list)) {
393
399
cat("### ", contrast, "\n\n")
394
- res_sig = de_list[[contrast]][["sig"]]
395
-
396
- if(nrow(res_sig) > 0){
397
- top_n <- res_sig %>% slice_min(order_by = padj, n = n, with_ties = F) %>%
400
+ res_sig <- de_list[[contrast]][["sig"]]
401
+
402
+ if (nrow(res_sig) > 0) {
403
+ top_n <- res_sig %>%
404
+ slice_min(order_by = padj, n = n, with_ties = F) %>%
398
405
dplyr::select(gene_id)
399
- top_n_exp <- vsd_matrix %>% as.data.frame() %>%
400
- rownames_to_column('gene_id') %>%
401
- # dplyr::select(-group, -group_name) %>%
402
- pivot_longer(!gene_id, names_to = 'sample', values_to = 'normalized_counts') %>%
406
+ top_n_exp <- vsd_matrix %>%
407
+ as.data.frame() %>%
408
+ rownames_to_column("gene_id") %>%
409
+ # dplyr::select(-group, -group_name) %>%
410
+ pivot_longer(!gene_id, names_to = "sample", values_to = "normalized_counts") %>%
403
411
right_join(top_n, relationship = "many-to-many") %>%
404
- left_join(meta_bulk, by = c("sample"= "orig.ident")) #%>%
405
- # filter(.data[[column]] %in% contrasts[[contrast]][2:3]) # can uncomment this line if you want to include only the groups being compared
406
-
407
- p1= ggplot(top_n_exp, aes(x = .data[[column]], y = normalized_counts)) +
408
- geom_boxplot(outlier.shape = NA, linewidth= 0.5, color= "grey") +
412
+ left_join(meta_bulk, by = c("sample" = "orig.ident")) # %>%
413
+ # filter(.data[[column]] %in% contrasts[[contrast]][2:3]) # can uncomment this line if you want to include only the groups being compared
414
+
415
+ p1 <- ggplot(top_n_exp, aes(x = .data[[column]], y = normalized_counts)) +
416
+ geom_boxplot(outlier.shape = NA, linewidth = 0.5, color = "grey") +
409
417
geom_point() +
410
- facet_wrap(~gene_id, scales = ' free_y' ) +
411
- ggtitle(str_interp(' Expression of Top ${n} DEGs - Pseudobulk' )) +
412
- theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 1))
418
+ facet_wrap(~gene_id, scales = " free_y" ) +
419
+ ggtitle(str_interp(" Expression of Top ${n} DEGs - Pseudobulk" )) +
420
+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
413
421
print(p1)
414
422
} else {
415
- print(' Need at least 1 DEG to make plot' )
423
+ print(" Need at least 1 DEG to make plot" )
416
424
}
417
425
cat("\n\n")
418
426
}
@@ -424,31 +432,35 @@ for (contrast in names(de_list)) {
424
432
n <- 16
425
433
426
434
# subset seurat object to include only sig DE genes, helps with speed
427
- top_n_all <- lapply(names(de_list), function(contrast){
428
- res_sig = de_list[[contrast]][["sig"]]
429
- res_sig %>% slice_min(order_by = padj, n = n, with_ties = F) %>%
430
- dplyr::select(gene_id) %>% mutate(contrast = contrast)
435
+ top_n_all <- lapply(names(de_list), function(contrast) {
436
+ res_sig <- de_list[[contrast]][["sig"]]
437
+ res_sig %>%
438
+ slice_min(order_by = padj, n = n, with_ties = F) %>%
439
+ dplyr::select(gene_id) %>%
440
+ mutate(contrast = contrast)
431
441
}) %>% bind_rows()
432
442
433
443
seurat_topn_all <- subset(seurat, features = unique(top_n_all$gene_id))
434
444
Idents(seurat_topn_all) <- column
435
445
436
- for (contrast in names(de_list)){
446
+ for (contrast in names(de_list)) {
437
447
cat("### ", contrast, "{.tabset} \n\n")
438
- res_sig = de_list[[contrast]][["sig"]]
439
- if(nrow(res_sig) > 0){
440
- top_n <- res_sig %>% slice_min(order_by = padj, n = n, with_ties = F) %>%
448
+ res_sig <- de_list[[contrast]][["sig"]]
449
+ if (nrow(res_sig) > 0) {
450
+ top_n <- res_sig %>%
451
+ slice_min(order_by = padj, n = n, with_ties = F) %>%
441
452
dplyr::select(gene_id)
442
-
443
- p <- DotPlot(seurat_topn_all,
444
- # idents = contrasts[[contrast]][2:3], # can uncomment this line if you want to include only the groups being compared
445
- features = top_n$gene_id) &
446
- scale_color_cb_friendly(discrete = F, palette = 'heatmap') &
453
+
454
+ p <- DotPlot(seurat_topn_all,
455
+ # idents = contrasts[[contrast]][2:3], # can uncomment this line if you want to include only the groups being compared
456
+ features = top_n$gene_id
457
+ ) &
458
+ scale_color_cb_friendly(discrete = F, palette = "heatmap") &
447
459
scale_x_discrete(guide = guide_axis(angle = 45))
448
-
460
+
449
461
print(p)
450
462
} else {
451
- print(' Need at least 1 DEG to make plot' )
463
+ print(" Need at least 1 DEG to make plot" )
452
464
}
453
465
454
466
cat("\n\n")
0 commit comments