bcbio · lpantano · Aug 14, 2025 · Jul 14, 2025 · Jul 14, 2025 · Jul 14, 2025
diff --git a/.github/workflows/run_scrna_qc.yml b/.github/workflows/run_scrna_qc.yml
@@ -0,0 +1,93 @@
+name: Quality Control
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - '01_quality_assessment/scRNA_QC.qmd'
+  pull_request:
+    branches: [main]
+    paths:
+      - '01_quality_assessment/scRNA_QC.qmd'
+  workflow_dispatch:
+
+jobs:
+  r-qc:
+    runs-on: ubuntu-22.04
+
+    env:
+      RENV_PATHS_ROOT: ~/.local/share/renv  # persistent cache location
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install pandoc
+        run: sudo apt-get update && sudo apt-get install -y pandoc
+
+      - name: Set up R
+        uses: r-lib/actions/setup-r@v2
+
+      - name: Install system dependencies for R packages
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential libcurl4-openssl-dev libssl-dev libxml2-dev libgit2-dev libmagick++-dev libharfbuzz-dev libfribidi-dev libglpk-dev
+        shell: bash
+
+      - name: Cache R packages (renv)
+        uses: actions/cache@v4
+        with:
+          path: ${{ env.RENV_PATHS_ROOT }}
+          key: ${{ runner.os }}-renv-${{ hashFiles('01_quality_assessment/renv.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-renv-
+
+      - name: Set repositories
+        run: Rscript ubuntu.R
+
+      - name: Restore environment from renv.lock
+        run: |
+          install.packages("renv", repos = "https://cloud.r-project.org")
+          renv::restore(prompt = FALSE, lockfile = "01_quality_assessment/renv.lock")
+        shell: Rscript {0}
+
+      - name: Set up Quarto
+        uses: quarto-dev/quarto-actions/setup@v2
+
+      - name: Run quality assessment
+        id: render_qmd
+        run: |
+          cd 01_quality_assessment
+          quarto render scRNA_QC.qmd
+        shell: bash
+
+      - name: Deploy HTML to gh-pages
+        if: success()
+        run: |
+          git config --global user.name "github-actions[bot]"
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+          OUTPUT_FILE="01_quality_assessment/scRNA_QC.html"
+
+          # Create a temporary worktree for gh-pages branch
+          git fetch origin gh-pages || true
+          git worktree add /tmp/gh-pages gh-pages || git worktree add /tmp/gh-pages -b gh-pages
+
+          # Copy the file to the worktree
+          mkdir -p /tmp/gh-pages/01_quality_assessment
+          cp "$OUTPUT_FILE" /tmp/gh-pages/01_quality_assessment/
+
+          cd /tmp/gh-pages
+
+          # Commit and push if there are changes
+          if git diff --quiet && git diff --cached --quiet; then
+            echo "No changes to commit"
+          else
+            git add 01_quality_assessment/scRNA_QC.html
+            git commit -m "Deploy scRNA_QC.html [skip ci]"
+            git push origin gh-pages
+          fi
+
+          # Clean up
+          cd -
+          git worktree remove /tmp/gh-pages
+        shell: bash
diff --git a/.gitignore b/.gitignore
@@ -3,5 +3,6 @@
 .RData
 .Ruserdata
 *.Rproj
-*.html
-
+**/*.html
+**html
+!01_quality_assessment/scRNA_QC.html
diff --git a/01_quality_assessment/renv.lock b/01_quality_assessment/renv.lock
diff --git a/01_quality_assessment/scATAC_QC.Rmd → 01_quality_assessment/scATAC_QC.qmd b/01_quality_assessment/scATAC_QC.Rmd → 01_quality_assessment/scATAC_QC.qmd
@@ -26,10 +26,25 @@ params:
   results_dir: !r file.path("results")
   project_file: ../information.R
   #params_file: params_qc.R
+knitr:
+  opts_chunk:
+    cache: false
+    cache.lazy: false
+    dev: c("png", "pdf")
+    error: true
+    highlight: true
+    message: false
+    prompt: false
+    tidy: false
+    warning: false
+    fig-height: 4
 ---
 
 
-```{r, cache = FALSE, message = FALSE, warning=FALSE}
+```{r }
+#| cache: false
+#| message: false
+#| warning: false
 # This set up the working directory to this file so all files can be found
 library(rstudioapi)
 setwd(fs::path_dir(getSourceEditorContext()$path))
@@ -43,12 +58,16 @@ stopifnot(compareVersion(as.character(packageVersion("Seurat")), "5.0.0") >= 0)
 
 This code is in this ![](https://img.shields.io/badge/status-alpha-yellow) revision.
 
-```{r source_params, echo = F}
+```{r source_params}
+#| echo: false
 metadata_fn <- ""
 se_object <- ""
 ```
 
-```{r load_libraries, cache = FALSE, message = FALSE, warning=FALSE}
+```{r load_libraries}
+#| cache: false
+#| message: false
+#| warning: false
 library(Seurat)
 library(Signac)
 library(tidyverse)
@@ -59,21 +78,12 @@ library(kableExtra)
 library(qs)
 library(bcbioR)
 ggplot2::theme_set(theme_light(base_size = 14))
-opts_chunk[["set"]](
-  cache = FALSE,
-  cache.lazy = FALSE,
-  dev = c("png", "pdf"),
-  error = TRUE,
-  highlight = TRUE,
-  message = FALSE,
-  prompt = FALSE,
-  tidy = FALSE,
-  warning = FALSE,
-  fig.height = 4)
 ```
 
 
-```{r subchunkify, echo=FALSE, eval=FALSE}
+```{r subchunkify}
+#| echo: false
+#| eval: false
 #' Create sub-chunks for plots
 #'
 #' taken from: https://stackoverflow.com/questions/15365829/dynamic-height-and-width-for-knitr-plots
@@ -156,7 +166,7 @@ ggplot(meta_df, aes(sample_type, fill = sample_type)) +
 ```
 
 
-```{r  show-metadata}
+```{r show-metadata}
 se <- readRDS(se_object) # local
 
 
@@ -186,7 +196,11 @@ We use some of the results from cellranger outputs and the peaks called using MA
 
 ## Read counts per cell
 
-```{r warning=FALSE, message=FALSE, results='asis', fig.width=12}
+```{r}
+#| warning: false
+#| message: false
+#| results: 'asis'
+#| fig-width: 12
 VlnPlot(seurat, features = "nCount_MACS2peaks", ncol = 1, pt.size = 0) +
   scale_fill_cb_friendly() +
   xlab("") +
@@ -196,7 +210,10 @@ cat("\n\n")
 ```
 ## Detected peaks per cell
 
-```{r warning=FALSE, message=FALSE, fig.width=12}
+```{r}
+#| warning: false
+#| message: false
+#| fig-width: 12
 VlnPlot(seurat, features = "nFeature_MACS2peaks", ncol = 1, pt.size = 0) +
   scale_fill_cb_friendly() +
   xlab("") +
@@ -215,7 +232,12 @@ VlnPlot(seurat, features = "nFeature_MACS2peaks", ncol = 1, pt.size = 0) +
 This metric represents the total number of fragments (= reads) mapping within a region of the genome that is predicted to be accessible (= a peak). It's a measure of cellular sequencing depth / complexity. Cells with very few reads may need to be excluded due to low sequencing depth. Cells with extremely high levels may represent doublets, nuclei clumps, or other artefacts. 
 
 
-```{r results='asis', warning=FALSE, message=FALSE, fig.width=12, fig.height=8}
+```{r}
+#| results: 'asis'
+#| warning: false
+#| message: false
+#| fig-width: 12
+#| fig-height: 8
 DefaultAssay(seurat) <- "MACS2peaks"
 
 cat("#### Histogram \n\n")
@@ -227,7 +249,11 @@ [email protected] %>%
 ```
 
 
-```{r results='asis', warning=FALSE, message=FALSE, fig.width=12}
+```{r}
+#| results: 'asis'
+#| warning: false
+#| message: false
+#| fig-width: 12
 cat("\n#### Violin plot\n\n")
 VlnPlot(
   object = seurat,
@@ -245,7 +271,8 @@ cat("\n\n")
 
 It represents the fraction of all fragments that fall within ATAC-seq peaks. Cells with low values (i.e. <15-20%) often represent low-quality cells or technical artifacts that should be removed. Note that this value can be sensitive to the set of peaks used.
 
-```{r results='asis'}
+```{r}
+#| results: 'asis'
 VlnPlot(
   object = seurat,
   features = "pct_reads_in_peaks",
@@ -263,7 +290,8 @@ cat("\n\n")
 The ENCODE project has defined an ATAC-seq targeting score based on the ratio of fragments centered at the TSS to fragments in TSS-flanking regions (see https://www.encodeproject.org/data-standards/terms/). Poor ATAC-seq experiments typically will have a low TSS enrichment score. We can compute this metric for each cell with the TSSEnrichment() function, and the results are stored in metadata under the column name TSS.enrichment.
 
 
-```{r results='asis'}
+```{r}
+#| results: 'asis'
 VlnPlot(
   object = seurat,
   features = "TSS.enrichment",
@@ -275,7 +303,8 @@ The following tabs show the TSS enrichment score distribution for each sample. C
 
 Each plot is split between cells with a high or low global TSS enrichment score (cuffoff at `r params$min_TSS`), to double-check whether cells with lowest enrichment scores still follow the expected pattern or rather need to be excluded.
 
-```{r results='asis'}
+```{r}
+#| results: 'asis'
 seurat$TSS.group <- ifelse(seurat$TSS.enrichment > params$min_TSS, "High", "Low")
 Idents(seurat) <- "sample"
 for (sample in levels(seurat$sample)) {
@@ -290,7 +319,9 @@ for (sample in levels(seurat$sample)) {
 
 The histogram of DNA fragment sizes (determined from the paired-end sequencing reads) should exhibit a strong nucleosome banding pattern corresponding to the length of DNA wrapped around a single nucleosome, i.e peaks at approximately 100bp (nucleosome-free), and mono, di and tri nucleosome-bound peaks at 200, 400 and 600bp. We calculate this per single cell, and quantify the approximate ratio of mononucleosomal to nucleosome-free fragments (stored as nucleosome_signal). Cells with lower nucleosome signal have a higher ratio of nucleosome-free fragments.
 
-```{r warning = FALSE, results='asis'}
+```{r}
+#| warning: false
+#| results: 'asis'
 seurat$nucleosome_group <- ifelse(seurat$nucleosome_signal > 1, "high NS", "low NS")
 for (sample in levels(seurat$sample)) {
   cat("####", sample, "\n\n")
@@ -301,7 +332,8 @@ for (sample in levels(seurat$sample)) {
 # FragmentHistogram(seurat, group.by = 'nucleosome_group', cells = colnames(seurat[, seurat$sample == 'Control']))
 ```
 
-```{r fig.width=12}
+```{r}
+#| fig-width: 12
 VlnPlot(
   object = seurat,
   features = "nucleosome_signal",
@@ -319,7 +351,8 @@ tIt's he ratio of reads in genomic blacklist regions. The [ENCODE project](https
 
 Line is drawn at `r params$max_blacklistratio`. 
 
-```{r fig.width=12}
+```{r}
+#| fig-width: 12
 VlnPlot(
   object = seurat,
   features = "blacklist_fraction",
@@ -343,14 +376,16 @@ The combined steps of TF-IDF followed by SVD are known as latent semantic indexi
 The first LSI component often captures sequencing depth (techni ccal variation) rather than biological variation. If this is the case, the component should be removed from downstream analysis. We can assess the correlation between each LSI component and sequencing depth using the DepthCor() function (see below). 
 Here we see there is a very strong correlation between the first LSI component and the total number of counts for the cell, so we will perform downstream steps without this component.
 
-```{r results='asis'}
+```{r}
+#| results: 'asis'
 DepthCor(seurat, assay = "MACS2peaks")
 cat("\n\n")
 ```
 
 ## UMAP plots
 
-```{r results='asis'}
+```{r}
+#| results: 'asis'
 DimPlot(object = seurat, group.by = "sample", reduction = "umapATAC") +
   scale_color_cb_friendly()