Skip to content

Commit

Permalink
[r] Add pbmc3k dataset helper (#792)
Browse files Browse the repository at this point in the history
* Add method for creating seurat pbmc3k dataset
* Update news
* Bump version and update docs
  • Loading branch information
aaronwolen authored Jan 23, 2023
1 parent 2a148c5 commit 8dda13a
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 2 deletions.
4 changes: 2 additions & 2 deletions apis/r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices,
from and export to in-memory formats used by popular toolchains like
'Seurat', 'Bioconductor', and even 'AnnData' using the companion Python
package.
Version: 0.1.22
Version: 0.1.22.9000
Authors@R: c(
person(given = "Aaron",
family = "Wolen",
Expand Down Expand Up @@ -45,7 +45,7 @@ Imports:
urltools,
vctrs
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
Suggests:
rmarkdown,
knitr,
Expand Down
5 changes: 5 additions & 0 deletions apis/r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ export(SOMACollection)
export(TileDBArray)
export(TileDBGroup)
export(TileDBObject)
export(dataset_seurat_pbmc3k)
import(tiledb)
importFrom(Matrix,mat2triplet)
importFrom(Matrix,nnzero)
importFrom(Matrix,readMM)
importFrom(Matrix,sparseMatrix)
importFrom(R6,R6Class)
importFrom(SeuratObject,AddMetaData)
Expand All @@ -37,4 +39,7 @@ importFrom(glue,glue_collapse)
importFrom(methods,slot)
importFrom(urltools,url_compose)
importFrom(urltools,url_parse)
importFrom(utils,download.file)
importFrom(utils,modifyList)
importFrom(utils,read.table)
importFrom(utils,untar)
6 changes: 6 additions & 0 deletions apis/r/NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# tiledbsoma (development version)

## Features

* New function `dataset_seurat_pbmc3k()` to download the pbmc3k dataset from 10x Genomics and import it as a `Seurat` object without requiring any extra dependencies.

# tiledbsoma 0.1.19

## Changes
Expand Down
46 changes: 46 additions & 0 deletions apis/r/R/datasets.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' Seurat 3k PBMCs from 10x Genomics
#'
#' Create a [`SeuratObject::Seurat`] object containing the widely used 3k PBMCs
#' dataset from 10x Genomics.
#'
#' The archive is downloaded into the session's temporary directory
#' ([tempdir()]) and extracted next to it. Later calls in the same R session
#' reuse the extracted files instead of downloading again. If the download or
#' the extraction fails, the partial artifacts are removed so that the next
#' call starts from a clean state.
#'
#' The count matrix is labelled with the Ensembl gene ids (rows) and the cell
#' barcodes (columns). Gene symbols are attached to the `RNA` assay as the
#' feature-level metadata column `gene_name`.
#'
#' @returns a [`SeuratObject::Seurat`] object
#' @seealso https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.1.0/pbmc3k
#' @importFrom Matrix readMM
#' @importFrom utils download.file read.table untar
#' @export
dataset_seurat_pbmc3k <- function() {
  url <- "https://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz"

  tarfile <- file.path(tempdir(), basename(url))
  tardir <- sub("\\.tar\\.gz$", "", tarfile)
  datadir <- file.path(tardir, "filtered_gene_bc_matrices", "hg19")
  datafiles <- file.path(datadir, c("matrix.mtx", "genes.tsv", "barcodes.tsv"))

  # Decide on the files that are actually read below rather than on the
  # extraction directory: an interrupted extraction leaves the directory
  # behind, and checking only the directory would make every later call fail.
  if (!all(file.exists(datafiles))) {
    if (!file.exists(tarfile)) {
      status <- tryCatch(
        # Binary mode keeps the gzip archive intact on every platform
        utils::download.file(url = url, destfile = tarfile, mode = "wb"),
        error = function(e) {
          # Do not leave a truncated archive that would be reused next time
          unlink(tarfile)
          stop(e)
        }
      )
      if (!identical(as.integer(status), 0L)) {
        unlink(tarfile)
        stop("Failed to download '", url, "'", call. = FALSE)
      }
    }

    untar_error <- tryCatch(
      {
        utils::untar(tarfile, exdir = tardir)
        NULL
      },
      error = function(e) conditionMessage(e)
    )
    if (!all(file.exists(datafiles))) {
      # The archive is corrupt or incomplete: drop it together with whatever
      # was extracted so the next call downloads a fresh copy.
      unlink(c(tarfile, tardir), recursive = TRUE)
      detail <- if (is.null(untar_error)) "" else paste0(": ", untar_error)
      stop(
        "Failed to extract '", basename(tarfile), "'", detail,
        "; the cached download was removed, please try again",
        call. = FALSE
      )
    }
  }

  mat <- Matrix::readMM(datafiles[1])

  # Both tables are headerless TSV files. Parse them strictly as tab-separated
  # text so quotes or '#' inside a field cannot shift or truncate rows, and
  # keep the columns as character vectors regardless of the R version.
  genes <- utils::read.table(
    file = datafiles[2],
    header = FALSE,
    sep = "\t",
    quote = "",
    comment.char = "",
    stringsAsFactors = FALSE,
    col.names = c("id", "gene_name")
  )
  barcodes <- utils::read.table(
    file = datafiles[3],
    header = FALSE,
    sep = "\t",
    quote = "",
    comment.char = "",
    stringsAsFactors = FALSE,
    col.names = "id"
  )
  dimnames(mat) <- list(genes$id, barcodes$id)

  object <- SeuratObject::CreateSeuratObject(counts = mat)
  # Features are identified by gene id; keep the gene symbols as
  # feature-level metadata on the RNA assay.
  object[["RNA"]] <- SeuratObject::AddMetaData(
    object = object[["RNA"]],
    metadata = genes$gene_name,
    col.name = "gene_name"
  )
  object
}
18 changes: 18 additions & 0 deletions apis/r/man/dataset_seurat_pbmc3k.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 8dda13a

Please sign in to comment.