diff --git a/book/disk_based/disk_based_pipelines.html b/book/disk_based/disk_based_pipelines.html
index 106b36c..c81a83b 100644
--- a/book/disk_based/disk_based_pipelines.html
+++ b/book/disk_based/disk_based_pipelines.html
@@ -555,7 +555,7 @@

docker pull berombau/polygloty-docker:latest
 docker run -it -v $(pwd)/usecase:/app/usecase -v $(pwd)/book:/app/book berombau/polygloty-docker:latest pixi run pipeline

Another approach is to use multi-package containers. Tools like Multi-Package BioContainers and Seqera Containers can make this quick and easy, by allowing for custom combinations of packages.

-

You can go a long way with a folder of notebooks or scripts and the right tools. But as your project grows more bespoke, it can be worth the effort to use a workflow framework like Nextflow or Snakemake to manage the pipeline for you.

+

You can go a long way with a folder of notebooks or scripts and the right tools. But as your project grows more bespoke, it can be worth the effort to use a workflow framework like Viash, Nextflow or Snakemake to manage the pipeline for you.

diff --git a/book/in_memory/reticulate.html b/book/in_memory/reticulate.html
index 3f3fee3..508f61e 100644
--- a/book/in_memory/reticulate.html
+++ b/book/in_memory/reticulate.html
@@ -322,7 +322,7 @@

6 Reticulate: basic functionality
rd$choice(example)
-
[1] 2
+
[1] 3
bi$list(bi$reversed(example))
diff --git a/book/in_memory/rpy2.html b/book/in_memory/rpy2.html
index 24d8e94..2c5dc7a 100644
--- a/book/in_memory/rpy2.html
+++ b/book/in_memory/rpy2.html
@@ -379,32 +379,32 @@

We will also showcase how to use anndata2ri to convert an anndata object to a SingleCellExperiment object and vice versa:

import anndata as ad
-import scanpy.datasets as scd
-
-
Matplotlib is building the font cache; this may take a moment.
-
-
import anndata2ri
-
-adata_paul = scd.paul15()
+import scanpy.datasets as scd
+
+import anndata2ri
+
+adata_paul = scd.paul15()

   0%|          | 0.00/9.82M [00:00<?, ?B/s]
-  0%|          | 16.0k/9.82M [00:00<01:04, 160kB/s]
-  0%|          | 32.0k/9.82M [00:00<01:04, 160kB/s]
-  1%|          | 96.0k/9.82M [00:00<00:27, 374kB/s]
-  2%|1         | 192k/9.82M [00:00<00:16, 596kB/s] 
-  4%|3         | 400k/9.82M [00:00<00:08, 1.11MB/s]
-  8%|8         | 816k/9.82M [00:00<00:04, 2.11MB/s]
- 16%|#6        | 1.62M/9.82M [00:00<00:02, 4.10MB/s]
- 33%|###3      | 3.25M/9.82M [00:00<00:00, 7.97MB/s]
- 63%|######3   | 6.23M/9.82M [00:00<00:00, 14.8MB/s]
- 94%|#########3| 9.22M/9.82M [00:01<00:00, 19.3MB/s]
-100%|##########| 9.82M/9.82M [00:01<00:00, 9.71MB/s]
+  0%|          | 8.00k/9.82M [00:00<03:13, 53.3kB/s]
+  0%|          | 32.0k/9.82M [00:00<01:30, 114kB/s]
+  1%|          | 96.0k/9.82M [00:00<00:40, 250kB/s]
+  2%|1         | 200k/9.82M [00:00<00:24, 416kB/s]
+  4%|4         | 408k/9.82M [00:00<00:13, 749kB/s]
+  8%|8         | 840k/9.82M [00:00<00:06, 1.44MB/s]
+ 17%|#6        | 1.65M/9.82M [00:01<00:03, 2.75MB/s]
+ 30%|##9       | 2.91M/9.82M [00:01<00:01, 4.35MB/s]
+ 46%|####6     | 4.55M/9.82M [00:01<00:00, 6.55MB/s]
+ 67%|######6   | 6.55M/9.82M [00:01<00:00, 8.60MB/s]
+ 88%|########7 | 8.64M/9.82M [00:01<00:00, 9.21MB/s]
+ 93%|#########2| 9.09M/9.82M [00:01<00:00, 8.04MB/s]
+100%|##########| 9.82M/9.82M [00:01<00:00, 5.35MB/s]
-

-with anndata2ri.converter.context():
-    sce = anndata2ri.py2rpy(adata_paul)
-    ad2 = anndata2ri.rpy2py(sce)
+

+with anndata2ri.converter.context():
+    sce = anndata2ri.py2rpy(adata_paul)
+    ad2 = anndata2ri.rpy2py(sce)
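A quick sanity check on the round trip (a sketch that is not part of the original example, assuming the conversion above succeeded) is to compare the dimensions of the two objects:

# the anndata -> SingleCellExperiment -> anndata round trip should preserve n_obs x n_vars
print(adata_paul.shape, ad2.shape)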

@@ -412,87 +412,87 @@

One of the most useful ways to take advantage of in-memory interoperability is to use it in interactive sessions, where you’re exploring the data and want to try out some functions non-native to your language of choice.

Jupyter notebooks (and some other notebook environments) make this possible from the Python side: using IPython line and cell magic together with rpy2, you can easily run an R cell in your notebooks.

-
%load_ext rpy2.ipython  # line magic that loads the rpy2 ipython extension.
-                        # this extension allows the use of the following cell magic
-
-%%R -i input -o output  # this line allows to specify inputs 
-                        # (which will be converted to R objects) and outputs 
-                        # (which will be converted back to Python objects) 
-                        # this line is put at the start of a cell
-                        # the rest of the cell will be run as R code
+
%load_ext rpy2.ipython  # line magic that loads the rpy2 ipython extension.
+                        # this extension allows the use of the following cell magic
+
+%%R -i input -o output  # this line allows to specify inputs 
+                        # (which will be converted to R objects) and outputs 
+                        # (which will be converted back to Python objects) 
+                        # this line is put at the start of a cell
+                        # the rest of the cell will be run as R code
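As a concrete, hypothetical example: assuming a numeric pandas DataFrame named pd_df already exists in the Python session and the extension is loaded as above, a cell could pass it to R and collect the result back in Python:

%%R -i pd_df -o col_means
# pd_df arrives here as an R data.frame
# col_means is converted back to a Python object when the cell finishes
col_means <- colMeans(pd_df)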

4.3 Usecase: run in Python

We will perform the Compute DE step not in R, but in Python. The pseudobulked data is read in:

-
import anndata as ad
-
-pd_adata = ad.read_h5ad("../usecase/data/pseudobulk.h5ad")
+
import anndata as ad
+
+pd_adata = ad.read_h5ad("../usecase/data/pseudobulk.h5ad")

Select small molecule and control:

-
sm_name = "Belinostat"
-control_name = "Dimethyl Sulfoxide"
+
sm_name = "Belinostat"
+control_name = "Dimethyl Sulfoxide"

Creating a DESeq dataset requires a bit more effort: we need to import the DESeq2 package and combine the default, numpy2ri and pandas2ri converters to convert the count matrix and the obs dataframe.

-
import numpy as np
-
-import rpy2
-import rpy2.robjects as robjects
-
-from rpy2.robjects import numpy2ri
-from rpy2.robjects import pandas2ri
-
-from rpy2.robjects import default_converter
-from rpy2.robjects.packages import importr
-
-DESeq2 = importr("DESeq2")
-
-np_cv_rules = default_converter + numpy2ri.converter + pandas2ri.converter
-
-with np_cv_rules.context() as cv:
-    counts_dense = np.transpose(pd_adata.X.astype(np.int32))
-
-    robjects.globalenv["count_data"] = counts_dense
-    robjects.globalenv["obs_data"] = pd_adata.obs
+
import numpy as np
+
+import rpy2
+import rpy2.robjects as robjects
+
+from rpy2.robjects import numpy2ri
+from rpy2.robjects import pandas2ri
+
+from rpy2.robjects import default_converter
+from rpy2.robjects.packages import importr
+
+DESeq2 = importr("DESeq2")
+
+np_cv_rules = default_converter + numpy2ri.converter + pandas2ri.converter
+
+with np_cv_rules.context() as cv:
+    counts_dense = np.transpose(pd_adata.X.astype(np.int32))
+
+    robjects.globalenv["count_data"] = counts_dense
+    robjects.globalenv["obs_data"] = pd_adata.obs

We can also specify R formulas!

-
from rpy2.robjects import Formula
-
-design_formula = Formula('~ sm_name + plate_name')
-
-dds = DESeq2.DESeqDataSetFromMatrix(countData = robjects.globalenv["count_data"],
-        colData = robjects.globalenv["obs_data"],
-        design = design_formula)
+
from rpy2.robjects import Formula
+
+design_formula = Formula('~ sm_name + plate_name')
+
+dds = DESeq2.DESeqDataSetFromMatrix(countData = robjects.globalenv["count_data"],
+        colData = robjects.globalenv["obs_data"],
+        design = design_formula)

Run DESeq2:

-
dds = DESeq2.DESeq(dds)
+
dds = DESeq2.DESeq(dds)

Get results:

-
contrastv = robjects.StrVector(["sm_name", sm_name, control_name])
-res = DESeq2.results(dds, contrast=contrastv)
-
-base = importr('base')
-res = base.as_data_frame(res)
+
contrastv = robjects.StrVector(["sm_name", sm_name, control_name])
+res = DESeq2.results(dds, contrast=contrastv)
+
+base = importr('base')
+res = base.as_data_frame(res)

Preview results:

-
dplyr = importr('dplyr')
-utils = importr('utils')
-
-res = utils.head(dplyr.arrange(res, 'padj'), 10)
+
dplyr = importr('dplyr')
+utils = importr('utils')
+
+res = utils.head(dplyr.arrange(res, 'padj'), 10)

Write to disk: this again requires the pandas2ri converter to convert the results to a pandas dataframe.

-
with (robjects.default_converter + pandas2ri.converter).context():
-    res_pd = robjects.conversion.get_conversion().rpy2py(res)
-
-    res_pd.to_csv("../usecase/data/de_contrasts.csv")
+
with (robjects.default_converter + pandas2ri.converter).context():
+    res_pd = robjects.conversion.get_conversion().rpy2py(res)
+
+    res_pd.to_csv("../usecase/data/de_contrasts.csv")
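As an optional check (not part of the original pipeline), the exported file can be read back with pandas; to_csv wrote the row names as the first column, so they are used as the index here:

import pandas as pd

# read the DE results back in, with the row names as the index
de_check = pd.read_csv("../usecase/data/de_contrasts.csv", index_col=0)
print(de_check.head())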
diff --git a/search.json b/search.json index e4f4829..0e9c651 100644 --- a/search.json +++ b/search.json @@ -159,7 +159,7 @@ "href": "book/in_memory/rpy2.html", "title": "4  Rpy2", "section": "", - "text": "4.1 Rpy2: basic functionality\nRpy2 is a foreign function interface to R. It can be used in the following way:\nimport rpy2\nimport rpy2.robjects as robjects\n\n/home/runner/work/polygloty/polygloty/renv/python/virtualenvs/renv-python-3.12/lib/python3.12/site-packages/rpy2/rinterface_lib/embedded.py:276: UserWarning: R was initialized outside of rpy2 (R_NilValue != NULL). Trying to use it nevertheless.\n warnings.warn(msg)\nR was initialized outside of rpy2 (R_NilValue != NULL). Trying to use it nevertheless.\n\nvector = robjects.IntVector([1,2,3])\nrsum = robjects.r['sum']\n\nrsum(vector)\n\n\n IntVector with 1 elements.\n \n\n\n\n6\nLuckily, we’re not restricted to just calling R functions and creating R objects. The real power of this in-memory interoperability lies in the conversion of Python objects to R objects to call R functions on, and then to the conversion of the results back to Python objects.\nRpy2 requires specific conversion rules for different Python objects. It is straightforward to create R vectors from corresponding Python lists:\nstr_vector = robjects.StrVector(['abc', 'def', 'ghi'])\nflt_vector = robjects.FloatVector([0.3, 0.8, 0.7])\nint_vector = robjects.IntVector([1, 2, 3])\nmtx = robjects.r.matrix(robjects.IntVector(range(10)), nrow=5)\nHowever, for single cell biology, the objects that are most interesting to convert are (count) matrices, arrays and dataframes. In order to do this, you need to import the corresponding rpy2 modules and specify the conversion context.\nimport numpy as np\n\nfrom rpy2.robjects import numpy2ri\nfrom rpy2.robjects import default_converter\n\nrd_m = np.random.random((10, 7))\n\nwith (default_converter + numpy2ri.converter).context():\n mtx2 = robjects.r.matrix(rd_m, nrow = 10)\nimport pandas as pd\n\nfrom rpy2.robjects import pandas2ri\n\npd_df = pd.DataFrame({'int_values': [1,2,3],\n 'str_values': ['abc', 'def', 'ghi']})\n\nwith (default_converter + pandas2ri.converter).context():\n pd_df_r = robjects.DataFrame(pd_df)\nOne big limitation of rpy2 is the inability to convert sparse matrices: there is no built-in conversion module for scipy. 
The anndata2ri package provides, apart from functionality to convert SingleCellExperiment objects to an anndata objects, functions to convert sparse matrices.\nimport scipy as sp\n\nfrom anndata2ri import scipy2ri\n\nsparse_matrix = sp.sparse.csc_matrix(rd_m)\n\nwith (default_converter + scipy2ri.converter).context():\n sp_r = scipy2ri.py2rpy(sparse_matrix)\nWe will showcase how to use anndata2ri to convert an anndata object to a SingleCellExperiment object and vice versa as well:\nimport anndata as ad\nimport scanpy.datasets as scd\n\nMatplotlib is building the font cache; this may take a moment.\n\nimport anndata2ri\n\nadata_paul = scd.paul15()\n\n\n 0%| | 0.00/9.82M [00:00<?, ?B/s]\n 0%| | 16.0k/9.82M [00:00<01:04, 160kB/s]\n 0%| | 32.0k/9.82M [00:00<01:04, 160kB/s]\n 1%| | 96.0k/9.82M [00:00<00:27, 374kB/s]\n 2%|1 | 192k/9.82M [00:00<00:16, 596kB/s] \n 4%|3 | 400k/9.82M [00:00<00:08, 1.11MB/s]\n 8%|8 | 816k/9.82M [00:00<00:04, 2.11MB/s]\n 16%|#6 | 1.62M/9.82M [00:00<00:02, 4.10MB/s]\n 33%|###3 | 3.25M/9.82M [00:00<00:00, 7.97MB/s]\n 63%|######3 | 6.23M/9.82M [00:00<00:00, 14.8MB/s]\n 94%|#########3| 9.22M/9.82M [00:01<00:00, 19.3MB/s]\n100%|##########| 9.82M/9.82M [00:01<00:00, 9.71MB/s]\n\n\nwith anndata2ri.converter.context():\n sce = anndata2ri.py2rpy(adata_paul)\n ad2 = anndata2ri.rpy2py(sce)", + "text": "4.1 Rpy2: basic functionality\nRpy2 is a foreign function interface to R. It can be used in the following way:\nimport rpy2\nimport rpy2.robjects as robjects\n\n/home/runner/work/polygloty/polygloty/renv/python/virtualenvs/renv-python-3.12/lib/python3.12/site-packages/rpy2/rinterface_lib/embedded.py:276: UserWarning: R was initialized outside of rpy2 (R_NilValue != NULL). Trying to use it nevertheless.\n warnings.warn(msg)\nR was initialized outside of rpy2 (R_NilValue != NULL). Trying to use it nevertheless.\n\nvector = robjects.IntVector([1,2,3])\nrsum = robjects.r['sum']\n\nrsum(vector)\n\n\n IntVector with 1 elements.\n \n\n\n\n6\nLuckily, we’re not restricted to just calling R functions and creating R objects. The real power of this in-memory interoperability lies in the conversion of Python objects to R objects to call R functions on, and then to the conversion of the results back to Python objects.\nRpy2 requires specific conversion rules for different Python objects. It is straightforward to create R vectors from corresponding Python lists:\nstr_vector = robjects.StrVector(['abc', 'def', 'ghi'])\nflt_vector = robjects.FloatVector([0.3, 0.8, 0.7])\nint_vector = robjects.IntVector([1, 2, 3])\nmtx = robjects.r.matrix(robjects.IntVector(range(10)), nrow=5)\nHowever, for single cell biology, the objects that are most interesting to convert are (count) matrices, arrays and dataframes. In order to do this, you need to import the corresponding rpy2 modules and specify the conversion context.\nimport numpy as np\n\nfrom rpy2.robjects import numpy2ri\nfrom rpy2.robjects import default_converter\n\nrd_m = np.random.random((10, 7))\n\nwith (default_converter + numpy2ri.converter).context():\n mtx2 = robjects.r.matrix(rd_m, nrow = 10)\nimport pandas as pd\n\nfrom rpy2.robjects import pandas2ri\n\npd_df = pd.DataFrame({'int_values': [1,2,3],\n 'str_values': ['abc', 'def', 'ghi']})\n\nwith (default_converter + pandas2ri.converter).context():\n pd_df_r = robjects.DataFrame(pd_df)\nOne big limitation of rpy2 is the inability to convert sparse matrices: there is no built-in conversion module for scipy. 
The anndata2ri package provides, apart from functionality to convert SingleCellExperiment objects to an anndata objects, functions to convert sparse matrices.\nimport scipy as sp\n\nfrom anndata2ri import scipy2ri\n\nsparse_matrix = sp.sparse.csc_matrix(rd_m)\n\nwith (default_converter + scipy2ri.converter).context():\n sp_r = scipy2ri.py2rpy(sparse_matrix)\nWe will showcase how to use anndata2ri to convert an anndata object to a SingleCellExperiment object and vice versa as well:\nimport anndata as ad\nimport scanpy.datasets as scd\n\nimport anndata2ri\n\nadata_paul = scd.paul15()\n\n\n 0%| | 0.00/9.82M [00:00<?, ?B/s]\n 0%| | 8.00k/9.82M [00:00<03:13, 53.3kB/s]\n 0%| | 32.0k/9.82M [00:00<01:30, 114kB/s] \n 1%| | 96.0k/9.82M [00:00<00:40, 250kB/s]\n 2%|1 | 200k/9.82M [00:00<00:24, 416kB/s] \n 4%|4 | 408k/9.82M [00:00<00:13, 749kB/s]\n 8%|8 | 840k/9.82M [00:00<00:06, 1.44MB/s]\n 17%|#6 | 1.65M/9.82M [00:01<00:03, 2.75MB/s]\n 30%|##9 | 2.91M/9.82M [00:01<00:01, 4.35MB/s]\n 46%|####6 | 4.55M/9.82M [00:01<00:00, 6.55MB/s]\n 67%|######6 | 6.55M/9.82M [00:01<00:00, 8.60MB/s]\n 88%|########7 | 8.64M/9.82M [00:01<00:00, 9.21MB/s]\n 93%|#########2| 9.09M/9.82M [00:01<00:00, 8.04MB/s]\n100%|##########| 9.82M/9.82M [00:01<00:00, 5.35MB/s]\n\n\nwith anndata2ri.converter.context():\n sce = anndata2ri.py2rpy(adata_paul)\n ad2 = anndata2ri.rpy2py(sce)", "crumbs": [ "In-memory interoperability", "4  Rpy2" @@ -192,7 +192,7 @@ "href": "book/in_memory/reticulate.html", "title": "5  Reticulate", "section": "", - "text": "Reticulate is a foreign function interface in R to Python.\n\n6 Reticulate: basic functionality\nData types are automatically converted from Python to R and vice versa. A useful table of automatic conversions can be found here.\nYou can easily import python modules, and call the functions in the following way:\n\nlibrary(reticulate)\n\nbi <- reticulate::import_builtins()\nrd <- reticulate::import(\"random\")\n\nexample <- c(1,2,3)\nbi$max(example)\n\n[1] 3\n\nrd$choice(example)\n\n[1] 2\n\nbi$list(bi$reversed(example))\n\n[1] 3 2 1\n\n\nNumpy is also easily used:\n\nnp <- reticulate::import(\"numpy\")\n\na <- np$asarray(tuple(list(1,2), list(3, 4)))\nb <- np$asarray(list(5,6))\nb <- np$reshape(b, newshape = tuple(1L,2L))\n\nnp$concatenate(tuple(a, b), axis=0L)\n\n [,1] [,2]\n[1,] 1 2\n[2,] 3 4\n[3,] 5 6\n\n\nIf you want more finegrained control over conversion, you can specify in the import statement that you do not want results of functions of that package to be converted to R data types.\n\nnp <- reticulate::import(\"numpy\", convert = FALSE)\n\na <- np$asarray(tuple(list(1,2), list(3, 4)))\nb <- np$asarray(list(5,6))\nb <- np$reshape(b, newshape = tuple(1L,2L))\n\nnp$concatenate(tuple(a, b), axis=0L)\n\narray([[1., 2.],\n [3., 4.],\n [5., 6.]])\n\n\nYou can explicitly convert data types:\n\nresult <- np$concatenate(tuple(a, b), axis=0L)\n\npy_to_r(result)\n\n [,1] [,2]\n[1,] 1 2\n[2,] 3 4\n[3,] 5 6\n\nresult_r <- py_to_r(result)\nr_to_py(result_r)\n\narray([[1., 2.],\n [3., 4.],\n [5., 6.]])\n\n\n\n\n7 Interactivity\nYou can easily include Python chunks in Rmarkdown notebooks using the Python engine in knitr.\n\n\n8 Usecase\nWe will not showcase the usefulness of reticulate by using the DE analysis: it would involve loading in pandas to create a Python dataframe, adding rownames and columnnames and then grouping them, but that is easier to do natively in R.\nA more interesting thing you can do using reticulate is interacting with anndata-based Python packages, such as 
scanpy!\n\nlibrary(anndata)\nlibrary(reticulate)\nsc <- import(\"scanpy\")\n\nadata_path <- \"../usecase/data/sc_counts_subset.h5ad\"\nadata <- anndata::read_h5ad(adata_path)\n\nWe can preprocess the data:\n\nsc$pp$filter_cells(adata, min_genes = 200)\nsc$pp$filter_genes(adata, min_cells = 3)\n\n\nsc$pp$pca(adata)\nsc$pp$neighbors(adata)\nsc$tl$umap(adata)\n\nadata\n\nAnnData object with n_obs × n_vars = 32727 × 20542\n obs: 'dose_uM', 'timepoint_hr', 'well', 'row', 'col', 'plate_name', 'cell_id', 'cell_type', 'split', 'donor_id', 'sm_name', 'control', 'SMILES', 'sm_lincs_id', 'library_id', 'leiden_res1', 'group', 'cell_type_orig', 'plate_well_celltype_reannotated', 'cell_count_by_well_celltype', 'cell_count_by_plate_well', 'n_genes'\n var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'n_cells'\n uns: 'cell_type_colors', 'celltypist_celltype_colors', 'donor_id_colors', 'hvg', 'leiden_res1_colors', 'log1p', 'neighbors', 'over_clustering', 'rank_genes_groups', 'pca', 'umap'\n obsm: 'HTO_clr', 'X_pca', 'X_umap', 'protein_counts'\n varm: 'PCs'\n obsp: 'connectivities', 'distances'\n\n\nWe can’t easily show the result of the plot in this Quarto notebook, but we can save it and show it:\n\npath <- \"umap.png\"\nsc$pl$umap(adata, color=\"leiden_res1\", save=path)\n\n\n\n\n\n\n\nFigure 8.1: UMAP plot of the adata object", + "text": "Reticulate is a foreign function interface in R to Python.\n\n6 Reticulate: basic functionality\nData types are automatically converted from Python to R and vice versa. A useful table of automatic conversions can be found here.\nYou can easily import python modules, and call the functions in the following way:\n\nlibrary(reticulate)\n\nbi <- reticulate::import_builtins()\nrd <- reticulate::import(\"random\")\n\nexample <- c(1,2,3)\nbi$max(example)\n\n[1] 3\n\nrd$choice(example)\n\n[1] 3\n\nbi$list(bi$reversed(example))\n\n[1] 3 2 1\n\n\nNumpy is also easily used:\n\nnp <- reticulate::import(\"numpy\")\n\na <- np$asarray(tuple(list(1,2), list(3, 4)))\nb <- np$asarray(list(5,6))\nb <- np$reshape(b, newshape = tuple(1L,2L))\n\nnp$concatenate(tuple(a, b), axis=0L)\n\n [,1] [,2]\n[1,] 1 2\n[2,] 3 4\n[3,] 5 6\n\n\nIf you want more finegrained control over conversion, you can specify in the import statement that you do not want results of functions of that package to be converted to R data types.\n\nnp <- reticulate::import(\"numpy\", convert = FALSE)\n\na <- np$asarray(tuple(list(1,2), list(3, 4)))\nb <- np$asarray(list(5,6))\nb <- np$reshape(b, newshape = tuple(1L,2L))\n\nnp$concatenate(tuple(a, b), axis=0L)\n\narray([[1., 2.],\n [3., 4.],\n [5., 6.]])\n\n\nYou can explicitly convert data types:\n\nresult <- np$concatenate(tuple(a, b), axis=0L)\n\npy_to_r(result)\n\n [,1] [,2]\n[1,] 1 2\n[2,] 3 4\n[3,] 5 6\n\nresult_r <- py_to_r(result)\nr_to_py(result_r)\n\narray([[1., 2.],\n [3., 4.],\n [5., 6.]])\n\n\n\n\n7 Interactivity\nYou can easily include Python chunks in Rmarkdown notebooks using the Python engine in knitr.\n\n\n8 Usecase\nWe will not showcase the usefulness of reticulate by using the DE analysis: it would involve loading in pandas to create a Python dataframe, adding rownames and columnnames and then grouping them, but that is easier to do natively in R.\nA more interesting thing you can do using reticulate is interacting with anndata-based Python packages, such as scanpy!\n\nlibrary(anndata)\nlibrary(reticulate)\nsc <- import(\"scanpy\")\n\nadata_path <- \"../usecase/data/sc_counts_subset.h5ad\"\nadata <- anndata::read_h5ad(adata_path)\n\nWe 
can preprocess the data:\n\nsc$pp$filter_cells(adata, min_genes = 200)\nsc$pp$filter_genes(adata, min_cells = 3)\n\n\nsc$pp$pca(adata)\nsc$pp$neighbors(adata)\nsc$tl$umap(adata)\n\nadata\n\nAnnData object with n_obs × n_vars = 32727 × 20542\n obs: 'dose_uM', 'timepoint_hr', 'well', 'row', 'col', 'plate_name', 'cell_id', 'cell_type', 'split', 'donor_id', 'sm_name', 'control', 'SMILES', 'sm_lincs_id', 'library_id', 'leiden_res1', 'group', 'cell_type_orig', 'plate_well_celltype_reannotated', 'cell_count_by_well_celltype', 'cell_count_by_plate_well', 'n_genes'\n var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'n_cells'\n uns: 'cell_type_colors', 'celltypist_celltype_colors', 'donor_id_colors', 'hvg', 'leiden_res1_colors', 'log1p', 'neighbors', 'over_clustering', 'rank_genes_groups', 'pca', 'umap'\n obsm: 'HTO_clr', 'X_pca', 'X_umap', 'protein_counts'\n varm: 'PCs'\n obsp: 'connectivities', 'distances'\n\n\nWe can’t easily show the result of the plot in this Quarto notebook, but we can save it and show it:\n\npath <- \"umap.png\"\nsc$pl$umap(adata, color=\"leiden_res1\", save=path)\n\n\n\n\n\n\n\nFigure 8.1: UMAP plot of the adata object", "crumbs": [ "In-memory interoperability", "5  Reticulate" @@ -280,7 +280,7 @@ "href": "book/disk_based/disk_based_pipelines.html#containerized-pipelines", "title": "7  Disk-based pipelines", "section": "7.4 Containerized pipelines", - "text": "7.4 Containerized pipelines\nContainers are a great way to manage the environments for your pipeline and make them reproducible on different platforms, given that you make accessible and store the container images for a long time.\nYou can create a Docker image with all the pixi environments and run the pipeline in multiple environments with a single container. The image is ~5GB and the pipeline can require a lot of working memory ~20GB, so make sure to increase the RAM allocated to Docker in your settings. Note that the usecase/ and book/ folders are mounted to the Docker container, so you can interactively edit the scripts and access the data.\ndocker pull berombau/polygloty-docker:latest\ndocker run -it -v $(pwd)/usecase:/app/usecase -v $(pwd)/book:/app/book berombau/polygloty-docker:latest pixi run pipeline\nAnother approach is to use multi-package containers. Tools like Multi-Package BioContainers and Seqera Containers can make this quick and easy, by allowing for custom combinations of packages.\nYou can go a long way with a folder of notebooks or scripts and the right tools. But as your project grows more bespoke, it can be worth the effort to use a workflow framework like Nextflow or Snakemake to manage the pipeline for you.", + "text": "7.4 Containerized pipelines\nContainers are a great way to manage the environments for your pipeline and make them reproducible on different platforms, given that you make accessible and store the container images for a long time.\nYou can create a Docker image with all the pixi environments and run the pipeline in multiple environments with a single container. The image is ~5GB and the pipeline can require a lot of working memory ~20GB, so make sure to increase the RAM allocated to Docker in your settings. 
Note that the usecase/ and book/ folders are mounted to the Docker container, so you can interactively edit the scripts and access the data.\ndocker pull berombau/polygloty-docker:latest\ndocker run -it -v $(pwd)/usecase:/app/usecase -v $(pwd)/book:/app/book berombau/polygloty-docker:latest pixi run pipeline\nAnother approach is to use multi-package containers. Tools like Multi-Package BioContainers and Seqera Containers can make this quick and easy, by allowing for custom combinations of packages.\nYou can go a long way with a folder of notebooks or scripts and the right tools. But as your project grows more bespoke, it can be worth the effort to use a workflow framework like Viash, Nextflow or Snakemake to manage the pipeline for you.", "crumbs": [ "Disk-based interoperability", "7  Disk-based pipelines" diff --git a/sitemap.xml b/sitemap.xml index f37f2b4..5646682 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,74 +2,74 @@ https://saeyslab.github.io/polygloty/index.html - 2024-09-11T22:13:35.893Z + 2024-09-12T08:33:57.968Z https://saeyslab.github.io/polygloty/book/introduction.html - 2024-09-11T22:13:35.885Z + 2024-09-12T08:33:57.964Z https://saeyslab.github.io/polygloty/book/usecase/index.html - 2024-09-11T22:13:35.885Z + 2024-09-12T08:33:57.964Z https://saeyslab.github.io/polygloty/book/in_memory/pitfalls.html - 2024-09-11T22:13:35.885Z + 2024-09-12T08:33:57.964Z https://saeyslab.github.io/polygloty/book/in_memory/rpy2.html - 2024-09-11T22:13:35.885Z + 2024-09-12T08:33:57.964Z https://saeyslab.github.io/polygloty/book/in_memory/reticulate.html - 2024-09-11T22:13:35.885Z + 2024-09-12T08:33:57.964Z https://saeyslab.github.io/polygloty/book/disk_based/file_formats.html - 2024-09-11T22:13:35.881Z + 2024-09-12T08:33:57.960Z https://saeyslab.github.io/polygloty/book/disk_based/disk_based_pipelines.html - 2024-09-11T22:13:35.881Z + 2024-09-12T08:33:57.960Z https://saeyslab.github.io/polygloty/book/workflow_frameworks/review.html - 2024-09-11T22:13:35.889Z + 2024-09-12T08:33:57.968Z https://saeyslab.github.io/polygloty/book/workflow_frameworks/qualities.html - 2024-09-11T22:13:35.889Z + 2024-09-12T08:33:57.968Z https://saeyslab.github.io/polygloty/book/workflow_frameworks/quality_assessment.html - 2024-09-11T22:13:35.889Z + 2024-09-12T08:33:57.968Z https://saeyslab.github.io/polygloty/book/workflow_frameworks/viash_nextflow.html - 2024-09-11T22:13:35.889Z + 2024-09-12T08:33:57.968Z https://saeyslab.github.io/polygloty/book/workflow_frameworks/best_practices.html - 2024-09-11T22:13:35.885Z + 2024-09-12T08:33:57.964Z https://saeyslab.github.io/polygloty/book/book_slides.html - 2024-09-11T22:13:35.881Z + 2024-09-12T08:33:57.960Z https://saeyslab.github.io/polygloty/book/references.html - 2024-09-11T22:13:35.885Z + 2024-09-12T08:33:57.964Z https://saeyslab.github.io/polygloty/book/in_memory/index.html - 2024-09-11T22:13:35.885Z + 2024-09-12T08:33:57.964Z https://saeyslab.github.io/polygloty/book/disk_based/index.html - 2024-09-11T22:13:35.881Z + 2024-09-12T08:33:57.960Z https://saeyslab.github.io/polygloty/book/workflow_frameworks/index.html - 2024-09-11T22:13:35.889Z + 2024-09-12T08:33:57.968Z