diff --git a/book/disk_based/disk_based_pipelines.html b/book/disk_based/disk_based_pipelines.html
index 106b36c..c81a83b 100644
--- a/book/disk_based/disk_based_pipelines.html
+++ b/book/disk_based/disk_based_pipelines.html
@@ -555,7 +555,7 @@
docker pull berombau/polygloty-docker:latest
docker run -it -v $(pwd)/usecase:/app/usecase -v $(pwd)/book:/app/book berombau/polygloty-docker:latest pixi run pipeline
Another approach is to use multi-package containers. Tools like Multi-Package BioContainers and Seqera Containers can make this quick and easy by allowing for custom combinations of packages.
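For example (a sketch, not from the original; the image name is a placeholder for whatever URL Seqera Containers generates for your chosen package set):

# placeholder image name; Seqera Containers produces one per package combination
docker pull community.wave.seqera.io/library/&lt;your-package-combination&gt;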
-You can go a long way with a folder of notebooks or scripts and the right tools. But as your project grows more bespoke, it can be worth the effort to use a workflow framework like Nextflow or Snakemake to manage the pipeline for you.
+You can go a long way with a folder of notebooks or scripts and the right tools. But as your project grows more bespoke, it can be worth the effort to use a workflow framework like Viash, Nextflow or Snakemake to manage the pipeline for you.
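To make that concrete, here is a minimal Snakemake sketch of such a pipeline. The script names and the raw input path are hypothetical; only the pseudobulk.h5ad and de_contrasts.csv paths appear in this use case:

# Snakefile (sketch): each step can run in its own language or container.
rule pseudobulk:
    input:
        "usecase/data/sc_counts.h5ad"          # hypothetical raw input
    output:
        "usecase/data/pseudobulk.h5ad"
    shell:
        "pixi run python scripts/compute_pseudobulk.py"   # hypothetical script

rule compute_de:
    input:
        "usecase/data/pseudobulk.h5ad"
    output:
        "usecase/data/de_contrasts.csv"
    shell:
        "pixi run python scripts/compute_de.py"           # hypothetical script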
diff --git a/book/in_memory/reticulate.html b/book/in_memory/reticulate.html
index 3f3fee3..508f61e 100644
--- a/book/in_memory/reticulate.html
+++ b/book/in_memory/reticulate.html
@@ -322,7 +322,7 @@
rd$choice(example)
-[1] 2
+[1] 3
bi$list(bi$reversed(example))
import anndata as ad
import scanpy.datasets as scd
import anndata2ri

adata_paul = scd.paul15()
with anndata2ri.converter.context():
    sce = anndata2ri.py2rpy(adata_paul)
    ad2 = anndata2ri.rpy2py(sce)
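A quick sanity check (a sketch, not part of the original): the round trip through R and back should leave the dimensions of the object untouched.

# AnnData -> SingleCellExperiment -> AnnData should preserve shape
assert ad2.shape == adata_paul.shape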
Jupyter notebooks (and some other notebook environments) make this possible from the Python side: using IPython line and cell magics together with rpy2, you can easily run an R cell inside your notebooks.
%load_ext rpy2.ipython  # line magic that loads the rpy2 IPython extension;
                        # it enables the following cell magic

%%R -i input -o output  # this first line lets you specify inputs
                        # (which will be converted to R objects) and outputs
                        # (which will be converted back to Python objects);
                        # it is put at the start of a cell, and the rest of
                        # the cell is run as R code
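For instance, assuming a pandas DataFrame df with a padj column is already defined in the Python session (a hypothetical cell, not from the original):

%%R -i df -o df_top
# df arrives as an R data.frame; df_top is converted back to pandas on exit
df_top <- head(df[order(df$padj), ], 10)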
We will perform the Compute DE step not in R, but in Python. The pseudobulked data is read in:
import anndata as ad

pd_adata = ad.read_h5ad("../usecase/data/pseudobulk.h5ad")
Select small molecule and control:
= "Belinostat"
- sm_name = "Dimethyl Sulfoxide" control_name
= "Belinostat"
+ sm_name = "Dimethyl Sulfoxide" control_name
Creating a DESeq dataset requires a bit more effort: we need to import the DESeq2 package and combine the default, numpy2ri, and pandas2ri converters to convert the count matrix and the obs dataframe.
import numpy as np

import rpy2
import rpy2.robjects as robjects

from rpy2.robjects import numpy2ri
from rpy2.robjects import pandas2ri

from rpy2.robjects import default_converter
from rpy2.robjects.packages import importr

DESeq2 = importr("DESeq2")

np_cv_rules = default_converter + numpy2ri.converter + pandas2ri.converter

with np_cv_rules.context() as cv:
    # DESeq2 expects genes x samples, so transpose the pseudobulk counts
    counts_dense = np.transpose(pd_adata.X.astype(np.int32))

    robjects.globalenv["count_data"] = counts_dense
    robjects.globalenv["obs_data"] = pd_adata.obs
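To verify the transfer (a sketch, not part of the original), you can evaluate a snippet of R code against the global environment:

# check that both objects arrived in R's global environment
robjects.r('print(dim(count_data))')     # genes x samples after the transpose
robjects.r('print(colnames(obs_data))')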
We can also specify R formulas!
from rpy2.robjects import Formula

design_formula = Formula('~ sm_name + plate_name')

dds = DESeq2.DESeqDataSetFromMatrix(countData = robjects.globalenv["count_data"],
                                    colData = robjects.globalenv["obs_data"],
                                    design = design_formula)
Run DESeq2:
dds = DESeq2.DESeq(dds)
Get results:
contrastv = robjects.StrVector(["sm_name", sm_name, control_name])
res = DESeq2.results(dds, contrast=contrastv)

base = importr('base')
res = base.as_data_frame(res)
Preview results:
dplyr = importr('dplyr')
utils = importr('utils')

res = utils.head(dplyr.arrange(res, 'padj'), 10)
Write to disk: this again requires the pandas2ri converter to convert the results to a pandas dataframe.
with (robjects.default_converter + pandas2ri.converter).context():
    res_pd = robjects.conversion.get_conversion().rpy2py(res)

res_pd.to_csv("../usecase/data/de_contrasts.csv")