
Commit 78a4ee4

Functionality to fetch, search and save datasets (#1)
Update documentation, tests and everything else
1 parent 5427b3d commit 78a4ee4

12 files changed: +755 −114 lines

.github/workflows/pypi-test.yml (+41 −23)

@@ -2,36 +2,54 @@ name: Test the library

 on:
   push:
-    branches: [ master ]
+    branches: [master]
   pull_request:
-    branches: [ master ]
+    branches: [master]

 jobs:
   build:
-
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

     name: Python ${{ matrix.python-version }}
     steps:
-      - uses: actions/checkout@v2
-      - name: Setup Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
-          cache: 'pip'
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install flake8 pytest tox
-      # - name: Lint with flake8
-      #   run: |
-      #     # stop the build if there are Python syntax errors or undefined names
-      #     flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-      #     # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-      #     # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-      - name: Test with tox
-        run: |
-          tox
+      - uses: actions/checkout@v2
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: "pip"
+
+      # - name: Update SQLite3
+      #   run: sudo apt install -y sqlite3
+
+      # build SQLite from source, because we need SQLite >= 3.35
+      - run: |
+          wget https://www.sqlite.org/2024/sqlite-autoconf-3450300.tar.gz
+          tar -xvf sqlite-autoconf-3450300.tar.gz
+      - run: |
+          ./configure
+          make
+          sudo make install
+          export PATH="/usr/local/lib:$PATH"
+        working-directory: sqlite-autoconf-3450300
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install flake8 pytest tox
+        env:
+          LD_LIBRARY_PATH: /usr/local/lib
+      # - name: Lint with flake8
+      #   run: |
+      #     # stop the build if there are Python syntax errors or undefined names
+      #     flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+      #     # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+      #     # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+      - name: Test with tox
+        run: |
+          tox
+        env:
+          LD_LIBRARY_PATH: /usr/local/lib
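
The workflow builds SQLite 3.45.3 from source because the stock Ubuntu runner ships a library older than the 3.35 the tests need, then points LD_LIBRARY_PATH at /usr/local/lib so the Python interpreter picks up the new build. A minimal sketch of how that requirement could be verified from Python (the 3.35 threshold is taken from the workflow comment; the check itself is illustrative and not part of the repository):

import sqlite3

# sqlite3.sqlite_version reports the SQLite library the interpreter is linked
# against, which is exactly what the workflow upgrades via LD_LIBRARY_PATH.
required = (3, 35, 0)
current = tuple(int(part) for part in sqlite3.sqlite_version.split("."))

if current < required:
    raise RuntimeError(
        f"SQLite {sqlite3.sqlite_version} is too old; the tests expect >= 3.35"
    )
print("SQLite version OK:", sqlite3.sqlite_version)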

setup.cfg (+6 −1)

@@ -51,11 +51,12 @@ install_requires =
     importlib-metadata; python_version<"3.8"
     dolomite_base
     dolomite_matrix
-    dolomite_sce
+    dolomite_sce>=0.1.2
     gypsum_client>=0.1.1
     delayedarray
     summarizedexperiment
     singlecellexperiment
+    pandas

 [options.packages.find]
 where = src
@@ -66,12 +67,16 @@ exclude =
 # Add here additional requirements for extra features, to install with:
 # `pip install scrnaseq[PDF]` like:
 # PDF = ReportLab; RXP
+optional =
+    anndata

 # Add here test requirements (semicolon/line-separated)
 testing =
     setuptools
     pytest
     pytest-cov
+    scipy
+    anndata

 [options.entry_points]
 # Add here console scripts like:
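
setup.cfg pins dolomite_sce, adds pandas as a runtime dependency, introduces an `optional` extra providing anndata, and adds scipy/anndata to the testing extra. A small illustrative sketch of how calling code might guard the optional anndata dependency (the HAS_ANNDATA flag and messages are hypothetical, not part of the package):

# anndata is only pulled in by `pip install scrnaseq[optional]`, so import it
# lazily and degrade gracefully when it is missing.
try:
    import anndata
    HAS_ANNDATA = True
except ImportError:
    HAS_ANNDATA = False

if HAS_ANNDATA:
    print("anndata", anndata.__version__, "is available")
else:
    print("anndata not installed; install scrnaseq[optional] to enable it")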

src/scrnaseq/__init__.py (+4 −1)

@@ -15,4 +15,7 @@
 finally:
     del version, PackageNotFoundError

-from .fetch_dataset import fetch_dataset, fetch_metadata
+from .fetch_dataset import fetch_dataset, fetch_metadata
+from .list_datasets import list_datasets
+from .list_versions import fetch_latest_version, list_versions
+from .save_dataset import save_dataset
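
The package root now re-exports the new fetch, list and save helpers. A usage sketch of the combined API; only the fetch_dataset/fetch_metadata calls appear in this commit's docstring examples, and the arguments to list_versions and fetch_latest_version are assumed from their names:

import scrnaseq

# Survey what is available, then pull one dataset and its metadata.
datasets = scrnaseq.list_datasets()
versions = scrnaseq.list_versions("zeisel-brain-2015")        # assumed signature
latest = scrnaseq.fetch_latest_version("zeisel-brain-2015")   # assumed signature

sce = scrnaseq.fetch_dataset("zeisel-brain-2015", latest)
meta = scrnaseq.fetch_metadata("zeisel-brain-2015", latest)
print(type(sce), meta.get("title"))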

src/scrnaseq/fetch_dataset.py (+31 −86)

@@ -2,12 +2,12 @@
 import json
 import os

-from delayedarray import is_sparse, to_dense_array, to_scipy_sparse_matrix
-from dolomite_base import alt_read_object, alt_read_object_function, read_object
+from dolomite_base import alt_read_object, alt_read_object_function
 from gypsum_client import cache_directory, save_file, save_version
-from singlecellexperiment import SingleCellExperiment
 from summarizedexperiment import SummarizedExperiment

+from .utils import single_cell_load_object
+
 __author__ = "Jayaram Kancherla"
 __copyright__ = "Jayaram Kancherla"
 __license__ = "MIT"
@@ -24,7 +24,24 @@ def fetch_dataset(
     realize_reduced_dims: bool = True,
     **kwargs,
 ) -> SummarizedExperiment:
-    """Fetch a dataset from the gypsum backend.
+    """Fetch a single-cell dataset from the gypsum backend.
+
+    See Also:
+        `metadata index <https://github.com/ArtifactDB/bioconductor-metadata-index>`_,
+        on the expected schema for the metadata.
+
+        :py:func:`~scrnaseq.save_dataset.save_dataset` and
+        :py:func:`~gypsum_client.upload_file_operations.upload_directory`,
+        to save and upload a dataset.
+
+        :py:func:`~scrnaseq.survey_datasets.survey_datasets` and :py:func:`~scrnaseq.list_versions.list_versions`,
+        to get possible values for `name` and `version`.
+
+    Example:
+
+        .. code-block:: python
+
+            sce = fetch_dataset("zeisel-brain-2015", "2023-12-14")

     Args:
         name:
@@ -99,6 +116,16 @@ def fetch_metadata(
 ):
     """Fetch metadata for a dataset from the gypsum backend.

+    See Also:
+        :py:func:`~.fetch_dataset`,
+        to fetch a dataset.
+
+    Example:
+
+        .. code-block:: python
+
+            meta = fetch_metadata("zeisel-brain-2015", "2023-12-14")
+
     Args:
         name:
             Name of the dataset.
@@ -133,85 +160,3 @@ def fetch_metadata(
         metadata = json.load(f)

     return metadata
-
-
-def single_cell_load_object(
-    path: str,
-    metadata: dict = None,
-    scrnaseq_realize_assays: bool = False,
-    scrnaseq_realize_reduced_dims: bool = True,
-    **kwargs,
-):
-    """Load a ``SummarizedExperiment`` or ``SingleCellExperiment`` object from a file.
-
-    Args:
-        path:
-            Path to the dataset.
-
-        metadata:
-            Metadata for the dataset.
-            Defaults to None.
-
-        scrnaseq_realize_assays:
-            Whether to realize assays into memory.
-            Defaults to False.
-
-        scrnaseq_realize_reduced_dims:
-            Whether to realize reduced dimensions into memory.
-            Defaults to True.
-
-        **kwargs:
-            Further arguments to pass to
-            :py:func:`~dolomite_base.read_object.read_object`.
-
-    Returns:
-        A `SummarizedExperiment` of the object.
-    """
-    obj = read_object(
-        path,
-        metadata=metadata,
-        scrnaseq_realize_assays=scrnaseq_realize_assays,
-        scrnaseq_realize_reduced_dims=scrnaseq_realize_reduced_dims,
-        **kwargs,
-    )
-
-    if isinstance(obj, SummarizedExperiment):
-        if scrnaseq_realize_assays:
-            _assays = {}
-            for y in obj.get_assay_names():
-                _assays[y] = realize_array(obj.assay(y))
-
-            obj = obj.set_assays(_assays)
-
-    if isinstance(obj, SingleCellExperiment):
-        if scrnaseq_realize_reduced_dims:
-            _red_dims = {}
-            for z in obj.get_reduced_dim_names():
-                _red_dims[z] = realize_array(obj.reduced_dim(z))
-
-            obj = obj.set_reduced_dims(_red_dims)
-
-    return obj
-
-
-def realize_array(x):
-    """Realize a `ReloadedArray` into a dense array or sparse matrix.
-
-    Args:
-        x:
-            `ReloadedArray` object.
-
-    Returns:
-        Realized array or matrix.
-    """
-    from dolomite_matrix import ReloadedArray
-
-    if isinstance(x, ReloadedArray):
-        if is_sparse(x):
-            x = to_scipy_sparse_matrix(x, "csr")
-        else:
-            x = to_dense_array(x)
-
-    return x
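
single_cell_load_object and realize_array are no longer defined in this module; fetch_dataset now imports single_cell_load_object from a .utils module (presumably src/scrnaseq/utils.py, which is not shown in this excerpt). For reference, a standalone sketch of the realization pattern the removed realize_array helper implements, applied here to a generic delayed array rather than a ReloadedArray (assuming DelayedArray accepts a NumPy seed):

import numpy
from delayedarray import DelayedArray, is_sparse, to_dense_array, to_scipy_sparse_matrix

# Wrap an in-memory seed to stand in for a file-backed ReloadedArray.
x = DelayedArray(numpy.random.rand(100, 20))

# Realize into an in-memory representation: CSR for sparse seeds, dense otherwise.
if is_sparse(x):
    realized = to_scipy_sparse_matrix(x, "csr")
else:
    realized = to_dense_array(x)

print(type(realized))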
