diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml
deleted file mode 100644
index 5395d11..0000000
--- a/.github/workflows/build-docs.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-name: Build documentation
-
-on:
-  push:
-    branches:
-      - master
-
-jobs:
-  test:
-    name: Build docs
-    runs-on: ubuntu-latest
-
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          submodules: true
-
-      - name: Set up Python 3.9
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.9
-          cache: 'pip'
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install tox
-
-      - name: Build docs
-        run: |
-          tox -e docs
-          touch ./docs/_build/html/.nojekyll
-
-      - name: GH Pages Deployment
-        if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/')
-        uses: JamesIves/github-pages-deploy-action@4.1.3
-        with:
-          branch: gh-pages # The branch the action should deploy to.
-          folder: ./docs/_build/html
-          clean: true # Automatically remove deleted files from the deploy branch
diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml
index 3ff2241..91ce4a6 100644
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -1,3 +1,6 @@
+# This workflow runs the tests, deploys the docs to GitHub Pages and publishes the package to PyPI.
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
 name: Publish to PyPI
 
 on:
@@ -6,32 +9,43 @@ on:
 
 jobs:
   build:
+
     runs-on: ubuntu-latest
 
     steps:
       - uses: actions/checkout@v3
-
       - name: Set up Python 3.9
         uses: actions/setup-python@v2
         with:
           python-version: 3.9
-          cache: 'pip'
-
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install pytest tox
-
+          pip install flake8 pytest tox
+      # - name: Lint with flake8
+      #   run: |
+      #     # stop the build if there are Python syntax errors or undefined names
+      #     flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+      #     # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+      #     # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
       - name: Test with tox
         run: |
           tox
-
-      - name: Build package
+      - name: Build docs
+        run: |
+          tox -e docs
+      - run: touch ./docs/_build/html/.nojekyll
+      - name: GH Pages Deployment
+        uses: JamesIves/github-pages-deploy-action@4.1.3
+        with:
+          branch: gh-pages # The branch the action should deploy to.
+          folder: ./docs/_build/html
+          clean: true # Automatically remove deleted files from the deploy branch
+      - name: Build Project and Publish
         run: |
           python -m tox -e clean,build
-
       - name: Publish package
         uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
         with:
           user: __token__
           password: ${{ secrets.PYPI_PASSWORD }}
diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml
index 6f98cbf..f35fd23 100644
--- a/.github/workflows/pypi-test.yml
+++ b/.github/workflows/pypi-test.yml
@@ -2,29 +2,36 @@ name: Test the library
 
 on:
   push:
-    branches:
-      - master
+    branches: [ master ]
   pull_request:
+    branches: [ master ]
 
 jobs:
   build:
+
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ]
 
+    name: Python ${{ matrix.python-version }}
     steps:
-      - name: Check out repo
-        uses: actions/checkout@v3
-
-      - name: Set up Python 3.9
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.9
-          cache: 'pip'
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install tox
-
-      - name: Test with tox
-        run: |
-          tox
+      - uses: actions/checkout@v3
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install flake8 pytest tox
+      # - name: Lint with flake8
+      #   run: |
+      #     # stop the build if there are Python syntax errors or undefined names
+      #     flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+      #     # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+      #     # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+      - name: Test with tox
+        run: |
+          tox
diff --git a/setup.cfg b/setup.cfg
index 496967e..1900428 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -5,17 +5,17 @@
 
 [metadata]
 name = dolomite-sce
-description = Add a short description here!
+description = Save and load single-cell experiments in the dolomite framework!
 author = LTLA
 author_email = infinite.monkeys.with.keyboards@gmail.com
 license = MIT
 license_files = LICENSE.txt
 long_description = file: README.md
 long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
-url = https://github.com/pyscaffold/pyscaffold/
+url = https://github.com/ArtifactDB/dolomite-sce
 # Add here related links, for example:
 project_urls =
-    Documentation = https://pyscaffold.org/
+    Documentation = https://github.com/ArtifactDB/dolomite-sce
 #    Source = https://github.com/pyscaffold/pyscaffold/
 #    Changelog = https://pyscaffold.org/en/latest/changelog.html
 #    Tracker = https://github.com/pyscaffold/pyscaffold/issues
@@ -41,7 +41,7 @@ package_dir =
     =src
 
 # Require a min/specific Python version (comma-separated conditions)
-# python_requires = >=3.8
+python_requires = >=3.8
 
 # Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.
 # Version specifiers like >=2.2,<3.0 avoid problems due to API changes in
@@ -49,9 +49,9 @@ package_dir =
 # For more information, check out https://semver.org/.
 install_requires =
     importlib-metadata; python_version<"3.8"
-    dolomite_base
-    dolomite_se
-    SummarizedExperiment
+    dolomite_base==0.2.0-alpha6
+    dolomite_se==0.1.0-alpha2
+    SingleCellExperiment>=0.4.3,<0.5.0
 
 
 [options.packages.find]
diff --git a/src/dolomite_sce/__init__.py b/src/dolomite_sce/__init__.py
index 34a4830..228afa3 100644
--- a/src/dolomite_sce/__init__.py
+++ b/src/dolomite_sce/__init__.py
@@ -16,4 +16,5 @@
     del version, PackageNotFoundError
 
 
-from .load_single_cell_experiment import load_single_cell_experiment
+from .read_single_cell_experiment import read_single_cell_experiment
+from .save_single_cell_experiment import save_single_cell_experiment
diff --git a/src/dolomite_sce/load_single_cell_experiment.py b/src/dolomite_sce/load_single_cell_experiment.py
deleted file mode 100644
index e2421e6..0000000
--- a/src/dolomite_sce/load_single_cell_experiment.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from typing import Any
-from dolomite_se import load_summarized_experiment
-
-
-def load_single_cell_experiment(meta: dict[str, Any], project):
-    # TODO: actually load an SCE once the SCE package has cleaned up its shit.
-    return load_summarized_experiment(meta, project)
diff --git a/src/dolomite_sce/read_single_cell_experiment.py b/src/dolomite_sce/read_single_cell_experiment.py
new file mode 100644
index 0000000..aa7f956
--- /dev/null
+++ b/src/dolomite_sce/read_single_cell_experiment.py
@@ -0,0 +1,103 @@
+import json
+import os
+
+import dolomite_base as dl
+from dolomite_base.read_object import read_object_registry
+from dolomite_se import read_common_se_props
+from singlecellexperiment import SingleCellExperiment
+
+read_object_registry[
+    "single_cell_experiment"
+] = "dolomite_sce.read_single_cell_experiment"
+
+
+def read_single_cell_experiment(
+    path: str, metadata: dict, **kwargs
+) -> SingleCellExperiment:
+    """Load a
+    :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment`
+    from its on-disk representation.
+
+    This method should generally not be called directly but instead be invoked by
+    :py:meth:`~dolomite_base.read_object.read_object`.
+
+    Args:
+        path:
+            Path to the directory containing the object.
+
+        metadata:
+            Metadata for the object.
+
+        kwargs:
+            Further arguments, ignored.
+
+    Returns:
+        A
+        :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment`
+        with file-backed arrays in the assays.
+    """
+
+    _row_data, _column_data, _assays = read_common_se_props(path)
+
+    _main_expt_name = None
+    if "main_experiment_name" in metadata["single_cell_experiment"]:
+        _main_expt_name = metadata["single_cell_experiment"]["main_experiment_name"]
+
+    sce = SingleCellExperiment(
+        assays=_assays,
+        row_data=_row_data,
+        column_data=_column_data,
+        main_experiment_name=_main_expt_name,
+    )
+
+    _meta_path = os.path.join(path, "other_data")
+    if os.path.exists(_meta_path):
+        _meta = dl.read_object(_meta_path)
+        sce = sce.set_metadata(_meta.as_dict())
+
+    _ranges_path = os.path.join(path, "row_ranges")
+    if os.path.exists(_ranges_path):
+        _ranges = dl.read_object(_ranges_path)
+        sce = sce.set_row_ranges(_ranges)
+
+    _rdim_path = os.path.join(path, "reduced_dimensions")
+    if os.path.exists(_rdim_path):
+        _rdims = {}
+
+        with open(os.path.join(_rdim_path, "names.json"), "r") as handle:
+            _rdim_names = json.load(handle)
+
+        for _aidx, _aname in enumerate(_rdim_names):
+            _rdim_read_path = os.path.join(_rdim_path, str(_aidx))
+
+            try:
+                _rdims[_aname] = dl.read_object(_rdim_read_path)
+            except Exception as ex:
+                raise RuntimeError(
+                    f"failed to load reduced dimension '{_aname}' from '{path}'; "
+                    + str(ex)
+                ) from ex
+
+        sce = sce.set_reduced_dims(_rdims)
+
+    _alt_path = os.path.join(path, "alternative_experiments")
+    if os.path.exists(_alt_path):
+        _alts = {}
+
+        with open(os.path.join(_alt_path, "names.json"), "r") as handle:
+            _alt_names = json.load(handle)
+
+        for _aidx, _aname in enumerate(_alt_names):
+            _alt_read_path = os.path.join(_alt_path, str(_aidx))
+
+            try:
+                _alts[_aname] = dl.read_object(_alt_read_path)
+            except Exception as ex:
+                raise RuntimeError(
+                    f"failed to load alternative experiment '{_aname}' from '{path}'; "
+                    + str(ex)
+                ) from ex
+
+        sce = sce.set_alternative_experiments(_alts)
+
+    return sce
diff --git a/src/dolomite_sce/save_single_cell_experiment.py b/src/dolomite_sce/save_single_cell_experiment.py
new file mode 100644
index 0000000..17658e0
--- /dev/null
+++ b/src/dolomite_sce/save_single_cell_experiment.py
@@ -0,0 +1,152 @@
+import json
+import os
+from typing import Optional
+
+import dolomite_base as dl
+from dolomite_se import save_common_se_props
+from singlecellexperiment import SingleCellExperiment
+
+
+@dl.save_object.register
+@dl.validate_saves
+def save_single_cell_experiment(
+    x: SingleCellExperiment,
+    path: str,
+    data_frame_args: Optional[dict] = None,
+    assay_args: Optional[dict] = None,
+    rdim_args: Optional[dict] = None,
+    alt_expts_args: Optional[dict] = None,
+    **kwargs,
+):
+    """Method for saving
+    :py:class:`~singlecellexperiment.SingleCellExperiment.SingleCellExperiment`
+    objects to their corresponding file representations, see
+    :py:meth:`~dolomite_base.save_object.save_object` for details.
+
+    Args:
+        x:
+            Object to be staged.
+
+        path:
+            Path to a directory in which to save ``x``. The directory is
+            created here, so it must not exist yet.
+
+        data_frame_args:
+            Further arguments to pass to the ``save_object`` method for the
+            row/column data.
+
+        assay_args:
+            Further arguments to pass to the ``save_object`` method for the
+            assays.
+
+        rdim_args:
+            Further arguments to pass to the ``save_object`` method for the
+            reduced dimensions.
+
+        alt_expts_args:
+            Further arguments to pass to the ``save_object`` method for the
+            alternative experiments.
+
+        kwargs:
+            Further arguments, ignored.
+
+    Returns:
+        ``x`` is saved to path.
+
+    Raises:
+        RuntimeError: If a reduced dimension or an alternative experiment
+            cannot be saved.
+    """
+    os.mkdir(path)
+
+    if data_frame_args is None:
+        data_frame_args = {}
+
+    if assay_args is None:
+        assay_args = {}
+
+    if rdim_args is None:
+        rdim_args = {}
+
+    if alt_expts_args is None:
+        alt_expts_args = {}
+
+    # Assemble the OBJECT metadata as a dictionary and let ``json`` serialize
+    # it, so that quotes or backslashes in the main experiment name are escaped.
+    _sce_meta = {"version": "1.0"}
+    _main_expt_name = x.get_main_experiment_name()
+    if _main_expt_name is not None:
+        _sce_meta["main_experiment_name"] = str(_main_expt_name)
+
+    _object_meta = {
+        "type": "single_cell_experiment",
+        "single_cell_experiment": _sce_meta,
+        "ranged_summarized_experiment": {"version": "1.0"},
+        "summarized_experiment": {
+            "version": "1.0",
+            # Plain ints keep the dimensions serializable by ``json``.
+            "dimensions": [int(_dim) for _dim in x.shape],
+        },
+    }
+
+    with open(os.path.join(path, "OBJECT"), "w", encoding="utf-8") as handle:
+        json.dump(_object_meta, handle)
+
+    save_common_se_props(
+        x, path, data_frame_args=data_frame_args, assay_args=assay_args
+    )
+
+    _ranges = x.get_row_ranges()
+    if _ranges is not None:
+        dl.save_object(_ranges, path=os.path.join(path, "row_ranges"))
+
+    # Reduced dimensions are stored in numbered subdirectories; names.json
+    # records the name that belongs to each index.
+    _rdim_names = x.get_reduced_dim_names()
+    if len(_rdim_names) > 0:
+        _rdim_path = os.path.join(path, "reduced_dimensions")
+        os.mkdir(_rdim_path)
+
+        with open(os.path.join(_rdim_path, "names.json"), "w") as handle:
+            json.dump(_rdim_names, handle)
+
+        for _aidx, _aname in enumerate(_rdim_names):
+            _rdim_save_path = os.path.join(_rdim_path, str(_aidx))
+            try:
+                dl.save_object(x.reduced_dim(_aname), path=_rdim_save_path, **rdim_args)
+            except Exception as ex:
+                raise RuntimeError(
+                    "failed to stage reduced dimension '"
+                    + _aname
+                    + "' for "
+                    + str(type(x))
+                    + "; "
+                    + str(ex)
+                ) from ex
+
+    # Alternative experiments use the same layout as the reduced dimensions.
+    _alt_names = x.get_alternative_experiment_names()
+    if len(_alt_names) > 0:
+        _alt_path = os.path.join(path, "alternative_experiments")
+        os.mkdir(_alt_path)
+
+        with open(os.path.join(_alt_path, "names.json"), "w") as handle:
+            json.dump(_alt_names, handle)
+
+        for _aidx, _aname in enumerate(_alt_names):
+            _alt_save_path = os.path.join(_alt_path, str(_aidx))
+            try:
+                dl.save_object(
+                    x.alternative_experiment(_aname),
+                    path=_alt_save_path,
+                    **alt_expts_args,
+                )
+            except Exception as ex:
+                raise RuntimeError(
+                    "failed to stage alternative experiment '"
+                    + _aname
+                    + "' for "
+                    + str(type(x))
+                    + "; "
+                    + str(ex)
+                ) from ex
diff --git a/tests/test_stage_single_cell_experiment.py b/tests/test_stage_single_cell_experiment.py
index d7a9f50..0c9a544 100644
--- a/tests/test_stage_single_cell_experiment.py
+++ b/tests/test_stage_single_cell_experiment.py
@@ -1,5 +1,166 @@
-from dolomite_base import stage_object, load_object
+import os
+from tempfile import mkdtemp
+
+import biocframe
 import dolomite_sce
+import numpy
+from dolomite_base import read_object, save_object
+from singlecellexperiment import SingleCellExperiment
+from summarizedexperiment import SummarizedExperiment
+
 
 def test_stage_sce_basic():
-    pass
+    x = numpy.random.rand(1000, 200)
+    se = SingleCellExperiment({"counts": x})
+
+    path = os.path.join(mkdtemp(), "sce_simple")
+    save_object(se, path)
+
+    roundtrip = read_object(path)
+    assert isinstance(roundtrip, SingleCellExperiment)
+    counts = roundtrip.assay("counts")
+    assert counts.shape == (1000, 200)
+
+    # Works with multiple assays.
+    x2 = (numpy.random.rand(1000, 200) * 10).astype(numpy.int32)
+    se = SingleCellExperiment(
+        {"logcounts": x, "counts": x2}, main_experiment_name="aaron's secret modality"
+    )
+
+    path = os.path.join(mkdtemp(), "sce_simple2")
+    save_object(se, path)
+
+    roundtrip = read_object(path)
+    assert roundtrip.assay_names == ["logcounts", "counts"]
+
+
+def test_stage_sce_with_dimdata_with_names():
+    x = numpy.random.rand(1000, 200)
+    se = SingleCellExperiment(
+        assays={"counts": x},
+        row_data=biocframe.BiocFrame(row_names=["gene" + str(i) for i in range(1000)]),
+        column_data=biocframe.BiocFrame(
+            row_names=["cell" + str(i) for i in range(200)]
+        ),
+    )
+
+    path = os.path.join(mkdtemp(), "sce_dimdata2")
+    save_object(se, path)
+
+    roundtrip = read_object(path)
+    assert isinstance(roundtrip, SingleCellExperiment)
+    assert se.row_data.row_names == roundtrip.row_data.row_names
+    assert se.column_data.row_names == roundtrip.column_data.row_names
+
+
+def test_stage_sce_with_rdims():
+    x = numpy.random.rand(1000, 200)
+    se = SingleCellExperiment(
+        assays={"counts": x},
+        row_data=biocframe.BiocFrame(
+            {"foo": numpy.random.rand(1000), "bar": numpy.random.rand(1000)}
+        ),
+        column_data=biocframe.BiocFrame(
+            {"whee": numpy.random.rand(200), "stuff": numpy.random.rand(200)}
+        ),
+        reduced_dims={"tsnooch": numpy.random.rand(200, 4)},
+    )
+
+    path = os.path.join(mkdtemp(), "sce_dimdata")
+    save_object(se, path)
+
+    roundtrip = read_object(path)
+    assert isinstance(roundtrip, SingleCellExperiment)
+    assert numpy.allclose(se.row_data["foo"], roundtrip.row_data["foo"])
+    assert numpy.allclose(se.column_data["stuff"], roundtrip.column_data["stuff"])
+    assert se.get_reduced_dim_names() == roundtrip.get_reduced_dim_names()
+    assert numpy.allclose(
+        se.reduced_dim("tsnooch"), numpy.array(roundtrip.reduced_dim("tsnooch"))
+    )
+
+
+def test_stage_sce_with_rdims_and_alts():
+    x = numpy.random.rand(1000, 200)
+    se = SingleCellExperiment(
+        assays={"counts": x},
+        row_data=biocframe.BiocFrame(
+            {"foo": numpy.random.rand(1000), "bar": numpy.random.rand(1000)}
+        ),
+        column_data=biocframe.BiocFrame(
+            {"whee": numpy.random.rand(200), "stuff": numpy.random.rand(200)}
+        ),
+        reduced_dims={"tsnooch": numpy.random.rand(200, 4)},
+        alternative_experiments={
+            "useless_modality": SummarizedExperiment(
+                {"counts": numpy.random.rand(100, 200)}
+            )
+        },
+    )
+
+    path = os.path.join(mkdtemp(), "sce_dimdata")
+    save_object(se, path)
+
+    roundtrip = read_object(path)
+
+    # Reduced dimensions and alternative experiments must both be restored.
+    assert isinstance(roundtrip, SingleCellExperiment)
+    assert numpy.allclose(se.row_data["foo"], roundtrip.row_data["foo"])
+    assert numpy.allclose(se.column_data["stuff"], roundtrip.column_data["stuff"])
+    assert se.get_reduced_dim_names() == roundtrip.get_reduced_dim_names()
+    assert numpy.allclose(
+        se.reduced_dim("tsnooch"), numpy.array(roundtrip.reduced_dim("tsnooch"))
+    )
+    assert (
+        se.get_alternative_experiment_names()
+        == roundtrip.get_alternative_experiment_names()
+    )
+    assert numpy.allclose(
+        se.alternative_experiment("useless_modality").assay("counts"),
+        numpy.array(
+            roundtrip.alternative_experiment("useless_modality").assay("counts")
+        ),
+    )
+
+
+def test_stage_sce_with_other_meta():
+    x = numpy.random.rand(1000, 200)
+    se = SingleCellExperiment(assays={"counts": x}, metadata={"YAY": 2, "FOO": "a"})
+
+    path = os.path.join(mkdtemp(), "sce_other_meta")
+    save_object(se, path)
+
+    roundtrip = read_object(path)
+    assert roundtrip.metadata == se.metadata
+
+
+def test_empty_sce():
+    se = SingleCellExperiment(assays={}, metadata={"YAY": 2, "FOO": "a"})
+
+    path = os.path.join(mkdtemp(), "sce_other_meta2")
+    save_object(se, path)
+
+    roundtrip = read_object(path)
+    assert roundtrip.metadata == se.metadata
+    assert len(se.get_assay_names()) == len(roundtrip.get_assay_names())
+
+
+def test_empty_dimnames():
+    se = SingleCellExperiment(
+        assays={},
+        row_data=biocframe.BiocFrame(row_names=["gene" + str(i) for i in range(1000)]),
+        column_data=biocframe.BiocFrame(
+            row_names=["cell" + str(i) for i in range(200)]
+        ),
+    )
+
+    # The experiment has no assays, so the row and column names below are
+    # carried by the row/column data alone.
+
+    path = os.path.join(mkdtemp(), "sce_dimdata3")
+    save_object(se, path)
+
+    roundtrip = read_object(path)
+    assert isinstance(roundtrip, SingleCellExperiment)
+    assert se.row_data.row_names == roundtrip.row_data.row_names
+    assert se.column_data.row_names == roundtrip.column_data.row_names
+    assert len(se.get_assay_names()) == len(roundtrip.get_assay_names())