Merge pull request #3 from karadavis-lab/nbl_cells/filter_by_immune_e…

…xpression 🧐 Initial Scores for NBL Cells
karadavis-lab · Oct 3, 2024 · d2af0b3 · d2af0b3
2 parents e60d4cc + 5600134
commit d2af0b3
Show file tree

Hide file tree

Showing 19 changed files with 2,023 additions and 615 deletions.
diff --git a/.gitignore b/.gitignore
@@ -27,3 +27,5 @@ __pycache__/
 # IDEs
 /.idea/
 /.vscode/
+
+/docs/notebooks/00 - Initialize LaminDB/00 - Clean Clinical Data.ipynb
diff --git a/cliff.toml b/cliff.toml
@@ -0,0 +1,78 @@
+# git-cliff ~ configuration file
+# https://git-cliff.org/docs/configuration
+
+[changelog]
+# template for the changelog header
+header = """
+# Changelog\n
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n
+"""
+# template for the changelog body
+# https://keats.github.io/tera/docs/#introduction
+body = """
+{% if version -%}
+    ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
+{% else -%}
+    ## [Unreleased]
+{% endif -%}
+{% for group, commits in commits | group_by(attribute="group") %}
+    ### {{ group | upper_first }}
+    {% for commit in commits %}
+        - {{ commit.message | upper_first }}\
+    {% endfor %}
+{% endfor %}\n
+"""
+# template for the changelog footer
+footer = """
+{% for release in releases -%}
+    {% if release.version -%}
+        {% if release.previous.version -%}
+            [{{ release.version | trim_start_matches(pat="v") }}]: \
+                https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}\
+                    /compare/{{ release.previous.version }}..{{ release.version }}
+        {% endif -%}
+    {% else -%}
+        [unreleased]: https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}\
+            /compare/{{ release.previous.version }}..HEAD
+    {% endif -%}
+{% endfor %}
+<!-- generated by git-cliff -->
+"""
+# remove the leading and trailing whitespace from the templates
+trim = true
+
+[git]
+# parse the commits based on https://www.conventionalcommits.org
+conventional_commits = true
+# filter out the commits that are not conventional
+filter_unconventional = true
+# process each line of a commit as an individual commit
+split_commits = false
+# regex for parsing and grouping commits
+commit_parsers = [
+  { message = "^.*: add", group = "Added" },
+  { message = "^.*: support", group = "Added" },
+  { message = "^.*: remove", group = "Removed" },
+  { message = "^.*: delete", group = "Removed" },
+  { message = "^test", group = "Fixed" },
+  { message = "^fix", group = "Fixed" },
+  { message = "^.*: fix", group = "Fixed" },
+  { message = "^.*", group = "Changed" },
+]
+# protect breaking changes from being skipped due to matching a skipping commit_parser
+protect_breaking_commits = false
+# filter out the commits that are not matched by commit parsers
+filter_commits = true
+# regex for matching git tags
+tag_pattern = "v[0-9].*"
+# regex for skipping tags
+skip_tags = "v0.1.0-beta.1"
+# regex for ignoring tags
+ignore_tags = ""
+# sort the tags topologically
+topo_order = false
+# sort the commits inside sections by oldest/newest order
+sort_commits = "oldest"
diff --git a/docs/notebooks/00 - Initialize LaminDB/00 - Clean Clinical Data.ipynb b/docs/notebooks/00 - Initialize LaminDB/00 - Clean Clinical Data.ipynb
@@ -52,7 +52,8 @@
     "import natsort as ns\n",
     "import bionty as bt\n",
     "import lamindb as ln\n",
-    "from lnschema_core.models import Registry"
+    "from lnschema_core.models import Registry\n",
+    "import buckaroo  # noqa: F401"
    ]
   },
   {
@@ -72,11 +73,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ln.settings.transform.stem_uid = \"4DLIySb5QY32\"\n",
-    "ln.settings.transform.version = \"1\"\n",
+    "ln.context.uid = \"XjYRETQ3dpPB0000\"\n",
+    "ln.context.version = \"1\"\n",
     "ln.settings.sync_git_repo = \"https://github.com/karadavis-lab/nbl.git\"\n",
-    "run = ln.track()\n",
-    "run.transform"
+    "ln.context.track()"
    ]
   },
   {
@@ -780,7 +780,6 @@
     "clinical_artifact = ln.Artifact.from_df(\n",
     "    df=clinical_data,\n",
     "    key=\"clinical_data.parquet\",\n",
-    "    run=run,\n",
     "    description=\"Contains sample level clinical data\",\n",
     "    version=\"1\",\n",
     ")\n",
@@ -916,13 +915,6 @@
    "source": [
     "ln.finish()"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

diff --git a/docs/notebooks/00 - Initialize LaminDB/01 - Add SpatialData.ipynb b/docs/notebooks/00 - Initialize LaminDB/01 - Add SpatialData.ipynb
@@ -52,8 +52,7 @@
     "import natsort as ns\n",
     "import lamindb as ln\n",
     "from nbl.util import DaskLocalCluster, reset_table_index\n",
-    "import nbl\n",
-    "import spatialdata as sd"
+    "import nbl"
    ]
   },
   {
@@ -73,11 +72,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ln.settings.transform.stem_uid = \"sDPLFLgnLcbi\"\n",
-    "ln.settings.transform.version = \"1\"\n",
+    "ln.context.uid = \"FGEcC5bGULbo0000\"\n",
+    "ln.context.version = \"1\"\n",
     "ln.settings.sync_git_repo = \"https://github.com/karadavis-lab/nbl.git\"\n",
-    "run = ln.track()\n",
-    "run.transform"
+    "\n",
+    "ln.context.track()"
    ]
   },
   {
@@ -154,16 +153,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "nbl.io.convert_cohort(fov_dir=fov_dir, label_dir=label_dir, filter_fovs=r\"Hu-*\", file_path=hu_data_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "hu_sdata = sd.read_zarr(store=hu_data_path)"
+    "hu_sdata = nbl.io.convert_cohort(\n",
+    "    fov_dir=fov_dir, label_dir=label_dir, filter_fovs=r\"Hu-*\", file_path=hu_data_path, return_sdata=True\n",
+    ")"
    ]
   },
   {
@@ -231,16 +223,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "nbl.io.convert_cohort(fov_dir=fov_dir, filter_fovs=r\"NBL-\\d+-R\\d+C\\d+\", label_dir=label_dir, file_path=nbl_data_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "nbl_sdata = sd.read_zarr(store=nbl_data_path)"
+    "nbl_sdata = nbl.io.convert_cohort(\n",
+    "    fov_dir=fov_dir, filter_fovs=r\"NBL-\\d+-R\\d+C\\d+\", label_dir=label_dir, file_path=nbl_data_path, return_sdata=True\n",
+    ")"
    ]
   },
   {
@@ -299,7 +284,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pixie_clusters_path = (\n",
+    "pixie_clusters_path: UPath = (\n",
     "    original_data_path / \"segmentation\" / \"cell_table\" / \"cell_table_size_normalized_cell_labels_noCD117.csv\"\n",
     ")"
    ]
@@ -385,26 +370,26 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "nbl_sdata.tables[\"whole_cell\"].obs"
+    "## Add Clinical Information"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Add Clinical Information"
+    "### Load Clinical Data from LaminDB"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
-    "### Load Clinical Data from LaminDB"
+    "clinical_data: pd.DataFrame = ln.Artifact.filter(key__contains=\"clinical_data\").one().load()"
    ]
   },
   {
@@ -413,7 +398,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "clinical_data: pd.DataFrame = ln.Artifact.filter(key__contains=\"clinical_data\").one().load()"
+    "clinical_data"
    ]
   },
   {
@@ -422,7 +407,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "cols_to_drop = [\"Clinical presentation\", \"treatment btw biopsies\"]"
+    "cols_to_keep = [\n",
+    "    \"fov\",\n",
+    "    \"Risk\",\n",
+    "    \"Classification\",\n",
+    "    \"Sex\",\n",
+    "    \"Ethnicity\",\n",
+    "    \"Tissue\",\n",
+    "]"
    ]
   },
   {
@@ -431,7 +423,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "filtered_clincial_data = clinical_data.drop(columns=cols_to_drop)"
+    "filtered_clincial_data = clinical_data.filter(items=cols_to_keep)"
    ]
   },
   {
@@ -446,6 +438,15 @@
     "nbl_sdata.tables[\"whole_cell\"].strings_to_categoricals()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nbl.util.write_elements(sdata=nbl_sdata, elements={\"tables\": [\"whole_cell\"]})"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -460,7 +461,13 @@
    "outputs": [],
    "source": [
     "nbl.pp.arcsinh_transform(\n",
-    "    sdata=nbl_sdata, table_names=\"whole_cell\", shift_factor=0, scale_factor=150, replace_X=True, write=True\n",
+    "    sdata=nbl_sdata,\n",
+    "    table_names=\"whole_cell\",\n",
+    "    shift_factor=0,\n",
+    "    scale_factor=150,\n",
+    "    method=\"new table\",\n",
+    "    write=True,\n",
+    "    inplace=True,\n",
     ")"
    ]
   },
@@ -470,7 +477,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "hu_artifact = ln.Artifact(data=hu_data_path, type=\"dataset\", key=\"Hu.zarr\", description=\"Control Tissue\")\n",
+    "hu_artifact = ln.Artifact(\n",
+    "    data=hu_data_path,\n",
+    "    type=\"dataset\",\n",
+    "    key=\"Hu.zarr\",\n",
+    "    description=\"Control Tissue\",\n",
+    "    revises=ln.Artifact.filter(key__contains=\"Hu.zarr\").one(),\n",
+    ")\n",
     "\n",
     "hu_artifact.save(upload=True)"
    ]
@@ -481,7 +494,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "nbl_artifact = ln.Artifact(data=nbl_data_path, type=\"dataset\", key=\"nbl.zarr\", description=\"NBL Tissue Samples\")\n",
+    "nbl_artifact = ln.Artifact(\n",
+    "    data=nbl_data_path,\n",
+    "    type=\"dataset\",\n",
+    "    key=\"nbl.zarr\",\n",
+    "    description=\"NBL Tissue Samples\",\n",
+    "    revises=ln.Artifact.filter(key__contains=\"nbl.zarr\").one(),\n",
+    ")\n",
     "\n",
     "nbl_artifact.save(upload=True)"
    ]
@@ -494,13 +513,6 @@
    "source": [
     "ln.finish()"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {