Skip to content

Commit

Permalink
Merge pull request #3 from karadavis-lab/nbl_cells/filter_by_immune_e…
Browse files Browse the repository at this point in the history
…xpression

🧐 Initial Scores for NBL Cells
  • Loading branch information
srivarra authored Oct 3, 2024
2 parents e60d4cc + 5600134 commit d2af0b3
Show file tree
Hide file tree
Showing 19 changed files with 2,023 additions and 615 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,5 @@ __pycache__/
# IDEs
/.idea/
/.vscode/

/docs/notebooks/00 - Initialize LaminDB/00 - Clean Clinical Data.ipynb
78 changes: 78 additions & 0 deletions cliff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# git-cliff ~ configuration file
# https://git-cliff.org/docs/configuration

[changelog]
# template for the changelog header
header = """
# Changelog\n
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n
"""
# template for the changelog body
# https://keats.github.io/tera/docs/#introduction
body = """
{% if version -%}
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else -%}
## [Unreleased]
{% endif -%}
{% for group, commits in commits | group_by(attribute="group") %}
### {{ group | upper_first }}
{% for commit in commits %}
- {{ commit.message | upper_first }}\
{% endfor %}
{% endfor %}\n
"""
# template for the changelog footer
footer = """
{% for release in releases -%}
{% if release.version -%}
{% if release.previous.version -%}
[{{ release.version | trim_start_matches(pat="v") }}]: \
https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}\
/compare/{{ release.previous.version }}..{{ release.version }}
{% endif -%}
{% else -%}
[unreleased]: https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}\
/compare/{{ release.previous.version }}..HEAD
{% endif -%}
{% endfor %}
<!-- generated by git-cliff -->
"""
# remove the leading and trailing whitespace from the templates
trim = true

[git]
# parse the commits based on https://www.conventionalcommits.org
conventional_commits = true
# filter out the commits that are not conventional
filter_unconventional = true
# process each line of a commit as an individual commit
split_commits = false
# regex for parsing and grouping commits
commit_parsers = [
{ message = "^.*: add", group = "Added" },
{ message = "^.*: support", group = "Added" },
{ message = "^.*: remove", group = "Removed" },
{ message = "^.*: delete", group = "Removed" },
{ message = "^test", group = "Fixed" },
{ message = "^fix", group = "Fixed" },
{ message = "^.*: fix", group = "Fixed" },
{ message = "^.*", group = "Changed" },
]
# protect breaking changes from being skipped due to matching a skipping commit_parser
protect_breaking_commits = false
# filter out the commits that are not matched by commit parsers
filter_commits = true
# regex for matching git tags
tag_pattern = "v[0-9].*"
# regex for skipping tags
skip_tags = "v0.1.0-beta.1"
# regex for ignoring tags
ignore_tags = ""
# sort the tags topologically
topo_order = false
# sort the commits inside sections by oldest/newest order
sort_commits = "oldest"
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@
"import natsort as ns\n",
"import bionty as bt\n",
"import lamindb as ln\n",
"from lnschema_core.models import Registry"
"from lnschema_core.models import Registry\n",
"import buckaroo # noqa: F401"
]
},
{
Expand All @@ -72,11 +73,10 @@
"metadata": {},
"outputs": [],
"source": [
"ln.settings.transform.stem_uid = \"4DLIySb5QY32\"\n",
"ln.settings.transform.version = \"1\"\n",
"ln.context.uid = \"XjYRETQ3dpPB0000\"\n",
"ln.context.version = \"1\"\n",
"ln.settings.sync_git_repo = \"https://github.com/karadavis-lab/nbl.git\"\n",
"run = ln.track()\n",
"run.transform"
"ln.context.track()"
]
},
{
Expand Down Expand Up @@ -780,7 +780,6 @@
"clinical_artifact = ln.Artifact.from_df(\n",
" df=clinical_data,\n",
" key=\"clinical_data.parquet\",\n",
" run=run,\n",
" description=\"Contains sample level clinical data\",\n",
" version=\"1\",\n",
")\n",
Expand Down Expand Up @@ -916,13 +915,6 @@
"source": [
"ln.finish()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
106 changes: 59 additions & 47 deletions docs/notebooks/00 - Initialize LaminDB/01 - Add SpatialData.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,7 @@
"import natsort as ns\n",
"import lamindb as ln\n",
"from nbl.util import DaskLocalCluster, reset_table_index\n",
"import nbl\n",
"import spatialdata as sd"
"import nbl"
]
},
{
Expand All @@ -73,11 +72,11 @@
"metadata": {},
"outputs": [],
"source": [
"ln.settings.transform.stem_uid = \"sDPLFLgnLcbi\"\n",
"ln.settings.transform.version = \"1\"\n",
"ln.context.uid = \"FGEcC5bGULbo0000\"\n",
"ln.context.version = \"1\"\n",
"ln.settings.sync_git_repo = \"https://github.com/karadavis-lab/nbl.git\"\n",
"run = ln.track()\n",
"run.transform"
"\n",
"ln.context.track()"
]
},
{
Expand Down Expand Up @@ -154,16 +153,9 @@
"metadata": {},
"outputs": [],
"source": [
"nbl.io.convert_cohort(fov_dir=fov_dir, label_dir=label_dir, filter_fovs=r\"Hu-*\", file_path=hu_data_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hu_sdata = sd.read_zarr(store=hu_data_path)"
"hu_sdata = nbl.io.convert_cohort(\n",
" fov_dir=fov_dir, label_dir=label_dir, filter_fovs=r\"Hu-*\", file_path=hu_data_path, return_sdata=True\n",
")"
]
},
{
Expand Down Expand Up @@ -231,16 +223,9 @@
"metadata": {},
"outputs": [],
"source": [
"nbl.io.convert_cohort(fov_dir=fov_dir, filter_fovs=r\"NBL-\\d+-R\\d+C\\d+\", label_dir=label_dir, file_path=nbl_data_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nbl_sdata = sd.read_zarr(store=nbl_data_path)"
"nbl_sdata = nbl.io.convert_cohort(\n",
" fov_dir=fov_dir, filter_fovs=r\"NBL-\\d+-R\\d+C\\d+\", label_dir=label_dir, file_path=nbl_data_path, return_sdata=True\n",
")"
]
},
{
Expand Down Expand Up @@ -299,7 +284,7 @@
"metadata": {},
"outputs": [],
"source": [
"pixie_clusters_path = (\n",
"pixie_clusters_path: UPath = (\n",
" original_data_path / \"segmentation\" / \"cell_table\" / \"cell_table_size_normalized_cell_labels_noCD117.csv\"\n",
")"
]
Expand Down Expand Up @@ -385,26 +370,26 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"nbl_sdata.tables[\"whole_cell\"].obs"
"## Add Clinical Information"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Add Clinical Information"
"### Load Clinical Data from LaminDB"
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"### Load Clinical Data from LaminDB"
"clinical_data: pd.DataFrame = ln.Artifact.filter(key__contains=\"clinical_data\").one().load()"
]
},
{
Expand All @@ -413,7 +398,7 @@
"metadata": {},
"outputs": [],
"source": [
"clinical_data: pd.DataFrame = ln.Artifact.filter(key__contains=\"clinical_data\").one().load()"
"clinical_data"
]
},
{
Expand All @@ -422,7 +407,14 @@
"metadata": {},
"outputs": [],
"source": [
"cols_to_drop = [\"Clinical presentation\", \"treatment btw biopsies\"]"
"cols_to_keep = [\n",
" \"fov\",\n",
" \"Risk\",\n",
" \"Classification\",\n",
" \"Sex\",\n",
" \"Ethnicity\",\n",
" \"Tissue\",\n",
"]"
]
},
{
Expand All @@ -431,7 +423,7 @@
"metadata": {},
"outputs": [],
"source": [
"filtered_clincial_data = clinical_data.drop(columns=cols_to_drop)"
"filtered_clincial_data = clinical_data.filter(items=cols_to_keep)"
]
},
{
Expand All @@ -446,6 +438,15 @@
"nbl_sdata.tables[\"whole_cell\"].strings_to_categoricals()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nbl.util.write_elements(sdata=nbl_sdata, elements={\"tables\": [\"whole_cell\"]})"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -460,7 +461,13 @@
"outputs": [],
"source": [
"nbl.pp.arcsinh_transform(\n",
" sdata=nbl_sdata, table_names=\"whole_cell\", shift_factor=0, scale_factor=150, replace_X=True, write=True\n",
" sdata=nbl_sdata,\n",
" table_names=\"whole_cell\",\n",
" shift_factor=0,\n",
" scale_factor=150,\n",
" method=\"new table\",\n",
" write=True,\n",
" inplace=True,\n",
")"
]
},
Expand All @@ -470,7 +477,13 @@
"metadata": {},
"outputs": [],
"source": [
"hu_artifact = ln.Artifact(data=hu_data_path, type=\"dataset\", key=\"Hu.zarr\", description=\"Control Tissue\")\n",
"hu_artifact = ln.Artifact(\n",
" data=hu_data_path,\n",
" type=\"dataset\",\n",
" key=\"Hu.zarr\",\n",
" description=\"Control Tissue\",\n",
" revises=ln.Artifact.filter(key__contains=\"Hu.zarr\").one(),\n",
")\n",
"\n",
"hu_artifact.save(upload=True)"
]
Expand All @@ -481,7 +494,13 @@
"metadata": {},
"outputs": [],
"source": [
"nbl_artifact = ln.Artifact(data=nbl_data_path, type=\"dataset\", key=\"nbl.zarr\", description=\"NBL Tissue Samples\")\n",
"nbl_artifact = ln.Artifact(\n",
" data=nbl_data_path,\n",
" type=\"dataset\",\n",
" key=\"nbl.zarr\",\n",
" description=\"NBL Tissue Samples\",\n",
" revises=ln.Artifact.filter(key__contains=\"nbl.zarr\").one(),\n",
")\n",
"\n",
"nbl_artifact.save(upload=True)"
]
Expand All @@ -494,13 +513,6 @@
"source": [
"ln.finish()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
Loading

0 comments on commit d2af0b3

Please sign in to comment.