Skip to content

Commit

Permalink
do optimization for filtering and merging.
Browse files Browse the repository at this point in the history
  • Loading branch information
tanliwei-coder committed Oct 9, 2024
1 parent 09fc33b commit b99506c
Show file tree
Hide file tree
Showing 15 changed files with 678 additions and 273 deletions.
2 changes: 1 addition & 1 deletion docs/source/Tutorials(Multi-sample)/SpaSEG.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,7 @@
"source": [
"## Running SpaSEG on multi-sample\n",
"\n",
"This process can be seen as an clustering, `min_label` represents the number of cell/bin types you expect, the training will stop when the number of cell/bin types reaches `min_label` or the number of iterations reaches `iterations`.\n",
"This process can be seen as a clustering, `min_label` represents the number of cell/bin types you expect, the training will stop when the number of cell/bin types reaches `min_label` or the number of iterations reaches `iterations`.\n",
"\n",
"For more information you can refer to api [ms_data.tl.spa_seg](../content/stereo.algorithm.spa_seg.SpaSeg.main.html)."
]
Expand Down
6 changes: 3 additions & 3 deletions docs/source/Tutorials/SingleR.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,12 @@
],
"source": [
"# preprocessing\n",
"ref.tl.log1p()\n",
"ref.tl.normalize_total()\n",
"ref.tl.log1p()\n",
"\n",
"data.tl.cal_qc()\n",
"data.tl.log1p()\n",
"data.tl.normalize_total()"
"data.tl.normalize_total()\n",
"data.tl.log1p()"
]
},
{
Expand Down
104 changes: 57 additions & 47 deletions stereo/core/cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,33 +37,21 @@ def __init__(
self.cell_name = cell_name
else:
self._obs = pd.DataFrame(index=cell_name if cell_name is None else cell_name.astype('U'))
# self.loc = self._obs.loc
self._matrix = dict()
self._pairwise = dict()
if batch is not None:
self._obs['batch'] = self._set_batch(batch)
self._cell_border = cell_border
self.batch = self._set_batch(batch)
self.cell_border = cell_border
self.cell_point = None

def __contains__(self, item):
return item in self._obs.columns

# def __setattr__(self, key, value):
# if key in {'_obs', '_matrix', '_pairwise', '_cell_border', 'cell_name', 'cell_border', 'loc', 'cell_point'}:
# object.__setattr__(self, key, value)
# elif key == 'batch':
# self._obs[key] = self._set_batch(value)
# else:
# if value is not None:
# self._obs[key] = value

def __setitem__(self, key, value):
if value is not None:
self._obs[key] = value

def __getitem__(self, key):
# if key not in self._obs.columns:
# return None
return self._obs[key]

def __len__(self):
Expand All @@ -81,6 +69,10 @@ def pairwise(self):
def size(self):
return self._obs.index.size

@property
def shape(self):
return self._obs.shape

@property
def loc(self):
return self._obs.loc
Expand Down Expand Up @@ -150,13 +142,17 @@ def cell_name(self, name: np.ndarray):

@property
def cell_border(self):
return self._cell_border
return self._matrix.get('cell_border', None)

@cell_border.setter
def cell_border(self, cell_border: np.ndarray):
if cell_border is None:
return
if not isinstance(cell_border, np.ndarray):
raise TypeError('cell border must be a np.ndarray object.')
self._cell_border = cell_border
if len(cell_border.shape) != 3:
raise Exception(f'The cell border must have 3 dimensions, but now {len(cell_border.shape)}.')
self._matrix['cell_border'] = cell_border

@property
def batch(self):
Expand All @@ -166,7 +162,9 @@ def batch(self):

@batch.setter
def batch(self, batch):
self._obs['batch'] = self._set_batch(batch)
if batch is not None:
self._obs['batch'] = self._set_batch(batch)
self._obs['batch'] = self._obs['batch'].astype('category')

def _set_batch(self, batch: Union[np.ndarray, list, int, str]):
if batch is None:
Expand All @@ -192,16 +190,10 @@ def sub_set(self, index):
"""
if isinstance(index, pd.Series):
index = index.to_numpy()
if self.cell_border is not None:
self.cell_border = self.cell_border[index]
self._obs = self._obs.iloc[index].copy()
for col in self._obs.columns:
if self._obs[col].dtype.name == 'category':
self._obs[col] = self._obs[col].cat.remove_unused_categories()
for key, value in self._matrix.items():
if isinstance(value, pd.DataFrame):
self._matrix[key] = value.iloc[index].copy()
self._matrix[key].reset_index(drop=True, inplace=True)
elif isinstance(value, (np.ndarray, spmatrix)):
self._matrix[key] = value[index]
else:
Expand All @@ -211,7 +203,6 @@ def sub_set(self, index):
if isinstance(value, pd.DataFrame):
columns = value.columns[index]
self._pairwise[key] = value.iloc[index][columns].copy()
self._pairwise[key].reset_index(drop=True, inplace=True)
elif isinstance(value, (np.ndarray, spmatrix)):
if len(value.shape) != 2:
logger.warning(f'Subsetting from {key} of shape {value.shape} in cell.pairwise is not supported.')
Expand All @@ -222,13 +213,14 @@ def sub_set(self, index):
if isinstance(v, pd.DataFrame):
columns = v.columns[index]
self._pairwise[key][k] = v.iloc[index][columns].copy()
self._pairwise[key][k].reset_index(drop=True, inplace=True)
elif isinstance(v, (np.ndarray, spmatrix)):
self._pairwise[key][k] = v[index][:, index]
else:
logger.warning(f'Subsetting from {key}.{k} of type {type(v)} in cell.pairwise is not supported.')
else:
logger.warning(f'Subsetting from {key} of type {type(value)} in cell.pairwise is not supported.')

self._remove_unused_categories()
return self

def get_property(self, name):
Expand Down Expand Up @@ -261,28 +253,48 @@ def __str__(self):
def _repr_html_(self):
obs: pd.DataFrame = self.to_df()
return obs._repr_html_()


def _remove_unused_categories(self):
for col in self.obs.columns:
if self.obs[col].dtype.name == 'category':
self.obs[col] = self.obs[col].cat.remove_unused_categories()

for ins in (self._matrix, self._pairwise):
for key, value in ins.items():
if isinstance(value, pd.DataFrame):
for col in value.columns:
if value[col].dtype.name == 'category':
value[col] = value[col].cat.remove_unused_categories()

class AnnBasedCell(Cell):

def __init__(self, based_ann_data: AnnData, cell_name: Optional[np.ndarray] = None,
cell_border: Optional[np.ndarray] = None,
batch: Optional[Union[np.ndarray, list, int, str]] = None):
def __init__(
self,
based_ann_data: AnnData,
cell_name: Optional[np.ndarray] = None,
cell_border: Optional[np.ndarray] = None,
batch: Optional[Union[np.ndarray, list, int, str]] = None
):
self.__based_ann_data = based_ann_data
super(AnnBasedCell, self).__init__(cell_name=cell_name)
if cell_border is not None:
self.cell_border = cell_border
# super(AnnBasedCell, self).__init__(cell_name=cell_name)
# if cell_border is not None:
# self.cell_border = cell_border
# if batch is not None:
# self.batch = batch

# super(AnnBasedCell, self).__init__(obs=based_ann_data._obs, cell_name=cell_name, batch=batch)
if cell_name is not None:
self.cell_name = cell_name
if batch is not None:
self.batch = batch
if cell_border is not None:
self.cell_border = cell_border

def __setattr__(self, key, value):
if key == '_obs':
return
# elif key == 'batch':
# self.__based_ann_data.obs[key] = self._set_batch(value)
# self.__based_ann_data.obs[key] = self.__based_ann_data.obs[key].astype('category')
else:
object.__setattr__(self, key, value)
# def __setattr__(self, key, value):
# if key == '_obs':
# return
# else:
# object.__setattr__(self, key, value)

def __str__(self):
return str(self.__based_ann_data.obs)
Expand All @@ -291,16 +303,14 @@ def __repr__(self):
return self.__str__()

def __getitem__(self, item):
# if item not in self.__based_ann_data.obs.columns:
# return None
return self.__based_ann_data.obs[item]

def __contains__(self, item):
return item in self.__based_ann_data.obs.columns

@property
def _obs(self):
return self.__based_ann_data.obs
return self.__based_ann_data._obs

@property
def obs(self):
Expand Down Expand Up @@ -385,10 +395,10 @@ def n_genes_by_counts(self, new_n_genes_by_counts):
# return None
# return self.__based_ann_data._obs['batch'].to_numpy()

@Cell.batch.setter
def batch(self, batch):
self.__based_ann_data.obs['batch'] = self._set_batch(batch)
self.__based_ann_data.obs['batch'] = self.__based_ann_data.obs['batch'].astype('category')
# @Cell.batch.setter
# def batch(self, batch):
# self.__based_ann_data.obs['batch'] = self._set_batch(batch)
# self.__based_ann_data.obs['batch'] = self.__based_ann_data.obs['batch'].astype('category')

@property
def cell_border(self):
Expand Down
57 changes: 57 additions & 0 deletions stereo/core/data_component.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from typing import Union
from copy import deepcopy

from anndata import AnnData
import numpy as np
from scipy.sparse import spmatrix
import pandas as pd

from . import stereo_exp_data

class Layers(dict):
    """Dictionary of named matrices ("layers") bound to one data object.

    Values stored through item assignment (``layers[key] = value``) must be a
    ``np.ndarray``, a scipy ``spmatrix`` or a ``pd.DataFrame`` whose ``shape``
    equals the ``shape`` of the owning data object. DataFrames are stored as
    NumPy copies.

    NOTE: only item assignment is validated. ``update()``, ``setdefault()``
    and the constructor's ``*args`` / ``**kwargs`` are not overridden here, so
    values inserted through them are stored unchecked.
    """

    # Name-mangled key under which the owner reference is kept in the
    # instance ``__dict__`` (what ``self.__stereo_exp_data`` resolves to).
    _OWNER_ATTR = '_Layers__stereo_exp_data'

    def __init__(
        self,
        data: 'stereo_exp_data.StereoExpData',
        *args,
        **kwargs
    ):
        """Create the container.

        :param data: the owning data object; its ``shape`` is the shape every
            layer assigned later has to match.
        :param args: forwarded unchanged to ``dict.__init__``.
        :param kwargs: forwarded unchanged to ``dict.__init__``.
        """
        super(Layers, self).__init__(*args, **kwargs)
        self.__stereo_exp_data = data

    def __deepcopy__(self, memo=None):
        """Return a deep copy whose owner reference is the copied owner.

        :param memo: the memo dictionary used by ``copy.deepcopy``; a new one
            is created when it is omitted.
        :return: the copied ``Layers`` (the memoized one if this object has
            already been copied within ``memo``).
        """
        if memo is None:
            memo = {}
        if id(self) in memo:
            return memo[id(self)]

        new_layers = Layers(self.__stereo_exp_data)
        # Register the copy before copying the owner: if the owner refers
        # back to this object, the nested deepcopy gets `new_layers` from the
        # memo instead of recursing.
        memo[id(self)] = new_layers

        owner = self.__stereo_exp_data
        if id(owner) in memo:
            data = memo[id(owner)]
        else:
            data = deepcopy(owner, memo)

        new_attrs = {
            deepcopy(k, memo): deepcopy(v, memo)
            for k, v in self.__dict__.items()
            if k != self._OWNER_ATTR
        }
        new_attrs[self._OWNER_ATTR] = data
        new_layers.__dict__.update(new_attrs)

        # Insert through dict.__setitem__ so the copied values are stored
        # as-is, without running the validation in Layers.__setitem__.
        for k, v in self.items():
            dict.__setitem__(new_layers, deepcopy(k, memo), deepcopy(v, memo))
        return new_layers

    def __setitem__(self, key, value):
        """Validate ``value`` and store it under ``key``.

        :param key: name of the layer.
        :param value: ``np.ndarray``, ``spmatrix`` or ``pd.DataFrame``; a
            DataFrame is converted to a NumPy copy before being stored.
        :raises ValueError: if ``value`` has an unsupported type, or if its
            shape differs from the owner's shape.
        """
        if not isinstance(value, (np.ndarray, spmatrix, pd.DataFrame)):
            raise ValueError("layer must be np.ndarray, spmatrix or pd.DataFrame.")
        # pickle rebuilds a dict subclass by re-inserting the items through
        # __setitem__ *before* it restores the instance __dict__, so the owner
        # reference may not exist yet. Skip only the shape check in that case
        # instead of failing with AttributeError.
        if self._OWNER_ATTR in self.__dict__:
            if value.shape != self.__stereo_exp_data.shape:
                raise ValueError(f"in layer '{key}', expected shape {self.__stereo_exp_data.shape}, but got {value.shape}.")
        if isinstance(value, pd.DataFrame):
            value = value.to_numpy(copy=True)
        super().__setitem__(key, value)

    def __str__(self) -> str:
        """Return a one-line summary listing the layer keys."""
        info = f"layers with keys {list(self.keys())}."
        return info

    def __repr__(self) -> str:
        """Return the same summary as ``__str__``."""
        return self.__str__()
Loading

0 comments on commit b99506c

Please sign in to comment.