-
Notifications
You must be signed in to change notification settings - Fork 65
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
36 changed files
with
539 additions
and
5,947 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
Gaussian smooth | ||
---------------- | ||
This example shows how to run Gaussian smoothing in Stereopy. | ||
|
||
Gaussian smoothing brings the expression values of neighboring cells closer together. For details of the algorithm, refer to https://www.biorxiv.org/content/10.1101/2022.05.26.493527v1.abstract. | ||
|
||
Generally, you should do some preprocessing such as filtering cells, filtering genes, normalization, pca before running gaussian smooth. | ||
|
||
In particular, you must save the raw expression matrix by running raw_checkpoint before any operation, such as normalization, that changes the values of the expression matrix. | ||
|
||
Once raw_checkpoint has been run, you cannot run any operation that changes the first dimension (the number of cells) of the expression matrix before running gaussian smooth. | ||
|
||
Also, you need to run pca before running gaussian smooth. | ||
|
||
.. code:: python | ||
import stereo as st | ||
input_file = "/jdfssz2/ST_BIOINTEL/P20Z10200N0039/06.user/liulin4/demo/jiace/raw/SS200000141TL_B5_raw.h5ad" | ||
data = st.io.read_ann_h5ad(input_file, spatial_key='spatial') | ||
data.tl.cal_qc() | ||
data.tl.filter_cells(min_gene=300, pct_counts_mt=10) | ||
data.tl.filter_genes(min_cell=10) | ||
data.tl.raw_checkpoint() | ||
data.tl.normalize_total(target_sum=10000) | ||
data.tl.log1p() | ||
data.tl.pca(use_highly_genes=False, n_pcs=50, svd_solver='arpack') | ||
data.tl.gaussian_smooth(n_neighbors=10, smooth_threshold=90) | ||
data.tl.scale(max_value=10) #only for gaussian_smooth_scatter_by_gene | ||
data.plt.gaussian_smooth_scatter_by_gene(gene_name='C1ql2') | ||
data.plt.gaussian_smooth_scatter_by_gene(gene_name='Irx2') | ||
data.plt.gaussian_smooth_scatter_by_gene(gene_name='Calb1') | ||
+--------------------------------------------+--------------------------------+ | ||
|.. image:: ../_static/gaussian_smooth_1.png |.. image:: ../_static/C1ql2.jpg | | ||
+--------------------------------------------+--------------------------------+ | ||
|.. image:: ../_static/gaussian_smooth_2.png |.. image:: ../_static/Inx2.jpg | | ||
+--------------------------------------------+--------------------------------+ | ||
|.. image:: ../_static/gaussian_smooth_3.png |.. image:: ../_static/cabl1.jpg | | ||
+--------------------------------------------+--------------------------------+ | ||
|
||
Afterwards, if you want to perform other operations such as clustering, you need to run the same preprocessing again. | ||
|
||
This is because the earlier preprocessing is used only to search for the nearest points; the smoothed result is still based on the raw expression matrix saved by running raw_checkpoint. | ||
|
||
.. code:: python | ||
import os | ||
import stereo as st | ||
input_file = "/jdfssz2/ST_BIOINTEL/P20Z10200N0039/06.user/liulin4/demo/jiace/raw/SS200000141TL_B5_raw.h5ad" | ||
data = st.io.read_ann_h5ad(input_file, spatial_key='spatial') | ||
data.tl.cal_qc() | ||
data.tl.filter_cells(min_gene=300, pct_counts_mt=10) | ||
data.tl.filter_genes(min_cell=10) | ||
data.tl.raw_checkpoint() | ||
data.tl.normalize_total(target_sum=10000) | ||
data.tl.log1p() | ||
data.tl.pca(use_highly_genes=False, n_pcs=50, svd_solver='arpack') | ||
data.tl.gaussian_smooth(n_neighbors=10, smooth_threshold=90) | ||
data.tl.normalize_total(target_sum=10000) | ||
data.tl.log1p() | ||
data.tl.pca(use_highly_genes=False, n_pcs=50, svd_solver='arpack') | ||
data.tl.neighbors(pca_res_key='pca', n_pcs=30, res_key='neighbors') | ||
data.tl.leiden(neighbors_res_key='neighbors', res_key='leiden') | ||
data.plt.cluster_scatter(res_key='leiden') | ||
After Gaussian smoothing, clustering can separate the data into more subtypes. | ||
|
||
+---------------------------------------------------+---------------------------------------------------+ | ||
|Before |After | | ||
+===================================================+===================================================+ | ||
|.. image:: ../_static/clustering_before_smooth.png |.. image:: ../_static/clustering_after_smooth.png | | ||
+---------------------------------------------------+---------------------------------------------------+ |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import numpy as np | ||
import scipy.sparse as sp | ||
from scipy.spatial import distance | ||
from sklearn.neighbors import NearestNeighbors | ||
from ..log_manager import logger | ||
from ..utils.time_consume import TimeConsume | ||
|
||
def _gaussan_c(dis, gs=0.95, a=1, b=0):
    """Return the Gaussian width ``c`` at which the curve equals ``gs`` at ``dis``.

    Solves ``gs = a * exp(-(dis - b)**2 / (2 * c**2))`` for ``c``.

    :param dis: distance (scalar or array) at which the Gaussian should reach ``gs``.
    :param gs: target value of the Gaussian at ``dis``.
    :param a: amplitude of the Gaussian.
    :param b: centre of the Gaussian.
    :return: the width ``c``, computed with NumPy so array input gives array output.
    """
    offset = dis - b
    # log(gs / a) is negative whenever gs < a, which makes the radicand positive.
    log_ratio = np.log(gs / a)
    return np.sqrt(-offset**2 / 2 / log_ratio)
|
||
def _gaussan_weight(dis, a=1, b=0, c=12500):
    """Evaluate the Gaussian ``a * exp(-(dis - b)**2 / (2 * c**2))``.

    :param dis: distance (scalar or array) at which to evaluate the Gaussian.
    :param a: amplitude of the Gaussian.
    :param b: centre of the Gaussian.
    :param c: width of the Gaussian.
    :return: the Gaussian value(s) for ``dis``.
    """
    offset = dis - b
    exponent = -offset**2 / 2 / (c**2)
    return a * np.exp(exponent)
|
||
|
||
def _sp_graph_weight(arr, a=1, b=0, c=12500):
    """Replace every nonzero entry of ``arr`` with its Gaussian weight.

    Zero entries are left untouched, so they keep a weight of 0. The input
    is not modified; a copy is returned.

    The conversion is done with a single vectorized assignment instead of a
    Python loop over every nonzero element.

    :param arr: matrix of distances in which 0 means "no edge".
    :param a: amplitude passed to ``_gaussan_weight``.
    :param b: centre passed to ``_gaussan_weight``.
    :param c: width passed to ``_gaussan_weight``.
    :return: a copy of ``arr`` with nonzero distances replaced by weights,
        cast to the dtype of ``arr`` on assignment.
    """
    out = arr.copy()
    nonzero_idx = np.nonzero(arr)
    # Flatten the selection so the helper always receives a plain 1-D array,
    # even if indexing ``arr`` yields a matrix-like object.
    distances = np.asarray(arr[nonzero_idx]).ravel()
    out[nonzero_idx] = _gaussan_weight(distances, a=a, b=b, c=c)
    return out
|
||
def gaussian_smooth(pca_exp_matrix: np.ndarray,
                    raw_exp_matrix: np.ndarray,
                    cells_position: np.ndarray,
                    n_neighbors: int = 10,
                    smooth_threshold: float = 90,
                    a: float = 1,
                    b: float = 0):
    """Smooth the raw expression matrix with Gaussian-weighted neighbor averaging.

    Neighbors are searched in the space of ``pca_exp_matrix``; each selected
    neighbor is weighted by a Gaussian of its Euclidean distance in
    ``cells_position``; the result is the weight-normalized sum of the rows
    of ``raw_exp_matrix``.

    :param pca_exp_matrix: matrix used to fit the nearest-neighbor search;
        converted to dense if sparse.
    :param raw_exp_matrix: matrix whose rows are averaged; converted to dense
        if sparse.
    :param cells_position: coordinates used for the pairwise Euclidean
        distances between cells.
    :param n_neighbors: number of neighbors requested from ``NearestNeighbors``.
    :param smooth_threshold: percentile of the nonzero neighbor distances
        passed to ``_gaussan_c`` to derive the Gaussian width.
    :param a: Gaussian amplitude forwarded to ``_sp_graph_weight``.
    :param b: Gaussian centre forwarded to ``_sp_graph_weight``.
    :return: the smoothed expression matrix as an ``np.ndarray``.
    """
    pca_dense = pca_exp_matrix.toarray() if sp.issparse(pca_exp_matrix) else pca_exp_matrix
    raw_dense = raw_exp_matrix.toarray() if sp.issparse(raw_exp_matrix) else raw_exp_matrix

    # Timer is started but never read here (the timing logs are disabled).
    timer = TimeConsume()
    timer_key = timer.start()

    # NOTE(review): this is a dense cells x cells matrix, so memory grows
    # quadratically with the number of cells.
    spatial_dist = distance.cdist(cells_position, cells_position).astype(np.float32)

    knn = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree')
    knn = knn.fit(pca_dense)
    connectivity = knn.kneighbors_graph(pca_dense).astype(np.float32).toarray()

    # Keep spatial distances only for the selected neighbor pairs. The
    # distance from a cell to itself is 0, so it drops out as a zero entry.
    neighbor_dist = connectivity * spatial_dist
    nonzero_dist = neighbor_dist[np.nonzero(neighbor_dist)]

    dist_threshold = np.percentile(nonzero_dist, smooth_threshold)
    c = _gaussan_c(dist_threshold)

    # Smoothing: turn distances into weights, then take the weighted average.
    gauss_weight = _sp_graph_weight(neighbor_dist, a, b, c)
    # NOTE(review): a row with no nonzero weight has a zero sum, so the
    # division below would produce NaN/inf for that cell -- confirm whether
    # this can occur in practice.
    row_weight_sum = np.squeeze(np.asarray(np.sum(gauss_weight, axis=1)))

    weighted_exp = gauss_weight.dot(raw_dense)
    return np.asarray((weighted_exp.T / row_weight_sum).T)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.