From 9dae50e28b8eefa37786ce42e523797ffc29cbe8 Mon Sep 17 00:00:00 2001 From: tanliwei Date: Fri, 20 Dec 2024 14:38:09 +0800 Subject: [PATCH] update for the reproducibility of MicroEnvs --- stereo/algorithm/cell_cell_communication/spatial_scoloc.py | 6 ++++-- stereo/algorithm/gen_ccc_micro_envs.py | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/stereo/algorithm/cell_cell_communication/spatial_scoloc.py b/stereo/algorithm/cell_cell_communication/spatial_scoloc.py index 495ca3f3..1a27e729 100644 --- a/stereo/algorithm/cell_cell_communication/spatial_scoloc.py +++ b/stereo/algorithm/cell_cell_communication/spatial_scoloc.py @@ -34,7 +34,8 @@ def main(self, min_num: int = 30, binsize: float = 2, eps: float = 1e-20, - output_path: str = None + output_path: str = None, + seed_used: int = 1024 ): """ Generate the micro-environment file used for the CCC analysis. @@ -90,8 +91,9 @@ def main(self, for col in type_name: pairwise_kl_df[col] = pairwise_kl_df[col].apply(lambda x: []) # do bootstrap + np.random.seed(seed_used) for i in range(n_boot): - random.seed(i) + # random.seed(i) # Get the bootstrap sample idx = np.random.choice(range(n_cell), round(n_cell * boot_prop), replace=True) data_boot = data.iloc[idx,] diff --git a/stereo/algorithm/gen_ccc_micro_envs.py b/stereo/algorithm/gen_ccc_micro_envs.py index ea8d0388..5a5c1a53 100644 --- a/stereo/algorithm/gen_ccc_micro_envs.py +++ b/stereo/algorithm/gen_ccc_micro_envs.py @@ -22,6 +22,7 @@ def main( binsize: float = 2, eps: float = 1e-20, show_dividing_by_thresholds: bool = True, + seed_used: int = 1024, method: str = 'split', threshold: float = None, output_path: str = None, @@ -80,6 +81,7 @@ def main( which is then used to construct the microenvironments. :param eps: fill eps to zero kde to avoid inf KL divergence. :param show_dividing_by_thresholds: whether to display the result while running the first part of this function. 
+ :param seed_used: the seed used for the random number generator; fix it for reproducibility. :param method: define micro environments using two methods: 1) minimum spanning tree, or 2) pruning the fully connected tree based on a given threshold of KL, then split the graph into multiple strongly connected component. @@ -129,7 +131,8 @@ def main( min_num=min_num, binsize=binsize, eps=eps, - output_path=output_path + output_path=output_path, + seed_used=seed_used ) self.pipeline_res[res_key] = { 'output_path': output_path,