diff --git a/stereo/io/reader.py b/stereo/io/reader.py index cec38f88..c7134dd0 100644 --- a/stereo/io/reader.py +++ b/stereo/io/reader.py @@ -48,7 +48,7 @@ def read_gem( bin_type: str = "bins", bin_size: int = 100, is_sparse: bool = True, - bin_coord_offset: bool = False, + center_coordinates: bool = False, gene_name_index: bool = False ): """ @@ -66,9 +66,9 @@ def read_gem( the size of bin to merge, when `bin_type` is set to `'bins'`. is_sparse the expression matrix is sparse matrix, if `True`, otherwise `np.ndarray`. - bin_coord_offset - if set it to True, the coordinates of bins are calculated as - ((gene_coordinates - min_coordinates) // bin_size) * bin_size + min_coordinates + bin_size/2 + center_coordinates + if set it to True, the coordinate of each bin will be the center of the bin, + otherwise, the coordinate of each bin will be the left-top corner of the bin. gene_name_index In a v0.1 gem file, the column geneID is the gene name actually, but in a v0.2, geneID just a ID for genes and there is an additional column called geneName where is the gene name, @@ -106,7 +106,7 @@ def read_gem( if data.bin_type == 'cell_bins': gdf = parse_cell_bin_coor(df) else: - if bin_coord_offset: + if center_coordinates: df = parse_bin_coor(df, bin_size) else: df = parse_bin_coor_no_offset(df, bin_size) @@ -153,7 +153,7 @@ def read_gem( 'maxExp': data.exp_matrix.max(), # noqa 'resolution': resolution, } - data.bin_coord_offset = bin_coord_offset + data.center_coordinates = center_coordinates logger.info(f'the martrix has {data.cell_names.size} cells, and {data.gene_names.size} genes.') return data @@ -732,6 +732,7 @@ def read_h5ad( flavor: str = 'scanpy', bin_type: str = None, bin_size: int = None, + spatial_key: str = 'spatial', **kwargs ) -> Union[StereoExpData, AnnBasedStereoExpData]: """ @@ -742,20 +743,22 @@ def read_h5ad( file_path the path of the h5ad file. anndata - the object of AnnData which to be loaded, only available while `flavor` is `'scanpy'`. + the object of AnnData to be loaded, only available while `flavor` is `'scanpy'`. `file_path` and `anndata` only can input one of them. flavor the format of the h5ad file, defaults to `'scanpy'`. `scanpy`: AnnData format of scanpy - `stereopy`: h5ad format of stereo + `stereopy`: h5 format of stereo bin_type - the bin type includes `'bins'` or `'cell_bins'`. + the bin type includes `'bins'` and `'cell_bins'`. bin_size the size of bin to merge, when `bin_type` is set to `'bins'`. + spatial_key + the key of spatial information in AnnData.obsm, default to `'spatial'`. + Only available while `flavor` is `'scanpy'`. Returns --------------- - An object of StereoExpData while `flavor` is `'stereopy'` or an object of AnnBasedStereoExpData while `flavor` is - `'scanpy'` + An object of StereoExpData while `flavor` is `'stereopy'` or an object of AnnBasedStereoExpData while `flavor` is `'scanpy'` """ flavor = flavor.lower() @@ -775,7 +778,7 @@ def read_h5ad( if file_path is not None and anndata is not None: raise Exception("'file_path' and 'anndata' only can input one of them") return AnnBasedStereoExpData(h5ad_file_path=file_path, based_ann_data=anndata, bin_type=bin_type, - bin_size=bin_size, **kwargs) + bin_size=bin_size, spatial_key=spatial_key, **kwargs) else: raise ValueError("Invalid value for 'flavor'") diff --git a/stereo/plots/interact_plot/interactive_scatter.py b/stereo/plots/interact_plot/interactive_scatter.py index 5175b4b3..02cbdf72 100644 --- a/stereo/plots/interact_plot/interactive_scatter.py +++ b/stereo/plots/interact_plot/interactive_scatter.py @@ -165,7 +165,7 @@ def generate_gem_file( if selected_areas.bin_type == 'bins': @nb.njit(cache=True, nogil=True, parallel=True) - def __get_filtering_flag(data, bin_size, position, bin_coord_offset, num_threads): + def __get_filtering_flag(data, bin_size, position, center_coordinates, num_threads): num_threads = min(position.shape[0], num_threads) num_per_thread = position.shape[0] // num_threads num_left = position.shape[0] % num_threads @@ -183,7 +183,7 @@ def __get_filtering_flag(data, bin_size, position, bin_coord_offset, num_threads end = interval[i + 1] for j in range(start, end): x_start, y_start = position[j] - if bin_coord_offset: + if center_coordinates: x_start -= bin_size // 2 y_start -= bin_size // 2 x_end = x_start + bin_size @@ -198,7 +198,7 @@ def __get_filtering_flag(data, bin_size, position, bin_coord_offset, num_threads original_gem_df[['x', 'y', 'UMICount']].to_numpy(), selected_areas.bin_size, selected_areas.position, - selected_areas.bin_coord_offset, + selected_areas.center_coordinates, nb.get_num_threads(), ) selected_gem_df = original_gem_df[flag] diff --git a/stereo/plots/interact_plot/poly_selection.py b/stereo/plots/interact_plot/poly_selection.py index 4257b6aa..780f6bc9 100644 --- a/stereo/plots/interact_plot/poly_selection.py +++ b/stereo/plots/interact_plot/poly_selection.py @@ -209,7 +209,7 @@ def generate_gem_file( if selected_areas.bin_type == 'bins': @nb.njit(cache=True, nogil=True, parallel=True) - def __get_filtering_flag(data, bin_size, position, bin_coord_offset, num_threads, drop): + def __get_filtering_flag(data, bin_size, position, center_coordinates, num_threads, drop): num_threads = min(position.shape[0], num_threads) num_per_thread = position.shape[0] // num_threads num_left = position.shape[0] % num_threads @@ -227,7 +227,7 @@ def __get_filtering_flag(data, bin_size, position, bin_coord_offset, num_threads end = interval[i + 1] for j in range(start, end): x_start, y_start = position[j] - if bin_coord_offset: + if center_coordinates: x_start -= bin_size // 2 y_start -= bin_size // 2 x_end = x_start + bin_size @@ -242,7 +242,7 @@ def __get_filtering_flag(data, bin_size, position, bin_coord_offset, num_threads original_gem_df[['x', 'y', 'UMICount']].to_numpy(), selected_areas.bin_size, selected_areas.position, - selected_areas.bin_coord_offset, + selected_areas.center_coordinates, nb.get_num_threads(), drop ) diff --git a/stereo/utils/data_helper.py b/stereo/utils/data_helper.py index 9624e582..fa1522b3 100644 --- a/stereo/utils/data_helper.py +++ b/stereo/utils/data_helper.py @@ -321,10 +321,12 @@ def __merge_for_stereo_exp_data( new_data.sn[str(batch)] = data.sn if i == 0: new_data.exp_matrix = data.exp_matrix.copy() - new_data.cells = Cell(cell_name=cell_names, cell_border=data.cells.cell_border, batch=data.cells.batch) - new_data.genes = Gene(gene_name=data.gene_names) - new_data.cells._obs = data.cells._obs.copy(deep=True) - new_data.cells._obs.index = cell_names + # new_data.cells = Cell(cell_name=cell_names, cell_border=data.cells.cell_border, batch=data.cells.batch) + # new_data.genes = Gene(gene_name=data.gene_names) + # new_data.cells._obs = data.cells._obs.copy(deep=True) + # new_data.cells._obs.index = cell_names + new_data.cells = Cell(obs=data.cells._obs.copy(deep=True), cell_border=data.cells.cell_border, batch=data.cells.batch) + new_data.genes = Gene(var=data.genes._var.copy(deep=True)) new_data.position = data.position if data.position_z is None: new_data.position_z = np.repeat([[0]], repeats=data.position.shape[0], axis=0).astype( @@ -337,7 +339,7 @@ def __merge_for_stereo_exp_data( new_data.offset_y = data.offset_y new_data.attr = data.attr else: - current_obs = data.cells._obs.copy() + current_obs = data.cells._obs.copy(deep=True) current_obs.index = cell_names new_data.cells._obs = pd.concat([new_data.cells._obs, current_obs]) if new_data.cell_borders is not None and data.cell_borders is not None: