diff --git a/mibi_bin_tools/bin_files.py b/mibi_bin_tools/bin_files.py index 1ec5264..c1df329 100644 --- a/mibi_bin_tools/bin_files.py +++ b/mibi_bin_tools/bin_files.py @@ -61,7 +61,7 @@ def _set_tof_ranges(fov: Dict[str, Any], higher: np.ndarray, lower: np.ndarray, def _write_out(img_data: np.ndarray, out_dir: str, fov_name: str, targets: List[str], - intensities=True) -> None: + intensities: Union[bool, List[str]] = False) -> None: """Parses extracted data and writes out tifs Args: @@ -73,36 +73,39 @@ def _write_out(img_data: np.ndarray, out_dir: str, fov_name: str, targets: List[ Name of the field of view targets (array_like): List of target names (i.e channels) - intensities (bool): - Save intensities + intensities (bool | List): + Whether or not to write out intensity images. If a List, specific + peaks can be written out, ignoring the rest, which will only have pulse count images. """ out_dirs = [ os.path.join(out_dir, fov_name), os.path.join(out_dir, fov_name, 'intensities'), - os.path.join(out_dir, fov_name, 'intensity_times_width') ] suffixes = [ '', '_intensity', - '_int_width' ] save_dtypes = [ - np.uint16, np.uint32, np.uint32, ] + for i, (out_dir_i, suffix, save_dtype) in enumerate(zip(out_dirs, suffixes, save_dtypes)): - if i > 0 and not intensities: - continue + # break loop when index is larger than type dimension of img_data + if i+1 > img_data.shape[0]: + break if not os.path.exists(out_dir_i): os.makedirs(out_dir_i) for j, target in enumerate(targets): - io.imsave( - os.path.join(out_dir_i, f'{target}{suffix}.tiff'), - img_data[i, :, :, j].astype(save_dtype), - plugin='tifffile', - check_contrast=False - ) + # save all first images regardless of replacing + # if not replace (i=1), only save intensity images for specified targets + if i == 0 or (target in list(intensities)): + io.imsave( + os.path.join(out_dir_i, f'{target}{suffix}.tiff'), + img_data[i, :, :, j].astype(save_dtype), + plugin='tifffile', + check_contrast=False + ) def _find_bin_files(data_dir: str, @@ -281,10 +284,48 @@ def _parse_intensities(fov: Dict[str, Any], intensities: Union[bool, List[str]]) fov['calc_intensity'] = [False, ] * len(fov['targets']) +def condense_img_data(img_data, targets, intensities, replace): + """Changes image data from separate pulse and intensity data into one column if replace=True. + Args: + img_data (np.array): + Contains the image data with all pulse and intensity information. + targets (list): + List of targets. + intensities (bool | List): + Whether or not to extract intensity images. If a List, specific + peaks can be extracted, ignoring the rest, which will only have pulse count images + extracted. + replace (bool): + Whether to replace pulse images with intensity images. + + Return: + altered img_data according to args + + """ + # extracting intensity and replacing + if type_utils.any_true(intensities) and replace: + for j, target in enumerate(targets): + # replace only specified targets + if target in intensities: + img_data[0, :, :, j] = img_data[1, :, :, j] + img_data = img_data[[0], :, :, :] + + # not extracting intensity + elif not type_utils.any_true(intensities): + img_data = img_data[[0], :, :, :] + + # extracting intensity but not replacing + else: + img_data = img_data[[0, 1], :, :, :] + + return img_data + + def extract_bin_files(data_dir: str, out_dir: Union[str, None], include_fovs: Union[List[str], None] = None, panel: Union[Tuple[float, float], pd.DataFrame] = (-0.3, 0.0), - intensities: Union[bool, List[str]] = False, time_res: float = 500e-6): + intensities: Union[bool, List[str]] = False, replace=True, + time_res: float = 500e-6): """Converts MibiScope bin files to pulse count, intensity, and intensity * width tiff images Args: @@ -299,15 +340,18 @@ def extract_bin_files(data_dir: str, out_dir: Union[str, None], If a pd.DataFrame, specific peaks with custom integration ranges. Column names must be 'Mass' and 'Target' with integration ranges specified via 'Start' and 'Stop' columns. intensities (bool | List): - Whether or not to extract intensity and intensity * width images. If a List, specific + Whether or not to extract intensity images. If a List, specific peaks can be extracted, ignoring the rest, which will only have pulse count images extracted. + replace (bool): + Whether to replace pulse images with intensity images. time_res (float): Time resolution for scaling parabolic transformation Returns: None | np.ndarray: image data if no out_dir is provided, otherwise no return """ + fov_files = _find_bin_files(data_dir, include_fovs) for fov in fov_files.values(): @@ -323,21 +367,33 @@ def extract_bin_files(data_dir: str, out_dir: Union[str, None], bytes(bf, 'utf-8'), fov['lower_tof_range'], fov['upper_tof_range'], np.array(fov['calc_intensity'], dtype=np.uint8) ) + + # convert intensities=True to list of all targets + if type_utils.any_true(intensities): + if type(intensities) is not list: + intensities = list(fov['targets']) + + img_data = condense_img_data(img_data, list(fov['targets']), intensities, replace) + if out_dir is not None: _write_out( img_data, out_dir, fov['bin'][:-4], fov['targets'], - type_utils.any_true(intensities) + intensities ) else: + if replace or not type_utils.any_true(intensities): + type_list = ['pulse'] + else: + type_list = ['pulse', 'intensities'] image_data.append( xr.DataArray( data=img_data[np.newaxis, :], coords=[ [fov['bin'].split('.')[0]], - ['pulse', 'intensity', 'area'], + type_list, np.arange(img_data.shape[1]), np.arange(img_data.shape[2]), list(fov['targets']), @@ -349,9 +405,6 @@ def extract_bin_files(data_dir: str, out_dir: Union[str, None], if out_dir is None: image_data = xr.concat(image_data, dim='fov') - if not intensities: - image_data = image_data.loc[:, ['pulse'], :, :, :] - return image_data diff --git a/tests/bin_files_test.py b/tests/bin_files_test.py index 9e6da91..cf19c09 100644 --- a/tests/bin_files_test.py +++ b/tests/bin_files_test.py @@ -80,49 +80,95 @@ def case_specified_intensities(self): return ['SMA'] +class FovMetadataTestReplace: + + def case_not_replace(self): + return False + + def case_replace(self): + return True + + @fixture def filepath_checks(): inner_dir_names = [ '', 'intensities', - 'intensity_times_width', ] suffix_names = [ '', '_intensity', - '_int_width', ] - def _filepath_checks(out_dir, fov_name, targets, intensities): + def _filepath_checks(out_dir, fov_name, targets, intensities, replace): assert(os.path.exists(os.path.join(out_dir, fov_name))) + + if type_utils.any_true(intensities): + if type(intensities) is not list: + intensities = targets + for i, (inner_name, suffix) in enumerate(zip(inner_dir_names, suffix_names)): inner_dir = os.path.join(out_dir, fov_name, inner_name) - made_intensity_folder = i < 1 or type_utils.any_true(intensities) + made_intensity_folder = i < 1 or (i == 1 and intensities and not replace) if made_intensity_folder: assert(os.path.exists(inner_dir)) + for target in targets: + tif_path = os.path.join(inner_dir, f'{target}{suffix}.tiff') + if i < 1 or (i == 1 and target in intensities): + assert (os.path.exists(tif_path)) + else: + assert (not os.path.exists(tif_path)) else: assert(not os.path.exists(inner_dir)) - for target in targets: - tif_path = os.path.join(inner_dir, f'{target}{suffix}.tiff') - if made_intensity_folder: - assert(os.path.exists(tif_path)) - else: - assert(not os.path.exists(tif_path)) return _filepath_checks def test_write_out(filepath_checks): - img_data = np.zeros((3, 10, 10, 5), dtype=np.uint32) + img_data_compact = np.zeros((1, 10, 10, 5), dtype=np.uint32) + img_data_ext = np.zeros((2, 10, 10, 5), dtype=np.uint32) fov_name = 'fov1' targets = [chr(ord('a') + i) for i in range(5)] + intensities = [chr(ord('a') + i) for i in range(3)] with tempfile.TemporaryDirectory() as tmpdir: - # correctness - bin_files._write_out(img_data, tmpdir, fov_name, targets) - filepath_checks(tmpdir, fov_name, targets, True) + # correct write out without intensities + bin_files._write_out(img_data_compact, tmpdir, fov_name, targets, intensities=False) + filepath_checks(tmpdir, fov_name, targets, intensities=False, replace=False) + + with tempfile.TemporaryDirectory() as tmpdir: + # correct write out with intensities + bin_files._write_out(img_data_compact, tmpdir, fov_name, targets, intensities=intensities) + filepath_checks(tmpdir, fov_name, targets, intensities=intensities, replace=True) + + with tempfile.TemporaryDirectory() as tmpdir: + # correct write out with intensities and without replacing + bin_files._write_out(img_data_ext, tmpdir, fov_name, targets, intensities=intensities) + filepath_checks(tmpdir, fov_name, targets, intensities=intensities, replace=False) + + +def test_condense_img_data(): + pulse = [[[[0, 0, 0, 0, 0]]]] + intensity = [[[[1, 1, 1, 1, 1]]]] + img_data = np.concatenate((pulse, intensity), axis=0) + targets = [chr(ord('a') + i) for i in range(5)] + intensities = [chr(ord('a') + i) for i in range(3)] + + img_data_replace = [[[[1, 1, 1, 0, 0]]]] + + # test for no intensities + no_intensity_data = bin_files.condense_img_data(img_data, targets, False, replace=True) + assert(np.array_equal(no_intensity_data, pulse)) + + # test for replaced intensities + replaced_data = bin_files.condense_img_data(img_data, targets, intensities, replace=True) + assert(np.array_equal(replaced_data, img_data_replace)) + + # test for not replaced intensities + not_replaced_data = bin_files.condense_img_data(img_data, targets, intensities, replace=False) + assert(np.array_equal(not_replaced_data, img_data)) def _make_blank_file(folder: str, name: str): @@ -167,23 +213,26 @@ class FovMetadataCases: @parametrize_with_cases('panel', cases=FovMetadataTestPanels) @parametrize_with_cases('channels', cases=FovMetadataTestChannels) @parametrize_with_cases('intensities', cases=FovMetadataTestIntensities) - def case_tissue(self, test_dir, fov, panel, channels, intensities): - return test_dir, fov, panel, channels, intensities + @parametrize_with_cases('replace', cases=FovMetadataTestReplace) + def case_tissue(self, test_dir, fov, panel, channels, intensities, replace): + return test_dir, fov, panel, channels, intensities, replace @parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles, has_tag='moly') @parametrize_with_cases('panel', cases=FovMetadataTestPanels, has_tag='specified') @parametrize_with_cases('channels', cases=FovMetadataTestChannels) @parametrize_with_cases('intensities', cases=FovMetadataTestIntensities) - def case_moly(self, test_dir, fov, panel, channels, intensities): - return test_dir, fov, panel, channels, intensities + @parametrize_with_cases('replace', cases=FovMetadataTestReplace) + def case_moly(self, test_dir, fov, panel, channels, intensities, replace): + return test_dir, fov, panel, channels, intensities, replace @pytest.mark.xfail(raises=KeyError, strict=True) @parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles, has_tag='moly') @parametrize_with_cases('panel', cases=FovMetadataTestPanels, has_tag='global') @parametrize_with_cases('channels', cases=FovMetadataTestChannels) @parametrize_with_cases('intensities', cases=FovMetadataTestIntensities) - def case_global_panel_moly(self, test_dir, fov, panel, channels, intensities): - return test_dir, fov, panel, channels, intensities + @parametrize_with_cases('replace', cases=FovMetadataTestReplace) + def case_global_panel_moly(self, test_dir, fov, panel, channels, intensities, replace): + return test_dir, fov, panel, channels, intensities, replace @parametrize_with_cases('test_dir, fov, panel, channels, intensities', cases=FovMetadataCases) @@ -197,20 +246,25 @@ def test_fill_fov_metadata(test_dir, fov, panel, channels, intensities): @parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles) @parametrize_with_cases('panel', cases=FovMetadataTestPanels, has_tag='specified') @parametrize_with_cases('intensities', cases=FovMetadataTestIntensities) -def test_extract_bin_files(test_dir, fov, panel, intensities, filepath_checks): +@parametrize_with_cases('replace', cases=FovMetadataTestReplace) +def test_extract_bin_files(test_dir, fov, panel, intensities, replace, filepath_checks): time_res = 500e-6 + with tempfile.TemporaryDirectory() as tmpdir: - bin_files.extract_bin_files(test_dir, tmpdir, None, panel, intensities, time_res) - filepath_checks(tmpdir, fov['json'].split('.')[0], panel['Target'].values, intensities) + bin_files.extract_bin_files(test_dir, tmpdir, None, panel, intensities, + replace, time_res) + filepath_checks(tmpdir, fov['json'].split('.')[0], panel['Target'].values, intensities, + replace=replace) # test xr write out - test_xr = bin_files.extract_bin_files(test_dir, None, None, panel, intensities, time_res) + test_xr = bin_files.extract_bin_files(test_dir, None, None, panel, intensities, + replace, time_res) assert(list(test_xr.dims) == ['fov', 'type', 'x', 'y', 'channel']) - if not intensities: + if not type_utils.any_true(intensities) or (type_utils.any_true(intensities) and replace): assert(list(test_xr.type) == ['pulse']) else: - assert(list(test_xr.type) == ['pulse', 'intensity', 'area']) + assert(list(test_xr.type) == ['pulse', 'intensities']) assert(len(io_utils.list_files(test_dir, substrs=['.bin'])) == len(test_xr.fov)) if len(test_xr.fov) > 1: