diff --git a/mibi_bin_tools/bin_files.py b/mibi_bin_tools/bin_files.py index 6c03259..af58ab7 100644 --- a/mibi_bin_tools/bin_files.py +++ b/mibi_bin_tools/bin_files.py @@ -7,8 +7,8 @@ import skimage.io as io import xarray as xr -from mibi_bin_tools import io_utils, type_utils, _extract_bin -from tmi.image_utils import save_image +from mibi_bin_tools import type_utils, _extract_bin +from tmi import io_utils, image_utils def _mass2tof(masses_arr: np.ndarray, mass_offset: float, mass_gain: float, @@ -102,7 +102,7 @@ def _write_out(img_data: np.ndarray, out_dir: str, fov_name: str, targets: List[ # if not replace (i=1), only save intensity images for specified targets if i == 0 or (target in list(intensities)): fname = os.path.join(out_dir_i, f"{target}{suffix}.tiff") - save_image(fname=fname, data=img_data[i, :, :, j].astype(save_dtype)) + image_utils.save_image(fname=fname, data=img_data[i, :, :, j].astype(save_dtype)) def _find_bin_files(data_dir: str, diff --git a/mibi_bin_tools/io_utils.py b/mibi_bin_tools/io_utils.py deleted file mode 100644 index 28ab802..0000000 --- a/mibi_bin_tools/io_utils.py +++ /dev/null @@ -1,167 +0,0 @@ -import os -import warnings - - -def list_files(dir_name, substrs=None, exact_match=False): - """ List all files in a directory containing at least one given substring - - Args: - dir_name (str): - Parent directory for files of interest - substrs (str or list): - Substring matching criteria, defaults to None (all files) - exact_match (bool): - If True, will match exact file names (so 'C' will match only 'C.tif') - If False, will match substr pattern in file (so 'C' will match 'C.tif' and 'CD30.tif') - - Returns: - list: - List of files containing at least one of the substrings - """ - - files = os.listdir(dir_name) - files = [file for file in files if not os.path.isdir(os.path.join(dir_name, file))] - - # default to return all files - if substrs is None: - return files - - # handle case where substrs is a single string (not wrapped in list) - if type(substrs) is not list: - substrs = [substrs] - - if exact_match: - matches = [file - for file in files - if any([ - substr == os.path.splitext(file)[0] - for substr in substrs - ])] - else: - matches = [file - for file in files - if any([ - substr in file - for substr in substrs - ])] - - return matches - - -def remove_file_extensions(files): - """Removes file extensions from a list of files - - Args: - files (list): - List of files to remove file extensions from. - Any element that doesn't have an extension is left unchanged - - Raises: - UserWarning: - Some of the processed file names still contain a period - - Returns: - list: - List of files without file extensions - """ - - # make sure we don't try to split on a non-existent list - if files is None: - return - - # remove the file extension - names = [os.path.splitext(name)[0] for name in files] - - # identify names with '.' in them: these may not be processed correctly - bad_names = [name for name in names if '.' in name] - if len(bad_names) > 0: - warnings.warn(f"These files still have \".\" in them after file extension removal: " - f"{','.join(bad_names)}, " - f"please double check that these are the correct names") - - return names - - -def extract_delimited_names(names, delimiter='_', delimiter_optional=True): - """For a given list of names, extract the delimited prefix - - Examples (if delimiter='_'): - - - 'fov1' becomes 'fov1' - - 'fov2_part1' becomes 'fov2' - - 'fov3_part1_part2' becomes 'fov3' - - Args: - names (list): - List of names to split by delimiter. - Make sure to call remove_file_extensions first if you need to drop file extensions. - delimiter (str): - Character separator used to determine filename prefix. Defaults to '_'. - delimiter_optional (bool): - If False, function will return None if any of the files don't contain the delimiter. - Defaults to True. Ignored if delimiter is None. - - Raises: - UserWarning: - Raised if delimiter_optional=False and no delimiter is present in any of the files - - Returns: - list: - List of extracted names. Indicies should match that of files - """ - - if names is None: - return - - # check for bad files/folders - if delimiter is not None and not delimiter_optional: - no_delim = [ - delimiter not in name - for name in names - ] - if any(no_delim): - print(f"The following files do not have the mandatory delimiter, " - f"'{delimiter}': " - f"{','.join([name for indx,name in enumerate(names) if no_delim[indx]])}") - warnings.warn("files without mandatory delimiter") - - return None - - # now split on the delimiter as well - names = [name.split(delimiter)[0] for name in names] - - return names - - -def list_folders(dir_name, substrs=None): - """ List all folders in a directory containing at least one given substring - - Args: - dir_name (str): - Parent directory for folders of interest - substrs (str or list): - Substring matching criteria, defaults to None (all folders) - - Returns: - list: - List of folders containing at least one of the substrings - """ - files = os.listdir(dir_name) - folders = [file for file in files if os.path.isdir(os.path.join(dir_name, file))] - - # default to return all files - if substrs is None: - return folders - - # handle case where substrs is a single string (not wrapped in list) - if type(substrs) is not list: - substrs = [substrs] - - matches = [folder - for folder in folders - if any([ - substr in folder - for substr in substrs - ])] - - return matches diff --git a/mibi_bin_tools/panel_utils.py b/mibi_bin_tools/panel_utils.py index e13fd68..3dbe86a 100644 --- a/mibi_bin_tools/panel_utils.py +++ b/mibi_bin_tools/panel_utils.py @@ -1,7 +1,7 @@ from typing import Union, List import pandas as pd -from mibi_bin_tools.type_utils import make_iterable +from tmi import misc_utils def make_panel(mass: Union[float, List[float]], @@ -25,9 +25,9 @@ def make_panel(mass: Union[float, List[float]], single mass panel as pandas dataframe """ - mass = make_iterable(mass) + mass = misc_utils.make_iterable(mass) if target_name is not None: - target_name = make_iterable(target_name) + target_name = misc_utils.make_iterable(target_name) if len(mass) != len(target_name): raise ValueError( '`mass` and `target_name` did not contain the same number of elements. ' @@ -38,7 +38,7 @@ def make_panel(mass: Union[float, List[float]], # check for range lists for r in (low_range, high_range): - if make_iterable(r) == r: + if misc_utils.make_iterable(r) == r: if len(r) != len(mass): raise ValueError( '`mass` and a range argument did not contain the same number of elements. ' @@ -46,8 +46,8 @@ def make_panel(mass: Union[float, List[float]], 'be set to float values, e.g `low_range=0.3`' ) - low_range = make_iterable(low_range) - high_range = make_iterable(high_range) + low_range = misc_utils.make_iterable(low_range) + high_range = misc_utils.make_iterable(high_range) if len(low_range) != len(mass): low_range = low_range * len(mass) diff --git a/mibi_bin_tools/type_utils.py b/mibi_bin_tools/type_utils.py index 4129d9b..ced0758 100644 --- a/mibi_bin_tools/type_utils.py +++ b/mibi_bin_tools/type_utils.py @@ -1,4 +1,5 @@ from typing import Union, Iterable +from tmi import misc_utils def any_true(a: Union[bool, Iterable[bool]]) -> bool: @@ -12,20 +13,4 @@ def any_true(a: Union[bool, Iterable[bool]]) -> bool: bool: whether any true values where found """ - return any(make_iterable(a)) - - -def make_iterable(a: Union[type, Iterable[type]], ignore_str=True) -> Iterable[type]: - """ Convert noniterable type to singelton in list - - Args: - a (T | Iterable[T]): - value or iterable of type T - ignore_str (bool): - whether to ignore the iterability of the str type - - Returns: - List[T]: - a as singleton in list, or a if a was already iterable. - """ - return a if hasattr(a, '__iter__') and not (isinstance(a, str) and ignore_str) else [a] + return any(misc_utils.make_iterable(a)) diff --git a/requirements.txt b/requirements.txt index 1f164ec..87341fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ matplotlib==3.4.3 numpy>=1.21.6,<2 pandas>=1.3.5,<2 scikit-image>=0.19.3,<0.20 -tmi @ git+https://github.com/angelolab/tmi.git +tmi @ git+https://github.com/angelolab/tmi.git@v0.0.1 xarray>=2022.6.0,<2023 diff --git a/tests/bin_files_test.py b/tests/bin_files_test.py index 622af87..196c961 100644 --- a/tests/bin_files_test.py +++ b/tests/bin_files_test.py @@ -9,7 +9,8 @@ import numpy as np import pandas as pd -from mibi_bin_tools import bin_files, io_utils, type_utils, _extract_bin +from mibi_bin_tools import bin_files, type_utils, _extract_bin +from tmi import io_utils THIS_DIR = Path(__file__).parent diff --git a/tests/io_utils_test.py b/tests/io_utils_test.py deleted file mode 100644 index a65a9c5..0000000 --- a/tests/io_utils_test.py +++ /dev/null @@ -1,129 +0,0 @@ -import os -import tempfile -import pathlib -import pytest - -from mibi_bin_tools import io_utils as iou - - -def test_list_files(): - # test extension matching - with tempfile.TemporaryDirectory() as temp_dir: - # set up temp_dir files - filenames = [ - 'tf.txt', - 'othertf.txt', - 'test.out', - 'test.csv', - ] - for filename in filenames: - pathlib.Path(os.path.join(temp_dir, filename)).touch() - - # add extra folder (shouldn't be picked up) - os.mkdir(os.path.join(temp_dir, 'badfolder_test')) - - # test substrs is None (default) - get_all = iou.list_files(temp_dir) - assert sorted(get_all) == sorted(filenames) - - # test substrs is not list (single string) - get_txt = iou.list_files(temp_dir, substrs='.txt') - assert sorted(get_txt) == sorted(filenames[0:2]) - - # test substrs is list - get_test_and_other = iou.list_files(temp_dir, substrs=['.txt', '.out']) - assert sorted(get_test_and_other) == sorted(filenames[:3]) - - # test file name exact matching - with tempfile.TemporaryDirectory() as temp_dir: - filenames = [ - 'chan0.tif', - 'chan.tif', - 'c.tif' - ] - for filename in filenames: - pathlib.Path(os.path.join(temp_dir, filename)).touch() - - # add extra folder (shouldn't be picked up) - os.mkdir(os.path.join(temp_dir, 'badfolder_test')) - - # test substrs is None (default) - get_all = iou.list_files(temp_dir, exact_match=True) - assert sorted(get_all) == sorted(filenames) - - # test substrs is not list (single string) - get_txt = iou.list_files(temp_dir, substrs='c', exact_match=True) - assert sorted(get_txt) == [filenames[2]] - - # test substrs is list - get_test_and_other = iou.list_files(temp_dir, substrs=['c', 'chan'], exact_match=True) - assert sorted(get_test_and_other) == sorted(filenames[1:]) - - -def test_remove_file_extensions(): - # test a mixture of file paths and extensions - files = [ - 'fov1.tiff', - 'fov2.tif', - 'fov3.png', - 'fov4.jpg' - ] - - assert iou.remove_file_extensions(None) is None - assert iou.remove_file_extensions([]) == [] - - files_sans_ext = ['fov1', 'fov2', 'fov3', 'fov4'] - - new_files = iou.remove_file_extensions(files) - - assert new_files == files_sans_ext - - with pytest.warns(UserWarning): - new_files = iou.remove_file_extensions(['fov5.tar.gz', 'fov6.sample.csv']) - assert new_files == ['fov5.tar', 'fov6.sample'] - - -def test_extract_delimited_names(): - filenames = [ - 'fov1_restofname', - 'fov2', - ] - - # test no files given (None/[]) - assert iou.extract_delimited_names(None) is None - assert iou.extract_delimited_names([]) == [] - - # non-optional delimiter warning - with pytest.warns(UserWarning): - iou.extract_delimited_names(['fov2'], delimiter='_', delimiter_optional=False) - - # test regular files list - assert ['fov1', 'fov2'] == iou.extract_delimited_names(filenames, delimiter='_') - - -def test_list_folders(): - with tempfile.TemporaryDirectory() as temp_dir: - # set up temp_dir subdirs - dirnames = [ - 'tf_txt', - 'othertf_txt', - 'test_csv', - 'test_out', - ] - for dirname in dirnames: - os.mkdir(os.path.join(temp_dir, dirname)) - - # add extra file - pathlib.Path(os.path.join(temp_dir, 'test_badfile.txt')).touch() - - # test substrs is None (default) - get_all = iou.list_folders(temp_dir) - assert get_all.sort() == dirnames.sort() - - # test substrs is not list (single string) - get_txt = iou.list_folders(temp_dir, substrs='_txt') - assert get_txt.sort() == dirnames[0:2].sort() - - # test substrs is list - get_test_and_other = iou.list_folders(temp_dir, substrs=['test_', 'other']) - assert get_test_and_other.sort() == dirnames[1:].sort()