Skip to content

Commit

Permalink
Extract intensities (#38)
Browse files Browse the repository at this point in the history
* remove area and implement replace for extract_bin()

* edit filepath_checks() and test_write_out()

* test_extract_bin_files() working with replace cases

* helper function condense_img_data()

* comments

* test_condense_img_data() passes

* edit tests
  • Loading branch information
camisowers authored Jul 6, 2022
1 parent 656e600 commit fb06d32
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 47 deletions.
95 changes: 74 additions & 21 deletions mibi_bin_tools/bin_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def _set_tof_ranges(fov: Dict[str, Any], higher: np.ndarray, lower: np.ndarray,


def _write_out(img_data: np.ndarray, out_dir: str, fov_name: str, targets: List[str],
intensities=True) -> None:
intensities: Union[bool, List[str]] = False) -> None:
"""Parses extracted data and writes out tifs
Args:
Expand All @@ -73,36 +73,39 @@ def _write_out(img_data: np.ndarray, out_dir: str, fov_name: str, targets: List[
Name of the field of view
targets (array_like):
List of target names (i.e channels)
intensities (bool):
Save intensities
intensities (bool | List):
Whether or not to write out intensity images. If a List, specific
peaks can be written out, ignoring the rest, which will only have pulse count images.
"""
out_dirs = [
os.path.join(out_dir, fov_name),
os.path.join(out_dir, fov_name, 'intensities'),
os.path.join(out_dir, fov_name, 'intensity_times_width')
]
suffixes = [
'',
'_intensity',
'_int_width'
]
save_dtypes = [
np.uint16,
np.uint32,
np.uint32,
]

for i, (out_dir_i, suffix, save_dtype) in enumerate(zip(out_dirs, suffixes, save_dtypes)):
if i > 0 and not intensities:
continue
# break loop when index is larger than type dimension of img_data
if i+1 > img_data.shape[0]:
break
if not os.path.exists(out_dir_i):
os.makedirs(out_dir_i)
for j, target in enumerate(targets):
io.imsave(
os.path.join(out_dir_i, f'{target}{suffix}.tiff'),
img_data[i, :, :, j].astype(save_dtype),
plugin='tifffile',
check_contrast=False
)
# save all first images regardless of replacing
# if not replace (i=1), only save intensity images for specified targets
if i == 0 or (target in list(intensities)):
io.imsave(
os.path.join(out_dir_i, f'{target}{suffix}.tiff'),
img_data[i, :, :, j].astype(save_dtype),
plugin='tifffile',
check_contrast=False
)


def _find_bin_files(data_dir: str,
Expand Down Expand Up @@ -281,10 +284,48 @@ def _parse_intensities(fov: Dict[str, Any], intensities: Union[bool, List[str]])
fov['calc_intensity'] = [False, ] * len(fov['targets'])


def condense_img_data(img_data, targets, intensities, replace):
    """Reduces extracted image data to the planes that should be reported.

    The array passed in is never written to; the result is always a new array.

    Args:
        img_data (np.array):
            Contains the image data with all pulse and intensity information. The first axis
            selects the type (index 0 is pulse, index 1 is intensity) and the last axis
            selects the target.
        targets (list):
            List of targets, in the same order as the last axis of img_data.
        intensities (bool | List):
            Whether or not to extract intensity images. If a List, specific
            peaks can be extracted, ignoring the rest, which will only have pulse count images
            extracted. A plain True selects every target.
        replace (bool):
            Whether to replace pulse images with intensity images.
    Return:
        np.array:
            a single plane (pulse, with the selected targets overwritten by their intensity
            data) if replacing, a single pulse plane if no intensities are requested, otherwise
            the pulse and intensity planes
    """
    # not extracting intensity: only the pulse plane is kept
    if not type_utils.any_true(intensities):
        return img_data[[0], :, :, :]

    # extracting intensity but not replacing: keep pulse and intensity side by side
    if not replace:
        return img_data[[0, 1], :, :, :]

    # extracting intensity and replacing
    # a bare boolean cannot be searched with `in`, so True is expanded to all targets
    if isinstance(intensities, (bool, np.bool_)):
        selected = targets
    else:
        selected = intensities

    # list-based indexing yields a copy, so the writes below leave the caller's data intact
    condensed = img_data[[0], :, :, :]
    for j, target in enumerate(targets):
        # replace only specified targets
        if target in selected:
            condensed[0, :, :, j] = img_data[1, :, :, j]

    return condensed


def extract_bin_files(data_dir: str, out_dir: Union[str, None],
include_fovs: Union[List[str], None] = None,
panel: Union[Tuple[float, float], pd.DataFrame] = (-0.3, 0.0),
intensities: Union[bool, List[str]] = False, time_res: float = 500e-6):
intensities: Union[bool, List[str]] = False, replace=True,
time_res: float = 500e-6):
"""Converts MibiScope bin files to pulse count, intensity, and intensity * width tiff images
Args:
Expand All @@ -299,15 +340,18 @@ def extract_bin_files(data_dir: str, out_dir: Union[str, None],
If a pd.DataFrame, specific peaks with custom integration ranges. Column names must be
'Mass' and 'Target' with integration ranges specified via 'Start' and 'Stop' columns.
intensities (bool | List):
Whether or not to extract intensity and intensity * width images. If a List, specific
Whether or not to extract intensity images. If a List, specific
peaks can be extracted, ignoring the rest, which will only have pulse count images
extracted.
replace (bool):
Whether to replace pulse images with intensity images.
time_res (float):
Time resolution for scaling parabolic transformation
Returns:
None | np.ndarray:
image data if no out_dir is provided, otherwise no return
"""

fov_files = _find_bin_files(data_dir, include_fovs)

for fov in fov_files.values():
Expand All @@ -323,21 +367,33 @@ def extract_bin_files(data_dir: str, out_dir: Union[str, None],
bytes(bf, 'utf-8'), fov['lower_tof_range'],
fov['upper_tof_range'], np.array(fov['calc_intensity'], dtype=np.uint8)
)

# convert intensities=True to list of all targets
if type_utils.any_true(intensities):
if type(intensities) is not list:
intensities = list(fov['targets'])

img_data = condense_img_data(img_data, list(fov['targets']), intensities, replace)

if out_dir is not None:
_write_out(
img_data,
out_dir,
fov['bin'][:-4],
fov['targets'],
type_utils.any_true(intensities)
intensities
)
else:
if replace or not type_utils.any_true(intensities):
type_list = ['pulse']
else:
type_list = ['pulse', 'intensities']
image_data.append(
xr.DataArray(
data=img_data[np.newaxis, :],
coords=[
[fov['bin'].split('.')[0]],
['pulse', 'intensity', 'area'],
type_list,
np.arange(img_data.shape[1]),
np.arange(img_data.shape[2]),
list(fov['targets']),
Expand All @@ -349,9 +405,6 @@ def extract_bin_files(data_dir: str, out_dir: Union[str, None],
if out_dir is None:
image_data = xr.concat(image_data, dim='fov')

if not intensities:
image_data = image_data.loc[:, ['pulse'], :, :, :]

return image_data


Expand Down
106 changes: 80 additions & 26 deletions tests/bin_files_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,49 +80,95 @@ def case_specified_intensities(self):
return ['SMA']


class FovMetadataTestReplace:
    """Case provider for the ``replace`` argument used by the parametrized tests."""

    def case_not_replace(self):
        """Case in which ``replace`` is switched off."""
        return False

    def case_replace(self):
        """Case in which ``replace`` is switched on."""
        return True


@fixture
def filepath_checks():
inner_dir_names = [
'',
'intensities',
'intensity_times_width',
]

suffix_names = [
'',
'_intensity',
'_int_width',
]

def _filepath_checks(out_dir, fov_name, targets, intensities):
def _filepath_checks(out_dir, fov_name, targets, intensities, replace):
assert(os.path.exists(os.path.join(out_dir, fov_name)))

if type_utils.any_true(intensities):
if type(intensities) is not list:
intensities = targets

for i, (inner_name, suffix) in enumerate(zip(inner_dir_names, suffix_names)):
inner_dir = os.path.join(out_dir, fov_name, inner_name)
made_intensity_folder = i < 1 or type_utils.any_true(intensities)
made_intensity_folder = i < 1 or (i == 1 and intensities and not replace)
if made_intensity_folder:
assert(os.path.exists(inner_dir))
for target in targets:
tif_path = os.path.join(inner_dir, f'{target}{suffix}.tiff')
if i < 1 or (i == 1 and target in intensities):
assert (os.path.exists(tif_path))
else:
assert (not os.path.exists(tif_path))
else:
assert(not os.path.exists(inner_dir))
for target in targets:
tif_path = os.path.join(inner_dir, f'{target}{suffix}.tiff')
if made_intensity_folder:
assert(os.path.exists(tif_path))
else:
assert(not os.path.exists(tif_path))

return _filepath_checks


def test_write_out(filepath_checks):

img_data = np.zeros((3, 10, 10, 5), dtype=np.uint32)
img_data_compact = np.zeros((1, 10, 10, 5), dtype=np.uint32)
img_data_ext = np.zeros((2, 10, 10, 5), dtype=np.uint32)
fov_name = 'fov1'
targets = [chr(ord('a') + i) for i in range(5)]
intensities = [chr(ord('a') + i) for i in range(3)]

with tempfile.TemporaryDirectory() as tmpdir:
# correctness
bin_files._write_out(img_data, tmpdir, fov_name, targets)
filepath_checks(tmpdir, fov_name, targets, True)
# correct write out without intensities
bin_files._write_out(img_data_compact, tmpdir, fov_name, targets, intensities=False)
filepath_checks(tmpdir, fov_name, targets, intensities=False, replace=False)

with tempfile.TemporaryDirectory() as tmpdir:
# correct write out with intensities
bin_files._write_out(img_data_compact, tmpdir, fov_name, targets, intensities=intensities)
filepath_checks(tmpdir, fov_name, targets, intensities=intensities, replace=True)

with tempfile.TemporaryDirectory() as tmpdir:
# correct write out with intensities and without replacing
bin_files._write_out(img_data_ext, tmpdir, fov_name, targets, intensities=intensities)
filepath_checks(tmpdir, fov_name, targets, intensities=intensities, replace=False)


def test_condense_img_data():
    """Checks the three condense_img_data() modes against independent expected values."""
    # a single pixel with five channels: pulse plane is all zeros, intensity plane all ones
    pulse = [[[[0, 0, 0, 0, 0]]]]
    intensity = [[[[1, 1, 1, 1, 1]]]]
    img_data = np.concatenate((pulse, intensity), axis=0)
    # frozen reference for the expected values; every call below receives its own copy of
    # the input so that no case can depend on state left behind by an earlier one
    expected_full = img_data.copy()
    targets = [chr(ord('a') + i) for i in range(5)]
    intensities = [chr(ord('a') + i) for i in range(3)]

    # only the first three targets are requested, so only they take the intensity value
    img_data_replace = [[[[1, 1, 1, 0, 0]]]]

    # test for no intensities
    no_intensity_data = bin_files.condense_img_data(
        img_data.copy(), targets, False, replace=True
    )
    assert np.array_equal(no_intensity_data, pulse)

    # test for replaced intensities
    replaced_data = bin_files.condense_img_data(
        img_data.copy(), targets, intensities, replace=True
    )
    assert np.array_equal(replaced_data, img_data_replace)

    # test for not replaced intensities
    not_replaced_data = bin_files.condense_img_data(
        img_data.copy(), targets, intensities, replace=False
    )
    assert np.array_equal(not_replaced_data, expected_full)


def _make_blank_file(folder: str, name: str):
Expand Down Expand Up @@ -167,23 +213,26 @@ class FovMetadataCases:
@parametrize_with_cases('panel', cases=FovMetadataTestPanels)
@parametrize_with_cases('channels', cases=FovMetadataTestChannels)
@parametrize_with_cases('intensities', cases=FovMetadataTestIntensities)
def case_tissue(self, test_dir, fov, panel, channels, intensities):
return test_dir, fov, panel, channels, intensities
@parametrize_with_cases('replace', cases=FovMetadataTestReplace)
def case_tissue(self, test_dir, fov, panel, channels, intensities, replace):
return test_dir, fov, panel, channels, intensities, replace

@parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles, has_tag='moly')
@parametrize_with_cases('panel', cases=FovMetadataTestPanels, has_tag='specified')
@parametrize_with_cases('channels', cases=FovMetadataTestChannels)
@parametrize_with_cases('intensities', cases=FovMetadataTestIntensities)
def case_moly(self, test_dir, fov, panel, channels, intensities):
return test_dir, fov, panel, channels, intensities
@parametrize_with_cases('replace', cases=FovMetadataTestReplace)
def case_moly(self, test_dir, fov, panel, channels, intensities, replace):
return test_dir, fov, panel, channels, intensities, replace

@pytest.mark.xfail(raises=KeyError, strict=True)
@parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles, has_tag='moly')
@parametrize_with_cases('panel', cases=FovMetadataTestPanels, has_tag='global')
@parametrize_with_cases('channels', cases=FovMetadataTestChannels)
@parametrize_with_cases('intensities', cases=FovMetadataTestIntensities)
def case_global_panel_moly(self, test_dir, fov, panel, channels, intensities):
return test_dir, fov, panel, channels, intensities
@parametrize_with_cases('replace', cases=FovMetadataTestReplace)
def case_global_panel_moly(self, test_dir, fov, panel, channels, intensities, replace):
return test_dir, fov, panel, channels, intensities, replace


@parametrize_with_cases('test_dir, fov, panel, channels, intensities', cases=FovMetadataCases)
Expand All @@ -197,20 +246,25 @@ def test_fill_fov_metadata(test_dir, fov, panel, channels, intensities):
@parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles)
@parametrize_with_cases('panel', cases=FovMetadataTestPanels, has_tag='specified')
@parametrize_with_cases('intensities', cases=FovMetadataTestIntensities)
def test_extract_bin_files(test_dir, fov, panel, intensities, filepath_checks):
@parametrize_with_cases('replace', cases=FovMetadataTestReplace)
def test_extract_bin_files(test_dir, fov, panel, intensities, replace, filepath_checks):
time_res = 500e-6

with tempfile.TemporaryDirectory() as tmpdir:
bin_files.extract_bin_files(test_dir, tmpdir, None, panel, intensities, time_res)
filepath_checks(tmpdir, fov['json'].split('.')[0], panel['Target'].values, intensities)
bin_files.extract_bin_files(test_dir, tmpdir, None, panel, intensities,
replace, time_res)
filepath_checks(tmpdir, fov['json'].split('.')[0], panel['Target'].values, intensities,
replace=replace)

# test xr write out
test_xr = bin_files.extract_bin_files(test_dir, None, None, panel, intensities, time_res)
test_xr = bin_files.extract_bin_files(test_dir, None, None, panel, intensities,
replace, time_res)
assert(list(test_xr.dims) == ['fov', 'type', 'x', 'y', 'channel'])

if not intensities:
if not type_utils.any_true(intensities) or (type_utils.any_true(intensities) and replace):
assert(list(test_xr.type) == ['pulse'])
else:
assert(list(test_xr.type) == ['pulse', 'intensity', 'area'])
assert(list(test_xr.type) == ['pulse', 'intensities'])

assert(len(io_utils.list_files(test_dir, substrs=['.bin'])) == len(test_xr.fov))
if len(test_xr.fov) > 1:
Expand Down

0 comments on commit fb06d32

Please sign in to comment.