Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds total spectra extraction #35

Merged
merged 14 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions src/mibi_bin_tools/_extract_bin.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,75 @@ cdef MAXINDEX_t _extract_total_counts(const char* filename):
return counts


cdef _extract_total_spectra(const char* filename, DTYPE_t low_range, DTYPE_t high_range):
"""Extract total spectra from bin file

Args:
filename (const char*):
Name of bin file to extract
low_range (np.uint16_t):
The lowest time interval to consider
high_range (np.uint16_t):
The highest time interval to consider
"""
cdef DTYPE_t num_x, num_y, num_trig, num_frames, desc_len, trig, num_pulses, pulse, time
cdef MAXINDEX_t data_start, pix

# 10MB buffer
cdef MAXINDEX_t BUFFER_SIZE = 10 * 1024 * 1024
cdef char* file_buffer = <char*> malloc(BUFFER_SIZE * sizeof(char))
cdef MAXINDEX_t buffer_idx = 0

# open file
cdef FILE* fp
fp = fopen(filename, "rb")

# note, if cython has packed structs, this would be easier
# or even macros tbh
fseek(fp, 0x6, SEEK_SET)
fread(&num_x, sizeof(DTYPE_t), 1, fp)
fread(&num_y, sizeof(DTYPE_t), 1, fp)
fread(&num_trig, sizeof(DTYPE_t), 1, fp)
fread(&num_frames, sizeof(DTYPE_t), 1, fp)
fseek(fp, 0x2, SEEK_CUR)
fread(&desc_len, sizeof(DTYPE_t), 1, fp)

spectra_by_pixel = \
cvarray(
shape=(<MAXINDEX_t>(num_x) * <MAXINDEX_t>(num_y),
<MAXINDEX_t>(high_range) - <MAXINDEX_t>(low_range) + 1),
itemsize=sizeof(SMALL_t),
format='B'
)
cdef SMALL_t[:, :] spectra_by_pixel_view = spectra_by_pixel
spectra_by_pixel_view[:, :] = 0

data_start = \
<MAXINDEX_t>(num_x) * <MAXINDEX_t>(num_y) * <MAXINDEX_t>(num_frames) * 8 + desc_len + 0x12

fseek(fp, data_start, SEEK_SET)
fread(file_buffer, sizeof(char), BUFFER_SIZE, fp)
for pix in range(<MAXINDEX_t>(num_x) * <MAXINDEX_t>(num_y)):
for trig in range(num_trig):
_check_buffer_refill(fp, file_buffer, &buffer_idx, 0x8 * sizeof(char), BUFFER_SIZE)
memcpy(&num_pulses, file_buffer + buffer_idx + 0x6, sizeof(time))
buffer_idx += 0x8
for pulse in range(num_pulses):
_check_buffer_refill(fp, file_buffer, &buffer_idx, 0x5 * sizeof(char), BUFFER_SIZE)
memcpy(&time, file_buffer + buffer_idx, sizeof(time))
buffer_idx += 0x5

if time >= low_range and time <= high_range:
spectra_by_pixel_view[pix, <MAXINDEX_t>(time) - <MAXINDEX_t>(low_range)] += 1

fclose(fp)
free(file_buffer)

return np.asarray(spectra_by_pixel, dtype=np.uint8).reshape(
(<MAXINDEX_t>num_x, <MAXINDEX_t>num_y, <MAXINDEX_t>high_range - <MAXINDEX_t>low_range + 1)
)


def c_extract_bin(char* filename, DTYPE_t[:] low_range,
DTYPE_t[:] high_range, SMALL_t[:] calc_intensity):
return np.asarray(
Expand All @@ -437,3 +506,6 @@ def c_pulse_height_vs_positive_pixel(char* filename, DTYPE_t low_range, DTYPE_t
def c_total_counts(char* filename):
counts = _extract_total_counts(filename)
return int(counts)

def c_total_spectra(char* filename, DTYPE_t low_range, DTYPE_t high_range):
return _extract_total_spectra(filename, low_range, high_range)
83 changes: 83 additions & 0 deletions src/mibi_bin_tools/bin_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,27 @@ def _mass2tof(masses_arr: np.ndarray, mass_offset: float, mass_gain: float,
return (mass_gain * np.sqrt(masses_arr) + mass_offset) / time_res


def _tof2mass(tof_arr: np.ndarray, mass_offset: float, mass_gain: float,
time_res: float) -> np.ndarray:
"""Convert array of time of flight values to equivalent m/z

Args:
tof_arr (array_like):
Array of time of flight values
mass_offset (float):
Mass offset for parabolic transformation
mass_gain (float):
Mass gain for parabolic transformation
time_res (float):
Time resolution for scaling parabolic transformation

Returns:
array_like:
Array of m/z values; indicies paried to `tof_range`
"""
return (((time_res * tof_arr) - mass_offset) / mass_gain) ** 2


def _set_tof_ranges(fov: Dict[str, Any], higher: np.ndarray, lower: np.ndarray,
time_res: float) -> None:
"""Converts and stores provided mass ranges as time of flight ranges within fov metadata
Expand Down Expand Up @@ -521,3 +542,65 @@ def get_total_counts(data_dir: str, include_fovs: Union[List[str], None] = None)
outs = {name: _extract_bin.c_total_counts(bytes(bf, 'utf-8')) for name, bf in bin_files}

return outs


def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None,
panel_df: pd.DataFrame = None, range_pad: float = 0.5):
"""Retrieves total spectra for each field of view

Args:
data_dir (str | PathLike):
Directory containing bin files as well as accompanying json metadata files
include_fovs (List | None):
List of fovs to include. Includes all if None.
panel_df (pd.DataFrame | None):
If not None, get default callibration information
range_offset (float):
Mass padding below the lowest and highest masses to consider when binning.
The time-of-flight array go from TOF of (lowest mass - 0.5) to (highest_mass + 0.5).

Returns:
tuple (dict, dict, list):
dict of total spectra and the corresponding low and high ranges, with fov names as keys
"""
if range_pad < 0:
raise ValueError("range_pad must be >= 0")

fov_metadata = _find_bin_files(data_dir, include_fovs)
if panel_df is not None:
for fov_info in fov_metadata.values():
_fill_fov_metadata(data_dir, fov_info, panel_df, False, 500e-6)

bin_metadata = list(fov_metadata.items())

# TODO: this assumes the panel_df is sorted
lowest_mass = panel_df.loc[0, "Stop"] - range_pad
highest_mass = panel_df.loc[panel_df.shape[0] - 1, "Stop"] + range_pad

# store the spectra, as well as the time intervals for each FOV
spectra = {}
tof_interval = {}
for fov_name, fov_info in bin_metadata:
# compute the low and high boundaries, this will differ per FOV
mass_offset = fov_info["mass_offset"]
mass_gain = fov_info["mass_gain"]
tof_boundaries = _mass2tof(
np.array([lowest_mass, highest_mass]), mass_offset, mass_gain, 500e-6
).astype(np.uint16)

# set the boundaries
tof_interval[fov_name] = tof_boundaries

# extract the spectra on an individual basis per channel
spectra[fov_name] = _extract_bin.c_total_spectra(
bytes(os.path.join(data_dir, fov_info["bin"]), "utf-8"),
tof_boundaries[0],
tof_boundaries[1]
)

# generate equivalent m/z values
tof_arr = np.arange(tof_boundaries[0], tof_boundaries[1] + 1)
mass_arr = _tof2mass(tof_arr, mass_offset, mass_gain, 500e-6)
fov_info["mass_spectra_points"] = mass_arr

return spectra, tof_interval, fov_metadata
21 changes: 20 additions & 1 deletion tests/bin_files_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,4 +343,23 @@ def test_get_total_counts(test_dir, fov):
bytes(bf, 'utf-8'), np.array([0], np.uint16),
np.array([-1], dtype=np.uint16), np.array([False], dtype=np.uint8)
)
assert (total_counts['fov-1-scan-1'] == np.sum(total_ion_image[0, :, :, :]))
assert total_counts['fov-1-scan-1'] == np.sum(total_ion_image[0, :, :, :])


@parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles)
@parametrize_with_cases('panel', cases=FovMetadataTestPanels, has_tag='specified')
def test_get_total_spectra(test_dir, fov, panel):
# ensure range_pad is positive
with pytest.raises(ValueError):
bin_files.get_total_spectra(
test_dir,
[fov['json'].split('.')[0]],
panel,
range_pad=-0.1
)

bin_files.get_total_spectra(
test_dir,
[fov['json'].split('.')[0]],
panel
)
Loading