diff --git a/src/mibi_bin_tools/_extract_bin.pyx b/src/mibi_bin_tools/_extract_bin.pyx index e9db5d3..7e3aebd 100644 --- a/src/mibi_bin_tools/_extract_bin.pyx +++ b/src/mibi_bin_tools/_extract_bin.pyx @@ -413,6 +413,75 @@ cdef MAXINDEX_t _extract_total_counts(const char* filename): return counts +cdef _extract_total_spectra(const char* filename, DTYPE_t low_range, DTYPE_t high_range): + """Extract total spectra from bin file + + Args: + filename (const char*): + Name of bin file to extract + low_range (np.uint16_t): + The lowest time interval to consider + high_range (np.uint16_t): + The highest time interval to consider + """ + cdef DTYPE_t num_x, num_y, num_trig, num_frames, desc_len, trig, num_pulses, pulse, time + cdef MAXINDEX_t data_start, pix + + # 10MB buffer + cdef MAXINDEX_t BUFFER_SIZE = 10 * 1024 * 1024 + cdef char* file_buffer = malloc(BUFFER_SIZE * sizeof(char)) + cdef MAXINDEX_t buffer_idx = 0 + + # open file + cdef FILE* fp + fp = fopen(filename, "rb") + + # note, if cython has packed structs, this would be easier + # or even macros tbh + fseek(fp, 0x6, SEEK_SET) + fread(&num_x, sizeof(DTYPE_t), 1, fp) + fread(&num_y, sizeof(DTYPE_t), 1, fp) + fread(&num_trig, sizeof(DTYPE_t), 1, fp) + fread(&num_frames, sizeof(DTYPE_t), 1, fp) + fseek(fp, 0x2, SEEK_CUR) + fread(&desc_len, sizeof(DTYPE_t), 1, fp) + + spectra_by_pixel = \ + cvarray( + shape=((num_x) * (num_y), + (high_range) - (low_range) + 1), + itemsize=sizeof(SMALL_t), + format='B' + ) + cdef SMALL_t[:, :] spectra_by_pixel_view = spectra_by_pixel + spectra_by_pixel_view[:, :] = 0 + + data_start = \ + (num_x) * (num_y) * (num_frames) * 8 + desc_len + 0x12 + + fseek(fp, data_start, SEEK_SET) + fread(file_buffer, sizeof(char), BUFFER_SIZE, fp) + for pix in range((num_x) * (num_y)): + for trig in range(num_trig): + _check_buffer_refill(fp, file_buffer, &buffer_idx, 0x8 * sizeof(char), BUFFER_SIZE) + memcpy(&num_pulses, file_buffer + buffer_idx + 0x6, sizeof(time)) + buffer_idx += 0x8 + for pulse in range(num_pulses): + _check_buffer_refill(fp, file_buffer, &buffer_idx, 0x5 * sizeof(char), BUFFER_SIZE) + memcpy(&time, file_buffer + buffer_idx, sizeof(time)) + buffer_idx += 0x5 + + if time >= low_range and time <= high_range: + spectra_by_pixel_view[pix, (time) - (low_range)] += 1 + + fclose(fp) + free(file_buffer) + + return np.asarray(spectra_by_pixel, dtype=np.uint8).reshape( + (num_x, num_y, high_range - low_range + 1) + ) + + def c_extract_bin(char* filename, DTYPE_t[:] low_range, DTYPE_t[:] high_range, SMALL_t[:] calc_intensity): return np.asarray( @@ -437,3 +506,6 @@ def c_pulse_height_vs_positive_pixel(char* filename, DTYPE_t low_range, DTYPE_t def c_total_counts(char* filename): counts = _extract_total_counts(filename) return int(counts) + +def c_total_spectra(char* filename, DTYPE_t low_range, DTYPE_t high_range): + return _extract_total_spectra(filename, low_range, high_range) diff --git a/src/mibi_bin_tools/bin_files.py b/src/mibi_bin_tools/bin_files.py index f46be9a..4f724cb 100644 --- a/src/mibi_bin_tools/bin_files.py +++ b/src/mibi_bin_tools/bin_files.py @@ -32,6 +32,27 @@ def _mass2tof(masses_arr: np.ndarray, mass_offset: float, mass_gain: float, return (mass_gain * np.sqrt(masses_arr) + mass_offset) / time_res +def _tof2mass(tof_arr: np.ndarray, mass_offset: float, mass_gain: float, + time_res: float) -> np.ndarray: + """Convert array of time of flight values to equivalent m/z + + Args: + tof_arr (array_like): + Array of time of flight values + mass_offset (float): + Mass offset for parabolic transformation + mass_gain (float): + Mass gain for parabolic transformation + time_res (float): + Time resolution for scaling parabolic transformation + + Returns: + array_like: + Array of m/z values; indicies paried to `tof_range` + """ + return (((time_res * tof_arr) - mass_offset) / mass_gain) ** 2 + + def _set_tof_ranges(fov: Dict[str, Any], higher: np.ndarray, lower: np.ndarray, time_res: float) -> None: """Converts and stores provided mass ranges as time of flight ranges within fov metadata @@ -521,3 +542,65 @@ def get_total_counts(data_dir: str, include_fovs: Union[List[str], None] = None) outs = {name: _extract_bin.c_total_counts(bytes(bf, 'utf-8')) for name, bf in bin_files} return outs + + +def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None, + panel_df: pd.DataFrame = None, range_pad: float = 0.5): + """Retrieves total spectra for each field of view + + Args: + data_dir (str | PathLike): + Directory containing bin files as well as accompanying json metadata files + include_fovs (List | None): + List of fovs to include. Includes all if None. + panel_df (pd.DataFrame | None): + If not None, get default callibration information + range_pad (float): + Mass padding below the lowest and highest masses to consider when binning. + The time-of-flight array go from TOF of (lowest mass - 0.5) to (highest_mass + 0.5). + + Returns: + tuple (dict, dict, list): + dict of total spectra and the corresponding low and high ranges, with fov names as keys + """ + if range_pad < 0: + raise ValueError("range_pad must be >= 0") + + fov_metadata = _find_bin_files(data_dir, include_fovs) + if panel_df is not None: + for fov_info in fov_metadata.values(): + _fill_fov_metadata(data_dir, fov_info, panel_df, False, 500e-6) + + bin_metadata = list(fov_metadata.items()) + + # TODO: this assumes the panel_df is sorted + lowest_mass = panel_df.loc[0, "Stop"] - range_pad + highest_mass = panel_df.loc[panel_df.shape[0] - 1, "Stop"] + range_pad + + # store the spectra, as well as the time intervals for each FOV + spectra = {} + tof_interval = {} + for fov_name, fov_info in bin_metadata: + # compute the low and high boundaries, this will differ per FOV + mass_offset = fov_info["mass_offset"] + mass_gain = fov_info["mass_gain"] + tof_boundaries = _mass2tof( + np.array([lowest_mass, highest_mass]), mass_offset, mass_gain, 500e-6 + ).astype(np.uint16) + + # set the boundaries + tof_interval[fov_name] = tof_boundaries + + # extract the spectra on an individual basis per channel + spectra[fov_name] = _extract_bin.c_total_spectra( + bytes(os.path.join(data_dir, fov_info["bin"]), "utf-8"), + tof_boundaries[0], + tof_boundaries[1] + ) + + # generate equivalent m/z values + tof_arr = np.arange(tof_boundaries[0], tof_boundaries[1] + 1) + mass_arr = _tof2mass(tof_arr, mass_offset, mass_gain, 500e-6) + fov_info["mass_spectra_points"] = mass_arr + + return spectra, tof_interval, fov_metadata diff --git a/tests/bin_files_test.py b/tests/bin_files_test.py index 6de9640..ad611e5 100644 --- a/tests/bin_files_test.py +++ b/tests/bin_files_test.py @@ -343,4 +343,23 @@ def test_get_total_counts(test_dir, fov): bytes(bf, 'utf-8'), np.array([0], np.uint16), np.array([-1], dtype=np.uint16), np.array([False], dtype=np.uint8) ) - assert (total_counts['fov-1-scan-1'] == np.sum(total_ion_image[0, :, :, :])) + assert total_counts['fov-1-scan-1'] == np.sum(total_ion_image[0, :, :, :]) + + +@parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles) +@parametrize_with_cases('panel', cases=FovMetadataTestPanels, has_tag='specified') +def test_get_total_spectra(test_dir, fov, panel): + # ensure range_pad is positive + with pytest.raises(ValueError): + bin_files.get_total_spectra( + test_dir, + [fov['json'].split('.')[0]], + panel, + range_pad=-0.1 + ) + + bin_files.get_total_spectra( + test_dir, + [fov['json'].split('.')[0]], + panel + )