From 8ba94964959b2eb0ba240415148a16f29165c111 Mon Sep 17 00:00:00 2001 From: ackagel Date: Wed, 1 Jun 2022 19:24:27 +0000 Subject: [PATCH 01/12] adds total spectra extraction --- mibi_bin_tools/_extract_bin.pyx | 65 ++++++++++++++++++++++++++++++++- mibi_bin_tools/bin_files.py | 31 ++++++++++++++++ 2 files changed, 95 insertions(+), 1 deletion(-) diff --git a/mibi_bin_tools/_extract_bin.pyx b/mibi_bin_tools/_extract_bin.pyx index 8dd993e..518ca74 100644 --- a/mibi_bin_tools/_extract_bin.pyx +++ b/mibi_bin_tools/_extract_bin.pyx @@ -4,7 +4,7 @@ from cython cimport cdivision, boundscheck, wraparound from libc.stdlib cimport malloc, free, realloc, qsort from libc.string cimport memcpy, memset from libc.stdio cimport fopen, fclose, FILE, EOF, fseek, SEEK_SET, SEEK_CUR, fread -from libc.limits cimport USHRT_MAX +from libc.limits cimport USHRT_MAX, UINT_MAX import numpy as np cimport numpy as np @@ -369,6 +369,61 @@ cdef MAXINDEX_t _extract_total_counts(const char* filename): return counts + +cdef void _extract_total_spectra(const char* filename, MAXINDEX_t* total_spectra): + """Extract total spectra from bin file + + Args: + filename (const char*): + Name of bin file to extract + """ + cdef DTYPE_t num_x, num_y, num_trig, num_frames, desc_len, trig, num_pulses, pulse, time + cdef DTYPE_t intensity + cdef SMALL_t width + cdef MAXINDEX_t data_start, pix + + # 10MB buffer + cdef MAXINDEX_t BUFFER_SIZE = 10 * 1024 * 1024 + cdef char* file_buffer = malloc(BUFFER_SIZE * sizeof(char)) + cdef MAXINDEX_t buffer_idx = 0 + + # open file + cdef FILE* fp + fp = fopen(filename, "rb") + + # note, if cython has packed structs, this would be easier + # or even macros tbh + fseek(fp, 0x6, SEEK_SET) + fread(&num_x, sizeof(DTYPE_t), 1, fp) + fread(&num_y, sizeof(DTYPE_t), 1, fp) + fread(&num_trig, sizeof(DTYPE_t), 1, fp) + fread(&num_frames, sizeof(DTYPE_t), 1, fp) + fseek(fp, 0x2, SEEK_CUR) + fread(&desc_len, sizeof(DTYPE_t), 1, fp) + + data_start = \ + (num_x) * (num_y) * (num_frames) * 8 + desc_len + 0x12 + + fseek(fp, data_start, SEEK_SET) + fread(file_buffer, sizeof(char), BUFFER_SIZE, fp) + for pix in range((num_x) * (num_y)): + for trig in range(num_trig): + _check_buffer_refill(fp, file_buffer, &buffer_idx, 0x8 * sizeof(char), BUFFER_SIZE) + memcpy(&num_pulses, file_buffer + buffer_idx + 0x6, sizeof(time)) + buffer_idx += 0x8 + for pulse in range(num_pulses): + _check_buffer_refill(fp, file_buffer, &buffer_idx, 0x5 * sizeof(char), BUFFER_SIZE) + memcpy(&time, file_buffer + buffer_idx, sizeof(time)) + memcpy(&width, file_buffer + buffer_idx + 0x2, sizeof(width)) + memcpy(&intensity, file_buffer + buffer_idx + 0x3, sizeof(intensity)) + buffer_idx += 0x5 + + total_spectra[time] += 1 + + fclose(fp) + free(file_buffer) + + def c_extract_bin(char* filename, DTYPE_t[:] low_range, DTYPE_t[:] high_range, SMALL_t[:] calc_intensity): return np.asarray( @@ -405,3 +460,11 @@ def c_pulse_height_vs_positive_pixel(char* filename, DTYPE_t low_range, DTYPE_t def c_total_counts(char* filename): counts = _extract_total_counts(filename) return int(counts) + +def c_total_spectra(char* filename): + cdef MAXINDEX_t total_spectra[UINT_MAX] + memset(total_spectra, 0, UINT_MAX * sizeof(MAXINDEX_t)) + + _extract_total_spectra(filename, total_spectra) + + return np.asarray(total_spectra) diff --git a/mibi_bin_tools/bin_files.py b/mibi_bin_tools/bin_files.py index 1ec5264..1d653ea 100644 --- a/mibi_bin_tools/bin_files.py +++ b/mibi_bin_tools/bin_files.py @@ -441,3 +441,34 @@ def get_total_counts(data_dir: str, include_fovs: Union[List[str], None] = None) outs = {name: _extract_bin.c_total_counts(bytes(bf, 'utf-8')) for name, bf in bin_files} return outs + + +def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None, + panel_df: pd.DataFrame = None): + """Retrieves total spectra for each field of view + + Args: + data_dir (str | PathLike): + Directory containing bin files as well as accompanying json metadata files + include_fovs (List | None): + List of fovs to include. Includes all if None. + panel_df (pd.DataFrame | None): + If not None, get default callibration information + + Returns: + dict: + dictionary of total spectra, with fov names as keys + """ + + fov_files = _find_bin_files(data_dir, include_fovs) + + if panel_df is not None: + for fov in fov_files.values(): + _fill_fov_metadata(data_dir, fov, panel_df, False, 500e-6) + + bin_files = \ + [(name, os.path.join(data_dir, fov['bin'])) for name, fov in fov_files.items()] + + outs = {name: _extract_bin.c_total_spectra(bytes(bf, 'utf-8')) for name, bf in bin_files} + + return outs, fov_files From df8318aa5572cf6d0ff999245ce6e2cf5136cac8 Mon Sep 17 00:00:00 2001 From: ackagel Date: Wed, 1 Jun 2022 19:28:26 +0000 Subject: [PATCH 02/12] adds test for get_total_spectra --- tests/bin_files_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/bin_files_test.py b/tests/bin_files_test.py index 9e6da91..d780b67 100644 --- a/tests/bin_files_test.py +++ b/tests/bin_files_test.py @@ -252,3 +252,13 @@ def test_get_total_counts(test_dir, fov): np.array([-1], dtype=np.uint16), np.array([False], dtype=np.uint8) ) assert(total_counts['fov-1-scan-1'] == np.sum(total_ion_image[0, :, :, :])) + + +@parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles) +@parametrize_with_cases('panel', cases=FovMetadataTestPanels, has_tag='specified') +def test_get_total_spectra(test_dir, fov, panel): + bin_files.get_total_spectra( + test_dir, + fov['json'].split('.')[0], + panel + ) From eaa376f032dfb424b9a4dd507f9a2f66b8048b62 Mon Sep 17 00:00:00 2001 From: ackagel Date: Wed, 1 Jun 2022 20:05:40 +0000 Subject: [PATCH 03/12] fixes storage request size for total spectra array --- mibi_bin_tools/_extract_bin.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mibi_bin_tools/_extract_bin.pyx b/mibi_bin_tools/_extract_bin.pyx index 518ca74..341bd6e 100644 --- a/mibi_bin_tools/_extract_bin.pyx +++ b/mibi_bin_tools/_extract_bin.pyx @@ -4,7 +4,7 @@ from cython cimport cdivision, boundscheck, wraparound from libc.stdlib cimport malloc, free, realloc, qsort from libc.string cimport memcpy, memset from libc.stdio cimport fopen, fclose, FILE, EOF, fseek, SEEK_SET, SEEK_CUR, fread -from libc.limits cimport USHRT_MAX, UINT_MAX +from libc.limits cimport USHRT_MAX import numpy as np cimport numpy as np @@ -462,8 +462,8 @@ def c_total_counts(char* filename): return int(counts) def c_total_spectra(char* filename): - cdef MAXINDEX_t total_spectra[UINT_MAX] - memset(total_spectra, 0, UINT_MAX * sizeof(MAXINDEX_t)) + cdef MAXINDEX_t total_spectra[USHRT_MAX] + memset(total_spectra, 0, USHRT_MAX * sizeof(MAXINDEX_t)) _extract_total_spectra(filename, total_spectra) From 5b5c3827cfabb6cd152c585d64dd16ccaea96294 Mon Sep 17 00:00:00 2001 From: alex-l-kong Date: Thu, 19 Oct 2023 16:29:37 -0700 Subject: [PATCH 04/12] Fix test for get_total_spectra --- tests/bin_files_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/bin_files_test.py b/tests/bin_files_test.py index dd189c9..d44d9d9 100644 --- a/tests/bin_files_test.py +++ b/tests/bin_files_test.py @@ -343,7 +343,7 @@ def test_get_total_counts(test_dir, fov): bytes(bf, 'utf-8'), np.array([0], np.uint16), np.array([-1], dtype=np.uint16), np.array([False], dtype=np.uint8) ) - assert(total_counts['fov-1-scan-1'] == np.sum(total_ion_image[0, :, :, :])) + assert total_counts['fov-1-scan-1'] == np.sum(total_ion_image[0, :, :, :]) @parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles) @@ -351,6 +351,6 @@ def test_get_total_counts(test_dir, fov): def test_get_total_spectra(test_dir, fov, panel): bin_files.get_total_spectra( test_dir, - fov['json'].split('.')[0], + [fov['json'].split('.')[0]], panel ) From b501ece3bc8e9ab30b2878d6a72fa9ab7efa4133 Mon Sep 17 00:00:00 2001 From: alex-l-kong Date: Fri, 20 Oct 2023 16:01:03 -0700 Subject: [PATCH 05/12] Default spectra extraction to a per-pixel level --- src/mibi_bin_tools/_extract_bin.pyx | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/mibi_bin_tools/_extract_bin.pyx b/src/mibi_bin_tools/_extract_bin.pyx index b18928d..bec46e6 100644 --- a/src/mibi_bin_tools/_extract_bin.pyx +++ b/src/mibi_bin_tools/_extract_bin.pyx @@ -413,7 +413,7 @@ cdef MAXINDEX_t _extract_total_counts(const char* filename): return counts -cdef void _extract_total_spectra(const char* filename, MAXINDEX_t* total_spectra): +cdef _extract_total_spectra(const char* filename): """Extract total spectra from bin file Args: @@ -444,6 +444,15 @@ cdef void _extract_total_spectra(const char* filename, MAXINDEX_t* total_spectra fseek(fp, 0x2, SEEK_CUR) fread(&desc_len, sizeof(DTYPE_t), 1, fp) + spectra_by_pixel = \ + cvarray( + shape=((num_x) * (num_y), USHRT_MAX), + itemsize=sizeof(MAXINDEX_t), + format='Q' + ) + cdef MAXINDEX_t[:, :] spectra_by_pixel_view = spectra_by_pixel + spectra_by_pixel_view[:, :] = 0 + data_start = \ (num_x) * (num_y) * (num_frames) * 8 + desc_len + 0x12 @@ -461,11 +470,15 @@ cdef void _extract_total_spectra(const char* filename, MAXINDEX_t* total_spectra memcpy(&intensity, file_buffer + buffer_idx + 0x3, sizeof(intensity)) buffer_idx += 0x5 - total_spectra[time] += 1 + spectra_by_pixel_view[pix, time] += 1 fclose(fp) free(file_buffer) + return np.asarray(spectra_by_pixel, dtype=np.uint64).reshape( + (num_x * num_y, USHRT_MAX) + ) + def c_extract_bin(char* filename, DTYPE_t[:] low_range, DTYPE_t[:] high_range, SMALL_t[:] calc_intensity): @@ -493,9 +506,4 @@ def c_total_counts(char* filename): return int(counts) def c_total_spectra(char* filename): - cdef MAXINDEX_t total_spectra[USHRT_MAX] - memset(total_spectra, 0, USHRT_MAX * sizeof(MAXINDEX_t)) - - _extract_total_spectra(filename, total_spectra) - - return np.asarray(total_spectra) + return _extract_total_spectra(filename) From 660588203efe843c4a6b4811addce799b5b918c6 Mon Sep 17 00:00:00 2001 From: alex-l-kong Date: Tue, 31 Oct 2023 12:51:38 -0700 Subject: [PATCH 06/12] Make sure every pixel has the full TOF range of the panel --- src/mibi_bin_tools/_extract_bin.pyx | 31 ++++++++++++---------- src/mibi_bin_tools/bin_files.py | 40 +++++++++++++++++++++++------ 2 files changed, 49 insertions(+), 22 deletions(-) diff --git a/src/mibi_bin_tools/_extract_bin.pyx b/src/mibi_bin_tools/_extract_bin.pyx index bec46e6..f2a9214 100644 --- a/src/mibi_bin_tools/_extract_bin.pyx +++ b/src/mibi_bin_tools/_extract_bin.pyx @@ -413,16 +413,18 @@ cdef MAXINDEX_t _extract_total_counts(const char* filename): return counts -cdef _extract_total_spectra(const char* filename): +cdef _extract_total_spectra(const char* filename, DTYPE_t low_range, DTYPE_t high_range): """Extract total spectra from bin file Args: filename (const char*): Name of bin file to extract + low_range (np.uint16_t): + The lowest time interval to consider + high_range (np.uint16_t): + The highest time interval to consider """ cdef DTYPE_t num_x, num_y, num_trig, num_frames, desc_len, trig, num_pulses, pulse, time - cdef DTYPE_t intensity - cdef SMALL_t width cdef MAXINDEX_t data_start, pix # 10MB buffer @@ -446,11 +448,12 @@ cdef _extract_total_spectra(const char* filename): spectra_by_pixel = \ cvarray( - shape=((num_x) * (num_y), USHRT_MAX), - itemsize=sizeof(MAXINDEX_t), - format='Q' + shape=((num_x) * (num_y), + (high_range) - (low_range) + 1), + itemsize=sizeof(DTYPE_t), + format='H' ) - cdef MAXINDEX_t[:, :] spectra_by_pixel_view = spectra_by_pixel + cdef DTYPE_t[:, :] spectra_by_pixel_view = spectra_by_pixel spectra_by_pixel_view[:, :] = 0 data_start = \ @@ -466,17 +469,16 @@ cdef _extract_total_spectra(const char* filename): for pulse in range(num_pulses): _check_buffer_refill(fp, file_buffer, &buffer_idx, 0x5 * sizeof(char), BUFFER_SIZE) memcpy(&time, file_buffer + buffer_idx, sizeof(time)) - memcpy(&width, file_buffer + buffer_idx + 0x2, sizeof(width)) - memcpy(&intensity, file_buffer + buffer_idx + 0x3, sizeof(intensity)) buffer_idx += 0x5 - spectra_by_pixel_view[pix, time] += 1 + if time >= low_range and time <= high_range: + spectra_by_pixel_view[pix, (time) - (low_range)] += 1 fclose(fp) free(file_buffer) - return np.asarray(spectra_by_pixel, dtype=np.uint64).reshape( - (num_x * num_y, USHRT_MAX) + return np.asarray(spectra_by_pixel, dtype=np.uint16).reshape( + (num_x, num_y, high_range - low_range + 1) ) @@ -505,5 +507,6 @@ def c_total_counts(char* filename): counts = _extract_total_counts(filename) return int(counts) -def c_total_spectra(char* filename): - return _extract_total_spectra(filename) +def c_total_spectra(char* filename, DTYPE_t low_range, DTYPE_t high_range): + print("Running spectra extraction") + return _extract_total_spectra(filename, low_range, high_range) diff --git a/src/mibi_bin_tools/bin_files.py b/src/mibi_bin_tools/bin_files.py index 9772b98..c3dda0c 100644 --- a/src/mibi_bin_tools/bin_files.py +++ b/src/mibi_bin_tools/bin_files.py @@ -524,7 +524,7 @@ def get_total_counts(data_dir: str, include_fovs: Union[List[str], None] = None) def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None, - panel_df: pd.DataFrame = None): + panel_df: pd.DataFrame = None, range_pad=0.5): """Retrieves total spectra for each field of view Args: @@ -534,21 +534,45 @@ def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None List of fovs to include. Includes all if None. panel_df (pd.DataFrame | None): If not None, get default callibration information + range_offset (float): + Mass padding below the lowest and highest masses to consider when binning. + The time-of-flight array go from TOF of (lowest mass - 0.5) to (highest_mass + 0.5). Returns: - dict: - dictionary of total spectra, with fov names as keys + tuple (dict, dict, list): + dict of total spectra and the corresponding low and high ranges, with fov names as keys """ + if range_pad < 0: + raise ValueError("range_pad must be >= 0") fov_files = _find_bin_files(data_dir, include_fovs) - if panel_df is not None: for fov in fov_files.values(): _fill_fov_metadata(data_dir, fov, panel_df, False, 500e-6) - bin_files = \ - [(name, os.path.join(data_dir, fov['bin'])) for name, fov in fov_files.items()] + bin_files = list(fov_files.items()) + + # TODO: this assumes the panel_df is sorted + lowest_mass = panel_df.loc[0, "Stop"] - range_pad + highest_mass = panel_df.loc[panel_df.shape[0] - 1, "Stop"] + range_pad + + # store the spectra, as well as the time intervals for each FOV + spectra = {} + tof_interval = {} + for name, fov in bin_files: + # compute the low and high boundaries, this will differ per FOV + mass_offset = fov["mass_offset"] + mass_gain = fov["mass_gain"] + tof_boundaries = _mass2tof( + np.array([lowest_mass, highest_mass]), mass_offset, mass_gain, 500e-6 + ).astype(np.uint16) + + # set the boundaries + tof_interval[name] = tof_boundaries - outs = {name: _extract_bin.c_total_spectra(bytes(bf, 'utf-8')) for name, bf in bin_files} + # extract the spectra on an individual basis per channel + spectra[name] = _extract_bin.c_total_spectra( + bytes(os.path.join(data_dir, fov["bin"]), "utf-8"), tof_boundaries[0], tof_boundaries[1] + ) - return outs, fov_files + return spectra, tof_interval, fov_files From 303925bfcd6b76ec58d3e04cd63c72b8ad388c96 Mon Sep 17 00:00:00 2001 From: alex-l-kong Date: Tue, 31 Oct 2023 13:43:17 -0700 Subject: [PATCH 07/12] Even more size saving by using np.uint8 --- src/mibi_bin_tools/_extract_bin.pyx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/mibi_bin_tools/_extract_bin.pyx b/src/mibi_bin_tools/_extract_bin.pyx index f2a9214..7e3aebd 100644 --- a/src/mibi_bin_tools/_extract_bin.pyx +++ b/src/mibi_bin_tools/_extract_bin.pyx @@ -450,10 +450,10 @@ cdef _extract_total_spectra(const char* filename, DTYPE_t low_range, DTYPE_t hig cvarray( shape=((num_x) * (num_y), (high_range) - (low_range) + 1), - itemsize=sizeof(DTYPE_t), - format='H' + itemsize=sizeof(SMALL_t), + format='B' ) - cdef DTYPE_t[:, :] spectra_by_pixel_view = spectra_by_pixel + cdef SMALL_t[:, :] spectra_by_pixel_view = spectra_by_pixel spectra_by_pixel_view[:, :] = 0 data_start = \ @@ -477,7 +477,7 @@ cdef _extract_total_spectra(const char* filename, DTYPE_t low_range, DTYPE_t hig fclose(fp) free(file_buffer) - return np.asarray(spectra_by_pixel, dtype=np.uint16).reshape( + return np.asarray(spectra_by_pixel, dtype=np.uint8).reshape( (num_x, num_y, high_range - low_range + 1) ) @@ -508,5 +508,4 @@ def c_total_counts(char* filename): return int(counts) def c_total_spectra(char* filename, DTYPE_t low_range, DTYPE_t high_range): - print("Running spectra extraction") return _extract_total_spectra(filename, low_range, high_range) From e125c36c894d765212aefb03054aafd055513231 Mon Sep 17 00:00:00 2001 From: alex-l-kong Date: Tue, 31 Oct 2023 19:30:28 -0700 Subject: [PATCH 08/12] Add type annotation for range_pad in extract_total_spectra --- src/mibi_bin_tools/bin_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mibi_bin_tools/bin_files.py b/src/mibi_bin_tools/bin_files.py index c3dda0c..ca7653a 100644 --- a/src/mibi_bin_tools/bin_files.py +++ b/src/mibi_bin_tools/bin_files.py @@ -524,7 +524,7 @@ def get_total_counts(data_dir: str, include_fovs: Union[List[str], None] = None) def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None, - panel_df: pd.DataFrame = None, range_pad=0.5): + panel_df: pd.DataFrame = None, range_pad: float =0.5): """Retrieves total spectra for each field of view Args: From 365b9424f00de66842e458949a76c19b8c6a9b07 Mon Sep 17 00:00:00 2001 From: alex-l-kong Date: Tue, 31 Oct 2023 19:30:41 -0700 Subject: [PATCH 09/12] Spacing --- src/mibi_bin_tools/bin_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mibi_bin_tools/bin_files.py b/src/mibi_bin_tools/bin_files.py index ca7653a..02669f8 100644 --- a/src/mibi_bin_tools/bin_files.py +++ b/src/mibi_bin_tools/bin_files.py @@ -524,7 +524,7 @@ def get_total_counts(data_dir: str, include_fovs: Union[List[str], None] = None) def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None, - panel_df: pd.DataFrame = None, range_pad: float =0.5): + panel_df: pd.DataFrame = None, range_pad: float = 0.5): """Retrieves total spectra for each field of view Args: From 6a086bd062cd4ef61cca929bb5c10bfea44a1687 Mon Sep 17 00:00:00 2001 From: alex-l-kong Date: Mon, 6 Nov 2023 16:01:03 -0800 Subject: [PATCH 10/12] Store equivalent m/z values for each FOV for plotting purposes --- src/mibi_bin_tools/bin_files.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/mibi_bin_tools/bin_files.py b/src/mibi_bin_tools/bin_files.py index 02669f8..11e32e8 100644 --- a/src/mibi_bin_tools/bin_files.py +++ b/src/mibi_bin_tools/bin_files.py @@ -32,6 +32,27 @@ def _mass2tof(masses_arr: np.ndarray, mass_offset: float, mass_gain: float, return (mass_gain * np.sqrt(masses_arr) + mass_offset) / time_res +def _tof2mass(tof_arr: np.ndarray, mass_offset: float, mass_gain: float, + time_res: float) -> np.ndarray: + """Convert array of time of flight values to equivalent m/z + + Args: + tof_arr (array_like): + Array of time of flight values + mass_offset (float): + Mass offset for parabolic transformation + mass_gain (float): + Mass gain for parabolic transformation + time_res (float): + Time resolution for scaling parabolic transformation + + Returns: + array_like: + Array of m/z values; indicies paried to `tof_range` + """ + return (((time_res * tof_arr) - mass_offset) / mass_gain) ** 2 + + def _set_tof_ranges(fov: Dict[str, Any], higher: np.ndarray, lower: np.ndarray, time_res: float) -> None: """Converts and stores provided mass ranges as time of flight ranges within fov metadata @@ -575,4 +596,9 @@ def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None bytes(os.path.join(data_dir, fov["bin"]), "utf-8"), tof_boundaries[0], tof_boundaries[1] ) + # generate equivalent m/z values + tof_arr = np.arange(tof_boundaries[0], tof_boundaries[1] + 1) + mass_arr = _tof2mass(tof_arr, mass_offset, mass_gain, 500e-6) + fov["mass_spectra_points"] = mass_arr + return spectra, tof_interval, fov_files From 78e38572f1b76a78e12b50106f844d615f076ec7 Mon Sep 17 00:00:00 2001 From: alex-l-kong Date: Tue, 7 Nov 2023 10:26:04 -0800 Subject: [PATCH 11/12] Clarify fov_files to fov_metadata, and name to fov_name --- src/mibi_bin_tools/bin_files.py | 26 ++++++++++++++------------ tests/bin_files_test.py | 9 +++++++++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/mibi_bin_tools/bin_files.py b/src/mibi_bin_tools/bin_files.py index 11e32e8..4967830 100644 --- a/src/mibi_bin_tools/bin_files.py +++ b/src/mibi_bin_tools/bin_files.py @@ -566,12 +566,12 @@ def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None if range_pad < 0: raise ValueError("range_pad must be >= 0") - fov_files = _find_bin_files(data_dir, include_fovs) + fov_metadata = _find_bin_files(data_dir, include_fovs) if panel_df is not None: - for fov in fov_files.values(): - _fill_fov_metadata(data_dir, fov, panel_df, False, 500e-6) + for fov_info in fov_metadata.values(): + _fill_fov_metadata(data_dir, fov_info, panel_df, False, 500e-6) - bin_files = list(fov_files.items()) + bin_metadata = list(fov_metadata.items()) # TODO: this assumes the panel_df is sorted lowest_mass = panel_df.loc[0, "Stop"] - range_pad @@ -580,25 +580,27 @@ def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None # store the spectra, as well as the time intervals for each FOV spectra = {} tof_interval = {} - for name, fov in bin_files: + for fov_name, fov_info in bin_metadata: # compute the low and high boundaries, this will differ per FOV - mass_offset = fov["mass_offset"] - mass_gain = fov["mass_gain"] + mass_offset = fov_info["mass_offset"] + mass_gain = fov_info["mass_gain"] tof_boundaries = _mass2tof( np.array([lowest_mass, highest_mass]), mass_offset, mass_gain, 500e-6 ).astype(np.uint16) # set the boundaries - tof_interval[name] = tof_boundaries + tof_interval[fov_name] = tof_boundaries # extract the spectra on an individual basis per channel - spectra[name] = _extract_bin.c_total_spectra( - bytes(os.path.join(data_dir, fov["bin"]), "utf-8"), tof_boundaries[0], tof_boundaries[1] + spectra[fov_name] = _extract_bin.c_total_spectra( + bytes(os.path.join(data_dir, fov_info["bin"]), "utf-8"), + tof_boundaries[0], + tof_boundaries[1] ) # generate equivalent m/z values tof_arr = np.arange(tof_boundaries[0], tof_boundaries[1] + 1) mass_arr = _tof2mass(tof_arr, mass_offset, mass_gain, 500e-6) - fov["mass_spectra_points"] = mass_arr + fov_info["mass_spectra_points"] = mass_arr - return spectra, tof_interval, fov_files + return spectra, tof_interval, fov_metadata diff --git a/tests/bin_files_test.py b/tests/bin_files_test.py index d44d9d9..ad611e5 100644 --- a/tests/bin_files_test.py +++ b/tests/bin_files_test.py @@ -349,6 +349,15 @@ def test_get_total_counts(test_dir, fov): @parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles) @parametrize_with_cases('panel', cases=FovMetadataTestPanels, has_tag='specified') def test_get_total_spectra(test_dir, fov, panel): + # ensure range_pad is positive + with pytest.raises(ValueError): + bin_files.get_total_spectra( + test_dir, + [fov['json'].split('.')[0]], + panel, + range_pad=-0.1 + ) + bin_files.get_total_spectra( test_dir, [fov['json'].split('.')[0]], From c5ebb0ed9fcec4b17017b8f8261ce0b2cfb7a7e1 Mon Sep 17 00:00:00 2001 From: Alex Kong Date: Tue, 8 Oct 2024 14:37:16 -0700 Subject: [PATCH 12/12] Documentation fi --- src/mibi_bin_tools/bin_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mibi_bin_tools/bin_files.py b/src/mibi_bin_tools/bin_files.py index 4967830..4f724cb 100644 --- a/src/mibi_bin_tools/bin_files.py +++ b/src/mibi_bin_tools/bin_files.py @@ -555,7 +555,7 @@ def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None List of fovs to include. Includes all if None. panel_df (pd.DataFrame | None): If not None, get default callibration information - range_offset (float): + range_pad (float): Mass padding below the lowest and highest masses to consider when binning. The time-of-flight array go from TOF of (lowest mass - 0.5) to (highest_mass + 0.5).