From a56dca311372bdd6b36cbc60b790946747d270f2 Mon Sep 17 00:00:00 2001 From: mzouink Date: Tue, 12 Nov 2024 13:47:14 -0500 Subject: [PATCH 01/43] heavy unet tests --- dacapo/compute_context/local_torch.py | 7 +- .../architectures/cnnectome_unet_config.py | 2 +- .../threshold_post_processor.py | 4 +- .../tasks/predictors/distance_predictor.py | 12 +- dacapo/validate.py | 7 +- tests/operations/test_train.py | 111 ++++++++++++++++++ 6 files changed, 132 insertions(+), 11 deletions(-) diff --git a/dacapo/compute_context/local_torch.py b/dacapo/compute_context/local_torch.py index a547b7dd7..08e813712 100644 --- a/dacapo/compute_context/local_torch.py +++ b/dacapo/compute_context/local_torch.py @@ -56,9 +56,10 @@ def device(self): if self._device is None: if torch.cuda.is_available(): # TODO: make this more sophisticated, for multiple GPUs for instance - free = torch.cuda.mem_get_info()[0] / 1024**3 - if free < self.oom_limit: # less than 1 GB free, decrease chance of OOM - return torch.device("cpu") + # commented out code below is for checking free memory and falling back on CPU, whhen model in GPU and memory is low model get moved to CPU + # free = torch.cuda.mem_get_info()[0] / 1024**3 + # if free < self.oom_limit: # less than 1 GB free, decrease chance of OOM + # return torch.device("cpu") return torch.device("cuda") # Multiple MPS ops are not available yet : https://github.com/pytorch/pytorch/issues/77764 # got error aten::max_pool3d_with_indices diff --git a/dacapo/experiments/architectures/cnnectome_unet_config.py b/dacapo/experiments/architectures/cnnectome_unet_config.py index 7eab80115..643921386 100644 --- a/dacapo/experiments/architectures/cnnectome_unet_config.py +++ b/dacapo/experiments/architectures/cnnectome_unet_config.py @@ -128,6 +128,6 @@ class CNNectomeUNetConfig(ArchitectureConfig): }, ) batch_norm: bool = attr.ib( - default=True, + default=False, metadata={"help_text": "Whether to use batch normalization."}, ) diff --git 
a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py index 59059e516..38bfda883 100644 --- a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py @@ -18,7 +18,9 @@ from funlib.persistence import Array from typing import Iterable +import logging +logger = logging.getLogger(__name__) class ThresholdPostProcessor(PostProcessor): """ @@ -135,7 +137,7 @@ def process_block(block): data = input_array[write_roi] > parameters.threshold data = data.astype(np.uint8) if int(data.max()) == 0: - print("No data in block", write_roi) + logger.debug("No data in block", write_roi) return output_array[write_roi] = data diff --git a/dacapo/experiments/tasks/predictors/distance_predictor.py b/dacapo/experiments/tasks/predictors/distance_predictor.py index 861a9e1dd..568deae53 100644 --- a/dacapo/experiments/tasks/predictors/distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/distance_predictor.py @@ -231,9 +231,11 @@ def create_distance_mask( ) slices = tmp.ndim * (slice(1, -1),) tmp[slices] = channel_mask + sampling = tuple(float(v) / 2 for v in voxel_size) + sampling = sampling[-len(tmp.shape) :] boundary_distance = distance_transform_edt( tmp, - sampling=voxel_size, + sampling=sampling, ) if self.epsilon is None: add = 0 @@ -315,13 +317,17 @@ def process( distances = np.ones(channel.shape, dtype=np.float32) * max_distance else: # get distances (voxel_size/2 because image is doubled) + sampling = tuple(float(v) / 2 for v in voxel_size) + # fixing the sampling for 2D images + if len(boundaries.shape) < len(sampling): + sampling = sampling[-len(boundaries.shape):] distances = distance_transform_edt( - boundaries, sampling=tuple(float(v) / 2 for v in voxel_size) + boundaries, sampling=sampling ) distances = distances.astype(np.float32) # restore original shape - downsample = (slice(None, None, 2),) 
* len(voxel_size) + downsample = (slice(None, None, 2),) * distances.ndim distances = distances[downsample] # todo: inverted distance diff --git a/dacapo/validate.py b/dacapo/validate.py index 4e091ff55..b826a6dbd 100644 --- a/dacapo/validate.py +++ b/dacapo/validate.py @@ -246,6 +246,9 @@ def validate_run(run: Run, iteration: int, datasets_config=None): # validation_dataset.name, # criterion, # ) + dataset_iteration_scores.append( + [getattr(scores, criterion) for criterion in scores.criteria] + ) except: logger.error( f"Could not evaluate run {run.name} on dataset {validation_dataset.name} with parameters {parameters}.", @@ -257,9 +260,7 @@ def validate_run(run: Run, iteration: int, datasets_config=None): # the evaluator # array_store.remove(output_array_identifier) - dataset_iteration_scores.append( - [getattr(scores, criterion) for criterion in scores.criteria] - ) + iteration_scores.append(dataset_iteration_scores) # array_store.remove(prediction_array_identifier) diff --git a/tests/operations/test_train.py b/tests/operations/test_train.py index a852101be..374621c6b 100644 --- a/tests/operations/test_train.py +++ b/tests/operations/test_train.py @@ -9,10 +9,58 @@ import pytest from pytest_lazy_fixtures import lf +from dacapo.experiments.run_config import RunConfig + import logging logging.basicConfig(level=logging.INFO) +from dacapo.experiments.architectures import DummyArchitectureConfig, CNNectomeUNetConfig + +import pytest + + +def unet_architecture(batch_norm, upsample,use_attention, three_d): + name = "3d_unet" if three_d else "2d_unet" + name = f"{name}_bn" if batch_norm else name + name = f"{name}_up" if upsample else name + name = f"{name}_att" if use_attention else name + + if three_d: + return CNNectomeUNetConfig( + name=name, + input_shape=(188, 188, 188), + eval_shape_increase=(72, 72, 72), + fmaps_in=1, + num_fmaps=6, + fmaps_out=6, + fmap_inc_factor=2, + downsample_factors=[(2, 2, 2), (2, 2, 2), (2, 2, 2)], + constant_upsample=True, + 
upsample_factors=[(2, 2, 2)] if upsample else [], + batch_norm=batch_norm, + use_attention=use_attention, + ) + else: + return CNNectomeUNetConfig( + name=name, + input_shape=(2, 132, 132), + eval_shape_increase=(8, 32, 32), + fmaps_in=2, + num_fmaps=8, + fmaps_out=8, + fmap_inc_factor=2, + downsample_factors=[(1, 4, 4), (1, 4, 4)], + kernel_size_down=[[(1, 3, 3)] * 2] * 3, + kernel_size_up=[[(1, 3, 3)] * 2] * 2, + constant_upsample=True, + padding="valid", + batch_norm=batch_norm, + use_attention=use_attention, + upsample_factors=[(1, 2, 2)] if upsample else [], + ) + + # skip the test for the Apple Paravirtual device # that does not support Metal 2.0 @@ -59,3 +107,66 @@ def test_train( training_stats = stats_store.retrieve_training_stats(run_config.name) assert training_stats.trained_until() == run_config.num_iterations + + +@pytest.mark.parametrize("datasplit", [lf("six_class_datasplit")]) +@pytest.mark.parametrize("task", [lf("distance_task")]) +@pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) +@pytest.mark.parametrize("batch_norm", [True, False]) +@pytest.mark.parametrize("upsample", [True, False]) +@pytest.mark.parametrize("use_attention", [True, False]) +@pytest.mark.parametrize("three_d", [True, False]) +def test_train_unet( + datasplit, + task, + trainer, + batch_norm, + upsample, + use_attention, + three_d): + + store = create_config_store() + stats_store = create_stats_store() + weights_store = create_weights_store() + + architecture_config = unet_architecture(batch_norm, upsample,use_attention, three_d) + + run_config = RunConfig( + name=f"{architecture_config.name}_run", + task_config=task, + architecture_config=architecture_config, + trainer_config=trainer, + datasplit_config=datasplit, + repetition=0, + num_iterations=2, + ) + try: + store.store_run_config(run_config) + except Exception as e: + store.delete_run_config(run_config.name) + store.store_run_config(run_config) + + run = Run(run_config) + + # 
------------------------------------- + + # train + + weights_store.store_weights(run, 0) + train_run(run) + + init_weights = weights_store.retrieve_weights(run.name, 0) + final_weights = weights_store.retrieve_weights(run.name, run.train_until) + + for name, weight in init_weights.model.items(): + weight_diff = (weight - final_weights.model[name]).sum() + assert abs(weight_diff) > np.finfo(weight_diff.numpy().dtype).eps, weight_diff + + # assert train_stats and validation_scores are available + + training_stats = stats_store.retrieve_training_stats(run_config.name) + + assert training_stats.trained_until() == run_config.num_iterations + + + \ No newline at end of file From 2d63df5ae02509c66ae5d2c7730bfcfc09fa0cd5 Mon Sep 17 00:00:00 2001 From: mzouink Date: Tue, 12 Nov 2024 18:49:28 +0000 Subject: [PATCH 02/43] :art: Format Python code with psf/black --- .../threshold_post_processor.py | 1 + .../tasks/predictors/distance_predictor.py | 6 +-- dacapo/validate.py | 4 +- tests/operations/test_train.py | 53 +++++++++---------- 4 files changed, 28 insertions(+), 36 deletions(-) diff --git a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py index 38bfda883..0c137e2f6 100644 --- a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py @@ -22,6 +22,7 @@ logger = logging.getLogger(__name__) + class ThresholdPostProcessor(PostProcessor): """ A post-processor that applies a threshold to the prediction. 
diff --git a/dacapo/experiments/tasks/predictors/distance_predictor.py b/dacapo/experiments/tasks/predictors/distance_predictor.py index 568deae53..07cb92701 100644 --- a/dacapo/experiments/tasks/predictors/distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/distance_predictor.py @@ -320,10 +320,8 @@ def process( sampling = tuple(float(v) / 2 for v in voxel_size) # fixing the sampling for 2D images if len(boundaries.shape) < len(sampling): - sampling = sampling[-len(boundaries.shape):] - distances = distance_transform_edt( - boundaries, sampling=sampling - ) + sampling = sampling[-len(boundaries.shape) :] + distances = distance_transform_edt(boundaries, sampling=sampling) distances = distances.astype(np.float32) # restore original shape diff --git a/dacapo/validate.py b/dacapo/validate.py index b826a6dbd..6e92430c9 100644 --- a/dacapo/validate.py +++ b/dacapo/validate.py @@ -247,7 +247,7 @@ def validate_run(run: Run, iteration: int, datasets_config=None): # criterion, # ) dataset_iteration_scores.append( - [getattr(scores, criterion) for criterion in scores.criteria] + [getattr(scores, criterion) for criterion in scores.criteria] ) except: logger.error( @@ -260,8 +260,6 @@ def validate_run(run: Run, iteration: int, datasets_config=None): # the evaluator # array_store.remove(output_array_identifier) - - iteration_scores.append(dataset_iteration_scores) # array_store.remove(prediction_array_identifier) diff --git a/tests/operations/test_train.py b/tests/operations/test_train.py index 374621c6b..be0a94d16 100644 --- a/tests/operations/test_train.py +++ b/tests/operations/test_train.py @@ -15,32 +15,35 @@ logging.basicConfig(level=logging.INFO) -from dacapo.experiments.architectures import DummyArchitectureConfig, CNNectomeUNetConfig +from dacapo.experiments.architectures import ( + DummyArchitectureConfig, + CNNectomeUNetConfig, +) import pytest -def unet_architecture(batch_norm, upsample,use_attention, three_d): +def unet_architecture(batch_norm, 
upsample, use_attention, three_d): name = "3d_unet" if three_d else "2d_unet" name = f"{name}_bn" if batch_norm else name name = f"{name}_up" if upsample else name name = f"{name}_att" if use_attention else name if three_d: - return CNNectomeUNetConfig( - name=name, - input_shape=(188, 188, 188), - eval_shape_increase=(72, 72, 72), - fmaps_in=1, - num_fmaps=6, - fmaps_out=6, - fmap_inc_factor=2, - downsample_factors=[(2, 2, 2), (2, 2, 2), (2, 2, 2)], - constant_upsample=True, - upsample_factors=[(2, 2, 2)] if upsample else [], - batch_norm=batch_norm, - use_attention=use_attention, - ) + return CNNectomeUNetConfig( + name=name, + input_shape=(188, 188, 188), + eval_shape_increase=(72, 72, 72), + fmaps_in=1, + num_fmaps=6, + fmaps_out=6, + fmap_inc_factor=2, + downsample_factors=[(2, 2, 2), (2, 2, 2), (2, 2, 2)], + constant_upsample=True, + upsample_factors=[(2, 2, 2)] if upsample else [], + batch_norm=batch_norm, + use_attention=use_attention, + ) else: return CNNectomeUNetConfig( name=name, @@ -61,7 +64,6 @@ def unet_architecture(batch_norm, upsample,use_attention, three_d): ) - # skip the test for the Apple Paravirtual device # that does not support Metal 2.0 @pytest.mark.filterwarnings("ignore:.*Metal 2.0.*:UserWarning") @@ -117,19 +119,15 @@ def test_train( @pytest.mark.parametrize("use_attention", [True, False]) @pytest.mark.parametrize("three_d", [True, False]) def test_train_unet( - datasplit, - task, - trainer, - batch_norm, - upsample, - use_attention, - three_d): - + datasplit, task, trainer, batch_norm, upsample, use_attention, three_d +): store = create_config_store() stats_store = create_stats_store() weights_store = create_weights_store() - architecture_config = unet_architecture(batch_norm, upsample,use_attention, three_d) + architecture_config = unet_architecture( + batch_norm, upsample, use_attention, three_d + ) run_config = RunConfig( name=f"{architecture_config.name}_run", @@ -167,6 +165,3 @@ def test_train_unet( training_stats = 
stats_store.retrieve_training_stats(run_config.name) assert training_stats.trained_until() == run_config.num_iterations - - - \ No newline at end of file From afcefaea0266b531fd7cd0a6bf6d89b00ac624f6 Mon Sep 17 00:00:00 2001 From: mzouink Date: Tue, 12 Nov 2024 16:37:18 -0500 Subject: [PATCH 03/43] test context --- tests/operations/test_context.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 tests/operations/test_context.py diff --git a/tests/operations/test_context.py b/tests/operations/test_context.py new file mode 100644 index 000000000..b7f70500e --- /dev/null +++ b/tests/operations/test_context.py @@ -0,0 +1,14 @@ +import torch +from dacapo.compute_context import create_compute_context +import pytest + + +@pytest.mark.parametrize("device", ["cpu", "cuda"]) +def test_create_compute_context(device): + compute_context = create_compute_context() + assert compute_context is not None + assert compute_context.device is not None + if torch.cuda.is_available(): + assert compute_context.device == torch.device('cuda'), "Model is not on CUDA when CUDA is available {}".format(compute_context.device) + else: + assert compute_context.device == torch.device('cpu'), "Model is not on CPU when CUDA is not available {}".format(compute_context.device) \ No newline at end of file From 4192606c5985f9abb567f49c1e311595b9590b37 Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 13 Nov 2024 11:02:19 -0500 Subject: [PATCH 04/43] fix arch dims --- .../experiments/architectures/architecture.py | 3 ++- .../architectures/cnnectome_unet.py | 18 ++++++++++++++++++ .../architectures/dummy_architecture.py | 17 +++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/dacapo/experiments/architectures/architecture.py b/dacapo/experiments/architectures/architecture.py index 0f188560e..93ad5678f 100644 --- a/dacapo/experiments/architectures/architecture.py +++ b/dacapo/experiments/architectures/architecture.py @@ -99,6 +99,7 @@ def 
num_out_channels(self) -> int: pass @property + @abstractmethod def dims(self) -> int: """ Returns the number of dimensions of the input shape. @@ -115,7 +116,7 @@ def dims(self) -> int: Note: The method is optional and can be overridden in the derived class. """ - return self.input_shape.dims + pass def scale(self, input_voxel_size: Coordinate) -> Coordinate: """ diff --git a/dacapo/experiments/architectures/cnnectome_unet.py b/dacapo/experiments/architectures/cnnectome_unet.py index d89e902ac..6afe8444e 100644 --- a/dacapo/experiments/architectures/cnnectome_unet.py +++ b/dacapo/experiments/architectures/cnnectome_unet.py @@ -325,6 +325,24 @@ def input_shape(self): The input shape should be given as a tuple ``(batch, channels, [length,] depth, height, width)``. """ return self._input_shape + + @property + def dims(self): + """ + Return the number of dimensions of the input shape. + + Returns: + The number of dimensions. + Raises: + AttributeError: If the input shape is not given. + Examples: + >>> unet.dims + 3 + Note: + The input shape should be given as a tuple ``(batch, channels, [length,] depth, height, width)``. + """ + # return self.input_shape.dims + return self.unet.dims @property def num_in_channels(self) -> int: diff --git a/dacapo/experiments/architectures/dummy_architecture.py b/dacapo/experiments/architectures/dummy_architecture.py index fa5a889e7..b938301ed 100644 --- a/dacapo/experiments/architectures/dummy_architecture.py +++ b/dacapo/experiments/architectures/dummy_architecture.py @@ -74,6 +74,23 @@ def num_in_channels(self): This method is used to return the number of input channels for this architecture. """ return self.channels_in + + @property + def dims(self): + """ + Returns the number of dimensions of the input shape. + + Returns: + int: The number of dimensions. + Raises: + NotImplementedError: This method is not implemented in this class. 
+ Examples: + >>> dummy_architecture.dims + 3 + Note: + This method is used to return the number of dimensions of the input shape. + """ + return self.input_shape.dims @property def num_out_channels(self): From bc70727f6aba89859c9a6c50857a9135966538d9 Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 13 Nov 2024 11:21:36 -0500 Subject: [PATCH 05/43] Revert architecture changes, didn't work --- .../experiments/architectures/architecture.py | 3 +-- .../architectures/cnnectome_unet.py | 18 ------------------ .../architectures/dummy_architecture.py | 17 ----------------- 3 files changed, 1 insertion(+), 37 deletions(-) diff --git a/dacapo/experiments/architectures/architecture.py b/dacapo/experiments/architectures/architecture.py index 93ad5678f..0f188560e 100644 --- a/dacapo/experiments/architectures/architecture.py +++ b/dacapo/experiments/architectures/architecture.py @@ -99,7 +99,6 @@ def num_out_channels(self) -> int: pass @property - @abstractmethod def dims(self) -> int: """ Returns the number of dimensions of the input shape. @@ -116,7 +115,7 @@ def dims(self) -> int: Note: The method is optional and can be overridden in the derived class. """ - pass + return self.input_shape.dims def scale(self, input_voxel_size: Coordinate) -> Coordinate: """ diff --git a/dacapo/experiments/architectures/cnnectome_unet.py b/dacapo/experiments/architectures/cnnectome_unet.py index 6afe8444e..d89e902ac 100644 --- a/dacapo/experiments/architectures/cnnectome_unet.py +++ b/dacapo/experiments/architectures/cnnectome_unet.py @@ -325,24 +325,6 @@ def input_shape(self): The input shape should be given as a tuple ``(batch, channels, [length,] depth, height, width)``. """ return self._input_shape - - @property - def dims(self): - """ - Return the number of dimensions of the input shape. - - Returns: - The number of dimensions. - Raises: - AttributeError: If the input shape is not given. 
- Examples: - >>> unet.dims - 3 - Note: - The input shape should be given as a tuple ``(batch, channels, [length,] depth, height, width)``. - """ - # return self.input_shape.dims - return self.unet.dims @property def num_in_channels(self) -> int: diff --git a/dacapo/experiments/architectures/dummy_architecture.py b/dacapo/experiments/architectures/dummy_architecture.py index b938301ed..fa5a889e7 100644 --- a/dacapo/experiments/architectures/dummy_architecture.py +++ b/dacapo/experiments/architectures/dummy_architecture.py @@ -74,23 +74,6 @@ def num_in_channels(self): This method is used to return the number of input channels for this architecture. """ return self.channels_in - - @property - def dims(self): - """ - Returns the number of dimensions of the input shape. - - Returns: - int: The number of dimensions. - Raises: - NotImplementedError: This method is not implemented in this class. - Examples: - >>> dummy_architecture.dims - 3 - Note: - This method is used to return the number of dimensions of the input shape. 
- """ - return self.input_shape.dims @property def num_out_channels(self): From 0d3b3d2bfe29f9a1d94bd8ddb0666a438f9f14c6 Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 13 Nov 2024 11:23:21 -0500 Subject: [PATCH 06/43] more architecture tests --- tests/fixtures/__init__.py | 2 +- tests/fixtures/architectures.py | 23 ++++++++++++++++++++++- tests/operations/test_architecture.py | 26 ++++++++++++++++++++++++++ tests/operations/test_context.py | 14 +++++++++++--- 4 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 tests/operations/test_architecture.py diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 3ea282acc..23f9a14fe 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -1,5 +1,5 @@ from .db import options -from .architectures import dummy_architecture +from .architectures import dummy_architecture, unet_architecture from .arrays import dummy_array, zarr_array, cellmap_array from .datasplits import dummy_datasplit, twelve_class_datasplit, six_class_datasplit from .evaluators import binary_3_channel_evaluator diff --git a/tests/fixtures/architectures.py b/tests/fixtures/architectures.py index 6980c8f6b..e940e5aed 100644 --- a/tests/fixtures/architectures.py +++ b/tests/fixtures/architectures.py @@ -1,4 +1,7 @@ -from dacapo.experiments.architectures import DummyArchitectureConfig +from dacapo.experiments.architectures import ( + DummyArchitectureConfig, + CNNectomeUNetConfig, +) import pytest @@ -8,3 +11,21 @@ def dummy_architecture(): yield DummyArchitectureConfig( name="dummy_architecture", num_in_channels=1, num_out_channels=12 ) + + +@pytest.fixture() +def unet_architecture(): + yield CNNectomeUNetConfig( + name="tmp_unet_architecture", + input_shape=(2, 132, 132), + eval_shape_increase=(8, 32, 32), + fmaps_in=2, + num_fmaps=8, + fmaps_out=8, + fmap_inc_factor=2, + downsample_factors=[(1, 4, 4), (1, 4, 4)], + kernel_size_down=[[(1, 3, 3)] * 2] * 3, + kernel_size_up=[[(1, 3, 3)] * 2] * 2, + 
constant_upsample=True, + padding="valid", + ) diff --git a/tests/operations/test_architecture.py b/tests/operations/test_architecture.py new file mode 100644 index 000000000..5ba387b44 --- /dev/null +++ b/tests/operations/test_architecture.py @@ -0,0 +1,26 @@ +from ..fixtures import * + +import pytest +from pytest_lazy_fixtures import lf + +import logging + +logging.basicConfig(level=logging.INFO) + + +@pytest.mark.parametrize( + "architecture_config", + [ + lf("dummy_architecture"), + lf("unet_architecture"), + ], +) +def test_architecture( + architecture_config, +): + + architecture_type = architecture_config.architecture_type + + architecture = architecture_type(architecture_config) + + assert architecture.dims is not None, f"Architecture dims are None {architecture}" diff --git a/tests/operations/test_context.py b/tests/operations/test_context.py index b7f70500e..b2924e721 100644 --- a/tests/operations/test_context.py +++ b/tests/operations/test_context.py @@ -3,12 +3,20 @@ import pytest -@pytest.mark.parametrize("device", ["cpu", "cuda"]) +@pytest.mark.parametrize("device", [""]) def test_create_compute_context(device): compute_context = create_compute_context() assert compute_context is not None assert compute_context.device is not None if torch.cuda.is_available(): - assert compute_context.device == torch.device('cuda'), "Model is not on CUDA when CUDA is available {}".format(compute_context.device) + assert compute_context.device == torch.device( + "cuda" + ), "Model is not on CUDA when CUDA is available {}".format( + compute_context.device + ) else: - assert compute_context.device == torch.device('cpu'), "Model is not on CPU when CUDA is not available {}".format(compute_context.device) \ No newline at end of file + assert compute_context.device == torch.device( + "cpu" + ), "Model is not on CPU when CUDA is not available {}".format( + compute_context.device + ) From c35b702f251238e7766c1962e48fee3a52a97dd8 Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 
13 Nov 2024 11:38:24 -0500 Subject: [PATCH 07/43] add stored architecture test --- tests/operations/test_architecture.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/operations/test_architecture.py b/tests/operations/test_architecture.py index 5ba387b44..f1f200715 100644 --- a/tests/operations/test_architecture.py +++ b/tests/operations/test_architecture.py @@ -18,9 +18,29 @@ def test_architecture( architecture_config, ): + architecture = architecture_config.architecture_type(architecture_config) + assert architecture.dims is not None, f"Architecture dims are None {architecture}" + + + + +@pytest.mark.parametrize( + "architecture_config", + [ + lf("dummy_architecture"), + lf("unet_architecture"), + ], +) +def test_stored_architecture( + architecture_config, +): + from dacapo.store.create_store import create_config_store + config_store = create_config_store() + config_store.store_architecture_config(architecture_config) + + retrieved_arch_config = config_store.retrieve_architecture_config(architecture_config.name) - architecture_type = architecture_config.architecture_type - architecture = architecture_type(architecture_config) + architecture = retrieved_arch_config.architecture_type(retrieved_arch_config) assert architecture.dims is not None, f"Architecture dims are None {architecture}" From ac1144ed5dda661b04cf172b91669eef2a124a34 Mon Sep 17 00:00:00 2001 From: William Patton Date: Wed, 13 Nov 2024 10:49:06 -0800 Subject: [PATCH 08/43] fix bug in cnnectome_unet return types The Architecture superclass says that we should be returning coordinates for these properties This is important because we regularly use these values in arithmetic where we expect to execute +-*/ element wise --- dacapo/experiments/architectures/cnnectome_unet.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dacapo/experiments/architectures/cnnectome_unet.py b/dacapo/experiments/architectures/cnnectome_unet.py 
index d89e902ac..f706c4fdb 100644 --- a/dacapo/experiments/architectures/cnnectome_unet.py +++ b/dacapo/experiments/architectures/cnnectome_unet.py @@ -3,6 +3,8 @@ import torch import torch.nn as nn +from funlib.geometry import Coordinate + import math @@ -176,7 +178,7 @@ def __init__(self, architecture_config): self.unet = self.module() @property - def eval_shape_increase(self): + def eval_shape_increase(self) -> Coordinate: """ The increase in shape due to the U-Net. @@ -192,7 +194,7 @@ def eval_shape_increase(self): """ if self._eval_shape_increase is None: return super().eval_shape_increase - return self._eval_shape_increase + return Coordinate(self._eval_shape_increase) def module(self): """ @@ -306,11 +308,11 @@ def scale(self, voxel_size): The voxel size should be given as a tuple ``(z, y, x)``. """ for upsample_factor in self.upsample_factors: - voxel_size = voxel_size / upsample_factor + voxel_size = voxel_size / Coordinate(upsample_factor) return voxel_size @property - def input_shape(self): + def input_shape(self) -> Coordinate: """ Return the input shape of the U-Net. 
From 08d7074ebe6c5b4d4a7073bec455a2b6d40cc6a7 Mon Sep 17 00:00:00 2001 From: William Patton Date: Wed, 13 Nov 2024 10:49:53 -0800 Subject: [PATCH 09/43] CNNectomeUNet fix kernel size logic --- dacapo/experiments/architectures/cnnectome_unet.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dacapo/experiments/architectures/cnnectome_unet.py b/dacapo/experiments/architectures/cnnectome_unet.py index f706c4fdb..116be284c 100644 --- a/dacapo/experiments/architectures/cnnectome_unet.py +++ b/dacapo/experiments/architectures/cnnectome_unet.py @@ -237,16 +237,15 @@ def module(self): """ fmaps_in = self.fmaps_in levels = len(self.downsample_factors) + 1 - dims = len(self.downsample_factors[0]) - if hasattr(self, "kernel_size_down"): + if self.kernel_size_down is not None: kernel_size_down = self.kernel_size_down else: - kernel_size_down = [[(3,) * dims, (3,) * dims]] * levels - if hasattr(self, "kernel_size_up"): + kernel_size_down = [[(3,) * self.dims, (3,) * self.dims]] * levels + if self.kernel_size_up is not None: kernel_size_up = self.kernel_size_up else: - kernel_size_up = [[(3,) * dims, (3,) * dims]] * (levels - 1) + kernel_size_up = [[(3,) * self.dims, (3,) * self.dims]] * (levels - 1) # downsample factors has to be a list of tuples downsample_factors = [tuple(x) for x in self.downsample_factors] @@ -326,7 +325,7 @@ def input_shape(self) -> Coordinate: Note: The input shape should be given as a tuple ``(batch, channels, [length,] depth, height, width)``. 
""" - return self._input_shape + return Coordinate(self._input_shape) @property def num_in_channels(self) -> int: From a0b51165d56647d77fb05d3fd7b471aec279e332 Mon Sep 17 00:00:00 2001 From: William Patton Date: Wed, 13 Nov 2024 10:52:52 -0800 Subject: [PATCH 10/43] CNNectomeUNet: make the final conv pass in the upsample pass a bit more robust If we upsample, we probably want to apply a convolution to finetune the outputs rather than simply upsampling which we could do outside of a network. If we assume a kernel of size (3, 3, 3), it fails for 2D networks that process using kernels of size (1, 3, 3). We now just use the last kernel in the kernel size up. This is a bit more robust. --- dacapo/experiments/architectures/cnnectome_unet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/experiments/architectures/cnnectome_unet.py b/dacapo/experiments/architectures/cnnectome_unet.py index 116be284c..c064305b1 100644 --- a/dacapo/experiments/architectures/cnnectome_unet.py +++ b/dacapo/experiments/architectures/cnnectome_unet.py @@ -281,7 +281,7 @@ def module(self): conv = ConvPass( self.fmaps_out, self.fmaps_out, - [(3,) * len(upsample_factor)] * 2, + kernel_size_up[-1], activation="ReLU", batch_norm=self.batch_norm, ) From 36cbd6724b793810c7306cc5cec35f064617e78c Mon Sep 17 00:00:00 2001 From: William Patton Date: Wed, 13 Nov 2024 10:53:37 -0800 Subject: [PATCH 11/43] Gunpowder Trainer: if the raw data doesn't have a channel dim, add it during training Otherwise the BatchNorm breaks --- dacapo/experiments/trainers/gunpowder_trainer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py index dcb40c115..bea9c96e2 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer.py +++ b/dacapo/experiments/trainers/gunpowder_trainer.py @@ -173,6 +173,8 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): assert 
isinstance(dataset.weight, int), dataset raw_source = gp.ArraySource(raw_key, dataset.raw) + if dataset.raw.channel_dims == 0: + raw_source += gp.Unsqueeze([raw_key], axis=0) if self.clip_raw: raw_source += gp.Crop( raw_key, dataset.gt.roi.snap_to_grid(dataset.raw.voxel_size) From 23d1c22cf83a47d6ff735c43a48a097b89d94330 Mon Sep 17 00:00:00 2001 From: William Patton Date: Wed, 13 Nov 2024 10:54:20 -0800 Subject: [PATCH 12/43] Datasplit test fixture: use the voxel_size attribute for voxel size. This should probably just be switched to use `funlib.perisistence.prepare_ds` --- tests/fixtures/datasplits.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/fixtures/datasplits.py b/tests/fixtures/datasplits.py index 448c9c834..73c282a89 100644 --- a/tests/fixtures/datasplits.py +++ b/tests/fixtures/datasplits.py @@ -73,10 +73,10 @@ def twelve_class_datasplit(tmp_path): gt_dataset[:] += random_data > i raw_dataset[:] = random_data raw_dataset.attrs["offset"] = (0, 0, 0) - raw_dataset.attrs["resolution"] = (2, 2, 2) + raw_dataset.attrs["voxel_size"] = (2, 2, 2) raw_dataset.attrs["axis_names"] = ("z", "y", "x") gt_dataset.attrs["offset"] = (0, 0, 0) - gt_dataset.attrs["resolution"] = (2, 2, 2) + gt_dataset.attrs["voxel_size"] = (2, 2, 2) gt_dataset.attrs["axis_names"] = ("z", "y", "x") crop1 = RawGTDatasetConfig(name="crop1", raw_config=crop1_raw, gt_config=crop1_gt) @@ -184,10 +184,10 @@ def six_class_datasplit(tmp_path): gt_dataset[:] += random_data > i raw_dataset[:] = random_data raw_dataset.attrs["offset"] = (0, 0, 0) - raw_dataset.attrs["resolution"] = (2, 2, 2) + raw_dataset.attrs["voxel_size"] = (2, 2, 2) raw_dataset.attrs["axis_names"] = ("z", "y", "x") gt_dataset.attrs["offset"] = (0, 0, 0) - gt_dataset.attrs["resolution"] = (2, 2, 2) + gt_dataset.attrs["voxel_size"] = (2, 2, 2) gt_dataset.attrs["axis_names"] = ("z", "y", "x") crop1 = RawGTDatasetConfig( From 270c4692864142dd084100930051dd096bdd2230 Mon Sep 17 00:00:00 2001 From: 
William Patton Date: Wed, 13 Nov 2024 10:55:12 -0800 Subject: [PATCH 13/43] 2D model still only gets data with a single channel from the trainer --- tests/operations/test_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/operations/test_train.py b/tests/operations/test_train.py index be0a94d16..e32276cec 100644 --- a/tests/operations/test_train.py +++ b/tests/operations/test_train.py @@ -49,7 +49,7 @@ def unet_architecture(batch_norm, upsample, use_attention, three_d): name=name, input_shape=(2, 132, 132), eval_shape_increase=(8, 32, 32), - fmaps_in=2, + fmaps_in=1, num_fmaps=8, fmaps_out=8, fmap_inc_factor=2, From ea4f2b1d8bacdaaefffc328870524bc12fb84138 Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 13 Nov 2024 14:09:30 -0500 Subject: [PATCH 14/43] fix dims error --- dacapo/experiments/architectures/architecture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/experiments/architectures/architecture.py b/dacapo/experiments/architectures/architecture.py index 0f188560e..7ec81bd4f 100644 --- a/dacapo/experiments/architectures/architecture.py +++ b/dacapo/experiments/architectures/architecture.py @@ -115,7 +115,7 @@ def dims(self) -> int: Note: The method is optional and can be overridden in the derived class. 
""" - return self.input_shape.dims + return Coordinate(self.input_shape).dims def scale(self, input_voxel_size: Coordinate) -> Coordinate: """ From 57822af38828e4f758aad0d2738af0eaef38d67c Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 13 Nov 2024 19:26:11 +0000 Subject: [PATCH 15/43] :art: Format Python code with psf/black --- tests/operations/test_architecture.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/operations/test_architecture.py b/tests/operations/test_architecture.py index f1f200715..e2ee90502 100644 --- a/tests/operations/test_architecture.py +++ b/tests/operations/test_architecture.py @@ -22,8 +22,6 @@ def test_architecture( assert architecture.dims is not None, f"Architecture dims are None {architecture}" - - @pytest.mark.parametrize( "architecture_config", [ @@ -35,11 +33,13 @@ def test_stored_architecture( architecture_config, ): from dacapo.store.create_store import create_config_store + config_store = create_config_store() config_store.store_architecture_config(architecture_config) - - retrieved_arch_config = config_store.retrieve_architecture_config(architecture_config.name) + retrieved_arch_config = config_store.retrieve_architecture_config( + architecture_config.name + ) architecture = retrieved_arch_config.architecture_type(retrieved_arch_config) From f3d0508135cd17168cd79e6c463929826544c4f8 Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 13 Nov 2024 15:25:58 -0500 Subject: [PATCH 16/43] revert change, fixed in unet --- dacapo/experiments/architectures/architecture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/experiments/architectures/architecture.py b/dacapo/experiments/architectures/architecture.py index 7ec81bd4f..0f188560e 100644 --- a/dacapo/experiments/architectures/architecture.py +++ b/dacapo/experiments/architectures/architecture.py @@ -115,7 +115,7 @@ def dims(self) -> int: Note: The method is optional and can be overridden in the derived class. 
""" - return Coordinate(self.input_shape).dims + return self.input_shape.dims def scale(self, input_voxel_size: Coordinate) -> Coordinate: """ From 6d87d38ccf0f4729a461dab65a3df092b9057bd6 Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 13 Nov 2024 16:28:41 -0500 Subject: [PATCH 17/43] organize --- tests/fixtures/__init__.py | 2 +- tests/fixtures/architectures.py | 43 ++++++++++++++++++ tests/operations/test_train.py | 79 +++++++++++++-------------------- 3 files changed, 74 insertions(+), 50 deletions(-) diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 23f9a14fe..b7cf0ac07 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -1,5 +1,5 @@ from .db import options -from .architectures import dummy_architecture, unet_architecture +from .architectures import dummy_architecture, unet_architecture,unet_architecture_builder from .arrays import dummy_array, zarr_array, cellmap_array from .datasplits import dummy_datasplit, twelve_class_datasplit, six_class_datasplit from .evaluators import binary_3_channel_evaluator diff --git a/tests/fixtures/architectures.py b/tests/fixtures/architectures.py index e940e5aed..e407d3d20 100644 --- a/tests/fixtures/architectures.py +++ b/tests/fixtures/architectures.py @@ -29,3 +29,46 @@ def unet_architecture(): constant_upsample=True, padding="valid", ) + + + +def unet_architecture_builder(batch_norm, upsample, use_attention, three_d): + name = "3d_unet" if three_d else "2d_unet" + name = f"{name}_bn" if batch_norm else name + name = f"{name}_up" if upsample else name + name = f"{name}_att" if use_attention else name + + if three_d: + return CNNectomeUNetConfig( + name=name, + input_shape=(188, 188, 188), + eval_shape_increase=(72, 72, 72), + fmaps_in=1, + num_fmaps=6, + fmaps_out=6, + fmap_inc_factor=2, + downsample_factors=[(2, 2, 2), (2, 2, 2), (2, 2, 2)], + constant_upsample=True, + upsample_factors=[(2, 2, 2)] if upsample else [], + batch_norm=batch_norm, + 
use_attention=use_attention, + ) + else: + return CNNectomeUNetConfig( + name=name, + input_shape=(2, 132, 132), + eval_shape_increase=(8, 32, 32), + fmaps_in=1, + num_fmaps=8, + fmaps_out=8, + fmap_inc_factor=2, + downsample_factors=[(1, 4, 4), (1, 4, 4)], + kernel_size_down=[[(1, 3, 3)] * 2] * 3, + kernel_size_up=[[(1, 3, 3)] * 2] * 2, + constant_upsample=True, + padding="valid", + batch_norm=batch_norm, + use_attention=use_attention, + upsample_factors=[(1, 2, 2)] if upsample else [], + ) + diff --git a/tests/operations/test_train.py b/tests/operations/test_train.py index e32276cec..81b86b371 100644 --- a/tests/operations/test_train.py +++ b/tests/operations/test_train.py @@ -15,55 +15,9 @@ logging.basicConfig(level=logging.INFO) -from dacapo.experiments.architectures import ( - DummyArchitectureConfig, - CNNectomeUNetConfig, -) - import pytest -def unet_architecture(batch_norm, upsample, use_attention, three_d): - name = "3d_unet" if three_d else "2d_unet" - name = f"{name}_bn" if batch_norm else name - name = f"{name}_up" if upsample else name - name = f"{name}_att" if use_attention else name - - if three_d: - return CNNectomeUNetConfig( - name=name, - input_shape=(188, 188, 188), - eval_shape_increase=(72, 72, 72), - fmaps_in=1, - num_fmaps=6, - fmaps_out=6, - fmap_inc_factor=2, - downsample_factors=[(2, 2, 2), (2, 2, 2), (2, 2, 2)], - constant_upsample=True, - upsample_factors=[(2, 2, 2)] if upsample else [], - batch_norm=batch_norm, - use_attention=use_attention, - ) - else: - return CNNectomeUNetConfig( - name=name, - input_shape=(2, 132, 132), - eval_shape_increase=(8, 32, 32), - fmaps_in=1, - num_fmaps=8, - fmaps_out=8, - fmap_inc_factor=2, - downsample_factors=[(1, 4, 4), (1, 4, 4)], - kernel_size_down=[[(1, 3, 3)] * 2] * 3, - kernel_size_up=[[(1, 3, 3)] * 2] * 2, - constant_upsample=True, - padding="valid", - batch_norm=batch_norm, - use_attention=use_attention, - upsample_factors=[(1, 2, 2)] if upsample else [], - ) - - # skip the test for the Apple 
Paravirtual device # that does not support Metal 2.0 @pytest.mark.filterwarnings("ignore:.*Metal 2.0.*:UserWarning") @@ -115,7 +69,34 @@ def test_train( @pytest.mark.parametrize("task", [lf("distance_task")]) @pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) @pytest.mark.parametrize("batch_norm", [True, False]) -@pytest.mark.parametrize("upsample", [True, False]) +@pytest.mark.parametrize("upsample", [False]) +@pytest.mark.parametrize("use_attention", [True, False]) +@pytest.mark.parametrize("three_d", [True, False]) +def test_train_unet( + datasplit, task, trainer, batch_norm, upsample, use_attention, three_d +): + architecture_config = unet_architecture( + batch_norm, upsample, use_attention, three_d + ) + + run_config = RunConfig( + name=f"{architecture_config.name}_run", + task_config=task, + architecture_config=architecture_config, + trainer_config=trainer, + datasplit_config=datasplit, + repetition=0, + num_iterations=2, + ) + run = Run(run_config) + train_run(run) + + +@pytest.mark.parametrize("datasplit", [lf("six_class_datasplit")]) +@pytest.mark.parametrize("task", [lf("distance_task")]) +@pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) +@pytest.mark.parametrize("batch_norm", [True, False]) +@pytest.mark.parametrize("upsample", [False]) @pytest.mark.parametrize("use_attention", [True, False]) @pytest.mark.parametrize("three_d", [True, False]) def test_train_unet( @@ -125,7 +106,7 @@ def test_train_unet( stats_store = create_stats_store() weights_store = create_weights_store() - architecture_config = unet_architecture( + architecture_config = unet_architecture_builder( batch_norm, upsample, use_attention, three_d ) @@ -136,7 +117,7 @@ def test_train_unet( trainer_config=trainer, datasplit_config=datasplit, repetition=0, - num_iterations=2, + num_iterations=30, ) try: store.store_run_config(run_config) From 2b80cfab2bc9ffc975654bcdc559d876fe3e169c Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 13 Nov 2024 17:24:30 -0500 
Subject: [PATCH 18/43] add validate test --- tests/fixtures/__init__.py | 2 +- tests/fixtures/architectures.py | 2 +- tests/fixtures/datasplits.py | 119 ++++++++++++++++++++++++++ tests/operations/test_architecture.py | 6 +- tests/operations/test_train.py | 73 ++++++++++++++-- tests/operations/test_validate.py | 36 ++++++++ 6 files changed, 228 insertions(+), 10 deletions(-) diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index b7cf0ac07..4d30aff6a 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -1,7 +1,7 @@ from .db import options from .architectures import dummy_architecture, unet_architecture,unet_architecture_builder from .arrays import dummy_array, zarr_array, cellmap_array -from .datasplits import dummy_datasplit, twelve_class_datasplit, six_class_datasplit +from .datasplits import dummy_datasplit, twelve_class_datasplit, six_class_datasplit, upsample_six_class_datasplit from .evaluators import binary_3_channel_evaluator from .losses import dummy_loss from .post_processors import argmax, threshold diff --git a/tests/fixtures/architectures.py b/tests/fixtures/architectures.py index e407d3d20..5f82b399f 100644 --- a/tests/fixtures/architectures.py +++ b/tests/fixtures/architectures.py @@ -19,7 +19,7 @@ def unet_architecture(): name="tmp_unet_architecture", input_shape=(2, 132, 132), eval_shape_increase=(8, 32, 32), - fmaps_in=2, + fmaps_in=1, num_fmaps=8, fmaps_out=8, fmap_inc_factor=2, diff --git a/tests/fixtures/datasplits.py b/tests/fixtures/datasplits.py index 73c282a89..ca546e161 100644 --- a/tests/fixtures/datasplits.py +++ b/tests/fixtures/datasplits.py @@ -206,3 +206,122 @@ def six_class_datasplit(tmp_path): validate_configs=[crop3], ) return six_class_distances_datasplit_config + + + + +@pytest.fixture() +def upsample_six_class_datasplit(tmp_path): + """ + two crops for training, one for validation. Raw data is normally distributed + around 0 with std 1. + gt is provided as distances. 
First, gt is generated as a 12 class problem: + gt has 12 classes where class i in [0, 11] is all voxels with raw intensity + between (raw.min() + i(raw.max()-raw.min())/12, raw.min() + (i+1)(raw.max()-raw.min())/12). + Then we pair up classes (i, i+1) for i in [0,2,4,6,8,10], and compute distances to + the nearest voxel in the pair. This leaves us with 6 distance channels. + """ + twelve_class_zarr = zarr.open(tmp_path / "twelve_class.zarr", "w") + crop1_raw = ZarrArrayConfig( + name="crop1_raw", + file_name=tmp_path / "twelve_class.zarr", + dataset=f"volumes/crop1/raw", + ) + crop1_gt = ZarrArrayConfig( + name="crop1_gt", + file_name=tmp_path / "twelve_class.zarr", + dataset=f"volumes/crop1/gt", + ) + crop1_distances = BinarizeArrayConfig( + "crop1_distances", + source_array_config=crop1_gt, + groupings=[ + ("a", [0, 1]), + ("b", [2, 3]), + ("c", [4, 5]), + ("d", [6, 7]), + ("e", [8, 9]), + ("f", [10, 11]), + ], + ) + crop2_raw = ZarrArrayConfig( + name="crop2_raw", + file_name=tmp_path / "twelve_class.zarr", + dataset=f"volumes/crop2/raw", + ) + crop2_gt = ZarrArrayConfig( + name="crop2_gt", + file_name=tmp_path / "twelve_class.zarr", + dataset=f"volumes/crop2/gt", + ) + crop2_distances = BinarizeArrayConfig( + "crop2_distances", + source_array_config=crop2_gt, + groupings=[ + ("a", [0, 1]), + ("b", [2, 3]), + ("c", [4, 5]), + ("d", [6, 7]), + ("e", [8, 9]), + ("f", [10, 11]), + ], + ) + crop3_raw = ZarrArrayConfig( + name="crop3_raw", + file_name=tmp_path / "twelve_class.zarr", + dataset=f"volumes/crop3/raw", + ) + crop3_gt = ZarrArrayConfig( + name="crop3_gt", + file_name=tmp_path / "twelve_class.zarr", + dataset=f"volumes/crop3/gt", + ) + crop3_distances = BinarizeArrayConfig( + "crop3_distances", + source_array_config=crop3_gt, + groupings=[ + ("a", [0, 1]), + ("b", [2, 3]), + ("c", [4, 5]), + ("d", [6, 7]), + ("e", [8, 9]), + ("f", [10, 11]), + ], + ) + for raw, gt in zip( + [crop1_raw, crop2_raw, crop3_raw], [crop1_gt, crop2_gt, crop3_gt] + ): + 
raw_dataset = twelve_class_zarr.create_dataset( + raw.dataset, shape=(40, 20, 20), dtype=np.float32 + ) + gt_dataset = twelve_class_zarr.create_dataset( + gt.dataset, shape=(40, 20, 20), dtype=np.uint8 + ) + random_data = np.random.rand(40, 20, 20) + # as intensities increase so does the class + for i in list(np.linspace(random_data.min(), random_data.max(), 13))[1:]: + gt_dataset[:] += random_data > i + raw_dataset[:] = random_data + raw_dataset.attrs["offset"] = (0, 0, 0) + raw_dataset.attrs["voxel_size"] = (4, 4, 4) + raw_dataset.attrs["axis_names"] = ("z", "y", "x") + gt_dataset.attrs["offset"] = (0, 0, 0) + gt_dataset.attrs["voxel_size"] = (2, 2, 2) + gt_dataset.attrs["axis_names"] = ("z", "y", "x") + + crop1 = RawGTDatasetConfig( + name="crop1", raw_config=crop1_raw, gt_config=crop1_distances + ) + crop2 = RawGTDatasetConfig( + name="crop2", raw_config=crop2_raw, gt_config=crop2_distances + ) + crop3 = RawGTDatasetConfig( + name="crop3", raw_config=crop3_raw, gt_config=crop3_distances + ) + + six_class_distances_datasplit_config = TrainValidateDataSplitConfig( + name="six_class_distances_datasplit", + train_configs=[crop1, crop2], + validate_configs=[crop3], + ) + return six_class_distances_datasplit_config diff --git a/tests/operations/test_architecture.py b/tests/operations/test_architecture.py index e2ee90502..de5d44f61 100644 --- a/tests/operations/test_architecture.py +++ b/tests/operations/test_architecture.py @@ -35,7 +35,11 @@ def test_stored_architecture( from dacapo.store.create_store import create_config_store config_store = create_config_store() - config_store.store_architecture_config(architecture_config) + try: + config_store.store_architecture_config(architecture_config) + except: + config_store.delete_architecture_config(architecture_config.name) + config_store.store_architecture_config(architecture_config) retrieved_arch_config = config_store.retrieve_architecture_config( architecture_config.name diff --git a/tests/operations/test_train.py 
b/tests/operations/test_train.py index 81b86b371..5af0d95c7 100644 --- a/tests/operations/test_train.py +++ b/tests/operations/test_train.py @@ -68,14 +68,14 @@ def test_train( @pytest.mark.parametrize("datasplit", [lf("six_class_datasplit")]) @pytest.mark.parametrize("task", [lf("distance_task")]) @pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) -@pytest.mark.parametrize("batch_norm", [True, False]) +@pytest.mark.parametrize("batch_norm", [ False]) @pytest.mark.parametrize("upsample", [False]) -@pytest.mark.parametrize("use_attention", [True, False]) -@pytest.mark.parametrize("three_d", [True, False]) +@pytest.mark.parametrize("use_attention", [ False]) +@pytest.mark.parametrize("three_d", [ False]) def test_train_unet( datasplit, task, trainer, batch_norm, upsample, use_attention, three_d ): - architecture_config = unet_architecture( + architecture_config = unet_architecture_builder( batch_norm, upsample, use_attention, three_d ) @@ -117,7 +117,7 @@ def test_train_unet( trainer_config=trainer, datasplit_config=datasplit, repetition=0, - num_iterations=30, + num_iterations=2, ) try: store.store_run_config(run_config) @@ -138,8 +138,67 @@ def test_train_unet( final_weights = weights_store.retrieve_weights(run.name, run.train_until) for name, weight in init_weights.model.items(): - weight_diff = (weight - final_weights.model[name]).sum() - assert abs(weight_diff) > np.finfo(weight_diff.numpy().dtype).eps, weight_diff + weight_diff = (weight - final_weights.model[name]).any() + assert weight_diff != 0, "Weights did not change" + + # assert train_stats and validation_scores are available + + training_stats = stats_store.retrieve_training_stats(run_config.name) + + assert training_stats.trained_until() == run_config.num_iterations + + + + + +@pytest.mark.parametrize("upsample_datasplit", [lf("upsample_six_class_datasplit")]) +@pytest.mark.parametrize("task", [lf("distance_task")]) +@pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) 
+@pytest.mark.parametrize("batch_norm", [True, False]) +@pytest.mark.parametrize("upsample", [True]) +@pytest.mark.parametrize("use_attention", [True, False]) +@pytest.mark.parametrize("three_d", [True, False]) +def test_upsample_train_unet( + upsample_datasplit, task, trainer, batch_norm, upsample, use_attention, three_d +): + store = create_config_store() + stats_store = create_stats_store() + weights_store = create_weights_store() + + architecture_config = unet_architecture_builder( + batch_norm, upsample, use_attention, three_d + ) + + run_config = RunConfig( + name=f"{architecture_config.name}_run", + task_config=task, + architecture_config=architecture_config, + trainer_config=trainer, + datasplit_config=upsample_datasplit, + repetition=0, + num_iterations=2, + ) + try: + store.store_run_config(run_config) + except Exception as e: + store.delete_run_config(run_config.name) + store.store_run_config(run_config) + + run = Run(run_config) + + # ------------------------------------- + + # train + + weights_store.store_weights(run, 0) + train_run(run) + + init_weights = weights_store.retrieve_weights(run.name, 0) + final_weights = weights_store.retrieve_weights(run.name, run.train_until) + + for name, weight in init_weights.model.items(): + weight_diff = (weight - final_weights.model[name]).any() + assert weight_diff != 0, "Weights did not change" # assert train_stats and validation_scores are available diff --git a/tests/operations/test_validate.py b/tests/operations/test_validate.py index 860f941e9..d776b82db 100644 --- a/tests/operations/test_validate.py +++ b/tests/operations/test_validate.py @@ -7,6 +7,8 @@ from dacapo.store.create_store import create_config_store, create_weights_store from dacapo import validate, validate_run +from dacapo.experiments.run_config import RunConfig + import pytest from pytest_lazy_fixtures import lf @@ -97,3 +99,37 @@ def test_validate_run( if debug: os.chdir(old_path) + + + + +@pytest.mark.parametrize("datasplit", 
[lf("six_class_datasplit")]) +@pytest.mark.parametrize("task", [lf("distance_task")]) +@pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) +@pytest.mark.parametrize("architecture", [lf("unet_architecture")]) +def test_validate_unet( + datasplit, task, trainer, architecture +): + store = create_config_store() + weights_store = create_weights_store() + + run_config = RunConfig( + name=f"{architecture.name}_run", + task_config=task, + architecture_config=architecture, + trainer_config=trainer, + datasplit_config=datasplit, + repetition=0, + num_iterations=2, + ) + try: + store.store_run_config(run_config) + except Exception as e: + store.delete_run_config(run_config.name) + store.store_run_config(run_config) + + run = Run(run_config) + + # ------------------------------------- + weights_store.store_weights(run, 0) + validate_run(run, 0) \ No newline at end of file From 65a87438156c18248313b84d875b84bdc10180f1 Mon Sep 17 00:00:00 2001 From: mzouink Date: Wed, 13 Nov 2024 22:25:05 +0000 Subject: [PATCH 19/43] :art: Format Python code with psf/black --- tests/fixtures/__init__.py | 13 +++++++++++-- tests/fixtures/architectures.py | 2 -- tests/fixtures/datasplits.py | 2 -- tests/operations/test_train.py | 9 +++------ tests/operations/test_validate.py | 8 ++------ 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 4d30aff6a..db427abdb 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -1,7 +1,16 @@ from .db import options -from .architectures import dummy_architecture, unet_architecture,unet_architecture_builder +from .architectures import ( + dummy_architecture, + unet_architecture, + unet_architecture_builder, +) from .arrays import dummy_array, zarr_array, cellmap_array -from .datasplits import dummy_datasplit, twelve_class_datasplit, six_class_datasplit, upsample_six_class_datasplit +from .datasplits import ( + dummy_datasplit, + twelve_class_datasplit, + 
six_class_datasplit, + upsample_six_class_datasplit, +) from .evaluators import binary_3_channel_evaluator from .losses import dummy_loss from .post_processors import argmax, threshold diff --git a/tests/fixtures/architectures.py b/tests/fixtures/architectures.py index 5f82b399f..da3911f5d 100644 --- a/tests/fixtures/architectures.py +++ b/tests/fixtures/architectures.py @@ -31,7 +31,6 @@ def unet_architecture(): ) - def unet_architecture_builder(batch_norm, upsample, use_attention, three_d): name = "3d_unet" if three_d else "2d_unet" name = f"{name}_bn" if batch_norm else name @@ -71,4 +70,3 @@ def unet_architecture_builder(batch_norm, upsample, use_attention, three_d): use_attention=use_attention, upsample_factors=[(1, 2, 2)] if upsample else [], ) - diff --git a/tests/fixtures/datasplits.py b/tests/fixtures/datasplits.py index ca546e161..e94aee0c6 100644 --- a/tests/fixtures/datasplits.py +++ b/tests/fixtures/datasplits.py @@ -208,8 +208,6 @@ def six_class_datasplit(tmp_path): return six_class_distances_datasplit_config - - @pytest.fixture() def upsample_six_class_datasplit(tmp_path): """ diff --git a/tests/operations/test_train.py b/tests/operations/test_train.py index 5af0d95c7..98d1d68d2 100644 --- a/tests/operations/test_train.py +++ b/tests/operations/test_train.py @@ -68,10 +68,10 @@ def test_train( @pytest.mark.parametrize("datasplit", [lf("six_class_datasplit")]) @pytest.mark.parametrize("task", [lf("distance_task")]) @pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) -@pytest.mark.parametrize("batch_norm", [ False]) +@pytest.mark.parametrize("batch_norm", [False]) @pytest.mark.parametrize("upsample", [False]) -@pytest.mark.parametrize("use_attention", [ False]) -@pytest.mark.parametrize("three_d", [ False]) +@pytest.mark.parametrize("use_attention", [False]) +@pytest.mark.parametrize("three_d", [False]) def test_train_unet( datasplit, task, trainer, batch_norm, upsample, use_attention, three_d ): @@ -148,9 +148,6 @@ def test_train_unet( 
assert training_stats.trained_until() == run_config.num_iterations - - - @pytest.mark.parametrize("upsample_datasplit", [lf("upsample_six_class_datasplit")]) @pytest.mark.parametrize("task", [lf("distance_task")]) @pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) diff --git a/tests/operations/test_validate.py b/tests/operations/test_validate.py index d776b82db..280d2dcfb 100644 --- a/tests/operations/test_validate.py +++ b/tests/operations/test_validate.py @@ -101,15 +101,11 @@ def test_validate_run( os.chdir(old_path) - - @pytest.mark.parametrize("datasplit", [lf("six_class_datasplit")]) @pytest.mark.parametrize("task", [lf("distance_task")]) @pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) @pytest.mark.parametrize("architecture", [lf("unet_architecture")]) -def test_validate_unet( - datasplit, task, trainer, architecture -): +def test_validate_unet(datasplit, task, trainer, architecture): store = create_config_store() weights_store = create_weights_store() @@ -132,4 +128,4 @@ def test_validate_unet( # ------------------------------------- weights_store.store_weights(run, 0) - validate_run(run, 0) \ No newline at end of file + validate_run(run, 0) From 200ff20bbc1aa50e7339333f7252dd5d9fbde9c4 Mon Sep 17 00:00:00 2001 From: mzouink Date: Thu, 14 Nov 2024 09:49:20 -0500 Subject: [PATCH 20/43] add unet2d/3d validation test --- tests/fixtures/__init__.py | 3 ++- tests/fixtures/architectures.py | 16 ++++++++++++++++ tests/fixtures/tasks.py | 7 +++++++ tests/operations/test_validate.py | 4 ++-- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index db427abdb..849cb035d 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -2,6 +2,7 @@ from .architectures import ( dummy_architecture, unet_architecture, + unet_3d_architecture, unet_architecture_builder, ) from .arrays import dummy_array, zarr_array, cellmap_array @@ -16,5 +17,5 @@ from .post_processors 
import argmax, threshold from .predictors import distance_predictor, onehot_predictor from .runs import dummy_run, distance_run, onehot_run -from .tasks import dummy_task, distance_task, onehot_task +from .tasks import dummy_task, distance_task, onehot_task, six_onehot_task from .trainers import dummy_trainer, gunpowder_trainer diff --git a/tests/fixtures/architectures.py b/tests/fixtures/architectures.py index da3911f5d..a75a7764f 100644 --- a/tests/fixtures/architectures.py +++ b/tests/fixtures/architectures.py @@ -31,6 +31,22 @@ def unet_architecture(): ) + +@pytest.fixture() +def unet_3d_architecture(): + yield CNNectomeUNetConfig( + name="tmp_unet_3d_architecture", + input_shape=(188, 188, 188), + eval_shape_increase=(72, 72, 72), + fmaps_in=1, + num_fmaps=6, + fmaps_out=6, + fmap_inc_factor=2, + downsample_factors=[(2, 2, 2), (2, 2, 2), (2, 2, 2)], + constant_upsample=True, + ) + + def unet_architecture_builder(batch_norm, upsample, use_attention, three_d): name = "3d_unet" if three_d else "2d_unet" name = f"{name}_bn" if batch_norm else name diff --git a/tests/fixtures/tasks.py b/tests/fixtures/tasks.py index fcd2c673e..3f91106b5 100644 --- a/tests/fixtures/tasks.py +++ b/tests/fixtures/tasks.py @@ -34,3 +34,10 @@ def onehot_task(): name="one_hot_task", classes=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"], ) + +@pytest.fixture() +def six_onehot_task(): + yield OneHotTaskConfig( + name="one_hot_task", + classes=["a", "b", "c", "d", "e", "f"], + ) \ No newline at end of file diff --git a/tests/operations/test_validate.py b/tests/operations/test_validate.py index 280d2dcfb..abec46ea9 100644 --- a/tests/operations/test_validate.py +++ b/tests/operations/test_validate.py @@ -102,9 +102,9 @@ def test_validate_run( @pytest.mark.parametrize("datasplit", [lf("six_class_datasplit")]) -@pytest.mark.parametrize("task", [lf("distance_task")]) +@pytest.mark.parametrize("task", [lf("distance_task"), lf("six_onehot_task")]) 
@pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) -@pytest.mark.parametrize("architecture", [lf("unet_architecture")]) +@pytest.mark.parametrize("architecture", [lf("unet_architecture"), lf("unet_3d_architecture")]) def test_validate_unet(datasplit, task, trainer, architecture): store = create_config_store() weights_store = create_weights_store() From 1abb3fdff8af15608e07db84581673c8909124c0 Mon Sep 17 00:00:00 2001 From: mzouink Date: Thu, 14 Nov 2024 14:49:56 +0000 Subject: [PATCH 21/43] :art: Format Python code with psf/black --- tests/fixtures/architectures.py | 21 ++++++++++----------- tests/fixtures/tasks.py | 3 ++- tests/operations/test_validate.py | 4 +++- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tests/fixtures/architectures.py b/tests/fixtures/architectures.py index a75a7764f..71b2251de 100644 --- a/tests/fixtures/architectures.py +++ b/tests/fixtures/architectures.py @@ -31,20 +31,19 @@ def unet_architecture(): ) - @pytest.fixture() def unet_3d_architecture(): yield CNNectomeUNetConfig( - name="tmp_unet_3d_architecture", - input_shape=(188, 188, 188), - eval_shape_increase=(72, 72, 72), - fmaps_in=1, - num_fmaps=6, - fmaps_out=6, - fmap_inc_factor=2, - downsample_factors=[(2, 2, 2), (2, 2, 2), (2, 2, 2)], - constant_upsample=True, - ) + name="tmp_unet_3d_architecture", + input_shape=(188, 188, 188), + eval_shape_increase=(72, 72, 72), + fmaps_in=1, + num_fmaps=6, + fmaps_out=6, + fmap_inc_factor=2, + downsample_factors=[(2, 2, 2), (2, 2, 2), (2, 2, 2)], + constant_upsample=True, + ) def unet_architecture_builder(batch_norm, upsample, use_attention, three_d): diff --git a/tests/fixtures/tasks.py b/tests/fixtures/tasks.py index 3f91106b5..4230fd9b7 100644 --- a/tests/fixtures/tasks.py +++ b/tests/fixtures/tasks.py @@ -35,9 +35,10 @@ def onehot_task(): classes=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"], ) + @pytest.fixture() def six_onehot_task(): yield OneHotTaskConfig( name="one_hot_task", classes=["a", 
"b", "c", "d", "e", "f"], - ) \ No newline at end of file + ) diff --git a/tests/operations/test_validate.py b/tests/operations/test_validate.py index abec46ea9..1ae80cd01 100644 --- a/tests/operations/test_validate.py +++ b/tests/operations/test_validate.py @@ -104,7 +104,9 @@ def test_validate_run( @pytest.mark.parametrize("datasplit", [lf("six_class_datasplit")]) @pytest.mark.parametrize("task", [lf("distance_task"), lf("six_onehot_task")]) @pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) -@pytest.mark.parametrize("architecture", [lf("unet_architecture"), lf("unet_3d_architecture")]) +@pytest.mark.parametrize( + "architecture", [lf("unet_architecture"), lf("unet_3d_architecture")] +) def test_validate_unet(datasplit, task, trainer, architecture): store = create_config_store() weights_store = create_weights_store() From dd39075af8f7466f88021b81eac9d77ce6a262cd Mon Sep 17 00:00:00 2001 From: mzouink Date: Thu, 14 Nov 2024 10:53:56 -0500 Subject: [PATCH 22/43] add validation tests --- dacapo/predict_local.py | 5 +++++ tests/operations/test_architecture.py | 32 +++++++++++++++++++++++++++ tests/operations/test_validate.py | 2 +- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/dacapo/predict_local.py b/dacapo/predict_local.py index 674d00a40..875bed602 100644 --- a/dacapo/predict_local.py +++ b/dacapo/predict_local.py @@ -71,6 +71,10 @@ def predict( compute_context = create_compute_context() device = compute_context.device + model_device = next(model.parameters()).device + + assert model_device == device, f"Model is not on the right device, Model: {model_device}, Compute device: {device}" + def predict_fn(block): raw_input = raw_array.to_ndarray(block.read_roi) @@ -85,6 +89,7 @@ def predict_fn(block): raw_input = np.expand_dims(raw_input, 0) axis_names = ["c^"] + axis_names + with torch.no_grad(): model.eval() predictions = ( diff --git a/tests/operations/test_architecture.py b/tests/operations/test_architecture.py index 
de5d44f61..272c410fc 100644 --- a/tests/operations/test_architecture.py +++ b/tests/operations/test_architecture.py @@ -2,6 +2,7 @@ import pytest from pytest_lazy_fixtures import lf +import torch.nn as nn import logging @@ -27,6 +28,7 @@ def test_architecture( [ lf("dummy_architecture"), lf("unet_architecture"), + lf("unet_3d_architecture"), ], ) def test_stored_architecture( @@ -48,3 +50,33 @@ def test_stored_architecture( architecture = retrieved_arch_config.architecture_type(retrieved_arch_config) assert architecture.dims is not None, f"Architecture dims are None {architecture}" + + +@pytest.mark.parametrize( + "architecture_config", + [ + lf("unet_architecture"), + ], +) +def test_2d_conv_unet( + architecture_config, +): + architecture = architecture_config.architecture_type(architecture_config) + for name, module in architecture.named_modules(): + if isinstance(module, nn.Conv3d): + raise ValueError(f"Conv3d found in 2d unet {name}") + + +@pytest.mark.parametrize( + "architecture_config", + [ + lf("unet_3d_architecture"), + ], +) +def test_2d_conv_unet( + architecture_config, +): + architecture = architecture_config.architecture_type(architecture_config) + for name, module in architecture.named_modules(): + if isinstance(module, nn.Conv2d): + raise ValueError(f"Conv2d found in 3d unet {name}") diff --git a/tests/operations/test_validate.py b/tests/operations/test_validate.py index 1ae80cd01..78319654c 100644 --- a/tests/operations/test_validate.py +++ b/tests/operations/test_validate.py @@ -130,4 +130,4 @@ def test_validate_unet(datasplit, task, trainer, architecture): # ------------------------------------- weights_store.store_weights(run, 0) - validate_run(run, 0) + validate(run.name, 0) From 4a18c42b9d291d246fa42fcf7f8c21877a884749 Mon Sep 17 00:00:00 2001 From: mzouink Date: Thu, 14 Nov 2024 16:40:45 -0500 Subject: [PATCH 23/43] error handling 2d unet --- dacapo/predict_local.py | 4 +- tests/fixtures/__init__.py | 2 +- tests/fixtures/architectures.py | 22 
++--- tests/fixtures/runs.py | 43 ++++++++- tests/operations/test_architecture.py | 37 +++++++- tests/operations/test_train.py | 120 +++++++++++++------------- tests/operations/test_validate.py | 4 +- 7 files changed, 152 insertions(+), 80 deletions(-) diff --git a/dacapo/predict_local.py b/dacapo/predict_local.py index 875bed602..d829dffdb 100644 --- a/dacapo/predict_local.py +++ b/dacapo/predict_local.py @@ -71,9 +71,9 @@ def predict( compute_context = create_compute_context() device = compute_context.device - model_device = next(model.parameters()).device + model_device = str(next(model.parameters()).device).split(":")[0] - assert model_device == device, f"Model is not on the right device, Model: {model_device}, Compute device: {device}" + assert model_device == str(device), f"Model is not on the right device, Model: {model_device}, Compute device: {device}" def predict_fn(block): raw_input = raw_array.to_ndarray(block.read_roi) diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 849cb035d..d4186d29b 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -16,6 +16,6 @@ from .losses import dummy_loss from .post_processors import argmax, threshold from .predictors import distance_predictor, onehot_predictor -from .runs import dummy_run, distance_run, onehot_run +from .runs import dummy_run, distance_run, onehot_run, unet_2d_distance_run, unet_3d_distance_run from .tasks import dummy_task, distance_task, onehot_task, six_onehot_task from .trainers import dummy_trainer, gunpowder_trainer diff --git a/tests/fixtures/architectures.py b/tests/fixtures/architectures.py index 71b2251de..558dea561 100644 --- a/tests/fixtures/architectures.py +++ b/tests/fixtures/architectures.py @@ -17,15 +17,15 @@ def dummy_architecture(): def unet_architecture(): yield CNNectomeUNetConfig( name="tmp_unet_architecture", - input_shape=(2, 132, 132), - eval_shape_increase=(8, 32, 32), + input_shape=(132, 132), + eval_shape_increase=(32, 32), 
fmaps_in=1, num_fmaps=8, fmaps_out=8, fmap_inc_factor=2, - downsample_factors=[(1, 4, 4), (1, 4, 4)], - kernel_size_down=[[(1, 3, 3)] * 2] * 3, - kernel_size_up=[[(1, 3, 3)] * 2] * 2, + downsample_factors=[(4, 4), (4, 4)], + kernel_size_down=[[(3, 3)] * 2] * 3, + kernel_size_up=[[(3, 3)] * 2] * 2, constant_upsample=True, padding="valid", ) @@ -70,18 +70,18 @@ def unet_architecture_builder(batch_norm, upsample, use_attention, three_d): else: return CNNectomeUNetConfig( name=name, - input_shape=(2, 132, 132), - eval_shape_increase=(8, 32, 32), + input_shape=(132, 132), + eval_shape_increase=(32, 32), fmaps_in=1, num_fmaps=8, fmaps_out=8, fmap_inc_factor=2, - downsample_factors=[(1, 4, 4), (1, 4, 4)], - kernel_size_down=[[(1, 3, 3)] * 2] * 3, - kernel_size_up=[[(1, 3, 3)] * 2] * 2, + downsample_factors=[(4, 4), ( 4, 4)], + kernel_size_down=[[( 3, 3)] * 2] * 3, + kernel_size_up=[[(3, 3)] * 2] * 2, constant_upsample=True, padding="valid", batch_norm=batch_norm, use_attention=use_attention, - upsample_factors=[(1, 2, 2)] if upsample else [], + upsample_factors=[(2, 2)] if upsample else [], ) diff --git a/tests/fixtures/runs.py b/tests/fixtures/runs.py index 99c4d3269..d5e584f9f 100644 --- a/tests/fixtures/runs.py +++ b/tests/fixtures/runs.py @@ -17,7 +17,7 @@ def distance_run( trainer_config=gunpowder_trainer, datasplit_config=six_class_datasplit, repetition=0, - num_iterations=100, + num_iterations=10, ) @@ -35,7 +35,7 @@ def dummy_run( trainer_config=dummy_trainer, datasplit_config=dummy_datasplit, repetition=0, - num_iterations=100, + num_iterations=10, ) @@ -53,5 +53,42 @@ def onehot_run( trainer_config=gunpowder_trainer, datasplit_config=twelve_class_datasplit, repetition=0, - num_iterations=100, + num_iterations=10, + ) + + +@pytest.fixture() +def unet_2d_distance_run( + six_class_datasplit, + unet_architecture, + distance_task, + gunpowder_trainer, +): + yield RunConfig( + name="unet_2d_distance_run", + task_config=distance_task, + 
architecture_config=unet_architecture, + trainer_config=gunpowder_trainer, + datasplit_config=six_class_datasplit, + repetition=0, + num_iterations=10, + ) + + + +@pytest.fixture() +def unet_3d_distance_run( + six_class_datasplit, + unet_3d_architecture, + distance_task, + gunpowder_trainer, +): + yield RunConfig( + name="unet_3d_distance_run", + task_config=distance_task, + architecture_config=unet_3d_architecture, + trainer_config=gunpowder_trainer, + datasplit_config=six_class_datasplit, + repetition=0, + num_iterations=10, ) diff --git a/tests/operations/test_architecture.py b/tests/operations/test_architecture.py index 272c410fc..55ddf765a 100644 --- a/tests/operations/test_architecture.py +++ b/tests/operations/test_architecture.py @@ -3,7 +3,7 @@ import pytest from pytest_lazy_fixtures import lf import torch.nn as nn - +from dacapo.experiments import Run import logging logging.basicConfig(level=logging.INFO) @@ -58,7 +58,7 @@ def test_stored_architecture( lf("unet_architecture"), ], ) -def test_2d_conv_unet( +def test_3d_conv_unet( architecture_config, ): architecture = architecture_config.architecture_type(architecture_config) @@ -80,3 +80,36 @@ def test_2d_conv_unet( for name, module in architecture.named_modules(): if isinstance(module, nn.Conv2d): raise ValueError(f"Conv2d found in 3d unet {name}") + + + +@pytest.mark.parametrize( + "run_config", + [ + lf("unet_2d_distance_run"), + ], +) +def test_2d_conv_unet_in_run( + run_config, +): + run = Run(run_config) + model = run.model + for name, module in model.named_modules(): + if isinstance(module, nn.Conv3d): + raise ValueError(f"Conv3d found in 2d unet {name}") + + +@pytest.mark.parametrize( + "run_config", + [ + lf("unet_3d_distance_run"), + ], +) +def test_3d_conv_unet_in_run( + run_config, +): + run = Run(run_config) + model = run.model + for name, module in model.named_modules(): + if isinstance(module, nn.Conv2d): + raise ValueError(f"Conv2d found in 3d unet {name}") diff --git 
a/tests/operations/test_train.py b/tests/operations/test_train.py index 98d1d68d2..ebb6d4c34 100644 --- a/tests/operations/test_train.py +++ b/tests/operations/test_train.py @@ -72,7 +72,7 @@ def test_train( @pytest.mark.parametrize("upsample", [False]) @pytest.mark.parametrize("use_attention", [False]) @pytest.mark.parametrize("three_d", [False]) -def test_train_unet( +def test_train_non_stored_unet( datasplit, task, trainer, batch_norm, upsample, use_attention, three_d ): architecture_config = unet_architecture_builder( @@ -80,13 +80,13 @@ def test_train_unet( ) run_config = RunConfig( - name=f"{architecture_config.name}_run", + name=f"{architecture_config.name}_run_v", task_config=task, architecture_config=architecture_config, trainer_config=trainer, datasplit_config=datasplit, repetition=0, - num_iterations=2, + num_iterations=1, ) run = Run(run_config) train_run(run) @@ -110,6 +110,7 @@ def test_train_unet( batch_norm, upsample, use_attention, three_d ) + run_config = RunConfig( name=f"{architecture_config.name}_run", task_config=task, @@ -117,7 +118,7 @@ def test_train_unet( trainer_config=trainer, datasplit_config=datasplit, repetition=0, - num_iterations=2, + num_iterations=1, ) try: store.store_run_config(run_config) @@ -135,7 +136,7 @@ def test_train_unet( train_run(run) init_weights = weights_store.retrieve_weights(run.name, 0) - final_weights = weights_store.retrieve_weights(run.name, run.train_until) + final_weights = weights_store.retrieve_weights(run.name, 1) for name, weight in init_weights.model.items(): weight_diff = (weight - final_weights.model[name]).any() @@ -148,57 +149,58 @@ def test_train_unet( assert training_stats.trained_until() == run_config.num_iterations -@pytest.mark.parametrize("upsample_datasplit", [lf("upsample_six_class_datasplit")]) -@pytest.mark.parametrize("task", [lf("distance_task")]) -@pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) -@pytest.mark.parametrize("batch_norm", [True, False]) 
-@pytest.mark.parametrize("upsample", [True]) -@pytest.mark.parametrize("use_attention", [True, False]) -@pytest.mark.parametrize("three_d", [True, False]) -def test_upsample_train_unet( - upsample_datasplit, task, trainer, batch_norm, upsample, use_attention, three_d -): - store = create_config_store() - stats_store = create_stats_store() - weights_store = create_weights_store() - - architecture_config = unet_architecture_builder( - batch_norm, upsample, use_attention, three_d - ) - - run_config = RunConfig( - name=f"{architecture_config.name}_run", - task_config=task, - architecture_config=architecture_config, - trainer_config=trainer, - datasplit_config=upsample_datasplit, - repetition=0, - num_iterations=2, - ) - try: - store.store_run_config(run_config) - except Exception as e: - store.delete_run_config(run_config.name) - store.store_run_config(run_config) - - run = Run(run_config) - - # ------------------------------------- - - # train - - weights_store.store_weights(run, 0) - train_run(run) - - init_weights = weights_store.retrieve_weights(run.name, 0) - final_weights = weights_store.retrieve_weights(run.name, run.train_until) - - for name, weight in init_weights.model.items(): - weight_diff = (weight - final_weights.model[name]).any() - assert weight_diff != 0, "Weights did not change" - - # assert train_stats and validation_scores are available - - training_stats = stats_store.retrieve_training_stats(run_config.name) - - assert training_stats.trained_until() == run_config.num_iterations +# @pytest.mark.parametrize("upsample_datasplit", [lf("upsample_six_class_datasplit")]) +# @pytest.mark.parametrize("task", [lf("distance_task")]) +# @pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) +# @pytest.mark.parametrize("batch_norm", [True, False]) +# @pytest.mark.parametrize("upsample", [True]) +# @pytest.mark.parametrize("use_attention", [True, False]) +# @pytest.mark.parametrize("three_d", [True, False]) +# def test_upsample_train_unet( +# 
upsample_datasplit, task, trainer, batch_norm, upsample, use_attention, three_d +# ): +# store = create_config_store() +# stats_store = create_stats_store() +# weights_store = create_weights_store() + +# architecture_config = unet_architecture_builder( +# batch_norm, upsample, use_attention, three_d +# ) + +# run_config = RunConfig( +# name=f"{architecture_config.name}_run", +# task_config=task, +# architecture_config=architecture_config, +# trainer_config=trainer, +# datasplit_config=upsample_datasplit, +# repetition=0, +# num_iterations=1, +# ) +# try: +# store.store_run_config(run_config) +# except Exception as e: +# store.delete_run_config(run_config.name) +# store.store_run_config(run_config) + +# run = Run(run_config) + +# # ------------------------------------- + +# # train + +# weights_store.store_weights(run, 0) +# train_run(run) +# # weights_store.store_weights(run, run.train_until) + +# init_weights = weights_store.retrieve_weights(run.name, 0) +# final_weights = weights_store.retrieve_weights(run.name, 1) + +# for name, weight in init_weights.model.items(): +# weight_diff = (weight - final_weights.model[name]).any() +# assert weight_diff != 0, "Weights did not change" + +# # assert train_stats and validation_scores are available + +# training_stats = stats_store.retrieve_training_stats(run_config.name) + +# assert training_stats.trained_until() == run_config.num_iterations diff --git a/tests/operations/test_validate.py b/tests/operations/test_validate.py index 78319654c..97819400e 100644 --- a/tests/operations/test_validate.py +++ b/tests/operations/test_validate.py @@ -112,13 +112,13 @@ def test_validate_unet(datasplit, task, trainer, architecture): weights_store = create_weights_store() run_config = RunConfig( - name=f"{architecture.name}_run", + name=f"{architecture.name}_run_validate", task_config=task, architecture_config=architecture, trainer_config=trainer, datasplit_config=datasplit, repetition=0, - num_iterations=2, + num_iterations=10, ) try: 
store.store_run_config(run_config) From 58a16f222e58a91eb3d851c7d130f250a242aa23 Mon Sep 17 00:00:00 2001 From: mzouink Date: Thu, 14 Nov 2024 16:50:05 -0500 Subject: [PATCH 24/43] more iterations: --- tests/operations/test_train.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/operations/test_train.py b/tests/operations/test_train.py index ebb6d4c34..441c42897 100644 --- a/tests/operations/test_train.py +++ b/tests/operations/test_train.py @@ -86,7 +86,7 @@ def test_train_non_stored_unet( trainer_config=trainer, datasplit_config=datasplit, repetition=0, - num_iterations=1, + num_iterations=10, ) run = Run(run_config) train_run(run) @@ -118,7 +118,7 @@ def test_train_unet( trainer_config=trainer, datasplit_config=datasplit, repetition=0, - num_iterations=1, + num_iterations=10, ) try: store.store_run_config(run_config) @@ -136,7 +136,7 @@ def test_train_unet( train_run(run) init_weights = weights_store.retrieve_weights(run.name, 0) - final_weights = weights_store.retrieve_weights(run.name, 1) + final_weights = weights_store.retrieve_weights(run.name, 10) for name, weight in init_weights.model.items(): weight_diff = (weight - final_weights.model[name]).any() @@ -174,7 +174,7 @@ def test_train_unet( # trainer_config=trainer, # datasplit_config=upsample_datasplit, # repetition=0, -# num_iterations=1, +# num_iterations=10, # ) # try: # store.store_run_config(run_config) @@ -193,7 +193,7 @@ def test_train_unet( # # weights_store.store_weights(run, run.train_until) # init_weights = weights_store.retrieve_weights(run.name, 0) -# final_weights = weights_store.retrieve_weights(run.name, 1) +# final_weights = weights_store.retrieve_weights(run.name, 10) # for name, weight in init_weights.model.items(): # weight_diff = (weight - final_weights.model[name]).any() From 0979152ef546390da087c3efc4ceab4af7e0417a Mon Sep 17 00:00:00 2001 From: mzouink Date: Thu, 14 Nov 2024 21:50:41 +0000 Subject: [PATCH 25/43] :art: Format Python code with 
psf/black --- dacapo/predict_local.py | 5 +++-- tests/fixtures/__init__.py | 8 +++++++- tests/fixtures/architectures.py | 4 ++-- tests/fixtures/runs.py | 1 - tests/operations/test_architecture.py | 1 - tests/operations/test_train.py | 1 - 6 files changed, 12 insertions(+), 8 deletions(-) diff --git a/dacapo/predict_local.py b/dacapo/predict_local.py index d829dffdb..a4f21ab15 100644 --- a/dacapo/predict_local.py +++ b/dacapo/predict_local.py @@ -73,7 +73,9 @@ def predict( model_device = str(next(model.parameters()).device).split(":")[0] - assert model_device == str(device), f"Model is not on the right device, Model: {model_device}, Compute device: {device}" + assert model_device == str( + device + ), f"Model is not on the right device, Model: {model_device}, Compute device: {device}" def predict_fn(block): raw_input = raw_array.to_ndarray(block.read_roi) @@ -89,7 +91,6 @@ def predict_fn(block): raw_input = np.expand_dims(raw_input, 0) axis_names = ["c^"] + axis_names - with torch.no_grad(): model.eval() predictions = ( diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index d4186d29b..1c1908476 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -16,6 +16,12 @@ from .losses import dummy_loss from .post_processors import argmax, threshold from .predictors import distance_predictor, onehot_predictor -from .runs import dummy_run, distance_run, onehot_run, unet_2d_distance_run, unet_3d_distance_run +from .runs import ( + dummy_run, + distance_run, + onehot_run, + unet_2d_distance_run, + unet_3d_distance_run, +) from .tasks import dummy_task, distance_task, onehot_task, six_onehot_task from .trainers import dummy_trainer, gunpowder_trainer diff --git a/tests/fixtures/architectures.py b/tests/fixtures/architectures.py index 558dea561..4baca2da7 100644 --- a/tests/fixtures/architectures.py +++ b/tests/fixtures/architectures.py @@ -76,8 +76,8 @@ def unet_architecture_builder(batch_norm, upsample, use_attention, three_d): 
num_fmaps=8, fmaps_out=8, fmap_inc_factor=2, - downsample_factors=[(4, 4), ( 4, 4)], - kernel_size_down=[[( 3, 3)] * 2] * 3, + downsample_factors=[(4, 4), (4, 4)], + kernel_size_down=[[(3, 3)] * 2] * 3, kernel_size_up=[[(3, 3)] * 2] * 2, constant_upsample=True, padding="valid", diff --git a/tests/fixtures/runs.py b/tests/fixtures/runs.py index d5e584f9f..b508079bc 100644 --- a/tests/fixtures/runs.py +++ b/tests/fixtures/runs.py @@ -75,7 +75,6 @@ def unet_2d_distance_run( ) - @pytest.fixture() def unet_3d_distance_run( six_class_datasplit, diff --git a/tests/operations/test_architecture.py b/tests/operations/test_architecture.py index 55ddf765a..1969ce33f 100644 --- a/tests/operations/test_architecture.py +++ b/tests/operations/test_architecture.py @@ -82,7 +82,6 @@ def test_2d_conv_unet( raise ValueError(f"Conv2d found in 3d unet {name}") - @pytest.mark.parametrize( "run_config", [ diff --git a/tests/operations/test_train.py b/tests/operations/test_train.py index 441c42897..bbb0ac221 100644 --- a/tests/operations/test_train.py +++ b/tests/operations/test_train.py @@ -110,7 +110,6 @@ def test_train_unet( batch_norm, upsample, use_attention, three_d ) - run_config = RunConfig( name=f"{architecture_config.name}_run", task_config=task, From c4a234b945c8b33f26a1b9928e4e2fef28589153 Mon Sep 17 00:00:00 2001 From: mzouink Date: Fri, 15 Nov 2024 11:29:34 -0500 Subject: [PATCH 26/43] add hot_distance test, and fix bugs --- .../tasks/predictors/dummy_predictor.py | 6 ++---- .../tasks/predictors/hot_distance_predictor.py | 8 +++----- .../predictors/inner_distance_predictor.py | 6 ++---- tests/fixtures/__init__.py | 3 ++- tests/fixtures/runs.py | 18 ++++++++++++++++++ tests/fixtures/tasks.py | 16 ++++++++++++++++ tests/operations/test_train.py | 1 + 7 files changed, 44 insertions(+), 14 deletions(-) diff --git a/dacapo/experiments/tasks/predictors/dummy_predictor.py b/dacapo/experiments/tasks/predictors/dummy_predictor.py index 3293f6423..2c495da56 100644 --- 
a/dacapo/experiments/tasks/predictors/dummy_predictor.py +++ b/dacapo/experiments/tasks/predictors/dummy_predictor.py @@ -71,9 +71,8 @@ def create_target(self, gt): # zeros return np_to_funlib_array( np.zeros((self.embedding_dims,) + gt.data.shape[-gt.dims :]), - gt.roi, + gt.roi.offset, gt.voxel_size, - ["c^"] + gt.axis_names, ) def create_weight(self, gt, target, mask, moving_class_counts=None): @@ -96,9 +95,8 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): return ( np_to_funlib_array( np.ones(target.data.shape), - target.roi, + target.roi.offset, target.voxel_size, - target.axis_names, ), None, ) diff --git a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py index 607c426f0..7c2361aee 100644 --- a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py @@ -141,12 +141,11 @@ def create_target(self, gt): Examples: >>> target = predictor.create_target(gt) """ - target = self.process(gt.data, gt.voxel_size, self.norm, self.dt_scale_factor) + target = self.process(gt[:], gt.voxel_size, self.norm, self.dt_scale_factor) return np_to_funlib_array( target, - gt.roi, + gt.roi.offset, gt.voxel_size, - gt.axis_names, ) def create_weight(self, gt, target, mask, moving_class_counts=None): @@ -209,9 +208,8 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): return ( np_to_funlib_array( weights, - gt.roi, + gt.roi.offset, gt.voxel_size, - gt.axis_names, ), moving_class_counts, ) diff --git a/dacapo/experiments/tasks/predictors/inner_distance_predictor.py b/dacapo/experiments/tasks/predictors/inner_distance_predictor.py index b2f50b59a..a6f18d865 100644 --- a/dacapo/experiments/tasks/predictors/inner_distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/inner_distance_predictor.py @@ -120,9 +120,8 @@ def create_target(self, gt): ) return np_to_funlib_array( distances, - gt.roi, 
+ gt.roi.offset, gt.voxel_size, - gt.axis_names, ) def create_weight(self, gt, target, mask, moving_class_counts=None): @@ -155,9 +154,8 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): return ( np_to_funlib_array( weights, - gt.roi, + gt.roi.offset, gt.voxel_size, - gt.axis_names, ), moving_class_counts, ) diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 1c1908476..add8cbad6 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -22,6 +22,7 @@ onehot_run, unet_2d_distance_run, unet_3d_distance_run, + hot_distance_run, ) -from .tasks import dummy_task, distance_task, onehot_task, six_onehot_task +from .tasks import dummy_task, distance_task, onehot_task, six_onehot_task, hot_distance_task from .trainers import dummy_trainer, gunpowder_trainer diff --git a/tests/fixtures/runs.py b/tests/fixtures/runs.py index b508079bc..d66323bcf 100644 --- a/tests/fixtures/runs.py +++ b/tests/fixtures/runs.py @@ -20,6 +20,24 @@ def distance_run( num_iterations=10, ) +@pytest.fixture() +def hot_distance_run( + six_class_datasplit, + dummy_architecture, + hot_distance_task, + gunpowder_trainer, +): + yield RunConfig( + name="hot_distance_run", + task_config=hot_distance_task, + architecture_config=dummy_architecture, + trainer_config=gunpowder_trainer, + datasplit_config=six_class_datasplit, + repetition=0, + num_iterations=10, + ) + + @pytest.fixture() def dummy_run( diff --git a/tests/fixtures/tasks.py b/tests/fixtures/tasks.py index 4230fd9b7..2dcffc52b 100644 --- a/tests/fixtures/tasks.py +++ b/tests/fixtures/tasks.py @@ -2,6 +2,7 @@ DistanceTaskConfig, DummyTaskConfig, OneHotTaskConfig, + HotDistanceTaskConfig, ) import pytest @@ -27,6 +28,21 @@ def distance_task(): tol_distance=10, ) +@pytest.fixture() +def hot_distance_task(): + yield HotDistanceTaskConfig( + name="hot_distance_task", + channels=[ + "a", + "b", + "c", + "d", + "e", + "f", + ], + clip_distance=5, + tol_distance=10, + ) @pytest.fixture() def 
onehot_task(): diff --git a/tests/operations/test_train.py b/tests/operations/test_train.py index bbb0ac221..00c6b36e9 100644 --- a/tests/operations/test_train.py +++ b/tests/operations/test_train.py @@ -27,6 +27,7 @@ lf("distance_run"), lf("dummy_run"), lf("onehot_run"), + lf("hot_distance_run"), ], ) def test_train( From 0d2362caea7fcdc7591e7f5baf6e917bd1e078e5 Mon Sep 17 00:00:00 2001 From: mzouink Date: Fri, 15 Nov 2024 16:30:07 +0000 Subject: [PATCH 27/43] :art: Format Python code with psf/black --- tests/fixtures/__init__.py | 8 +++++++- tests/fixtures/runs.py | 2 +- tests/fixtures/tasks.py | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index add8cbad6..373b80796 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -24,5 +24,11 @@ unet_3d_distance_run, hot_distance_run, ) -from .tasks import dummy_task, distance_task, onehot_task, six_onehot_task, hot_distance_task +from .tasks import ( + dummy_task, + distance_task, + onehot_task, + six_onehot_task, + hot_distance_task, +) from .trainers import dummy_trainer, gunpowder_trainer diff --git a/tests/fixtures/runs.py b/tests/fixtures/runs.py index d66323bcf..c842db118 100644 --- a/tests/fixtures/runs.py +++ b/tests/fixtures/runs.py @@ -20,6 +20,7 @@ def distance_run( num_iterations=10, ) + @pytest.fixture() def hot_distance_run( six_class_datasplit, @@ -38,7 +39,6 @@ def hot_distance_run( ) - @pytest.fixture() def dummy_run( dummy_datasplit, diff --git a/tests/fixtures/tasks.py b/tests/fixtures/tasks.py index 2dcffc52b..bd8b25084 100644 --- a/tests/fixtures/tasks.py +++ b/tests/fixtures/tasks.py @@ -28,6 +28,7 @@ def distance_task(): tol_distance=10, ) + @pytest.fixture() def hot_distance_task(): yield HotDistanceTaskConfig( @@ -44,6 +45,7 @@ def hot_distance_task(): tol_distance=10, ) + @pytest.fixture() def onehot_task(): yield OneHotTaskConfig( From ea9f5dd5d1d1f2e143bb648bc07acf01e132ce86 Mon Sep 17 00:00:00 
2001 From: William Patton Date: Mon, 18 Nov 2024 16:25:32 -0800 Subject: [PATCH 28/43] autouse fork/spawn fixture so we test both this is necessary for getting compatibility with windows and macos --- tests/conf.py | 3 --- tests/conftest.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) delete mode 100644 tests/conf.py create mode 100644 tests/conftest.py diff --git a/tests/conf.py b/tests/conf.py deleted file mode 100644 index 57a8708d5..000000000 --- a/tests/conf.py +++ /dev/null @@ -1,3 +0,0 @@ -import multiprocessing as mp - -mp.set_start_method("fork", force=True) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000..9a90c5cab --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,28 @@ +import multiprocessing as mp +import os +import yaml + +from dacapo.options import Options + +import pytest + + +@pytest.fixture(params=["fork", "spawn"], autouse=True) +def context(request, monkeypatch): +    ctx = mp.get_context(request.param) +    monkeypatch.setattr(mp, "Queue", ctx.Queue) +    monkeypatch.setattr(mp, "Process", ctx.Process) +    monkeypatch.setattr(mp, "Event", ctx.Event) +    monkeypatch.setattr(mp, "Value", ctx.Value) + + +@pytest.fixture(autouse=True) +def runs_base_dir(tmpdir): +    options_file = tmpdir / "dacapo.yaml" +    os.environ["DACAPO_OPTIONS_FILE"] = f"{options_file}" + +    with open(options_file, "w") as f: +        options_file.write(yaml.safe_dump({"runs_base_dir": f"{tmpdir}"})) + +    assert Options.config_file() == options_file +    assert Options.instance().runs_base_dir == tmpdir From 0da091171909e355172f68e086e47ff0b7360bc4 Mon Sep 17 00:00:00 2001 From: William Patton Date: Mon, 18 Nov 2024 16:27:33 -0800 Subject: [PATCH 29/43] add helpers to build simple parametrized test configs --- tests/operations/helpers.py | 160 ++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 tests/operations/helpers.py diff --git a/tests/operations/helpers.py b/tests/operations/helpers.py new file mode
import numpy as np
from funlib.persistence import prepare_ds
from funlib.geometry import Coordinate

from dacapo.experiments.datasplits import SimpleDataSplitConfig
from dacapo.experiments.tasks import (
    DistanceTaskConfig,
    OneHotTaskConfig,
    AffinitiesTaskConfig,
)
from dacapo.experiments.architectures import CNNectomeUNetConfig

from pathlib import Path


def build_test_data_config(
    tmpdir: Path, data_dims: int, channels: bool, upsample: bool, task_type: str
):
    """
    Builds the simplest possible datasplit given the parameters.

    Labels are alternating planes/lines of 0/1 in the last dimension.
    Intensities are random where labels are > 0, else 0. (If channels, stack twice.)
    If task_type is "semantic", labels are binarized via labels > 0.

    If upsampling, labels are upsampled by a factor of 2 in each dimension.
    """

    data_shape = (64, 64, 64)[-data_dims:]
    mesh = np.meshgrid(
        *[np.linspace(0, dim - 1, dim * (1 + upsample)) for dim in data_shape]
    )
    labels = mesh[-1] * (mesh[-1] % 2 > 0.75)

    # Bug fix: parenthesize the mask. The original expression
    # `np.random.rand(*labels.shape) * labels > 0` binds as
    # `(rand * labels) > 0`, yielding a boolean array instead of random
    # intensities masked by the labels.
    intensities = np.random.rand(*labels.shape) * (labels > 0)

    if channels:
        intensities = np.stack([intensities, intensities], axis=0)

    intensities_array = prepare_ds(
        tmpdir / "test_data.zarr/raw",
        intensities.shape,
        offset=(0,) * data_dims,
        voxel_size=(2,) * data_dims,
        dtype=intensities.dtype,
        mode="w",
    )
    intensities_array[:] = intensities

    if task_type == "semantic":
        labels = labels > 0

    labels_array = prepare_ds(
        tmpdir / "test_data.zarr/labels",
        labels.shape,
        offset=(0,) * data_dims,
        # labels voxel size is halved when upsampling (2 - True == 1)
        voxel_size=(2 - upsample,) * data_dims,
        dtype=labels.dtype,
        mode="w",
    )
    labels_array[:] = labels

    return SimpleDataSplitConfig(name="test_data", path=tmpdir / "test_data.zarr")


def build_test_task_config(task, data_dims: int, architecture_dims: int):
    """
    Build the simplest task config given the parameters.

    Raises:
        ValueError: if `task` is not one of "distance", "onehot", "affs",
            or no affinity neighborhood can be built for the given dims.
    """
    if task == "distance":
        return DistanceTaskConfig(
            name="test_distance_task",
            channels=["fg"],
            clip_distance=4,
            tol_distance=4,
            scale_factor=8,
        )
    if task == "onehot":
        return OneHotTaskConfig(
            name="test_onehot_task", classes=["bg", "fg"], kernel_size=1
        )
    if task == "affs":
        # TODO: should configs be able to take any sequence for the neighborhood?
        if data_dims == 2:
            # 2D
            neighborhood = [Coordinate(1, 0), Coordinate(0, 1)]
        elif data_dims == 3 and architecture_dims == 2:
            # 3D but only generate 2D affs
            neighborhood = [Coordinate(0, 1, 0), Coordinate(0, 0, 1)]
        elif data_dims == 3 and architecture_dims == 3:
            # 3D
            neighborhood = [
                Coordinate(1, 0, 0),
                Coordinate(0, 1, 0),
                Coordinate(0, 0, 1),
            ]
        else:
            # Robustness: previously `neighborhood` could be unbound here.
            raise ValueError(
                f"No affinity neighborhood for data_dims={data_dims}, "
                f"architecture_dims={architecture_dims}"
            )
        return AffinitiesTaskConfig(name="test_affs_task", neighborhood=neighborhood)
    # Robustness: previously an unknown task silently returned None.
    raise ValueError(f"Unknown task type: {task!r}")


def build_test_architecture_config(
    data_dims: int,
    architecture_dims: int,
    channels: bool,
    batch_norm: bool,
    upsample: bool,
    use_attention: bool,
    padding: str,
):
    """
    Build the simplest architecture config given the parameters.
    """
    if data_dims == 2:
        input_shape = (18, 18)
        downsample_factors = [(2, 2)]
        upsample_factors = [(2, 2)] * int(upsample)

        # Cleanup: the original assigned explicit kernel sizes and then
        # immediately overwrote them with None; the defaults work here.
        kernel_size_down = None
        kernel_size_up = None

    elif data_dims == 3 and architecture_dims == 2:
        input_shape = (1, 18, 18)
        downsample_factors = [(1, 2, 2)]

        # test data upsamples in all dimensions so we have
        # to here too
        upsample_factors = [(2, 2, 2)] * int(upsample)

        # we have to force the 3D kernels to be 2D
        kernel_size_down = [[(1, 3, 3)] * 2] * 2
        kernel_size_up = [[(1, 3, 3)] * 2] * 1

    elif data_dims == 3 and architecture_dims == 3:
        input_shape = (18, 18, 18)
        downsample_factors = [(2, 2, 2)]
        upsample_factors = [(2, 2, 2)] * int(upsample)

        # defaults work for the isotropic 3D case too
        kernel_size_down = None
        kernel_size_up = None

    else:
        # Robustness: previously the variables above could be unbound.
        raise ValueError(
            f"Unsupported combination: data_dims={data_dims}, "
            f"architecture_dims={architecture_dims}"
        )

    return CNNectomeUNetConfig(
        name="test_cnnectome_unet",
        input_shape=input_shape,
        eval_shape_increase=input_shape,
        fmaps_in=1 + channels,
        num_fmaps=2,
        fmaps_out=2,
        fmap_inc_factor=2,
        downsample_factors=downsample_factors,
        kernel_size_down=kernel_size_down,
        kernel_size_up=kernel_size_up,
        constant_upsample=True,
        upsample_factors=upsample_factors,
        batch_norm=batch_norm,
        use_attention=use_attention,
        padding=padding,
    )
+------------------------- 8 files changed, 100 insertions(+), 230 deletions(-) create mode 100644 tests/operations/test_mini.py diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 373b80796..e0d4a47a0 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -3,7 +3,6 @@ dummy_architecture, unet_architecture, unet_3d_architecture, - unet_architecture_builder, ) from .arrays import dummy_array, zarr_array, cellmap_array from .datasplits import ( diff --git a/tests/fixtures/architectures.py b/tests/fixtures/architectures.py index 4baca2da7..79e7f9fca 100644 --- a/tests/fixtures/architectures.py +++ b/tests/fixtures/architectures.py @@ -17,15 +17,15 @@ def dummy_architecture(): def unet_architecture(): yield CNNectomeUNetConfig( name="tmp_unet_architecture", - input_shape=(132, 132), - eval_shape_increase=(32, 32), + input_shape=(1, 132, 132), + eval_shape_increase=(1, 32, 32), fmaps_in=1, num_fmaps=8, fmaps_out=8, fmap_inc_factor=2, - downsample_factors=[(4, 4), (4, 4)], - kernel_size_down=[[(3, 3)] * 2] * 3, - kernel_size_up=[[(3, 3)] * 2] * 2, + downsample_factors=[(1, 4, 4), (1, 4, 4)], + kernel_size_down=[[(1, 3, 3)] * 2] * 3, + kernel_size_up=[[(1, 3, 3)] * 2] * 2, constant_upsample=True, padding="valid", ) @@ -44,44 +44,3 @@ def unet_3d_architecture(): downsample_factors=[(2, 2, 2), (2, 2, 2), (2, 2, 2)], constant_upsample=True, ) - - -def unet_architecture_builder(batch_norm, upsample, use_attention, three_d): - name = "3d_unet" if three_d else "2d_unet" - name = f"{name}_bn" if batch_norm else name - name = f"{name}_up" if upsample else name - name = f"{name}_att" if use_attention else name - - if three_d: - return CNNectomeUNetConfig( - name=name, - input_shape=(188, 188, 188), - eval_shape_increase=(72, 72, 72), - fmaps_in=1, - num_fmaps=6, - fmaps_out=6, - fmap_inc_factor=2, - downsample_factors=[(2, 2, 2), (2, 2, 2), (2, 2, 2)], - constant_upsample=True, - upsample_factors=[(2, 2, 2)] if upsample else [], - 
batch_norm=batch_norm, - use_attention=use_attention, - ) - else: - return CNNectomeUNetConfig( - name=name, - input_shape=(132, 132), - eval_shape_increase=(32, 32), - fmaps_in=1, - num_fmaps=8, - fmaps_out=8, - fmap_inc_factor=2, - downsample_factors=[(4, 4), (4, 4)], - kernel_size_down=[[(3, 3)] * 2] * 3, - kernel_size_up=[[(3, 3)] * 2] * 2, - constant_upsample=True, - padding="valid", - batch_norm=batch_norm, - use_attention=use_attention, - upsample_factors=[(2, 2)] if upsample else [], - ) diff --git a/tests/fixtures/predictors.py b/tests/fixtures/predictors.py index cc93369cf..c6dd6de51 100644 --- a/tests/fixtures/predictors.py +++ b/tests/fixtures/predictors.py @@ -10,4 +10,4 @@ def distance_predictor(): @pytest.fixture() def onehot_predictor(): - yield OneHotPredictor(classes=["a", "b", "c"]) + yield OneHotPredictor(classes=["a", "b", "c"], kernel_size=1) diff --git a/tests/fixtures/tasks.py b/tests/fixtures/tasks.py index bd8b25084..5792811b4 100644 --- a/tests/fixtures/tasks.py +++ b/tests/fixtures/tasks.py @@ -51,6 +51,7 @@ def onehot_task(): yield OneHotTaskConfig( name="one_hot_task", classes=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"], + kernel_size=1, ) @@ -59,4 +60,5 @@ def six_onehot_task(): yield OneHotTaskConfig( name="one_hot_task", classes=["a", "b", "c", "d", "e", "f"], + kernel_size=1, ) diff --git a/tests/operations/helpers.py b/tests/operations/helpers.py index 4867cf566..74fb43208 100644 --- a/tests/operations/helpers.py +++ b/tests/operations/helpers.py @@ -26,7 +26,8 @@ def build_test_data_config( if upsampling, labels are upsampled by a factor of 2 in each dimension """ - data_shape = (64, 64, 64)[-data_dims:] + data_shape = (32, 32, 32)[-data_dims:] + axis_names = ["z", "y", "x"][-data_dims:] mesh = np.meshgrid( *[np.linspace(0, dim - 1, dim * (1 + upsample)) for dim in data_shape] ) @@ -42,6 +43,7 @@ def build_test_data_config( intensities.shape, offset=(0,) * data_dims, voxel_size=(2,) * data_dims, + 
axis_names=["c^"] * int(channels) + axis_names, dtype=intensities.dtype, mode="w", ) @@ -55,6 +57,7 @@ def build_test_data_config( labels.shape, offset=(0,) * data_dims, voxel_size=(2 - upsample,) * data_dims, + axis_names=axis_names, dtype=labels.dtype, mode="w", ) @@ -110,7 +113,8 @@ def build_test_architecture_config( Build the simplest architecture config given the parameters. """ if data_dims == 2: - input_shape = (18, 18) + input_shape = (32, 32) + eval_shape_increase = (8, 8) downsample_factors = [(2, 2)] upsample_factors = [(2, 2)] * int(upsample) @@ -120,7 +124,8 @@ def build_test_architecture_config( kernel_size_up = None # the default should work elif data_dims == 3 and architecture_dims == 2: - input_shape = (1, 18, 18) + input_shape = (1, 32, 32) + eval_shape_increase = (15, 8, 8) downsample_factors = [(1, 2, 2)] # test data upsamples in all dimensions so we have @@ -132,7 +137,8 @@ def build_test_architecture_config( kernel_size_up = [[(1, 3, 3)] * 2] * 1 elif data_dims == 3 and architecture_dims == 3: - input_shape = (18, 18, 18) + input_shape = (32, 32, 32) + eval_shape_increase = (8, 8, 8) downsample_factors = [(2, 2, 2)] upsample_factors = [(2, 2, 2)] * int(upsample) @@ -144,7 +150,7 @@ def build_test_architecture_config( return CNNectomeUNetConfig( name="test_cnnectome_unet", input_shape=input_shape, - eval_shape_increase=input_shape, + eval_shape_increase=eval_shape_increase, fmaps_in=1 + channels, num_fmaps=2, fmaps_out=2, diff --git a/tests/operations/test_architecture.py b/tests/operations/test_architecture.py index 1969ce33f..2be724d07 100644 --- a/tests/operations/test_architecture.py +++ b/tests/operations/test_architecture.py @@ -52,28 +52,14 @@ def test_stored_architecture( assert architecture.dims is not None, f"Architecture dims are None {architecture}" -@pytest.mark.parametrize( - "architecture_config", - [ - lf("unet_architecture"), - ], -) -def test_3d_conv_unet( - architecture_config, -): - architecture = 
from ..fixtures import *
from .helpers import (
    build_test_data_config,
    build_test_task_config,
    build_test_architecture_config,
)

from dacapo.experiments import Run
from dacapo.experiments.run_config import RunConfig
from dacapo.train import train_run
from dacapo.validate import validate_run

# Cleanup: `import pytest` appeared twice in the original module.
import pytest
from pytest_lazy_fixtures import lf


@pytest.mark.parametrize("data_dims", [2, 3])
@pytest.mark.parametrize("channels", [True, False])
@pytest.mark.parametrize("task", ["distance", "onehot", "affs"])
@pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")])
@pytest.mark.parametrize("architecture_dims", [2, 3])
@pytest.mark.parametrize("upsample", [True, False])
# batch_norm and use_attention are pinned to a single value to keep the
# test matrix small; switch to [True, False] for a heavier sweep.
@pytest.mark.parametrize("batch_norm", [False])
@pytest.mark.parametrize("use_attention", [False])
@pytest.mark.parametrize("padding", ["valid", "same"])
@pytest.mark.parametrize("func", ["train", "validate"])
def test_mini(
    tmpdir,
    data_dims,
    channels,
    task,
    trainer,
    architecture_dims,
    batch_norm,
    upsample,
    use_attention,
    padding,
    func,
):
    """Build a minimal run for one parameter combination and train/validate it."""
    # Invalid configurations:
    if data_dims == 2 and architecture_dims == 3:
        # cannot train a 3D model on 2D data
        # TODO: maybe check that an appropriate warning is raised somewhere
        return

    data_config = build_test_data_config(
        tmpdir,
        data_dims,
        channels,
        upsample,
        "instance" if task == "affs" else "semantic",
    )
    task_config = build_test_task_config(task, data_dims, architecture_dims)
    architecture_config = build_test_architecture_config(
        data_dims,
        architecture_dims,
        channels,
        batch_norm,
        upsample,
        use_attention,
        padding,
    )

    run_config = RunConfig(
        name=f"test_{func}",
        task_config=task_config,
        architecture_config=architecture_config,
        trainer_config=trainer,
        datasplit_config=data_config,
        repetition=0,
        num_iterations=1,
    )
    run = Run(run_config)

    if func == "train":
        train_run(run)
    elif func == "validate":
        validate_run(run, 1)
-@pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) -@pytest.mark.parametrize("batch_norm", [False]) -@pytest.mark.parametrize("upsample", [False]) -@pytest.mark.parametrize("use_attention", [False]) -@pytest.mark.parametrize("three_d", [False]) -def test_train_non_stored_unet( - datasplit, task, trainer, batch_norm, upsample, use_attention, three_d -): - architecture_config = unet_architecture_builder( - batch_norm, upsample, use_attention, three_d - ) - - run_config = RunConfig( - name=f"{architecture_config.name}_run_v", - task_config=task, - architecture_config=architecture_config, - trainer_config=trainer, - datasplit_config=datasplit, - repetition=0, - num_iterations=10, - ) - run = Run(run_config) - train_run(run) - - -@pytest.mark.parametrize("datasplit", [lf("six_class_datasplit")]) -@pytest.mark.parametrize("task", [lf("distance_task")]) -@pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) -@pytest.mark.parametrize("batch_norm", [True, False]) -@pytest.mark.parametrize("upsample", [False]) -@pytest.mark.parametrize("use_attention", [True, False]) -@pytest.mark.parametrize("three_d", [True, False]) -def test_train_unet( - datasplit, task, trainer, batch_norm, upsample, use_attention, three_d -): - store = create_config_store() - stats_store = create_stats_store() - weights_store = create_weights_store() - - architecture_config = unet_architecture_builder( - batch_norm, upsample, use_attention, three_d - ) - - run_config = RunConfig( - name=f"{architecture_config.name}_run", - task_config=task, - architecture_config=architecture_config, - trainer_config=trainer, - datasplit_config=datasplit, - repetition=0, - num_iterations=10, - ) - try: - store.store_run_config(run_config) - except Exception as e: - store.delete_run_config(run_config.name) - store.store_run_config(run_config) - - run = Run(run_config) - - # ------------------------------------- - - # train - - weights_store.store_weights(run, 0) - train_run(run) - - init_weights = 
weights_store.retrieve_weights(run.name, 0) - final_weights = weights_store.retrieve_weights(run.name, 10) - - for name, weight in init_weights.model.items(): - weight_diff = (weight - final_weights.model[name]).any() - assert weight_diff != 0, "Weights did not change" - - # assert train_stats and validation_scores are available - - training_stats = stats_store.retrieve_training_stats(run_config.name) - - assert training_stats.trained_until() == run_config.num_iterations - - -# @pytest.mark.parametrize("upsample_datasplit", [lf("upsample_six_class_datasplit")]) -# @pytest.mark.parametrize("task", [lf("distance_task")]) -# @pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) -# @pytest.mark.parametrize("batch_norm", [True, False]) -# @pytest.mark.parametrize("upsample", [True]) -# @pytest.mark.parametrize("use_attention", [True, False]) -# @pytest.mark.parametrize("three_d", [True, False]) -# def test_upsample_train_unet( -# upsample_datasplit, task, trainer, batch_norm, upsample, use_attention, three_d -# ): -# store = create_config_store() -# stats_store = create_stats_store() -# weights_store = create_weights_store() - -# architecture_config = unet_architecture_builder( -# batch_norm, upsample, use_attention, three_d -# ) - -# run_config = RunConfig( -# name=f"{architecture_config.name}_run", -# task_config=task, -# architecture_config=architecture_config, -# trainer_config=trainer, -# datasplit_config=upsample_datasplit, -# repetition=0, -# num_iterations=10, -# ) -# try: -# store.store_run_config(run_config) -# except Exception as e: -# store.delete_run_config(run_config.name) -# store.store_run_config(run_config) - -# run = Run(run_config) - -# # ------------------------------------- - -# # train - -# weights_store.store_weights(run, 0) -# train_run(run) -# # weights_store.store_weights(run, run.train_until) - -# init_weights = weights_store.retrieve_weights(run.name, 0) -# final_weights = weights_store.retrieve_weights(run.name, 10) - -# for name, 
weight in init_weights.model.items(): -# weight_diff = (weight - final_weights.model[name]).any() -# assert weight_diff != 0, "Weights did not change" - -# # assert train_stats and validation_scores are available - -# training_stats = stats_store.retrieve_training_stats(run_config.name) - -# assert training_stats.trained_until() == run_config.num_iterations From 02ee936fdd7af3ab827eff1924433dc7159c0fbe Mon Sep 17 00:00:00 2001 From: William Patton Date: Mon, 18 Nov 2024 17:43:43 -0800 Subject: [PATCH 31/43] fix hard coded dimensions --- dacapo/predict_local.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dacapo/predict_local.py b/dacapo/predict_local.py index a4f21ab15..88aaa5cfc 100644 --- a/dacapo/predict_local.py +++ b/dacapo/predict_local.py @@ -44,10 +44,12 @@ def predict( else: input_roi = output_roi.grow(context, context) - read_roi = Roi((0, 0, 0), input_size) + read_roi = Roi((0,) * input_size.dims, input_size) write_roi = read_roi.grow(-context, -context) - axes = ["c^", "z", "y", "x"] + axes = raw_array.axis_names + if "c^" not in axes: + axes = ["c^"] + axes num_channels = model.num_out_channels @@ -73,8 +75,8 @@ def predict( model_device = str(next(model.parameters()).device).split(":")[0] - assert model_device == str( - device + assert ( + model_device == str(device) ), f"Model is not on the right device, Model: {model_device}, Compute device: {device}" def predict_fn(block): @@ -103,7 +105,7 @@ def predict_fn(block): predictions = Array( predictions, block.write_roi.offset, - raw_array.voxel_size, + output_voxel_size, axis_names, raw_array.units, ) @@ -120,7 +122,7 @@ def predict_fn(block): task = daisy.Task( f"predict_{out_container}_{out_dataset}", total_roi=input_roi, - read_roi=Roi((0, 0, 0), input_size), + read_roi=Roi((0,)*input_size.dims, input_size), write_roi=Roi(context, output_size), process_function=predict_fn, check_function=None, From 242eeb11f88e18bed3e7e4cda6b50292639ae82e Mon Sep 17 00:00:00 2001 
From: William Patton Date: Mon, 18 Nov 2024 17:43:53 -0800 Subject: [PATCH 32/43] bugfix: typo --- dacapo/experiments/datasplits/simple_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/experiments/datasplits/simple_config.py b/dacapo/experiments/datasplits/simple_config.py index 53a66945b..9ee88283a 100644 --- a/dacapo/experiments/datasplits/simple_config.py +++ b/dacapo/experiments/datasplits/simple_config.py @@ -44,7 +44,7 @@ def get_paths(self, group_name: str) -> list[Path]: len(level_2_matches) == 0 ), f"Found raw data at {level_1} and {level_2}" return [Path(x).parent for x in level_1_matches] - elif len(level_2_matches).parent > 0: + elif len(level_2_matches) > 0: return [Path(x) for x in level_2_matches] raise Exception(f"No raw data found at {level_0} or {level_1} or {level_2}") From 5898c173f5798cbeaa2cfe9e57c121d05980d989 Mon Sep 17 00:00:00 2001 From: William Patton Date: Mon, 18 Nov 2024 17:45:23 -0800 Subject: [PATCH 33/43] bugfix: hardcoded dimensions --- .../tasks/post_processors/argmax_post_processor.py | 2 +- .../tasks/post_processors/threshold_post_processor.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dacapo/experiments/tasks/post_processors/argmax_post_processor.py b/dacapo/experiments/tasks/post_processors/argmax_post_processor.py index f736d3e17..a88b89267 100644 --- a/dacapo/experiments/tasks/post_processors/argmax_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/argmax_post_processor.py @@ -133,7 +133,7 @@ def process( overwrite=True, ) - read_roi = Roi((0, 0, 0), block_size[-self.prediction_array.dims :]) + read_roi = Roi((0,)*block_size.dims, block_size) input_array = open_ds( f"{self.prediction_array_identifier.container.path}/{self.prediction_array_identifier.dataset}" ) diff --git a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py index 0c137e2f6..778064fcc 100644 
--- a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py @@ -111,13 +111,13 @@ def process( if self.prediction_array._source_data.chunks is not None: block_size = self.prediction_array._source_data.chunks - write_size = [ + write_size = Coordinate([ b * v for b, v in zip( block_size[-self.prediction_array.dims :], self.prediction_array.voxel_size, ) - ] + ]) output_array = create_from_identifier( output_array_identifier, self.prediction_array.axis_names, @@ -128,7 +128,7 @@ def process( overwrite=True, ) - read_roi = Roi((0, 0, 0), write_size[-self.prediction_array.dims :]) + read_roi = Roi(write_size * 0, write_size) input_array = open_ds( f"{self.prediction_array_identifier.container.path}/{self.prediction_array_identifier.dataset}" ) From ada4a982f57cdc3307fe610a6fd29c9bad295bc2 Mon Sep 17 00:00:00 2001 From: William Patton Date: Mon, 18 Nov 2024 17:45:31 -0800 Subject: [PATCH 34/43] simplify validation tests --- tests/operations/test_validate.py | 108 +++--------------------------- 1 file changed, 8 insertions(+), 100 deletions(-) diff --git a/tests/operations/test_validate.py b/tests/operations/test_validate.py index 97819400e..4df49a602 100644 --- a/tests/operations/test_validate.py +++ b/tests/operations/test_validate.py @@ -1,14 +1,9 @@ -import os -from upath import UPath as Path -import shutil from ..fixtures import * from dacapo.experiments import Run from dacapo.store.create_store import create_config_store, create_weights_store from dacapo import validate, validate_run -from dacapo.experiments.run_config import RunConfig - import pytest from pytest_lazy_fixtures import lf @@ -24,110 +19,23 @@ lf("onehot_run"), ], ) -def test_validate( +def test_large( options, run_config, ): - # set debug to True to run the test in a specific directory (for debugging) - debug = False - if debug: - tmp_path = f"{Path(__file__).parent}/tmp" - if os.path.exists(tmp_path): - 
shutil.rmtree(tmp_path, ignore_errors=True) - os.makedirs(tmp_path, exist_ok=True) - old_path = os.getcwd() - os.chdir(tmp_path) - # when done debugging, delete "tests/operations/tmp" - # ------------------------------------- store = create_config_store() + weights_store = create_weights_store() store.store_run_config(run_config) + # validate validate(run_config.name, 0) - # weights_store.store_weights(run, 1) - # validate_run(run_config.name, 1) + + # validate_run + run = Run(run_config) + weights_store.store_weights(run, 1) + validate_run(run, 1) # test validating weights that don't exist with pytest.raises(FileNotFoundError): validate(run_config.name, 2) - if debug: - os.chdir(old_path) - - -@pytest.mark.parametrize( - "run_config", - [ - lf("distance_run"), - lf("onehot_run"), - ], -) -def test_validate_run( - options, - run_config, -): - # set debug to True to run the test in a specific directory (for debugging) - debug = False - if debug: - tmp_path = f"{Path(__file__).parent}/tmp" - if os.path.exists(tmp_path): - shutil.rmtree(tmp_path, ignore_errors=True) - os.makedirs(tmp_path, exist_ok=True) - old_path = os.getcwd() - os.chdir(tmp_path) - # when done debugging, delete "tests/operations/tmp" - # ------------------------------------- - - # create a store - - store = create_config_store() - weights_store = create_weights_store() - - # store the configs - - store.store_run_config(run_config) - - run_config = store.retrieve_run_config(run_config.name) - run = Run(run_config) - - # ------------------------------------- - - # validate - - # test validating iterations for which we know there are weights - weights_store.store_weights(run, 0) - validate_run(run, 0) - - if debug: - os.chdir(old_path) - - -@pytest.mark.parametrize("datasplit", [lf("six_class_datasplit")]) -@pytest.mark.parametrize("task", [lf("distance_task"), lf("six_onehot_task")]) -@pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) -@pytest.mark.parametrize( - "architecture", 
[lf("unet_architecture"), lf("unet_3d_architecture")] -) -def test_validate_unet(datasplit, task, trainer, architecture): - store = create_config_store() - weights_store = create_weights_store() - - run_config = RunConfig( - name=f"{architecture.name}_run_validate", - task_config=task, - architecture_config=architecture, - trainer_config=trainer, - datasplit_config=datasplit, - repetition=0, - num_iterations=10, - ) - try: - store.store_run_config(run_config) - except Exception as e: - store.delete_run_config(run_config.name) - store.store_run_config(run_config) - - run = Run(run_config) - - # ------------------------------------- - weights_store.store_weights(run, 0) - validate(run.name, 0) From f08b65e097de2ed175163ad32935c8ea6e365fe5 Mon Sep 17 00:00:00 2001 From: William Patton Date: Mon, 18 Nov 2024 17:46:16 -0800 Subject: [PATCH 35/43] bugfix: Distance predictor wasn't working properly when run on single class prediction tasks --- .../tasks/predictors/distance_predictor.py | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/dacapo/experiments/tasks/predictors/distance_predictor.py b/dacapo/experiments/tasks/predictors/distance_predictor.py index 07cb92701..172db065a 100644 --- a/dacapo/experiments/tasks/predictors/distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/distance_predictor.py @@ -223,6 +223,10 @@ def create_distance_mask( >>> predictor.create_distance_mask(distances, mask, voxel_size, normalize, normalize_args) """ + no_channel_dim = len(mask.shape) == len(distances.shape) - 1 + if no_channel_dim: + mask = mask[np.newaxis] + mask_output = mask.copy() for i, (channel_distance, channel_mask) in enumerate(zip(distances, mask)): tmp = np.zeros( @@ -275,6 +279,8 @@ def create_distance_mask( np.sum(channel_mask_output) ) ) + if no_channel_dim: + mask_output = mask_output[0] return mask_output def process( @@ -300,7 +306,20 @@ def process( >>> predictor.process(labels, voxel_size, normalize, normalize_args) 
""" + + num_dims = len(labels.shape) + if num_dims == voxel_size.dims: + channel_dim = False + elif num_dims == voxel_size.dims + 1: + channel_dim = True + else: + raise ValueError("Cannot handle multiple channel dims") + + if not channel_dim: + labels = labels[np.newaxis] + all_distances = np.zeros(labels.shape, dtype=np.float32) - 1 + for ii, channel in enumerate(labels): boundaries = self.__find_boundaries(channel) @@ -358,7 +377,7 @@ def __find_boundaries(self, labels: np.ndarray): # bound.: 00000001000100000001000 2n - 1 if labels.dtype == bool: - raise ValueError("Labels should not be bools") + # raise ValueError("Labels should not be bools") labels = labels.astype(np.uint8) logger.debug(f"computing boundaries for {labels.shape}") From 425cbfc1197c9422adc0a1827faad644fc1a39c2 Mon Sep 17 00:00:00 2001 From: William Patton Date: Mon, 18 Nov 2024 17:50:34 -0800 Subject: [PATCH 36/43] Deprication warning: If the kernel size of the task is set to 3, then this will cause errors if you are trying to use a model with input shape `(1, y, x)`. The head should just be mapping the penultimate layer embeddings into the appropriate dimensions for the task so it desn't need a size larger than 1. 
--- dacapo/experiments/tasks/hot_distance_task.py | 9 +++++++++ .../tasks/hot_distance_task_config.py | 5 +++++ dacapo/experiments/tasks/one_hot_task.py | 14 +++++++++++++- dacapo/experiments/tasks/one_hot_task_config.py | 3 +++ .../tasks/predictors/dummy_predictor.py | 2 +- .../tasks/predictors/hot_distance_predictor.py | 7 ++++--- .../tasks/predictors/one_hot_predictor.py | 16 +++++++++++++--- 7 files changed, 48 insertions(+), 8 deletions(-) diff --git a/dacapo/experiments/tasks/hot_distance_task.py b/dacapo/experiments/tasks/hot_distance_task.py index 630e58ed5..24cb23675 100644 --- a/dacapo/experiments/tasks/hot_distance_task.py +++ b/dacapo/experiments/tasks/hot_distance_task.py @@ -4,6 +4,7 @@ from .predictors import HotDistancePredictor from .task import Task +import warnings class HotDistanceTask(Task): """ @@ -34,6 +35,14 @@ def __init__(self, task_config): >>> task = HotDistanceTask(task_config) """ + + if task_config.kernel_size is None: + warnings.warn( + "The default kernel size of 3 will be changing to 1. " + "Please specify the kernel size explicitly.", + DeprecationWarning, + ) + task_config.kernel_size = 3 self.predictor = HotDistancePredictor( channels=task_config.channels, scale_factor=task_config.scale_factor, diff --git a/dacapo/experiments/tasks/hot_distance_task_config.py b/dacapo/experiments/tasks/hot_distance_task_config.py index 18cab91b3..d140e38e4 100644 --- a/dacapo/experiments/tasks/hot_distance_task_config.py +++ b/dacapo/experiments/tasks/hot_distance_task_config.py @@ -56,3 +56,8 @@ class HotDistanceTaskConfig(TaskConfig): "is less than the distance to object boundary." 
}, ) + + + kernel_size: int | None = attr.ib( + default=None, + ) \ No newline at end of file diff --git a/dacapo/experiments/tasks/one_hot_task.py b/dacapo/experiments/tasks/one_hot_task.py index 870140f50..55d115d15 100644 --- a/dacapo/experiments/tasks/one_hot_task.py +++ b/dacapo/experiments/tasks/one_hot_task.py @@ -4,6 +4,8 @@ from .predictors import OneHotPredictor from .task import Task +import warnings + class OneHotTask(Task): """ @@ -30,7 +32,17 @@ def __init__(self, task_config): Examples: >>> task = OneHotTask(task_config) """ - self.predictor = OneHotPredictor(classes=task_config.classes) + + if task_config.kernel_size is None: + warnings.warn( + "The default kernel size of 3 will be changing to 1. " + "Please specify the kernel size explicitly.", + DeprecationWarning, + ) + task_config.kernel_size = 3 + self.predictor = OneHotPredictor( + classes=task_config.classes, kernel_size=task_config.kernel_size + ) self.loss = DummyLoss() self.post_processor = ArgmaxPostProcessor() self.evaluator = DummyEvaluator() diff --git a/dacapo/experiments/tasks/one_hot_task_config.py b/dacapo/experiments/tasks/one_hot_task_config.py index de4817a0e..4207448de 100644 --- a/dacapo/experiments/tasks/one_hot_task_config.py +++ b/dacapo/experiments/tasks/one_hot_task_config.py @@ -28,3 +28,6 @@ class OneHotTaskConfig(TaskConfig): classes: List[str] = attr.ib( metadata={"help_text": "The classes corresponding with each id starting from 0"} ) + kernel_size: int | None = attr.ib( + default=None, + ) diff --git a/dacapo/experiments/tasks/predictors/dummy_predictor.py b/dacapo/experiments/tasks/predictors/dummy_predictor.py index 2c495da56..46da2f6d9 100644 --- a/dacapo/experiments/tasks/predictors/dummy_predictor.py +++ b/dacapo/experiments/tasks/predictors/dummy_predictor.py @@ -50,7 +50,7 @@ def create_model(self, architecture): >>> model = predictor.create_model(architecture) """ head = torch.nn.Conv3d( - architecture.num_out_channels, self.embedding_dims, kernel_size=3 + 
architecture.num_out_channels, self.embedding_dims, kernel_size=1 ) return Model(architecture, head) diff --git a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py index 7c2361aee..2538150e9 100644 --- a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py @@ -49,7 +49,7 @@ class HotDistancePredictor(Predictor): This is a subclass of Predictor. """ - def __init__(self, channels: List[str], scale_factor: float, mask_distances: bool): + def __init__(self, channels: List[str], scale_factor: float, mask_distances: bool, kernel_size: int): """ Initializes the HotDistancePredictor. @@ -64,6 +64,7 @@ def __init__(self, channels: List[str], scale_factor: float, mask_distances: boo Note: The channels argument is a list of strings, each string is the name of a class that is being segmented. """ + self.kernel_size = kernel_size self.channels = ( channels * 2 ) # one hot + distance (TODO: add hot/distance to channel names) @@ -119,11 +120,11 @@ def create_model(self, architecture): """ if architecture.dims == 2: head = torch.nn.Conv2d( - architecture.num_out_channels, self.embedding_dims, kernel_size=3 + architecture.num_out_channels, self.embedding_dims, self.kernel_size=1 ) elif architecture.dims == 3: head = torch.nn.Conv3d( - architecture.num_out_channels, self.embedding_dims, kernel_size=3 + architecture.num_out_channels, self.embedding_dims, self.kernel_size=1 ) return Model(architecture, head) diff --git a/dacapo/experiments/tasks/predictors/one_hot_predictor.py b/dacapo/experiments/tasks/predictors/one_hot_predictor.py index 1ad7fdeec..ff6e21db6 100644 --- a/dacapo/experiments/tasks/predictors/one_hot_predictor.py +++ b/dacapo/experiments/tasks/predictors/one_hot_predictor.py @@ -30,7 +30,7 @@ class OneHotPredictor(Predictor): This is a subclass of Predictor. 
""" - def __init__(self, classes: List[str]): + def __init__(self, classes: List[str], kernel_size: int): """ Initialize the OneHotPredictor. @@ -42,6 +42,7 @@ def __init__(self, classes: List[str]): >>> predictor = OneHotPredictor(classes) """ self.classes = classes + self.kernel_size = kernel_size @property def embedding_dims(self): @@ -70,8 +71,17 @@ def create_model(self, architecture): Examples: >>> model = predictor.create_model(architecture) """ - head = torch.nn.Conv3d( - architecture.num_out_channels, self.embedding_dims, kernel_size=3 + + if architecture.dims == 3: + conv_layer = torch.nn.Conv3d + elif architecture.dims == 2: + conv_layer = torch.nn.Conv2d + else: + raise Exception(f"Unsupported number of dimensions: {architecture.dims}") + head = conv_layer( + architecture.num_out_channels, + self.embedding_dims, + kernel_size=self.kernel_size, ) return Model(architecture, head) From 74fcc3af75363c9fa822c0b6e656285c553b8454 Mon Sep 17 00:00:00 2001 From: William Patton Date: Mon, 18 Nov 2024 17:51:01 -0800 Subject: [PATCH 37/43] bugfix: remove the mask_integral_downsample_factor from use. 
It's not used anywhere else --- dacapo/experiments/trainers/gunpowder_trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py index bea9c96e2..507151ad7 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer.py +++ b/dacapo/experiments/trainers/gunpowder_trainer.py @@ -268,13 +268,13 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): request.add(weight_key, output_size) request.add( mask_placeholder, - prediction_voxel_size * self.mask_integral_downsample_factor, + prediction_voxel_size, ) # request additional keys for snapshots request.add(gt_key, output_size) request.add(mask_key, output_size) request[mask_placeholder].roi = request[mask_placeholder].roi.snap_to_grid( - prediction_voxel_size * self.mask_integral_downsample_factor + prediction_voxel_size ) self._request = request From c52ee0fc3483d7fc8e6626e2bdcaf5cfc9d0b5a9 Mon Sep 17 00:00:00 2001 From: William Patton Date: Mon, 18 Nov 2024 17:51:32 -0800 Subject: [PATCH 38/43] bugfix: balance_weights now handles boolean ground truth --- dacapo/utils/balance_weights.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dacapo/utils/balance_weights.py b/dacapo/utils/balance_weights.py index e713745c6..5a53852ef 100644 --- a/dacapo/utils/balance_weights.py +++ b/dacapo/utils/balance_weights.py @@ -69,6 +69,9 @@ def balance_weights( scale_slab *= np.take(w, labels_slab) """ + if label_data.dtype == bool: + label_data = label_data.astype(np.uint8) + if moving_counts is None: moving_counts = [] unique_labels = np.unique(label_data) From abfcd02361d96282173dfe694f31cb9a6457dae3 Mon Sep 17 00:00:00 2001 From: William Patton Date: Mon, 18 Nov 2024 17:53:33 -0800 Subject: [PATCH 39/43] bugfix: kernel size --- dacapo/experiments/tasks/hot_distance_task.py | 1 + dacapo/experiments/tasks/predictors/hot_distance_predictor.py | 4 ++-- 2 files changed, 3
insertions(+), 2 deletions(-) diff --git a/dacapo/experiments/tasks/hot_distance_task.py b/dacapo/experiments/tasks/hot_distance_task.py index 24cb23675..382eaf8b1 100644 --- a/dacapo/experiments/tasks/hot_distance_task.py +++ b/dacapo/experiments/tasks/hot_distance_task.py @@ -47,6 +47,7 @@ def __init__(self, task_config): channels=task_config.channels, scale_factor=task_config.scale_factor, mask_distances=task_config.mask_distances, + kernel_size=task_config.kernel_size, ) self.loss = HotDistanceLoss() self.post_processor = ThresholdPostProcessor() diff --git a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py index 2538150e9..4a18a3154 100644 --- a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py @@ -120,11 +120,11 @@ def create_model(self, architecture): """ if architecture.dims == 2: head = torch.nn.Conv2d( - architecture.num_out_channels, self.embedding_dims, self.kernel_size=1 + architecture.num_out_channels, self.embedding_dims, self.kernel_size ) elif architecture.dims == 3: head = torch.nn.Conv3d( - architecture.num_out_channels, self.embedding_dims, self.kernel_size=1 + architecture.num_out_channels, self.embedding_dims, self.kernel_size ) return Model(architecture, head) From e302c332da6cbceb7503aee5d008a77311767193 Mon Sep 17 00:00:00 2001 From: William Patton Date: Mon, 18 Nov 2024 17:55:02 -0800 Subject: [PATCH 40/43] add todo on test --- tests/operations/test_mini.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/operations/test_mini.py b/tests/operations/test_mini.py index 789f527d1..abc49239a 100644 --- a/tests/operations/test_mini.py +++ b/tests/operations/test_mini.py @@ -17,13 +17,14 @@ import pytest +# TODO: Move unet parameters that don't affect interaction with other modules +# to a separate architecture test @pytest.mark.parametrize("data_dims", [2, 3])
@pytest.mark.parametrize("channels", [True, False]) @pytest.mark.parametrize("task", ["distance", "onehot", "affs"]) @pytest.mark.parametrize("trainer", [lf("gunpowder_trainer")]) @pytest.mark.parametrize("architecture_dims", [2, 3]) @pytest.mark.parametrize("upsample", [True, False]) -# @pytest.mark.parametrize("upsample", [False]) # @pytest.mark.parametrize("batch_norm", [True, False]) @pytest.mark.parametrize("batch_norm", [False]) # @pytest.mark.parametrize("use_attention", [True, False]) From 4b703fe9dc9d1679085174f9573ad54418c1a6b9 Mon Sep 17 00:00:00 2001 From: mzouink Date: Tue, 19 Nov 2024 14:38:35 +0000 Subject: [PATCH 41/43] :art: Format Python code with psf/black --- dacapo/experiments/tasks/hot_distance_task.py | 1 + .../tasks/hot_distance_task_config.py | 3 +-- .../post_processors/argmax_post_processor.py | 2 +- .../post_processors/threshold_post_processor.py | 16 +++++++++------- .../tasks/predictors/distance_predictor.py | 2 +- .../tasks/predictors/hot_distance_predictor.py | 8 +++++++- dacapo/predict_local.py | 6 +++--- tests/operations/test_architecture.py | 1 - tests/operations/test_mini.py | 8 +++++++- tests/operations/test_train.py | 1 - tests/operations/test_validate.py | 1 - 11 files changed, 30 insertions(+), 19 deletions(-) diff --git a/dacapo/experiments/tasks/hot_distance_task.py b/dacapo/experiments/tasks/hot_distance_task.py index 382eaf8b1..3d86da131 100644 --- a/dacapo/experiments/tasks/hot_distance_task.py +++ b/dacapo/experiments/tasks/hot_distance_task.py @@ -6,6 +6,7 @@ import warnings + class HotDistanceTask(Task): """ A class to represent a hot distance task that use binary prediction and distance prediction. 
diff --git a/dacapo/experiments/tasks/hot_distance_task_config.py b/dacapo/experiments/tasks/hot_distance_task_config.py index d140e38e4..7e0cc37aa 100644 --- a/dacapo/experiments/tasks/hot_distance_task_config.py +++ b/dacapo/experiments/tasks/hot_distance_task_config.py @@ -57,7 +57,6 @@ class HotDistanceTaskConfig(TaskConfig): }, ) - kernel_size: int | None = attr.ib( default=None, - ) \ No newline at end of file + ) diff --git a/dacapo/experiments/tasks/post_processors/argmax_post_processor.py b/dacapo/experiments/tasks/post_processors/argmax_post_processor.py index a88b89267..34cb0245d 100644 --- a/dacapo/experiments/tasks/post_processors/argmax_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/argmax_post_processor.py @@ -133,7 +133,7 @@ def process( overwrite=True, ) - read_roi = Roi((0,)*block_size.dims, block_size) + read_roi = Roi((0,) * block_size.dims, block_size) input_array = open_ds( f"{self.prediction_array_identifier.container.path}/{self.prediction_array_identifier.dataset}" ) diff --git a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py index 778064fcc..24ecead7a 100644 --- a/dacapo/experiments/tasks/post_processors/threshold_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/threshold_post_processor.py @@ -111,13 +111,15 @@ def process( if self.prediction_array._source_data.chunks is not None: block_size = self.prediction_array._source_data.chunks - write_size = Coordinate([ - b * v - for b, v in zip( - block_size[-self.prediction_array.dims :], - self.prediction_array.voxel_size, - ) - ]) + write_size = Coordinate( + [ + b * v + for b, v in zip( + block_size[-self.prediction_array.dims :], + self.prediction_array.voxel_size, + ) + ] + ) output_array = create_from_identifier( output_array_identifier, self.prediction_array.axis_names, diff --git a/dacapo/experiments/tasks/predictors/distance_predictor.py 
b/dacapo/experiments/tasks/predictors/distance_predictor.py index 172db065a..741e14db6 100644 --- a/dacapo/experiments/tasks/predictors/distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/distance_predictor.py @@ -314,7 +314,7 @@ def process( channel_dim = True else: raise ValueError("Cannot handle multiple channel dims") - + if not channel_dim: labels = labels[np.newaxis] diff --git a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py index 4a18a3154..f2ec4f874 100644 --- a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py @@ -49,7 +49,13 @@ class HotDistancePredictor(Predictor): This is a subclass of Predictor. """ - def __init__(self, channels: List[str], scale_factor: float, mask_distances: bool, kernel_size: int): + def __init__( + self, + channels: List[str], + scale_factor: float, + mask_distances: bool, + kernel_size: int, + ): """ Initializes the HotDistancePredictor. 
diff --git a/dacapo/predict_local.py b/dacapo/predict_local.py index 88aaa5cfc..f1760ff9f 100644 --- a/dacapo/predict_local.py +++ b/dacapo/predict_local.py @@ -75,8 +75,8 @@ def predict( model_device = str(next(model.parameters()).device).split(":")[0] - assert ( - model_device == str(device) + assert model_device == str( + device ), f"Model is not on the right device, Model: {model_device}, Compute device: {device}" def predict_fn(block): @@ -122,7 +122,7 @@ def predict_fn(block): task = daisy.Task( f"predict_{out_container}_{out_dataset}", total_roi=input_roi, - read_roi=Roi((0,)*input_size.dims, input_size), + read_roi=Roi((0,) * input_size.dims, input_size), write_roi=Roi(context, output_size), process_function=predict_fn, check_function=None, diff --git a/tests/operations/test_architecture.py b/tests/operations/test_architecture.py index 2be724d07..e3e569a4b 100644 --- a/tests/operations/test_architecture.py +++ b/tests/operations/test_architecture.py @@ -68,7 +68,6 @@ def test_conv_dims( raise ValueError(f"Conv2d found in 3d unet {name}") - @pytest.mark.parametrize( "run_config", [ diff --git a/tests/operations/test_mini.py b/tests/operations/test_mini.py index abc49239a..f50705538 100644 --- a/tests/operations/test_mini.py +++ b/tests/operations/test_mini.py @@ -59,7 +59,13 @@ def test_mini( ) task_config = build_test_task_config(task, data_dims, architecture_dims) architecture_config = build_test_architecture_config( - data_dims, architecture_dims, channels, batch_norm, upsample, use_attention, padding + data_dims, + architecture_dims, + channels, + batch_norm, + upsample, + use_attention, + padding, ) run_config = RunConfig( diff --git a/tests/operations/test_train.py b/tests/operations/test_train.py index ae8ad1760..ad45b848c 100644 --- a/tests/operations/test_train.py +++ b/tests/operations/test_train.py @@ -58,4 +58,3 @@ def test_large( training_stats = stats_store.retrieve_training_stats(run_config.name) assert training_stats.trained_until() == 
run_config.num_iterations - diff --git a/tests/operations/test_validate.py b/tests/operations/test_validate.py index 4df49a602..8a4d8cf26 100644 --- a/tests/operations/test_validate.py +++ b/tests/operations/test_validate.py @@ -38,4 +38,3 @@ def test_large( # test validating weights that don't exist with pytest.raises(FileNotFoundError): validate(run_config.name, 2) - From cac454241960c8555e513c2dc7245874e48d414c Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Tue, 19 Nov 2024 11:03:06 -0500 Subject: [PATCH 42/43] v0.3.5 --- dacapo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/__init__.py b/dacapo/__init__.py index 3b06000aa..e40e7277a 100644 --- a/dacapo/__init__.py +++ b/dacapo/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.3.2" +__version__ = "0.3.5" __version_info__ = tuple(int(i) for i in __version__.split(".")) from .options import Options # noqa From 53217ede9c232b76677593b2c861d819debd6a9d Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Tue, 19 Nov 2024 11:05:52 -0500 Subject: [PATCH 43/43] fix mypy --- dacapo/experiments/datasplits/datasets/dataset_config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dacapo/experiments/datasplits/datasets/dataset_config.py b/dacapo/experiments/datasplits/datasets/dataset_config.py index 4217eb00e..4a4ba3018 100644 --- a/dacapo/experiments/datasplits/datasets/dataset_config.py +++ b/dacapo/experiments/datasplits/datasets/dataset_config.py @@ -63,3 +63,6 @@ def verify(self) -> Tuple[bool, str]: This method is used to validate the configuration of the dataset. """ return True, "No validation for this DataSet" + + def __hash__(self) -> int: + return hash(self.name)