Skip to content

Commit

Permalink
Merge pull request #114 from IGNF/fix-subsample
Browse files Browse the repository at this point in the history
Fix edge case with 1 input value that is not selected in subsample_data
  • Loading branch information
leavauchier authored Mar 13, 2024
2 parents 83b6b21 + 3fa05eb commit 8c01bdf
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 10 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# CHANGELOG
### 3.8.2
- fix: type error in edge case when dropping points in transforms
- fix: points not dropped case in subsampling when the subtile contains only one point
- fix: type error in edge case when dropping points in DropPointsByClass (when there is only one remaining point)

### 3.8.1
- fix: propagate input las format to output las (in particular epsg which comes either from input or config)
Expand Down
17 changes: 12 additions & 5 deletions myria3d/pctl/transforms/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,19 @@ def __call__(self, data: Data):
return data


def subsample_data(data, num_nodes, choice: torch.Tensor):
    """Keep only the nodes selected by ``choice`` in every per-node tensor of ``data``.

    Args:
        data: iterable of ``(key, item)`` pairs that also supports ``data[key] = value``
            and attribute assignment (presumably a ``torch_geometric.data.Data`` object,
            as used in the tests - confirm against other callers).
        num_nodes: number of nodes in ``data`` before subsampling. Only tensors whose
            first dimension equals this value are subsampled.
        choice: either a boolean mask over the nodes, or a tensor of node indices.

    Returns:
        The same ``data`` object, modified in place.
    """
    # TODO: get num_nodes from data.num_nodes instead to simplify signature
    # A boolean mask keeps as many nodes as it has True values; an index tensor keeps
    # one node per index. Convert to a plain int so num_nodes is never a 0-d tensor.
    if choice.dtype == torch.bool:
        out_nodes = int(choice.sum())
    else:
        out_nodes = choice.size(0)

    for key, item in data:
        if key == "num_nodes":
            data.num_nodes = out_nodes
        elif key in ["copies", "idx_in_original_cloud"]:
            # Do not subsample copies of the original point cloud or indices of the original points
            # contained in the patch
            continue
        elif "edge" in key:
            # Keys mentioning "edge" are left untouched.
            continue
        elif torch.is_tensor(item) and item.size(0) == num_nodes:
            # No special case for a single-node input: it must be subsampled as well,
            # otherwise a point that is not selected would be kept.
            data[key] = item[choice]

    return data
Expand Down Expand Up @@ -237,5 +240,9 @@ def __call__(self, data):
if points_to_drop.sum() > 0:
points_to_keep = torch.logical_not(points_to_drop)
data = subsample_data(data, num_nodes=data.num_nodes, choice=points_to_keep)
# Here we also subsample these idx since we do not need to interpolate these points back
# It supposes that DropPointsByClass is run before copying the original point cloud
if "idx_in_original_cloud" in data:
data.idx_in_original_cloud = data.idx_in_original_cloud[points_to_keep.numpy()]

return data
29 changes: 27 additions & 2 deletions tests/myria3d/models/test_model.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import hydra
from pytorch_lightning import LightningDataModule
from pytorch_lightning import LightningDataModule, LightningModule
from tests.conftest import make_default_hydra_cfg

from myria3d.models.model import Model
from myria3d.utils import utils # noqa


def test_model_get_batch_tensor_by_enumeration():
Expand All @@ -11,7 +12,7 @@ def test_model_get_batch_tensor_by_enumeration():
"predict.src_las=tests/data/toy_dataset_src/862000_6652000.classified_toy_dataset.100mx100m.las",
"datamodule.epsg=2154",
"work_dir=./../../..",
"datamodule.subtile_width=1",
"datamodule.subtile_width=1", # Extreme case with very few points per subtile
"datamodule.hdf5_file_path=null",
]
)
Expand All @@ -26,3 +27,27 @@ def test_model_get_batch_tensor_by_enumeration():
for batch in datamodule.predict_dataloader():
# Check that no error is raised ("TypeError: object of type 'numpy.int64' has no len()")
_ = model._get_batch_tensor_by_enumeration(batch.idx_in_original_cloud)


def test_model_forward():
    """Run the model forward pass on every predict batch built from the toy LAS file.

    The test succeeds as long as no exception is raised; the outputs are not inspected.
    """
    overrides = [
        "predict.src_las=tests/data/toy_dataset_src/862000_6652000.classified_toy_dataset.100mx100m.las",
        "datamodule.epsg=2154",
        "work_dir=./../../..",
        "datamodule.subtile_width=1",  # Extreme case with very few points per subtile
        "datamodule.hdf5_file_path=null",
    ]
    cfg = make_default_hydra_cfg(overrides=overrides)

    predict_datamodule: LightningDataModule = hydra.utils.instantiate(cfg.datamodule)
    predict_datamodule._set_predict_data(cfg.predict.src_las)

    network: LightningModule = hydra.utils.instantiate(cfg.model)
    # Move the model to the device selected by the predict configuration
    network.to(utils.define_device_from_config_param(cfg.predict.gpus))
    network.eval()
    print(network.model)
    for batch in predict_datamodule.predict_dataloader():
        # Check that no error is raised
        targets, logits = network.forward(batch)
112 changes: 110 additions & 2 deletions tests/myria3d/pctl/transforms/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,99 @@
import torch
import torch_geometric

from myria3d.pctl.transforms.transforms import DropPointsByClass, TargetTransform
from myria3d.pctl.transforms.transforms import (
DropPointsByClass,
MinimumNumNodes,
TargetTransform,
subsample_data,
)


@pytest.mark.parametrize(
    "x,idx,choice,nb_out_nodes",
    [
        # Standard use case with choice containing indices
        (
            torch.Tensor([10, 11, 12, 13, 14]),
            np.array([20, 21, 22, 23, 24]),
            torch.IntTensor([0, 1, 4]),
            3,
        ),
        # Edge case with choice containing indices: select no point
        (
            torch.Tensor([10, 11, 12, 13, 14]),
            np.array([20, 21, 22, 23, 24]),
            torch.IntTensor([]),
            0,
        ),
        # Edge case with choice containing indices: select one point
        (
            torch.Tensor([10, 11, 12, 13, 14]),
            np.array([20, 21, 22, 23, 24]),
            torch.IntTensor([1]),
            1,
        ),
        # Edge case with choice containing indices: input array with one point, selected
        (
            torch.Tensor([10]),
            np.array([20]),
            torch.IntTensor([0]),
            1,
        ),
        # Edge case with choice containing indices: input array with one point, not selected
        (
            torch.Tensor([10]),
            np.array([20]),
            torch.IntTensor([]),
            0,
        ),
        # Standard use case with choice as boolean array
        (
            torch.Tensor([10, 11, 12, 13, 14]),
            np.array([20, 21, 22, 23, 24]),
            torch.BoolTensor([True, True, False, True, False]),
            3,
        ),
        # Edge case with choice as boolean array: select no point
        (
            torch.Tensor([10, 11, 12, 13, 14]),
            np.array([20, 21, 22, 23, 24]),
            torch.BoolTensor([False, False, False, False, False]),
            0,
        ),
        # Edge case with choice as boolean array: select one point
        (
            torch.Tensor([10, 11, 12, 13, 14]),
            np.array([20, 21, 22, 23, 24]),
            torch.BoolTensor([False, True, False, False, False]),
            1,
        ),
        # Edge case with choice as boolean array: input array with one point, selected
        (
            torch.Tensor([10]),
            np.array([20]),
            torch.BoolTensor([True]),
            1,
        ),
        # Edge case with choice as boolean array: input array with one point, not selected
        (
            torch.Tensor([10]),
            np.array([20]),
            torch.BoolTensor([False]),
            0,
        ),
    ],
)
def test_subsample_data(x, idx, choice, nb_out_nodes):
    """Check that subsample_data keeps exactly the selected nodes of ``x``.

    ``choice`` is either a tensor of indices or a boolean mask. The
    ``idx_in_original_cloud`` array is expected to be left untouched.
    """
    num_nodes = x.size(0)

    # Compute the expected values in pure Python, independently from tensor indexing.
    source_values = x.tolist()
    if choice.dtype == torch.bool:
        expected_values = [v for v, keep in zip(source_values, choice.tolist()) if keep]
    else:
        expected_values = [source_values[i] for i in choice.tolist()]
    # Snapshot the indices to detect any in-place modification.
    idx_before = idx.copy()

    data = torch_geometric.data.Data(x=x, idx_in_original_cloud=idx, num_nodes=num_nodes)
    transformed_data = subsample_data(data, num_nodes, choice)

    assert transformed_data.num_nodes == nb_out_nodes
    assert isinstance(transformed_data.x, torch.Tensor)
    assert transformed_data.x.size(0) == nb_out_nodes
    # Check that the right points were kept, not only the right number of points
    assert transformed_data.x.tolist() == expected_values
    assert isinstance(transformed_data.idx_in_original_cloud, np.ndarray)
    # Check that "idx_in_original_cloud" key is not modified
    assert transformed_data.idx_in_original_cloud.shape[0] == num_nodes
    assert np.array_equal(transformed_data.idx_in_original_cloud, idx_before)


def test_TargetTransform_with_valid_config():
Expand Down Expand Up @@ -42,7 +134,6 @@ def test_DropPointsByClass():
transformed_data = drop_transforms(data)
assert torch.equal(transformed_data.y, torch.Tensor([1, 2]))
assert transformed_data.x.size(0) == 2
print(type(transformed_data.idx_in_original_cloud))
assert isinstance(transformed_data.idx_in_original_cloud, np.ndarray)
assert transformed_data.idx_in_original_cloud.size == 2
assert np.all(transformed_data.idx_in_original_cloud == np.array([0, 3]))
Expand All @@ -66,3 +157,20 @@ def test_DropPointsByClass():
assert isinstance(transformed_data.idx_in_original_cloud, np.ndarray)
assert transformed_data.idx_in_original_cloud.shape[0] == 1
assert np.all(transformed_data.idx_in_original_cloud == np.array([0]))


@pytest.mark.parametrize("input_nodes,min_nodes", [(5, 10), (1, 10), (15, 10)])
def test_MinimumNumNodes(input_nodes, min_nodes):
    """Check the node count produced by MinimumNumNodes.

    The output is expected to hold ``max(input_nodes, min_nodes)`` nodes, while the
    numpy ``idx_in_original_cloud`` array keeps its original length and type.
    """
    features = torch.rand((input_nodes, 3))
    original_indices = np.arange(input_nodes)  # Not a tensor
    sample = torch_geometric.data.Data(x=features, idx_in_original_cloud=original_indices)

    result = MinimumNumNodes(min_nodes)(sample)

    target_count = input_nodes if input_nodes > min_nodes else min_nodes
    assert result.num_nodes == target_count
    assert isinstance(result.x, torch.Tensor)
    assert result.x.size(0) == target_count
    # Check that "idx_in_original_cloud" key is not modified
    assert isinstance(result.idx_in_original_cloud, np.ndarray)
    assert result.idx_in_original_cloud.shape[0] == input_nodes

0 comments on commit 8c01bdf

Please sign in to comment.