From 9c3c5b65f7fec4d3368a6169cc801069e902237b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 25 Oct 2023 22:19:44 -0600 Subject: [PATCH 01/11] Add grid_bounds_to_polygons Modified from https://notebooksharing.space/view/c6c1f3a7d0c260724115eaa2bf78f3738b275f7f633c1558639e7bbd75b31456#displayOptions= Co-authored-by: Ryan Abernathey --- cf_xarray/geometry.py | 61 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 148a8d1b..0feae423 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -293,3 +293,64 @@ def cf_to_points(ds: xr.Dataset): j += n return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords) + + +def grid_bounds_to_polygons(ds: xr.Dataset) -> xr.DataArray: + """ + Converts a regular 2D lat/lon grid to a 2D array of shapely polygons. + + Modified from https://notebooksharing.space/view/c6c1f3a7d0c260724115eaa2bf78f3738b275f7f633c1558639e7bbd75b31456. + + Parameters + ---------- + ds : xr.Dataset + Dataset with "latitude" and "longitude" variables as well as their bounds variables. + 1D "latitude" and "longitude" variables are supported. This function will automatically + broadcast them against each other. + + Returns + ------- + DataArray + DataArray with shapely polygon per grid cell. + """ + from shapely import Polygon + + grid = ds.cf[["latitude", "longitude"]].load().reset_coords() + bounds = ds.cf.bounds + + assert "latitude" in bounds + assert "longitude" in bounds + (lon_bounds,) = bounds["longitude"] + (lat_bounds,) = bounds["latitude"] + + (points,) = xr.broadcast(grid) + + bounds_dim = grid.cf.get_bounds_dim_name("latitude") + points = points.transpose(..., bounds_dim) + assert points.sizes[bounds_dim] == 2 + + lonbnd = points[lon_bounds].data + latbnd = points[lat_bounds].data + + # geopandas needs this + expanded_lon = lonbnd[..., [0, 0, 1, 1]] + mask = expanded_lon[..., 0] >= 180 + expanded_lon[mask, :] = expanded_lon[mask, :] - 360 + + # these magic numbers are OK : + # - 4 corners to a polygon, and + # - 2 from stacking lat, lon along the last axis + # flatten here to make iteration easier. It would be nice to avoid that. + # potentially with just np.vectorize. The polygon creation is the real slow bit. + # Shapely's MultiPolygon also iterates over a list in Python... + blocked = np.stack([expanded_lon, latbnd[..., [0, 1, 1, 0]]], axis=-1).reshape( + -1, 4, 2 + ) + polyarray = np.array( + [Polygon(blocked[i, ...]) for i in range(blocked.shape[0])], dtype="O" + ) + newshape = latbnd.shape[:-1] + polyarray = polyarray.reshape(newshape) + boxes = points[lon_bounds][..., 0].copy(data=polyarray) + + return boxes From d26bf4a59161060dc8112b6d0917ec8889e7c7bf Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 26 Oct 2023 08:22:26 -0600 Subject: [PATCH 02/11] Speedup! Co-authored-by: Pascal Bourgault --- cf_xarray/geometry.py | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 0feae423..db7e7f79 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -295,7 +295,7 @@ def cf_to_points(ds: xr.Dataset): return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords) -def grid_bounds_to_polygons(ds: xr.Dataset) -> xr.DataArray: +def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: """ Converts a regular 2D lat/lon grid to a 2D array of shapely polygons. @@ -313,7 +313,7 @@ def grid_bounds_to_polygons(ds: xr.Dataset) -> xr.DataArray: DataArray DataArray with shapely polygon per grid cell. """ - from shapely import Polygon + import shapely grid = ds.cf[["latitude", "longitude"]].load().reset_coords() bounds = ds.cf.bounds @@ -327,30 +327,24 @@ def grid_bounds_to_polygons(ds: xr.Dataset) -> xr.DataArray: bounds_dim = grid.cf.get_bounds_dim_name("latitude") points = points.transpose(..., bounds_dim) - assert points.sizes[bounds_dim] == 2 - lonbnd = points[lon_bounds].data latbnd = points[lat_bounds].data - # geopandas needs this - expanded_lon = lonbnd[..., [0, 0, 1, 1]] - mask = expanded_lon[..., 0] >= 180 - expanded_lon[mask, :] = expanded_lon[mask, :] - 360 - - # these magic numbers are OK : - # - 4 corners to a polygon, and - # - 2 from stacking lat, lon along the last axis - # flatten here to make iteration easier. It would be nice to avoid that. - # potentially with just np.vectorize. The polygon creation is the real slow bit. - # Shapely's MultiPolygon also iterates over a list in Python... - blocked = np.stack([expanded_lon, latbnd[..., [0, 1, 1, 0]]], axis=-1).reshape( - -1, 4, 2 - ) - polyarray = np.array( - [Polygon(blocked[i, ...]) for i in range(blocked.shape[0])], dtype="O" - ) - newshape = latbnd.shape[:-1] - polyarray = polyarray.reshape(newshape) + if points.sizes[bounds_dim] == 2: + # geopandas needs this + lonbnd = lonbnd[..., [0, 0, 1, 1]] + mask = lonbnd[..., 0] >= 180 + lonbnd[mask, :] = lonbnd[mask, :] - 360 + latbnd = latbnd[..., [0, 1, 1, 0]] + + elif points.sizes[bounds_dim] == 4: + raise NotImplementedError + else: + raise ValueError( + f"The size of the detected bounds or vertex dimension {bounds_dim} is not 2 or 4." + ) + + polyarray = shapely.polygons(shapely.linearrings(lonbnd, latbnd)) boxes = points[lon_bounds][..., 0].copy(data=polyarray) return boxes From 0370b53e5572e68ea4dc333bf546421d2a38b30d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 26 Oct 2023 08:26:20 -0600 Subject: [PATCH 03/11] ignore shapely types --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f9c9150a..930a1cca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,7 +132,7 @@ module=[ "pint", "matplotlib", "pytest", - "shapely.geometry", + "shapely.*", "xarray.core.pycompat", ] ignore_missing_imports = true From 83f8349c915c25287b60646fafb439f7256af8f3 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 26 Oct 2023 08:40:27 -0600 Subject: [PATCH 04/11] Support 4 point form too --- cf_xarray/geometry.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index db7e7f79..7c399131 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -315,15 +315,22 @@ def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: """ import shapely - grid = ds.cf[["latitude", "longitude"]].load().reset_coords() - bounds = ds.cf.bounds + grid = ds.cf[["latitude", "longitude"]].load() + bounds = grid.cf.bounds + dims = grid.cf.dims + + if "latitude" in dims or "longitude" in dims: + # for 1D lat, lon, this allows them to be + # broadcast against each other + grid = grid.reset_coords() assert "latitude" in bounds assert "longitude" in bounds (lon_bounds,) = bounds["longitude"] (lat_bounds,) = bounds["latitude"] - (points,) = xr.broadcast(grid) + with xr.set_options(keep_attrs=True): + (points,) = xr.broadcast(grid) bounds_dim = grid.cf.get_bounds_dim_name("latitude") points = points.transpose(..., bounds_dim) @@ -337,14 +344,14 @@ def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: lonbnd[mask, :] = lonbnd[mask, :] - 360 latbnd = latbnd[..., [0, 1, 1, 0]] - elif points.sizes[bounds_dim] == 4: - raise NotImplementedError - else: + elif points.sizes[bounds_dim] != 4: raise ValueError( f"The size of the detected bounds or vertex dimension {bounds_dim} is not 2 or 4." ) polyarray = shapely.polygons(shapely.linearrings(lonbnd, latbnd)) - boxes = points[lon_bounds][..., 0].copy(data=polyarray) + + # 'geometry' is a blessed name in geopandas. + boxes = points[lon_bounds][..., 0].copy(data=polyarray).rename("geometry") return boxes From 8a6b79ff4b223e62c85ea8b6e169342fbc7639e2 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 26 Oct 2023 08:47:13 -0600 Subject: [PATCH 05/11] Add top-level geometry import --- cf_xarray/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cf_xarray/__init__.py b/cf_xarray/__init__.py index 782317e3..c33ae987 100644 --- a/cf_xarray/__init__.py +++ b/cf_xarray/__init__.py @@ -9,4 +9,7 @@ from .options import set_options # noqa from .utils import _get_version +from . import geometry # noqa + +# __version__ = _get_version() From 96a00d802006fabf2f970619fd40ecb5b54123ee Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 26 Oct 2023 11:32:24 -0600 Subject: [PATCH 06/11] fix --- cf_xarray/geometry.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 7c399131..3765cfdb 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -338,10 +338,7 @@ def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: latbnd = points[lat_bounds].data if points.sizes[bounds_dim] == 2: - # geopandas needs this lonbnd = lonbnd[..., [0, 0, 1, 1]] - mask = lonbnd[..., 0] >= 180 - lonbnd[mask, :] = lonbnd[mask, :] - 360 latbnd = latbnd[..., [0, 1, 1, 0]] elif points.sizes[bounds_dim] != 4: @@ -349,6 +346,10 @@ def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: f"The size of the detected bounds or vertex dimension {bounds_dim} is not 2 or 4." ) + # geopandas needs this + mask = lonbnd[..., 0] >= 180 + lonbnd[mask, :] = lonbnd[mask, :] - 360 + polyarray = shapely.polygons(shapely.linearrings(lonbnd, latbnd)) # 'geometry' is a blessed name in geopandas. From 0db0b967a0c433afd0572c517e4350ecfb903f2c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 11 Dec 2023 04:36:14 +0000 Subject: [PATCH 07/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cf_xarray/geometry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index b349802a..40fa1743 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -438,8 +438,8 @@ def cf_to_lines(ds: xr.Dataset): geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines) return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords) - - + + def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: """ Converts a regular 2D lat/lon grid to a 2D array of shapely polygons. From d33610e21ea036c19d60011d75560e8a52c1df20 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 10 Dec 2023 20:36:46 -0800 Subject: [PATCH 08/11] Fix --- cf_xarray/geometry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 40fa1743..26c12293 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -440,7 +440,7 @@ def cf_to_lines(ds: xr.Dataset): return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords) - def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: +def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: """ Converts a regular 2D lat/lon grid to a 2D array of shapely polygons. From c0484104dfb14a2ef0f656af42133a81f16df28d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 13 Jan 2024 15:12:09 -0700 Subject: [PATCH 09/11] WIP --- cf_xarray/geometry.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 26c12293..0f5e7f07 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -462,7 +462,9 @@ def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: grid = ds.cf[["latitude", "longitude"]].load() bounds = grid.cf.bounds + coordinates = grid.cf.coordinates dims = grid.cf.dims + bounds_dim = grid.cf.get_bounds_dim_name("latitude") if "latitude" in dims or "longitude" in dims: # for 1D lat, lon, this allows them to be @@ -471,13 +473,23 @@ def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: assert "latitude" in bounds assert "longitude" in bounds + (lon,) = coordinates["longitude"] (lon_bounds,) = bounds["longitude"] + (lat,) = coordinates["latitude"] (lat_bounds,) = bounds["latitude"] with xr.set_options(keep_attrs=True): - (points,) = xr.broadcast(grid) + broadcasted = xr.broadcast( + grid[lon], + grid[lat], + grid[lon_bounds], + grid[lat_bounds], + exclude=bounds_dim, + ) + asdict = dict(zip([lon, lat, lon_bounds, lat_bounds], broadcasted)) + # display(asdict) + points = xr.Dataset(asdict) - bounds_dim = grid.cf.get_bounds_dim_name("latitude") points = points.transpose(..., bounds_dim) lonbnd = points[lon_bounds].data latbnd = points[lat_bounds].data From 1f7ceae6e0902ca0cad3227c9726e77ccbda1855 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 24 Jul 2024 12:37:14 -0600 Subject: [PATCH 10/11] rename --- cf_xarray/geometry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 6ca28702..5988f6a3 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -957,7 +957,7 @@ def cf_to_polygons(ds: xr.Dataset): ).drop_vars(node_count_name) -def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: +def bounds_to_polygons(ds: xr.Dataset) -> xr.DataArray: """ Converts a regular 2D lat/lon grid to a 2D array of shapely polygons. From 25d822466fa66506e418f662f1801857e2b612c7 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 24 Jul 2024 12:51:41 -0600 Subject: [PATCH 11/11] Fixes. --- cf_xarray/geometry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 5988f6a3..66d2aedd 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -980,7 +980,7 @@ def bounds_to_polygons(ds: xr.Dataset) -> xr.DataArray: grid = ds.cf[["latitude", "longitude"]].load() bounds = grid.cf.bounds coordinates = grid.cf.coordinates - dims = grid.cf.dims + dims = tuple(grid.cf.sizes) bounds_dim = grid.cf.get_bounds_dim_name("latitude") if "latitude" in dims or "longitude" in dims: @@ -999,9 +999,9 @@ def bounds_to_polygons(ds: xr.Dataset) -> xr.DataArray: broadcasted = xr.broadcast( grid[lon], grid[lat], + ) + xr.broadcast( grid[lon_bounds], grid[lat_bounds], - exclude=bounds_dim, ) asdict = dict(zip([lon, lat, lon_bounds, lat_bounds], broadcasted)) # display(asdict)