diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 00000000..9d1e0987 --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,5 @@ +changelog: + exclude: + authors: + - dependabot + - pre-commit-ci diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5a8657b9..1cd7461f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -50,7 +50,7 @@ jobs: run: | pytest -n auto --cov=./ --cov-report=xml - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v3.1.4 + uses: codecov/codecov-action@v4.5.0 with: file: ./coverage.xml flags: unittests @@ -114,7 +114,7 @@ jobs: run: | python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report cf_xarray/ - name: Upload mypy coverage to Codecov - uses: codecov/codecov-action@v3.1.4 + uses: codecov/codecov-action@v4.5.0 with: file: mypy_report/cobertura.xml flags: mypy diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml index eedac70b..ec838552 100644 --- a/.github/workflows/pypi.yaml +++ b/.github/workflows/pypi.yaml @@ -15,7 +15,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 name: Install Python with: python-version: "3.10" @@ -41,7 +41,7 @@ jobs: else echo "✅ Looks good" fi - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: releases path: dist @@ -50,11 +50,11 @@ jobs: needs: build-artifacts runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 name: Install Python with: python-version: "3.10" - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: releases path: dist @@ -72,7 +72,7 @@ jobs: - name: Publish package to TestPyPI if: github.event_name == 'push' - uses: pypa/gh-action-pypi-publish@v1.8.11 + uses: pypa/gh-action-pypi-publish@v1.9.0 with: password: ${{ secrets.TESTPYPI_TOKEN }} repository_url: 
https://test.pypi.org/legacy/ @@ -91,11 +91,11 @@ jobs: id-token: write steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: releases path: dist - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@v1.8.11 + uses: pypa/gh-action-pypi-publish@v1.9.0 with: verbose: true diff --git a/.github/workflows/testpypi-release.yaml b/.github/workflows/testpypi-release.yaml index 9df46af5..86c2b900 100644 --- a/.github/workflows/testpypi-release.yaml +++ b/.github/workflows/testpypi-release.yaml @@ -21,7 +21,7 @@ jobs: with: fetch-depth: 0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 name: Install Python with: python-version: "3.10" @@ -53,7 +53,7 @@ jobs: echo "✅ Looks good" fi - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: releases path: dist @@ -62,11 +62,11 @@ jobs: needs: build-artifacts runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 name: Install Python with: python-version: "3.10" - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: releases path: dist diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 9923774a..f9116cfa 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -22,7 +22,7 @@ jobs: upstream-dev: name: upstream-dev runs-on: ubuntu-latest - if: ${{ (contains( github.event.pull_request.labels.*.name, 'test-upstream') && github.event_name == 'pull_request') || github.event_name == 'workflow_dispatch' }} + if: ${{ (contains( github.event.pull_request.labels.*.name, 'test-upstream') && github.event_name == 'pull_request') || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }} defaults: run: shell: bash -l {0} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b1c2e527..1a52d50f 100644 --- a/.pre-commit-config.yaml +++ 
b/.pre-commit-config.yaml @@ -3,20 +3,20 @@ ci: repos: - repo: https://github.com/asottile/pyupgrade - rev: v3.15.0 + rev: v3.16.0 hooks: - id: pyupgrade args: ["--py39-plus"] - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.1.1' + rev: 'v0.5.0' hooks: - id: ruff args: ["--show-fixes", "--fix"] - repo: https://github.com/psf/black-pre-commit-mirror - rev: 23.10.1 + rev: 24.4.2 hooks: - id: black @@ -36,7 +36,7 @@ repos: - mdformat-myst - repo: https://github.com/nbQA-dev/nbQA - rev: 1.7.0 + rev: 1.8.5 hooks: - id: nbqa-black - id: nbqa-ruff @@ -47,7 +47,7 @@ repos: additional_dependencies: [mdformat==0.7.17] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -56,7 +56,7 @@ repos: - id: debug-statements - repo: https://github.com/keewis/blackdoc - rev: v0.3.8 + rev: v0.3.9 hooks: - id: blackdoc files: .+\.py$ @@ -67,7 +67,7 @@ repos: - id: validate-cff - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.15 + rev: v0.18 hooks: - id: validate-pyproject diff --git a/cf_xarray/__init__.py b/cf_xarray/__init__.py index c33ae987..6ae0a916 100644 --- a/cf_xarray/__init__.py +++ b/cf_xarray/__init__.py @@ -11,5 +11,4 @@ from . import geometry # noqa -# __version__ = _get_version() diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index a10c4886..a915335b 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -28,8 +28,10 @@ from . 
import sgrid from .criteria import ( _DSG_ROLES, + _GEOMETRY_TYPES, cf_role_criteria, coordinate_criteria, + geometry_var_criteria, grid_mapping_var_criteria, regex, ) @@ -39,6 +41,7 @@ _format_data_vars, _format_dsg_roles, _format_flags, + _format_geometries, _format_sgrid, _maybe_panel, ) @@ -198,7 +201,9 @@ def _get_groupby_time_accessor( def _get_custom_criteria( - obj: DataArray | Dataset, key: Hashable, criteria: Mapping | None = None + obj: DataArray | Dataset, + key: Hashable, + criteria: Iterable[Mapping] | Mapping | None = None, ) -> list[Hashable]: """ Translate from axis, coord, or custom name to variable name. @@ -227,18 +232,16 @@ def _get_custom_criteria( except ImportError: from re import match as regex_match # type: ignore[no-redef] - if isinstance(obj, DataArray): - obj = obj._to_temp_dataset() - variables = obj._variables - if criteria is None: if not OPTIONS["custom_criteria"]: return [] criteria = OPTIONS["custom_criteria"] - if criteria is not None: - criteria_iter = always_iterable(criteria, allowed=(tuple, list, set)) + if isinstance(obj, DataArray): + obj = obj._to_temp_dataset() + variables = obj._variables + criteria_iter = always_iterable(criteria, allowed=(tuple, list, set)) criteria_map = ChainMap(*criteria_iter) results: set = set() if key in criteria_map: @@ -367,6 +370,21 @@ def _get_measure(obj: DataArray | Dataset, key: str) -> list[str]: return list(results) +def _parse_related_geometry_vars(attrs: Mapping) -> tuple[Hashable]: + names = itertools.chain( + *[ + attrs.get(attr, "").split(" ") + for attr in [ + "interior_ring", + "node_coordinates", + "node_count", + "part_node_count", + ] + ] + ) + return tuple(n for n in names if n) + + def _get_bounds(obj: DataArray | Dataset, key: Hashable) -> list[Hashable]: """ Translate from key (either CF key or variable name) to its bounds' variable names. 
@@ -470,8 +488,14 @@ def _get_all(obj: DataArray | Dataset, key: Hashable) -> list[Hashable]: """ all_mappers: tuple[Mapper] = ( _get_custom_criteria, - functools.partial(_get_custom_criteria, criteria=cf_role_criteria), # type: ignore[assignment] - functools.partial(_get_custom_criteria, criteria=grid_mapping_var_criteria), + functools.partial( + _get_custom_criteria, + criteria=( + cf_role_criteria, + grid_mapping_var_criteria, + geometry_var_criteria, + ), + ), # type: ignore[assignment] _get_axis_coord, _get_measure, _get_grid_mapping_name, @@ -721,8 +745,7 @@ def _getitem( accessor: CFAccessor, key: Hashable, skip: list[Literal["coords", "measures"]] | None = None, -) -> DataArray: - ... +) -> DataArray: ... @overload @@ -730,8 +753,7 @@ def _getitem( accessor: CFAccessor, key: Iterable[Hashable], skip: list[Literal["coords", "measures"]] | None = None, -) -> Dataset: - ... +) -> Dataset: ... def _getitem( @@ -823,6 +845,23 @@ def check_results(names, key): successful[k] = bool(grid_mapping) if grid_mapping: varnames.extend(grid_mapping) + elif "geometries" not in skip and (k == "geometry" or k in _GEOMETRY_TYPES): + geometries = _get_all(obj, k) + if geometries and k in _GEOMETRY_TYPES: + new = itertools.chain( + _parse_related_geometry_vars( + ChainMap(obj[g].attrs, obj[g].encoding) + ) + for g in geometries + ) + geometries.extend(*new) + if len(geometries) > 1 and scalar_key: + raise ValueError( + f"CF geometries must be represented by an Xarray Dataset. To request a Dataset in return please pass `[{k!r}]` instead." 
+ ) + successful[k] = bool(geometries) + if geometries: + varnames.extend(geometries) elif k in custom_criteria or k in cf_role_criteria: names = _get_all(obj, k) check_results(names, k) @@ -1561,8 +1600,7 @@ def _generate_repr(self, rich=False): _format_flags(self, rich), title="Flag Variable", rich=rich ) - roles = self.cf_roles - if roles: + if roles := self.cf_roles: if any(role in roles for role in _DSG_ROLES): yield _maybe_panel( _format_dsg_roles(self, dims, rich), @@ -1578,6 +1616,13 @@ def _generate_repr(self, rich=False): rich=rich, ) + if self.geometries: + yield _maybe_panel( + _format_geometries(self, dims, rich), + title="Geometries", + rich=rich, + ) + yield _maybe_panel( _format_coordinates(self, dims, coords, rich), title="Coordinates", @@ -1757,12 +1802,42 @@ def cf_roles(self) -> dict[str, list[Hashable]]: vardict: dict[str, list[Hashable]] = {} for k, v in variables.items(): - if "cf_role" in v.attrs: - role = v.attrs["cf_role"] + attrs_or_encoding = ChainMap(v.attrs, v.encoding) + if role := attrs_or_encoding.get("cf_role", None): vardict[role] = vardict.setdefault(role, []) + [k] return {role_: sort_maybe_hashable(v) for role_, v in vardict.items()} + @property + def geometries(self) -> dict[str, list[Hashable]]: + """ + Mapping geometry type names to variable names. + + Returns + ------- + dict + Dictionary mapping geometry names to variable names. 
+ + References + ---------- + Please refer to the CF conventions document : http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#coordinates-metadata + """ + vardict: dict[str, list[Hashable]] = {} + + if isinstance(self._obj, Dataset): + variables = self._obj._variables + elif isinstance(self._obj, DataArray): + variables = {"_": self._obj._variable} + + for v in variables.values(): + attrs_or_encoding = ChainMap(v.attrs, v.encoding) + if geometry := attrs_or_encoding.get("geometry", None): + gtype = self._obj[geometry].attrs["geometry_type"] + vardict.setdefault(gtype, []) + if geometry not in vardict[gtype]: + vardict[gtype] += [geometry] + return {type_: sort_maybe_hashable(v) for type_, v in vardict.items()} + def get_associated_variable_names( self, name: Hashable, skip_bounds: bool = False, error: bool = True ) -> dict[str, list[Hashable]]: @@ -1797,15 +1872,15 @@ def get_associated_variable_names( "bounds", "grid_mapping", "grid", + "geometry", ] coords: dict[str, list[Hashable]] = {k: [] for k in keys} attrs_or_encoding = ChainMap(self._obj[name].attrs, self._obj[name].encoding) - coordinates = attrs_or_encoding.get("coordinates", None) # Handles case where the coordinates attribute is None # This is used to tell xarray to not write a coordinates attribute - if coordinates: + if coordinates := attrs_or_encoding.get("coordinates", None): coords["coordinates"] = coordinates.split(" ") if "cell_measures" in attrs_or_encoding: @@ -1824,27 +1899,32 @@ def get_associated_variable_names( ) coords["cell_measures"] = [] - if ( - isinstance(self._obj, Dataset) - and "ancillary_variables" in attrs_or_encoding + if isinstance(self._obj, Dataset) and ( + anc := attrs_or_encoding.get("ancillary_variables", None) ): - coords["ancillary_variables"] = attrs_or_encoding[ - "ancillary_variables" - ].split(" ") + coords["ancillary_variables"] = anc.split(" ") if not skip_bounds: - if "bounds" in attrs_or_encoding: - coords["bounds"] = 
[attrs_or_encoding["bounds"]] + if bounds := attrs_or_encoding.get("bounds", None): + coords["bounds"] = [bounds] for dim in self._obj[name].dims: - dbounds = self._obj[dim].attrs.get("bounds", None) - if dbounds: + if dbounds := self._obj[dim].attrs.get("bounds", None): coords["bounds"].append(dbounds) - if "grid" in attrs_or_encoding: - coords["grid"] = [attrs_or_encoding["grid"]] + for attrname in ["grid", "grid_mapping"]: + if maybe := attrs_or_encoding.get(attrname, None): + coords[attrname] = [maybe] - if "grid_mapping" in attrs_or_encoding: - coords["grid_mapping"] = [attrs_or_encoding["grid_mapping"]] + more: Sequence[Hashable] = () + if geometry_var := attrs_or_encoding.get("geometry", None): + coords["geometry"] = [geometry_var] + _attrs = ChainMap( + self._obj[geometry_var].attrs, self._obj[geometry_var].encoding + ) + more = _parse_related_geometry_vars(_attrs) + elif "geometry_type" in attrs_or_encoding: + more = _parse_related_geometry_vars(attrs_or_encoding) + coords["geometry"].extend(more) allvars = itertools.chain(*coords.values()) missing = set(allvars) - set(self._maybe_to_dataset()._variables) @@ -2276,8 +2356,8 @@ def formula_terms(self) -> dict[Hashable, dict[str, str]]: # numpydoc ignore=SS References ---------- Please refer to the CF conventions document : - 1. http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#parametric-vertical-coordinate - 2. http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#parametric-v-coord. + 1. http://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#parametric-vertical-coordinate + 2. http://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#parametric-v-coord. Examples -------- @@ -2553,7 +2633,7 @@ def bounds_to_vertices( References ---------- - Please refer to the CF conventions document : http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#cell-boundaries. 
+ Please refer to the CF conventions document : http://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#cell-boundaries. """ if keys is None: coords = tuple(self.keys()) @@ -2762,8 +2842,8 @@ def formula_terms(self) -> dict[str, str]: # numpydoc ignore=SS06 References ---------- Please refer to the CF conventions document : - 1. http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#parametric-vertical-coordinate - 2. http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#parametric-v-coord. + 1. http://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#parametric-vertical-coordinate + 2. http://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#parametric-v-coord. Examples -------- diff --git a/cf_xarray/coding.py b/cf_xarray/coding.py index 529db1f2..282eaf8f 100644 --- a/cf_xarray/coding.py +++ b/cf_xarray/coding.py @@ -1,6 +1,7 @@ """ Encoders and decoders for CF conventions not implemented by Xarray. 
""" + import numpy as np import pandas as pd import xarray as xr diff --git a/cf_xarray/criteria.py b/cf_xarray/criteria.py index 76299520..81b78ab8 100644 --- a/cf_xarray/criteria.py +++ b/cf_xarray/criteria.py @@ -12,7 +12,10 @@ from collections.abc import Mapping, MutableMapping from typing import Any +#: CF Roles understood by cf-xarray _DSG_ROLES = ["timeseries_id", "profile_id", "trajectory_id"] +#: Geometry types understood by cf-xarray +_GEOMETRY_TYPES = ("line", "point", "polygon") cf_role_criteria: Mapping[str, Mapping[str, str]] = { k: {"cf_role": k} @@ -31,6 +34,12 @@ "grid_mapping": {"grid_mapping_name": re.compile(".")} } +# A geometry container is anything with a geometry_type attribute +geometry_var_criteria: Mapping[str, Mapping[str, Any]] = { + "geometry": {"geometry_type": re.compile(".")}, + **{k: {"geometry_type": k} for k in _GEOMETRY_TYPES}, +} + coordinate_criteria: MutableMapping[str, MutableMapping[str, tuple]] = { "latitude": { "standard_name": ("latitude",), diff --git a/cf_xarray/datasets.py b/cf_xarray/datasets.py index bd1d6565..997ba987 100644 --- a/cf_xarray/datasets.py +++ b/cf_xarray/datasets.py @@ -503,6 +503,16 @@ def _create_inexact_bounds(): name="flag_var", ) +flag_indep_uint16 = xr.DataArray( + np.array([1, 10, 100, 1000, 10000, 65535], dtype=np.uint16), + dims=("time",), + attrs={ + "flag_masks": [2**i for i in range(16)], + "flag_meanings": " ".join([f"flag_{2**i}" for i in range(16)]), + "standard_name": "flag_independent", + }, + name="flag_var", +) flag_mix = xr.DataArray( np.array([4, 8, 13, 5, 10, 14, 7, 3], np.uint8), @@ -738,3 +748,32 @@ def _create_inexact_bounds(): node_coordinates="node_lon node_lat node_elevation", ), ) + + +def point_dataset(): + from shapely.geometry import MultiPoint, Point + + da = xr.DataArray( + [ + MultiPoint([(1.0, 2.0), (2.0, 3.0)]), + Point(3.0, 4.0), + Point(4.0, 5.0), + Point(3.0, 4.0), + ], + dims=("index",), + name="geometry", + ) + ds = da.to_dataset() + return ds + + +def 
encoded_point_dataset(): + from .geometry import encode_geometries + + ds = encode_geometries(point_dataset()) + ds["data"] = ( + "index", + np.arange(ds.sizes["index"]), + {"geometry": "geometry_container"}, + ) + return ds diff --git a/cf_xarray/formatting.py b/cf_xarray/formatting.py index 65b21495..4fe58ccb 100644 --- a/cf_xarray/formatting.py +++ b/cf_xarray/formatting.py @@ -110,9 +110,11 @@ def _print_rows(subtitle: str, rows: list[str], rich: bool): # Add subtitle to the first row, align other rows rows = [ - _format_subtitle(subtitle, rich=rich) + row - if i == 0 - else len(subtitle) * " " + row + ( + _format_subtitle(subtitle, rich=rich) + row + if i == 0 + else len(subtitle) * " " + row + ) for i, row in enumerate(rows) ] @@ -151,8 +153,47 @@ def _maybe_panel(textgen, title: str, rich: bool): return title + ":\n" + text -def find_set_bits(mask, value, repeated_masks): - bitpos = np.arange(8)[::-1] +def _get_bit_length(dtype): + # Check if dtype is a numpy dtype, if not, convert it + if not isinstance(dtype, np.dtype): + dtype = np.dtype(dtype) + + # Calculate the bit length + bit_length = 8 * dtype.itemsize + + return bit_length + + +def _unpackbits(mask, bit_length): + # Ensure the array is a numpy array + arr = np.asarray(mask) + + # Create an output array of the appropriate shape + output_shape = arr.shape + (bit_length,) + output = np.zeros(output_shape, dtype=np.uint8) + + # Unpack bits + for i in range(bit_length): + output[..., i] = (arr >> i) & 1 + + return output[..., ::-1] + + +def _max_chars_for_bit_length(bit_length): + """ + Find the maximum characters needed for a fixed-width display + for integer values of a certain bit_length. Use calculation + for signed integers, since it conservatively will always have + enough characters for signed or unsigned. 
+ """ + # Maximum value for signed integers of this bit length + max_val = 2 ** (bit_length - 1) - 1 + # Add 1 for the negative sign + return len(str(max_val)) + 1 + + +def find_set_bits(mask, value, repeated_masks, bit_length): + bitpos = np.arange(bit_length)[::-1] if mask not in repeated_masks: if value == 0: return [-1] @@ -161,8 +202,8 @@ def find_set_bits(mask, value, repeated_masks): else: return [int(np.log2(mask))] else: - allset = bitpos[np.unpackbits(np.uint8(mask)) == 1] - setbits = bitpos[np.unpackbits(np.uint8(mask & value)) == 1] + allset = bitpos[_unpackbits(mask, bit_length) == 1] + setbits = bitpos[_unpackbits(mask & value, bit_length) == 1] return [b if abs(b) in setbits else -b for b in allset] @@ -184,6 +225,11 @@ def _format_flags(accessor, rich): # for f, (m, _) in flag_dict.items() # if m is not None and m not in repeated_masks # ] + + bit_length = _get_bit_length(accessor._obj.dtype) + mask_width = _max_chars_for_bit_length(bit_length) + key_width = max(len(key) for key in flag_dict) + bit_text = [] value_text = [] for key, (mask, value) in flag_dict.items(): @@ -191,8 +237,8 @@ def _format_flags(accessor, rich): bit_text.append("✗" if rich else "") value_text.append(str(value)) continue - bits = find_set_bits(mask, value, repeated_masks) - bitstring = ["."] * 8 + bits = find_set_bits(mask, value, repeated_masks, bit_length) + bitstring = ["."] * bit_length if bits == [-1]: continue else: @@ -200,9 +246,9 @@ def _format_flags(accessor, rich): bitstring[abs(b)] = _format_cf_name("1" if b >= 0 else "0", rich) text = "".join(bitstring[::-1]) value_text.append( - f"{mask} & {value}" + f"{mask:{mask_width}} & {value}" if key in excl_flags and value is not None - else str(mask) + else f"{mask:{mask_width}}" ) bit_text.append(text if rich else f" / Bit: {text}") @@ -230,7 +276,9 @@ def _format_flags(accessor, rich): else: rows = [] for val, bit, key in zip(value_text, bit_text, flag_dict): - rows.append(f"{TAB}{_format_cf_name(key, rich)}: {TAB} 
{val} {bit}") + rows.append( + f"{TAB}{_format_cf_name(key, rich):>{key_width}}: {TAB} {val} {bit}" + ) return _print_rows("Flag Meanings", rows, rich) @@ -247,6 +295,17 @@ def _format_dsg_roles(accessor, dims, rich): ) +def _format_geometries(accessor, dims, rich): + yield make_text_section( + accessor, + "CF Geometries", + "geometries", + dims=dims, + # valid_keys=_DSG_ROLES, + rich=rich, + ) + + def _format_coordinates(accessor, dims, coords, rich): from .accessor import _AXIS_NAMES, _CELL_MEASURES, _COORD_NAMES diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 0f5e7f07..6ca28702 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -1,16 +1,359 @@ from __future__ import annotations -from collections.abc import Sequence +import copy +from collections import ChainMap +from collections.abc import Hashable, Sequence +from dataclasses import dataclass import numpy as np import pandas as pd import xarray as xr +from numpy.typing import ArrayLike + +GEOMETRY_CONTAINER_NAME = "geometry_container" +FEATURES_DIM_NAME = "features" + +__all__ = [ + "decode_geometries", + "encode_geometries", + "cf_to_shapely", + "shapely_to_cf", +] + + +# Useful convention language: +# 1. Whether linked to normal CF space-time coordinates with a nodes attribute or not, inclusion of such coordinates is +# recommended to maintain backward compatibility with software that has not implemented geometry capabilities. +# 2. The geometry node coordinate variables must each have an axis attribute whose allowable values are X, Y, and Z. +# 3. If a coordinates attribute is carried by the geometry container variable or its parent data variable, then those coordinate variables +# that have a meaningful correspondence with node coordinates are indicated as such by a nodes attribute that names the corresponding node +# coordinates, but only if the grid_mapping associated the geometry node variables is the same as that of the coordinate variables. 
+# If a different grid mapping is used, then the provided coordinates must not have the nodes attribute. +# +# Interpretation: +# 1. node coordinates are exact; the 'normal' coordinates are a reasonable value to use, if you do not know how to interpret the nodes. + + +@dataclass +class GeometryNames: + """Helper class to ease handling of all the variable names needed for CF geometries.""" + + def __init__( + self, + suffix: str = "", + grid_mapping_name: str | None = None, + grid_mapping: str | None = None, + ): + self.container_name: str = GEOMETRY_CONTAINER_NAME + suffix + self.node_dim: str = "node" + suffix + self.node_count: str = "node_count" + suffix + self.node_coordinates_x: str = "x" + suffix + self.node_coordinates_y: str = "y" + suffix + self.coordinates_x: str = "crd_x" + suffix + self.coordinates_y: str = "crd_y" + suffix + self.part_node_count: str = "part_node_count" + suffix + self.part_dim: str = "part" + suffix + self.interior_ring: str = "interior_ring" + suffix + self.attrs_x: dict[str, str] = {} + self.attrs_y: dict[str, str] = {} + self.grid_mapping_attr = {"grid_mapping": grid_mapping} if grid_mapping else {} + + # Special treatment of selected grid mappings + if grid_mapping_name in ["latitude_longitude", "rotated_latitude_longitude"]: + # Special case for longitude_latitude type grid mappings + self.coordinates_x = "lon" + self.coordinates_y = "lat" + if grid_mapping_name == "latitude_longitude": + self.attrs_x = dict(units="degrees_east", standard_name="longitude") + self.attrs_y = dict(units="degrees_north", standard_name="latitude") + elif grid_mapping_name == "rotated_latitude_longitude": + self.attrs_x = dict( + units="degrees_east", standard_name="grid_longitude" + ) + self.attrs_y = dict( + units="degrees_north", standard_name="grid_latitude" + ) + elif grid_mapping_name is not None: + self.attrs_x = dict(standard_name="projection_x_coordinate") + self.attrs_y = dict(standard_name="projection_y_coordinate") + 
self.attrs_x.update(self.grid_mapping_attr) + self.attrs_y.update(self.grid_mapping_attr) + + @property + def geometry_container_attrs(self) -> dict[str, str]: + return { + "node_count": self.node_count, + "node_coordinates": f"{self.node_coordinates_x} {self.node_coordinates_y}", + "coordinates": f"{self.coordinates_x} {self.coordinates_y}", + **self.grid_mapping_attr, + } + + def coords( + self, + *, + dim: Hashable, + x: ArrayLike, + y: ArrayLike, + crdX: ArrayLike | None = None, + crdY: ArrayLike | None = None, + ) -> dict[str, xr.DataArray]: + """ + Construct coordinate DataArrays for the numpy data (x, y, crdX, crdY) + + Parameters + ---------- + x: array + Node coordinates for X coordinate + y: array + Node coordinates for Y coordinate + crdX: array, optional + Nominal X coordinate + crdY: array, optional + Nominal X coordinate + """ + mapping = { + self.node_coordinates_x: xr.DataArray( + x, dims=self.node_dim, attrs={"axis": "X", **self.attrs_x} + ), + self.node_coordinates_y: xr.DataArray( + y, dims=self.node_dim, attrs={"axis": "Y", **self.attrs_y} + ), + } + if crdX is not None: + mapping[self.coordinates_x] = xr.DataArray( + crdX, + dims=(dim,), + attrs={"nodes": self.node_coordinates_x, **self.attrs_x}, + ) + if crdY is not None: + mapping[self.coordinates_y] = xr.DataArray( + crdY, + dims=(dim,), + attrs={"nodes": self.node_coordinates_y, **self.attrs_y}, + ) + return mapping + + +def _assert_single_geometry_container(ds: xr.Dataset) -> Hashable: + container_names = _get_geometry_containers(ds) + if len(container_names) > 1: + raise ValueError( + "Only one geometry container is supported by cf_to_points. " + "To handle multiple geometries use `decode_geometries` instead." + ) + (container_name,) = container_names + return container_name + + +def _get_geometry_containers(obj: xr.DataArray | xr.Dataset) -> list[Hashable]: + """ + Translate from key (either CF key or variable name) to its bounds' variable names. 
+ + This function interprets the ``geometry`` attribute on DataArrays. + + Parameters + ---------- + obj : DataArray, Dataset + DataArray belonging to the coordinate to be checked + + Returns + ------- + List[str] + Variable name(s) in parent xarray object that are bounds of `key` + """ + + if isinstance(obj, xr.DataArray): + obj = obj._to_temp_dataset() + variables = obj._variables + + results = set() + for name, var in variables.items(): + attrs_or_encoding = ChainMap(var.attrs, var.encoding) + if "geometry_type" in attrs_or_encoding: + results.update([name]) + return list(results) + + +def decode_geometries(encoded: xr.Dataset) -> xr.Dataset: + """ + Decode CF encoded geometries to numpy object arrays containing shapely geometries. + + Parameters + ---------- + encoded : Dataset + A Xarray Dataset containing encoded geometries. + + Returns + ------- + Dataset + A Xarray Dataset containing decoded geometries. + + See Also + -------- + shapely_to_cf + cf_to_shapely + encode_geometries + """ + + containers = _get_geometry_containers(encoded) + if not containers: + raise NotImplementedError( + "No geometry container variables detected, none of the provided variables " + "have a `geometry_type` attribute." 
+ ) + + todrop: list[Hashable] = [] + decoded = xr.Dataset() + for container_name in containers: + enc_geom_var = encoded[container_name] + geom_attrs = enc_geom_var.attrs + + # Grab the coordinates attribute + geom_attrs.update(enc_geom_var.encoding) + + geom_var = cf_to_shapely(encoded, container=container_name).variable + + todrop.extend( + (container_name,) + + tuple( + s + for s in " ".join( + geom_attrs.get(attr, "") + for attr in [ + "interior_ring", + "node_coordinates", + "node_count", + "part_node_count", + "coordinates", + ] + ).split(" ") + if s + ) + ) + + name = geom_attrs.get("variable_name", None) + if name in encoded.dims: + decoded = decoded.assign_coords( + xr.Coordinates(coords={name: geom_var}, indexes={}) + ) + else: + decoded[name] = geom_var + + decoded.update(encoded.drop_vars(todrop)) + + # Is this a good idea? We are deleting information. + # OTOH we have decoded it to a useful in-memory representation + for var in decoded._variables.values(): + if var.attrs.get("geometry") in containers: + var.attrs.pop("geometry") + return decoded + + +def encode_geometries(ds: xr.Dataset): + """ + Encode any discovered geometry variables using the CF conventions. + + Practically speaking, geometry variables are numpy object arrays where the first + element is a shapely geometry. + + Parameters + ---------- + ds : Dataset + Dataset containing at least one geometry variable. + + Returns + ------- + Dataset + Where all geometry variables are encoded. The information in a single geometry + variable in the input is split across multiple variables in the returned Dataset + following the CF conventions. 
+ + See Also + -------- + shapely_to_cf + cf_to_shapely + """ + from shapely import ( + LineString, + MultiLineString, + MultiPoint, + MultiPolygon, + Point, + Polygon, + ) + + SHAPELY_TYPES = ( + Point, + LineString, + Polygon, + MultiPoint, + MultiLineString, + MultiPolygon, + ) + + geom_var_names = [ + name + for name, var in ds._variables.items() + if var.dtype == "O" and isinstance(var.data.flat[0], SHAPELY_TYPES) + ] + if not geom_var_names: + return ds + + if to_drop := set(geom_var_names) & set(ds._indexes): + # e.g. xvec GeometryIndex + ds = ds.drop_indexes(to_drop) + + variables = {} + for name in geom_var_names: + # TODO: do we prefer this choice be invariant to number of geometry variables + suffix = "_" + str(name) if len(geom_var_names) > 1 else "" + container_name = GEOMETRY_CONTAINER_NAME + suffix + # If `name` is a dimension name, then we need to drop it. Otherwise we don't + # So set errors="ignore" + variables.update( + shapely_to_cf(ds[name], suffix=suffix) + .drop_vars(name, errors="ignore") + ._variables + ) + + geom_var = ds[name] + more_updates = {} + for varname, var in ds._variables.items(): + if varname == name: + continue + # TODO: this is incomplete. It works for vector data cubes where one of the geometry vars + # is a dimension coordinate. + if name in var.dims: + var = var.copy() + var._attrs = copy.deepcopy(var._attrs) + var.attrs["geometry"] = container_name + # The grid_mapping and coordinates attributes can be carried by the geometry container + # variable provided they are also carried by the data variables associated with the container. + if to_add := geom_var.attrs.get("coordinates", ""): + var.attrs["coordinates"] = var.attrs.get("coordinates", "") + to_add + more_updates[varname] = var + variables.update(more_updates) + + # WARNING: cf-xarray specific convention. + # For vector data cubes, `name` is a dimension name. 
+ # By encoding to CF, we have + # encoded the information in that variable across many different + # variables (e.g. node_count) with `name` as a dimension. + # We have to record `name` somewhere so that we reconstruct + # a geometry variable of the right name at decode-time. + variables[container_name].attrs["variable_name"] = name + + encoded = xr.Dataset(variables).set_coords( + set(ds._coord_names) - set(geom_var_names) + ) + + return encoded def reshape_unique_geometries( ds: xr.Dataset, geom_var: str = "geometry", - new_dim: str = "features", + new_dim: str = FEATURES_DIM_NAME, ) -> xr.Dataset: """Reshape a dataset containing a geometry variable so that all unique features are identified along a new dimension. @@ -67,17 +410,19 @@ def reshape_unique_geometries( out[geom_var] = ds[geom_var].isel({old_name: unique_indexes}) if old_name not in ds.coords: # If there was no coord before, drop the dummy one we made. - out = out.drop_vars(old_name) + out = out.drop_vars(old_name) # type: ignore[arg-type,unused-ignore] # Hashable/str stuff return out -def shapely_to_cf(geometries: xr.DataArray | Sequence, grid_mapping: str | None = None): +def shapely_to_cf( + geometries: xr.DataArray | Sequence, + grid_mapping: str | None = None, + *, + suffix: str = "", +): """ Convert a DataArray with shapely geometry objects into a CF-compliant dataset. - .. warning:: - Only point geometries are currently implemented. - Parameters ---------- geometries : sequence of shapely geometries or xarray.DataArray @@ -88,8 +433,8 @@ def shapely_to_cf(geometries: xr.DataArray | Sequence, grid_mapping: str | None A CF grid mapping name. When given, coordinates and attributes are named and set accordingly. Defaults to None, in which case the coordinates are simply names "crd_x" and "crd_y". - .. warning:: - Only the `longitude_latitude` grid mapping is currently implemented. 
+ suffix: str, optional + Suffix appended to the names of the variables created for this geometry in the encoded Dataset Returns ------- xr.Dataset A dataset with shapely geometry objects translated into CF-compliant variables : - 'x', 'y' : the node coordinates - 'crd_x', 'crd_y' : the feature coordinates (might have different names if `grid_mapping` is available). - - 'node_count' : The number of nodes per feature. Absent if all instances are Points. - - 'geometry_container' : Empty variable with attributes describing the geometry type. - - Other variables are not implemented as only Points are currently understood. + - 'node_count' : The number of nodes per feature. Always present for Lines and Polygons. For Points: only present if there are multipart geometries. + - 'part_node_count' : The number of nodes per individual geometry. Only for Lines with multipart geometries and for Polygons with multipart geometries or holes. + - 'interior_ring' : Integer boolean indicating whether rings are interior or exterior. Only for Polygons with holes. + - container_name : Empty variable with attributes describing the geometry type. References ---------- Please refer to the CF conventions document: http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#geometries """ + + if isinstance(geometries, xr.DataArray) and grid_mapping is not None: + raise DeprecationWarning( + "Explicitly passing `grid_mapping` with DataArray of geometries is deprecated. " + "Please set a `grid_mapping` attribute on `geometries`, " + "and set the grid mapping variable as a coordinate", + ) + # Get all types to call the appropriate translation function.
types = { geom.item().geom_type if isinstance(geom, xr.DataArray) else geom.geom_type for geom in geometries } + + grid_mapping_varname = None + if ( + grid_mapping is None + and isinstance(geometries, xr.DataArray) + and (grid_mapping_varname := geometries.attrs.get("grid_mapping")) + ): + if grid_mapping_varname in geometries.coords: + # Not all CRS can be encoded in CF + grid_mapping = geometries.coords[grid_mapping_varname].attrs.get( + "grid_mapping_name", None + ) + + # TODO: consider accepting a GeometryNames instance from the user instead + names = GeometryNames( + suffix=suffix, grid_mapping_name=grid_mapping, grid_mapping=grid_mapping_varname + ) + if types.issubset({"Point", "MultiPoint"}): - ds = points_to_cf(geometries) + ds = points_to_cf(geometries, names=names) elif types.issubset({"LineString", "MultiLineString"}): - ds = lines_to_cf(geometries) + ds = lines_to_cf(geometries, names=names) elif types.issubset({"Polygon", "MultiPolygon"}): - raise NotImplementedError("Polygon geometry conversion is not implemented.") + ds = polygons_to_cf(geometries, names=names) else: raise ValueError( f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}" ) - # Special treatment of selected grid mappings - if grid_mapping == "longitude_latitude": - # Special case for longitude_latitude grid mapping - ds = ds.rename(crd_x="lon", crd_y="lat") - ds.lon.attrs.update(units="degrees_east", standard_name="longitude") - ds.lat.attrs.update(units="degrees_north", standard_name="latitude") - ds.geometry_container.attrs.update(coordinates="lon lat") - ds.x.attrs.update(units="degrees_east", standard_name="longitude") - ds.y.attrs.update(units="degrees_north", standard_name="latitude") - elif grid_mapping is not None: - raise NotImplementedError( - f"Only grid mapping longitude_latitude is implemented. Got {grid_mapping}." 
- ) - return ds -def cf_to_shapely(ds: xr.Dataset): +def cf_to_shapely(ds: xr.Dataset, *, container: Hashable = GEOMETRY_CONTAINER_NAME): """ Convert geometries stored in a CF-compliant way to shapely objects stored in a single variable. - .. warning:: - Only point geometries are currently implemented. - Parameters ---------- ds : xr.Dataset @@ -162,22 +517,35 @@ def cf_to_shapely(ds: xr.Dataset): ---------- Please refer to the CF conventions document: http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#geometries """ - geom_type = ds.geometry_container.attrs["geometry_type"] + if container not in ds._variables: + raise ValueError( + f"{container!r} is not the name of a variable in the provided Dataset." + ) + if not (geom_type := ds[container].attrs.get("geometry_type", None)): + raise ValueError( + f"{container!r} is not the name of a valid geometry variable. " + "It does not have a `geometry_type` attribute." + ) + + # Extract all necessary geometry variables + subds = ds.cf[[container]] if geom_type == "point": - geometries = cf_to_points(ds) + geometries = cf_to_points(subds) elif geom_type == "line": - geometries = cf_to_lines(ds) + geometries = cf_to_lines(subds) elif geom_type == "polygon": - raise NotImplementedError("Polygon geometry conversion is not implemented.") + geometries = cf_to_polygons(subds) else: raise ValueError( f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}" ) + if gm := ds[container].attrs.get("grid_mapping"): + geometries.attrs["grid_mapping"] = gm return geometries.rename("geometry") -def points_to_cf(pts: xr.DataArray | Sequence): +def points_to_cf(pts: xr.DataArray | Sequence, *, names: GeometryNames | None = None): """Get a list of points (shapely.geometry.[Multi]Point) and return a CF-compliant geometry dataset. 
Parameters @@ -194,11 +562,14 @@ def points_to_cf(pts: xr.DataArray | Sequence): from shapely.geometry import MultiPoint if isinstance(pts, xr.DataArray): + # TODO: Fix this hardcoding + if pts.ndim != 1: + raise ValueError("Only 1D DataArrays are supported.") dim = pts.dims[0] coord = pts[dim] if dim in pts.coords else None pts_ = pts.values.tolist() else: - dim = "features" + dim = FEATURES_DIM_NAME coord = None pts_ = pts @@ -214,33 +585,27 @@ def points_to_cf(pts: xr.DataArray | Sequence): crdX.append(xy[0, 0]) crdY.append(xy[0, 1]) + if names is None: + names = GeometryNames() + ds = xr.Dataset( data_vars={ - "node_count": xr.DataArray(node_count, dims=(dim,)), - "geometry_container": xr.DataArray( - attrs={ - "geometry_type": "point", - "node_count": "node_count", - "node_coordinates": "x y", - "coordinates": "crd_x crd_y", - } + names.node_count: xr.DataArray(node_count, dims=(dim,)), + names.container_name: xr.DataArray( + data=np.nan, + attrs={"geometry_type": "point", **names.geometry_container_attrs}, ), }, - coords={ - "x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}), - "y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}), - "crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}), - "crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}), - }, + coords=names.coords(x=x, y=y, crdX=crdX, crdY=crdY, dim=dim), ) if coord is not None: ds = ds.assign_coords({dim: coord}) # Special case when we have no MultiPoints - if (ds.node_count == 1).all(): - ds = ds.drop_vars("node_count") - del ds.geometry_container.attrs["node_count"] + if (ds[names.node_count] == 1).data.all(): + ds = ds.drop_vars(names.node_count) + del ds[names.container_name].attrs["node_count"] return ds @@ -251,7 +616,7 @@ def cf_to_points(ds: xr.Dataset): ---------- ds : xr.Dataset A dataset with CF-compliant point geometries. - Must have a "geometry_container" variable with at least a 'node_coordinates' attribute. 
+ Must have a *single* "geometry container" variable with at least a 'node_coordinates' attribute. Must also have the two 1D variables listed by this attribute. Returns @@ -263,8 +628,9 @@ def cf_to_points(ds: xr.Dataset): """ from shapely.geometry import MultiPoint, Point + container_name = _assert_single_geometry_container(ds) # Shorthand for convenience - geo = ds.geometry_container.attrs + geo = ds[container_name].attrs # The features dimension name, defaults to the one of 'node_count' or the dimension of the coordinates, if present. feat_dim = None @@ -272,14 +638,14 @@ def cf_to_points(ds: xr.Dataset): xcoord_name, _ = geo["coordinates"].split(" ") (feat_dim,) = ds[xcoord_name].dims - x_name, y_name = ds.geometry_container.attrs["node_coordinates"].split(" ") + x_name, y_name = ds[container_name].attrs["node_coordinates"].split(" ") xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1) - node_count_name = ds.geometry_container.attrs.get("node_count") + node_count_name = ds[container_name].attrs.get("node_count") if node_count_name is None: # No node_count means all geometries are single points (node_count = 1) - # And if we had no coordinates, then the dimension defaults to "features" - feat_dim = feat_dim or "features" + # And if we had no coordinates, then the dimension defaults to FEATURES_DIM_NAME + feat_dim = feat_dim or FEATURES_DIM_NAME node_count = xr.DataArray([1] * xy.shape[0], dims=(feat_dim,)) if feat_dim in ds.coords: node_count = node_count.assign_coords({feat_dim: ds[feat_dim]}) @@ -296,10 +662,13 @@ def cf_to_points(ds: xr.Dataset): geoms[i] = MultiPoint(xy[j : j + n, :]) j += n - return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords) + da = xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords) + if node_count_name: + del da[node_count_name] + return da -def lines_to_cf(lines: xr.DataArray | Sequence): +def lines_to_cf(lines: xr.DataArray | Sequence, *, names: GeometryNames | None = None): """Convert 
an iterable of lines (shapely.geometry.[Multi]Line) into a CF-compliant geometry dataset. Parameters @@ -324,17 +693,20 @@ def lines_to_cf(lines: xr.DataArray | Sequence): coord = None lines_ = np.array(lines) + if names is None: + names = GeometryNames() + _, arr, offsets = to_ragged_array(lines_) x = arr[:, 0] y = arr[:, 1] - node_count = np.diff(offsets[0]) + part_node_count = np.diff(offsets[0]) if len(offsets) == 1: indices = offsets[0] - part_node_count = node_count + node_count = part_node_count else: indices = np.take(offsets[0], offsets[1]) - part_node_count = np.diff(indices) + node_count = np.diff(indices) geom_coords = arr.take(indices[:-1], 0) crdX = geom_coords[:, 0] @@ -342,33 +714,23 @@ def lines_to_cf(lines: xr.DataArray | Sequence): ds = xr.Dataset( data_vars={ - "node_count": xr.DataArray(node_count, dims=("segment",)), - "part_node_count": xr.DataArray(part_node_count, dims=(dim,)), - "geometry_container": xr.DataArray( - attrs={ - "geometry_type": "line", - "node_count": "node_count", - "part_node_count": "part_node_count", - "node_coordinates": "x y", - "coordinates": "crd_x crd_y", - } + names.node_count: xr.DataArray(node_count, dims=(dim,)), + names.container_name: xr.DataArray( + data=np.nan, + attrs={"geometry_type": "line", **names.geometry_container_attrs}, ), }, - coords={ - "x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}), - "y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}), - "crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}), - "crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}), - }, + coords=names.coords(x=x, y=y, crdX=crdX, crdY=crdY, dim=dim), ) if coord is not None: ds = ds.assign_coords({dim: coord}) # Special case when we have no MultiLines - if len(ds.part_node_count) == len(ds.node_count): - ds = ds.drop_vars("part_node_count") - del ds.geometry_container.attrs["part_node_count"] + if len(part_node_count) != len(node_count): + ds[names.part_node_count] = xr.DataArray(part_node_count, 
dims=names.part_dim) + ds[names.container_name].attrs["part_node_count"] = names.part_node_count + return ds @@ -391,10 +753,12 @@ def cf_to_lines(ds: xr.Dataset): """ from shapely import GeometryType, from_ragged_array + container_name = _assert_single_geometry_container(ds) + # Shorthand for convenience - geo = ds.geometry_container.attrs + geo = ds[container_name].attrs - # The features dimension name, defaults to the one of 'part_node_count' + # The features dimension name, defaults to the one of 'node_count' # or the dimension of the coordinates, if present. feat_dim = None if "coordinates" in geo: @@ -405,39 +769,192 @@ def cf_to_lines(ds: xr.Dataset): xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1) node_count_name = geo.get("node_count") - part_node_count_name = geo.get("part_node_count") + part_node_count_name = geo.get("part_node_count", node_count_name) if node_count_name is None: raise ValueError("'node_count' must be provided for line geometries") else: node_count = ds[node_count_name] + feat_dim = feat_dim or "index" + if feat_dim in ds.coords: + node_count = node_count.assign_coords({feat_dim: ds[feat_dim]}) - offset1 = np.insert(np.cumsum(node_count.values), 0, 0) + # first get geometries for all the parts + part_node_count = ds[part_node_count_name] + offset1 = np.insert(np.cumsum(part_node_count.values), 0, 0) lines = from_ragged_array(GeometryType.LINESTRING, xy, offsets=(offset1,)) - if part_node_count_name is None: - # No part_node_count means there are no multilines - # And if we had no coordinates, then the dimension defaults to "index" + # get index of offset2 values that are edges for part_node_count + offset2 = np.nonzero(np.isin(offset1, np.insert(np.cumsum(node_count), 0, 0)))[0] + + multilines = from_ragged_array( + GeometryType.MULTILINESTRING, xy, offsets=(offset1, offset2) + ) + + # get items from lines or multilines depending on number of parts + geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines) 
+ + return xr.DataArray( + geoms, dims=node_count.dims, coords=node_count.coords + ).drop_vars(node_count_name) + + +def polygons_to_cf( + polygons: xr.DataArray | Sequence, *, names: GeometryNames | None = None +): + """Convert an iterable of polygons (shapely.geometry.[Multi]Polygon) into a CF-compliant geometry dataset. + + Parameters + ---------- + polygons : sequence of shapely.geometry.Polygon or MultiPolygon + The sequence of [multi]polygons to translate to a CF dataset. + + names: GeometryNames, optional + Structure that helps manipulate geometry attrs. + + Returns + ------- + xr.Dataset + A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container' + and optionally 'part_node_count'. + """ + from shapely import to_ragged_array + + if isinstance(polygons, xr.DataArray): + dim = polygons.dims[0] + coord = polygons[dim] if dim in polygons.coords else None + polygons_ = polygons.values + else: + dim = "index" + coord = None + polygons_ = np.array(polygons) + + if names is None: + names = GeometryNames() + + _, arr, offsets = to_ragged_array(polygons_) + x = arr[:, 0] + y = arr[:, 1] + + part_node_count = np.diff(offsets[0]) + if len(offsets) == 1: + indices = offsets[0] + node_count = part_node_count + elif len(offsets) >= 2: + indices = np.take(offsets[0], offsets[1]) + interior_ring = np.isin(offsets[0], indices, invert=True)[:-1].astype(int) + + if len(offsets) == 3: + indices = np.take(indices, offsets[2]) + + node_count = np.diff(indices) + + geom_coords = arr.take(indices[:-1], 0) + crdX = geom_coords[:, 0] + crdY = geom_coords[:, 1] + + ds = xr.Dataset( + data_vars={ + names.node_count: xr.DataArray(node_count, dims=(dim,)), + names.container_name: xr.DataArray( + data=np.nan, + attrs={"geometry_type": "polygon", **names.geometry_container_attrs}, + ), + }, + coords=names.coords(x=x, y=y, crdX=crdX, crdY=crdY, dim=dim), + ) + + if coord is not None: + ds = ds.assign_coords({dim: coord}) + + # Special case when we have no 
MultiPolygons and no holes + if len(part_node_count) != len(node_count): + ds[names.part_node_count] = xr.DataArray(part_node_count, dims=names.part_dim) + ds[names.container_name].attrs["part_node_count"] = names.part_node_count + + # Special case when we have no holes + if (interior_ring != 0).any(): + ds[names.interior_ring] = xr.DataArray(interior_ring, dims=names.part_dim) + ds[names.container_name].attrs["interior_ring"] = names.interior_ring + return ds + + +def cf_to_polygons(ds: xr.Dataset): + """Convert polygon geometries stored in a CF-compliant way to shapely polygons stored in a single variable. + + Parameters + ---------- + ds : xr.Dataset + A dataset with CF-compliant polygon geometries. + Must have a "geometry_container" variable with at least a 'node_coordinates' attribute. + Must also have the two 1D variables listed by this attribute. + + Returns + ------- + geometry : xr.DataArray + A 1D array of shapely.geometry.[Multi]Polygon objects. + It has the same dimension as the ``part_node_count`` or the coordinates variables, or + ``'features'`` if those were not present in ``ds``. + """ + from shapely import GeometryType, from_ragged_array + + container_name = _assert_single_geometry_container(ds) + + # Shorthand for convenience + geo = ds[container_name].attrs + + # The features dimension name, defaults to the one of 'part_node_count' + # or the dimension of the coordinates, if present. 
+ feat_dim = None + if "coordinates" in geo: + xcoord_name, _ = geo["coordinates"].split(" ") + (feat_dim,) = ds[xcoord_name].dims + + x_name, y_name = geo["node_coordinates"].split(" ") + xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1) + + node_count_name = geo.get("node_count") + part_node_count_name = geo.get("part_node_count", node_count_name) + interior_ring_name = geo.get("interior_ring") + + if node_count_name is None: + raise ValueError("'node_count' must be provided for polygon geometries") + else: + node_count = ds[node_count_name] feat_dim = feat_dim or "index" - part_node_count = xr.DataArray(node_count.values, dims=(feat_dim,)) if feat_dim in ds.coords: - part_node_count = part_node_count.assign_coords({feat_dim: ds[feat_dim]}) + node_count = node_count.assign_coords({feat_dim: ds[feat_dim]}) - geoms = lines + # first get geometries for all the rings + part_node_count = ds[part_node_count_name] + offset1 = np.insert(np.cumsum(part_node_count.values), 0, 0) + + if interior_ring_name is None: + offset2 = np.array(list(range(len(offset1)))) else: - part_node_count = ds[part_node_count_name] - - # get index of offset1 values that are edges for part_node_count - offset2 = np.nonzero( - np.isin(offset1, np.insert(np.cumsum(part_node_count), 0, 0)) - )[0] - multilines = from_ragged_array( - GeometryType.MULTILINESTRING, xy, offsets=(offset1, offset2) + interior_ring = ds[interior_ring_name] + if not interior_ring[0] == 0: + raise ValueError("coordinate array must start with an exterior ring") + offset2 = np.append(np.where(interior_ring == 0)[0], [len(part_node_count)]) + + polygons = from_ragged_array(GeometryType.POLYGON, xy, offsets=(offset1, offset2)) + + # get index of offset2 values that are edges for node_count + offset3 = np.nonzero( + np.isin( + offset2, + np.nonzero(np.isin(offset1, np.insert(np.cumsum(node_count), 0, 0)))[0], ) + )[0] + multipolygons = from_ragged_array( + GeometryType.MULTIPOLYGON, xy, offsets=(offset1, offset2, 
offset3) + ) - # get items from lines or multilines depending on number of segments - geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines) + # get items from polygons or multipolygons depending on number of parts + geoms = np.where(np.diff(offset3) == 1, polygons[offset3[:-1]], multipolygons) - return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords) + return xr.DataArray( + geoms, dims=node_count.dims, coords=node_count.coords + ).drop_vars(node_count_name) def grid_to_polygons(ds: xr.Dataset) -> xr.DataArray: diff --git a/cf_xarray/helpers.py b/cf_xarray/helpers.py index 1ce6058f..94b024a7 100644 --- a/cf_xarray/helpers.py +++ b/cf_xarray/helpers.py @@ -48,7 +48,7 @@ def _guess_bounds_2d(da, dims): # At this point, we might have different corners for adjacent cells, we average them together to have a nice grid # To make this vectorized and keep the edges, we'll pad with NaNs and ignore them in the averages daXYp = ( - daXY.pad({d: (1, 1) for d in dims}, mode="constant", constant_values=np.NaN) + daXY.pad({d: (1, 1) for d in dims}, mode="constant", constant_values=np.nan) .transpose(*dims, "Xbnds", "Ybnds") .values ) # Tranpose for an easier notation diff --git a/cf_xarray/tests/conftest.py b/cf_xarray/tests/conftest.py new file mode 100644 index 00000000..7518817e --- /dev/null +++ b/cf_xarray/tests/conftest.py @@ -0,0 +1,55 @@ +import numpy as np +import pytest +import xarray as xr + + +@pytest.fixture(scope="session") +def geometry_ds(): + pytest.importorskip("shapely") + + from shapely.geometry import MultiPoint, Point + + # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. 
+ geoms = np.empty(4, dtype=object) + geoms[:] = [ + MultiPoint([(1.0, 2.0), (2.0, 3.0)]), + Point(3.0, 4.0), + Point(4.0, 5.0), + Point(3.0, 4.0), + ] + + ds = xr.Dataset( + { + "data": xr.DataArray( + range(len(geoms)), + dims=("index",), + attrs={ + "coordinates": "crd_x crd_y", + }, + ), + "time": xr.DataArray([0, 0, 0, 1], dims=("index",)), + } + ) + shp_ds = ds.assign(geometry=xr.DataArray(geoms, dims=("index",))) + # Here, since it should not be present in shp_ds + ds.data.attrs["geometry"] = "geometry_container" + + cf_ds = ds.assign( + x=xr.DataArray([1.0, 2.0, 3.0, 4.0, 3.0], dims=("node",), attrs={"axis": "X"}), + y=xr.DataArray([2.0, 3.0, 4.0, 5.0, 4.0], dims=("node",), attrs={"axis": "Y"}), + node_count=xr.DataArray([2, 1, 1, 1], dims=("index",)), + crd_x=xr.DataArray([1.0, 3.0, 4.0, 3.0], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([2.0, 4.0, 5.0, 4.0], dims=("index",), attrs={"nodes": "y"}), + geometry_container=xr.DataArray( + attrs={ + "geometry_type": "point", + "node_count": "node_count", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + ) + + cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) + + return cf_ds, shp_ds diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 2bda143d..5cc58688 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -26,6 +26,7 @@ dsg, flag_excl, flag_indep, + flag_indep_uint16, flag_mix, forecast, mollwds, @@ -164,6 +165,7 @@ def test_repr() -> None: # Flag DataArray assert "Flag Variable" in repr(flag_excl.cf) assert "Flag Variable" in repr(flag_indep.cf) + assert "Flag Variable" in repr(flag_indep_uint16.cf) assert "Flag Variable" in repr(flag_mix.cf) assert "Flag Variable" in repr(basin.cf) @@ -1452,9 +1454,9 @@ def test_rename(obj): "air_temperature" if isinstance(obj, Dataset) else "longitude": "renamed" } xr_dict = { - "air" - if isinstance(obj, Dataset) and "air" in obj.data_vars - else "lon": "renamed" 
+ ( + "air" if isinstance(obj, Dataset) and "air" in obj.data_vars else "lon" + ): "renamed" } assert_identical(obj.rename(xr_dict), obj.cf.rename(cf_dict)) assert_identical(obj.rename(**xr_dict), obj.cf.rename(**cf_dict)) @@ -1837,6 +1839,30 @@ def test_flag_indep(self) -> None: res = flag_indep.cf.flags[name] np.testing.assert_equal(res.to_numpy(), expected[i]) + def test_flag_indep_uint16(self) -> None: + expected = [ + [True, False, False, False, False, True], # bit 1 + [False, True, False, False, False, True], # bit 2 + [False, False, True, False, False, True], # bit 4 + [False, True, False, True, False, True], # bit 8 + [False, False, False, False, True, True], # bit 16 + [False, False, True, True, False, True], # bit 32 + [False, False, True, True, False, True], # bit 64 + [False, False, False, True, False, True], # bit 128 + [False, False, False, True, True, True], # bit 256 + [False, False, False, True, True, True], # bit 512 + [False, False, False, False, True, True], # bit 1024 + [False, False, False, False, False, True], # bit 2048 + [False, False, False, False, False, True], # bit 4096 + [False, False, False, False, True, True], # bit 8192 + [False, False, False, False, False, True], # bit 16384 + [False, False, False, False, False, True], # bit 32768 + ] + for i in range(16): + name = f"flag_{2**i}" + res = flag_indep_uint16.cf.flags[name] + np.testing.assert_equal(res.to_numpy(), expected[i]) + def test_flag_mix(self) -> None: expected = [ [False, False, True, True, False, False, True, True], # flag 1 @@ -1983,6 +2009,7 @@ def plane(coords, slopex, slopey): [basin, "Flag Variable"], [flag_mix, "Flag Variable"], [flag_indep, "Flag Variable"], + [flag_indep_uint16, "Flag Variable"], [flag_excl, "Flag Variable"], [dsg, "Discrete Sampling Geometry"], ), @@ -2049,3 +2076,25 @@ def test_ancillary_variables_extra_dim(): } ) assert_identical(ds.cf["X"], ds["x"]) + + +def test_geometry_association(geometry_ds): + cf_ds, _ = geometry_ds + actual = 
cf_ds.cf[["data"]] + for name in ["geometry_container", "x", "y", "node_count", "crd_x", "crd_y"]: + assert name in actual.coords + + actual = cf_ds.cf["data"] + for name in ["geometry_container", "node_count", "crd_x", "crd_y"]: + assert name in actual.coords + + assert cf_ds.cf.geometries == {"point": ["geometry_container"]} + assert_identical(cf_ds.cf["geometry"], cf_ds["geometry_container"]) + with pytest.raises(ValueError): + cf_ds.cf["point"] + + expected = cf_ds[["geometry_container", "node_count", "x", "y", "crd_x", "crd_y"]] + assert_identical( + cf_ds.cf[["point"]], + expected.set_coords(["node_count", "x", "y", "crd_x", "crd_y"]), + ) diff --git a/cf_xarray/tests/test_geometry.py b/cf_xarray/tests/test_geometry.py index bcb521b4..31cb2de8 100644 --- a/cf_xarray/tests/test_geometry.py +++ b/cf_xarray/tests/test_geometry.py @@ -4,49 +4,21 @@ import cf_xarray as cfxr +from ..geometry import decode_geometries, encode_geometries from . import requires_shapely @pytest.fixture -def geometry_ds(): - from shapely.geometry import MultiPoint, Point +def polygon_geometry() -> xr.DataArray: + from shapely.geometry import Polygon # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. 
- geoms = np.empty(4, dtype=object) + geoms = np.empty(2, dtype=object) geoms[:] = [ - MultiPoint([(1.0, 2.0), (2.0, 3.0)]), - Point(3.0, 4.0), - Point(4.0, 5.0), - Point(3.0, 4.0), + Polygon(([50, 0], [40, 15], [30, 0])), + Polygon(([70, 50], [60, 65], [50, 50])), ] - - ds = xr.Dataset( - { - "data": xr.DataArray(range(len(geoms)), dims=("index",)), - "time": xr.DataArray([0, 0, 0, 1], dims=("index",)), - } - ) - shp_ds = ds.assign(geometry=xr.DataArray(geoms, dims=("index",))) - - cf_ds = ds.assign( - x=xr.DataArray([1.0, 2.0, 3.0, 4.0, 3.0], dims=("node",), attrs={"axis": "X"}), - y=xr.DataArray([2.0, 3.0, 4.0, 5.0, 4.0], dims=("node",), attrs={"axis": "Y"}), - node_count=xr.DataArray([2, 1, 1, 1], dims=("index",)), - crd_x=xr.DataArray([1.0, 3.0, 4.0, 3.0], dims=("index",), attrs={"nodes": "x"}), - crd_y=xr.DataArray([2.0, 4.0, 5.0, 4.0], dims=("index",), attrs={"nodes": "y"}), - geometry_container=xr.DataArray( - attrs={ - "geometry_type": "point", - "node_count": "node_count", - "node_coordinates": "x y", - "coordinates": "crd_x crd_y", - } - ), - ) - - cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) - - return cf_ds, shp_ds + return xr.DataArray(geoms, dims=("index",), name="geometry") @pytest.fixture @@ -71,8 +43,8 @@ def geometry_line_ds(): y=xr.DataArray( [0, 2, 4, 6, 0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"} ), - part_node_count=xr.DataArray([4, 3, 3], dims=("index",)), - node_count=xr.DataArray([2, 2, 3, 3], dims=("segment",)), + node_count=xr.DataArray([4, 3, 3], dims=("index",)), + part_node_count=xr.DataArray([2, 2, 3, 3], dims=("part",)), crd_x=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "x"}), crd_y=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "y"}), geometry_container=xr.DataArray( @@ -108,7 +80,7 @@ def geometry_line_without_multilines_ds(): cf_ds = ds.assign( x=xr.DataArray([0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"}), y=xr.DataArray([0, 0, 1, 1.0, 2.0, 9.5], 
dims=("node",), attrs={"axis": "Y"}), - node_count=xr.DataArray([3, 3], dims=("segment",)), + node_count=xr.DataArray([3, 3], dims=("index",)), crd_x=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "x"}), crd_y=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "y"}), geometry_container=xr.DataArray( @@ -126,13 +98,155 @@ def geometry_line_without_multilines_ds(): return cf_ds, shp_da +@pytest.fixture +def geometry_polygon_without_holes_ds(polygon_geometry): + shp_da = polygon_geometry + ds = xr.Dataset() + + cf_ds = ds.assign( + x=xr.DataArray( + [50, 40, 30, 50, 70, 60, 50, 70], dims=("node",), attrs={"axis": "X"} + ), + y=xr.DataArray( + [0, 15, 0, 0, 50, 65, 50, 50], dims=("node",), attrs={"axis": "Y"} + ), + node_count=xr.DataArray([4, 4], dims=("index",)), + crd_x=xr.DataArray([50, 70], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([0, 50], dims=("index",), attrs={"nodes": "y"}), + geometry_container=xr.DataArray( + attrs={ + "geometry_type": "polygon", + "node_count": "node_count", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + ) + + cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) + + return cf_ds, shp_da + + +@pytest.fixture +def geometry_polygon_without_multipolygons_ds(): + from shapely.geometry import Polygon + + # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. 
+ geoms = np.empty(2, dtype=object) + geoms[:] = [ + Polygon(([50, 0], [40, 15], [30, 0])), + Polygon( + ([70, 50], [60, 65], [50, 50]), + [ + ([55, 55], [60, 60], [65, 55]), + ], + ), + ] + + ds = xr.Dataset() + shp_da = xr.DataArray(geoms, dims=("index",), name="geometry") + + cf_ds = ds.assign( + x=xr.DataArray( + [50, 40, 30, 50, 70, 60, 50, 70, 55, 60, 65, 55], + dims=("node",), + attrs={"axis": "X"}, + ), + y=xr.DataArray( + [0, 15, 0, 0, 50, 65, 50, 50, 55, 60, 55, 55], + dims=("node",), + attrs={"axis": "Y"}, + ), + node_count=xr.DataArray([4, 8], dims=("index",)), + part_node_count=xr.DataArray([4, 4, 4], dims=("part",)), + interior_ring=xr.DataArray([0, 0, 1], dims=("part",)), + crd_x=xr.DataArray([50, 70], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([0, 50], dims=("index",), attrs={"nodes": "y"}), + geometry_container=xr.DataArray( + attrs={ + "geometry_type": "polygon", + "node_count": "node_count", + "part_node_count": "part_node_count", + "interior_ring": "interior_ring", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + ) + + cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) + + return cf_ds, shp_da + + +@pytest.fixture +def geometry_polygon_ds(): + from shapely.geometry import MultiPolygon, Polygon + + # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. 
+ geoms = np.empty(2, dtype=object) + geoms[:] = [ + MultiPolygon( + [ + ( + ([20, 0], [10, 15], [0, 0]), + [ + ([5, 5], [10, 10], [15, 5]), + ], + ), + (([20, 20], [10, 35], [0, 20]),), + ] + ), + Polygon(([50, 0], [40, 15], [30, 0])), + ] + + ds = xr.Dataset() + shp_da = xr.DataArray(geoms, dims=("index",), name="geometry") + + cf_ds = ds.assign( + x=xr.DataArray( + [20, 10, 0, 20, 5, 10, 15, 5, 20, 10, 0, 20, 50, 40, 30, 50], + dims=("node",), + attrs={"axis": "X"}, + ), + y=xr.DataArray( + [0, 15, 0, 0, 5, 10, 5, 5, 20, 35, 20, 20, 0, 15, 0, 0], + dims=("node",), + attrs={"axis": "Y"}, + ), + node_count=xr.DataArray([12, 4], dims=("index",)), + part_node_count=xr.DataArray([4, 4, 4, 4], dims=("part",)), + interior_ring=xr.DataArray([0, 1, 0, 0], dims=("part",)), + crd_x=xr.DataArray([20, 50], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([0, 0], dims=("index",), attrs={"nodes": "y"}), + geometry_container=xr.DataArray( + attrs={ + "geometry_type": "polygon", + "node_count": "node_count", + "part_node_count": "part_node_count", + "interior_ring": "interior_ring", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + ) + + cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) + + return cf_ds, shp_da + + @requires_shapely def test_shapely_to_cf(geometry_ds): from shapely.geometry import Point expected, in_ds = geometry_ds + expected = expected.copy(deep=True) + # This isn't really a roundtrip test out = xr.merge([in_ds.drop_vars("geometry"), cfxr.shapely_to_cf(in_ds.geometry)]) + del expected.data.attrs["geometry"] xr.testing.assert_identical(out, expected) out = xr.merge( @@ -151,8 +265,8 @@ def test_shapely_to_cf(geometry_ds): [ in_ds.drop_vars("geometry").isel(index=slice(1, None)), cfxr.shapely_to_cf( - in_ds.geometry.isel(index=slice(1, None)), - grid_mapping="longitude_latitude", + in_ds.geometry.isel(index=slice(1, None)).data, + grid_mapping="latitude_longitude", ), ] ) @@ -192,6 +306,43 @@ def 
test_shapely_to_cf_for_lines_without_multilines( xr.testing.assert_identical(actual, expected) +@requires_shapely +def test_shapely_to_cf_for_polygons_as_da(geometry_polygon_ds): + expected, in_da = geometry_polygon_ds + + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected) + + in_da = in_da.assign_coords(index=["a", "b"]) + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected.assign_coords(index=["a", "b"])) + + +@requires_shapely +def test_shapely_to_cf_for_polygons_as_sequence(geometry_polygon_ds): + expected, in_da = geometry_polygon_ds + actual = cfxr.shapely_to_cf(in_da.values) + xr.testing.assert_identical(actual, expected) + + +@requires_shapely +def test_shapely_to_cf_for_polygons_without_multipolygons( + geometry_polygon_without_multipolygons_ds, +): + expected, in_da = geometry_polygon_without_multipolygons_ds + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected) + + +@requires_shapely +def test_shapely_to_cf_for_polygons_without_holes( + geometry_polygon_without_holes_ds, +): + expected, in_da = geometry_polygon_without_holes_ds + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected) + + @requires_shapely def test_shapely_to_cf_errors(): from shapely.geometry import Point, Polygon @@ -199,20 +350,18 @@ def test_shapely_to_cf_errors(): geoms = [ Polygon([[1, 1], [1, 3], [3, 3], [1, 1]]), Polygon([[1, 1, 4], [1, 3, 4], [3, 3, 3], [1, 1, 4]]), + Point(1, 2), ] - with pytest.raises( - NotImplementedError, match="Polygon geometry conversion is not implemented" - ): - cfxr.shapely_to_cf(geoms) - - geoms.append(Point(1, 2)) with pytest.raises(ValueError, match="Mixed geometry types are not supported"): cfxr.shapely_to_cf(geoms) - with pytest.raises( - NotImplementedError, match="Only grid mapping longitude_latitude" - ): - cfxr.shapely_to_cf([Point(4, 5)], grid_mapping="albers_conical_equal_area") + encoded = cfxr.shapely_to_cf( + [Point(4, 
5)], grid_mapping="albers_conical_equal_area" + ) + assert encoded["x"].attrs["standard_name"] == "projection_x_coordinate" + assert encoded["y"].attrs["standard_name"] == "projection_y_coordinate" + for name in ["x", "y", "crd_x", "crd_y"]: + assert "grid_mapping" not in encoded[name].attrs @requires_shapely @@ -247,7 +396,7 @@ def test_cf_to_shapely_for_lines_without_multilines( in_ds, expected = geometry_line_without_multilines_ds actual = cfxr.cf_to_shapely(in_ds) assert actual.dims == ("index",) - xr.testing.assert_identical(actual, expected) + xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected) in_ds = in_ds.assign_coords(index=["b", "c"]) actual = cfxr.cf_to_shapely(in_ds) @@ -258,12 +407,51 @@ def test_cf_to_shapely_for_lines_without_multilines( @requires_shapely -def test_cf_to_shapely_errors(geometry_ds, geometry_line_ds): - in_ds, _ = geometry_ds - in_ds.geometry_container.attrs["geometry_type"] = "polygon" - with pytest.raises(NotImplementedError, match="Polygon geometry"): - cfxr.cf_to_shapely(in_ds) +def test_cf_to_shapely_for_polygons(geometry_polygon_ds): + in_ds, expected = geometry_polygon_ds + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected) + + +@requires_shapely +def test_cf_to_shapely_for_polygons_without_multipolygons( + geometry_polygon_without_multipolygons_ds, +): + in_ds, expected = geometry_polygon_without_multipolygons_ds + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected) + + in_ds = in_ds.assign_coords(index=["b", "c"]) + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical( + actual.drop_vars(["crd_x", "crd_y"]), expected.assign_coords(index=["b", "c"]) + ) + + +@requires_shapely +def test_cf_to_shapely_for_polygons_without_holes( + geometry_polygon_without_holes_ds, +): 
+ in_ds, expected = geometry_polygon_without_holes_ds + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected) + + in_ds = in_ds.assign_coords(index=["b", "c"]) + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical( + actual.drop_vars(["crd_x", "crd_y"]), expected.assign_coords(index=["b", "c"]) + ) + + +@requires_shapely +def test_cf_to_shapely_errors(geometry_ds, geometry_line_ds, geometry_polygon_ds): + in_ds, _ = geometry_ds in_ds.geometry_container.attrs["geometry_type"] = "punkt" with pytest.raises(ValueError, match="Valid CF geometry types are "): cfxr.cf_to_shapely(in_ds) @@ -273,6 +461,11 @@ def test_cf_to_shapely_errors(geometry_ds, geometry_line_ds): with pytest.raises(ValueError, match="'node_count' must be provided"): cfxr.cf_to_shapely(in_ds) + in_ds, _ = geometry_polygon_ds + del in_ds.geometry_container.attrs["node_count"] + with pytest.raises(ValueError, match="'node_count' must be provided"): + cfxr.cf_to_shapely(in_ds) + @requires_shapely def test_reshape_unique_geometries(geometry_ds): @@ -297,3 +490,22 @@ def test_reshape_unique_geometries(geometry_ds): in_ds = in_ds.assign(geometry=geoms) with pytest.raises(ValueError, match="The geometry variable must be 1D"): cfxr.geometry.reshape_unique_geometries(in_ds) + + +@requires_shapely +def test_encode_decode(geometry_ds, polygon_geometry): + geom_dim_ds = xr.Dataset() + geom_dim_ds = geom_dim_ds.assign_coords( + xr.Coordinates( + coords={"geoms": xr.Variable("geoms", polygon_geometry.variable)}, + indexes={}, + ) + ).assign({"foo": ("geoms", [1, 2])}) + + polyds = ( + polygon_geometry.rename("polygons").rename({"index": "index2"}).to_dataset() + ) + multi_ds = xr.merge([polyds, geometry_ds[1]]) + for ds in (geometry_ds[1], polygon_geometry.to_dataset(), geom_dim_ds, multi_ds): + roundtripped = decode_geometries(encode_geometries(ds)) + 
xr.testing.assert_identical(ds, roundtripped) diff --git a/cf_xarray/tests/test_units.py b/cf_xarray/tests/test_units.py index fa34b1b2..36c677cd 100644 --- a/cf_xarray/tests/test_units.py +++ b/cf_xarray/tests/test_units.py @@ -51,7 +51,13 @@ def test_percent_units(): assert str(ureg("%").units) == "percent" -@pytest.mark.xfail(reason="not supported by pint, yet: hgrecco/pint#1295") +def test_integer_units(): + """Test that integer 1 units is equal to dimensionless""" + # need to explicitly use parse_units to bypass the runtime type checking + # in the quantity constructor + assert str(ureg.parse_units(1)) == "dimensionless" + + def test_udunits_power_syntax(): """Test that UDUNITS style powers are properly parsed and interpreted.""" assert ureg("m2 s-2").units == ureg.m**2 / ureg.s**2 @@ -69,11 +75,16 @@ def test_udunits_power_syntax_parse_units(): ("m ** -1", "m-1"), ("m ** 2 / s ** 2", "m2 s-2"), ("m ** 3 / (kg * s ** 2)", "m3 kg-1 s-2"), + ("", "1"), ), ) def test_udunits_format(units, expected): u = ureg.parse_units(units) + if units == "": + # The non-shortened dimensionless can only work with recent pint + pytest.importorskip("pint", minversion="0.24.1") + assert f"{u:~cf}" == expected assert f"{u:cf}" == expected diff --git a/cf_xarray/units.py b/cf_xarray/units.py index acd48fbe..e1d553e8 100644 --- a/cf_xarray/units.py +++ b/cf_xarray/units.py @@ -1,64 +1,60 @@ """Module to provide unit support via pint approximating UDUNITS/CF.""" + import functools import re import pint -from pint import ( # noqa: F401 - DimensionalityError, - UndefinedUnitError, - UnitStrippedWarning, -) +from packaging.version import Version from .utils import emit_user_level_warning -# from `xclim`'s unit support module with permission of the maintainers -try: - @pint.register_unit_format("cf") - def short_formatter(unit, registry, **options): - """Return a CF-compliant unit string from a `pint` unit. - - Parameters - ---------- - unit : pint.UnitContainer - Input unit. 
- registry : pint.UnitRegistry - the associated registry - **options - Additional options (may be ignored) - - Returns - ------- - out : str - Units following CF-Convention, using symbols. - """ - import re - - # convert UnitContainer back to Unit - unit = registry.Unit(unit) - # Print units using abbreviations (millimeter -> mm) - s = f"{unit:~D}" - - # Search and replace patterns - pat = r"(?P(?:1 )?/ )?(?P\w+)(?: \*\* (?P\d))?" - - def repl(m): - i, u, p = m.groups() - p = p or (1 if i else "") - neg = "-" if i else "" - - return f"{u}{neg}{p}" - - out, n = re.subn(pat, repl, s) - - # Remove multiplications - out = out.replace(" * ", " ") - # Delta degrees: - out = out.replace("Δ°", "delta_deg") - return out.replace("percent", "%") +@pint.register_unit_format("cf") +def short_formatter(unit, registry, **options): + """Return a CF-compliant unit string from a `pint` unit. + + Parameters + ---------- + unit : pint.UnitContainer + Input unit. + registry : pint.UnitRegistry + The associated registry + **options + Additional options (may be ignored) + + Returns + ------- + out : str + Units following CF-Convention, using symbols. 
+ """ + # pint 0.24.1 gives {"dimensionless": 1} for non-shortened dimensionless units + # CF uses "1" to denote fractions and dimensionless quantities + if unit == {"dimensionless": 1} or not unit: + return "1" + + # If u is a name, get its symbol (same as pint's "~" pre-formatter) + # otherwise, assume a symbol (pint should have already raised on invalid units before this) + unit = pint.util.UnitsContainer( + { + registry._get_symbol(u) if u in registry._units else u: exp + for u, exp in unit.items() + } + ) + + # Change in formatter signature in pint 0.24 + if Version(pint.__version__) < Version("0.24"): + args = (unit.items(),) + else: + # Numerators splitted from denominators + args = ( + ((u, e) for u, e in unit.items() if e >= 0), + ((u, e) for u, e in unit.items() if e < 0), + ) + + out = pint.formatter(*args, as_ratio=False, product_fmt=" ", power_fmt="{}{}") + # To avoid potentiel unicode problems in netCDF. In both cases, this unit is not recognized by udunits + return out.replace("Δ°", "delta_deg") -except ImportError: - pass # ------ # Reused with modification from MetPy under the terms of the BSD 3-Clause License. @@ -77,7 +73,13 @@ def repl(m): ], force_ndarray_like=True, ) +# ----- end block copied from metpy +# need to insert to make sure this is the first preprocessor +# This ensures we convert integer `1` to string `"1"`, as needed by pint. 
+units.preprocessors.insert(0, str) + +# ----- units.define("percent = 0.01 = %") # Define commonly encountered units (both CF and non-CF) not defined by pint diff --git a/cf_xarray/utils.py b/cf_xarray/utils.py index 5644774c..5d040262 100644 --- a/cf_xarray/utils.py +++ b/cf_xarray/utils.py @@ -15,17 +15,35 @@ cftime = None +def _is_duck_dask_array(x): + """Return True if the input is a dask array.""" + # Code copied and simplified from xarray < 2024.02 (xr.core.pycompat.is_duck_dask_array) + try: + from dask.base import is_dask_collection + except ImportError: + return False + + return ( + is_dask_collection(x) + and hasattr(x, "ndim") + and hasattr(x, "shape") + and hasattr(x, "dtype") + and ( + (hasattr(x, "__array_function__") and hasattr(x, "__array_ufunc__")) + or hasattr(x, "__array_namespace__") + ) + ) + + def _contains_cftime_datetimes(array) -> bool: """Check if an array contains cftime.datetime objects""" # Copied / adapted from xarray.core.common - from xarray.core.pycompat import is_duck_dask_array - if cftime is None: return False else: if array.dtype == np.dtype("O") and array.size > 0: sample = array.ravel()[0] - if is_duck_dask_array(sample): + if _is_duck_dask_array(sample): sample = sample.compute() if isinstance(sample, np.ndarray): sample = sample.item() diff --git a/ci/doc.yml b/ci/doc.yml index 9481e505..361c02d2 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -19,6 +19,7 @@ dependencies: - pooch - pint - regex + - shapely - furo - myst-nb - pip: diff --git a/codecov.yml b/codecov.yml index aa84c676..ab70aee1 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,2 +1,3 @@ ignore: - "*/tests/*" + - "cf_xarray/datasets.py" diff --git a/doc/api.rst b/doc/api.rst index 8a9060dc..493a01a9 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -17,6 +17,17 @@ Top-level API encode_multi_index_as_compress decode_compress_to_multi_index +Geometries +---------- +.. 
autosummary:: + :toctree: generated/ + + geometry.decode_geometries + geometry.encode_geometries + geometry.shapely_to_cf + geometry.cf_to_shapely + geometry.GeometryNames + .. currentmodule:: xarray DataArray @@ -96,6 +107,7 @@ Attributes Dataset.cf.coordinates Dataset.cf.formula_terms Dataset.cf.grid_mapping_names + Dataset.cf.geometries Dataset.cf.standard_names .. _dsmeth: diff --git a/doc/bounds.md b/doc/bounds.md index 537aef7e..21d85fcd 100644 --- a/doc/bounds.md +++ b/doc/bounds.md @@ -4,12 +4,13 @@ # Bounds Variables -See - +```{seealso} +1. [CF conventions on coordinate bounds](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.10/cf-conventions.html#cell-boundaries) 1. {py:attr}`Dataset.cf.bounds`, 1. {py:func}`Dataset.cf.add_bounds`, 1. {py:func}`cf_xarray.bounds_to_vertices`, 1. {py:func}`cf_xarray.vertices_to_bounds` +``` `cf_xarray` supports parsing [coordinate bounds](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.10/cf-conventions.html#cell-boundaries) as encoded in the CF `bounds` attribute. A useful feature for incomplete dataset is also the automatic bounds estimation possible through `cf.add_bounds`. This method will estimate the missing bounds by finding the middle points between elements of the given coordinate, but also by extrapolating to find the outer bounds of the grid. This linear estimation works well with rectilinear grids, but it is only a coarse approximation for curvilinear and simple irregular grids. diff --git a/doc/coding.md b/doc/coding.md index 2ea05a69..6183781d 100644 --- a/doc/coding.md +++ b/doc/coding.md @@ -26,6 +26,10 @@ xr.set_options(display_expand_data=False) `cf_xarray` aims to support encoding and decoding variables using CF conventions not yet implemented by Xarray. +## Geometries + +See [](./geometry.md) for more. 
+ ## Compression by gathering The ["compression by gathering"](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#compression-by-gathering) diff --git a/doc/dsg.md b/doc/dsg.md index 936480e8..c90e6fdc 100644 --- a/doc/dsg.md +++ b/doc/dsg.md @@ -23,6 +23,11 @@ xr.set_options(display_expand_data=False) # Discrete Sampling Geometries +```{seealso} +1. [CF conventions on Discrete Sampling Geometries](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.9/cf-conventions.html#discrete-sampling-geometries) +1. {py:attr}`Dataset.cf.cf_roles` +``` + `cf_xarray` supports identifying variables by the [`cf_role` attribute](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.9/cf-conventions.html#discrete-sampling-geometries). ```{code-cell} diff --git a/doc/flags.md b/doc/flags.md index 448cc99a..5e37df36 100644 --- a/doc/flags.md +++ b/doc/flags.md @@ -15,6 +15,10 @@ kernelspec: # Flag Variables +```{seealso} +1. [CF conventions on flag variables and masks](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#flags) +``` + `cf_xarray` has some support for [flag variables](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#flags), including flag masks. ## Flag Values diff --git a/doc/geometry.md b/doc/geometry.md index 1fddea17..e105c88b 100644 --- a/doc/geometry.md +++ b/doc/geometry.md @@ -1,7 +1,152 @@ +--- +jupytext: + text_representation: + format_name: myst +kernelspec: + display_name: Python 3 + name: python3 +--- + ```{eval-rst} .. currentmodule:: xarray ``` # Geometries -See {py:func}`cf_xarray.shapely_to_cf`, {py:func}`cf_xarray.cf_to_shapely` +```{seealso} +1. [The CF conventions on Geometries](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#geometries) +1. {py:attr}`Dataset.cf.geometries` +``` + +```{eval-rst} +.. 
currentmodule:: cf_xarray +``` + +First read an example dataset with CF-encoded geometries + +```{code-cell} +import cf_xarray as cfxr +import cf_xarray.datasets +import xarray as xr + +ds = cfxr.datasets.encoded_point_dataset() +ds +``` + +The {py:attr}`Dataset.cf.geometries` property will yield a mapping from geometry type to geometry container variable name. + +```{code-cell} +ds.cf.geometries +``` + +The `"geometry"` name is special, and will return the geometry *container* present in the dataset + +```{code-cell} +ds.cf["geometry"] +``` + +Request all variables needed to represent a geometry as a Dataset using the geometry type as key. + +```{code-cell} +ds.cf[["point"]] +``` + +You *must* request a Dataset as return type, that is provide the list `["point]`, because the CF conventions encode geometries across multiple variables with dimensions that are not present on all variables. Xarray's data model does *not* allow representing such a collection of variables as a DataArray. + +## Encoding & decoding + +`cf_xarray` can convert between vector geometries represented as shapely objects +and CF-compliant array representations of those geometries. + +Let's start by creating an xarray object containing some shapely geometries. This example uses +a `xr.DataArray` but these functions also work with a `xr.Dataset` where one of the data variables +contains an array of shapes. + +```{warning} +`cf_xarray` does not support handle multiple types of shapes (Point, Line, Polygon) in one +`xr.DataArray`, but multipart geometries are supported and can be mixed with single-part +geometries of the same type. +``` + +`cf-xarray` provides {py:func}`geometry.encode_geometries` and {py:func}`geometry.decode_geometries` to +encode and decode xarray Datasets to/from a CF-compliant form that can be written to any array storage format. 
+ +For example, here is a Dataset with shapely geometries + +```{code-cell} +ds = cfxr.datasets.point_dataset() +ds +``` + +Encode with the CF-conventions + +```{code-cell} +encoded = cfxr.geometry.encode_geometries(ds) +encoded +``` + +This dataset can then be written to any format supported by Xarray. +To decode back to shapely geometries, reverse the process using {py:func}`geometry.decode_geometries` + +```{code-cell} +decoded = cfxr.geometry.decode_geometries(encoded) +ds.identical(decoded) +``` + +### Limitations + +The following limitations can be relaxed in the future. PRs welcome! + +1. cf-xarray uses `"geometry_container"` as the name for the geometry variable always. If there are multiple geometry variables then `"geometry_N"`is used where N is an integer >= 0. cf-xarray behaves similarly for all associated geometry variables names: i.e. `"node"`, `"node_count"`, `"part_node_count"`, `"part"`, `"interior_ring"`. `"x"`, `"y"` (with suffixes if needed) are always the node coordinate variable names, and `"crd_x"`, `"crd_y"` are the nominal X, Y coordinate locations. None of this is configurable at the moment. +1. CF xarray will not set the `"geometry"` attribute that links a variable to a geometry by default unless the geometry variable is a dimension coordinate for that variable. This heuristic works OK for vector data cubes (e.g. [xvec](https://xvec.readthedocs.io/en/stable/)). You should set the `"geometry"` attribute manually otherwise. Suggestions for better behaviour here are very welcome. + +## Lower-level conversions + +Encoding a single DataArray is possible using {py:func}`geometry.shapely_to_cf`. + +```{code-cell} +da = ds["geometry"] +ds_cf = cfxr.shapely_to_cf(da) +ds_cf +``` + +This function returns a `xr.Dataset` containing the CF fields needed to reconstruct the +geometries. 
In particular there are: + +- `'x'`, `'y'` : the node coordinates +- `'crd_x'`, `'crd_y'` : the feature coordinates (might have different names if `grid_mapping` is available). +- `'node_count'` : The number of nodes per feature. Always present for Lines and Polygons. For + Points: only present if there are multipart geometries. +- `'part_node_count'` : The number of nodes per individual geometry. Only for Lines with multipart + geometries and for Polygons with multipart geometries or holes. +- `'interior_ring'` : Integer boolean indicating whether ring is interior or exterior. Only for + Polygons with holes. +- `'geometry_container`' : Empty variable with attributes describing the geometry type. + +Here are the attributes on `geometry_container`. This pattern mimics the convention of +specifying spatial reference information in the attrs of the empty array `spatial_ref`. + +```{code-cell} +ds_cf.geometry_container.attrs +``` + +```{note} +Z axis is not yet supported for any shapes. +``` + +This `xr.Dataset` can be converted back into a `xr.DataArray` of shapely geometries: + +```{code-cell} +cfxr.cf_to_shapely(ds_cf) +``` + +This conversion adds coordinates that aren't in the `xr.DataArray` that we started with. +By default these are called `'crd_x'` and `'crd_y'` unless `grid_mapping` is specified. + +## Gotchas + +For MultiPolygons with holes the CF notation is slightly ambiguous on which hole is associated +with which polygon. This is problematic because shapely stores holes within the polygon +object that they are associated with. `cf_xarray` assumes that the shapes are interleaved +such that the holes (interior rings) are associated with the exteriors (exterior rings) that +immediately precede them. diff --git a/doc/grid_mappings.md b/doc/grid_mappings.md index a653f6cc..15e48379 100644 --- a/doc/grid_mappings.md +++ b/doc/grid_mappings.md @@ -13,10 +13,11 @@ kernelspec: # Grid Mappings -See - -1. {py:attr}`Dataset.cf.grid_mapping_names`, +```{seealso} +1. 
[CF conventions on grid mappings and projections](https://cfconventions.org/Data/cf-conventions/cf-conventions-1.10/cf-conventions.html#grid-mappings-and-projections) +1. {py:attr}`Dataset.cf.grid_mapping_names` 1. {py:attr}`DataArray.cf.grid_mapping_name` +``` `cf_xarray` understands the concept of coordinate projections using the [grid_mapping](https://cfconventions.org/Data/cf-conventions/cf-conventions-1.10/cf-conventions.html#grid-mappings-and-projections) attribute convention. For example, the dataset might contain two sets of coordinates: diff --git a/doc/parametricz.md b/doc/parametricz.md index d1115552..77f67924 100644 --- a/doc/parametricz.md +++ b/doc/parametricz.md @@ -22,6 +22,12 @@ xr.set_options(display_expand_data=False) # Parametric Vertical Coordinates +```{seealso} +1. [CF conventions on parametric vertical coordinates](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#parametric-vertical-coordinate) +2. {py:meth}`Dataset.cf.decode_vertical_coords` +3. {py:attr}`Dataset.cf.formula_terms` +``` + `cf_xarray` supports decoding [parametric vertical coordinates](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#parametric-vertical-coordinate) encoded in the `formula_terms` attribute using {py:meth}`Dataset.cf.decode_vertical_coords`. Right now, only the two ocean s-coordinates and `ocean_sigma_coordinate` are supported, but support for the [rest](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#parametric-v-coord) should be easy to add (Pull Requests are very welcome!). 
## Decoding parametric coordinates diff --git a/doc/selecting.md b/doc/selecting.md index 93bd596f..0a06e25f 100644 --- a/doc/selecting.md +++ b/doc/selecting.md @@ -22,7 +22,15 @@ xr.set_options(display_expand_data=False) # Selecting DataArrays -A second powerful feature of `cf_xarray` is the ability select DataArrays using special "CF names" like the "latitude", or "longitude" coordinate names, "X" or "Y" axes names, oreven using the `standard_name` attribute if present. +```{seealso} +CF conventions on +1. [coordinate variables](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#coordinate-types) +1. [cell bounds and measures](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#_data_representative_of_cells) +1. [standard name](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#standard-name) +1. [ancillary data](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#ancillary-data) +``` + +A powerful feature of `cf_xarray` is the ability select DataArrays using special "CF names" like the "latitude", or "longitude" coordinate names, "X" or "Y" axes names, oreven using the `standard_name` attribute if present. To demonstrate this, let's load a few datasets diff --git a/doc/sgrid_ugrid.md b/doc/sgrid_ugrid.md index 62b4da57..6e840048 100644 --- a/doc/sgrid_ugrid.md +++ b/doc/sgrid_ugrid.md @@ -9,6 +9,11 @@ kernelspec: # SGRID / UGRID +```{seealso} +1. [SGRID conventions](https://sgrid.github.io/sgrid/) +1. [UGRID conventions](http://ugrid-conventions.github.io/ugrid-conventions/) +``` + ## SGRID `cf_xarray` can parse the attributes on the `grid_topology` variable to identify dimension names with axes names `X`, `Y`, `Z`. diff --git a/doc/units.md b/doc/units.md index 8dd07afd..884828fd 100644 --- a/doc/units.md +++ b/doc/units.md @@ -10,9 +10,13 @@ hide-toc: true # Units +```{seealso} +1. 
[CF conventions on units](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#units) +``` + The xarray ecosystem supports unit-aware arrays using [pint](https://pint.readthedocs.io) and [pint-xarray](https://pint-xarray.readthedocs.io). Some changes are required to make these packages work well with [UDUNITS format recommended by the CF conventions](http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#units). -`cf_xarray` makes those recommended changes when you `import cf_xarray.units`. These changes allow pint to parse and format UDUNIT units strings, and add several custom units like `degrees_north`, `psu` etc. +`cf_xarray` makes those recommended changes when you `import cf_xarray.units`. These changes allow pint to parse and format UDUNIT units strings, and add several custom units like `degrees_north` for latitude, `psu` for ocean salinity, etc. Be aware that pint supports some units that UDUNITS does not recognize but `cf-xarray` will not try to detect them and raise an error. For example, a temperature subtraction returns "delta_degC" units in pint, which does not exist in UDUNITS. ## Formatting units @@ -23,5 +27,5 @@ from pint import application_registry as ureg import cf_xarray.units u = ureg.Unit("m ** 3 / s ** 2") -f"{u:~cf}" +f"{u:cf}" # or {u:~cf}, both return the same short format ``` diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9660db5b..388c2e98 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -3,6 +3,23 @@ What's New ---------- +v0.8.10 (unreleased) +==================== +- Fix methods in ``utils`` to work with xarray >= 2024.02.0. By `Pascal Bourgault`_. + +v0.8.9 (Feb 06, 2023) +===================== +- Convert integer (e.g. ``1``) units to string (e.g. ``"1"``) for pint. By `Justus Magin`_. + +v0.8.8 (Jan 19, 2023) +===================== +- Add conversion between CF geometries and Shapely objects for polygons. By `Julia Signell`_. 
+- Support 32bit wide bit masks. By `Michael St Laurent`_. + +v0.8.7 (Dec 19, 2023) +===================== +- Add conversion between CF geometries and Shapely objects for lines. By `Julia Signell`_. + v0.8.5 (Oct 24, 2023) ===================== - Fix for ``get_bounds_dim_name``. By `Deepak Cherian`_. @@ -248,4 +265,6 @@ v0.1.3 .. _`Mathias Hauser`: https://github.com/mathause .. _`Aidan Heerdegen`: https://github.com/aidanheerdegen .. _`Clément Haëck`: https://github.com/Descanonge +.. _`Julia Signell`: https://github.com/jsignell +.. _`Michael St Laurent`: https://github.com/mps010160 .. _`flag variables`: http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#flags diff --git a/pyproject.toml b/pyproject.toml index b6a663e9..fc07cfcb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ dynamic = ["version"] [project.optional-dependencies] -all = ["matplotlib", "pint", "shapely", "regex", "rich", "pooch"] +all = ["matplotlib", "pint >=0.18, !=0.24.0", "shapely", "regex", "rich", "pooch"] [project.urls] homepage = "https://cf-xarray.readthedocs.io" @@ -64,6 +64,8 @@ exclude = [ ".eggs", "doc", ] + +[tool.ruff.lint] # E402: module level import not at top of file # E501: line too long - let ruff worry about that ignore = [ @@ -72,7 +74,6 @@ ignore = [ "B018", "B015", ] -[tool.ruff.lint] select = [ # Pyflakes "F",