[pre-commit.ci] pre-commit autoupdate #371

Open · wants to merge 2 commits into base: main
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -4,7 +4,7 @@ ci:

 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: check-added-large-files
       - id: check-toml
@@ -13,7 +13,7 @@ repos:
       - id: check-yaml
       - id: debug-statements
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.5.0
+    rev: v0.8.6
     hooks:
       # Run the linter.
       - id: ruff
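Note: both bumps track upstream releases of the mirrored hooks (pre-commit-hooks v4.6.0 → v5.0.0, ruff-pre-commit v0.5.0 → v0.8.6). The notebook diff below appears to be the auto-fix commit that pre-commit.ci pushes after updating the hooks: the changes are mechanical formatting (single quotes to double quotes, long lines wrapped, trailing whitespace stripped) with no behavioral change.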
148 changes: 92 additions & 56 deletions docs/drift_removal.ipynb
@@ -43,6 +43,7 @@
 ],
 "source": [
 "from dask_gateway import Gateway\n",
+"\n",
 "g = Gateway()\n",
 "running_clusters = g.list_clusters()\n",
 "print(running_clusters)\n",
@@ -77,6 +78,7 @@
 "source": [
 "from distributed import Client\n",
 "from dask_gateway import GatewayCluster\n",
+"\n",
 "cluster = GatewayCluster()\n",
 "cluster.scale(30)\n",
 "cluster"
@@ -255,18 +257,22 @@
 }
 ],
 "source": [
-"zkwargs = {'consolidated':True, 'use_cftime':True}\n",
-"kwargs = {'zarr_kwargs':zkwargs, 'preprocess':combined_preprocessing, 'aggregate':False}\n",
+"zkwargs = {\"consolidated\": True, \"use_cftime\": True}\n",
+"kwargs = {\n",
+"    \"zarr_kwargs\": zkwargs,\n",
+"    \"preprocess\": combined_preprocessing,\n",
+"    \"aggregate\": False,\n",
+"}\n",
 "\n",
 "col = google_cmip_col()\n",
 "\n",
 "\n",
-"cat = col.search(source_id='CanESM5-CanOE', variable_id='thetao')\n",
+"cat = col.search(source_id=\"CanESM5-CanOE\", variable_id=\"thetao\")\n",
 "\n",
 "\n",
-"ddict_historical = cat.search(experiment_id='historical').to_dataset_dict(**kwargs)\n",
-"ddict_ssp585 = cat.search(experiment_id='ssp585').to_dataset_dict(**kwargs)\n",
-"ddict_picontrol = cat.search(experiment_id='piControl').to_dataset_dict(**kwargs)"
+"ddict_historical = cat.search(experiment_id=\"historical\").to_dataset_dict(**kwargs)\n",
+"ddict_ssp585 = cat.search(experiment_id=\"ssp585\").to_dataset_dict(**kwargs)\n",
+"ddict_picontrol = cat.search(experiment_id=\"piControl\").to_dataset_dict(**kwargs)"
 ]
 },
 {
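The cell that follows selects datasets by their full intake-esm keys. As a side note, a minimal sketch of how the same picks could be made without hard-coding the long keys, assuming the `ddict_*` dictionaries from the cell above (the `pick` helper is hypothetical, not part of the notebook):

```python
# Hypothetical helper, not in the notebook: grab the first dataset whose
# intake-esm key mentions the given experiment_id.
def pick(ddict, experiment_id):
    return next(ds for key, ds in ddict.items() if f".{experiment_id}." in key)


ds_control = pick(ddict_picontrol, "piControl")
ds_historical = pick(ddict_historical, "historical")
ds_ssp585 = pick(ddict_ssp585, "ssp585")
```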
@@ -286,12 +292,18 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"ds_control = ddict_picontrol['CMIP.CCCma.CanESM5-CanOE.piControl.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/piControl/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
-"ds_historical = ddict_historical['CMIP.CCCma.CanESM5-CanOE.historical.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/historical/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
-"ds_ssp585 = ddict_ssp585['ScenarioMIP.CCCma.CanESM5-CanOE.ssp585.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5-CanOE/ssp585/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
+"ds_control = ddict_picontrol[\n",
+"    \"CMIP.CCCma.CanESM5-CanOE.piControl.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/piControl/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
+"]\n",
+"ds_historical = ddict_historical[\n",
+"    \"CMIP.CCCma.CanESM5-CanOE.historical.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/historical/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
+"]\n",
+"ds_ssp585 = ddict_ssp585[\n",
+"    \"ScenarioMIP.CCCma.CanESM5-CanOE.ssp585.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5-CanOE/ssp585/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
+"]\n",
 "\n",
 "# Pick a random location in x/y/z space to use as an example\n",
-"roi = {'x':100,'y':220, 'lev':30}"
+"roi = {\"x\": 100, \"y\": 220, \"lev\": 30}"
 ]
 },
 {
@@ -335,8 +347,8 @@
 ],
 "source": [
 "# ok, let's just plot them together\n",
-"ds_control.isel(**roi).thetao.plot(color='0.5')\n",
-"ds_historical.isel(**roi).thetao.plot(color='C1')"
+"ds_control.isel(**roi).thetao.plot(color=\"0.5\")\n",
+"ds_historical.isel(**roi).thetao.plot(color=\"C1\")"
 ]
 },
 {
@@ -373,7 +385,7 @@
 }
 ],
 "source": [
-"{k:v for k,v in ds_historical.attrs.items() if 'parent' in k}"
+"{k: v for k, v in ds_historical.attrs.items() if \"parent\" in k}"
 ]
 },
 {
@@ -428,8 +440,8 @@
 ],
 "source": [
 "# ok, let's just plot them together\n",
-"ds_control_adj.isel(**roi).thetao.plot(color='0.5')\n",
-"ds_historical_adj.isel(**roi).thetao.plot(color='C1')"
+"ds_control_adj.isel(**roi).thetao.plot(color=\"0.5\")\n",
+"ds_historical_adj.isel(**roi).thetao.plot(color=\"C1\")"
 ]
 },
 {
@@ -471,8 +483,8 @@
 ],
 "source": [
 "# ok, let's just plot them together\n",
-"ds_control_adj.isel(**roi, time=slice(0,24)).thetao.plot()\n",
-"ds_historical_adj.isel(**roi, time=slice(0,24)).thetao.plot()"
+"ds_control_adj.isel(**roi, time=slice(0, 24)).thetao.plot()\n",
+"ds_historical_adj.isel(**roi, time=slice(0, 24)).thetao.plot()"
 ]
 },
 {
@@ -491,8 +503,9 @@
 "outputs": [],
 "source": [
 "from xmip.drift_removal import replace_time\n",
+"\n",
 "# with the defaults it will just replace the dates with new ones which have time stamps at the beginning of the month.\n",
-"ds_historical_adj = replace_time(ds_historical_adj) "
+"ds_historical_adj = replace_time(ds_historical_adj)"
 ]
 },
 {
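For orientation, `replace_time` with its defaults boils down to generating fresh month-start stamps and swapping them in for the existing time axis. A minimal sketch of that idea, assuming a monthly dataset `ds`; the function name and start date are illustrative, not xmip's actual implementation:

```python
import xarray as xr


def replace_time_sketch(ds, start="1850-01-01"):
    # Build month-start cftime stamps of the same length as the data,
    # then swap them in; the values themselves are untouched.
    new_time = xr.cftime_range(start=start, periods=ds.sizes["time"], freq="1MS")
    return ds.assign_coords(time=new_time)
```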
@@ -526,8 +539,8 @@
 ],
 "source": [
 "# ok, let's just plot them together again\n",
-"ds_control_adj.isel(**roi, time=slice(0,24)).thetao.plot()\n",
-"ds_historical_adj.isel(**roi, time=slice(0,24)).thetao.plot()"
+"ds_control_adj.isel(**roi, time=slice(0, 24)).thetao.plot()\n",
+"ds_historical_adj.isel(**roi, time=slice(0, 24)).thetao.plot()"
 ]
 },
 {
@@ -564,7 +577,7 @@
 ],
 "source": [
 "for name, ds in ddict_historical.items():\n",
-"    print(name, ds.attrs['branch_time_in_parent'])"
+"    print(name, ds.attrs[\"branch_time_in_parent\"])"
 ]
 },
 {
@@ -598,14 +611,18 @@
 "# replace the timestamp with the first of the month for the control run and plot\n",
 "# we will also average the data yearly to remove some of the visual noise\n",
 "\n",
-"plt.figure(figsize=[12,4])\n",
-"replace_time(ds_control).isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0,150*4)).plot(color='0.5')\n",
+"plt.figure(figsize=[12, 4])\n",
+"replace_time(ds_control).isel(**roi).thetao.coarsen(time=3).mean().isel(\n",
+"    time=slice(0, 150 * 4)\n",
+").plot(color=\"0.5\")\n",
 "\n",
-"# now we loop through all the historical members, adjust the time and plot them in the same way, \n",
+"# now we loop through all the historical members, adjust the time and plot them in the same way,\n",
 "# but only for the first 20 years\n",
 "for name, ds in ddict_historical.items():\n",
-"    _, ds_adj = unify_time(ds_control, ds, adjust_to='parent')\n",
-"    ds_adj.isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0,30*4)).plot(color='C1')"
+"    _, ds_adj = unify_time(ds_control, ds, adjust_to=\"parent\")\n",
+"    ds_adj.isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0, 30 * 4)).plot(\n",
+"        color=\"C1\"\n",
+"    )"
 ]
 },
 {
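Context for the loop above: `unify_time(ds_control, ds, adjust_to="parent")` shifts the child (historical) run onto the parent (control) calendar using the `branch_time_in_parent` attribute printed above, so each member lines up against the stretch of the control run it actually branched from.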
@@ -654,9 +671,10 @@
 "# setting up the scratch bucket\n",
 "import os\n",
 "import fsspec\n",
-"PANGEO_SCRATCH = os.environ['PANGEO_SCRATCH']+'cmip6_pp_demo'\n",
-"path = f'{PANGEO_SCRATCH}/test_rechunked.zarr'\n",
-"temp_path = f'{PANGEO_SCRATCH}/test_rechunked_temp.zarr'\n",
+"\n",
+"PANGEO_SCRATCH = os.environ[\"PANGEO_SCRATCH\"] + \"cmip6_pp_demo\"\n",
+"path = f\"{PANGEO_SCRATCH}/test_rechunked.zarr\"\n",
+"temp_path = f\"{PANGEO_SCRATCH}/test_rechunked_temp.zarr\"\n",
 "mapper = fsspec.get_mapper(path)\n",
 "mapper_temp = fsspec.get_mapper(temp_path)"
 ]
@@ -1506,27 +1524,30 @@
 "source": [
 "if not mapper.fs.exists(path):\n",
 "    # recompute the rechunked data into the scratch bucket (is only triggered when the temporary store was erased)\n",
-"    \n",
+"\n",
 "    # Remove the temp store if for some reason that still exists\n",
 "    if mapper.fs.exists(temp_path):\n",
 "        mapper.fs.rm(temp_path, recursive=True)\n",
 "    from rechunker import rechunk\n",
+"\n",
 "    target_chunks = {\n",
-"        'thetao': {'time':6012, 'lev':1, 'x':3, 'y':291},\n",
-"        'x': {'x':3},\n",
-"        'y': {'y':291},\n",
-"        'lat': {'x':3, 'y':291},\n",
-"        'lev': {'lev':1},\n",
-"        'lon': {'x':3, 'y':291},\n",
-"        'time': {'time':6012}, \n",
+"        \"thetao\": {\"time\": 6012, \"lev\": 1, \"x\": 3, \"y\": 291},\n",
+"        \"x\": {\"x\": 3},\n",
+"        \"y\": {\"y\": 291},\n",
+"        \"lat\": {\"x\": 3, \"y\": 291},\n",
+"        \"lev\": {\"lev\": 1},\n",
+"        \"lon\": {\"x\": 3, \"y\": 291},\n",
+"        \"time\": {\"time\": 6012},\n",
 "    }\n",
-"    max_mem = '1GB'\n",
+"    max_mem = \"1GB\"\n",
 "\n",
-"    array_plan = rechunk(ds_control[['thetao']], target_chunks, max_mem, mapper, temp_store=mapper_temp)\n",
+"    array_plan = rechunk(\n",
+"        ds_control[[\"thetao\"]], target_chunks, max_mem, mapper, temp_store=mapper_temp\n",
+"    )\n",
 "    array_plan.execute(retries=10)\n",
-"    \n",
+"\n",
 "ds_control_rechunked = xr.open_zarr(mapper, use_cftime=True)\n",
-"ds_control_rechunked "
+"ds_control_rechunked"
 ]
 },
 {
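The reasoning behind these target chunks: the drift calculation in the next cell fits a linear trend along `time` at every grid point, so the rechunked store keeps the full time axis (6012 months) in a single chunk and splits space into small tiles instead. A quick sanity check of the resulting layout, as a sketch:

```python
# Expect one chunk spanning all 6012 time steps and many small spatial chunks.
print(ds_control_rechunked.thetao.chunks)
```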
@@ -2061,8 +2082,8 @@
 }
 ],
 "source": [
-"drift = calculate_drift(ds_control_rechunked, ds_historical, 'thetao') \n",
-"drift = drift.load() # This takes a bit, but it is worth loading this small output to avoid repeated computation\n",
+"drift = calculate_drift(ds_control_rechunked, ds_historical, \"thetao\")\n",
+"drift = drift.load()  # This takes a bit, but it is worth loading this small output to avoid repeated computation\n",
 "drift"
 ]
 },
@@ -2116,13 +2137,18 @@
 "source": [
 "start = drift.trend_time_range.isel(bnds=0).data.tolist()\n",
 "stop = drift.trend_time_range.isel(bnds=1).data.tolist()\n",
-"time = xr.cftime_range(start, stop, freq='1MS')\n",
+"time = xr.cftime_range(start, stop, freq=\"1MS\")\n",
 "\n",
 "# cut the control run to the time over which the trend was calculated\n",
 "ds_control_cut = ds_control_rechunked.sel(time=slice(start, stop))\n",
 "\n",
 "# use the linear slope from the same point to construct a trendline\n",
-"trendline = xr.DataArray((np.arange(len(time)) * drift.thetao.isel(**roi).data) + ds_control_cut.thetao.isel(**roi, time=0).data, dims=['time'], coords={'time':time})"
+"trendline = xr.DataArray(\n",
+"    (np.arange(len(time)) * drift.thetao.isel(**roi).data)\n",
+"    + ds_control_cut.thetao.isel(**roi, time=0).data,\n",
+"    dims=[\"time\"],\n",
+"    coords={\"time\": time},\n",
+")"
 ]
 },
 {
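Written out, the trendline above is y(t_i) = y(t_0) + i * m, with m the per-month drift slope at the chosen point (`drift.thetao.isel(**roi)`) and y(t_0) the first control value in the trend window; the following cells plot it against the cut control run to show what `remove_trend` subtracts.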
Expand Down Expand Up @@ -2205,7 +2231,9 @@
}
],
"source": [
"control_detrended = remove_trend(ds_control, drift, 'thetao', ref_date=str(ds_control.time.data[0]))\n",
"control_detrended = remove_trend(\n",
" ds_control, drift, \"thetao\", ref_date=str(ds_control.time.data[0])\n",
")\n",
"control_detrended.isel(**roi).plot()"
]
},
@@ -2247,7 +2275,9 @@
 }
 ],
 "source": [
-"ds_historical_dedrifted = remove_trend(ds_historical, drift, 'thetao', ref_date=str(ds_historical.time.data[0]))\n",
+"ds_historical_dedrifted = remove_trend(\n",
+"    ds_historical, drift, \"thetao\", ref_date=str(ds_historical.time.data[0])\n",
+")\n",
 "ds_historical_dedrifted.isel(**roi).plot()"
 ]
 },
@@ -2866,7 +2896,7 @@
 }
 ],
 "source": [
-"ds_historical_dedrifted.attrs['drift_removed']"
+"ds_historical_dedrifted.attrs[\"drift_removed\"]"
 ]
 },
 {
@@ -2930,10 +2960,10 @@
 "ds_ssp585_dedrifted = remove_trend(\n",
 "    ds_ssp585,\n",
 "    drift,\n",
-"    'thetao',\n",
-"    ref_date=str(ds_historical.time.data[0]) \n",
-"    # Note that the ref_date is still the first time point of the *historical* run. \n",
-"    # This ensures that the scenario is treated as an extension of the historical \n",
+"    \"thetao\",\n",
+"    ref_date=str(ds_historical.time.data[0]),\n",
+"    # Note that the ref_date is still the first time point of the *historical* run.\n",
+"    # This ensures that the scenario is treated as an extension of the historical\n",
 "    # run and the offset is calculated appropriately\n",
 ")"
 ]
@@ -2968,10 +2998,16 @@
 }
 ],
 "source": [
-"ds_historical.isel(**roi).thetao.coarsen(time=36, boundary='trim').mean().plot(color='C0', label='raw data')\n",
-"ds_ssp585.isel(**roi).thetao.coarsen(time=36, boundary='trim').mean().plot(color='C0')\n",
-"ds_historical_dedrifted.isel(**roi).coarsen(time=36, boundary='trim').mean().plot(color='C1', label='control drift removed')\n",
-"ds_ssp585_dedrifted.isel(**roi).coarsen(time=36, boundary='trim').mean().plot(color='C1')"
+"ds_historical.isel(**roi).thetao.coarsen(time=36, boundary=\"trim\").mean().plot(\n",
+"    color=\"C0\", label=\"raw data\"\n",
+")\n",
+"ds_ssp585.isel(**roi).thetao.coarsen(time=36, boundary=\"trim\").mean().plot(color=\"C0\")\n",
+"ds_historical_dedrifted.isel(**roi).coarsen(time=36, boundary=\"trim\").mean().plot(\n",
+"    color=\"C1\", label=\"control drift removed\"\n",
+")\n",
+"ds_ssp585_dedrifted.isel(**roi).coarsen(time=36, boundary=\"trim\").mean().plot(\n",
+"    color=\"C1\"\n",
+")"
 ]
 },
 {