[pre-commit.ci] pre-commit autoupdate #371

Open · wants to merge 2 commits into base: main
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -4,7 +4,7 @@ ci:

 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: check-added-large-files
       - id: check-toml
@@ -13,7 +13,7 @@ repos:
       - id: check-yaml
       - id: debug-statements
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.5.0
+    rev: v0.8.6
     hooks:
       # Run the linter.
       - id: ruff
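Note: both bumps track upstream releases of the mirrored hooks (pre-commit-hooks v4.6.0 → v5.0.0, ruff-pre-commit v0.5.0 → v0.8.6). The notebook diff below appears to be the auto-fix commit that pre-commit.ci pushes after updating the hooks: the changes are mechanical formatting (single quotes to double quotes, long lines wrapped, trailing whitespace stripped) with no behavioral change.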
148 changes: 92 additions & 56 deletions docs/drift_removal.ipynb
@@ -43,6 +43,7 @@
 ],
 "source": [
 "from dask_gateway import Gateway\n",
+"\n",
 "g = Gateway()\n",
 "running_clusters = g.list_clusters()\n",
 "print(running_clusters)\n",
@@ -77,6 +78,7 @@
 "source": [
 "from distributed import Client\n",
 "from dask_gateway import GatewayCluster\n",
+"\n",
 "cluster = GatewayCluster()\n",
 "cluster.scale(30)\n",
 "cluster"
@@ -255,18 +257,22 @@
 }
 ],
 "source": [
-"zkwargs = {'consolidated':True, 'use_cftime':True}\n",
-"kwargs = {'zarr_kwargs':zkwargs, 'preprocess':combined_preprocessing, 'aggregate':False}\n",
+"zkwargs = {\"consolidated\": True, \"use_cftime\": True}\n",
+"kwargs = {\n",
+"    \"zarr_kwargs\": zkwargs,\n",
+"    \"preprocess\": combined_preprocessing,\n",
+"    \"aggregate\": False,\n",
+"}\n",
 "\n",
 "col = google_cmip_col()\n",
 "\n",
 "\n",
-"cat = col.search(source_id='CanESM5-CanOE', variable_id='thetao')\n",
+"cat = col.search(source_id=\"CanESM5-CanOE\", variable_id=\"thetao\")\n",
 "\n",
 "\n",
-"ddict_historical = cat.search(experiment_id='historical').to_dataset_dict(**kwargs)\n",
-"ddict_ssp585 = cat.search(experiment_id='ssp585').to_dataset_dict(**kwargs)\n",
-"ddict_picontrol = cat.search(experiment_id='piControl').to_dataset_dict(**kwargs)"
+"ddict_historical = cat.search(experiment_id=\"historical\").to_dataset_dict(**kwargs)\n",
+"ddict_ssp585 = cat.search(experiment_id=\"ssp585\").to_dataset_dict(**kwargs)\n",
+"ddict_picontrol = cat.search(experiment_id=\"piControl\").to_dataset_dict(**kwargs)"
 ]
 },
 {
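The cell that follows selects datasets by their full intake-esm keys. As a side note, a minimal sketch of how the same picks could be made without hard-coding the long keys, assuming the `ddict_*` dictionaries from the cell above (the `pick` helper is hypothetical, not part of the notebook):

```python
# Hypothetical helper, not in the notebook: grab the first dataset whose
# intake-esm key mentions the given experiment_id.
def pick(ddict, experiment_id):
    return next(ds for key, ds in ddict.items() if f".{experiment_id}." in key)


ds_control = pick(ddict_picontrol, "piControl")
ds_historical = pick(ddict_historical, "historical")
ds_ssp585 = pick(ddict_ssp585, "ssp585")
```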
@@ -286,12 +292,18 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"ds_control = ddict_picontrol['CMIP.CCCma.CanESM5-CanOE.piControl.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/piControl/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
-"ds_historical = ddict_historical['CMIP.CCCma.CanESM5-CanOE.historical.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/historical/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
-"ds_ssp585 = ddict_ssp585['ScenarioMIP.CCCma.CanESM5-CanOE.ssp585.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5-CanOE/ssp585/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
+"ds_control = ddict_picontrol[\n",
+"    \"CMIP.CCCma.CanESM5-CanOE.piControl.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/piControl/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
+"]\n",
+"ds_historical = ddict_historical[\n",
+"    \"CMIP.CCCma.CanESM5-CanOE.historical.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/historical/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
+"]\n",
+"ds_ssp585 = ddict_ssp585[\n",
+"    \"ScenarioMIP.CCCma.CanESM5-CanOE.ssp585.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5-CanOE/ssp585/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
+"]\n",
 "\n",
 "# Pick a random location in x/y/z space to use as an example\n",
-"roi = {'x':100,'y':220, 'lev':30}"
+"roi = {\"x\": 100, \"y\": 220, \"lev\": 30}"
 ]
 },
 {
@@ -335,8 +347,8 @@
 ],
 "source": [
 "# ok, let's just plot them together\n",
-"ds_control.isel(**roi).thetao.plot(color='0.5')\n",
-"ds_historical.isel(**roi).thetao.plot(color='C1')"
+"ds_control.isel(**roi).thetao.plot(color=\"0.5\")\n",
+"ds_historical.isel(**roi).thetao.plot(color=\"C1\")"
 ]
 },
 {
@@ -373,7 +385,7 @@
 }
 ],
 "source": [
-"{k:v for k,v in ds_historical.attrs.items() if 'parent' in k}"
+"{k: v for k, v in ds_historical.attrs.items() if \"parent\" in k}"
 ]
 },
 {
@@ -428,8 +440,8 @@
 ],
 "source": [
 "# ok, let's just plot them together\n",
-"ds_control_adj.isel(**roi).thetao.plot(color='0.5')\n",
-"ds_historical_adj.isel(**roi).thetao.plot(color='C1')"
+"ds_control_adj.isel(**roi).thetao.plot(color=\"0.5\")\n",
+"ds_historical_adj.isel(**roi).thetao.plot(color=\"C1\")"
 ]
 },
 {
@@ -471,8 +483,8 @@
 ],
 "source": [
 "# ok, let's just plot them together\n",
-"ds_control_adj.isel(**roi, time=slice(0,24)).thetao.plot()\n",
-"ds_historical_adj.isel(**roi, time=slice(0,24)).thetao.plot()"
+"ds_control_adj.isel(**roi, time=slice(0, 24)).thetao.plot()\n",
+"ds_historical_adj.isel(**roi, time=slice(0, 24)).thetao.plot()"
 ]
 },
 {
@@ -491,8 +503,9 @@
 "outputs": [],
 "source": [
 "from xmip.drift_removal import replace_time\n",
+"\n",
 "# with the defaults it will just replace the dates with new ones which have time stamps at the beginning of the month.\n",
-"ds_historical_adj = replace_time(ds_historical_adj) "
+"ds_historical_adj = replace_time(ds_historical_adj)"
 ]
 },
 {
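For orientation, `replace_time` with its defaults boils down to generating fresh month-start stamps and swapping them in for the existing time axis. A minimal sketch of that idea, assuming a monthly dataset `ds`; the function name and start date are illustrative, not xmip's actual implementation:

```python
import xarray as xr


def replace_time_sketch(ds, start="1850-01-01"):
    # Build month-start cftime stamps of the same length as the data,
    # then swap them in; the values themselves are untouched.
    new_time = xr.cftime_range(start=start, periods=ds.sizes["time"], freq="1MS")
    return ds.assign_coords(time=new_time)
```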
@@ -526,8 +539,8 @@
 ],
 "source": [
 "# ok, let's just plot them together again\n",
-"ds_control_adj.isel(**roi, time=slice(0,24)).thetao.plot()\n",
-"ds_historical_adj.isel(**roi, time=slice(0,24)).thetao.plot()"
+"ds_control_adj.isel(**roi, time=slice(0, 24)).thetao.plot()\n",
+"ds_historical_adj.isel(**roi, time=slice(0, 24)).thetao.plot()"
 ]
 },
 {
@@ -564,7 +577,7 @@
 ],
 "source": [
 "for name, ds in ddict_historical.items():\n",
-"    print(name, ds.attrs['branch_time_in_parent'])"
+"    print(name, ds.attrs[\"branch_time_in_parent\"])"
 ]
 },
 {
@@ -598,14 +611,18 @@
 "# replace the timestamp with the first of the month for the control run and plot\n",
 "# we will also average the data yearly to remove some of the visual noise\n",
 "\n",
-"plt.figure(figsize=[12,4])\n",
-"replace_time(ds_control).isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0,150*4)).plot(color='0.5')\n",
+"plt.figure(figsize=[12, 4])\n",
+"replace_time(ds_control).isel(**roi).thetao.coarsen(time=3).mean().isel(\n",
+"    time=slice(0, 150 * 4)\n",
+").plot(color=\"0.5\")\n",
 "\n",
-"# now we loop through all the historical members, adjust the time and plot them in the same way, \n",
+"# now we loop through all the historical members, adjust the time and plot them in the same way,\n",
 "# but only for the first 20 years\n",
 "for name, ds in ddict_historical.items():\n",
-"    _, ds_adj = unify_time(ds_control, ds, adjust_to='parent')\n",
-"    ds_adj.isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0,30*4)).plot(color='C1')"
+"    _, ds_adj = unify_time(ds_control, ds, adjust_to=\"parent\")\n",
+"    ds_adj.isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0, 30 * 4)).plot(\n",
+"        color=\"C1\"\n",
+"    )"
 ]
 },
 {
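Context for the loop above: `unify_time(ds_control, ds, adjust_to="parent")` shifts the child (historical) run onto the parent (control) calendar using the `branch_time_in_parent` attribute printed above, so each member lines up against the stretch of the control run it actually branched from.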
@@ -654,9 +671,10 @@
 "# setting up the scratch bucket\n",
 "import os\n",
 "import fsspec\n",
-"PANGEO_SCRATCH = os.environ['PANGEO_SCRATCH']+'cmip6_pp_demo'\n",
-"path = f'{PANGEO_SCRATCH}/test_rechunked.zarr'\n",
-"temp_path = f'{PANGEO_SCRATCH}/test_rechunked_temp.zarr'\n",
+"\n",
+"PANGEO_SCRATCH = os.environ[\"PANGEO_SCRATCH\"] + \"cmip6_pp_demo\"\n",
+"path = f\"{PANGEO_SCRATCH}/test_rechunked.zarr\"\n",
+"temp_path = f\"{PANGEO_SCRATCH}/test_rechunked_temp.zarr\"\n",
 "mapper = fsspec.get_mapper(path)\n",
 "mapper_temp = fsspec.get_mapper(temp_path)"
 ]
@@ -1506,27 +1524,30 @@
 "source": [
 "if not mapper.fs.exists(path):\n",
 "    # recompute the rechunked data into the scratch bucket (is only triggered when the temporary store was erased)\n",
-"    \n",
+"\n",
 "    # Remove the temp store if for some reason that still exists\n",
 "    if mapper.fs.exists(temp_path):\n",
 "        mapper.fs.rm(temp_path, recursive=True)\n",
 "    from rechunker import rechunk\n",
+"\n",
 "    target_chunks = {\n",
-"        'thetao': {'time':6012, 'lev':1, 'x':3, 'y':291},\n",
-"        'x': {'x':3},\n",
-"        'y': {'y':291},\n",
-"        'lat': {'x':3, 'y':291},\n",
-"        'lev': {'lev':1},\n",
-"        'lon': {'x':3, 'y':291},\n",
-"        'time': {'time':6012}, \n",
+"        \"thetao\": {\"time\": 6012, \"lev\": 1, \"x\": 3, \"y\": 291},\n",
+"        \"x\": {\"x\": 3},\n",
+"        \"y\": {\"y\": 291},\n",
+"        \"lat\": {\"x\": 3, \"y\": 291},\n",
+"        \"lev\": {\"lev\": 1},\n",
+"        \"lon\": {\"x\": 3, \"y\": 291},\n",
+"        \"time\": {\"time\": 6012},\n",
 "    }\n",
-"    max_mem = '1GB'\n",
+"    max_mem = \"1GB\"\n",
 "\n",
-"    array_plan = rechunk(ds_control[['thetao']], target_chunks, max_mem, mapper, temp_store=mapper_temp)\n",
+"    array_plan = rechunk(\n",
+"        ds_control[[\"thetao\"]], target_chunks, max_mem, mapper, temp_store=mapper_temp\n",
+"    )\n",
 "    array_plan.execute(retries=10)\n",
-"    \n",
+"\n",
 "ds_control_rechunked = xr.open_zarr(mapper, use_cftime=True)\n",
-"ds_control_rechunked "
+"ds_control_rechunked"
 ]
 },
 {
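The reasoning behind these target chunks: the drift calculation in the next cell fits a linear trend along `time` at every grid point, so the rechunked store keeps the full time axis (6012 months) in a single chunk and splits space into small tiles instead. A quick sanity check of the resulting layout, as a sketch:

```python
# Expect one chunk spanning all 6012 time steps and many small spatial chunks.
print(ds_control_rechunked.thetao.chunks)
```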
@@ -2061,8 +2082,8 @@
 }
 ],
 "source": [
-"drift = calculate_drift(ds_control_rechunked, ds_historical, 'thetao') \n",
-"drift = drift.load() # This takes a bit, but it is worth loading this small output to avoid repeated computation\n",
+"drift = calculate_drift(ds_control_rechunked, ds_historical, \"thetao\")\n",
+"drift = drift.load()  # This takes a bit, but it is worth loading this small output to avoid repeated computation\n",
 "drift"
 ]
 },
@@ -2116,13 +2137,18 @@
 "source": [
 "start = drift.trend_time_range.isel(bnds=0).data.tolist()\n",
 "stop = drift.trend_time_range.isel(bnds=1).data.tolist()\n",
-"time = xr.cftime_range(start, stop, freq='1MS')\n",
+"time = xr.cftime_range(start, stop, freq=\"1MS\")\n",
 "\n",
 "# cut the control run to the time over which the trend was calculated\n",
 "ds_control_cut = ds_control_rechunked.sel(time=slice(start, stop))\n",
 "\n",
 "# use the linear slope from the same point to construct a trendline\n",
-"trendline = xr.DataArray((np.arange(len(time)) * drift.thetao.isel(**roi).data) + ds_control_cut.thetao.isel(**roi, time=0).data, dims=['time'], coords={'time':time})"
+"trendline = xr.DataArray(\n",
+"    (np.arange(len(time)) * drift.thetao.isel(**roi).data)\n",
+"    + ds_control_cut.thetao.isel(**roi, time=0).data,\n",
+"    dims=[\"time\"],\n",
+"    coords={\"time\": time},\n",
+")"
 ]
 },
 {
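Written out, the trendline above is y(t_i) = y(t_0) + i * m, with m the per-month drift slope at the chosen point (`drift.thetao.isel(**roi)`) and y(t_0) the first control value in the trend window; the following cells plot it against the cut control run to show what `remove_trend` subtracts.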
Expand Down Expand Up @@ -2205,7 +2231,9 @@
}
],
"source": [
"control_detrended = remove_trend(ds_control, drift, 'thetao', ref_date=str(ds_control.time.data[0]))\n",
"control_detrended = remove_trend(\n",
" ds_control, drift, \"thetao\", ref_date=str(ds_control.time.data[0])\n",
")\n",
"control_detrended.isel(**roi).plot()"
]
},
@@ -2247,7 +2275,9 @@
 }
 ],
 "source": [
-"ds_historical_dedrifted = remove_trend(ds_historical, drift, 'thetao', ref_date=str(ds_historical.time.data[0]))\n",
+"ds_historical_dedrifted = remove_trend(\n",
+"    ds_historical, drift, \"thetao\", ref_date=str(ds_historical.time.data[0])\n",
+")\n",
 "ds_historical_dedrifted.isel(**roi).plot()"
 ]
 },
@@ -2866,7 +2896,7 @@
 }
 ],
 "source": [
-"ds_historical_dedrifted.attrs['drift_removed']"
+"ds_historical_dedrifted.attrs[\"drift_removed\"]"
 ]
 },
 {
@@ -2930,10 +2960,10 @@
 "ds_ssp585_dedrifted = remove_trend(\n",
 "    ds_ssp585,\n",
 "    drift,\n",
-"    'thetao',\n",
-"    ref_date=str(ds_historical.time.data[0]) \n",
-"    # Note that the ref_date is still the first time point of the *historical* run. \n",
-"    # This ensures that the scenario is treated as an extension of the historical \n",
+"    \"thetao\",\n",
+"    ref_date=str(ds_historical.time.data[0]),\n",
+"    # Note that the ref_date is still the first time point of the *historical* run.\n",
+"    # This ensures that the scenario is treated as an extension of the historical\n",
 "    # run and the offset is calculated appropriately\n",
 ")"
 ]
@@ -2968,10 +2998,16 @@
 }
 ],
 "source": [
-"ds_historical.isel(**roi).thetao.coarsen(time=36, boundary='trim').mean().plot(color='C0', label='raw data')\n",
-"ds_ssp585.isel(**roi).thetao.coarsen(time=36, boundary='trim').mean().plot(color='C0')\n",
-"ds_historical_dedrifted.isel(**roi).coarsen(time=36, boundary='trim').mean().plot(color='C1', label='control drift removed')\n",
-"ds_ssp585_dedrifted.isel(**roi).coarsen(time=36, boundary='trim').mean().plot(color='C1')"
+"ds_historical.isel(**roi).thetao.coarsen(time=36, boundary=\"trim\").mean().plot(\n",
+"    color=\"C0\", label=\"raw data\"\n",
+")\n",
+"ds_ssp585.isel(**roi).thetao.coarsen(time=36, boundary=\"trim\").mean().plot(color=\"C0\")\n",
+"ds_historical_dedrifted.isel(**roi).coarsen(time=36, boundary=\"trim\").mean().plot(\n",
+"    color=\"C1\", label=\"control drift removed\"\n",
+")\n",
+"ds_ssp585_dedrifted.isel(**roi).coarsen(time=36, boundary=\"trim\").mean().plot(\n",
+"    color=\"C1\"\n",
+")"
 ]
 },
 {