From 0961741c9f3e58989d21f10f09366664111caf2a Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Tue, 13 Dec 2022 22:53:56 +0000 Subject: [PATCH 1/5] added code to extract ensemble forecasts for each station --- notes/gefs_forecasts.qmd | 170 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 notes/gefs_forecasts.qmd diff --git a/notes/gefs_forecasts.qmd b/notes/gefs_forecasts.qmd new file mode 100644 index 00000000..033c1d3e --- /dev/null +++ b/notes/gefs_forecasts.qmd @@ -0,0 +1,170 @@ +--- +title: "Ensemble forecasts" +author: "David LeBauer" +format: html + df-print: paged +editor: source +--- + + +## gefs4cast + +### gefs4cast::noaa_gefs + + +```{r} +library(dplyr) +## set path for gdal binary +Sys.setenv("GDAL_BIN"="/usr/bin/") +#remotes::install_github("cct-datascience/azmetr") +#library(azmetr) +data(station_info, package = 'azmetr') +readr::write_csv(station_info %>% + dplyr::mutate(site_name = meta_station_name, + site_id = meta_station_id), + "new_station_info.csv") + + +gefs4cast::noaa_gefs(date = Sys.Date(), + cycle = "00", + threads = 70, + gdal_ops = "", + s3 = arrow::s3_bucket("drivers", endpoint_override = "data.ecoforecast.org"), + max_horizon = 48, + purge = TRUE, + quiet = FALSE, + dest = tempdir(),#".", + locations = "new_station_info.csv",#paste0("https://github.com/eco4cast/neon4cast-noaa-download/", + # "raw/master/noaa_download_site_list.csv"), + name_pattern = "noaa/gefs-v12/stage1/{cycle_int}/{nice_date}/part-0.parquet") +``` + + +### Vignette + +From https://projects.ecoforecast.org/gefs4cast/articles/gefs-cog.html +```{r} +library(gdalcubes) +# remotes::install_github("neon4cast/gefs4cast") +#remotes::install_github('eco4cast/gefs4cast') +library(gefs4cast) +library(stringr) +library(lubridate) +library(stars) +library(viridisLite) +library(tmap) + +gdalcubes_options(parallel = TRUE)#24) +``` + +```{r} +date <- Sys.Date() - lubridate::days(1)#as.Date("2022-12-05") + +gefsdir <- fs::dir_create("gefs") + +#gefs_cog(tmpdir, ens_avg=TRUE, max_horizon=240, date = date) + +gefs_cog(gefsdir, + ens_avg = FALSE, + max_horizon = 48, + date = date) |> + system.time() +``` + +```{r} +# 000 file has different bands and so cannot be stacked into the collection + + +gefs_cubes <- list() +for (i in 1:30) { + ens <- stringr::str_pad(i, 2, pad = '0') + ens_id <- paste0('gep', ens) + .f <- fs::dir_ls(gefsdir, + type="file", + recurse = TRUE, + regex = ens_id)[-1] + step <- .f |> str_extract("f\\d{3}\\.tif") |> str_extract("\\d{3}") + datetime <- Sys.Date() + hours(step) + + fc <- stars::read_stars(.f[1]) + bandnames <- + stars::st_get_dimension_values(fc, 3) |> + stringr::str_extract("([A-Z]+):") |> str_remove(":") + + gefs_cubes[[ens]] <- create_image_collection(.f, + date_time = datetime, + band_names = bandnames) +} +``` + + +#### Cube View + +```{r} + +azmet_pts <- station_info %>% + sf::st_as_sf(coords = c("longitude", "latitude"), crs = 4326) + +azmet_box <- azmet_pts %>% + sf::st_bbox() + +iso <-"%Y-%m-%dT%H:%M:%S" +ext <- list(t0 = as.character(min(datetime),iso), + t1 = as.character(max(datetime),iso), + left = azmet_box[1], right = azmet_box[3], + top = azmet_box[4], bottom = azmet_box[2]) +v <- cube_view(srs = "EPSG:4326", + extent = ext, + #dx = 0.5, dy = 0.5, # original resolution -- half-degree + dx = 0.1, + dy = 0.1, + dt = "PT1H", + #dt = "PT3H", # original resolution + aggregation = "mean", + resampling = "cubicspline" +) +``` + +```{r} + +preds <- list() +for(ens in names(gefs_cubes)){ + gefs_cube <- gefs_cubes[[ens]] + x <- raster_cube(gefs_cube, v) |> + extract_geom(sf = azmet_pts) + p <- azmet_pts + p$FID <- rownames(azmet_pts) + y <- merge(p, x, by = "FID") + preds[[ens]] <- y +} +ens_forecast <- dplyr::bind_rows(preds, .id = 'ens') +``` + +```{r} +library(ggplot2) + +ggplot(data = ens_forecast) + + geom_line(aes(x = time, y = TMP, group = ens)) + + facet_wrap(~meta_station_name, ncol = 6) + + theme_bw() + +ggplot(data = ens_forecast) + + geom_line(aes(x = time, y = TMP, color = meta_station_name)) + + facet_wrap(~ens, ncol = 6) + + #geom_sf(aes(fill = TMP)) + + facet_wrap(~meta_station_name, ncol = 6) + + scale_fill_viridis_c(option = "plasma", na.value = "white") + + theme_void() + + theme(legend.position = "none") +``` + +```{r} +x <- raster_cube(gefs_cube, v) |> extract_geom(azmet_pts) + +raster_cube(gefs_cube, v) |> + gdalcubes::select_bands("TMP") |> + animate( col = viridisLite::viridis, nbreaks=100, + fps=10, save_as = "temp.gif") +``` + From f334f5f1eea9b572aab10ee326f5c29634a0c8aa Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Wed, 14 Dec 2022 17:38:41 +0000 Subject: [PATCH 2/5] add summary stats --- notes/gefs_forecasts.qmd | 86 +++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 33 deletions(-) diff --git a/notes/gefs_forecasts.qmd b/notes/gefs_forecasts.qmd index 033c1d3e..50ecbb65 100644 --- a/notes/gefs_forecasts.qmd +++ b/notes/gefs_forecasts.qmd @@ -11,7 +11,6 @@ editor: source ### gefs4cast::noaa_gefs - ```{r} library(dplyr) ## set path for gdal binary @@ -25,18 +24,18 @@ readr::write_csv(station_info %>% "new_station_info.csv") -gefs4cast::noaa_gefs(date = Sys.Date(), - cycle = "00", - threads = 70, - gdal_ops = "", - s3 = arrow::s3_bucket("drivers", endpoint_override = "data.ecoforecast.org"), - max_horizon = 48, - purge = TRUE, - quiet = FALSE, - dest = tempdir(),#".", - locations = "new_station_info.csv",#paste0("https://github.com/eco4cast/neon4cast-noaa-download/", - # "raw/master/noaa_download_site_list.csv"), - name_pattern = "noaa/gefs-v12/stage1/{cycle_int}/{nice_date}/part-0.parquet") +# gefs4cast::noaa_gefs(date = Sys.Date(), +# cycle = "00", +# threads = 70, +# gdal_ops = "", +# s3 = arrow::s3_bucket("drivers", endpoint_override = "data.ecoforecast.org"), +# max_horizon = 48, +# purge = TRUE, +# quiet = FALSE, +# dest = tempdir(),#".", +# locations = "new_station_info.csv",#paste0("https://github.com/eco4cast/neon4cast-noaa-download/", +# # "raw/master/noaa_download_site_list.csv"), +# name_pattern = "noaa/gefs-v12/stage1/{cycle_int}/{nice_date}/part-0.parquet") ``` @@ -58,25 +57,22 @@ gdalcubes_options(parallel = TRUE)#24) ``` ```{r} -date <- Sys.Date() - lubridate::days(1)#as.Date("2022-12-05") - -gefsdir <- fs::dir_create("gefs") +date <- Sys.Date() - lubridate::days(1) +# may need to adjust for time zones? +#as.Date("2022-12-05") -#gefs_cog(tmpdir, ens_avg=TRUE, max_horizon=240, date = date) +gefsdir <- fs::dir_create(file.path("gefs", date)) gefs_cog(gefsdir, ens_avg = FALSE, - max_horizon = 48, + max_horizon = 48, # get next 48 h date = date) |> system.time() ``` ```{r} -# 000 file has different bands and so cannot be stacked into the collection - - gefs_cubes <- list() -for (i in 1:30) { +for (i in 1:30) { # ens 00 different? ens <- stringr::str_pad(i, 2, pad = '0') ens_id <- paste0('gep', ens) .f <- fs::dir_ls(gefsdir, @@ -120,7 +116,7 @@ v <- cube_view(srs = "EPSG:4326", dy = 0.1, dt = "PT1H", #dt = "PT3H", # original resolution - aggregation = "mean", + aggregation = "mean", # will only aggregate if multiple ensemble members are viewed resampling = "cubicspline" ) ``` @@ -148,20 +144,44 @@ ggplot(data = ens_forecast) + facet_wrap(~meta_station_name, ncol = 6) + theme_bw() -ggplot(data = ens_forecast) + - geom_line(aes(x = time, y = TMP, color = meta_station_name)) + - facet_wrap(~ens, ncol = 6) - - #geom_sf(aes(fill = TMP)) + - facet_wrap(~meta_station_name, ncol = 6) + - scale_fill_viridis_c(option = "plasma", na.value = "white") + - theme_void() + - theme(legend.position = "none") +ens_long <- ens_forecast |> + tidyr::pivot_longer(cols = -c(ens, FID, meta_station_name, meta_station_id, time, geometry), + names_to = "var", + values_to = "value") |> + mutate(time = lubridate::ymd_hms(time), + id = paste0(meta_station_name, var)) +a <- ggplot(data = ens_long) + + geom_smooth(aes(x = time, y = value, color = meta_station_name)) + + #geom_line(aes(x = time, y = value, color = meta_station_name, group = id), alpha = 0.25) + + facet_wrap(~var, ncol = 4, scales = 'free_y') + +ggsave(a, width = 20, height = 10, filename = 'fig.pdf') ``` +Next step: long w/ summaries +ens, FID, station, time, geom, var, median, min, max, ucl_95, lcl_95, ucl_99, lcl_99 + ```{r} -x <- raster_cube(gefs_cube, v) |> extract_geom(azmet_pts) +## create statistical summaries by station, variable, and time + +ens_stats <- ens_long %>% + group_by(meta_station_name, var, time) %>% + summarise(median = median(value), + min = min(value), + max = max(value), + ucl_95 = quantile(value, 0.975), + lcl_95 = quantile(value, 0.025), + ucl_99 = quantile(value, 0.995), + lcl_99 = quantile(value, 0.005)) %>% + ungroup() %>% + mutate(id = paste0(meta_station_name, var)) + +``` + +Look at the data as a movie: +- should add station points and maybe some county boundaries to this +```{r} raster_cube(gefs_cube, v) |> gdalcubes::select_bands("TMP") |> animate( col = viridisLite::viridis, nbreaks=100, From 9ac59cd62eec36ec9fef3a2def0c41d64f55eebc Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Wed, 14 Dec 2022 17:52:59 +0000 Subject: [PATCH 3/5] write out and plot summary stats --- notes/gefs_forecasts.qmd | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/notes/gefs_forecasts.qmd b/notes/gefs_forecasts.qmd index 50ecbb65..25f64e3e 100644 --- a/notes/gefs_forecasts.qmd +++ b/notes/gefs_forecasts.qmd @@ -159,7 +159,6 @@ ggsave(a, width = 20, height = 10, filename = 'fig.pdf') ``` Next step: long w/ summaries -ens, FID, station, time, geom, var, median, min, max, ucl_95, lcl_95, ucl_99, lcl_99 ```{r} ## create statistical summaries by station, variable, and time @@ -175,7 +174,39 @@ ens_stats <- ens_long %>% lcl_99 = quantile(value, 0.005)) %>% ungroup() %>% mutate(id = paste0(meta_station_name, var)) +# save above as csv +readr::write_csv(ens_stats, 'ens_stats.csv') +``` + +```{r} +## create a plot of ens_stats time vs. value, with color by station, and facet by variable + +a <- ggplot(data = ens_stats) + + geom_line(aes(x = time, y = median, color = meta_station_name)) + + geom_ribbon(aes(x = time, ymin = lcl_95, ymax = ucl_95, fill = meta_station_name), alpha = 0.25) + + geom_ribbon(aes(x = time, ymin = min, ymax = max, fill = meta_station_name), alpha = 0.05) + + facet_wrap(~var, ncol = 4, scales = 'free_y') +a +# make plot above wider aspect +ggsave(a, width = 20, height = 10, filename = 'fig.pdf') + +# above plot but saved as one file per station, using a for loop +for(stn in unique(ens_stats$meta_station_name)){ + a <- ggplot(data = ens_stats %>% filter(meta_station_name == stn)) + + geom_line(aes(x = time, y = median)) + + geom_ribbon(aes(x = time, ymin = lcl_95, ymax = ucl_95), alpha = 0.25) + + geom_ribbon(aes(x = time, ymin = min, ymax = max), alpha = 0.05) + + facet_wrap(~var, ncol = 4, scales = 'free_y') + ggsave(a, width = 20, height = 10, filename = paste0(stn, '.pdf')) +} +``` +```{r} + + +``` + +```{r} ``` From 6840a2cd0b01debe3a2070918ca8a8975a06f261 Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Wed, 14 Dec 2022 18:05:31 +0000 Subject: [PATCH 4/5] clean up plus more and better plots --- notes/gefs_forecasts.qmd | 83 +++++++++++----------------------------- 1 file changed, 23 insertions(+), 60 deletions(-) diff --git a/notes/gefs_forecasts.qmd b/notes/gefs_forecasts.qmd index 25f64e3e..adac36a2 100644 --- a/notes/gefs_forecasts.qmd +++ b/notes/gefs_forecasts.qmd @@ -7,42 +7,17 @@ editor: source --- -## gefs4cast +## Download ensemble forecasts for AZMET stations -### gefs4cast::noaa_gefs +The goal here is to get a forecast for today that we can use to evaluate data streaming in from the AZMET stations. ```{r} library(dplyr) ## set path for gdal binary Sys.setenv("GDAL_BIN"="/usr/bin/") #remotes::install_github("cct-datascience/azmetr") -#library(azmetr) data(station_info, package = 'azmetr') -readr::write_csv(station_info %>% - dplyr::mutate(site_name = meta_station_name, - site_id = meta_station_id), - "new_station_info.csv") - - -# gefs4cast::noaa_gefs(date = Sys.Date(), -# cycle = "00", -# threads = 70, -# gdal_ops = "", -# s3 = arrow::s3_bucket("drivers", endpoint_override = "data.ecoforecast.org"), -# max_horizon = 48, -# purge = TRUE, -# quiet = FALSE, -# dest = tempdir(),#".", -# locations = "new_station_info.csv",#paste0("https://github.com/eco4cast/neon4cast-noaa-download/", -# # "raw/master/noaa_download_site_list.csv"), -# name_pattern = "noaa/gefs-v12/stage1/{cycle_int}/{nice_date}/part-0.parquet") -``` - - -### Vignette -From https://projects.ecoforecast.org/gefs4cast/articles/gefs-cog.html -```{r} library(gdalcubes) # remotes::install_github("neon4cast/gefs4cast") #remotes::install_github('eco4cast/gefs4cast') @@ -56,16 +31,20 @@ library(tmap) gdalcubes_options(parallel = TRUE)#24) ``` + +Based on [gefs4cast vignette](https://projects.ecoforecast.org/gefs4cast/articles/gefs-cog.html) + +Download ensemble forecasts ```{r} date <- Sys.Date() - lubridate::days(1) # may need to adjust for time zones? -#as.Date("2022-12-05") +# yesterday's forecast starts today? gefsdir <- fs::dir_create(file.path("gefs", date)) gefs_cog(gefsdir, ens_avg = FALSE, - max_horizon = 48, # get next 48 h + max_horizon = 48, # get next 48 h. might only need 24h? date = date) |> system.time() ``` @@ -93,9 +72,7 @@ for (i in 1:30) { # ens 00 different? } ``` - -#### Cube View - +Create a view that downscales the data to 0.1 degree resolution, hourly. ```{r} azmet_pts <- station_info %>% @@ -121,6 +98,7 @@ v <- cube_view(srs = "EPSG:4326", ) ``` +Now generate a dataframe of all the predictions. ```{r} preds <- list() @@ -136,27 +114,6 @@ for(ens in names(gefs_cubes)){ ens_forecast <- dplyr::bind_rows(preds, .id = 'ens') ``` -```{r} -library(ggplot2) - -ggplot(data = ens_forecast) + - geom_line(aes(x = time, y = TMP, group = ens)) + - facet_wrap(~meta_station_name, ncol = 6) + - theme_bw() - -ens_long <- ens_forecast |> - tidyr::pivot_longer(cols = -c(ens, FID, meta_station_name, meta_station_id, time, geometry), - names_to = "var", - values_to = "value") |> - mutate(time = lubridate::ymd_hms(time), - id = paste0(meta_station_name, var)) -a <- ggplot(data = ens_long) + - geom_smooth(aes(x = time, y = value, color = meta_station_name)) + - #geom_line(aes(x = time, y = value, color = meta_station_name, group = id), alpha = 0.25) + - facet_wrap(~var, ncol = 4, scales = 'free_y') - -ggsave(a, width = 20, height = 10, filename = 'fig.pdf') -``` Next step: long w/ summaries @@ -180,33 +137,39 @@ readr::write_csv(ens_stats, 'ens_stats.csv') ```{r} ## create a plot of ens_stats time vs. value, with color by station, and facet by variable +library(ggplot2) a <- ggplot(data = ens_stats) + geom_line(aes(x = time, y = median, color = meta_station_name)) + geom_ribbon(aes(x = time, ymin = lcl_95, ymax = ucl_95, fill = meta_station_name), alpha = 0.25) + geom_ribbon(aes(x = time, ymin = min, ymax = max, fill = meta_station_name), alpha = 0.05) + facet_wrap(~var, ncol = 4, scales = 'free_y') -a -# make plot above wider aspect +# make plot above with wide aspect ggsave(a, width = 20, height = 10, filename = 'fig.pdf') # above plot but saved as one file per station, using a for loop -for(stn in unique(ens_stats$meta_station_name)){ - a <- ggplot(data = ens_stats %>% filter(meta_station_name == stn)) + +plots <- list() +for(stn in unique(ens_stats$meta_station_id)){ + plots[[stn]] <- ggplot(data = ens_stats %>% filter(meta_station_name == stn)) + geom_line(aes(x = time, y = median)) + geom_ribbon(aes(x = time, ymin = lcl_95, ymax = ucl_95), alpha = 0.25) + geom_ribbon(aes(x = time, ymin = min, ymax = max), alpha = 0.05) + facet_wrap(~var, ncol = 4, scales = 'free_y') - ggsave(a, width = 20, height = 10, filename = paste0(stn, '.pdf')) + ggsave(plots[[stn]], width = 20, height = 10, filename = paste0(stn, '.pdf')) } + +```{r fig.width=40, fig.height=20} +# with list above use cowplot to combine into one pdf +library(cowplot) +p <- plot_grid(plotlist = plots, ncol = 6, labels = names(plots)) +ggsave(p, width = 40, height = 20, filename = 'all.pdf') ``` ```{r} - - ``` ```{r} + ``` From 71329f0e026e4691904a4f6ed9449545c6e2529e Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Wed, 14 Dec 2022 18:06:35 +0000 Subject: [PATCH 5/5] typo --- notes/gefs_forecasts.qmd | 1 - 1 file changed, 1 deletion(-) diff --git a/notes/gefs_forecasts.qmd b/notes/gefs_forecasts.qmd index adac36a2..116ee9a0 100644 --- a/notes/gefs_forecasts.qmd +++ b/notes/gefs_forecasts.qmd @@ -158,7 +158,6 @@ for(stn in unique(ens_stats$meta_station_id)){ ggsave(plots[[stn]], width = 20, height = 10, filename = paste0(stn, '.pdf')) } -```{r fig.width=40, fig.height=20} # with list above use cowplot to combine into one pdf library(cowplot) p <- plot_grid(plotlist = plots, ncol = 6, labels = names(plots))