From 84db708d1eb09e7d14695d3492c2d37b3d199968 Mon Sep 17 00:00:00 2001 From: Conor Anderson Date: Fri, 21 Dec 2018 00:12:21 -0500 Subject: [PATCH] Overhaul to move most of the code to plumber. (Temporarily include code to migrate from mariadb to plumber) Squashed commit of the following: commit ed9a8c011e9611c5098b23fe953369db1c691f90 Author: Conor Anderson Date: Fri Dec 21 04:22:08 2018 +0000 Update .gitlab-ci.yml and appveyor.yml commit 9f4d8b092623585948c32a6f9f1c5b0f4ae28ddd Author: Conor Anderson Date: Thu Dec 20 18:03:46 2018 -0500 Clean-up and improvements. commit abb1630d2ae6a7058a0e677669825d086888f13f Author: Conor Anderson Date: Thu Dec 20 13:16:30 2018 -0500 Fix missing year arg. commit c391e898db1c8cca6d8efa2a756f15ca0eaf5fe7 Author: Conor Anderson Date: Thu Dec 20 13:12:15 2018 -0500 Clean up docker stuff. commit d7a1a3a473de879c7b038581b9e8810cf041aec2 Author: Conor Anderson Date: Wed Dec 19 23:32:13 2018 -0500 Add missing import commit 9f768affc2a91aa385a52213479655d9f0329d62 Author: Conor Anderson Date: Wed Dec 19 23:00:59 2018 -0500 Move to plumber, part 2 commit 7e7f08134adf9f3d8778148a08e99f55011dee9c Author: Conor Anderson Date: Wed Dec 19 22:59:29 2018 -0500 Move to plumber, part 1 --- .Rbuildignore | 3 + .gitignore | 1 + .gitlab-ci.yml | 2 +- DESCRIPTION | 12 +- NAMESPACE | 24 +--- R/download_data.R | 47 ++----- R/fix_bad_data.R | 5 +- R/get_catalogue.R | 19 +-- R/map_stations.R | 24 ++-- R/qc.R | 13 +- R/senamhiR.R | 1 + R/station_search.R | 21 +-- README.Rmd | 14 +- README.md | 201 ++++++++++++++------------- appveyor.yml | 17 ++- docker-compose.yml | 10 ++ man/download_data.Rd | 6 +- plumber/Dockerfile | 16 +++ R/clean_table.R => plumber/plumber.R | 109 +++++++++++++-- tests/testthat/test-download_data.R | 2 +- 20 files changed, 312 insertions(+), 235 deletions(-) create mode 100644 docker-compose.yml create mode 100644 plumber/Dockerfile rename R/clean_table.R => plumber/plumber.R (62%) diff --git a/.Rbuildignore b/.Rbuildignore index 91d95f4..4f5c75e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,3 +8,6 @@ ^README\..*$ ^LICENSE\.md$ ^install_senamhiR\.R$ +^plumber$ +^docker-compose.yml$ +^updateip.sh$ \ No newline at end of file diff --git a/.gitignore b/.gitignore index 83b0965..52fd99f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ .directory senamhiR.Rproj README.html +updateip.sh \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 08ddc15..640735f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,6 +9,6 @@ r-base: - apt-get update -qq - apt-get install -y --no-install-recommends libmariadbclient-dev libcurl4-openssl-dev libssh2-1-dev libssl-dev libxml2-dev git - Rscript -e 'install.packages(c("devtools", "roxygen2", "testthat", "covr"), repos = "https://cran.rstudio.com/")' - - Rscript -e 'devtools::install_deps()' + - Rscript -e 'devtools::install_deps(dependencies = TRUE)' - Rscript -e 'devtools::check()' - Rscript -e 'covr::codecov(type = "all", quiet = FALSE)' diff --git a/DESCRIPTION b/DESCRIPTION index fb9b52f..3a9e3f4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: senamhiR Type: Package Title: A Collection of Functions to Obtain Peruvian Climate Data -Version: 0.6.5 -Date: 2018-06-27 +Version: 0.7.0 +Date: 2018-12-20 Authors@R: c(person(given = c("Conor", "I."), family = "Anderson", role = c("aut","cre"), email = "conor.anderson@utoronto.ca"), person(given = c("William", "A."), family = "Gough", role = "ths", @@ -13,15 +13,15 @@ Description: A collection of functions to obtain archived Peruvian climatologica Depends: R (>= 3.1.0) Imports: - DBI, dplyr, geosphere, - leaflet, - RMySQL, + httr, + rlang, tibble, utils, zoo -Suggests: +Suggests: + leaflet, testthat, covr License: GPL (>= 3) diff --git a/NAMESPACE b/NAMESPACE index e422d3c..f4960b5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,12 +7,6 @@ export(qc) export(quick_audit) export(senamhiR) export(station_search) -importFrom(DBI,dbConnect) -importFrom(DBI,dbDisconnect) -importFrom(DBI,dbGetQuery) -importFrom(DBI,dbListTables) -importFrom(DBI,dbReadTable) -importFrom(RMySQL,MySQL) importFrom(dplyr,"%>%") importFrom(dplyr,arrange) importFrom(dplyr,bind_rows) @@ -21,20 +15,14 @@ importFrom(dplyr,mutate) importFrom(dplyr,rowwise) importFrom(dplyr,select) importFrom(geosphere,distGeo) -importFrom(leaflet,WMSTileOptions) -importFrom(leaflet,addAwesomeMarkers) -importFrom(leaflet,addCircleMarkers) -importFrom(leaflet,addTiles) -importFrom(leaflet,addWMSTiles) -importFrom(leaflet,awesomeIcons) -importFrom(leaflet,leaflet) -importFrom(leaflet,leafletCRS) -importFrom(leaflet,leafletOptions) -importFrom(leaflet,setView) +importFrom(httr,GET) +importFrom(httr,POST) +importFrom(httr,add_headers) +importFrom(httr,content) +importFrom(httr,stop_for_status) +importFrom(rlang,.data) importFrom(stats,sd) importFrom(tibble,add_column) -importFrom(tibble,as_tibble) -importFrom(tibble,has_name) importFrom(tibble,tibble) importFrom(utils,glob2rx) importFrom(zoo,as.yearmon) diff --git a/R/download_data.R b/R/download_data.R index 7bd3af2..5de18f1 100644 --- a/R/download_data.R +++ b/R/download_data.R @@ -1,6 +1,6 @@ -##' @title Access data from the Peruvian National Hydrological and Meterological Service via MySQL +##' @title Access data from the Peruvian National Hydrological and Meterological Service via API ##' -##' @description Download Peruvian historical climate data from the Senamhi via a MySQL archive. +##' @description Download archived Peruvian historical climate data from the Senamhi via an independent API. ##' ##' @param station character; the station id number to process. ##' @param year numeric; an ordered vector of years to retrieve. @@ -9,47 +9,18 @@ ##' ##' @author Conor I. Anderson ##' -##' @importFrom DBI dbConnect dbDisconnect dbGetQuery dbListTables dbReadTable -##' @importFrom RMySQL MySQL -##' @importFrom tibble as_tibble +##' @importFrom httr add_headers content POST stop_for_status ##' ##' @export ##' ##' @examples ##' \dontrun{download_data('000401')} -download_data <- function(station, year) { - - catalogue <- .get_catalogue() - - if (nchar(station) < 6) { - station <- suppressWarnings(try(sprintf("%06d", as.numeric(station)), silent = TRUE)) - if (inherits(station, "try-error") | !station %in% catalogue$StationID) { - stop("Station ID appears invalid.") - } - } - - station_data <- catalogue[catalogue$StationID == station, ] - type = station_data$Type - config = station_data$Configuration - conn <- dbConnect(MySQL(), user = "anonymous", host = "pcd.conr.ca", dbname = "pcd") - on.exit(dbDisconnect(conn)) - - sql_table <- paste0("ID_", station) - if (sum(dbListTables(conn) %in% sql_table) != 1) { - dbDisconnect(conn) - stop("There was an error getting that table.") - } - - if (missing(year) || is.null(year)) { - dat <- as_tibble(dbReadTable(conn, sql_table, row.names = NULL)) - } else { - start <- min(year) - end <- max(year) - dat <- as_tibble(dbGetQuery(conn, paste0("SELECT * FROM ", sql_table, " WHERE Fecha BETWEEN \"", start, "-01-01\" AND \"", end, "-12-31\";"))) - } - dat <- .clean_table(dat, config, type, clean_names = TRUE, fix_types = TRUE) - - dat +download_data <- function(station, year = NULL) { + r <- POST("https://api.conr.ca/pcd/get", + body = list(station = station, year = year), encode = "json", + config = list(add_headers(accept = "application/octet-stream"))) + stop_for_status(r) + unserialize(content(r)) } diff --git a/R/fix_bad_data.R b/R/fix_bad_data.R index d55a304..c1e72c3 100644 --- a/R/fix_bad_data.R +++ b/R/fix_bad_data.R @@ -2,9 +2,10 @@ # First, let's see where this is happening. yearmon <- as.yearmon(bad_table$Fecha[bad_row]) # Let's look at all months other than the bad month - context <- filter(bad_table, !as.yearmon(Fecha) == yearmon & format(Fecha, format = "%m") == format(yearmon, format = "%m")) + context <- filter(bad_table, !as.yearmon(.data$Fecha) == yearmon & + format(.data$Fecha, format = "%m") == format(yearmon, format = "%m")) # Let's make sure we don't have other bad data - context <- filter(context, var > -50 & var < 50) + context <- filter(context, .data$var > -50 & .data$var < 50) context <- unlist(context$var) # We should now have *fairly* clean context bad_data <- unlist(bad_table$var[bad_row]) diff --git a/R/get_catalogue.R b/R/get_catalogue.R index dc76321..65d4022 100644 --- a/R/get_catalogue.R +++ b/R/get_catalogue.R @@ -2,24 +2,15 @@ #' #' @return tbl_df #' -#' @importFrom DBI dbConnect dbDisconnect dbReadTable -#' @importFrom RMySQL MySQL -#' @importFrom tibble as_tibble +#' @importFrom httr content GET stop_for_status #' #' @keywords internal #' #' @author Conor I. Anderson .get_catalogue <- function() { - conn <- dbConnect(MySQL(), user = "anonymous", host = "pcd.conr.ca", dbname = "pcd") - on.exit(dbDisconnect(conn)) - cat <- try(as_tibble(dbReadTable(conn, "catalogue", row.names = NULL))) - if (inherits(cat, "try-error")) { - warning(paste("We couldn't download the catalogue.", - "These results might be slightly outdated.")) - return(catalogue) - } else { - names(cat) <- names(catalogue) - cat - } + r <- GET("https://api.conr.ca/pcd/catalogue", + config = list(add_headers(accept = "application/octet-stream"))) + stop_for_status(r) + unserialize(content(r)) } \ No newline at end of file diff --git a/R/map_stations.R b/R/map_stations.R index 928fb0e..218ad10 100644 --- a/R/map_stations.R +++ b/R/map_stations.R @@ -6,7 +6,7 @@ ##' @param type character; either "osm" for OpenStreetMap tiles, or "sentinel" for cloudless satellite by EOX IT Services GmbH (\url{https://s2maps.eu}). ##' ##' @importFrom dplyr "%>%" filter -##' @importFrom leaflet addAwesomeMarkers addCircleMarkers addTiles addWMSTiles awesomeIcons leaflet leafletCRS leafletOptions setView WMSTileOptions +##' @importFrom rlang .data ##' ##' @export ##' @@ -20,6 +20,11 @@ map_stations <- function(station, type = "osm") { + if (!requireNamespace("leaflet", quietly = TRUE)) { + stop("The \"leaflet\" packge is required to create maps. Please install it.", + call. = FALSE) + } + catalogue <- .get_catalogue() if (!inherits(station, "data.frame")) { @@ -33,10 +38,10 @@ map_stations <- function(station, type = "osm") { stop("One or more requested stations invalid.") } - station <- filter(catalogue, StationID %in% station) + station <- filter(catalogue, .data$StationID %in% station) } - icons <- awesomeIcons( + icons <- leaflet::awesomeIcons( icon = unname(sapply(station$Configuration, function(x) { if (x %in% c("M", "M1", "M2")) "thermometer" else "waterdrop" })), @@ -48,22 +53,23 @@ map_stations <- function(station, type = "osm") { ) map <- if (type == "sentinel") { - leaflet(station, options = leafletOptions(crs = leafletCRS("L.CRS.EPSG4326"))) %>% - addWMSTiles( + leaflet::leaflet(station, options = leaflet::leafletOptions( + crs = leaflet::leafletCRS("L.CRS.EPSG4326"))) %>% + leaflet::addWMSTiles( "https://tiles.maps.eox.at/wms?service=wms", layers = "s2cloudless", - options = WMSTileOptions(format = "image/jpeg"), + options = leaflet::WMSTileOptions(format = "image/jpeg"), attribution = paste("Sentinel-2 cloudless - https://s2maps.eu by EOX", "IT Services GmbH (Contains modified Copernicus", "Sentinel data 2016 & 2017)") ) } else { if (type != "osm") warning("Unrecognized map type. Defaulting to osm.") - leaflet(station) %>% addTiles() + leaflet::leaflet(station) %>% leaflet::addTiles() } map <- map %>% - addAwesomeMarkers(~Longitude, ~Latitude, icon = icons, + leaflet::addAwesomeMarkers(~Longitude, ~Latitude, icon = icons, label = paste0(station$StationID, " - ", station$Station, " (", station$Configuration, ")", @@ -73,7 +79,7 @@ map_stations <- function(station, type = "osm") { # Add a target if it exists target <- c(attr(station, "target_lon"), attr(station, "target_lat")) if (!is.null(target)) { - map <- map %>% addCircleMarkers(lng = target[1], lat = target[2], + map <- map %>% leaflet::addCircleMarkers(lng = target[1], lat = target[2], color = "red", label = paste0("Target: ", target[2], ", ", diff --git a/R/qc.R b/R/qc.R index 1cc61da..8ad3a84 100644 --- a/R/qc.R +++ b/R/qc.R @@ -9,6 +9,7 @@ ##' ##' @importFrom dplyr select filter ##' @importFrom tibble add_column +##' @importFrom rlang .data ##' @importFrom stats sd ##' ##' @export @@ -24,7 +25,7 @@ qc <- function(dat) { if (length(dat) > 1L) { stop("Sorry, for now this script can only process one station at a time.") } else { - dat <- try(read_data(dat)) + dat <- download_data(dat) } } @@ -33,7 +34,7 @@ qc <- function(dat) { } if (grepl("Observations", colnames(dat)[15])) { - observations <- select(dat, 15) %>% unlist + observations <- select(dat, 15) %>% unlist() } else { observations <- rep(NA, nrow(dat)) } @@ -44,7 +45,7 @@ qc <- function(dat) { if (length(maxshifts) > 0) { for (i in 1:length(maxshifts)) { - bad_table <- select(dat, Fecha, var = `Tmax (C)`) + bad_table <- select(dat, .data$Fecha, var = "Tmax (C)") fixes <- .fix_bad_data(bad_table, maxshifts[i], "Tmax", "dps") dat$`Tmax (C)`[maxshifts[i]] <- unlist(fixes[1]) existingobs <- if (!is.na(observations[maxshifts[i]]) && observations[maxshifts[i]] != '') paste(observations[maxshifts[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else "" @@ -54,7 +55,7 @@ qc <- function(dat) { if (length(minshifts) > 0) { for (i in 1:length(minshifts)) { - bad_table <- select(dat, Fecha, var = `Tmin (C)`) + bad_table <- select(dat, .data$Fecha, var = "Tmin (C)") fixes <- .fix_bad_data(bad_table, minshifts[i], "Tmin", "dps") dat$`Tmin (C)`[minshifts[i]] <- unlist(fixes[1]) existingobs <- if (!is.na(observations[minshifts[i]]) && observations[minshifts[i]] != '') paste(observations[minshifts[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else "" @@ -68,14 +69,14 @@ qc <- function(dat) { for (i in 1:length(minmaxerr)) { # First check Tmax - bad_table <- select(dat, Fecha, var = `Tmax (C)`) + bad_table <- select(dat, .data$Fecha, var = "Tmax (C)") fixes <- .fix_bad_data(bad_table, minmaxerr[i], "Tmax", "mme") dat$`Tmax (C)`[minmaxerr[i]] <- unlist(fixes[1]) existingobs <- if (!is.na(observations[minmaxerr[i]]) && observations[minmaxerr[i]] != '') paste(observations[minmaxerr[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else "" observations[minmaxerr[i]] <- paste0(existingobs, unlist(fixes[2])) # Repeat the same for Tmin - bad_table <- select(dat, Fecha, var = `Tmin (C)`) + bad_table <- select(dat, .data$Fecha, var = "Tmin (C)") fixes <- .fix_bad_data(bad_table, minmaxerr[i], "Tmin", "mme") dat$`Tmin (C)`[minmaxerr[i]] <- unlist(fixes[1]) existingobs <- if (!is.na(observations[minmaxerr[i]]) && observations[minmaxerr[i]] != '') paste(observations[minmaxerr[i]], ifelse((unlist(fixes[2]) != ''), "/ ", "")) else "" diff --git a/R/senamhiR.R b/R/senamhiR.R index 9834c55..5e8aef7 100644 --- a/R/senamhiR.R +++ b/R/senamhiR.R @@ -36,6 +36,7 @@ senamhiR <- function(station, year, collapse = FALSE) { } pull_data <- function(stn, year) { + rtn <- download_data(stn, year) attributes(rtn) <- append(attributes(rtn), catalogue[catalogue$StationID == stn,]) rownames(rtn) <- NULL diff --git a/R/station_search.R b/R/station_search.R index c2e9e55..779e864 100644 --- a/R/station_search.R +++ b/R/station_search.R @@ -17,6 +17,7 @@ ##' ##' @importFrom dplyr arrange filter mutate rowwise ##' @importFrom geosphere distGeo +##' @importFrom rlang .data ##' @importFrom utils glob2rx ##' ##' @export @@ -55,12 +56,12 @@ station_search <- function(name = NULL, ignore.case = TRUE, glob = FALSE, region if (!is.null(name)) { if (glob) name <- glob2rx(name) if (length(name) > 1) name <- paste(name, collapse = "|") - filt <- filter(filt, grepl(name, Station, ignore.case = ignore.case, ...)) + filt <- filter(filt, grepl(name, .data$Station, ignore.case = ignore.case, ...)) } # If `region` is not NULL, filter by name if (!is.null(region)) { - filt <- filter(filt, Region == toupper(region)) + filt <- filter(filt, .data$Region == toupper(region)) if (nrow(filt) == 0) { stop("No data found for that region. Did you spell it correctly?") } @@ -68,7 +69,7 @@ station_search <- function(name = NULL, ignore.case = TRUE, glob = FALSE, region # If `config` is not NULL, filter by config if (!is.null(config)) { - filt <- filter(filt, grepl(config, Configuration, ignore.case = ignore.case, ...)) + filt <- filter(filt, grepl(config, .data$Configuration, ignore.case = ignore.case, ...)) if (nrow(filt) == 0) { stop("No data found for that config. Did you pass \"m\" or \"h\"?") } @@ -77,9 +78,10 @@ station_search <- function(name = NULL, ignore.case = TRUE, glob = FALSE, region # If `period` is not NULL, filter by available data if (!is.null(period)) { if (length(period) == 1) { - filt <- filter(filt, `Period (Yr)` >= period) + filt <- filter(filt, .data$`Period (Yr)` >= period) } else { - filt <- filter(filt, `Data Start` <= min(period) & `Data End` >= max(period)) + filt <- filter(filt, .data$`Data Start` <= min(period) & + .data$`Data End` >= max(period)) } if (nrow(filt) == 0) { stop("No station was found for the specified period.") @@ -89,14 +91,15 @@ station_search <- function(name = NULL, ignore.case = TRUE, glob = FALSE, region # If `target` is not NULL, filter by distance to target if (!is.null(target)) { if (length(target) == 1L) { - p1 <- catalogue %>% filter(StationID == target) %>% select(Longitude, Latitude) %>% unlist + p1 <- catalogue %>% filter(.data$StationID == target) %>% + select(.data$Longitude, .data$Latitude) %>% unlist() } else if (length(target) == 2L) { p1 <- c(target[2], target[1]) } else stop("error: check target format") filt <- rowwise(filt) %>% - mutate(Dist = distGeo(p1, c(Longitude, Latitude))/1000) %>% - filter(Dist >= min(dist) & Dist <= max(dist)) - if (sort == TRUE) filt <- arrange(filt, Dist) + mutate(Dist = distGeo(p1, c(.data$Longitude, .data$Latitude))/1000) %>% + filter(.data$Dist >= min(dist) & .data$Dist <= max(dist)) + if (sort == TRUE) filt <- arrange(filt, .data$Dist) attr(filt, "target_lon") <- p1[1] attr(filt, "target_lat") <- p1[2] } diff --git a/README.Rmd b/README.Rmd index 9b80d02..1927eb1 100644 --- a/README.Rmd +++ b/README.Rmd @@ -10,26 +10,26 @@ library(senamhiR) library(dplyr) ``` -The package provides an automated solution for the acquisition of archived Peruvian climate and hydrology data directly within R. The data was compiled from the Senamhi website, and contains all of the data that was available as of April 10, 2018. This data was originally converted from HTML, and is stored in a MySQL database in tibble format. +The package provides an automated solution for the acquisition of archived Peruvian climate and hydrology data directly within R. The data was compiled from the Senamhi website, and contains all of the data that was available as of April 10, 2018. This data was originally converted from HTML, and is now accessible via an API hosted by the package author. It is important to note that the info on the Senamhi website has not undergone quality control, however, this package includes a helper function to perform the most common quality control operations for the temperature variables. More functions will be added in the future. ## Installing -This package is under active development, and is not available from the official Comprehensive R Archive Network (CRAN). To make installation easier, I have written a script that will install the `git2r` and `remotes` packages (if necessary), and then install `senamhiR` and all dependencies. Use the following command to run this script: +This package is under active development, and is not available from the official Comprehensive R Archive Network (CRAN). To make installation easier, I have written a script that should facilitate the installation of the package and its dependencies. Use the following command to run this script: ``` {r, eval = FALSE} source("https://gitlab.com/ConorIA/senamhiR/raw/master/install_senamhiR.R") ``` _Note: It is always a good idea to review code before you run it. Click the URL in the above command to see the commands that we will run to install._ -Once the packages have installed, load `senamhiR` by: +Once the packages have installed, load **senamhiR** by: ``` {r, eval = FALSE} library(senamhiR) ``` ## Basic workflow -The functions contained in the `senamhiR` functions allow for the discovery and visualization of meteorological and hydrological stations, and the acquisition of daily climate data from these stations. +The functions contained in the **senamhiR** functions allow for the discovery and visualization of meteorological and hydrological stations, and the acquisition of daily climate data from these stations. ### `station_search()` @@ -39,9 +39,9 @@ To search for a station by name, use the `station_search()` function. For instan station_search("Santa") ``` -Note that the `tibble` object (a special sort of `data.frame`) won't print more than the first 10 rows by default. To see all of the results, you can wrap the command in `View()` so that it becomes `View(find_station("Santa"))`. +Note that the `tbl_df` object (a special sort of `data.frame`) won't print more than the first 10 rows by default. To see all of the results, you can wrap the command in `View()` so that it becomes `View(find_station("Santa"))`. -Note that you can also use wildcards as supported by the `glob2rx()` from the `utils` package by passing the argument `glob = TRUE`, as in the following example. +Note that you can also use wildcards as supported by the `glob2rx()` from the **utils** package by passing the argument `glob = TRUE`, as in the following example. ```{r} station_search("San*", glob = TRUE) @@ -87,7 +87,7 @@ Make sure to use the assignment operator (`<-`) to save the data into an R objec ### `map_stations()` -Sometimes a long list of stations is hard to visualize spatially. The `map_stations()` function helps to overcome this. This function takes a list of stations and shows them on a map powered by the [Leaflet](http://leafletjs.com/) library. Like the previous function, the map function is even smart enough to take a search as its list of stations as per the example below. +Sometimes a long list of stations is hard to visualize spatially. The `map_stations()` function helps to overcome this. This function takes a list of stations and shows them on a map powered by the [Leaflet](http://leafletjs.com/) library. Like the previous function, the map function is even smart enough to take a search as its list of stations as per the example below. Note that this mapping functionality requires the **leaflet** package to be installed, and it is not included as a dependency of **senamhiR**. #### Show a map of all stations that are between 30 and 50 km of Machu Picchu ```{r, eval=FALSE} diff --git a/README.md b/README.md index b43e3d6..b192c20 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,8 @@ The package provides an automated solution for the acquisition of archived Peruvian climate and hydrology data directly within R. The data was compiled from the Senamhi website, and contains all of the data that was available as of April 10, 2018. This data was originally converted -from HTML, and is stored in a MySQL database in tibble format. +from HTML, and is now accessible via an API hosted by the package +author. It is important to note that the info on the Senamhi website has not undergone quality control, however, this package includes a helper @@ -16,9 +17,9 @@ temperature variables. More functions will be added in the future. This package is under active development, and is not available from the official Comprehensive R Archive Network (CRAN). To make installation -easier, I have written a script that will install the `git2r` and -`remotes` packages (if necessary), and then install `senamhiR` and all -dependencies. Use the following command to run this +easier, I have written a script that should facilitate the installation +of the package and its dependencies. Use the following command to run +this script: ``` r @@ -29,7 +30,7 @@ source("https://gitlab.com/ConorIA/senamhiR/raw/master/install_senamhiR.R") the URL in the above command to see the commands that we will run to install.* -Once the packages have installed, load `senamhiR` by: +Once the packages have installed, load **senamhiR** by: ``` r library(senamhiR) @@ -37,7 +38,7 @@ library(senamhiR) ## Basic workflow -The functions contained in the `senamhiR` functions allow for the +The functions contained in the **senamhiR** functions allow for the discovery and visualization of meteorological and hydrological stations, and the acquisition of daily climate data from these stations. @@ -52,29 +53,29 @@ station_search("Santa") ``` ## # A tibble: 43 x 14 - ## Station StationID Type Configuration `Data Start` `Data End` - ## - ## 1 SANTA MARIA DE N… 000256 CON M 1951 2018 - ## 2 SANTA 000433 CON M 1964 1986 - ## 3 SANTA ISABEL DE … 158201 CON M 1964 1982 - ## 4 SANTA RITA 000829 CON M 1977 1992 - ## 5 SANTA ELENA 000834 CON M 1963 1973 - ## 6 SANTA CRUZ DE HO… 113248 SUT M 2015 2016 - ## 7 SANTA CATALINA D… 153200 CON M 1963 1983 - ## 8 SANTA CRUZ 000351 CON M 1963 2018 - ## 9 HACIENDA SANTA I… 000766 CON M 1954 1955 - ## 10 SANTA ANA 000515 CON M 1942 1947 + ## Station StationID Type Configuration `Data Start` `Data End` + ## + ## 1 SANTA … 000256 CON M 1951 2018 + ## 2 SANTA 000433 CON M 1964 1986 + ## 3 SANTA … 158201 CON M 1964 1982 + ## 4 SANTA … 000829 CON M 1977 1992 + ## 5 SANTA … 000834 CON M 1963 1973 + ## 6 SANTA … 113248 SUT M 2015 2016 + ## 7 SANTA … 153200 CON M 1963 1983 + ## 8 SANTA … 000351 CON M 1963 2018 + ## 9 HACIEN… 000766 CON M 1954 1955 + ## 10 SANTA … 000515 CON M 1942 1947 ## # ... with 33 more rows, and 8 more variables: `Period (Yr)` , ## # `Station Status` , Latitude , Longitude , ## # Altitude , Region , Province , District -Note that the `tibble` object (a special sort of `data.frame`) won’t +Note that the `tbl_df` object (a special sort of `data.frame`) won’t print more than the first 10 rows by default. To see all of the results, you can wrap the command in `View()` so that it becomes `View(find_station("Santa"))`. Note that you can also use wildcards as supported by the `glob2rx()` -from the `utils` package by passing the argument `glob = TRUE`, as in +from the **utils** package by passing the argument `glob = TRUE`, as in the following example. ``` r @@ -82,18 +83,18 @@ station_search("San*", glob = TRUE) ``` ## # A tibble: 139 x 14 - ## Station StationID Type Configuration `Data Start` `Data End` - ## - ## 1 SAN RAFAEL 152222 CON M 1965 1966 - ## 2 SANTA MARIA DE N… 000256 CON M 1951 2018 - ## 3 SAN PEDRO 211404 CON H 2009 2018 - ## 4 SANTIAGO ANTUNEZ… 000426 CON M 1998 2018 - ## 5 SAN DIEGO 000420 CON M 1959 1962 - ## 6 SAN LORENZO # 5 000430 CON M 1966 1972 - ## 7 SAN JACINTO DE N… 000424 CON M 1956 1968 - ## 8 SAN JACINTO 201901 CON H 1947 1990 - ## 9 SANTA 000433 CON M 1964 1986 - ## 10 SANTA ISABEL DE … 158201 CON M 1964 1982 + ## Station StationID Type Configuration `Data Start` `Data End` + ## + ## 1 SAN RA… 152222 CON M 1965 1966 + ## 2 SANTA … 000256 CON M 1951 2018 + ## 3 SAN PE… 211404 CON H 2009 2018 + ## 4 SANTIA… 000426 CON M 1998 2018 + ## 5 SAN DI… 000420 CON M 1959 1962 + ## 6 SAN LO… 000430 CON M 1966 1972 + ## 7 SAN JA… 000424 CON M 1956 1968 + ## 8 SAN JA… 201901 CON H 1947 1990 + ## 9 SANTA 000433 CON M 1964 1986 + ## 10 SANTA … 158201 CON M 1964 1982 ## # ... with 129 more rows, and 8 more variables: `Period (Yr)` , ## # `Station Status` , Latitude , Longitude , ## # Altitude , Region , Province , District @@ -111,18 +112,18 @@ station_search(region = "SAN MARTIN") ``` ## # A tibble: 72 x 14 - ## Station StationID Type Configuration `Data Start` `Data End` - ## - ## 1 CUZCO 000389 CON M 1966 1970 - ## 2 DOS DE MAYO (J. … 153225 CON M 1963 2018 - ## 3 CUZCO-BIAVO 153345 CON M 1996 2018 - ## 4 BIAVO 221804 CON H 1969 2018 - ## 5 LA UNION 000384 CON M 1970 2018 - ## 6 NUEVO LIMA 153312 CON M 1963 2017 - ## 7 BELLAVISTA 000382 CON M 1963 2018 - ## 8 SAN PABLO 153307 CON M 1967 2018 - ## 9 SISA 000381 CON M 1964 1988 - ## 10 ALAO 003308 CON M 1972 2018 + ## Station StationID Type Configuration `Data Start` `Data End` + ## + ## 1 CUZCO 000389 CON M 1966 1970 + ## 2 DOS DE… 153225 CON M 1963 2018 + ## 3 CUZCO-… 153345 CON M 1996 2018 + ## 4 BIAVO 221804 CON H 1969 2018 + ## 5 LA UNI… 000384 CON M 1970 2018 + ## 6 NUEVO … 153312 CON M 1963 2017 + ## 7 BELLAV… 000382 CON M 1963 2018 + ## 8 SAN PA… 153307 CON M 1967 2018 + ## 9 SISA 000381 CON M 1964 1988 + ## 10 ALAO 003308 CON M 1972 2018 ## # ... with 62 more rows, and 8 more variables: `Period (Yr)` , ## # `Station Status` , Latitude , Longitude , ## # Altitude , Region , Province , District @@ -134,18 +135,18 @@ station_search("Santa", period = 1971:2000) ``` ## # A tibble: 10 x 14 - ## Station StationID Type Configuration `Data Start` `Data End` - ## - ## 1 SANTA MARIA DE N… 000256 CON M 1951 2018 - ## 2 SANTA CRUZ 000351 CON M 1963 2018 - ## 3 SANTA CRUZ 155202 CON M 1963 2018 - ## 4 SANTA EULALIA 155213 CON M 1963 2018 - ## 5 SANTA ROSA 000536 CON M 1967 2006 - ## 6 SANTA CRUZ 152303 CON M 1963 2008 - ## 7 SANTA RITA DE CA… 152401 CON M 1963 2018 - ## 8 SANTA MARIA DE N… 152409 CON M 1963 2018 - ## 9 SANTA CLOTILDE 000177 CON M 1963 2017 - ## 10 SANTA ROSA 000823 CON M 1956 2017 + ## Station StationID Type Configuration `Data Start` `Data End` + ## + ## 1 SANTA … 000256 CON M 1951 2018 + ## 2 SANTA … 000351 CON M 1963 2018 + ## 3 SANTA … 155202 CON M 1963 2018 + ## 4 SANTA … 155213 CON M 1963 2018 + ## 5 SANTA … 000536 CON M 1967 2006 + ## 6 SANTA … 152303 CON M 1963 2008 + ## 7 SANTA … 152401 CON M 1963 2018 + ## 8 SANTA … 152409 CON M 1963 2018 + ## 9 SANTA … 000177 CON M 1963 2017 + ## 10 SANTA … 000823 CON M 1956 2017 ## # ... with 8 more variables: `Period (Yr)` , `Station Status` , ## # Latitude , Longitude , Altitude , Region , ## # Province , District @@ -157,18 +158,18 @@ station_search(target = "000401", dist = 0:100) ``` ## # A tibble: 58 x 15 - ## Station StationID Type Configuration `Data Start` `Data End` - ## - ## 1 TARAPOTO 000401 CON M 1998 2018 - ## 2 CUNUMBUQUE 153311 CON M 1963 2018 - ## 3 CUMBAZA 221801 CON H 1968 2018 - ## 4 LAMAS 000383 CON M 1963 2018 - ## 5 SAN ANTONIO 153314 CON M 1963 2018 - ## 6 SHANAO 221802 CON H 1965 2018 - ## 7 SHANAO 210006 SUT H 2016 2018 - ## 8 SHANAO 153328 CON M 2002 2018 - ## 9 TABALOSOS 000322 CON M 1963 2018 - ## 10 EL PORVENIR 000310 CON M 1964 2018 + ## Station StationID Type Configuration `Data Start` `Data End` + ## + ## 1 TARAPO… 000401 CON M 1998 2018 + ## 2 CUNUMB… 153311 CON M 1963 2018 + ## 3 CUMBAZA 221801 CON H 1968 2018 + ## 4 LAMAS 000383 CON M 1963 2018 + ## 5 SAN AN… 153314 CON M 1963 2018 + ## 6 SHANAO 221802 CON H 1965 2018 + ## 7 SHANAO 210006 SUT H 2016 2018 + ## 8 SHANAO 153328 CON M 2002 2018 + ## 9 TABALO… 000322 CON M 1963 2018 + ## 10 EL POR… 000310 CON M 1964 2018 ## # ... with 48 more rows, and 9 more variables: `Period (Yr)` , ## # `Station Status` , Latitude , Longitude , ## # Altitude , Region , Province , District , @@ -181,28 +182,28 @@ station_search(target = c(-13.163333, -72.545556), dist = 0:50) ``` ## # A tibble: 20 x 15 - ## Station StationID Type Configuration `Data Start` `Data End` - ## - ## 1 MACHU PICCHU 000679 CON M 1964 2018 - ## 2 HUYRO 000678 CON M 1964 1981 - ## 3 CHILCA 472A9204 SUT M 2015 2018 - ## 4 ECHARATE 000716 CON M 1981 1982 - ## 5 MARANURA 000676 CON M 1970 1978 - ## 6 OLLANTAYTAMBO 47295014 SUT M 2011 2014 - ## 7 QUILLABAMBA 4729B3E6 SUT M1 2000 2018 - ## 8 QUILLABAMBA 000606 CON M 1964 2018 - ## 9 OCOBAMBA 000681 CON M 1964 1983 - ## 10 MOLLEPATA 000680 CON M 1963 1978 - ## 11 CUNYAC 156224 CON M 2002 2018 - ## 12 ECHARATE 156300 CON M 1963 1981 - ## 13 PUENTE CUNYA 472C44A2 SUT H 2016 2018 - ## 14 PUENTE CUNYAC 230503 CON H 1995 2018 - ## 15 ZURITE 000682 CON M 1963 1983 - ## 16 CURAHUASI 000677 CON M 1963 2018 - ## 17 URUBAMBA 113131 DAV M 2006 2008 - ## 18 URUBAMBA 000683 CON M 1963 2018 - ## 19 ANTA ANCACHURO 000684 CON M 1964 2018 - ## 20 HUACHIBAMBA 156303 CON M 1963 1978 + ## Station StationID Type Configuration `Data Start` `Data End` + ## + ## 1 MACHU … 000679 CON M 1964 2018 + ## 2 HUYRO 000678 CON M 1964 1981 + ## 3 CHILCA 472A9204 SUT M 2015 2018 + ## 4 ECHARA… 000716 CON M 1981 1982 + ## 5 MARANU… 000676 CON M 1970 1978 + ## 6 OLLANT… 47295014 SUT M 2011 2014 + ## 7 QUILLA… 4729B3E6 SUT M1 2000 2018 + ## 8 QUILLA… 000606 CON M 1964 2018 + ## 9 OCOBAM… 000681 CON M 1964 1983 + ## 10 MOLLEP… 000680 CON M 1963 1978 + ## 11 CUNYAC 156224 CON M 2002 2018 + ## 12 ECHARA… 156300 CON M 1963 1981 + ## 13 PUENTE… 472C44A2 SUT H 2016 2018 + ## 14 PUENTE… 230503 CON H 1995 2018 + ## 15 ZURITE 000682 CON M 1963 1983 + ## 16 CURAHU… 000677 CON M 1963 2018 + ## 17 URUBAM… 113131 DAV M 2006 2008 + ## 18 URUBAM… 000683 CON M 1963 2018 + ## 19 ANTA A… 000684 CON M 1964 2018 + ## 20 HUACHI… 156303 CON M 1963 1978 ## # ... with 9 more variables: `Period (Yr)` , `Station Status` , ## # Latitude , Longitude , Altitude , Region , ## # Province , District , Dist @@ -245,7 +246,7 @@ requ ## # ... with 10,947 more rows, and 7 more variables: `TBH07 (C)` , ## # `TBH13 (C)` , `TBH19 (C)` , `Prec07 (mm)` , `Prec19 ## # (mm)` , `Direccion del Viento` , `Velocidad del Viento - ## # (m/s)` + ## # (m/s)` Make sure to use the assignment operator (`<-`) to save the data into an R object, otherwise the data will just print out to the console, and @@ -260,8 +261,10 @@ Sometimes a long list of stations is hard to visualize spatially. The list of stations and shows them on a map powered by the [Leaflet](http://leafletjs.com/) library. Like the previous function, the map function is even smart enough to take a search as its list of -stations as per the example -below. +stations as per the example below. Note that this mapping functionality +requires the **leaflet** package to be installed, and it is not included +as a dependency of +**senamhiR**. #### Show a map of all stations that are between 30 and 50 km of Machu Picchu @@ -328,12 +331,12 @@ requ_qc %>% filter(Observations != "") %>% select(Fecha, `Tmax (C)`, `Tmin (C)`, ``` ## # A tibble: 5 x 5 - ## Fecha `Tmax (C)` `Tmin (C)` `Tmean (C)` Observations - ## - ## 1 2013-02-27 34 22.2 28.1 "Tmax dps: 3.4 -> 34 (1.4)… - ## 2 2013-05-08 31.4 20.8 26.1 Tmax dps: 314 -> 31.4 (0.2… - ## 3 2013-07-18 30.8 NA NA Tmin err: 221.2 -> NA - ## 4 2013-10-28 33.4 23.2 28.3 Tmin dps: 232 -> 23.2 (1.0… + ## Fecha `Tmax (C)` `Tmin (C)` `Tmean (C)` Observations + ## + ## 1 2013-02-27 34 22.2 28.1 "Tmax dps: 3.4 -> 34 (1.4) " + ## 2 2013-05-08 31.4 20.8 26.1 Tmax dps: 314 -> 31.4 (0.25) + ## 3 2013-07-18 30.8 NA NA Tmin err: 221.2 -> NA + ## 4 2013-10-28 33.4 23.2 28.3 Tmin dps: 232 -> 23.2 (1.03) ## 5 2013-12-24 30 23.6 26.8 "Tmax dps: 3 -> 30 (0.77) " For now, the data has been tested for decimal place-errors with the diff --git a/appveyor.yml b/appveyor.yml index c7baac2..134cb54 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,20 +1,22 @@ # DO NOT CHANGE the "init" and "install" sections below -environment: - R_CHECK_ARGS: --no-build-vignettes --no-manual - # Download script file from GitHub init: ps: | + Get-Date $ErrorActionPreference = "Stop" - Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" - Import-Module '..\appveyor-tool.ps1' install: - ps: Bootstrap + ps: | + Import-Module '.\scripts\appveyor-tool.ps1' + Bootstrap # Adapt as necessary starting from here +environment: + global: + WARNINGS_ARE_ERRORS: 1 + build_script: - travis-tool.sh install_deps @@ -22,7 +24,8 @@ test_script: - travis-tool.sh run_tests on_failure: - - travis-tool.sh dump_logs + - 7z a failure.zip *.Rcheck\* + - appveyor PushArtifact failure.zip artifacts: - path: '*.Rcheck\**\*.log' diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..63f2b76 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,10 @@ +version: '3' + +services: + + plumber: + build: ./plumber + restart: always + volumes: + - ./plumber/plumber_settings.R:/plumber_settings.R:ro + - /data/pcd:/data/pcd diff --git a/man/download_data.Rd b/man/download_data.Rd index 3a98f0b..7876871 100644 --- a/man/download_data.Rd +++ b/man/download_data.Rd @@ -2,9 +2,9 @@ % Please edit documentation in R/download_data.R \name{download_data} \alias{download_data} -\title{Access data from the Peruvian National Hydrological and Meterological Service via MySQL} +\title{Access data from the Peruvian National Hydrological and Meterological Service via API} \usage{ -download_data(station, year) +download_data(station, year = NULL) } \arguments{ \item{station}{character; the station id number to process.} @@ -15,7 +15,7 @@ download_data(station, year) tbl_df } \description{ -Download Peruvian historical climate data from the Senamhi via a MySQL archive. +Download archived Peruvian historical climate data from the Senamhi via an independent API. } \examples{ \dontrun{download_data('000401')} diff --git a/plumber/Dockerfile b/plumber/Dockerfile new file mode 100644 index 0000000..9c814c5 --- /dev/null +++ b/plumber/Dockerfile @@ -0,0 +1,16 @@ +FROM trestletech/plumber +MAINTAINER Conor Anderson + +COPY plumber.R /api/plumber.R + +RUN apt-get update &&\ + apt-get install -y --no-install-recommends curl libmariadb-dev &&\ + apt-get clean && rm -rf /tmp/* /var/lib/apt/lists/* + +RUN cd /api &&\ + sed -rn 's/library\((.*)\)/\1/p' plumber.R | sort | uniq > needed_packages &&\ + curl https://gitlab.com/ConorIA/conjuntool/snippets/1788463/raw?inline=false > install_pkgs.R &&\ + Rscript install_pkgs.R &&\ + rm -rf /tmp/* needed_packages install_pkgs.R + +CMD ["/api/plumber.R"] diff --git a/R/clean_table.R b/plumber/plumber.R similarity index 62% rename from R/clean_table.R rename to plumber/plumber.R index bc2a7f9..4fba6f3 100644 --- a/R/clean_table.R +++ b/plumber/plumber.R @@ -1,18 +1,12 @@ -#' Clean up table names and types -#' -#' @param datain the data.frame to process -#' @param config the station configuration -#' @param type the station type -#' @param clean_names Boolean; whether to clean up table names -#' @param remove_missing Boolean; whether to remove missing value codes, e.g. -888, -999 -#' @param fix_types Boolean; whether to fix column types -#' -#' @return tbl_df -#' -#' @importFrom tibble has_name -#' @keywords internal -#' -#' @author Conor I. Anderson +library(plumber) +library(DBI) +library(RMySQL) +library(tibble) +library(storr) + +source("plumber_settings.R") + +st_pcd <- storr_rds("/data/pcd/") .clean_table <- function(datain, config, type, clean_names = FALSE, remove_missing = FALSE, fix_types = FALSE) { @@ -103,3 +97,88 @@ } datain } + +#* Return the requested table +#* @serializer contentType list(type="application/octet-stream") +#* @get /catalogue +function() { + catalogue <- st_pcd$get("catalogue") + serialize(catalogue, NULL) +} + +get_mariadb <- function(station, year = NULL) { + + catalogue <- st_pcd$get("catalogue") + + station_data <- catalogue[catalogue$StationID == station, ] + type = station_data$Type + config = station_data$Configuration + + conn <- dbConnect(MySQL(), user = username, password = password, host = "pcd.conr.ca", dbname = "pcd") + on.exit(dbDisconnect(conn)) + + sql_table <- paste0("ID_", station) + if (sum(dbListTables(conn) %in% sql_table) != 1) { + dbDisconnect(conn) + stop("There was an error getting that table.") + } + + if (is.null(year) | length(year) == 0) { + dat <- as_tibble(dbReadTable(conn, sql_table, row.names = NULL)) + } else { + start <- min(year) + end <- max(year) + dat <- as_tibble(dbGetQuery(conn, paste0("SELECT * FROM ", sql_table, " WHERE Fecha BETWEEN \"", start, "-01-01\" AND \"", end, "-12-31\";"))) + } + dat <- .clean_table(dat, config, type, clean_names = TRUE, fix_types = TRUE) + dat +} + +del_mariadb <- function(station) { + + conn <- dbConnect(MySQL(), user = username, password = password, host = "pcd.conr.ca", dbname = "pcd") + on.exit(dbDisconnect(conn)) + + sql_table <- paste0("ID_", station) + if (sum(dbListTables(conn) %in% sql_table) == 1) { + dbRemoveTable(conn, sql_table) + } else { + warning("Table doesn't exist") + } + +} + +#* Return the requested table +#* @serializer contentType list(type="application/octet-stream") +#* @param station The station +#* @param year A vector of years to return +#* @post /get +function(station, year = NULL) { + + catalogue <- st_pcd$get("catalogue") + + if (nchar(station) < 6) { + station <- suppressWarnings(try(sprintf("%06d", as.numeric(station)), silent = TRUE)) + if (inherits(station, "try-error") | !station %in% catalogue$StationID) { + stop("Station ID appears invalid.") + } + } + + if (st_pcd$exists(station)) { + dat <- st_pcd$get(station) + } else { + message("Moving data from mariadb to plumber") + dat <- try(get_mariadb(station, NULL)) + if (inherits(dat, "type-error")) { + stop("We encountered an error!") + } + st_pcd$set(station, value = dat) + del_mariadb(station) + } + + if (!is.null(year) && length(year) > 0) { + dat <- dat[dat$Fecha >= paste0(min(year), "-01-01") & dat$Fecha <= paste0(max(year), "-12-31"),] + } + + serialize(dat, NULL) +} diff --git a/tests/testthat/test-download_data.R b/tests/testthat/test-download_data.R index caad7f1..9cb5ecb 100644 --- a/tests/testthat/test-download_data.R +++ b/tests/testthat/test-download_data.R @@ -39,5 +39,5 @@ test_that("download_data() can pad with zeroes", { ## should fail when no correct station is given test_that("download_data() fails when an incorrect station is requested", { - expect_error(download_data("foo"), "Station ID appears invalid.", fixed=TRUE) + expect_error(download_data("foo"), "Internal Server Error (HTTP 500).", fixed=TRUE) #FIXME })