diff --git a/DESCRIPTION b/DESCRIPTION index f688446..ccda2db 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: senamhiR Type: Package Title: A Collection of Functions to Obtain Peruvian Climate Data -Version: 0.5.1 -Date: 2017-10-08 +Version: 0.5.2 +Date: 2018-04-27 Authors@R: c(person(given = c("Conor", "I."), family = "Anderson", role = c("aut","cre"), email = "conor.anderson@utoronto.ca"), person(given = c("William", "A."), family = "Gough", role = "ths", diff --git a/NAMESPACE b/NAMESPACE index 722164f..a52463b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(collapse_pcd) export(download_data_sql) export(map_stations) export(qc) @@ -13,6 +14,7 @@ importFrom(DBI,dbListTables) importFrom(DBI,dbReadTable) importFrom(RMySQL,MySQL) importFrom(dplyr,arrange) +importFrom(dplyr,bind_rows) importFrom(dplyr,filter) importFrom(dplyr,mutate) importFrom(dplyr,rowwise) diff --git a/R/clean_table.R b/R/clean_table.R index 4924b53..bc2a7f9 100644 --- a/R/clean_table.R +++ b/R/clean_table.R @@ -23,7 +23,7 @@ colnames <- c("Fecha", "Nivel06 (m)", "Nivel10 (m)", "Nivel14 (m)", "Nivel18 (m)", "Caudal (m^3/s)") if (type == "SUT") - colnames <- c("Fecha", "Tmean (C)", "Tmax (C)", "Tmin (C)", "Humidity (%)", + colnames <- c("Fecha", "Tmean (C)", "Tmax (C)", "Tmin (C)", "Humedad (%)", "Lluvia (mm)", "Presion (mb)", "Velocidad del Viento (m/s)", "Direccion del Viento", "Nivel Medio (m)") } else { @@ -62,7 +62,7 @@ datain$`Tmean (C)` <- as.numeric(datain$`Tmean (C)`) datain$`Tmax (C)` <- as.numeric(datain$`Tmax (C)`) datain$`Tmin (C)` <- as.numeric(datain$`Tmin (C)`) - datain$`Humidity (%)` <- as.numeric(datain$`Humidity (%)`) + datain$`Humedad (%)` <- as.numeric(datain$`Humedad (%)`) datain$`Lluvia (mm)` <- as.numeric(datain$`Lluvia (mm)`) datain$`Presion (mb)` <- as.numeric(datain$`Presion (mb)`) datain$`Direccion del Viento` <- as.character(datain$`Direccion del Viento`) diff --git a/R/collapse_pcd.R b/R/collapse_pcd.R new file mode 100644 index 0000000..ad3cc34 --- /dev/null +++ b/R/collapse_pcd.R @@ -0,0 +1,18 @@ +#' Collapse Senamhi stations with common variables +#' +#' @param datain a list of individual station tables acquired through the \code{senamhiR} function. +#' +#' @importFrom dplyr bind_rows +#' +#' @return a list of collapsed stations +#' @export +#' + +collapse_pcd <- function(datain) { + dataout <- mapply(add_column, .data = datain, StationID = lapply(datain, attr, "StationID"), MoreArgs = list(.before = 1), SIMPLIFY = FALSE, USE.NAMES = FALSE) + name_groups <- sapply(dataout, function(x) {paste(names(x), collapse = ", ")}) + name_groups <- sapply(name_groups, function(x, name_groups) {which(name_groups == x)}, unique(name_groups)) + dataout <- lapply(unique(name_groups), function(x, dataout, name_groups) {do.call("bind_rows", dataout[name_groups == x])}, dataout, name_groups) + if(length(dataout) == 1) return(dataout[[1]]) + dataout +} \ No newline at end of file diff --git a/R/download_data_sql.R b/R/download_data_sql.R index 1fc3d02..fa8ce44 100644 --- a/R/download_data_sql.R +++ b/R/download_data_sql.R @@ -39,7 +39,7 @@ download_data_sql <- function(station, year) { stop("There was an error getting that table.") } - if (missing(year)) { + if (missing(year) || is.null(year)) { dat <- as_tibble(dbReadTable(conn, sql_table, row.names = NULL)) } else { start <- min(year) diff --git a/R/senamhiR.R b/R/senamhiR.R index 1788ac3..43d983c 100644 --- a/R/senamhiR.R +++ b/R/senamhiR.R @@ -4,6 +4,7 @@ ##' ##' @param station character; the station id number to process. Can also be a vector of station ids, which will be returned as a list. ##' @param year numerical; a vector of years to process. Defaults to the full known range. +##' @param collapse Boolean; whether multiple stations should be collapsed into a single tibble stations of the same type. ##' ##' @author Conor I. Anderson ##' @@ -13,9 +14,9 @@ ##' ##' @examples ##' \dontrun{senamhiR('000401', 1998:2015)} -##' \dontrun{senamhiR(c('000401', '000152', '000219'))} +##' \dontrun{senamhiR(c('000401', '000152', '000219'), collapse = TRUE)} -senamhiR <- function(station, year) { +senamhiR <- function(station, year, collapse = FALSE) { if (missing(station)) { station <- readline(prompt = "Enter station number(s) separated by commas: ") station <- trimws(unlist(strsplit(station, split = ","))) @@ -31,15 +32,17 @@ senamhiR <- function(station, year) { stop("One or more requested stations invalid.") } - dataout <- list() - for (stn in station) { - if (missing(year)) year <- 1900:2100 - temp <- download_data_sql(stn, year) - dataout <- c(dataout, list(temp)) - } - if (length(station) == 1) { - return(dataout[[1]]) - } else { - return(dataout) + pull_data <- function(stn, year) { + rtn <- download_data_sql(stn, year) + attributes(rtn) <- append(attributes(rtn), catalogue[catalogue$StationID == stn,]) + rownames(rtn) <- NULL + rtn } + + if (missing(year)) year <- NULL + dataout <- lapply(station, pull_data, year) + + if (length(station) == 1) return(dataout[[1]]) + if (collapse) return(collapse_pcd(dataout)) + dataout } diff --git a/man/collapse_pcd.Rd b/man/collapse_pcd.Rd new file mode 100644 index 0000000..d891f04 --- /dev/null +++ b/man/collapse_pcd.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/collapse_pcd.R +\name{collapse_pcd} +\alias{collapse_pcd} +\title{Collapse Senamhi stations with common variables} +\usage{ +collapse_pcd(datain) +} +\arguments{ +\item{datain}{a list of individual station tables acquired through the \code{senamhiR} function.} +} +\value{ +a list of collapsed stations +} +\description{ +Collapse Senamhi stations with common variables +} diff --git a/man/senamhiR.Rd b/man/senamhiR.Rd index c0a97ea..374ec1d 100644 --- a/man/senamhiR.Rd +++ b/man/senamhiR.Rd @@ -4,19 +4,21 @@ \alias{senamhiR} \title{Download compiled data from the Peruvian National Hydrological and Meterological Service} \usage{ -senamhiR(station, year) +senamhiR(station, year, collapse = FALSE) } \arguments{ \item{station}{character; the station id number to process. Can also be a vector of station ids, which will be returned as a list.} \item{year}{numerical; a vector of years to process. Defaults to the full known range.} + +\item{collapse}{Boolean; whether multiple stations should be collapsed into a single tibble stations of the same type.} } \description{ Download compiled Peruvian historical climate data from the Senamhi web portal. } \examples{ \dontrun{senamhiR('000401', 1998:2015)} -\dontrun{senamhiR(c('000401', '000152', '000219'))} +\dontrun{senamhiR(c('000401', '000152', '000219'), collapse = TRUE)} } \author{ Conor I. Anderson diff --git a/tests/testthat/test-senamhiR.R b/tests/testthat/test-senamhiR.R index 362ac55..b5b1741 100644 --- a/tests/testthat/test-senamhiR.R +++ b/tests/testthat/test-senamhiR.R @@ -28,6 +28,14 @@ test_that("senamhiR can pad with zeroes", { expect_output(str(out), "13 variables") }) +## test senamhiR can collapse multiple stations +test_that("senamhiR can collapse stations with similar names", { + out <- senamhiR(c(401, 280, "472D23BE"), year = 2001, collapse = TRUE) + expect_that(out, is_a("list")) + expect_equal(lengths(out), c(14,11)) + expect_output(str(out), "List of 2") +}) + ## should fail when no correct station is given test_that("senamhiR() fails when an incorrect station is requested", { expect_error(senamhiR("foo"), "One or more requested stations invalid.", fixed=TRUE)