diff --git a/.Rbuildignore b/.Rbuildignore index b8e4acc..91d95f4 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,10 +2,9 @@ ^\.Rproj\.user$ ^\.travis\.yml$ ^\.gitlab-ci\.yml$ -appveyor.yml -.gitattributes -.gitignore -README.md -README.Rmd -LICENSE.md -install_senamhiR.R +^appveyor\.yml$ +^\.gitattributes$ +^\.gitignore$ +^README\..*$ +^LICENSE\.md$ +^install_senamhiR\.R$ diff --git a/DESCRIPTION b/DESCRIPTION index e74997c..1319bd3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,26 +1,24 @@ Package: senamhiR Type: Package Title: A Collection of Functions to Obtain Peruvian Climate Data -Version: 0.4.2 +Version: 0.5.0 Date: 2017-10-08 Authors@R: c(person(given = c("Conor", "I."), family = "Anderson", - role = c("aut","cre"), email = "conor.anderson@mail.utoronto.ca"), + role = c("aut","cre"), email = "conor.anderson@utoronto.ca"), person(given = c("William", "A."), family = "Gough", role = "ths", email = "gough@utsc.utoronto.ca")) -Maintainer: Conor I. Anderson +Maintainer: Conor I. Anderson Description: A collection of functions to obtain archived Peruvian climatological or hydrological data form the Peruvian National Meterology and Hydrology Service. 
Depends: R (>= 3.1.0) Imports: DBI, - DT, dplyr, geosphere, leaflet, magrittr, RMySQL, - shiny, tibble, utils, zoo diff --git a/NAMESPACE b/NAMESPACE index e6158c0..af2d6ba 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,16 +5,12 @@ export(map_stations) export(qc) export(quick_audit) export(senamhiR) -export(station_explorer) export(station_search) importFrom(DBI,dbConnect) importFrom(DBI,dbDisconnect) importFrom(DBI,dbGetQuery) importFrom(DBI,dbListTables) importFrom(DBI,dbReadTable) -importFrom(DT,dataTableOutput) -importFrom(DT,datatable) -importFrom(DT,renderDataTable) importFrom(RMySQL,MySQL) importFrom(dplyr,filter) importFrom(dplyr,select) @@ -25,16 +21,6 @@ importFrom(leaflet,awesomeIcons) importFrom(leaflet,leaflet) importFrom(leaflet,setView) importFrom(magrittr,"%>%") -importFrom(shiny,br) -importFrom(shiny,column) -importFrom(shiny,em) -importFrom(shiny,fluidPage) -importFrom(shiny,fluidRow) -importFrom(shiny,runApp) -importFrom(shiny,selectInput) -importFrom(shiny,shinyApp) -importFrom(shiny,shinyUI) -importFrom(shiny,titlePanel) importFrom(stats,sd) importFrom(tibble,add_column) importFrom(tibble,as_tibble) diff --git a/R/download_data_sql.R b/R/download_data_sql.R index 70a5d99..1fc3d02 100644 --- a/R/download_data_sql.R +++ b/R/download_data_sql.R @@ -20,6 +20,13 @@ download_data_sql <- function(station, year) { + if (nchar(station) < 6) { + station <- suppressWarnings(try(sprintf("%06d", as.numeric(station)), silent = TRUE)) + if (inherits(station, "try-error") | !station %in% catalogue$StationID) { + stop("Station ID appears invalid.") + } + } + station_data <- catalogue[catalogue$StationID == station, ] type = station_data$Type config = station_data$Configuration @@ -27,7 +34,10 @@ download_data_sql <- function(station, year) { conn <- dbConnect(MySQL(), user = "anonymous", host = "pcd.conr.ca", dbname = "pcd") sql_table <- paste0("ID_", station) - if (sum(dbListTables(conn) %in% sql_table) != 1) stop("There was an error getting that table.") + if 
(sum(dbListTables(conn) %in% sql_table) != 1) { + dbDisconnect(conn) + stop("There was an error getting that table.") + } if (missing(year)) { dat <- as_tibble(dbReadTable(conn, sql_table, row.names = NULL)) diff --git a/R/map_stations.R b/R/map_stations.R index f418d3b..df0ca20 100644 --- a/R/map_stations.R +++ b/R/map_stations.R @@ -25,6 +25,16 @@ map_stations <- function(station, zoom) { if (inherits(station, "data.frame")) { station <- station$StationID } + + if (any(nchar(station) < 6)) { + station[nchar(station) < 6] <- suppressWarnings( + try(sprintf("%06d", as.numeric(station[nchar(station) < 6])), + silent = TRUE)) + } + + if (inherits(station, "try-error") || !station %in% catalogue$StationID) { + stop("One or more requested stations invalid.") + } poi <- NULL diff --git a/R/qc.R b/R/qc.R index d7b1c57..0208e4a 100644 --- a/R/qc.R +++ b/R/qc.R @@ -3,9 +3,9 @@ ##' @description A helper function to perform minimal quality control on the data. ##' For now, this script only performs action on the three main temperature variables. ##' -##' @param dat an R object of type data.frame passed form the export_data script +##' @param dat a \code{tbl_df} generated from the \code{senamhiR} package ##' -##' @return an R object of type data.frame. 
+##' @return a \code{tbl_df} ##' ##' @importFrom dplyr select filter ##' @importFrom tibble add_column diff --git a/R/quick_audit.R b/R/quick_audit.R index 7257291..60d8c70 100644 --- a/R/quick_audit.R +++ b/R/quick_audit.R @@ -27,7 +27,7 @@ quick_audit <- function(station, variables, by = "year", report = "pct", reverse dat <- station } else { if (inherits(station, "character")) { - dat <- read_data(station) + dat <- download_data_sql(station) } else { stop("I can't figure out what data you've given me.") } diff --git a/R/senamhiR.R b/R/senamhiR.R index 23f496e..1788ac3 100644 --- a/R/senamhiR.R +++ b/R/senamhiR.R @@ -21,7 +21,13 @@ senamhiR <- function(station, year) { station <- trimws(unlist(strsplit(station, split = ","))) } - if (!station %in% catalogue$StationID) { + if (any(nchar(station) < 6)) { + station[nchar(station) < 6] <- suppressWarnings( + try(sprintf("%06d", as.numeric(station[nchar(station) < 6])), + silent = TRUE)) + } + + if (inherits(station, "try-error") || !station %in% catalogue$StationID) { stop("One or more requested stations invalid.") } diff --git a/R/station_explorer.R b/R/station_explorer.R deleted file mode 100644 index 80ef52b..0000000 --- a/R/station_explorer.R +++ /dev/null @@ -1,93 +0,0 @@ -##' @title A Shiny interface to Senamhi weather and river stations -##' -##' @description A function to launch a shiny web app to explore Senamhi stations. -##' -##' @param local logical; if set to `TRUE`, we will show only the data that is available locally. -##' -##' @return none -##' -##' @author Conor I. 
Anderson -##' -##' @importFrom shiny br column em fluidPage fluidRow runApp selectInput shinyApp shinyUI titlePanel -##' @importFrom DT datatable dataTableOutput renderDataTable -##' -##' @export -##' -##' @examples -##' \dontrun{station_explorer()} - -station_explorer <- function(local = FALSE) { - - if (local) { - if (exists("localcatalogue")) { - data <- localcatalogue - } else { - if (file.exists("localCatalogue.rda")) { - load("localCatalogue.rda") - } else { - stop("You asked to show locally-downloaded data, but I couldn't find a local catalogue file in your environment. Please run `generate_local_catalogue()` first.") - } - } - comment(data) <- "This table lists the data that is present in your working directory, provided you have run `generate_local_catalogue()` recently." - } else { - data <- catalogue - } - - app <- shinyApp( - shinyUI( - fluidPage( - titlePanel("Senamhi station catalogue"), - - # Create a new Row in the UI for selectInputs - fluidRow( - column(2, - selectInput("config", - "Configuration:", - c("All", - unique(as.character(data$Configuration)))) - ), - column(2, - selectInput("type", - "Type:", - c("All", - unique(as.character(data$Type)))) - ), - column(2, - selectInput("sta", - "Station Status:", - c("All", - unique(as.character(data$`Station Status`)))) - ), - column(2, - selectInput("reg", - "Region:", - c("All", - unique(as.character(data$Region)))) - ) - ), - # Create a new row for the table. 
- fluidRow( - dataTableOutput("table")), - fluidRow(br(em(comment(data)))) - ) - ), - server = function(input, output) { - # Filter data based on selections - output$table <- renderDataTable(datatable({ - if (input$config != "All") { - data <- data[data$Configuration == input$config,] - } - if (input$type != "All") { - data <- data[data$Type == input$type,] - } - if (input$sta != "All") { - data <- data[data$`Station Status` == input$sta,] - } - if (input$reg != "All") { - data <- data[data$Region == input$reg,] - } - data - })) - }) - runApp(app) -} diff --git a/R/station_search.R b/R/station_search.R index 5b0d20d..7a7e436 100644 --- a/R/station_search.R +++ b/R/station_search.R @@ -39,6 +39,13 @@ station_search <- function(name = NULL, ignore.case = TRUE, glob = FALSE, region = NULL, baseline = NULL, config = NULL, target = NULL, dist = 0:100, sort = TRUE, ...) { + if (!is.null(target) && length(target) == 1L && nchar(target) < 6) { + target <- suppressWarnings(try(sprintf("%06d", as.numeric(target)), silent = TRUE)) + if (inherits(target, "try-error") || !target %in% catalogue$StationID) { + stop("Target station appears invalid.") + } + } + # If `name` is not NULL, filter by name if (!is.null(name)) { if (glob) name <- glob2rx(name) diff --git a/R/zzz.R b/R/zzz.R index 3c10d0c..7a1341d 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,3 @@ .onAttach <- function(libname = find.package("senamhiR"), pkgname = "senamhiR") { packageStartupMessage("The information accessed by this package was compiled and maintained by Peru's National Meteorology and Hydrology Service (Senamhi). 
The use of this data is of your sole responsibility.") -} \ No newline at end of file +} diff --git a/README.Rmd b/README.Rmd index fab8e6d..2c19d01 100644 --- a/README.Rmd +++ b/README.Rmd @@ -8,11 +8,12 @@ output: ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) library(senamhiR) +library(dplyr) ``` [![build status](https://gitlab.com/ConorIA/senamhiR/badges/master/build.svg)](https://gitlab.com/ConorIA/senamhiR/commits/master) [![Build status](https://ci.appveyor.com/api/projects/status/60kbu1b7wkf7akqn?svg=true)](https://ci.appveyor.com/project/ConorIA/senamhir-bxb45) [![codecov](https://codecov.io/gl/ConorIA/senamhiR/branch/master/graph/badge.svg)](https://codecov.io/gl/ConorIA/senamhiR) -The package provides an automated solution for the acquisition of archived Peruvian climate and hydrology data directly within R. The data was compiled from the Senamhi website, and contains all of the data that was available as of March 2017. This data was originally converted from HTML, and is stored in a MySQL database in tibble format. +The package provides an automated solution for the acquisition of archived Peruvian climate and hydrology data directly within R. The data was compiled from the Senamhi website, and contains all of the data that was available as of April 10, 2018. This data was originally converted from HTML, and is stored in a MySQL database in tibble format. It is important to note that the info on the Senamhi website has not undergone quality control, however, this package includes a helper function to perform the most common quality control operations for the temperature variables. More functions will be added in the future. @@ -77,7 +78,7 @@ If I wanted to download data for Requena (station no. 
000280) from 1981 to 2010, ```{r} requ <- senamhiR("000280", 1981:2010) ``` -_Note: Since the StationID numbers contain leading zeros, they must be entered as a character (in quotation marks)._ +_Note: Since the StationID numbers contain leading zeros, any station that is less than six characters long will be padded with zeroes. i.e. 280 becomes 000280._ ```{r} requ @@ -85,15 +86,7 @@ requ Make sure to use the assignment operator (`<-`) to save the data into an R object, otherwise the data will just print out to the console, and won't get saved anywhere in the memory. -## Additional functions - -`senamhiR` includes some additional functions to help visualize stations more easily. - -### `station_explorer()` - -Often, irrespective of the number of filters one uses, it is simply easier to just mouse through a table and find the data that one needs. To make this "mousing" just a little easier, I have included a Shiny data table to help with navigating the list of stations. Call the table up by running `station_explorer()` with no arguments. - -This table is also fully compatible with the advanced search function. To use a filtered list of stations with the Shiny table, just pass a search result as an argument to the function. This result can be a call to `station_search()`, or an object containing a saved search result. +## For easier station visualization ### `map_stations()` @@ -113,7 +106,7 @@ There are two functions included to perform some basic quality control. The `quick_audit()` function will return a tibble listing the percentage or number of missing values for a station. For instance, the following command will return the percentage of missing values in our 30-year Requena dataset: ```{r} -quick_audit(requ, c("Tmean", "Tmax", "Tmin")) +quick_audit(requ, c("Tmax", "Tmin")) ``` Use `report = "n"` to show the _number_ of missing values. Use `by = "month"` to show missing data by month instead of year. 
For instance, the number of days for which Mean Temperature was missing at Tocache in 1980: @@ -128,8 +121,10 @@ quick_audit(toca, "Tmean", by = "month", report = "n") There is an incomplete and experimental function to perform automated quality control on climate data acquired thought this package. For instance: ```{r} -toca <- senamhiR("000463", year = 1980) -quick_audit(toca, "Tmean", by = "month", report = "n") +requ_dirty <- senamhiR("000280") #1960 to 2018 +requ_qc <- qc(requ_dirty) +requ_qc %>% filter(Observations != "") %>% select(Fecha, `Tmax (C)`, `Tmin (C)`, `Tmean (C)`, Observations) + ``` For now, the data has been tested for decimal place-errors with the following logic: @@ -142,16 +137,16 @@ If the number appears to have missed a decimal place (e.g. 324 -> 32.4; 251 -> 2 If the number seems to be the result of some other typographical error (e.g. 221.2), we discard the data point. -##### Case 2: _T~max~_ < _T~min~_ +##### Case 2: _T_max < _T_min -In case 2, we perform the same tests for both _T~max~_ and _T~min~_. If the number is within 1.5 standard deviations of all values 30 days before and after the day in question, we leave the number alone. (Note: this is often the case for _T~min~_ but seldom the case for _T~max~_). If the number does not fall within 1.5 standard deviations, we perform an additional level of testing to check if the number is the result of a premature decimal point (e.g. 3.4 -> 34.0; 3 -> 30.0). In this case, we try to multiply the number by 10. If this new result is within 1.5 standard deviations of all values 30 days before and after the day in question, we keep the result, otherwise, we discard it. +In case 2, we perform the same tests for both _T_max and _T_min. If the number is within 1.5 standard deviations of all values 30 days before and after the day in question, we leave the number alone. (Note: this is often the case for _T_min but seldom the case for _T_max). 
If the number does not fall within 1.5 standard deviations, we perform an additional level of testing to check if the number is the result of a premature decimal point (e.g. 3.4 -> 34.0; 3 -> 30.0). In this case, we try to multiply the number by 10. If this new result is within 1.5 standard deviations of all values 30 days before and after the day in question, we keep the result, otherwise, we discard it. _I have less confidence in this solution than I do for Case 1._ #### Cases that are currently missed: - - Cases where _T~min~_ is small because of a typo. - - Cases where _T~max~_ is small because of a typo, but not smaller than _T~min~_. + - Cases where _T_min is small because of a typo. + - Cases where _T_max is small because of a typo, but not smaller than _T_min. #### Cases where this function is plain wrong: @@ -159,11 +154,11 @@ _I have less confidence in this solution than I do for Case 1._ #### Variables controlled for: - - _T~max~_ - - _T~min~_ - - _T~mean~_ + - _T_max + - _T_min + - _T_mean -__No other variables are currently tested; hydrological data is not tested. This data should not be considered "high quality", use of the data is your responsibility.__ Note that all values that are modified form their original values will be recorded in a new "Observations" column in the resultant tibble. +__No other variables are currently tested; hydrological data is not tested. This data should not be considered "high quality", use of the data is your responsibility.__ Note that all values that are modified from their original values will be recorded in a new "Observations" column in the resultant tibble. ## Disclaimer @@ -177,7 +172,9 @@ If that seems like a lot of work, just think about how much work it would have b ## Senamhi terms of use -Senamhi's terms of use were originally posted [here](http://www.senamhi.gob.pe/?p=0613), but that link is currently redirecting to the Senamhi home page. 
However, the text of the terms was identical to the [terms](http://www.peruclima.pe/?p=condiciones) of Senamhi's PeruClima website ([Google translation](https://translate.google.com/translate?hl=en&sl=es&tl=en&u=http%3A%2F%2Fwww.peruclima.pe%2F%3Fp%3Dcondiciones)). The terms allow for the free and public access to information on their website. Likewise, the data may be used in for-profit and non-profit applications. However, Senamhi stipulates that any use of the data must be accompanied by a disclaimer that Senamhi is the proprietor of the information. The following text is recommended (official text in Spanish): +Senamhi's terms of use are _technically_ supposed to be [here](http://senamhi.gob.pe/?p=terminos_condiciones), but that link is currently redirecting to the Senamhi home page. I authored this package when the terms were still available online. At the time of development, the terms allowed for the free and public access to information on their website, in both for-profit and non-profit applications. However, Senamhi stipulated that any use of the data must be accompanied by a disclaimer that Senamhi is the proprietor of the information. The following text was recommended (official text in Spanish): - **Official Spanish:** _Información recopilada y trabajada por el Servicio Nacional de Meteorología e Hidrología del Perú. El uso que se le da a esta información es de mi (nuestra) entera responsabilidad._ - **English translation:** This information was compiled and maintained by Peru's National Meteorology and Hydrology Service (Senamhi). The use of this data is of my (our) sole responsibility. + +A message similar to the English message above is printed to the R console whenever the package is loaded. 
\ No newline at end of file diff --git a/README.md b/README.md index cf07fb7..51b4c9f 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,15 @@ -# senamhiR: A collection of functions to obtain Peruvian climate data in R +--- +title: "senamhiR: A collection of functions to obtain Peruvian climate data in R" +output: + html_document: + keep_md: yes +--- [![build status](https://gitlab.com/ConorIA/senamhiR/badges/master/build.svg)](https://gitlab.com/ConorIA/senamhiR/commits/master) [![Build status](https://ci.appveyor.com/api/projects/status/60kbu1b7wkf7akqn?svg=true)](https://ci.appveyor.com/project/ConorIA/senamhir-bxb45) [![codecov](https://codecov.io/gl/ConorIA/senamhiR/branch/master/graph/badge.svg)](https://codecov.io/gl/ConorIA/senamhiR) -The package provides an automated solution for the acquisition of archived Peruvian climate and hydrology data directly within R. The data was compiled from the Senamhi website, and contains all of the data that was available as of March 2017. This data was originally converted from HTML, and is stored in a MySQL database in tibble format. +The package provides an automated solution for the acquisition of archived Peruvian climate and hydrology data directly within R. The data was compiled from the Senamhi website, and contains all of the data that was available as of April 10, 2018. This data was originally converted from HTML, and is stored in a MySQL database in tibble format. It is important to note that the info on the Senamhi website has not undergone quality control, however, this package includes a helper function to perform the most common quality control operations for the temperature variables. More functions will be added in the future. 
@@ -37,22 +42,22 @@ station_search("Santa") ``` ``` -## # A tibble: 42 x 12 -## Station StationID Type Configuration `Data Start` -## -## 1 SANTA MARIA DE NIEVA 000256 CON M 1951 -## 2 SANTA 000433 CON M 1964 -## 3 SANTA RITA 000829 CON M 1977 -## 4 SANTA ELENA 000834 CON M 1963 -## 5 SANTA ISABEL DE SIGUAS 158201 CON M 1964 -## 6 SANTA CRUZ DE HOSPICIO 113248 SUT M 2015 -## 7 SANTA CRUZ 000351 CON M 1963 -## 8 SANTA CATALINA DE PULAN 153200 CON M 1963 -## 9 HACIENDA SANTA INES 000766 CON M 1954 -## 10 SANTAROSA LLIHUA 151505 CON M 1980 -## # ... with 32 more rows, and 7 more variables: `Data End` , `Station -## # Status` , Latitude , Longitude , Region , -## # Province , District +## # A tibble: 42 x 14 +## Station StationID Type Configuration `Data Start` `Data End` +## +## 1 SANTA MARIA DE N… 000256 CON M 1951 2017 +## 2 SANTA 000433 CON M 1964 1986 +## 3 SANTA RITA 000829 CON M 1977 1992 +## 4 SANTA ELENA 000834 CON M 1963 1973 +## 5 SANTA ISABEL DE … 158201 CON M 1964 1982 +## 6 SANTA CRUZ DE HO… 113248 SUT M 2015 2016 +## 7 SANTA CRUZ 000351 CON M 1963 2018 +## 8 SANTA CATALINA D… 153200 CON M 1963 1983 +## 9 HACIENDA SANTA I… 000766 CON M 1954 1955 +## 10 SANTAROSA LLIHUA 151505 CON M 1980 1998 +## # ... with 32 more rows, and 8 more variables: `Period (Yr)` , +## # `Station Status` , Latitude , Longitude , Region , +## # Province , District , Downloaded ``` Note that the `tibble` object (a special sort of `data.frame`) won't print more than the first 10 rows by default. To see all of the results, you can wrap the command in `View()` so that it becomes `View(find_station("Santa"))`. 
@@ -65,22 +70,22 @@ station_search("San*", glob = TRUE) ``` ``` -## # A tibble: 135 x 12 -## Station StationID Type Configuration `Data Start` -## -## 1 SANTA MARIA DE NIEVA 000256 CON M 1951 -## 2 SAN RAFAEL 152222 CON M 1965 -## 3 SAN LORENZO # 5 000430 CON M 1966 -## 4 SAN JACINTO DE NEPENA 000424 CON M 1956 -## 5 SAN JACINTO 201901 CON H 1947 -## 6 SAN DIEGO 000420 CON M 1960 -## 7 SANTIAGO ANTUNEZ DE MAYOLO 000426 CON M 1998 -## 8 SANTA 000433 CON M 1964 -## 9 SAN PEDRO 211404 CON H 2009 -## 10 SANTA RITA 000829 CON M 1977 -## # ... with 125 more rows, and 7 more variables: `Data End` , `Station -## # Status` , Latitude , Longitude , Region , -## # Province , District +## # A tibble: 135 x 14 +## Station StationID Type Configuration `Data Start` `Data End` +## +## 1 SANTA MARIA DE N… 000256 CON M 1951 2017 +## 2 SAN RAFAEL 152222 CON M 1965 1966 +## 3 SAN LORENZO # 5 000430 CON M 1966 1972 +## 4 SAN JACINTO DE N… 000424 CON M 1956 1968 +## 5 SAN JACINTO 201901 CON H 1947 1990 +## 6 SAN DIEGO 000420 CON M 1960 1960 +## 7 SANTIAGO ANTUNEZ… 000426 CON M 1998 2018 +## 8 SANTA 000433 CON M 1964 1986 +## 9 SAN PEDRO 211404 CON H 2009 2018 +## 10 SANTA RITA 000829 CON M 1977 1992 +## # ... with 125 more rows, and 8 more variables: `Period (Yr)` , +## # `Station Status` , Latitude , Longitude , Region , +## # Province , District , Downloaded ``` You can filter your search results by region, by station type, by a given baseline period, and by proximity to another station or a vector of coordinates. You can use any combination of these four filters in your search. The function is fully documented, so take a look at `?station_search`. Let's see some examples. 
@@ -92,22 +97,22 @@ station_search(region = "SAN MARTIN") ``` ``` -## # A tibble: 72 x 12 -## Station StationID Type Configuration `Data Start` `Data End` -## -## 1 MOYOBAMBA 000378 CON M 1946 2016 -## 2 NARANJILLO 000219 CON M 1975 2016 -## 3 NAVARRO 000386 CON M 1964 2016 -## 4 NARANJILLO 4724851A SUT M1 2000 2016 -## 5 EL PORVENIR 4723013A SUT M1 2001 2016 -## 6 NUEVO LIMA 153312 CON M 1963 2016 -## 7 SHEPTE 153301 CON M 1963 1985 -## 8 TINGO DE PONAZA 153318 CON M 1963 2005 -## 9 TINGO DE PONAZA 000297 CON M 1998 2016 -## 10 PUEBLO LIBRE 152228 CON M 1996 1998 -## # ... with 62 more rows, and 6 more variables: `Station Status` , -## # Latitude , Longitude , Region , Province , -## # District +## # A tibble: 72 x 14 +## Station StationID Type Configuration `Data Start` `Data End` +## +## 1 MOYOBAMBA 000378 CON M 1946 2017 +## 2 NARANJILLO 000219 CON M 1975 2017 +## 3 NAVARRO 000386 CON M 1964 2017 +## 4 NARANJILLO 4724851A SUT M1 2000 2018 +## 5 EL PORVENIR 4723013A SUT M1 2001 2018 +## 6 NUEVO LIMA 153312 CON M 1963 2017 +## 7 SHEPTE 153301 CON M 1963 1985 +## 8 TINGO DE PONAZA 153318 CON M 1963 2005 +## 9 TINGO DE PONAZA 000297 CON M 1998 2017 +## 10 PUEBLO LIBRE 152228 CON M 1996 1998 +## # ... 
with 62 more rows, and 8 more variables: `Period (Yr)` , +## # `Station Status` , Latitude , Longitude , Region , +## # Province , District , Downloaded ``` #### Find stations named "Santa", with data available between 1971 to 2000 @@ -116,22 +121,22 @@ station_search("Santa", baseline = 1971:2000) ``` ``` -## # A tibble: 10 x 12 -## Station StationID Type Configuration `Data Start` -## -## 1 SANTA MARIA DE NIEVA 000256 CON M 1951 -## 2 SANTA CRUZ 000351 CON M 1963 -## 3 SANTA EULALIA 155213 CON M 1963 -## 4 SANTA CRUZ 155202 CON M 1963 -## 5 SANTA ROSA 000536 CON M 1967 -## 6 SANTA MARIA DE NANAY 152409 CON M 1963 -## 7 SANTA RITA DE CASTILLA 152401 CON M 1963 -## 8 SANTA CLOTILDE 000177 CON M 1963 -## 9 SANTA CRUZ 152303 CON M 1963 -## 10 SANTA ROSA 000823 CON M 1970 -## # ... with 7 more variables: `Data End` , `Station Status` , +## # A tibble: 10 x 14 +## Station StationID Type Configuration `Data Start` `Data End` +## +## 1 SANTA MARIA DE N… 000256 CON M 1951 2017 +## 2 SANTA CRUZ 000351 CON M 1963 2018 +## 3 SANTA EULALIA 155213 CON M 1963 2018 +## 4 SANTA CRUZ 155202 CON M 1963 2018 +## 5 SANTA ROSA 000536 CON M 1967 2006 +## 6 SANTA MARIA DE N… 152409 CON M 1963 2018 +## 7 SANTA RITA DE CA… 152401 CON M 1963 2018 +## 8 SANTA CLOTILDE 000177 CON M 1963 2014 +## 9 SANTA CRUZ 152303 CON M 1963 2008 +## 10 SANTA ROSA 000823 CON M 1966 2017 +## # ... with 8 more variables: `Period (Yr)` , `Station Status` , ## # Latitude , Longitude , Region , Province , -## # District +## # District , Downloaded ``` #### Find all stations between 0 and 100 km from Station No. 
000401 @@ -140,22 +145,22 @@ station_search(target = "000401", dist = 0:100) ``` ``` -## # A tibble: 57 x 13 -## Station StationID Type Configuration `Data Start` `Data End` -## -## 1 TARAPOTO 000401 CON M 1998 2016 -## 2 CUNUMBUQUE 153311 CON M 1963 2016 -## 3 CUMBAZA 221801 CON H 1968 2015 -## 4 LAMAS 000383 CON M 1963 2016 -## 5 SAN ANTONIO 153314 CON M 1963 2016 -## 6 SHANAO 221802 CON H 1965 2015 -## 7 SHANAO 153328 CON M 2002 2016 -## 8 SHANAO 210006 SUT H 2016 2017 -## 9 TABALOSOS 000322 CON M 1963 2016 -## 10 EL PORVENIR 000310 CON M 1964 2016 -## # ... with 47 more rows, and 7 more variables: `Station Status` , -## # Latitude , Longitude , Region , Province , -## # District , Dist +## # A tibble: 57 x 15 +## Station StationID Type Configuration `Data Start` `Data End` +## +## 1 TARAPOTO 000401 CON M 1998 2017 +## 2 CUNUMBUQUE 153311 CON M 1963 2017 +## 3 CUMBAZA 221801 CON H 1968 2017 +## 4 LAMAS 000383 CON M 1963 2017 +## 5 SAN ANTONIO 153314 CON M 1963 2017 +## 6 SHANAO 221802 CON H 1965 2017 +## 7 SHANAO 153328 CON M 2002 2017 +## 8 SHANAO 210006 SUT H 2016 2018 +## 9 TABALOSOS 000322 CON M 1963 2017 +## 10 EL PORVENIR 000310 CON M 1964 2017 +## # ... 
with 47 more rows, and 9 more variables: `Period (Yr)` , +## # `Station Status` , Latitude , Longitude , Region , +## # Province , District , Downloaded , Dist ``` #### Find all stations that are within 50 km of Machu Picchu @@ -164,31 +169,31 @@ station_search(target = c(-13.163333, -72.545556), dist = 0:50) ``` ``` -## # A tibble: 19 x 13 -## Station StationID Type Configuration `Data Start` `Data End` -## -## 1 MACHU PICCHU 000679 CON M 1964 2016 -## 2 HUYRO 000678 CON M 1964 1981 -## 3 CHILCA 472A9204 SUT H 2015 2016 -## 4 ECHARATE 000716 CON M 1981 1982 -## 5 MARANURA 000676 CON M 1970 1978 -## 6 OLLANTAYTAMBO 47295014 SUT M 2011 2013 -## 7 QUILLABAMBA 4729B3E6 SUT M1 2001 2016 -## 8 QUILLABAMBA 000606 CON M 1964 2016 -## 9 OCOBAMBA 000681 CON M 1964 1983 -## 10 MOLLEPATA 000680 CON M 1963 1978 -## 11 CUNYAC 156224 CON M 2002 2016 -## 12 ECHARATE 156300 CON M 1963 1981 -## 13 PUENTE CUNYAC 230503 CON H 1995 2016 -## 14 ZURITE 000682 CON M 1964 1983 -## 15 CURAHUASI 000677 CON M 1964 2016 -## 16 URUBAMBA 113131 DAV M 2006 2008 -## 17 URUBAMBA 000683 CON M 1963 2016 -## 18 ANTA ANCACHURO 000684 CON M 1964 2016 -## 19 HUACHIBAMBA 156303 CON M 1963 1978 -## # ... 
with 7 more variables: `Station Status` , Latitude , -## # Longitude , Region , Province , District , -## # Dist +## # A tibble: 19 x 15 +## Station StationID Type Configuration `Data Start` `Data End` +## +## 1 MACHU PICCHU 000679 CON M 1964 2017 +## 2 HUYRO 000678 CON M 1964 1981 +## 3 CHILCA 472A9204 SUT H 2015 2018 +## 4 ECHARATE 000716 CON M 1981 1982 +## 5 MARANURA 000676 CON M 1970 1978 +## 6 OLLANTAYTAMBO 47295014 SUT M 2011 2013 +## 7 QUILLABAMBA 4729B3E6 SUT M1 2001 2018 +## 8 QUILLABAMBA 000606 CON M 1964 2018 +## 9 OCOBAMBA 000681 CON M 1964 1983 +## 10 MOLLEPATA 000680 CON M 1963 1978 +## 11 CUNYAC 156224 CON M 2002 2018 +## 12 ECHARATE 156300 CON M 1963 1981 +## 13 PUENTE CUNYAC 230503 CON H 1995 2018 +## 14 ZURITE 000682 CON M 1964 1983 +## 15 CURAHUASI 000677 CON M 1963 2017 +## 16 URUBAMBA 113131 DAV M 2006 2008 +## 17 URUBAMBA 000683 CON M 1963 2017 +## 18 ANTA ANCACHURO 000684 CON M 1964 2017 +## 19 HUACHIBAMBA 156303 CON M 1963 1978 +## # ... with 9 more variables: `Period (Yr)` , `Station Status` , +## # Latitude , Longitude , Region , Province , +## # District , Downloaded , Dist ``` ### Acquire data: `senamhiR()` @@ -201,7 +206,7 @@ If I wanted to download data for Requena (station no. 000280) from 1981 to 2010, ```r requ <- senamhiR("000280", 1981:2010) ``` -_Note: Since the StationID numbers contain leading zeros, they must be entered as a character (in quotation marks)._ +_Note: Since the StationID numbers contain leading zeros, any station that is less than six characters long will be padded with zeroes. i.e. 
280 becomes 000280._ ```r @@ -209,36 +214,28 @@ requ ``` ``` -## # A tibble: 10,957 x 14 -## Fecha `Tmean (C)` `Tmax (C)` `Tmin (C)` `TBS07 (C)` `TBS13 (C)` -## -## 1 1981-01-01 29.0 35.0 23.0 24.8 30.2 -## 2 1981-01-02 28.1 34.0 22.2 24.2 30.0 -## 3 1981-01-03 26.1 29.0 23.2 24.6 25.2 -## 4 1981-01-04 26.1 30.2 22.0 24.6 28.0 -## 5 1981-01-05 27.7 33.0 22.4 24.0 25.0 -## 6 1981-01-06 29.1 35.2 23.0 25.0 30.8 -## 7 1981-01-07 28.3 33.6 23.0 25.4 30.8 -## 8 1981-01-08 30.1 37.4 22.8 25.4 35.0 -## 9 1981-01-09 29.0 35.0 23.0 27.0 35.0 -## 10 1981-01-10 29.0 35.6 22.4 24.8 34.4 -## # ... with 10,947 more rows, and 8 more variables: `TBS19 (C)` , -## # `TBH07 (C)` , `TBH13 (C)` , `TBH19 (C)` , `Prec07 -## # (mm)` , `Prec19 (mm)` , `Direccion del Viento` , -## # `Velocidad del Viento (m/s)` +## # A tibble: 10,957 x 13 +## Fecha `Tmax (C)` `Tmin (C)` `TBS07 (C)` `TBS13 (C)` `TBS19 (C)` +## +## 1 1981-01-01 35.0 23.0 24.8 30.2 24.4 +## 2 1981-01-02 34.0 22.2 24.2 30.0 25.6 +## 3 1981-01-03 29.0 23.2 24.6 25.2 24.0 +## 4 1981-01-04 30.2 22.0 24.6 28.0 25.0 +## 5 1981-01-05 33.0 22.4 24.0 25.0 23.4 +## 6 1981-01-06 35.2 23.0 25.0 30.8 27.8 +## 7 1981-01-07 33.6 23.0 25.4 30.8 26.0 +## 8 1981-01-08 37.4 22.8 25.4 35.0 24.0 +## 9 1981-01-09 35.0 23.0 27.0 35.0 26.0 +## 10 1981-01-10 35.6 22.4 24.8 34.4 28.0 +## # ... with 10,947 more rows, and 7 more variables: `TBH07 (C)` , +## # `TBH13 (C)` , `TBH19 (C)` , `Prec07 (mm)` , `Prec19 +## # (mm)` , `Direccion del Viento` , `Velocidad del Viento +## # (m/s)` ``` Make sure to use the assignment operator (`<-`) to save the data into an R object, otherwise the data will just print out to the console, and won't get saved anywhere in the memory. -## Additional functions - -`senamhiR` includes some additional functions to help visualize stations more easily. - -### `station_explorer()` - -Often, irrespective of the number of filters one uses, it is simply easier to just mouse through a table and find the data that one needs. 
To make this "mousing" just a little easier, I have included a Shiny data table to help with navigating the list of stations. Call the table up by running `station_explorer()` with no arguments. - -This table is also fully compatible with the advanced search function. To use a filtered list of stations with the Shiny table, just pass a search result as an argument to the function. This result can be a call to `station_search()`, or an object containing a saved search result. +## For easier station visualization ### `map_stations()` @@ -260,23 +257,23 @@ The `quick_audit()` function will return a tibble listing the percentage or numb ```r -quick_audit(requ, c("Tmean", "Tmax", "Tmin")) -``` - -``` -## # A tibble: 30 x 4 -## Year `Tmean (C) pct NA` `Tmax (C) pct NA` `Tmin (C) pct NA` -## -## 1 1981 8.4931507 8.4931507 8.4931507 -## 2 1982 0.0000000 0.0000000 0.0000000 -## 3 1983 41.9178082 41.9178082 41.9178082 -## 4 1984 17.2131148 8.1967213 17.2131148 -## 5 1985 7.6712329 0.2739726 7.6712329 -## 6 1986 0.8219178 0.8219178 0.8219178 -## 7 1987 17.8082192 17.8082192 17.8082192 -## 8 1988 8.4699454 8.4699454 8.4699454 -## 9 1989 0.0000000 0.0000000 0.0000000 -## 10 1990 0.0000000 0.0000000 0.0000000 +quick_audit(requ, c("Tmax", "Tmin")) +``` + +``` +## # A tibble: 30 x 3 +## Year `Tmax (C) pct NA` `Tmin (C) pct NA` +## +## 1 1981 8.49 8.49 +## 2 1982 0. 0. +## 3 1983 41.9 41.9 +## 4 1984 8.20 17.2 +## 5 1985 0.274 7.67 +## 6 1986 0.822 0.822 +## 7 1987 17.8 17.8 +## 8 1988 8.47 8.47 +## 9 1989 0. 0. +## 10 1990 0. 0. ## # ... 
with 20 more rows ``` @@ -290,20 +287,20 @@ quick_audit(toca, "Tmean", by = "month", report = "n") ``` ## # A tibble: 12 x 5 -## Year Month `Year-month` `Tmean (C) consec NA` `Tmean (C) tot NA` +## Year Month `Year-month` `Tmean (C) consec NA` `Tmean (C) tot NA` ## -## 1 1980 01 Jan 1980 0 0 -## 2 1980 02 Feb 1980 0 0 -## 3 1980 03 Mar 1980 2 3 -## 4 1980 04 Apr 1980 4 4 -## 5 1980 05 May 1980 0 0 -## 6 1980 06 Jun 1980 0 0 -## 7 1980 07 Jul 1980 0 0 -## 8 1980 08 Aug 1980 0 0 -## 9 1980 09 Sep 1980 1 1 -## 10 1980 10 Oct 1980 0 0 -## 11 1980 11 Nov 1980 1 1 -## 12 1980 12 Dec 1980 0 0 +## 1 1980 01 Jan 1980 0 0 +## 2 1980 02 Feb 1980 0 0 +## 3 1980 03 Mar 1980 2 3 +## 4 1980 04 Apr 1980 4 4 +## 5 1980 05 May 1980 0 0 +## 6 1980 06 Jun 1980 0 0 +## 7 1980 07 Jul 1980 0 0 +## 8 1980 08 Aug 1980 0 0 +## 9 1980 09 Sep 1980 1 1 +## 10 1980 10 Oct 1980 0 0 +## 11 1980 11 Nov 1980 1 1 +## 12 1980 12 Dec 1980 0 0 ``` ### `qc()` @@ -312,26 +309,20 @@ There is an incomplete and experimental function to perform automated quality co ```r -toca <- senamhiR("000463", year = 1980) -quick_audit(toca, "Tmean", by = "month", report = "n") +requ_dirty <- senamhiR("000280") #1960 to 2018 +requ_qc <- qc(requ_dirty) +requ_qc %>% filter(Observations != "") %>% select(Fecha, `Tmax (C)`, `Tmin (C)`, `Tmean (C)`, Observations) ``` ``` -## # A tibble: 12 x 5 -## Year Month `Year-month` `Tmean (C) consec NA` `Tmean (C) tot NA` -## -## 1 1980 01 Jan 1980 0 0 -## 2 1980 02 Feb 1980 0 0 -## 3 1980 03 Mar 1980 2 3 -## 4 1980 04 Apr 1980 4 4 -## 5 1980 05 May 1980 0 0 -## 6 1980 06 Jun 1980 0 0 -## 7 1980 07 Jul 1980 0 0 -## 8 1980 08 Aug 1980 0 0 -## 9 1980 09 Sep 1980 1 1 -## 10 1980 10 Oct 1980 0 0 -## 11 1980 11 Nov 1980 1 1 -## 12 1980 12 Dec 1980 0 0 +## # A tibble: 5 x 5 +## Fecha `Tmax (C)` `Tmin (C)` `Tmean (C)` Observations +## +## 1 2013-02-27 34.0 22.2 28.1 "Tmax dps: 3.4 -> 34 (1.4)… +## 2 2013-05-08 31.4 20.8 26.1 Tmax dps: 314 -> 31.4 (0.2… +## 3 2013-07-18 30.8 NA NA Tmin err: 
221.2 -> NA +## 4 2013-10-28 33.4 23.2 28.3 Tmin dps: 232 -> 23.2 (1.0… +## 5 2013-12-24 30.0 23.6 26.8 "Tmax dps: 3 -> 30 (0.77) " ``` For now, the data has been tested for decimal place-errors with the following logic: @@ -344,16 +335,16 @@ If the number appears to have missed a decimal place (e.g. 324 -> 32.4; 251 -> 2 If the number seems to be the result of some other typographical error (e.g. 221.2), we discard the data point. -##### Case 2: _T~max~_ < _T~min~_ +##### Case 2: _T_max < _T_min -In case 2, we perform the same tests for both _T~max~_ and _T~min~_. If the number is within 1.5 standard deviations of all values 30 days before and after the day in question, we leave the number alone. (Note: this is often the case for _T~min~_ but seldom the case for _T~max~_). If the number does not fall within 1.5 standard deviations, we perform an additional level of testing to check if the number is the result of a premature decimal point (e.g. 3.4 -> 34.0; 3 -> 30.0). In this case, we try to multiply the number by 10. If this new result is within 1.5 standard deviations of all values 30 days before and after the day in question, we keep the result, otherwise, we discard it. +In case 2, we perform the same tests for both _T_max and _T_min. If the number is within 1.5 standard deviations of all values 30 days before and after the day in question, we leave the number alone. (Note: this is often the case for _T_min but seldom the case for _T_max). If the number does not fall within 1.5 standard deviations, we perform an additional level of testing to check if the number is the result of a premature decimal point (e.g. 3.4 -> 34.0; 3 -> 30.0). In this case, we try to multiply the number by 10. If this new result is within 1.5 standard deviations of all values 30 days before and after the day in question, we keep the result, otherwise, we discard it. 
_I have less confidence in this solution than I do for Case 1._ #### Cases that are currently missed: - - Cases where _T~min~_ is small because of a typo. - - Cases where _T~max~_ is small because of a typo, but not smaller than _T~min~_. + - Cases where _T_min is small because of a typo. + - Cases where _T_max is small because of a typo, but not smaller than _T_min. #### Cases where this function is plain wrong: @@ -361,11 +352,11 @@ _I have less confidence in this solution than I do for Case 1._ #### Variables controlled for: - - _T~max~_ - - _T~min~_ - - _T~mean~_ + - _T_max + - _T_min + - _T_mean -__No other variables are currently tested; hydrological data is not tested. This data should not be considered "high quality", use of the data is your responsibility.__ Note that all values that are modified form their original values will be recorded in a new "Observations" column in the resultant tibble. +__No other variables are currently tested; hydrological data is not tested. This data should not be considered "high quality", use of the data is your responsibility.__ Note that all values that are modified from their original values will be recorded in a new "Observations" column in the resultant tibble. ## Disclaimer @@ -379,7 +370,9 @@ If that seems like a lot of work, just think about how much work it would have b ## Senamhi terms of use -Senamhi's terms of use were originally posted [here](http://www.senamhi.gob.pe/?p=0613), but that link is currently redirecting to the Senamhi home page. However, the text of the terms was identical to the [terms](http://www.peruclima.pe/?p=condiciones) of Senamhi's PeruClima website ([Google translation](https://translate.google.com/translate?hl=en&sl=es&tl=en&u=http%3A%2F%2Fwww.peruclima.pe%2F%3Fp%3Dcondiciones)). The terms allow for the free and public access to information on their website. Likewise, the data may be used in for-profit and non-profit applications. 
However, Senamhi stipulates that any use of the data must be accompanied by a disclaimer that Senamhi is the proprietor of the information. The following text is recommended (official text in Spanish): +Senamhi's terms of use are _technically_ supposed to be [here](http://senamhi.gob.pe/?p=terminos_condiciones), but that link is currently redirecting to the Senamhi home page. I authored this package when the terms were still available online. At the time of development, the terms allowed for the free and public access to information on their website, in both for-profit and non-profit applications. However, Senamhi stipulated that any use of the data must be accompanied by a disclaimer that Senamhi is the proprietor of the information. The following text was recommended (official text in Spanish): - **Official Spanish:** _Información recopilada y trabajada por el Servicio Nacional de Meteorología e Hidrología del Perú. El uso que se le da a esta información es de mi (nuestra) entera responsabilidad._ - **English translation:** This information was compiled and maintained by Peru's National Meteorology and Hydrology Service (Senamhi). The use of this data is of my (our) sole responsibility. + +A message similar to the English message above is printed to the R console whenever the package is loaded. diff --git a/data/catalogue.rda b/data/catalogue.rda index 5e90350..8b64f16 100644 Binary files a/data/catalogue.rda and b/data/catalogue.rda differ diff --git a/man/qc.Rd b/man/qc.Rd index 487f6c5..d9d4e2c 100644 --- a/man/qc.Rd +++ b/man/qc.Rd @@ -7,10 +7,10 @@ qc(dat) } \arguments{ -\item{dat}{an R object of type data.frame passed form the export_data script} +\item{dat}{a \code{tbl_df} generated from the \code{senamhiR} package} } \value{ -an R object of type data.frame. +a \code{tbl_df} } \description{ A helper function to perform minimal quality control on the data. 
diff --git a/man/station_explorer.Rd b/man/station_explorer.Rd deleted file mode 100644 index c70ff58..0000000 --- a/man/station_explorer.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/station_explorer.R -\name{station_explorer} -\alias{station_explorer} -\title{A Shiny interface to Senamhi weather and river stations} -\usage{ -station_explorer(local = FALSE) -} -\arguments{ -\item{local}{logical; if set to `TRUE`, we will show only the data that is available locally.} -} -\value{ -none -} -\description{ -A function to launch a shiny web app to explore Senamhi stations. -} -\examples{ -\dontrun{station_explorer()} -} -\author{ -Conor I. Anderson -} diff --git a/tests/testthat/test-download_data_sql.R b/tests/testthat/test-download_data_sql.R new file mode 100644 index 0000000..7b3c2d2 --- /dev/null +++ b/tests/testthat/test-download_data_sql.R @@ -0,0 +1,43 @@ +library("testthat") +library("senamhiR") + +context("Test `download_data_sql()`") + +## test senamhiR download (H, CON) +test_that("download_data_sql() can download data", { + out <- download_data_sql("230715") + expect_identical(names(out)[6], "Caudal (m^3/s)") + expect_that(out, is_a("tbl_df")) + expect_output(str(out), "6 variables") +}) + +## test senamhiR download by year (H, SUT) +test_that("download_data_sql() can filter by year", { + out <- senamhiR("472D23BE", 2001:2010) + expect_identical(names(out)[7], "Presion (mb)") + expect_that(out, is_a("tbl_df")) + expect_output(str(out), "3652 obs") + expect_output(str(out), "10 variables") +}) + +## test senamhiR download M, DAV +test_that("download_data_sql() can download DAV stations", { + out <- senamhiR("113129", 2001:2005) + expect_identical(names(out)[5], "Humedad (%)") + expect_that(out, is_a("tbl_df")) + expect_output(str(out), "1826 obs") + expect_output(str(out), "9 variables") +}) + +## test senamhiR can pad with zeroes +test_that("download_data_sql() can pad with zeroes", { + 
out <- senamhiR(401) + expect_identical(names(out)[11], "Prec19 (mm)") + expect_that(out, is_a("tbl_df")) + expect_output(str(out), "13 variables") +}) + +## should fail when no correct station is given +test_that("download_data_sql() fails when an incorrect station is requested", { + expect_error(download_data_sql("foo"), "Station ID appears invalid.", fixed=TRUE) +}) diff --git a/tests/testthat/test-map_stations.R b/tests/testthat/test-map_stations.R index 4053f0b..a0a8a05 100644 --- a/tests/testthat/test-map_stations.R +++ b/tests/testthat/test-map_stations.R @@ -6,10 +6,27 @@ context("Test `map_stations()`") ## test a map of one station test_that("map_stations() can map a single station", { - map_stations("000401") + map <- map_stations("000401") + expect_that(attr(map$x, "leafletData"), is_a("tbl_df")) + expect_output(str(attr(map$x, "leafletData")), "1 obs") + expect_output(str(attr(map$x, "leafletData")), "14 variables") +}) + +## test a map of one station padded with zeros +test_that("map_stations() can pad a StationID", { + map <- map_stations(401) + expect_that(attr(map$x, "leafletData"), is_a("tbl_df")) + expect_output(str(attr(map$x, "leafletData")), "1 obs") + expect_output(str(attr(map$x, "leafletData")), "14 variables") }) ## test a map of one searched stations test_that("map_stations() can map a station search result", { - map_stations(df) + map <- map_stations(df) + expect_identical(attr(map$x, "leafletData"), df) +}) + +## map_stations should fail if we ask for an invalid station +test_that("map_stations() fails if passed an invalid target", { + expect_error(map_stations("foo"), "One or more requested stations invalid.", fixed=TRUE) }) diff --git a/tests/testthat/test-qc.R b/tests/testthat/test-qc.R index c804854..8336325 100644 --- a/tests/testthat/test-qc.R +++ b/tests/testthat/test-qc.R @@ -10,10 +10,9 @@ test_that("qc can fix errors", { out <- qc(indat) expect_identical(names(out)[15], "Observations") expect_that(out, is_a("tbl_df")) - 
expect_output(str(out), "20820 obs") expect_output(str(out), "15 variables") expect_equal(out$`Tmin (C)`[19660], 23.2) - expect_identical(out$Observations[19660], "Tmin dps: 232 -> 23.2 (1.04)") + expect_identical(out$Observations[19660], "Tmin dps: 232 -> 23.2 (1.03)") }) ## should fail if not enough context diff --git a/tests/testthat/test-quick_audit.R b/tests/testthat/test-quick_audit.R index 5fc8368..b9146f7 100644 --- a/tests/testthat/test-quick_audit.R +++ b/tests/testthat/test-quick_audit.R @@ -6,20 +6,20 @@ context("Test `quick_audit()`") ## test quick audit by year and return percent missing values test_that("quick_audit() can audit by year", { - df <- quick_audit(indat, variables = c("Tmean", "Tmax", "Tmin"), by = "year") + df <- quick_audit(indat, variables = c("Tmax", "Tmin"), by = "year") expect_that(df, is_a("tbl_df")) expect_output(str(df), "3 obs") - expect_output(str(df), "4 variables") - expect_equal(df$`Tmean (C) pct NA`[1], 83.28767) + expect_output(str(df), "3 variables") + expect_equal(df$`Tmax (C) pct NA`[1], 83.28767) }) ## test quick audit by month and return number of missing values test_that("quick_audit() can audit by month", { - df <- quick_audit(indat, variables = c("Tmean", "Tmax", "Tmin"), by = "month", report = "n") + df <- quick_audit(indat, variables = c("Tmax", "Tmin"), by = "month", report = "n") expect_that(df, is_a("tbl_df")) expect_output(str(df), "36 obs") - expect_output(str(df), "9 variables") - expect_equal(df$`Tmean (C) tot NA`[3], 31) + expect_output(str(df), "7 variables") + expect_equal(df$`Tmax (C) tot NA`[3], 31) }) ## test quick_audit with missing variables and in reverse @@ -27,8 +27,8 @@ test_that("quick_audit() can audit with missing variables", { df <- quick_audit(indat, reverse = TRUE) expect_that(df, is_a("tbl_df")) expect_output(str(df), "3 obs") - expect_output(str(df), "14 variables") - expect_lt(df$`Tmean (C) pct present`[1], 16.71233) + expect_output(str(df), "13 variables") + expect_lt(df$`Tmax (C) pct 
present`[1], 16.71233) }) ## test quick_audit warns if "year" or "month" not set correctly diff --git a/tests/testthat/test-senamhiR.R b/tests/testthat/test-senamhiR.R index 86bd350..362ac55 100644 --- a/tests/testthat/test-senamhiR.R +++ b/tests/testthat/test-senamhiR.R @@ -6,19 +6,26 @@ context("Test `senamhiR()`") ## test senamhiR download test_that("senamhiR can download data", { out <- senamhiR("000401") - expect_identical(names(out)[12], "Prec19 (mm)") + expect_identical(names(out)[11], "Prec19 (mm)") expect_that(out, is_a("tbl_df")) - expect_output(str(out), "6940 obs") - expect_output(str(out), "14 variables") + expect_output(str(out), "13 variables") }) ## test senamhiR download by year test_that("senamhiR can filter by year", { out <- senamhiR("000401", 1998:2000) - expect_identical(names(out)[12], "Prec19 (mm)") + expect_identical(names(out)[11], "Prec19 (mm)") expect_that(out, is_a("tbl_df")) expect_output(str(out), "1096 obs") - expect_output(str(out), "14 variables") + expect_output(str(out), "13 variables") +}) + +## test senamhiR can pad with zeroes +test_that("senamhiR can pad with zeroes", { + out <- senamhiR(401) + expect_identical(names(out)[11], "Prec19 (mm)") + expect_that(out, is_a("tbl_df")) + expect_output(str(out), "13 variables") }) ## should fail when no correct station is given diff --git a/tests/testthat/test-station_search.R b/tests/testthat/test-station_search.R index ea3dd33..b1fbaf6 100644 --- a/tests/testthat/test-station_search.R +++ b/tests/testthat/test-station_search.R @@ -8,15 +8,15 @@ test_that("station_search() can locate a station by name regex", { df <- station_search("Tara*", glob = TRUE) expect_that(df, is_a("tbl_df")) expect_output(str(df), "4 obs") - expect_output(str(df), "12 variables") + expect_output(str(df), "14 variables") }) ## test finding a station by baseline test_that("station_search() can locate a station by baseline", { df <- station_search(baseline = 1965:2015) expect_that(df, is_a("tbl_df")) - 
expect_output(str(df), "346 obs") - expect_output(str(df), "12 variables") + expect_output(str(df), "363 obs") + expect_output(str(df), "14 variables") }) ## test finding a station by region @@ -24,15 +24,15 @@ test_that("station_search() can locate a station by region", { df <- station_search(region = "TACNA") expect_that(df, is_a("tbl_df")) expect_output(str(df), "56 obs") - expect_output(str(df), "12 variables") + expect_output(str(df), "14 variables") }) ## test finding a station by distance from target test_that("station_search() can locate a station by distance from target", { - df <- station_search(target = "000410", dist = 0:10) + df <- station_search(target = 410, dist = 0:10) expect_that(df, is_a("tbl_df")) expect_output(str(df), "2 obs") - expect_output(str(df), "13 variables") + expect_output(str(df), "15 variables") }) ## test finding a station by distance from coordinates @@ -40,7 +40,7 @@ test_that("station_search() can locate a station by distance for coordinates", { df <- station_search(target = c(-6.50, -76.47), dist = 0:10) expect_that(df, is_a("tbl_df")) expect_output(str(df), "2 obs") - expect_output(str(df), "13 variables") + expect_output(str(df), "15 variables") }) ## station_search should fail if we spell the region incorrectly @@ -52,3 +52,8 @@ test_that("station_search() fails if passed an incorrect region name", { test_that("station_search() fails if passed an incorrect region name", { expect_error(station_search(config = "Q"), "No data found for that config. Did you pass \"m\" or \"h\"?", fixed=TRUE) }) + +## station_search should fail if we ask for an invalid target +test_that("station_search() fails if passed an invalid target", { + expect_error(station_search(target = "foo"), "Target station appears invalid.", fixed=TRUE) +})