Bug fixes and fixing some tests.

ConorIA · Sep 24, 2018 · 159f57d · 159f57d
1 parent 98aad78
commit 159f57d
Show file tree

Hide file tree

Showing 10 changed files with 406 additions and 322 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: senamhiR
 Type: Package
 Title: A Collection of Functions to Obtain Peruvian Climate Data
-Version: 0.6.4
+Version: 0.6.5
 Date: 2018-06-27
 Authors@R: c(person(given = c("Conor", "I."), family = "Anderson", 
            role = c("aut","cre"), email = "[email protected]"),

diff --git a/R/qc.R b/R/qc.R
@@ -18,6 +18,8 @@
 
 qc <- function(dat) {
 
+  attrs_to_append <- append(attributes(dat)[4:length(attributes(dat))], list(`QC Date` = Sys.Date()))
+
   if (inherits(dat, "character") & !inherits(dat, "data.frame")) {
     if (length(dat) > 1L) {
       stop("Sorry, for now this script can only process one station at a time.")
@@ -51,7 +53,7 @@ qc <- function(dat) {
   }
 
   if (length(minshifts) > 0) {
-    for (i in 1:length(maxshifts)) {
+    for (i in 1:length(minshifts)) {
       bad_table <- select(dat, Fecha, var = `Tmin (C)`)
       fixes <- .fix_bad_data(bad_table, minshifts[i], "Tmin", "dps")
       dat$`Tmin (C)`[minshifts[i]] <- unlist(fixes[1])
@@ -86,5 +88,8 @@ qc <- function(dat) {
   observations[is.na(observations)] <- ''
   dat <- add_column(dat, Observations = observations)
 
+  attributes(dat) <- append(attributes(dat), attrs_to_append)
+  rownames(dat) <- NULL
+
   dat
 }
diff --git a/R/quick_audit.R b/R/quick_audit.R
@@ -4,7 +4,7 @@
 ##'
 ##' @param station character; a station id number to process or a \code{tbl_df} containing the data to process
 ##' @param variables numeric or character; by default, all variables will be included. Pass a numeric vector to specify columns to include, or pass a character vector to try to match column names
-##' @param by character; whether values should be reported annually (\code{by = "year"}), or monthly (\code{by = "month"})
+##' @param by character; whether values should be reported annually (\code{by = "year"}), monthly (\code{by = "month"}, or as an overall (default).
 ##' @param report character; whether values should be reported as percentage missing (\code{report = "pct"}) or as number of missing values (\code{report = "n"})
 ##' @param reverse Boolean; if \code{TRUE}, will show percentage present instead of percentage missing (only applies if \code{report = "pct"})
 ##'
@@ -21,24 +21,25 @@
 ##' \dontrun{quick_audit("000401", 2:10, by = "month", report = "n")}
 ##'
 
-quick_audit <- function(station, variables, by = "year", report = "pct", reverse = FALSE) {
-
+quick_audit <- function(station, variables, by = NULL, report = "pct", reverse = FALSE) {
+  
   if (inherits(station, "tbl_df")) {
     dat <- station
   } else {
     if (inherits(station, "character")) {
-      dat <- download_data_sql(station)
+      dat <- download_data(station)
     } else {
       stop("I can't figure out what data you've given me.")
     }
   }
-
-  if (by != "month" && by != "year") {
-    warning("By was neither \"month\" nor \"year\". Defaulting to year.")
+
+  if (!is.null(by) && (by != "month" && by != "year")) {
+    warning("By was neither \"month\" nor \"year\". Defaulting to overall total.")
+    by <- NULL
   }
-
+  
   if (missing(variables)) {
-    variables <- 2:ncol(dat)
+    variables <- min(which(!(names(dat) %in% c("StationID", "Fecha")))):ncol(dat)
   } else {
     if (inherits(variables, "character")) {
       for (var in seq_along(variables)) {
@@ -47,61 +48,72 @@ quick_audit <- function(station, variables, by = "year", report = "pct", reverse
     }
     variables <- as.numeric(variables)
   }
-
+  
   if (reverse) {
     ctl = 1
     metric = "present"
   } else {
     ctl = 0
     metric = "NA"
   }
-
+  
   years <- min(format(dat$Fecha, format = "%Y")):max(format(dat$Fecha, format = "%Y"))
-
-  if (by == "month") {
-    months <- min(format(dat$Fecha, format = "%m")):max(format(dat$Fecha, format = "%m"))
-    yearmons <- apply(expand.grid(sprintf("%02d", months), years), 1, function(x) paste(x[2], x[1], sep = "-"))
-    out <- tibble(`Year-month` = yearmons)
-    timestep <- yearmons
+
+  if (is.null(by)) {
+    timestep <- 1
+    out <- tibble(Report = "Total")
   } else {
-    out <- tibble(Year = years)
-    timestep <- years
+    if (by == "month") {
+      months <- min(format(dat$Fecha, format = "%m")):max(format(dat$Fecha, format = "%m"))
+      yearmons <- apply(expand.grid(sprintf("%02d", months), years), 1, function(x) paste(x[2], x[1], sep = "-"))
+      out <- tibble(`Year-month` = yearmons)
+      timestep <- yearmons
+    } else {
+      out <- tibble(Year = years)
+      timestep <- years
+    }
   }
-
-    for (var in variables) {
-      integrity <- missingvec <- consecNAvec <- NULL
-      for (t in timestep) {
-        colname <- colnames(dat)[var]
+
+  for (var in variables) {
+    integrity <- missingvec <- consecNAvec <- NULL
+    for (t in timestep) {
+      colname <- colnames(dat)[var]
+      if (is.null(by)) {
+        timely <- dat  
+      } else {
         if (by == "month") {
           timely <- dat[format(dat$Fecha, format = "%Y-%m") == t,]
         } else {
           timely <- dat[format(dat$Fecha, format = "%Y") == t,]
         }
-        obs <- nrow(timely)
-        missing <- sum(is.na(timely[[var]]))
-        if (report == "pct") {
-          integrity <- c(integrity, as.numeric(abs(ctl-(missing/obs))*100))
-        } else {
-          missingvec <- c(missingvec, as.integer(missing))
-          consecNA <- rle(is.na(timely[[var]]))
-          consecNA <- max(consecNA$lengths[consecNA$values], 0)
-          consecNAvec <- c(consecNAvec, as.integer(consecNA))
-        }
       }
+      obs <- nrow(timely)
+      missing <- sum(is.na(timely[[var]]))
       if (report == "pct") {
-        out <- add_column(out, integ = integrity)
-        names(out)[which(names(out) == "integ")] <- paste(colname, "pct", metric)
+        integrity <- c(integrity, as.numeric(abs(ctl-(missing/obs))*100))
       } else {
-        out <- add_column(out, consec = consecNAvec, rand = missingvec)
-        names(out)[which(names(out) == "consec")] <- paste(colname, "consec NA")
-        names(out)[which(names(out) == "rand")] <- paste(colname, "tot NA")
+        missingvec <- c(missingvec, as.integer(missing))
+        consecNA <- rle(is.na(timely[[var]]))
+        consecNA <- max(consecNA$lengths[consecNA$values], 0)
+        consecNAvec <- c(consecNAvec, as.integer(consecNA))
       }
-
     }
-    if (by == "month") {
-      out$`Year-month` <- as.yearmon(out$`Year-month`)
-      out <- add_column(out, Year = format(out$`Year-month`, format = "%Y"), Month = format(out$`Year-month`, format = "%m"), .before = 1)
+    if (report == "pct") {
+      out <- add_column(out, integ = integrity)
+      names(out)[which(names(out) == "integ")] <- paste(colname, "pct", metric)
+    } else {
+      out <- add_column(out, consec = consecNAvec, rand = missingvec)
+      names(out)[which(names(out) == "consec")] <- paste(colname, "consec NA")
+      names(out)[which(names(out) == "rand")] <- paste(colname, "tot NA")
     }
-
+
+  }
+  if (is.null(by)) {
+    out <- out[,-1]
+  } else if (by == "month") {
+    out$`Year-month` <- as.yearmon(out$`Year-month`)
+    out <- add_column(out, Year = format(out$`Year-month`, format = "%Y"), Month = format(out$`Year-month`, format = "%m"), .before = 1)
+  }
+
   return(out)
 }
diff --git a/README.Rmd b/README.Rmd
@@ -1,8 +1,7 @@
 ---
 title: "senamhiR: A collection of functions to obtain Peruvian climate data in R"
 output: 
-  html_document: 
-    keep_md: yes
+  github_document
 ---
 
 ```{r setup, include=FALSE}
@@ -11,8 +10,6 @@ library(senamhiR)
 library(dplyr)
 ```
 
-[![build status](https://gitlab.com/ConorIA/senamhiR/badges/master/build.svg)](https://gitlab.com/ConorIA/senamhiR/commits/master) [![Build status](https://ci.appveyor.com/api/projects/status/60kbu1b7wkf7akqn?svg=true)](https://ci.appveyor.com/project/ConorIA/senamhir-bxb45) [![codecov](https://codecov.io/gl/ConorIA/senamhiR/branch/master/graph/badge.svg)](https://codecov.io/gl/ConorIA/senamhiR)
-
 The package provides an automated solution for the acquisition of archived Peruvian climate and hydrology data directly within R. The data was compiled from the Senamhi website, and contains all of the data that was available as of April 10, 2018. This data was originally converted from HTML, and is stored in a MySQL database in tibble format.
 
 It is important to note that the info on the Senamhi website has not undergone quality control, however, this package includes a helper function to perform the most common quality control operations for the temperature variables. More functions will be added in the future.
@@ -103,13 +100,13 @@ There are two functions included to perform some basic quality control.
 
 ### `quick_audit()`
 
-The `quick_audit()` function will return a tibble listing the percentage or number of missing values for a station. For instance, the following command will return the percentage of missing values in our 30-year Requena dataset:
+The `quick_audit()` function will return a tibble listing the percentage or number of missing values for a station. For instance, the following command will return the percentage of missing values in our 30-year Requena data set:
 
 ```{r}
 quick_audit(requ, c("Tmax", "Tmin"))
 ```
 
-Use `report = "n"` to show the _number_ of missing values. Use `by = "month"` to show missing data by month instead of year. For instance, the number of days for which Mean Temperature was missing at Tocache in 1980:
+Use `report = "n"` to show the _number_ of missing values. Use `by = "month"` or `by = "year"` to show missing data by month or year. For instance, the number of days for which Mean Temperature was missing at Tocache in 1980:
 
 ```{r}
 toca <- senamhiR("000463", year = 1980)
@@ -172,7 +169,7 @@ If that seems like a lot of work, just think about how much work it would have b
 
 ## Senamhi terms of use
 
-Senamhi's terms of use are [here](http://senamhi.gob.pe/?p=terminos_condiciones), but as of writing that link was redirecting to the Senamhi home page. An archived version is availale [here](https://web.archive.org/web/20170822092538/http://senamhi.gob.pe/?p=terminos_condiciones). The terms allow for the free and public access to information on the Senamhi website, in both for-profit and non-profit applications. However, Senamhi stipulates that any use of the data must be accompanied by a disclaimer that Senamhi is the proprietor of the information. The following text is recommended (official text in Spanish):
+Senamhi's terms of use are [here](http://senamhi.gob.pe/?p=terminos_condiciones), but as of writing that link was redirecting to the Senamhi home page. An archived version is available [here](https://web.archive.org/web/20170822092538/http://senamhi.gob.pe/?p=terminos_condiciones). The terms allow for the free and public access to information on the Senamhi website, in both for-profit and non-profit applications. However, Senamhi stipulates that any use of the data must be accompanied by a disclaimer that Senamhi is the proprietor of the information. The following text is recommended (official text in Spanish):
 
 - **Official Spanish:** _Información recopilada y trabajada por el Servicio Nacional de Meteorología e Hidrología del Perú. El uso que se le da a esta información es de mi (nuestra) entera responsabilidad._
 - **English translation:** This information was compiled and maintained by Peru's National Meteorology and Hydrology Service (Senamhi). The use of this data is of my (our) sole responsibility.