diff --git a/.Rbuildignore b/.Rbuildignore index b158c09..b8e4acc 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,5 +6,6 @@ appveyor.yml .gitattributes .gitignore README.md +README.Rmd LICENSE.md -install_senamhiR.R \ No newline at end of file +install_senamhiR.R diff --git a/DESCRIPTION b/DESCRIPTION index 2dc4666..e74997c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,19 +13,16 @@ Description: A collection of functions to obtain archived Peruvian climatologica Depends: R (>= 3.1.0) Imports: - curl, DBI, DT, dplyr, geosphere, leaflet, magrittr, - readr, RMySQL, shiny, tibble, utils, - XML, zoo Suggests: testthat, diff --git a/NAMESPACE b/NAMESPACE index 5b36547..e6158c0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,16 +1,12 @@ # Generated by roxygen2: do not edit by hand -export(download_data) export(download_data_sql) -export(generate_local_catalogue) export(map_stations) export(qc) export(quick_audit) -export(read_data) export(senamhiR) export(station_explorer) export(station_search) -export(write_data) importFrom(DBI,dbConnect) importFrom(DBI,dbDisconnect) importFrom(DBI,dbGetQuery) @@ -20,9 +16,6 @@ importFrom(DT,dataTableOutput) importFrom(DT,datatable) importFrom(DT,renderDataTable) importFrom(RMySQL,MySQL) -importFrom(XML,htmlTreeParse) -importFrom(XML,readHTMLTable) -importFrom(curl,curl_download) importFrom(dplyr,filter) importFrom(dplyr,select) importFrom(geosphere,distGeo) @@ -32,8 +25,6 @@ importFrom(leaflet,awesomeIcons) importFrom(leaflet,leaflet) importFrom(leaflet,setView) importFrom(magrittr,"%>%") -importFrom(readr,read_csv) -importFrom(readr,write_csv) importFrom(shiny,br) importFrom(shiny,column) importFrom(shiny,em) @@ -50,6 +41,4 @@ importFrom(tibble,as_tibble) importFrom(tibble,has_name) importFrom(tibble,tibble) importFrom(utils,glob2rx) -importFrom(utils,setTxtProgressBar) -importFrom(utils,txtProgressBar) importFrom(zoo,as.yearmon) diff --git a/R/clean_table.R b/R/clean_table.R index faae225..4924b53 100644 --- a/R/clean_table.R +++ b/R/clean_table.R @@ -9,6 +9,7 @@ #' #' @return tbl_df #' +#' @importFrom tibble has_name #' @keywords internal #' #' @author Conor I. Anderson @@ -101,4 +102,4 @@ } } datain -} \ No newline at end of file +} diff --git a/R/data.R b/R/data.R index f522005..5bfb5e0 100644 --- a/R/data.R +++ b/R/data.R @@ -18,8 +18,5 @@ #' \item{Province}{the province where the station is located} #' \item{District}{the district where the station is located} #' } -#' -#' @examples -#' \dontrun{catalogue} +#' "catalogue" - diff --git a/R/download_action.R b/R/download_action.R deleted file mode 100644 index 30f654a..0000000 --- a/R/download_action.R +++ /dev/null @@ -1,30 +0,0 @@ -##' @title [DEPRECATED] Curl helper -##' -##' @description A helper function to execute download actions using curl. -##' -##' @param url character; address to be downloaded. -##' @param filename character; name to save the downloaded file under. -##' @param write_mode character; if set to 'overwrite' the script will overwrite file if it exists. -##' -##' @return None -##' -##' @keywords internal -##' -##' @author Conor I. Anderson -##' -##' @importFrom curl curl_download - -.download_action <- function(url, filename, write_mode = "z") { - if (!file.exists(filename) | write_mode == "overwrite" | file.info(filename)$size < 500) { - download <- try(curl_download(url, filename)) - if (inherits(download, "try-error") | file.info(filename)$size < 500) { - warning("Caught an error. Retrying file.", immediate. = TRUE) - unlink(filename) - download <- try(curl_download(url, filename)) - if (inherits(download, "try-error") | file.info(filename)$size < 500) { - unlink(filename) - stop("Could not download the requested file.") - } - } - } -} diff --git a/R/download_data.R b/R/download_data.R deleted file mode 100644 index d6bad0a..0000000 --- a/R/download_data.R +++ /dev/null @@ -1,54 +0,0 @@ -##' @title [DEPRECATED] Download from the Peruvian National Hydrological and Meterological Service -##' -##' @description Download Peruvian historical climate data from the Senamhi web portal. -##' -##' @param station character; the station id number to process. -##' @param year numerical; a vector of years to process. -##' @param write_mode character; if set to 'overwrite', the script will overwrite downloaded files if they exist. -##' -##' @return None -##' -##' @author Conor I. Anderson -##' -##' @importFrom curl curl_download -##' @importFrom utils setTxtProgressBar txtProgressBar -##' -##' @export -##' -##' @examples -##' \dontrun{download_data('000401', 1971:2000)} - -download_data <- function(station, year, write_mode = "z") { - - stationData <- catalogue[catalogue$StationID == station, ] - stationName <- stationData$Station - region <- stationData$Region - type = stationData$Type - config = stationData$Configuration - - month <- sprintf("%02d", 1:12) - dates <- apply(expand.grid(month, year), 1, function(x) paste0(x[2], x[1])) - - ## genURLs - urlList <- paste0("http://www.senamhi.gob.pe/include_mapas/_dat_esta_tipo02.php?estaciones=", - station, "&tipo=", type, "&CBOFiltro=", dates, "&t_e=", config) - - foldername <- paste0(region, "/HTML/", as.character(station), " - ", stationName) - if (!dir.exists(foldername)) { - check <- try(dir.create(foldername, recursive = TRUE)) - if (inherits(check, "try-error")) { - stop("I couldn't write out the directory. Check your permissions.") - } - } - - ## Download the data - print("Downloading the requested data.") - ## Set up a progress Bar - prog <- txtProgressBar(min = 0, max = length(urlList), style = 3) - on.exit(close(prog)) - for (i in 1:length(urlList)) { - filename <- paste0(foldername, "/", dates[i], ".html") - .download_action(url = urlList[i], filename, write_mode) - setTxtProgressBar(prog, value = i) - } -} diff --git a/R/generate_catalogue.R b/R/generate_catalogue.R deleted file mode 100644 index 020fbe5..0000000 --- a/R/generate_catalogue.R +++ /dev/null @@ -1,108 +0,0 @@ -##' @title [DEPRECATED] A function to determine the full catalogue of available Peruvian National Hydrological and Meterological Service stations -##' -##' @description Generate a .rda file containing a list of all of the stations operated by Senamhi. You should not need to execute this function, as the data is already included in the package. -##' -##' @return catalogue.rda -##' -##' @keywords internal -##' -##' @author Conor I. Anderson -##' -##' @importFrom tibble as_tibble -##' @importFrom XML htmlTreeParse - -.generate_catalogue <- function() { - - vector <- seq(1, 25, by = 1) - vector <- vector[-7] - vector <- sprintf("%02d", vector) - urlList <- paste0("http://www.senamhi.gob.pe/include_mapas/_map_data_hist03.php?drEsta=", - vector) - - Sys.setlocale(category = "LC_ALL", locale = "C") - catalogue = NULL - dir <- tempdir() - - for (i in seq_along(vector)) { - .download_action(url = urlList[i], filename = paste0(dir, "/", vector[i], - ".html")) - data <- htmlTreeParse(paste0(dir, "/", vector[i], ".html")) - data <- unlist(data[3]) - data <- data[21] - data <- strsplit(data, "var ubica")[[1]] - j <- 2 - while (j <= length(data)) { - row <- strsplit(data[j], ",")[[1]] - name <- strsplit(row[3], " - ")[[1]] - ## There are a couple of cases where the station name is formatted with a spaced - ## hyphen - if (length(name) == 3) - name <- c(paste(name[1:2], collapse = " - "), name[3]) - name <- gsub("'", "", name) - station <- as.character(name[2]) - period <- try(.guess_period(station)) - if (!inherits(period, "try-error")) { - start <- period[1] - end <- period[2] - } else { - start <- NA - end <- NA - } - config <- try(.guess_config(station)) - if (!inherits(config, "try-error")) { - type <- config[1] - config <- config[2] - } else { - type <- NA - config <- NA - } - row <- c(name, type, config, start, end, row[13], row[6:10]) - - ## Commands to clean up the data - row <- gsub("\303\221", 'N', row) - row <- gsub("\321", 'N', row) - row <- gsub("'", '', row) - row <- gsub("\\\\", '', row) - row <- gsub("));", '', row) - row <- gsub("}\r\n}", '', row) - row <- gsub("^\\s+|\\s+$", "", row) - # Set station status - if (row[7] == "C" | row[7] == "P") - row[7] <- "closed" - if (row[7] == "F") - row[7] <- "working" - # Convert lat/long to decimal degrees - latitude <- as.numeric(unlist(strsplit(row[8], split = " "))) - latitude <- -(latitude[1] + (latitude[2]/60) + (latitude[3]/3600)) - row[8] <- latitude - longitude <- as.numeric(unlist(strsplit(row[9], split = " "))) - longitude <- -(longitude[1] + (longitude[2]/60) + (longitude[3]/3600)) - row[9] <- longitude - # Add it to the catalogue - catalogue <- rbind(catalogue, row) - j <- j + 1 - } - colnames(catalogue) <- c("Station", "StationID", "Type", "Configuration", - "Data Start", "Data End", "Station Status", "Latitude", "Longitude", - "Region", "Province", "District") - } - rownames(catalogue) <- NULL - catalogue <- as_tibble(catalogue) - catalogue$Type <- as.factor(catalogue$Type) - catalogue$Configuration <- as.factor(catalogue$Configuration) - catalogue$`Data Start` <- as.integer(catalogue$`Data Start`) - catalogue$`Data End` <- as.integer(catalogue$`Data End`) - catalogue$`Station Status` <- as.factor(catalogue$`Station Status`) - catalogue$Latitude <- as.numeric(catalogue$Latitude) - catalogue$Longitude <- as.numeric(catalogue$Longitude) - #catalogue$Region <- as.factor(catalogue$Region) - #catalogue$Province <- as.factor(catalogue$Province) - #catalogue$District <- as.factor(catalogue$District) - catalogue$`Data Start`[875] <- 1963 # Temporary fix for bad dates - catalogue$`Data Start`[1330] <- 1900 # Temporary fix (and not a very good one!) for bad dates - comment(catalogue) <- "Note: The Senamhi database detailing available historical information has not been updated since 2010, as such, any station with data available until 2010 is assumed to be current, and has been marked as having data until \"2010+\". Actual data availability may vary for these stations. Especially for closed stations." - save(catalogue, file = "catalogue.rda", compress = "xz", compression_level = 9) - return("Catalogue saved as catalogue.rda") -} - - diff --git a/R/generate_local_catalogue.R b/R/generate_local_catalogue.R deleted file mode 100644 index 4e04470..0000000 --- a/R/generate_local_catalogue.R +++ /dev/null @@ -1,61 +0,0 @@ -##' @title [DEPRECATED] Generate Local Catalogue -##' -##' @description Generate or update a catalogue of locally downloaded stations. -##' -##' @param station character; optional character string to specify one or more stations. -##' @param localcatalogue character; optional character string to specify catalogue object to update. -##' -##' @return A data frame containing the details of matching stations. -##' -##' @importFrom readr read_csv -##' @importFrom tibble add_column has_name -##' -##' @export -##' -##' @author Conor I. Anderson -##' -##' @examples -##' # Update catalogue information for 'Tarapoto'. -##' \dontrun{generate_local_catalogue("000401")} -##' - -generate_local_catalogue <- function(station, localcatalogue){ - - if (missing(localcatalogue)) { - if (file.exists("local_catalogue.rda")) { - load("local_catalogue.rda") - } else { - localcatalogue <- as_tibble(catalogue) - localcatalogue <- add_column(localcatalogue, `Period (Yr)` = rep(NA, nrow(localcatalogue)), .after = 6) - localcatalogue <- add_column(localcatalogue, Downloaded = rep(NA, nrow(localcatalogue))) - } - } - - if (missing(station)) { - print("No station specified, looking for all stations. This may take a while!") - station <- localcatalogue$StationID - } - - if (length(station) > 1){ - lapply(station, generate_local_catalogue) - return("Bulk action finished.") - } - - print(paste0("Checking station ", station, "...") ) - row <- grep(station, catalogue$StationID) - if (length(row) != 1) { - stop("I could not identify the station. Please check the station number and try again.") - } - - dat <- try(read_data(station), silent = TRUE) - if (inherits(dat, "try-error")) { - return(warning("There was an error checking that station. The file might not exist.", call. = FALSE, immediate. = TRUE)) - } - - if (is.na(localcatalogue$`Data Start`[row]) | localcatalogue$`Data Start`[row] != format(dat$Fecha[1], format = "%Y")) localcatalogue$`Data Start`[row] <- format(dat$Fecha[1], format = "%Y") - if (is.na(localcatalogue$`Data End`[row]) | localcatalogue$`Data End`[row] != format(dat$Fecha[nrow(dat)], format = "%Y")) localcatalogue$`Data End`[row] <- format(dat$Fecha[nrow(dat)], format = "%Y") - localcatalogue$`Period (Yr)`[row] <- 1 + as.numeric(localcatalogue$`Data End`[row]) - as.numeric(localcatalogue$`Data Start`[row]) - if (is.na(localcatalogue$Downloaded[row]) | localcatalogue$Downloaded[row] != "Yes") localcatalogue$Downloaded[row] <- "Yes" - save(localcatalogue, file = "local_catalogue.rda", compress = "xz", compression_level = 9) - return("Values updated.") -} \ No newline at end of file diff --git a/R/guess_config.R b/R/guess_config.R deleted file mode 100644 index 91c4340..0000000 --- a/R/guess_config.R +++ /dev/null @@ -1,86 +0,0 @@ -##' @title [DEPRECATED] Guess station characteristics -##' -##' @description Attempt to guess station characteristics. -##' -##' @param station character; the station id number to process. -##' @param ... Additional arguments passed to \code{\link{.download_action}}. -##' -##' @return vector -##' -##' @keywords internal -##' -##' @author Conor I. Anderson -##' -##' @importFrom XML htmlTreeParse -##' -##' @examples -##' \dontrun{.guess_config('000401')} - -.guess_config <- function(station, ...) { - - ## Ask user to input variables - if (missing(station)) - station <- readline(prompt = "Enter station number: ") - - ## genURL - url <- paste0("http://www.senamhi.gob.pe/include_mapas/_dat_esta_tipo.php?estaciones=", - station) - - ## Download the data - print(paste0("Checking station characteristics for ", station, ".")) - filename <- tempfile() - .download_action(url, filename, ...) - station_data <- htmlTreeParse(filename) - station_data <- unlist(station_data[3]) - station_data <- station_data[grep("_dat_esta_tipo02.php", station_data)] - - ## Check config - test <- grep("t_e=M1", station_data) - if (length(test) > 0) { - config <- "M1" - } else { - test <- grep("t_e=M2", station_data) - if (length(test) > 0) { - config <- "M2" - } else { - test <- grep("t_e=M", station_data) - if (length(test) > 0) { - config <- "M" - } else { - test <- grep("t_e=H", station_data) - if (length(test) > 0) { - config <- "H" - } else { - config <- "ERROR" - } - } - } - } - - ## Check station type - test <- grep("tipo=CON", station_data) - if (length(test) > 0) { - type <- "CON" - } else { - test <- grep("tipo=DAV", station_data) - if (length(test) > 0) { - type <- "DAV" - } else { - test <- grep("tipo=SUT", station_data) - if (length(test) > 0) { - type <- "SUT" - } else { - test <- grep("tipo=SIA", station_data) - if (length(test) > 0) { - type <- "SIA" - } else { - type <- "ERROR" - } - } - } - } - result <- c(type, config) - if (result[1] == "ERROR" | result[2] == "ERROR") - stop("We could not determine the configuration of this station.") - return(result) -} diff --git a/R/guess_period.R b/R/guess_period.R deleted file mode 100644 index 84e4fc6..0000000 --- a/R/guess_period.R +++ /dev/null @@ -1,49 +0,0 @@ -##' @title [DEPRECATED] Query available data from the Peruvian National Hydrological and Meterological Service -##' -##' @description Query the available data for a given station from the Senamhi web portal. -##' -##' @param station character; the station id number to process. -##' @param automatic logical; if set to true (default), the script will attempt to guess the startYear and endYear values. -##' @param ... Additional arguments passed to \code{\link{.download_action}}. -##' -##' @return data.frame -##' -##' @keywords internal -##' -##' @author Conor I. Anderson -##' -##' @importFrom XML readHTMLTable -##' -##' @examples -##' \dontrun{.guess_period('000401')} - -.guess_period <- function(station, automatic = TRUE, ...) { - - ## genURL - url <- paste0("http://www.senamhi.gob.pe/include_mapas/_dat_esta_periodo.php?estaciones=", - station) - - ## Download the data - print(paste0("Checking data at ", station, ".")) - filename <- tempfile() - .download_action(url, filename, ...) - - table <- readHTMLTable(filename, as.data.frame = TRUE) - table <- as.data.frame(table[3]) - if (ncol(table) > 1) { - names(table) <- c("Parameter", "DataFrom", "DataTo") - if (automatic == TRUE) { - startYear <- min(as.numeric(levels(table$DataFrom))) - endYear <- max(as.numeric(levels(table$DataTo))) - if (endYear == 2010) { - endYear <- "2010+" - } - result <- c(startYear, endYear) - return(result) - } else { - return(table) - } - } else { - stop("We could not determine data availability for this station.") - } -} diff --git a/R/guess_year.R b/R/guess_year.R deleted file mode 100644 index 8e20ce5..0000000 --- a/R/guess_year.R +++ /dev/null @@ -1,40 +0,0 @@ -#' [DEPRECATED] Guess the period of available compiled data -#' -#' @param station character; station ID to process -#' @param fallback numeric; vector of year to fall back on -#' -#' @return Vector of years of archived data -#' -#' @keywords internal - -.guess_year <- function(station, fallback) { - station_data <- catalogue[catalogue$StationID == station, ] - if (is.na(station_data$`Data Start`) || is.na(station_data$`Data End`)) { - if (missing(fallback)) { - print("Available data undefined and no fallback specified. Skipping this station.") - return("No period defined.") - } - if (is.na(station_data$`Data Start`) && !is.na(station_data$`Data End`)) { - print(paste("Data start undefined. Using fallback from", min(fallback), "to", station_data$`Data End`)) - year <- min(fallback):station_data$`Data End` - } - if (!is.na(station_data$`Data Start`) && is.na(station_data$`Data End`)) { - print(paste("Data end undefined. Using fallback from", station_data$`Data Start`, "to", max(fallback))) - year <- station_data$`Data Start`:max(fallback) - } - if (is.na(station_data$`Data Start`) && is.na(station_data$`Data End`)) { - print(paste("Available data undefined. Using fallback from", min(fallback), "to", max(fallback))) - year <- min(fallback):max(fallback) - } - } else { - if (station_data$`Data End` == "2010+") { - print(paste("Not sure when data period ends. We will try until", - (as.numeric(format(Sys.Date(), format = "%Y")) - 1))) - endYear <- as.numeric(format(Sys.Date(), format = "%Y")) - 1 - } else { - endYear <- station_data$`Data End` - } - year <- station_data$`Data Start`:endYear - } - year -} \ No newline at end of file diff --git a/R/read_data.R b/R/read_data.R deleted file mode 100644 index 4207b19..0000000 --- a/R/read_data.R +++ /dev/null @@ -1,75 +0,0 @@ -##' @title [DEPRECATED] Read data from the Peruvian National Hydrological and Meterological Service -##' -##' @description Read a CSV file of Peruvian historical climate data from the Senamhi web portal. -##' -##' @param station character; the station id number to process. -##' @param path character; the path to the file. By default, the script expects the csv files to be sorted by region. -##' -##' @return A tibble (tbl_df) of the relative information. -##' -##' @author Conor I. Anderson -##' -##' @importFrom tibble has_name -##' @importFrom readr read_csv -##' -##' @export -##' -##' @examples -##' \dontrun{read_data('000401')} - -read_data <- function(station, path = "default") { - - ## Fail if we try to read multiple stations - if (length(station) > 1) { - stop("Sorry, for now I can only read a single station at a time.") - } - - row <- grep(station, catalogue$StationID) - if (length(row) != 1) { - stop("I could not identify the station. Please check the station number and try again.") - } - - if (path == "default") { - folder <- paste0(catalogue$Region[row], "/") - } else { - folder <- if (path == "") "." else path - } - - filename <- paste0(folder, "/", station, " - ", catalogue$Station[row], ".csv") - if (!file.exists(filename)) { - stop("I can't find a csv file for that station. Please use the `export_data()` function to create it.") - } - - # Generate the column types - if (catalogue$Configuration[row] == "H") { - if (catalogue$Type[row] == "CON") - types <- "Dddddd" - if (catalogue$Type[row] == "SUT") - types <- "Dddddddccd" - } else { - if (catalogue$Type[row] == "CON") - types <- "Ddddddddddddcc" - if (catalogue$Type[row] == "SUT" | catalogue$Type[row] == "SIA" | catalogue$Type[row] == "DAV") - types <- "Dddddddcc" - } - - # Read the .csv file - dat <- tryCatch({ - read_csv(filename, col_types = types) - }, warning = function(w) { - read_csv(filename, col_types = paste0(types, "c")) - }, error = function(e) { - return("I could not read the file. Please ensure that it exists and that you have the right permissions.") - }) - - # Fix the "Volocidad del Viento" column, which is sometimes numeric and sometimes integer (avoid false precision) - if (has_name(dat, "Velocidad del Viento (m/s)")) { - if (length(grep(".", dat$`Velocidad del Viento (m/s)`, fixed = TRUE)) > 0) { - dat$`Velocidad del Viento (m/s)` <- as.double(dat$`Velocidad del Viento (m/s)`) - } else { - dat$`Velocidad del Viento (m/s)` <- as.integer(dat$`Velocidad del Viento (m/s)`) - } - } - - return(dat) -} diff --git a/R/senamhiR.R b/R/senamhiR.R index 2ab38c4..23f496e 100644 --- a/R/senamhiR.R +++ b/R/senamhiR.R @@ -4,9 +4,6 @@ ##' ##' @param station character; the station id number to process. Can also be a vector of station ids, which will be returned as a list. ##' @param year numerical; a vector of years to process. Defaults to the full known range. -##' @param tasks [DEPRECATED] numerical; define which tasks to perform: 1) Download Data, 2) Compile CSV of Downloaded Data, 3) Both. -##' @param fallback [DEPRECATED] vector; if no year is specified, and the period of available data is unknown, this vector will provide a fallback start and end year to download. If not specified, such stations will be skipped. -##' @param write_mode [DEPRECATED] character; if set to 'append', the script will append the data to an exisiting file; if set to 'overwrite', it will overwrite an existing file. If not set, it will not overwrite. ##' ##' @author Conor I. Anderson ##' @@ -18,44 +15,25 @@ ##' \dontrun{senamhiR('000401', 1998:2015)} ##' \dontrun{senamhiR(c('000401', '000152', '000219'))} -senamhiR <- function(station, year, tasks, fallback, write_mode = "z") { +senamhiR <- function(station, year) { if (missing(station)) { station <- readline(prompt = "Enter station number(s) separated by commas: ") station <- trimws(unlist(strsplit(station, split = ","))) } if (!station %in% catalogue$StationID) { - stop("The station requested is not a valid station.") + stop("One or more requested stations invalid.") } - # If tasks is not specified (good), use MySQL - if (missing(tasks)) { - dataout <- list() - for (stn in station) { - if (missing(year)) year <- 1900:2100 - temp <- download_data_sql(stn, year) - dataout <- c(dataout, list(temp)) - } - if (length(station) == 1) { - return(dataout[[1]]) - } else { - return(dataout) - } - + dataout <- list() + for (stn in station) { + if (missing(year)) year <- 1900:2100 + temp <- download_data_sql(stn, year) + dataout <- c(dataout, list(temp)) + } + if (length(station) == 1) { + return(dataout[[1]]) } else { - # Use deprecated methods - if (length(station) > 1) { - lapply(station, senamhiR, year = year, tasks = tasks, fallback = fallback, - write_mode = write_mode) - return("Bulk action completed.") - } - if (missing(year)) .guess_year(stn, fallback) - print(paste0("Processing station ", station, ".")) - if (tasks == 1 || tasks == 3) { - download_data(station = station, year = year) - } - if (tasks == 2 || tasks == 3) { - write_data(station = station, year = year, write_mode = write_mode) - } + return(dataout) } } diff --git a/R/trim_HTML.R b/R/trim_HTML.R deleted file mode 100644 index e926a34..0000000 --- a/R/trim_HTML.R +++ /dev/null @@ -1,115 +0,0 @@ -##' @title [DEPRECATED] HTML file trimmer -##' -##' @description A helper function to trim HTML files for years with missing data. -##' -##' @param station character; the StationID of the station to process. -##' @param localcatalogue character; optional character string to specify catalogue object to update. -##' @param interactive boolean; whether user should be prompted about deletions and catalogue updates. -##' -##' @keywords internal -##' -##' @author Conor I. Anderson - -.trim_HTML <- function(station, localcatalogue, interactive = TRUE) { - - oldwd <- getwd() - - if (missing(localcatalogue)) { - if (file.exists("local_catalogue.rda")) { - load("local_catalogue.rda") - } else { - localcatalogue <- as_tibble(catalogue) - } - } - - cat_index <- which(localcatalogue$StationID == station) - newwd <- file.path(oldwd, localcatalogue$Region[cat_index], "HTML", paste(station, "-", localcatalogue$Station[cat_index])) - - if (!dir.exists(newwd)) { - warning("Directory doesn't exist") - return() - } - - setwd(newwd) - - files <- dir() - - first_index <- min(which(file.size(files) > 3037)) - - if (first_index == Inf) { - print("Uhh ohh, it looks like there is no good data here.") - if (interactive == TRUE) { - go <- readline(prompt = "Should we blow the station away? (y/N)") - if (go == "y" | go == "Y") { - setwd(oldwd) - unlink(newwd, recursive = TRUE) - } - go <- readline(prompt = "Should we update the local catalogue? (y/N)") - if (go == "y" | go == "Y") localcatalogue$`Data Start`[cat_index] <- "NONE"; localcatalogue$`Data End`[cat_index] <- "NONE" - } else { - setwd(oldwd) - unlink(newwd, recursive = TRUE) - localcatalogue$`Data Start`[cat_index] <- "NONE"; localcatalogue$`Data End`[cat_index] <- "NONE" - save(localcatalogue, file = file.path(oldwd, "local_catalogue.rda"), compress = "xz", compression_level = 9) - } - return() - } - - first_year <- substring(files[first_index], 1, 4) - - while (first_index == 1 || sum(file.size(files)[1:12] > 3037) > 6) { - print(paste("Looks like", first_year, "contains data! Let's try for an extra year")) - setwd(oldwd) - senamhiR(1, station, year = (as.numeric(first_year)-1)) - setwd(newwd) - files <- dir() - first_index <- min(which(file.size(files) > 3037)) - first_year <- substring(files[first_index], 1, 4) - } - - last_index <-max(which(file.size(files) > 3037)) - last_year <- substring(files[last_index], 1, 4) - while ((last_index == length(files) || sum(file.size(files)[(length(files)-11):length(files)] > 3037) > 6) && last_year != as.integer(format(Sys.Date(), format = "%Y")) - 1) { - print(paste("Looks like", last_year, "contains data! Let's try for an extra year")) - setwd(oldwd) - senamhiR(1, station, year = (as.numeric(last_year) + 1)) - setwd(newwd) - files <- dir() - last_index <- max(which(file.size(files) > 3037)) - last_year <- substring(files[last_index], 1, 4) - } - print(paste0("We have data from ", first_year, " to ", last_year, ".")) - if (substring(files[1], 1, 4) == first_year && substring(files[length(files)], 1, 4) == last_year) { - print("There are no files to trim!") - if (is.na(localcatalogue$`Data Start`[cat_index]) || localcatalogue$`Data Start`[cat_index] != first_year || is.na(localcatalogue$`Data End`[cat_index]) || localcatalogue$`Data End`[cat_index] != last_year) { - if (interactive == TRUE) { - go <- readline(prompt = "Should we update the local catalogue? (y/N)") - if (go == "y" | go == "Y") { - localcatalogue$`Data Start`[cat_index] <- first_year; localcatalogue$`Data End`[cat_index] <- last_year - save(localcatalogue, file = file.path(oldwd, "local_catalogue.rda"), compress = "xz", compression_level = 9) - } - } else { - localcatalogue$`Data Start`[cat_index] <- first_year; localcatalogue$`Data End`[cat_index] <- last_year - save(localcatalogue, file = file.path(oldwd, "local_catalogue.rda"), compress = "xz", compression_level = 9) - } - } - } else { - files_year <- substring(files, 1, 4) - if (interactive == TRUE) { - print("We are going to blow away the following files.") - print(data.frame(File = files[files_year < first_year | files_year > last_year], Size = file.size(files[files_year < first_year | files_year > last_year]))) - go <- readline(prompt = "Should we go ahead? (y/N)") - if (go == "y" | go == "Y") unlink(files[files_year < first_year | files_year > last_year]) - go <- readline(prompt = "Should we update the local catalogue? (y/N)") - if (go == "y" | go == "Y") { - localcatalogue$`Data Start`[cat_index] <- first_year; localcatalogue$`Data End`[cat_index] <- last_year - save(localcatalogue, file = file.path(oldwd, "local_catalogue.rda"), compress = "xz", compression_level = 9) - } - } else { - unlink(files[files_year < first_year | files_year > last_year]) - localcatalogue$`Data Start`[cat_index] <- first_year; localcatalogue$`Data End`[cat_index] <- last_year - save(localcatalogue, file = file.path(oldwd, "local_catalogue.rda"), compress = "xz", compression_level = 9) - } - } - setwd(oldwd) -} \ No newline at end of file diff --git a/R/trim_data.R b/R/trim_data.R deleted file mode 100644 index ae4f153..0000000 --- a/R/trim_data.R +++ /dev/null @@ -1,48 +0,0 @@ -##' @title [DEPRECATED] Data trimmer -##' -##' @description A helper function to trim CSV files with multiple years of missing data. -##' -##' @param dat an R object of type data.frame passed form the export_data script -##' -##' @return an R object of type data.frame. -##' -##' @keywords internal -##' -##' @author Conor I. Anderson - -.trim_data <- function(dat) { - - tests <- as.data.frame(!is.na(dat[, 2:ncol(dat)])) - - firstYear <- 9999 - lastYear <- 0 - for (i in 1:ncol(tests)) { - tsts <- which(tests[, i] == TRUE) - if (length(tsts) > 0) { - if (tsts[1] < firstYear) - firstYear <- tsts[1] - if (tsts[length(tsts)] > lastYear) - lastYear <- tsts[length(tsts)] - } - } - - if (firstYear == 9999 & lastYear == 0) { - print(paste0("No good data to export.")) - stop("No data to export.") - } - - firstYear <- format(dat$Fecha[firstYear], format = "%Y") - lastYear <- format(dat$Fecha[lastYear], format = "%Y") - print(paste0("Data from ", firstYear, " to ", lastYear, ".")) - - firstYear <- min(grep(firstYear, format(dat$Fecha, format = "%Y"))) - lastYear <- max(grep(lastYear, format(dat$Fecha, format = "%Y"))) - - if (firstYear > 1 | lastYear < nrow(dat)) { - print("Trimming data.") - dat <- subset(dat[firstYear:lastYear, ]) - } else { - print("Nothing to trim!") - } - return(dat) -} diff --git a/R/write_data.R b/R/write_data.R deleted file mode 100644 index 83358aa..0000000 --- a/R/write_data.R +++ /dev/null @@ -1,126 +0,0 @@ -##' @title [DEPRECATED] Compile data from the Peruvian National Hydrological and Meterological Service -##' -##' @description Compile a CSV file of Peruvian historical climate data from the Senamhi web portal. -##' -##' @param station character; the station id number to process. -##' @param year numerical; a vector of years to process. -##' @param write_mode character; if set to 'append', the script will append the data to an exisiting file; if set to 'overwrite', it will overwrite an existing file. If not set, it will not overwrite. -##' @param trim logical; if set to TRUE, the script will trim missing data from the start and end of the data set. Note only completely missing years will be trimmed. -##' @param clean logical; if set to TRUE, the script will delete all of the downloaded HTML files. -##' -##' @return None -##' -##' @author Conor I. Anderson -##' -##' @importFrom XML readHTMLTable -##' @importFrom tibble as_tibble has_name -##' @importFrom utils setTxtProgressBar txtProgressBar -##' @importFrom readr write_csv -##' -##' @export -##' -##' @examples -##' \dontrun{write_data('000401', 2000:2005, trim = TRUE)} - -write_data <- function(station, year, write_mode = "z", trim = TRUE, clean = FALSE) { - - station_data <- catalogue[catalogue$StationID == station, ] - type = station_data$Type - config = station_data$Configuration - - station_name <- station_data$Station - region <- station_data$Region - filename <- paste0(region, "/", as.character(station), " - ", station_name, ".csv") - - if (file.exists(filename) && write_mode != "overwrite" && write_mode != "append") { - return(warning(paste("File", filename, "exists. Not overwriting."), call. = FALSE, - immediate. = TRUE)) - } - - # This snippet of code from Stack Overflow user Grzegorz Szpetkowski at - # http://stackoverflow.com/questions/6243088/find-out-the-number-of-days-of-a-month-in-r - - number_of_days <- function(date) { - m <- format(date, format = "%m") - while (format(date, format = "%m") == m) { - date <- date + 1 - } - return(as.integer(format(date - 1, format = "%d"))) - } - ##-------------------------------------------------------------------------------------- - - # GenFileList - month <- sprintf("%02d", 1:12) - files <- apply(expand.grid(month, year), 1, function(x) paste0(x[2], x[1])) - files <- paste0(region, "/HTML/", as.character(station), " - ", station_name, "/", files, ".html") - - # GenDates - datelist <- apply(expand.grid(month, year), 1, function(x) paste(x[2], x[1], sep = "-")) - datelist <- paste(datelist, "01", sep = "-") - - startDate <- as.Date(paste0(min(year), "-01-01"), format = "%Y-%m-%d") - endDate <- as.Date(paste0(max(year), "-12-31"), format = "%Y-%m-%d") - datecolumn <- seq(as.Date(startDate), by = "day", length.out = (as.numeric(as.Date(endDate) - as.Date(startDate)) + 1)) - - colnames <- .clean_table(config = config, type = type, clean_names = TRUE) - dat <- as_tibble(matrix(nrow = length(datecolumn), ncol = length(colnames))) - names(dat) <- colnames - dat$Fecha <- datecolumn - row <- 1 - - print("Compiling data.") - prog <- txtProgressBar(min = 0, max = length(files), style = 3) - on.exit(close(prog)) - - ## Loop through files and input data to table - for (i in 1:length(files)) { - date <- as.Date(datelist[i], format = "%Y-%m-%d") - table <- try(readHTMLTable(files[i], stringsAsFactors = FALSE)) - if (inherits(table, "try-error")) { - stop("Could not read the requested file. Are you sure you downloaded it?") - } - table <- as_tibble(table[[1]]) - if (nrow(table) > 1) { - ## Sometimes the HTML files only have a few days, instead of the whole month - if (nrow(table) - 1 != number_of_days(date)) { - table <- table[-1, ] - for (j in 1:nrow(table)) { - datadate <- as.character(table[j, 1]) - datadate <- strsplit(datadate, split = "-")[[1]] - datadate <- as.numeric(datadate[1]) - thisrow <- row + datadate - 1 - if (!is.na(thisrow)) dat[thisrow, 2:length(dat)] <- table[j, 2:ncol(table)] - } - } else { - # Sometimes the HTML files only have a subset of the columns - if (ncol(table) != length(colnames)) { - ## Assuming that this only happens with precipitation for now. - table <- table[-1, ] - dat$`Prec07 (mm)`[row:(row + nrow(table) - 1)] <- table[[2]] - dat$`Prec19 (mm)`[row:(row + nrow(table) - 1)] <- table[[3]] - } else { - dat[row:(row + number_of_days(date) - 1), 2:ncol(dat)] <- table[2:nrow(table),2:ncol(table)] - } - } - } - row <- row + number_of_days(date) - setTxtProgressBar(prog, value = i) - } - - # Remove missing value codes and clean up column types - dat <- .clean_table(dat, config, type, remove_missing = TRUE, fix_types = TRUE) - - if (trim) { - dat <- try(.trim_data(dat)) - if (inherits(dat, "try-error")) { - return("There is no good data in this file.") - } - } - if (clean) unlink(files) - - if (write_mode == "append") { - write_csv(dat, filename, append = TRUE) - } else { - write_csv(dat, filename) - } -} diff --git a/man/catalogue.Rd b/man/catalogue.Rd index b143854..e81f7ce 100644 --- a/man/catalogue.Rd +++ b/man/catalogue.Rd @@ -27,7 +27,4 @@ catalogue \description{ A \code{data.frame} containing the identifiers, names, and other characteristics of climate and hydro stations in Peru. } -\examples{ -\dontrun{catalogue} -} \keyword{datasets} diff --git a/man/download_data.Rd b/man/download_data.Rd deleted file mode 100644 index 502749d..0000000 --- a/man/download_data.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/download_data.R -\name{download_data} -\alias{download_data} -\title{[DEPRECATED] Download from the Peruvian National Hydrological and Meterological Service} -\usage{ -download_data(station, year, write_mode = "z") -} -\arguments{ -\item{station}{character; the station id number to process.} - -\item{year}{numerical; a vector of years to process.} - -\item{write_mode}{character; if set to 'overwrite', the script will overwrite downloaded files if they exist.} -} -\value{ -None -} -\description{ -Download Peruvian historical climate data from the Senamhi web portal. -} -\examples{ -\dontrun{download_data('000401', 1971:2000)} -} -\author{ -Conor I. Anderson -} diff --git a/man/generate_local_catalogue.Rd b/man/generate_local_catalogue.Rd deleted file mode 100644 index d3ed88e..0000000 --- a/man/generate_local_catalogue.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generate_local_catalogue.R -\name{generate_local_catalogue} -\alias{generate_local_catalogue} -\title{[DEPRECATED] Generate Local Catalogue} -\usage{ -generate_local_catalogue(station, localcatalogue) -} -\arguments{ -\item{station}{character; optional character string to specify one or more stations.} - -\item{localcatalogue}{character; optional character string to specify catalogue object to update.} -} -\value{ -A data frame containing the details of matching stations. -} -\description{ -Generate or update a catalogue of locally downloaded stations. -} -\examples{ -# Update catalogue information for 'Tarapoto'. -\dontrun{generate_local_catalogue("000401")} - -} -\author{ -Conor I. Anderson -} diff --git a/man/read_data.Rd b/man/read_data.Rd deleted file mode 100644 index 46d312a..0000000 --- a/man/read_data.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_data.R -\name{read_data} -\alias{read_data} -\title{[DEPRECATED] Read data from the Peruvian National Hydrological and Meterological Service} -\usage{ -read_data(station, path = "default") -} -\arguments{ -\item{station}{character; the station id number to process.} - -\item{path}{character; the path to the file. By default, the script expects the csv files to be sorted by region.} -} -\value{ -A tibble (tbl_df) of the relative information. -} -\description{ -Read a CSV file of Peruvian historical climate data from the Senamhi web portal. -} -\examples{ -\dontrun{read_data('000401')} -} -\author{ -Conor I. Anderson -} diff --git a/man/senamhiR.Rd b/man/senamhiR.Rd index 252feab..c0a97ea 100644 --- a/man/senamhiR.Rd +++ b/man/senamhiR.Rd @@ -4,18 +4,12 @@ \alias{senamhiR} \title{Download compiled data from the Peruvian National Hydrological and Meterological Service} \usage{ -senamhiR(station, year, tasks, fallback, write_mode = "z") +senamhiR(station, year) } \arguments{ \item{station}{character; the station id number to process. Can also be a vector of station ids, which will be returned as a list.} \item{year}{numerical; a vector of years to process. Defaults to the full known range.} - -\item{tasks}{[DEPRECATED] numerical; define which tasks to perform: 1) Download Data, 2) Compile CSV of Downloaded Data, 3) Both.} - -\item{fallback}{[DEPRECATED] vector; if no year is specified, and the period of available data is unknown, this vector will provide a fallback start and end year to download. If not specified, such stations will be skipped.} - -\item{write_mode}{[DEPRECATED] character; if set to 'append', the script will append the data to an exisiting file; if set to 'overwrite', it will overwrite an existing file. If not set, it will not overwrite.} } \description{ Download compiled Peruvian historical climate data from the Senamhi web portal. diff --git a/man/write_data.Rd b/man/write_data.Rd deleted file mode 100644 index fbd9f84..0000000 --- a/man/write_data.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/write_data.R -\name{write_data} -\alias{write_data} -\title{[DEPRECATED] Compile data from the Peruvian National Hydrological and Meterological Service} -\usage{ -write_data(station, year, write_mode = "z", trim = TRUE, clean = FALSE) -} -\arguments{ -\item{station}{character; the station id number to process.} - -\item{year}{numerical; a vector of years to process.} - -\item{write_mode}{character; if set to 'append', the script will append the data to an exisiting file; if set to 'overwrite', it will overwrite an existing file. If not set, it will not overwrite.} - -\item{trim}{logical; if set to TRUE, the script will trim missing data from the start and end of the data set. Note only completely missing years will be trimmed.} - -\item{clean}{logical; if set to TRUE, the script will delete all of the downloaded HTML files.} -} -\value{ -None -} -\description{ -Compile a CSV file of Peruvian historical climate data from the Senamhi web portal. -} -\examples{ -\dontrun{write_data('000401', 2000:2005, trim = TRUE)} -} -\author{ -Conor I. Anderson -} diff --git a/tests/testthat/test-senamhiR.R b/tests/testthat/test-senamhiR.R index ae990fa..86bd350 100644 --- a/tests/testthat/test-senamhiR.R +++ b/tests/testthat/test-senamhiR.R @@ -23,5 +23,5 @@ test_that("senamhiR can filter by year", { ## should fail when no correct station is given test_that("senamhiR() fails when an incorrect station is requested", { - expect_error(senamhiR("foo"), "The station requested is not a valid station.", fixed=TRUE) + expect_error(senamhiR("foo"), "One or more requested stations invalid.", fixed=TRUE) })