From 36cde569eb44f28f5f6b894e74efd56ee2efc1f6 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 23 Sep 2023 16:54:17 +0200 Subject: [PATCH 1/6] Add arg "check." to get_environment_variables() and add more small functions to utils.R --- R/measurement-chains.R | 18 ++++----------- R/utils.R | 38 ++++++++++++++++++++++++++++++-- vignettes/measurement-chains.Rmd | 9 ++------ 3 files changed, 42 insertions(+), 23 deletions(-) diff --git a/R/measurement-chains.R b/R/measurement-chains.R index 5e239f1b..87bef3a4 100644 --- a/R/measurement-chains.R +++ b/R/measurement-chains.R @@ -42,23 +42,13 @@ get_measurementchains_metadata <- function( #' @importFrom stringr str_length create_sftp_connection <- function() { - con_vars <- c( + con <- get_environment_variables( server = "MESSKETTEN_SERVER", username = "MESSKETTEN_USER", - password = "MESSKETTEN_PASSWORD" + password = "MESSKETTEN_PASSWORD", + check. = TRUE ) - - con <- do.call(get_environment_variables, as.list(con_vars)) - - not_defined <- sapply(con, stringr::str_length) == 0L - - if (any(not_defined)) { - kwb.utils::stopFormatted( - "The following required environment variables are undefined/empty:\n%s", - paste0(con_vars[not_defined], collapse = ", ") - ) - } - + do.call(sftp::sftp_connect, con) } diff --git a/R/utils.R b/R/utils.R index 3334ecfd..e1f26e5f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,14 @@ +# all_defined ------------------------------------------------------------------ + +#' Check if all strings are not empty +#' +#' @param x vector of character +#' @return \code{TRUE} or \code{FALSE} +all_defined <- function(x) +{ + !any(is_empty_string(x)) +} + # exclude_missing_files -------------------------------------------------------- exclude_missing_files <- function(files) { @@ -21,10 +32,33 @@ exclude_missing_files <- function(files) files } +# extdata_file ----------------------------------------------------------------- +extdata_file <- function(...) +{ + system.file("extdata", ..., package = "kwb.geosalz") +} + # get_environment_variables ---------------------------------------------------- -get_environment_variables <- function(...) +get_environment_variables <- function(..., check. = FALSE) +{ + variables <- list(...) + + values <- lapply(variables, Sys.getenv) + + if (check. && any(is_empty <- is_empty_string(values))) { + kwb.utils::stopFormatted( + "The following required environment variables are undefined/empty:\n%s", + paste0(unlist(variables[is_empty]), collapse = ", ") + ) + } + + values +} + +# is_empty_string -------------------------------------------------------------- +is_empty_string <- function(x) { - lapply(list(...), Sys.getenv) + stringr::str_length(unlist(x)) == 0L } # or_pattern ------------------------------------------------------------------- diff --git a/vignettes/measurement-chains.Rmd b/vignettes/measurement-chains.Rmd index be978840..887606de 100644 --- a/vignettes/measurement-chains.Rmd +++ b/vignettes/measurement-chains.Rmd @@ -16,11 +16,6 @@ knitr::opts_chunk$set( # Load pipe operator `%>%` <- magrittr::`%>%` -# Helper function to check if all strings are not empty -all_defined <- function(x) { - all(sapply(x, stringr::str_length) > 0L) -} - # Get environment variables for access to SFTP server with input data con <- kwb.geosalz:::get_environment_variables( server = "MESSKETTEN_SERVER", @@ -36,8 +31,8 @@ nc <- kwb.geosalz:::get_environment_variables( ) # Are all environment variables defined? -con_defined <- all_defined(con) -nc_defined <- all_defined(nc) +con_defined <- kwb.geosalz:::all_defined(con) +nc_defined <- kwb.geosalz:::all_defined(nc) # Is this script running on a GitHub server? is_ghactions <- identical(Sys.getenv("CI"), "true") From 5952babdf09afa6ee61256c83a6545e535e654ca Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 23 Sep 2023 16:55:41 +0200 Subject: [PATCH 2/6] Improve readability using helper variables --- R/measurement-chains.R | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/R/measurement-chains.R b/R/measurement-chains.R index 87bef3a4..3d579804 100644 --- a/R/measurement-chains.R +++ b/R/measurement-chains.R @@ -1,7 +1,7 @@ #' Measurement Chains: Get Metadata #' -#' @param file path to measurement chains metadata file (default: -#' system.file("extdata/metadata_messketten.csv", package = "kwb.geosalz")) +#' @param file path to measurement chains metadata file. Default: +#' kwb.geosalz:::extdata_file("metadata_messketten.csv") #' @return tibble with measurement chains metadata #' @export #' @importFrom readr cols col_character col_integer col_double read_csv @@ -10,27 +10,27 @@ #' str(mc_metadata) #' mc_metadata get_measurementchains_metadata <- function( - file = system.file( - "extdata/metadata_messketten.csv", - package = "kwb.geosalz" - ) + file = extdata_file("metadata_messketten.csv") ) { - readr::read_csv( - file = file, - col_types = readr::cols( - "galerie" = readr::col_character(), - "brunnen_nummer" = readr::col_integer(), - "dn" = readr::col_integer(), - "einbau_pumpe" = readr::col_character(), - "einbau_messkette" = readr::col_character(), - "filteroberkante_muGOK" = readr::col_double(), - "filterunterkante_muGOK" = readr::col_double(), - "sensor_id" = readr::col_integer(), - "sensor_endnummer" = readr::col_integer(), - "einbau_sensor_muGOK" = readr::col_double() - ) + chr <- readr::col_character() + int <- readr::col_integer() + dbl <- readr::col_double() + + col_types <- readr::cols( + galerie = chr, + brunnen_nummer = int, + dn = int, + einbau_pumpe = chr, + einbau_messkette = chr, + filteroberkante_muGOK = dbl, + filterunterkante_muGOK = dbl, + sensor_id = int, + sensor_endnummer = int, + einbau_sensor_muGOK = dbl ) + + readr::read_csv(file, col_types = col_types) } #' Measurement Chains: Create an SFTP Connection From f820508153e259796a12145bcc8feb5d27d01ec0 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 23 Sep 2023 17:46:44 +0200 Subject: [PATCH 3/6] Use more piping, do not use double quotes and use dplyr::rename() correctly --- R/measurement-chains.R | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/R/measurement-chains.R b/R/measurement-chains.R index 3d579804..c56d0f88 100644 --- a/R/measurement-chains.R +++ b/R/measurement-chains.R @@ -77,7 +77,8 @@ get_measurementchains_files <- function( debug = FALSE ) { - file_info <- list_sftp_files(sftp_connection) %>% + file_info <- sftp_connection %>% + list_sftp_files() %>% kwb.utils::renameColumns(list(name = "sftp_path")) folder_file <- file_info %>% @@ -112,7 +113,10 @@ list_sftp_files <- function( # split_into_folder_and_file --------------------------------------------------- split_into_folder_and_file <- function(x) { - data.frame(folder = dirname(x), file = basename(x)) + data.frame( + folder = dirname(x), + file = basename(x) + ) } # split_into_galery_and_well --------------------------------------------------- @@ -143,8 +147,8 @@ split_into_sensor_and_datetime <- function(x) datum_uhrzeit = as.POSIXct( .data[["datum_uhrzeit"]], format = "%Y-%m-%d-%H%M", - #data is always CET without switching - #https://stackoverflow.com/a/38333522 + # data is always CET without switching + # https://stackoverflow.com/a/38333522 tz = "Etc/GMT-1" ) ) @@ -311,23 +315,21 @@ exclude_existing_paths <- function(paths, target) #' } read_measurementchain_data <- function(path) { - readr::read_csv( - file = path, - locale = readr::locale( - #data is always CET without switching - #https://stackoverflow.com/a/38333522 - tz = "Etc/GMT-1" - ), - col_types = readr::cols( - "Geraet" = readr::col_integer(), - "DatumUhrzeit" = readr::col_datetime(), - "Leitfaehigkeit" = readr::col_double(), - "Temperatur" = readr::col_double() - ) - ) %>% + path %>% + readr::read_csv( + # data is always CET without switching + # https://stackoverflow.com/a/38333522 + locale = readr::locale(tz = "Etc/GMT-1"), + col_types = readr::cols( + Geraet = readr::col_integer(), + DatumUhrzeit = readr::col_datetime(), + Leitfaehigkeit = readr::col_double(), + Temperatur = readr::col_double() + ) + ) %>% dplyr::rename( - sensor_id = .data$Geraet, - datum_uhrzeit = .data$DatumUhrzeit + sensor_id = "Geraet", + datum_uhrzeit = "DatumUhrzeit" ) %>% tidyr::pivot_longer( names_to = "parameter", From 1ade1f0bb6da7a6befc7ec0e77e46526cd22b654 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 23 Sep 2023 18:58:00 +0200 Subject: [PATCH 4/6] Use new utils functions as_(gmt_plus_one|utc)() --- R/get_measurement_chain_data_on_cloud.R | 19 ++----------- R/measurement-chains.R | 11 +++----- R/plot_measurementchains.R | 5 +--- R/utils.R | 37 +++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 28 deletions(-) diff --git a/R/get_measurement_chain_data_on_cloud.R b/R/get_measurement_chain_data_on_cloud.R index 0f1a13b3..bf038766 100644 --- a/R/get_measurement_chain_data_on_cloud.R +++ b/R/get_measurement_chain_data_on_cloud.R @@ -28,8 +28,8 @@ get_measurement_chain_data_on_cloud <- function(dbg = TRUE) unzip_first_file() %>% read.csv() %>% dplyr::mutate( - # Convert the date time character to POSIXct - datum_uhrzeit = utc_text_to_posix_gmt_plus_1(.data[["datum_uhrzeit"]]) + # Convert the date time character to POSIXct in GMT+1 + datum_uhrzeit = as_gmt_plus_one(as_utc(.data[["datum_uhrzeit"]])) ) } @@ -50,18 +50,3 @@ unzip_first_file <- function(file) # Return the full path to the unzipped file kwb.utils::safePath(exdir, filename) } - -# utc_text_to_posix_gmt_plus_1 ------------------------------------------------- -utc_text_to_posix_gmt_plus_1 <- function(x) -{ - # The given vector must be of type character - stopifnot(is.character(x)) - - # All elements in x must look like this: - # --TZ - stopifnot(all(grepl("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z$", x))) - - times <- as.POSIXct(x, format = "%Y-%m-%dT%H:%M:%SZ", tz = "UTC") - - structure(times, tzone = "Etc/GMT-1") -} diff --git a/R/measurement-chains.R b/R/measurement-chains.R index c56d0f88..d26669e2 100644 --- a/R/measurement-chains.R +++ b/R/measurement-chains.R @@ -144,12 +144,9 @@ split_into_sensor_and_datetime <- function(x) dplyr::mutate( sensor_id = as.integer(.data[["sensor_id"]]), sensor_endnummer = as.integer(.data[["sensor_endnummer"]]), - datum_uhrzeit = as.POSIXct( + datum_uhrzeit = as_gmt_plus_one( .data[["datum_uhrzeit"]], format = "%Y-%m-%d-%H%M", - # data is always CET without switching - # https://stackoverflow.com/a/38333522 - tz = "Etc/GMT-1" ) ) } @@ -343,8 +340,8 @@ read_measurementchain_data <- function(path) #' @param csv_files vector of paths as retrieved by #' \code{\link{download_measurementchains_data}} #' @param datetime_installation datetime of first logger installation in well K10. -#' Used to filter out older measurement data! (default: as.POSIXct("2022-09-27 11:00:00", -#' tz = "Etc/GMT-1") +#' Used to filter out older measurement data! Default: +#' kwb.geosalz:::as_gmt_plus_one("2022-09-27 11:00:00") #' @param run_parallel default: TRUE #' @param debug show debug messages (default: FALSE) #' @return data frame with imported data from csv files @@ -365,7 +362,7 @@ read_measurementchain_data <- function(path) #' } read_measurementchains_data <- function( csv_files, - datetime_installation = as.POSIXct("2022-09-27 11:00:00", tz = "Etc/GMT-1"), + datetime_installation = as_gmt_plus_one("2022-09-27 11:00:00"), run_parallel = TRUE, debug = FALSE ) diff --git a/R/plot_measurementchains.R b/R/plot_measurementchains.R index 64906bb5..fe0472aa 100644 --- a/R/plot_measurementchains.R +++ b/R/plot_measurementchains.R @@ -29,10 +29,7 @@ plot_measurementchains <- function(mc_data, para = "Leitfaehigkeit") dat %>% dplyr::filter(.data$brunnen_nummer == well_ids[i]) %>% dplyr::filter( - .data$datum_uhrzeit >= as.POSIXct( - "2022-09-27 11:00:00", - tz = "Etc/GMT-1" - ) + .data$datum_uhrzeit >= as_gmt_plus_one("2022-09-27 11:00:00") ) %>% dplyr::mutate( label = as.factor(sprintf( diff --git a/R/utils.R b/R/utils.R index e1f26e5f..82515453 100644 --- a/R/utils.R +++ b/R/utils.R @@ -9,6 +9,43 @@ all_defined <- function(x) !any(is_empty_string(x)) } +# as_gmt_plus_one -------------------------------------------------------------- +as_gmt_plus_one <- function(x, format = "%Y-%m-%d %H:%M:%S") +{ + # data is always CET without switching + # https://stackoverflow.com/a/38333522 + + # Timezone string. GMT-1 is correct! the result will be GMT+1, e.g. + # as_gmt_plus_one("2023-09-23 11:00:00") # "2023-09-23 11:00:00 +01" + + tzone <- "Etc/GMT-1" + + # If x is already a POSIXct object, change the tzone attribute + if (inherits(x, "POSIXct")) { + return(structure(x, tzone = tzone)) + } + + # Otherwise we expect x to be of type character + stopifnot(is.character(x)) + + # Convert character to POSIXct + as.POSIXct(x, format = format, tz = tzone) +} + +# as_utc ----------------------------------------------------------------------- +as_utc <- function(x) +{ + # The given vector must be of type character + stopifnot(is.character(x)) + + # All elements in x must look like this: + # --TZ + stopifnot(all(grepl("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z$", x))) + + # Convert character to POSIXct + as.POSIXct(x, format = "%Y-%m-%dT%H:%M:%SZ", tz = "UTC") +} + # exclude_missing_files -------------------------------------------------------- exclude_missing_files <- function(files) { From c9276d665beda7e000c791cddca3e26ca8e7090c Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 23 Sep 2023 18:58:14 +0200 Subject: [PATCH 5/6] Update Rd files --- man/all_defined.Rd | 17 +++++++++++++++++ man/get_measurementchains_metadata.Rd | 8 +++----- man/read_measurementchains_data.Rd | 6 +++--- 3 files changed, 23 insertions(+), 8 deletions(-) create mode 100644 man/all_defined.Rd diff --git a/man/all_defined.Rd b/man/all_defined.Rd new file mode 100644 index 00000000..098675f2 --- /dev/null +++ b/man/all_defined.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{all_defined} +\alias{all_defined} +\title{Check if all strings are not empty} +\usage{ +all_defined(x) +} +\arguments{ +\item{x}{vector of character} +} +\value{ +\code{TRUE} or \code{FALSE} +} +\description{ +Check if all strings are not empty +} diff --git a/man/get_measurementchains_metadata.Rd b/man/get_measurementchains_metadata.Rd index 718f4eda..65c4c43c 100644 --- a/man/get_measurementchains_metadata.Rd +++ b/man/get_measurementchains_metadata.Rd @@ -4,13 +4,11 @@ \alias{get_measurementchains_metadata} \title{Measurement Chains: Get Metadata} \usage{ -get_measurementchains_metadata( - file = system.file("extdata/metadata_messketten.csv", package = "kwb.geosalz") -) +get_measurementchains_metadata(file = extdata_file("metadata_messketten.csv")) } \arguments{ -\item{file}{path to measurement chains metadata file (default: -system.file("extdata/metadata_messketten.csv", package = "kwb.geosalz"))} +\item{file}{path to measurement chains metadata file. Default: +kwb.geosalz:::extdata_file("metadata_messketten.csv")} } \value{ tibble with measurement chains metadata diff --git a/man/read_measurementchains_data.Rd b/man/read_measurementchains_data.Rd index fb63f7fb..ed53ebac 100644 --- a/man/read_measurementchains_data.Rd +++ b/man/read_measurementchains_data.Rd @@ -6,7 +6,7 @@ \usage{ read_measurementchains_data( csv_files, - datetime_installation = as.POSIXct("2022-09-27 11:00:00", tz = "Etc/GMT-1"), + datetime_installation = as_gmt_plus_one("2022-09-27 11:00:00"), run_parallel = TRUE, debug = FALSE ) @@ -16,8 +16,8 @@ read_measurementchains_data( \code{\link{download_measurementchains_data}}} \item{datetime_installation}{datetime of first logger installation in well K10. -Used to filter out older measurement data! (default: as.POSIXct("2022-09-27 11:00:00", -tz = "Etc/GMT-1")} +Used to filter out older measurement data! Default: +kwb.geosalz:::as_gmt_plus_one("2022-09-27 11:00:00")} \item{run_parallel}{default: TRUE} From d05df798c2f19eaaa7fedaf17901417aa8eeb733 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 23 Sep 2023 19:20:52 +0200 Subject: [PATCH 6/6] Fix bug: remove extra comma --- R/measurement-chains.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/measurement-chains.R b/R/measurement-chains.R index d26669e2..c4e6fe99 100644 --- a/R/measurement-chains.R +++ b/R/measurement-chains.R @@ -146,7 +146,7 @@ split_into_sensor_and_datetime <- function(x) sensor_endnummer = as.integer(.data[["sensor_endnummer"]]), datum_uhrzeit = as_gmt_plus_one( .data[["datum_uhrzeit"]], - format = "%Y-%m-%d-%H%M", + format = "%Y-%m-%d-%H%M" ) ) }