diff --git a/NAMESPACE b/NAMESPACE index d21d6bd..fb17b02 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ import(stars) import(twdtw) importFrom(mgcv,gam) importFrom(mgcv,predict.gam) +importFrom(mgcv,s) importFrom(reshape2,melt) importFrom(rlang,.data) importFrom(scales,date_format) @@ -16,3 +17,4 @@ importFrom(scales,percent) importFrom(scales,pretty_breaks) importFrom(stats,as.formula) importFrom(stats,predict) +importFrom(stats,setNames) diff --git a/R/create_patterns.R b/R/create_patterns.R index 12d7198..5ac7552 100644 --- a/R/create_patterns.R +++ b/R/create_patterns.R @@ -2,15 +2,14 @@ #' #' This function creates a pattern based on Generalized Additive Models (GAM). #' It uses the specified formula to fit the model and predict values. -#' It operates on stars objects (for spatial-temporal data) and sf objects (for spatial data). #' -#' @param x A stars object representing spatial-temporal data. -#' @param y An sf object representing spatial data with associated attributes. -#' @param formula A formula for the GAM. +#' @param x A three dimensions stars object (x, y, time) with the satellite image time series. +#' @param y An sf object with the coordinates of the training points. +#' @param formula A formula for the GAM. Default is \code{band ~ \link[mgcv]{s}(time)}. #' @param start_column Name of the column in y that indicates the start date. Default is 'start_date'. #' @param end_column Name of the column in y that indicates the end date. Default is 'end_date'. -#' @param label_colum Name of the column in y that contains labels. Default is 'label'. -#' @param sampling_freq The time frequency for sampling. If NULL, the function will infer it. +#' @param label_colum Name of the column in y that contains land use labels. Default is 'label'. +#' @param sampling_freq The time frequency for sampling including unit, e.g '16 day'. If NULL, the function will infer it. #' @param ... Additional arguments passed to the GAM function. #' #' @return A list containing the predicted values for each label. @@ -18,7 +17,7 @@ #' #' #' @export -create_patterns = function(x, y, formula, start_column = 'start_date', +create_patterns = function(x, y, formula = band ~ s(time), start_column = 'start_date', end_column = 'end_date', label_colum = 'label', sampling_freq = NULL, ...){ @@ -39,6 +38,11 @@ create_patterns = function(x, y, formula, start_column = 'start_date', stop(paste("Missing required columns in y:", paste(missing_columns, collapse = ", "))) } + # Check if formula has two + if(length(all.vars(formula)) != 2) { + stop("The formula should have only one predictor!") + } + # Convert columns to date-time y[ , start_column] <- to_date_time(y[[start_column]]) y[ , end_column] <- to_date_time(y[[end_column]]) @@ -66,29 +70,27 @@ create_patterns = function(x, y, formula, start_column = 'start_date', sampling_freq <- get_stars_time_freq(x) } - # Extract variables from formula - vars <- all.vars(formula) - # Define GAM function - gam_fun = function(y, t, formula_vars, ...){ - df = data.frame(y, t = as.numeric(t)) - names(df) <- formula_vars - fit = mgcv::gam(data = df, formula = as.formula(paste(formula_vars[1], "~", formula_vars[2])), ...) - pred_t = data.frame(t = as.numeric(seq(min(t), max(t), by = sampling_freq))) + gam_fun <- function(y, t, formula, ...){ + df <- data.frame(y, t = as.numeric(t)) + df <- setNames(list(y, as.numeric(t)), all.vars(formula)) + fit <- mgcv::gam(data = df, formula = formula, ...) + pred_t <- setNames(list(as.numeric(seq(min(t), max(t), by = sampling_freq))), all.vars(formula)[2]) predict(fit, newdata = pred_t) } # Apply GAM function - res <- lapply(y_ts, function(ts){ + patterns <- lapply(y_ts, function(ts){ y_time <- ts$time ts$time <- NULL ts$id <- NULL sapply(as.list(ts), function(y) { - gam_fun(y, y_time, vars) + gam_fun(y, y_time, formula, ...) }) }) - return(res) + return(patterns) + } @@ -119,24 +121,26 @@ extract_time_series <- function(x, y) { #' This function calculates the most common difference between consecutive time points in a stars object. #' This can be useful for determining the sampling frequency of the time series data. #' -#' @param stars_object A stars object containing time series data. +#' @param x A stars object containing time series data. #' #' @return A difftime object representing the most common time difference between consecutive samples. #' #' -get_stars_time_freq <- function(stars_object) { +get_stars_time_freq <- function(x) { # Extract the time dimension - time_values <- st_get_dimension_values(stars_object, "time") + time_values <- st_get_dimension_values(x, "time") # Compute the differences between consecutive time points time_diffs <- diff(time_values) - # Convert differences to days + # Convert differences to days (while retaining the difftime class) time_diffs <- as.difftime(time_diffs, units = "days") - # Compute the most common difference (mode) - freq <- unique(time_diffs)[which.max(tabulate(match(time_diffs, unique(time_diffs))))] + # Identify the mode + mode_val_index <- which.max(tabulate(match(time_diffs, unique(time_diffs)))) + freq <- diff(time_values[mode_val_index:(mode_val_index+1)]) return(freq) } + diff --git a/R/zzz.R b/R/zzz.R index 06468d0..499fd6c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -2,8 +2,8 @@ #' @import sf #' @import stars #' @import ggplot2 -#' @importFrom stats as.formula predict -#' @importFrom mgcv gam predict.gam +#' @importFrom stats as.formula predict setNames +#' @importFrom mgcv gam s predict.gam #' @importFrom scales pretty_breaks date_format percent #' @importFrom reshape2 melt #' @importFrom rlang .data diff --git a/man/create_patterns.Rd b/man/create_patterns.Rd index 389c860..0ff9bd5 100644 --- a/man/create_patterns.Rd +++ b/man/create_patterns.Rd @@ -7,7 +7,7 @@ create_patterns( x, y, - formula, + formula = band ~ s(time), start_column = "start_date", end_column = "end_date", label_colum = "label", @@ -16,19 +16,19 @@ create_patterns( ) } \arguments{ -\item{x}{A stars object representing spatial-temporal data.} +\item{x}{A three dimensions stars object (x, y, time) with the satellite image time series.} -\item{y}{An sf object representing spatial data with associated attributes.} +\item{y}{An sf object with the coordinates of the training points.} -\item{formula}{A formula for the GAM.} +\item{formula}{A formula for the GAM. Default is \code{band ~ \link[mgcv]{s}(time)}.} \item{start_column}{Name of the column in y that indicates the start date. Default is 'start_date'.} \item{end_column}{Name of the column in y that indicates the end date. Default is 'end_date'.} -\item{label_colum}{Name of the column in y that contains labels. Default is 'label'.} +\item{label_colum}{Name of the column in y that contains land use labels. Default is 'label'.} -\item{sampling_freq}{The time frequency for sampling. If NULL, the function will infer it.} +\item{sampling_freq}{The time frequency for sampling including unit, e.g '16 day'. If NULL, the function will infer it.} \item{...}{Additional arguments passed to the GAM function.} } @@ -38,5 +38,4 @@ A list containing the predicted values for each label. \description{ This function creates a pattern based on Generalized Additive Models (GAM). It uses the specified formula to fit the model and predict values. -It operates on stars objects (for spatial-temporal data) and sf objects (for spatial data). } diff --git a/man/get_stars_time_freq.Rd b/man/get_stars_time_freq.Rd index 97a3ad4..47d90c4 100644 --- a/man/get_stars_time_freq.Rd +++ b/man/get_stars_time_freq.Rd @@ -4,10 +4,10 @@ \alias{get_stars_time_freq} \title{Compute the Most Common Sampling Frequency in a Stars Object} \usage{ -get_stars_time_freq(stars_object) +get_stars_time_freq(x) } \arguments{ -\item{stars_object}{A stars object containing time series data.} +\item{x}{A stars object containing time series data.} } \value{ A difftime object representing the most common time difference between consecutive samples. diff --git a/tests/testthat/test-twdtw_classify.R b/tests/testthat/test-twdtw_classify.R index 024121c..f5c0c04 100644 --- a/tests/testthat/test-twdtw_classify.R +++ b/tests/testthat/test-twdtw_classify.R @@ -21,7 +21,7 @@ dc <- read_stars(tif_files, proxy = FALSE, along = list(time = acquisition_date) dc <- dc[c("EVI", "NDVI", "RED", "BLUE", "NIR", "MIR")] # Get temporal patters -ts_patterns <- create_patterns(x = dc, y = samples, formula = band ~ s(t), sampling_freq = 23) +ts_patterns <- create_patterns(x = dc, y = samples) # Visualize patterns plot_patterns(ts_patterns)