From 63291504b656e1a02effa33af5d84072ba3328aa Mon Sep 17 00:00:00 2001 From: vwmaus Date: Fri, 8 Sep 2023 00:21:47 +0200 Subject: [PATCH] Adds knn-1-twdtw train and predict methods for stars objects --- DESCRIPTION | 7 +- NAMESPACE | 14 +-- R/create_patterns.R | 146 ------------------------- R/plot_patterns.R | 62 +++++------ R/predict.R | 34 ++++++ R/prepare_time_series.R | 81 ++++++++++++++ R/{utils.R => shift_dates.R} | 20 +--- R/train.R | 154 +++++++++++++++++++++++++++ R/zzz.R | 6 +- inst/mato_grosso_brazil/samples.gpkg | Bin 135168 -> 135168 bytes man/create_patterns.Rd | 41 ------- man/extract_time_series.Rd | 21 ---- man/get_stars_time_freq.Rd | 2 +- man/knn1_twdtw.Rd | 53 +++++++++ man/plot.knn1_twdtw.Rd | 31 ++++++ man/plot_patterns.Rd | 26 ----- man/predict.knn1_twdtw.Rd | 24 +++++ man/prepare_time_series.Rd | 20 ++++ man/shift_dates.Rd | 2 +- tests/testthat/test-twdtw_classify.R | 30 +++--- 20 files changed, 464 insertions(+), 310 deletions(-) delete mode 100644 R/create_patterns.R create mode 100644 R/predict.R create mode 100644 R/prepare_time_series.R rename R/{utils.R => shift_dates.R} (76%) create mode 100644 R/train.R delete mode 100644 man/create_patterns.Rd delete mode 100644 man/extract_time_series.Rd create mode 100644 man/knn1_twdtw.Rd create mode 100644 man/plot.knn1_twdtw.Rd delete mode 100644 man/plot_patterns.Rd create mode 100644 man/predict.knn1_twdtw.Rd create mode 100644 man/prepare_time_series.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 4e08634..51051a4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,11 +47,10 @@ Depends: Imports: mgcv, stats, - scales, - reshape2, - rlang + tidyr, + rlang, + proxy Suggests: rbenchmark, - stringr, testthat (>= 3.0.0) Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index fb17b02..d91ee48 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,8 @@ # Generated by roxygen2: do not edit by hand -export(create_patterns) -export(plot_patterns) +S3method(plot,knn1_twdtw) +S3method(predict,knn1_twdtw) +export(knn1_twdtw) export(shift_dates) import(ggplot2) import(sf) @@ -10,11 +11,12 @@ import(twdtw) importFrom(mgcv,gam) importFrom(mgcv,predict.gam) importFrom(mgcv,s) -importFrom(reshape2,melt) +importFrom(proxy,dist) importFrom(rlang,.data) -importFrom(scales,date_format) -importFrom(scales,percent) -importFrom(scales,pretty_breaks) importFrom(stats,as.formula) importFrom(stats,predict) importFrom(stats,setNames) +importFrom(tidyr,nest) +importFrom(tidyr,pivot_longer) +importFrom(tidyr,pivot_wider) +importFrom(tidyr,unnest) diff --git a/R/create_patterns.R b/R/create_patterns.R deleted file mode 100644 index 856d1dd..0000000 --- a/R/create_patterns.R +++ /dev/null @@ -1,146 +0,0 @@ -#' Create a Pattern Using GAM -#' -#' This function creates a pattern based on Generalized Additive Models (GAM). -#' It uses the specified formula to fit the model and predict values. -#' -#' @param x A three dimensions stars object (x, y, time) with the satellite image time series. -#' @param y An sf object with the coordinates of the training points. -#' @param formula A formula for the GAM. Default is \code{band ~ \link[mgcv]{s}(time)}. -#' @param start_column Name of the column in y that indicates the start date. Default is 'start_date'. -#' @param end_column Name of the column in y that indicates the end date. Default is 'end_date'. -#' @param label_colum Name of the column in y that contains land use labels. Default is 'label'. -#' @param sampling_freq The time frequency for sampling including unit, e.g '16 day'. If NULL, the function will infer it. -#' @param ... Additional arguments passed to the GAM function. -#' -#' @return A list containing the predicted values for each label. -#' -#' -#' -#' @export -create_patterns = function(x, y, formula = band ~ s(time), start_column = 'start_date', - end_column = 'end_date', label_colum = 'label', - sampling_freq = NULL, ...){ - - # Check if x is a stars object with a time dimension - if (!inherits(x, "stars") || dim(x)['time'] < 1) { - stop("x must be a stars object with a 'time' dimension") - } - - # Check if y is an sf object with point geometry - if (!inherits(y, "sf") || !all(st_is(y, "POINT"))) { - stop("y must be an sf object with point geometry") - } - - # Check for required columns in y - required_columns <- c(start_column, end_column, label_colum) - missing_columns <- setdiff(required_columns, names(y)) - if (length(missing_columns) > 0) { - stop(paste("Missing required columns in y:", paste(missing_columns, collapse = ", "))) - } - - # Check if formula has two - if(length(all.vars(formula)) != 2) { - stop("The formula should have only one predictor!") - } - - # Convert columns to date-time - y[ , start_column] <- to_date_time(y[[start_column]]) - y[ , end_column] <- to_date_time(y[[end_column]]) - - # Extract time series from stars - y_ts <- extract_time_series(x, y) - y_ts$geom <- NULL - - # Shift dates - unique_ids <- unique(y_ts$id) - for (id in seq_along(unique_ids)) { - idx <- y_ts$id == unique_ids[id] - y_ts[idx, 'time'] <- shift_dates(y_ts[idx, 'time']) - } - - # Split data frame by label and remove label column - y_ts <- lapply(split(y_ts, y_ts$label), function(df) { - df$label <- NULL - return(df) - }) - - # Determine sampling frequency - if (is.null(sampling_freq)) { - sampling_freq <- get_stars_time_freq(x) - cat("Sampling frequency inferred from the stars object:", as.numeric(sampling_freq), attr(sampling_freq, "units"), "\n") - } - - # Define GAM function - gam_fun <- function(y, t, formula, ...){ - df <- data.frame(y, t = as.numeric(t)) - df <- setNames(list(y, as.numeric(t)), all.vars(formula)) - fit <- mgcv::gam(data = df, formula = formula, ...) - pred_t <- setNames(list(as.numeric(seq(min(t), max(t), by = sampling_freq))), all.vars(formula)[2]) - predict(fit, newdata = pred_t) - } - - # Apply GAM function - patterns <- lapply(y_ts, function(ts){ - y_time <- ts$time - ts$time <- NULL - ts$id <- NULL - sapply(as.list(ts), function(y) { - gam_fun(y, y_time, formula, ...) - }) - }) - - return(patterns) - -} - - - -#' Extract Time Series from a Stars Object for Specified Points -#' -#' This function extracts a time series from a stars object for each specified point in the sf object. -#' Each extracted sample is then labeled with an ID and the label from the sf object. -#' -#' @param x A stars object containing the raster time series data. -#' @param y An sf object containing the point geometries and their associated labels. -#' -#' @return A data.frame with the extracted time series for each point in the sf object, -#' with additional columns for the ID and label of each sample. -#' -#' -extract_time_series <- function(x, y) { - ts_samples <- st_extract(x, y) - ts_samples$id <- 1:dim(ts_samples)["geom"] - ts_samples$label <- y$label - as.data.frame(ts_samples) -} - - - -#' Compute the Most Common Sampling Frequency in a Stars Object -#' -#' This function calculates the most common difference between consecutive time points in a stars object. -#' This can be useful for determining the sampling frequency of the time series data. -#' -#' @param x A stars object containing time series data. -#' -#' @return A difftime object representing the most common time difference between consecutive samples. -#' -#' -get_stars_time_freq <- function(x) { - - # Extract the time dimension - time_values <- st_get_dimension_values(x, "time") - - # Compute the differences between consecutive time points - time_diffs <- diff(time_values) - - # Convert differences to days (while retaining the difftime class) - time_diffs <- as.difftime(time_diffs, units = "days") - - # Identify the mode - mode_val_index <- which.max(tabulate(match(time_diffs, unique(time_diffs)))) - freq <- diff(time_values[mode_val_index:(mode_val_index+1)]) - - return(freq) -} - diff --git a/R/plot_patterns.R b/R/plot_patterns.R index dd983b8..bed19f3 100644 --- a/R/plot_patterns.R +++ b/R/plot_patterns.R @@ -1,41 +1,43 @@ #' Plot Patterns from Time Series Data #' -#' This function takes a list of time series data and creates a multi-faceted plot -#' where each facet corresponds to a different time series from the list. -#' Within each facet, different attributes (columns of the time series) are -#' plotted as lines with different colors. +#' This function visualizes time series data patterns from the \code{"knn1_twdtw"} model. +#' It produces a multi-faceted plot, +#' where each facet represents a different time series pattern from the model's data. +#' Within each facet, different attributes (e.g., bands or indices) are plotted as +#' distinct lines, differentiated by color. #' -#' @param x A list where each element is a data.frame representing a time series. -#' Each data.frame should have the same number of rows and columns, -#' with columns representing different attributes (e.g., bands or indices) -#' and rows representing time points. -#' The name of each element in the list will be used as the facet title. +#' @param x A model of class \code{"knn1_twdtw"}. #' -#' @param ... Not used. +#' @param n An integer specifying the number of patterns to plot. Default is 12. #' -#' @return A ggplot object displaying the time series patterns. +#' @param ... Additional arguments. Currently not used. +#' +#' @details +#' The function is designed for visual inspection of time series patterns from the model. +#' Each data frame in the model's data represents a time series. Columns within the data frame +#' correspond to different attributes (e.g., bands or indices), while rows represent time points. +#' The facet title is derived from the name of each time series in the model's data. +#' +#' @return A \code{\link[ggplot2]{ggplot}} object displaying the time series patterns. #' #' @export -plot_patterns = function(x, ...) { - +plot.knn1_twdtw <- function(x, n = 12, ...) { + # Convert the list of time series data into a long-format data.frame - df.p = do.call("rbind", lapply(names(x), function(p) { - ts = x[[p]] - # Create a new data.frame with a 'Time' column and a 'Pattern' column - # representing the name of the current time series (facet name). - data.frame(Time = 1:nrow(ts), ts, Pattern = p) # Assuming the time series are evenly spaced - })) - + df <- unnest(x$data[1:n, ], cols = .data$observations) + # Melt the data into long format suitable for ggplot2 - df.p = melt(df.p, id.vars = c("Time", "Pattern")) - + df <- pivot_longer(df, !c(.data$label, .data$time), names_to = "band", values_to = "value") + # Construct the ggplot - gp = ggplot(df.p, aes(x = .data$Time, y = .data$value, colour = .data$variable)) + - geom_line() + - facet_wrap(~Pattern) + - theme(legend.position = "bottom") + - guides(colour = guide_legend(title = "Bands")) + - ylab("Value") - + gp <- ggplot(df, aes(x = .data$time, y = .data$value, colour = .data$band)) + + geom_line() + + facet_wrap(~label) + + theme(legend.position = "bottom") + + guides(colour = guide_legend(title = "Bands")) + + ylab("Value") + + xlab("Time") + return(gp) -} \ No newline at end of file + +} diff --git a/R/predict.R b/R/predict.R new file mode 100644 index 0000000..1df788a --- /dev/null +++ b/R/predict.R @@ -0,0 +1,34 @@ +#' Predict using the knn1_twdtw model +#' +#' This function predicts the classes of new data using the Time Warped Dynamic Time Warping (TWDTW) +#' method with a 1-nearest neighbor approach. The prediction is based on the minimum TWDTW distance +#' to the known patterns stored in the `knn1_twdtw` model. +#' +#' @param object A `knn1_twdtw` model object generated by the `knn1_twdtw` function. +#' @param newdata A data frame or similar object containing the new observations +#' (time series data) to be predicted. +#' @param ... Additional arguments passed to the \link[twdtw]{twdtw} function. +#' +#' @return A vector of predicted classes for the `newdata`. +#' +#' +#' @export +predict.knn1_twdtw <- function(object, newdata, ...){ + + + # Convert newdata to time series + newdata_ts <- prepare_time_series(newdata) + + # Compute TWDTW distances + distances <- sapply(object$data$observations, function(pattern){ + sapply(newdata_ts$observations, function(ts) { + proxy::dist(x = as.data.frame(ts), y = as.data.frame(pattern), method = "twdtw", ...) + }) + }) + + # Find the nearest neighbor for each observation in newdata + nearest_neighbor <- apply(distances, 1, which.min) + + # Return the predicted label for each observation based on the nearest neighbor + return(factor(object$data$label[nearest_neighbor])) +} diff --git a/R/prepare_time_series.R b/R/prepare_time_series.R new file mode 100644 index 0000000..d53bba4 --- /dev/null +++ b/R/prepare_time_series.R @@ -0,0 +1,81 @@ +#' Prepare a Time Series Tibble from a 2D stars Object with Bands and Time Attributes +#' +#' This function reshapes a data frame, which has been converted from a stars object, into a nested wide tibble format. +#' The stars object conversion often results in columns named in formats like "band.YYYY.MM.DD", "XYYYY.MM.DD.band", or "YYYY.MM.DD.band". +#' +#' @param x A data frame derived from a stars object containing time series data in wide format. +#' The column names should adhere to one of the following formats: "band.YYYY.MM.DD", "XYYYY.MM.DD.band", or "YYYY.MM.DD.band". +#' +#' @return A nested tibble in wide format. Each row of the tibble corresponds to a unique 'ts_id' that maintains the order from the original stars object. +#' The nested structure contains observations (time series) for each 'ts_id', including the 'time' of each observation, and individual bands are presented as separate columns. +#' +#' +prepare_time_series <- function(x) { + + # Remove the 'geom' column if it exists + x$geom <- NULL + var_names <- names(x) + var_names <- var_names[!var_names %in% 'label'] + + # Extract date and band information from the column names + date_band <- do.call(rbind, lapply(var_names, function(name) { + + # Replace any hyphen with periods for consistent processing + name <- gsub("-", "\\.", name) + + # Extract date and band info based on different name patterns + if (grepl("^.+\\.\\d{4}\\.\\d{2}\\.\\d{2}$", name)) { + date_str <- gsub("^.*?(\\d{4}\\.\\d{2}\\.\\d{2})$", "\\1", name) + band_str <- gsub("^(.+)\\.\\d{4}\\.\\d{2}\\.\\d{2}$", "\\1", name) + } + else if (grepl("^X\\d{4}\\.\\d{2}\\.\\d{2}\\..+$", name)) { + date_str <- gsub("^X(\\d{4}\\.\\d{2}\\.\\d{2})\\..+$", "\\1", name) + band_str <- gsub("^X\\d{4}\\.\\d{2}\\.\\d{2}\\.(.+)$", "\\1", name) + } + else if (grepl("^\\d{4}\\.\\d{2}\\.\\d{2}\\..+$", name)) { + date_str <- gsub("^(\\d{4}\\.\\d{2}\\.\\d{2})\\..+$", "\\1", name) + band_str <- gsub("^\\d{4}\\.\\d{2}\\.\\d{2}\\.(.+)$", "\\1", name) + } + else { + stop(paste("Unrecognized format in:", name)) + } + + # Convert the date string to Date format + date <- to_date_time(gsub("\\.", "-", date_str)) + + return(data.frame(time = date, band = band_str)) + })) + + # Construct tiem sereis + ns <- nrow(x) + x$ts_id <- 1:ns + if (!'label' %in% names(x)) { + x$label <- NA + } + x <- pivot_longer(x, !c(.data$ts_id, .data$label), names_to = "band_date", values_to = "value") + x$band <- rep(date_band$band, ns) + x$time <- rep(date_band$time, ns) + x$band_date <- NULL + result_df <- pivot_wider(x, id_cols = c(.data$ts_id, .data$label, .data$time), names_from = 'band', values_from = 'value') + result_df <- nest(result_df, .by = c(.data$ts_id, .data$label), .key = "observations") + + return(result_df) + +} + + +#### TO BE REMOVED: twdtw package will export this fucntion +to_date_time <- function(x){ + if (!inherits(x, c("Date", "POSIXt"))) { + # check if all strings in the vector include hours, minutes, and seconds + if (all(grepl("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", x))) { + x <- try(as.POSIXct(x), silent = TRUE) + } else { + x <- try(as.Date(x), silent = TRUE) + } + if (inherits(x, "try-error")) { + stop("Some elements of x could not be converted to a date or datetime format") + } + } + return(x) +} diff --git a/R/utils.R b/R/shift_dates.R similarity index 76% rename from R/utils.R rename to R/shift_dates.R index b9bb556..a37e060 100644 --- a/R/utils.R +++ b/R/shift_dates.R @@ -43,22 +43,8 @@ shift_dates <- function(x, origin = "1970-01-01") { return(shifted_dates) } -#### TO BE REMOVED: twdtw package will export this fucntion -to_date_time <- function(x){ - if (!inherits(x, c("Date", "POSIXt"))) { - # check if all strings in the vector include hours, minutes, and seconds - if (all(grepl("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", x))) { - x <- try(as.POSIXct(x), silent = TRUE) - } else { - x <- try(as.Date(x), silent = TRUE) - } - if (inherits(x, "try-error")) { - stop("Some elements of x could not be converted to a date or datetime format") - } - } + +shift_ts_dates <- function(x) { + x$time <- shift_dates(x$time) return(x) } - - - - diff --git a/R/train.R b/R/train.R new file mode 100644 index 0000000..06026c9 --- /dev/null +++ b/R/train.R @@ -0,0 +1,154 @@ +#' +#' Train a KNN-1 TWDTW model with optional GAM resampling +#' +#' This function prepares a KNN-1 model with the Time Warp Dynamic Time Warping (TWDTW) algorithm. +#' If a formula is provided, the training samples are resampled using Generalized Additive Models (GAM). +#' +#' @param x A two-dimensional stars object (x, y) with time and bands as attributes. +#' @param y An sf object with the coordinates of the training points. +#' @param formula Either NULL or a formula to reduce samples of the same label using Generalized Additive Models (GAM). +#' Default is \code{band ~ s(time)}. See details. +#' @param start_column Name of the column in y that indicates the start date. Default is 'start_date'. +#' @param end_column Name of the column in y that indicates the end date. Default is 'end_date'. +#' @param label_colum Name of the column in y containing land use labels. Default is 'label'. +#' @param sampling_freq The time frequency for sampling, including the unit (e.g., '16 day'). +#' If NULL, the function will infer the frequency. This parameter is only used if a formula is provided. +#' @param ... Additional arguments passed to the GAM function. +#' +#' @details If \code{formula} is NULL, the KNN-1 model will retain all training samples. If a formula is passed (e.g., \code{band ~ \link[mgcv]{s}(time)}), +#' then samples of the same label (land cover class) will be resampled using GAM. +#' Resampling can significantly reduce prediction processing time. +#' +#' @return A 'knn1_twdtw' model containing the trained model information and the data used. +#' +#' @examples +#' +#' # Example will be added once the function is properly implemented and tested. +#' +#' @export +knn1_twdtw <- function(x, y, formula = NULL, start_column = 'start_date', + end_column = 'end_date', label_colum = 'label', + sampling_freq = NULL, ...){ + + # Check if x is a stars object with a time dimension + if (!inherits(x, "stars") || length(dim(x)) != 2) { + stop("x must be a stars object with two dimensions") + } + + # Check if y is an sf object with point geometry + if (!inherits(y, "sf") || !all(st_is(y, "POINT"))) { + stop("y must be an sf object with point geometry") + } + + # Check for required columns in y + required_columns <- c(start_column, end_column, label_colum) + missing_columns <- setdiff(required_columns, names(y)) + if (length(missing_columns) > 0) { + stop(paste("Missing required columns in y:", paste(missing_columns, collapse = ", "))) + } + + # Convert columns to date-time + y[, start_column] <- to_date_time(y[[start_column]]) + y[, end_column] <- to_date_time(y[[end_column]]) + + # Prepare time series samples from stars object + ts_data <- st_extract(x, y) + ts_data$label <- y[[label_colum]] + ts_data <- prepare_time_series(as.data.frame(ts_data)) + ts_data$ts_id <- NULL + + if(!is.null(formula)) { + + # Check if formula has two + if(length(all.vars(formula)) != 2) { + stop("The formula should have only one predictor!") + } + + # Determine sampling frequency + if (is.null(sampling_freq)) { + sampling_freq <- get_time_series_freq(ts_data) + } + + # Shift dates + ts_data$observations <- lapply(ts_data$observations, shift_ts_dates) + + # Split data frame by label + ts_data <- unnest(ts_data, cols = .data$observations) + ts_data <- nest(ts_data, .by = .data$label, .key = "observations") + + # Define GAM function + gam_fun <- function(band, t, pred_t, formula, ...){ + df <- setNames(list(band, as.numeric(t)), all.vars(formula)) + pred_t[[all.vars(formula)[2]]] <- as.numeric(pred_t[[all.vars(formula)[2]]]) + fit <- mgcv::gam(data = df, formula = formula, ...) + predict(fit, newdata = pred_t) + } + + # Apply GAM function + ts_data$observations <- lapply(ts_data$observations, function(ts){ + y_time <- ts$time + ts$time <- NULL + pred_time <- setNames(list(seq(min(y_time), max(y_time), by = sampling_freq)), all.vars(formula)[2]) + cbind(pred_time, as.data.frame(sapply(as.list(ts), function(band) { + gam_fun(band, y_time, pred_time, formula, ...) + }))) + }) + + } + + model <- list() + model$call <- match.call() + model$formula <- formula + model$data <- ts_data + class(model) <- "knn1_twdtw" + + return(model) + +} + +#' Compute the Most Common Sampling Frequency in a Stars Object +#' +#' This function calculates the most common difference between consecutive time points in a stars object. +#' This can be useful for determining the sampling frequency of the time series data. +#' +#' @param x A stars object containing time series data. +#' +#' @return A difftime object representing the most common time difference between consecutive samples. +#' +#' +get_stars_time_freq <- function(x) { + + # Extract the time dimension + time_values <- st_get_dimension_values(x, "time") + + # Compute the differences between consecutive time points + time_diffs <- diff(time_values) + + # Convert differences to days (while retaining the difftime class) + time_diffs <- as.difftime(time_diffs, units = "days") + + # Identify the mode + mode_val_index <- which.max(tabulate(match(time_diffs, unique(time_diffs)))) + freq <- diff(time_values[mode_val_index:(mode_val_index+1)]) + + return(freq) +} + +get_time_series_freq <- function(x) { + + # Extract the time dimension + time_values <- unlist(lapply(x$observations, function(ts) ts$time)) + + # Compute the differences between consecutive time points + time_diffs <- unlist(lapply(x$observations, function(ts) diff(ts$time))) + + # Convert differences to days (while retaining the difftime class) + time_diffs <- as.difftime(time_diffs, units = "days") + + # Identify the mode + mode_val_index <- which.max(tabulate(match(time_diffs, unique(time_diffs)))) + freq <- diff(time_values[mode_val_index:(mode_val_index+1)]) + + return(freq) + +} \ No newline at end of file diff --git a/R/zzz.R b/R/zzz.R index 499fd6c..9e28f89 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -3,9 +3,9 @@ #' @import stars #' @import ggplot2 #' @importFrom stats as.formula predict setNames -#' @importFrom mgcv gam s predict.gam -#' @importFrom scales pretty_breaks date_format percent -#' @importFrom reshape2 melt +#' @importFrom mgcv gam s predict.gam #' @importFrom rlang .data +#' @importFrom tidyr pivot_longer pivot_wider nest unnest +#' @importFrom proxy dist #' NULL diff --git a/inst/mato_grosso_brazil/samples.gpkg b/inst/mato_grosso_brazil/samples.gpkg index 2fa2e37805a0d741c78f794137fefcd1df3bf420..888cbee560a06f5172e88295c069a08a9af749a4 100644 GIT binary patch delta 25 gcmZozz|pXPV}djz??f4AM&8DR)&$0_2}}$40cIx$l>h($ delta 25 gcmZozz|pXPV}djz_e2?IM()Oh)&$0_2}}$40cG(AkpKVy diff --git a/man/create_patterns.Rd b/man/create_patterns.Rd deleted file mode 100644 index 0ff9bd5..0000000 --- a/man/create_patterns.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_patterns.R -\name{create_patterns} -\alias{create_patterns} -\title{Create a Pattern Using GAM} -\usage{ -create_patterns( - x, - y, - formula = band ~ s(time), - start_column = "start_date", - end_column = "end_date", - label_colum = "label", - sampling_freq = NULL, - ... -) -} -\arguments{ -\item{x}{A three dimensions stars object (x, y, time) with the satellite image time series.} - -\item{y}{An sf object with the coordinates of the training points.} - -\item{formula}{A formula for the GAM. Default is \code{band ~ \link[mgcv]{s}(time)}.} - -\item{start_column}{Name of the column in y that indicates the start date. Default is 'start_date'.} - -\item{end_column}{Name of the column in y that indicates the end date. Default is 'end_date'.} - -\item{label_colum}{Name of the column in y that contains land use labels. Default is 'label'.} - -\item{sampling_freq}{The time frequency for sampling including unit, e.g '16 day'. If NULL, the function will infer it.} - -\item{...}{Additional arguments passed to the GAM function.} -} -\value{ -A list containing the predicted values for each label. -} -\description{ -This function creates a pattern based on Generalized Additive Models (GAM). -It uses the specified formula to fit the model and predict values. -} diff --git a/man/extract_time_series.Rd b/man/extract_time_series.Rd deleted file mode 100644 index c4aebe3..0000000 --- a/man/extract_time_series.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_patterns.R -\name{extract_time_series} -\alias{extract_time_series} -\title{Extract Time Series from a Stars Object for Specified Points} -\usage{ -extract_time_series(x, y) -} -\arguments{ -\item{x}{A stars object containing the raster time series data.} - -\item{y}{An sf object containing the point geometries and their associated labels.} -} -\value{ -A data.frame with the extracted time series for each point in the sf object, -with additional columns for the ID and label of each sample. -} -\description{ -This function extracts a time series from a stars object for each specified point in the sf object. -Each extracted sample is then labeled with an ID and the label from the sf object. -} diff --git a/man/get_stars_time_freq.Rd b/man/get_stars_time_freq.Rd index 47d90c4..09b3890 100644 --- a/man/get_stars_time_freq.Rd +++ b/man/get_stars_time_freq.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_patterns.R +% Please edit documentation in R/train.R \name{get_stars_time_freq} \alias{get_stars_time_freq} \title{Compute the Most Common Sampling Frequency in a Stars Object} diff --git a/man/knn1_twdtw.Rd b/man/knn1_twdtw.Rd new file mode 100644 index 0000000..02627bb --- /dev/null +++ b/man/knn1_twdtw.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/train.R +\name{knn1_twdtw} +\alias{knn1_twdtw} +\title{Train a KNN-1 TWDTW model with optional GAM resampling} +\usage{ +knn1_twdtw( + x, + y, + formula = NULL, + start_column = "start_date", + end_column = "end_date", + label_colum = "label", + sampling_freq = NULL, + ... +) +} +\arguments{ +\item{x}{A two-dimensional stars object (x, y) with time and bands as attributes.} + +\item{y}{An sf object with the coordinates of the training points.} + +\item{formula}{Either NULL or a formula to reduce samples of the same label using Generalized Additive Models (GAM). +Default is \code{band ~ s(time)}. See details.} + +\item{start_column}{Name of the column in y that indicates the start date. Default is 'start_date'.} + +\item{end_column}{Name of the column in y that indicates the end date. Default is 'end_date'.} + +\item{label_colum}{Name of the column in y containing land use labels. Default is 'label'.} + +\item{sampling_freq}{The time frequency for sampling, including the unit (e.g., '16 day'). +If NULL, the function will infer the frequency. This parameter is only used if a formula is provided.} + +\item{...}{Additional arguments passed to the GAM function.} +} +\value{ +A 'knn1_twdtw' model containing the trained model information and the data used. +} +\description{ +This function prepares a KNN-1 model with the Time Warp Dynamic Time Warping (TWDTW) algorithm. +If a formula is provided, the training samples are resampled using Generalized Additive Models (GAM). +} +\details{ +If \code{formula} is NULL, the KNN-1 model will retain all training samples. If a formula is passed (e.g., \code{band ~ \link[mgcv]{s}(time)}), +then samples of the same label (land cover class) will be resampled using GAM. +Resampling can significantly reduce prediction processing time. +} +\examples{ + +# Example will be added once the function is properly implemented and tested. + +} diff --git a/man/plot.knn1_twdtw.Rd b/man/plot.knn1_twdtw.Rd new file mode 100644 index 0000000..5f6c297 --- /dev/null +++ b/man/plot.knn1_twdtw.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_patterns.R +\name{plot.knn1_twdtw} +\alias{plot.knn1_twdtw} +\title{Plot Patterns from Time Series Data} +\usage{ +\method{plot}{knn1_twdtw}(x, n = 12, ...) +} +\arguments{ +\item{x}{A model of class \code{"knn1_twdtw"}.} + +\item{n}{An integer specifying the number of patterns to plot. Default is 12.} + +\item{...}{Additional arguments. Currently not used.} +} +\value{ +A \code{\link[ggplot2]{ggplot}} object displaying the time series patterns. +} +\description{ +This function visualizes time series data patterns from the \code{"knn1_twdtw"} model. +It produces a multi-faceted plot, +where each facet represents a different time series pattern from the model's data. +Within each facet, different attributes (e.g., bands or indices) are plotted as +distinct lines, differentiated by color. +} +\details{ +The function is designed for visual inspection of time series patterns from the model. +Each data frame in the model's data represents a time series. Columns within the data frame +correspond to different attributes (e.g., bands or indices), while rows represent time points. +The facet title is derived from the name of each time series in the model's data. +} diff --git a/man/plot_patterns.Rd b/man/plot_patterns.Rd deleted file mode 100644 index 97e24d6..0000000 --- a/man/plot_patterns.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot_patterns.R -\name{plot_patterns} -\alias{plot_patterns} -\title{Plot Patterns from Time Series Data} -\usage{ -plot_patterns(x, ...) -} -\arguments{ -\item{x}{A list where each element is a data.frame representing a time series. -Each data.frame should have the same number of rows and columns, -with columns representing different attributes (e.g., bands or indices) -and rows representing time points. -The name of each element in the list will be used as the facet title.} - -\item{...}{Not used.} -} -\value{ -A ggplot object displaying the time series patterns. -} -\description{ -This function takes a list of time series data and creates a multi-faceted plot -where each facet corresponds to a different time series from the list. -Within each facet, different attributes (columns of the time series) are -plotted as lines with different colors. -} diff --git a/man/predict.knn1_twdtw.Rd b/man/predict.knn1_twdtw.Rd new file mode 100644 index 0000000..605217e --- /dev/null +++ b/man/predict.knn1_twdtw.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/predict.R +\name{predict.knn1_twdtw} +\alias{predict.knn1_twdtw} +\title{Predict using the knn1_twdtw model} +\usage{ +\method{predict}{knn1_twdtw}(object, newdata, ...) +} +\arguments{ +\item{object}{A \code{knn1_twdtw} model object generated by the \code{knn1_twdtw} function.} + +\item{newdata}{A data frame or similar object containing the new observations +(time series data) to be predicted.} + +\item{...}{Additional arguments passed to the \link[twdtw]{twdtw} function.} +} +\value{ +A vector of predicted classes for the \code{newdata}. +} +\description{ +This function predicts the classes of new data using the Time Warped Dynamic Time Warping (TWDTW) +method with a 1-nearest neighbor approach. The prediction is based on the minimum TWDTW distance +to the known patterns stored in the \code{knn1_twdtw} model. +} diff --git a/man/prepare_time_series.Rd b/man/prepare_time_series.Rd new file mode 100644 index 0000000..dfce70e --- /dev/null +++ b/man/prepare_time_series.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prepare_time_series.R +\name{prepare_time_series} +\alias{prepare_time_series} +\title{Prepare a Time Series Tibble from a 2D stars Object with Bands and Time Attributes} +\usage{ +prepare_time_series(x) +} +\arguments{ +\item{x}{A data frame derived from a stars object containing time series data in wide format. +The column names should adhere to one of the following formats: "band.YYYY.MM.DD", "XYYYY.MM.DD.band", or "YYYY.MM.DD.band".} +} +\value{ +A nested tibble in wide format. Each row of the tibble corresponds to a unique 'ts_id' that maintains the order from the original stars object. +The nested structure contains observations (time series) for each 'ts_id', including the 'time' of each observation, and individual bands are presented as separate columns. +} +\description{ +This function reshapes a data frame, which has been converted from a stars object, into a nested wide tibble format. +The stars object conversion often results in columns named in formats like "band.YYYY.MM.DD", "XYYYY.MM.DD.band", or "YYYY.MM.DD.band". +} diff --git a/man/shift_dates.Rd b/man/shift_dates.Rd index 87d58c1..9af6b7e 100644 --- a/man/shift_dates.Rd +++ b/man/shift_dates.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/shift_dates.R \name{shift_dates} \alias{shift_dates} \title{Shift Dates to Start on a Specified Origin Year} diff --git a/tests/testthat/test-twdtw_classify.R b/tests/testthat/test-twdtw_classify.R index f5c0c04..6beb6a1 100644 --- a/tests/testthat/test-twdtw_classify.R +++ b/tests/testthat/test-twdtw_classify.R @@ -1,8 +1,5 @@ -library(stars) -library(stringr) - # Read training samples -samples <- st_read(system.file("mato_grosso_brazil/samples.gpkg", package = "dtwSat")) +samples <- st_read(system.file("mato_grosso_brazil/samples.gpkg", package = "dtwSat"), quiet = TRUE) # Satellite image time sereis files tif_files <- system.file("mato_grosso_brazil", package = "dtwSat") |> @@ -10,18 +7,23 @@ tif_files <- system.file("mato_grosso_brazil", package = "dtwSat") |> # The acquisition date is in the file name are not the true acquisition date of each pixel # MOD13Q1 is a 16-day composite product, so the acquisition date is the first day of the 16-day period -acquisition_date <- as.Date(str_extract(tif_files, "[0-9]{8}"), format = "%Y%m%d") - -# Read the data as a stars object, setting time as a dimension and band as attribute -dc <- read_stars(tif_files, proxy = FALSE, along = list(time = acquisition_date)) |> - st_set_dimensions(3, c("EVI", "NDVI", "RED", "BLUE", "NIR", "MIR", "DOY")) |> - split("band") +acquisition_date <- as.Date(regmatches(tif_files, regexpr("[0-9]{8}", tif_files)), format = "%Y%m%d") -# Remove the DOY band - this will be supported int the future -dc <- dc[c("EVI", "NDVI", "RED", "BLUE", "NIR", "MIR")] +# Read the data as a stars object setting the time/date for each observation using along +# This will prodcue a 4D array +# Split 'band' and 'time' dimensions to create a 2D array +dc <- read_stars(tif_files, proxy = FALSE, along = list(time = acquisition_date), RasterIO = list(bands = 1:6)) |> + st_set_dimensions(3, c("EVI", "NDVI", "RED", "BLUE", "NIR", "MIR")) |> + split(c("band")) |> + split(c("time")) # Get temporal patters -ts_patterns <- create_patterns(x = dc, y = samples) +m <- knn1_twdtw(x = dc, y = samples, formula = band ~ s(time), sampling_freq = 16) # Visualize patterns -plot_patterns(ts_patterns) +plot(m) + +# Classify stars +system.time(lu <- predict(dc, model = m, drop_dimensions = TRUE, cycle_length = 'year', time_scale = 'day', time_weight = c(steepness = 0.1, midpoint = 50))) + +# write_stars(lu, "~/Downloads/lu.tif") \ No newline at end of file