From 475c5cae5d4d4d9f1b8782026d7cce736fe3ac27 Mon Sep 17 00:00:00 2001 From: vwmaus Date: Sat, 30 Oct 2021 16:36:19 +0200 Subject: [PATCH] Adds parallel implementation using foreach --- DESCRIPTION | 7 +++++-- NAMESPACE | 2 ++ R/twdtwApply.R | 15 ++++++++++----- R/zzz.R | 1 + examples/fast_twdtw.R | 21 ++++++++++++++------- man/twdtwReduceTime.Rd | 17 ++++------------- 6 files changed, 36 insertions(+), 27 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b0487a4..09c98a1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,13 +37,16 @@ Imports: mgcv, xtable, Rdpack, - data.table + data.table, + foreach, + parallel Suggests: gridExtra, grid, png, Hmisc, - rbenchmark + rbenchmark, + doParallel License: GPL (>= 3) | file LICENSE URL: https://github.com/vwmaus/dtwSat/ BugReports: https://github.com/vwmaus/dtwSat/issues diff --git a/NAMESPACE b/NAMESPACE index f98fe06..65bd612 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -87,6 +87,8 @@ importFrom(dtw,asymmetric) importFrom(dtw,rabinerJuangStepPattern) importFrom(dtw,symmetric1) importFrom(dtw,symmetric2) +importFrom(foreach,"%dopar%") +importFrom(foreach,foreach) importFrom(grDevices,gray.colors) importFrom(grDevices,terrain.colors) importFrom(lubridate,"day<-") diff --git a/R/twdtwApply.R b/R/twdtwApply.R index 707740c..34f57f8 100644 --- a/R/twdtwApply.R +++ b/R/twdtwApply.R @@ -344,8 +344,8 @@ twdtwApply.twdtwRaster = function(x, y, weight.fun, dist.method, step.matrix, n, fun } -fasttwdtwApply = function(x, y, dist.method="Euclidean", step.matrix = symmetric1, n=NULL, progress = "text", ncores = 1, - span=NULL, min.length=0, breaks=NULL, from=NULL, to=NULL, by=NULL, overlap=0.5, fill = 255, filepath="", ...){ +fasttwdtwApply = function(x, y, dist.method="Euclidean", step.matrix = symmetric1, n=NULL, progress = "text", ncores = 1, paralle = FALSE, + span=NULL, min.length=0, breaks=NULL, from=NULL, to=NULL, by=NULL, overlap=0.5, fill = 255, filepath="", chunksize, minrows=1, ...){ # x = rts # y = temporal_patterns # dist.method="Euclidean" @@ -431,7 +431,7 @@ fasttwdtwApply = function(x, y, dist.method="Euclidean", step.matrix = symmetric names(vv) <- names(out) } - bs <- blockSize(x@timeseries[[1]]) + bs <- blockSize(x@timeseries[[1]], chunksize = chunksize, minrows = minrows) bs$array_rows <- cumsum(c(1, bs$nrows*out[[1]]@ncols)) pb <- pbCreate(bs$n, progress) @@ -452,9 +452,14 @@ fasttwdtwApply = function(x, y, dist.method="Euclidean", step.matrix = symmetric }) # Apply TWDTW analysis - twdtw_results <- parallel::mclapply(ts, mc.cores = ncores, FUN = twdtwReduceTime, y = y, breaks = breaks, ...) + twdtw_results <- foreach( + i = ts, + .combine = 'rbind' + ) %dopar% { + twdtwReduceTime(x = i, y = y, breaks = breaks, ...) + } - twdtw_results <- data.table::rbindlist(twdtw_results)[,c("label","distance")] + # twdtw_results <- data.table::rbindlist(twdtw_results)[,c("label","distance")] twdtw_label <- matrix(twdtw_results$label, ncol = length(breaks)-1, byrow = TRUE) twdtw_distance <- matrix(twdtw_results$distance, ncol = length(breaks)-1, byrow = TRUE) diff --git a/R/zzz.R b/R/zzz.R index aa571eb..2ff7e73 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -34,6 +34,7 @@ #' @import ggplot2 #' @import methods #' @import rgdal +#' @importFrom foreach foreach %dopar% #' @importFrom proxy dist pr_DB #' @importFrom reshape2 melt #' @importFrom scales pretty_breaks date_format percent diff --git a/examples/fast_twdtw.R b/examples/fast_twdtw.R index 62dbfb4..f8d8028 100644 --- a/examples/fast_twdtw.R +++ b/examples/fast_twdtw.R @@ -38,16 +38,23 @@ # Create temporal patterns temporal_patterns <- createPatterns(training_ts, freq = 8, formula = y ~ s(x)) - # Set TWDTW weight function - # log_fun <- logisticWeight(-0.1, 50) - - # Run fast-TWDTW analysis + # Run sequential fast-TWDTW analysis + foreach::registerDoSEQ() system.time( - # The logistic time weigh is codeded in Fortran: TODO: add logit parameters to function call - # parallel uses parallel::mclapply - not so much implementation - fast_lucc <- dtwSat:::fasttwdtwApply(x = rts, y = temporal_patterns, ncores = 1, progress = 'text') + # The logistic time weigh is in the Fortran code: TODO: add logit parameters to function call + fast_lucc <- dtwSat:::fasttwdtwApply(x = rts, y = temporal_patterns, progress = 'text', minrows = 27) ) + # Run parallel fast-TWDTW + cl <- parallel::makeCluster(parallel::detectCores(), type = "FORK") + doParallel::registerDoParallel(cl) + foreach::getDoParRegistered() + system.time( + fast_lucc <- dtwSat:::fasttwdtwApply(x = rts, y = temporal_patterns, progress = 'text', minrows = 27) + ) + foreach::registerDoSEQ() + parallel::stopCluster(cl) + # Plot TWDTW distances for the first year plot(fast_lucc, type = "distance", time.levels = 1) diff --git a/man/twdtwReduceTime.Rd b/man/twdtwReduceTime.Rd index d2f56d7..dca7c26 100644 --- a/man/twdtwReduceTime.Rd +++ b/man/twdtwReduceTime.Rd @@ -7,7 +7,6 @@ twdtwReduceTime( x, y, - weight.fun = NULL, dist.method = "Euclidean", step.matrix = symmetric1, from = NULL, @@ -28,14 +27,6 @@ the column names in the temporal patterns \code{y}.} \item{y}{a list of data.frame objects similar to \code{x}. The temporal patterns used to classify the time series in \code{x}.} -\item{weight.fun}{A function. Any function that receives two matrices and -performs a computation on them, returning a single matrix with the same -dimensions. The first matrix is the DTW local cost matrix and the -second a matrix of the time differences in days. The function should return a -matrix of DTW local cost weighted by the time differences. If not declared -the time-weight is zero. In this case the function runs the standard version -of the dynamic time warping. See details.} - \item{dist.method}{A character. Method to derive the local cost matrix. Default is ''Euclidean'' see \code{\link[proxy]{dist}} in package \pkg{proxy}.} @@ -60,8 +51,9 @@ between one match and the interval of classification. Default is 0.5, } \description{ This function is a minimalist implementation of -\link[dtwSat]{twdtwApply} that is in average 3x faster. It does not keep any -intermediate data. It performs a multidimensional TWDTW analysis +\link[dtwSat]{twdtwApply} that is in average 3x faster. The time weight function +is coded in Fortran. It does not keep any intermediate data. +It performs a multidimensional TWDTW analysis \insertCite{Maus:2019}{dtwSat} and retrieves only the best matches between the unclassified time series and the patterns for each defined time interval. } @@ -69,7 +61,6 @@ the unclassified time series and the patterns for each defined time interval. \dontrun{ library(dtwSat) -log_fun = logisticWeight(-0.1, 50) from = "2009-09-01" to = "2017-09-01" by = "12 month" @@ -88,7 +79,7 @@ mn_ts <- read.csv(system.file("reduce_time/ts_MODIS13Q1.csv", package = "dtwSat" rbenchmark::benchmark( original = twdtwClassify(twdtwApply(x = tw_ts, y = tw_patt, weight.fun = log_fun), from = from, to = to, by = by)[[1]], - minimalist = twdtwReduceTime(x = mn_ts, y = mn_patt, weight.fun = log_fun, + minimalist = twdtwReduceTime(x = mn_ts, y = mn_patt, from = from, to = to, by = by) ) }