Adds example using raster time series and minimalist twdtw

r-spatial · Oct 17, 2021 · 1883dd3 · 1883dd3
1 parent 085dee7
commit 1883dd3
Show file tree

Hide file tree

Showing 9 changed files with 391 additions and 24 deletions.
diff --git a/R/twdtw.R b/R/twdtw.R
@@ -39,8 +39,10 @@
     psi = .g(dist(doyy, doyx, method=dist.method))
     # Weighted local cost matrix 
     cm = weight.fun(phi, psi)
-
     # Compute cost matris 
+    # xm = na.omit(cbind(doyx, as.matrix(timeseries)))
+    # ym = na.omit(cbind(doyy, as.matrix(pattern)))
+    # internals = .computecost_fast(xm, ym, step.matrix)
     internals = .computecost(cm, step.matrix)
     internals$timeWeight = matrix(psi, nrow = nrow(psi))
     internals$localMatrix = matrix(cm, nrow = nrow(cm))

diff --git a/R/twdtwApply.R b/R/twdtwApply.R
@@ -344,3 +344,148 @@ twdtwApply.twdtwRaster = function(x, y, weight.fun, dist.method, step.matrix, n,
   fun
 }
 
+fasttwdtwApply = function(x, y, dist.method="Euclidean", step.matrix = symmetric1, n=NULL, progress = "text", ncores = 1,
+                          span=NULL, min.length=0, breaks=NULL, from=NULL, to=NULL, by=NULL, overlap=0.5, fill = 255, filepath="", ...){
+  # x = rts
+  # y = temporal_patterns
+  # dist.method="Euclidean"
+  # step.matrix = symmetric1
+  # n=NULL
+  # span=NULL
+  # min.length=0
+  # breaks=NULL
+  # from=NULL
+  # to=NULL
+  # by=NULL
+  # overlap=0.5
+  # filepath=""
+
+  if(is.twdtwTimeSeries(y)){
+    y <- lapply(y@timeseries, function(yy){
+      date <- index(yy)
+      yy <- as.data.frame(yy)
+      yy <- cbind(date, yy)
+    })
+  }
+
+  if(any(!sapply(y, function(yy) "date" %in% names(yy)))){
+    stop("every patter in y must have a column called 'date'")
+  }
+
+  if(is.null(breaks))
+    if( !is.null(from) &  !is.null(to) ){
+      breaks = seq(as.Date(from), as.Date(to), by=by)    
+    } else {
+      patt_range = lapply(y, function(yy) range(yy$date))
+      patt_diff = trunc(sapply(patt_range, diff)/30)+1
+      min_range = which.min(patt_diff)
+      by = patt_diff[[min_range]]
+      from = patt_range[[min_range]][1]
+      to = from 
+      month(to) = month(to) + by
+      year(from) = year(range(index(x))[1])
+      year(to) = year(range(index(x))[2])
+      if(to<from) year(to) = year(to) + 1
+      breaks = seq(from, to, paste(by,"month"))
+      by = paste(by, "month")
+    }
+  breaks = as.Date(breaks)
+
+  # Match raster bands to pattern bands
+  raster_bands <- coverages(x)
+  pattern_bands <- names(y[[1]])
+  matching_bands <- pattern_bands[pattern_bands %in% raster_bands]
+
+  if(length(matching_bands) < 1)
+    stop(paste0("Bands from twdtwRaster do not match the bands from patterns"))
+
+  if("doy" %in% coverages(x) & !"doy" %in% matching_bands)
+    matching_bands <- c("doy", matching_bands)
+
+  x <- subset(x, layers = matching_bands)
+  raster_bands <- coverages(x)
+
+  # Set raster levels and labels 
+  levels <- names(y)
+  names(levels) <- levels
+
+  # Create output raster objects 
+  r_template <- brick(x@timeseries[[1]], nl = length(breaks) - 1, values = FALSE)
+  out <- rep(list(r_template), 2)
+  names(out) <- c("label", "distance")
+
+  filepath <- trim(filepath)
+  filename <- NULL
+  if (filepath != "") {
+    dir.create(path = filepath, showWarnings = TRUE, recursive = TRUE)
+    filename <- paste0(filepath, "/", names(out), ".grd")
+    names(filename) <- names(out)
+  } else if (!canProcessInMemory(r_template, n = length(breaks) + length(x@timeseries) )) {
+    filename <- sapply(names(out), rasterTmpFile)
+  }
+
+  if (!is.null(filename)) {
+    out <- lapply(names(out), function(i) writeStart(out[[i]], filename = filename[i], ...))
+  } else {
+    vv <- lapply(names(out), function(i) matrix(out[[i]], ncol = nlayers(out[[i]])))
+    names(vv) <- names(out)
+  }
+
+  bs <- blockSize(x@timeseries[[1]])
+  bs$array_rows <- cumsum(c(1, bs$nrows*out[[1]]@ncols))
+  pb <- pbCreate(bs$n, progress)
+
+  for(k in 1:bs$n){
+
+    # Get raster data
+    v <- lapply(x@timeseries, getValues, row = bs$row[k], nrows = bs$nrows[k])
+
+    # Get time series
+    ts <- lapply(1:nrow(v[[1]]), function(i){
+      # Get time series dates
+      if(any(names(v) %in% "doy")){
+        timeline <- dtwSat::getDatesFromDOY(year = format(dtwSat::index(x), "%Y"), doy = v[["doy"]][i,])
+      } else {
+        timeline <- dtwSat::index(x)
+      } 
+      data.frame(sapply(v[!(names(v) %in% "doy")], function(v) v[i, , drop = FALSE]), date = timeline)
+    })
+
+    # Apply TWDTW analysis
+    twdtw_results <- parallel::mclapply(ts, mc.cores = ncores, FUN = twdtwReduceTime, y = y, breaks = breaks, ...)
+
+    twdtw_results <- data.table::rbindlist(twdtw_results)[,c("label","distance")]
+    twdtw_label <- matrix(twdtw_results$label, ncol = length(breaks)-1, byrow = TRUE)
+    twdtw_distance <- matrix(twdtw_results$distance, ncol = length(breaks)-1, byrow = TRUE)
+
+    # Get best matches for each point, period, and pattern
+    m <- length(levels)
+    h <- length(breaks) - 1
+
+    rows <- seq(from = bs$array_rows[k], by = 1, length.out = bs$nrows[k]*out[[1]]@ncols)
+    if (!is.null(filename)) {
+      writeValues(out$label, twdtw_label, bs$row[k])
+      writeValues(out$distance, twdtw_distance, bs$row[k])
+    } else {
+      vv$label[rows,] <- twdtw_label
+      vv$distance[rows,] <- twdtw_distance
+    }
+
+    pbStep(pb, k)
+
+  }
+
+  if (!is.null(filename)) {
+    out <- lapply(out, writeStop)
+  } else {
+    out <- lapply(seq_along(out), function(i) setValues(out[[i]], values = vv[[i]]))
+  }
+
+  pbClose(pb)
+
+  names(out) <- c("label", "distance")
+
+  twdtwRaster(Class = out$label, Distance = out$distance, ..., timeline = breaks[-1], 
+              labels = c(levels, "unclassified"), levels = c(seq_along(levels), fill), filepath = filepath)
+
+}
diff --git a/R/twdtw_reduce_time.R b/R/twdtw_reduce_time.R
@@ -5,8 +5,9 @@
 #' @rdname twdtwReduceTime 
 #' 
 #' @description This function is a minimalist implementation of 
-#' \link[dtwSat]{twdtwApply} that is in average 3x faster. It does not keep any 
-#' intermediate data. It performs a multidimensional TWDTW analysis 
+#' \link[dtwSat]{twdtwApply} that is in average 3x faster. The time weight function 
+#' is coded in Fortran. It does not keep any intermediate data. 
+#' It performs a multidimensional TWDTW analysis 
 #' \insertCite{Maus:2019}{dtwSat} and retrieves only the best matches between 
 #' the unclassified time series and the patterns for each defined time interval.
 #' 
@@ -28,7 +29,6 @@
 #' \dontrun{
 #' 
 #' library(dtwSat)
-#' log_fun = logisticWeight(-0.1, 50)
 #' from = "2009-09-01"
 #' to = "2017-09-01"
 #' by = "12 month"
@@ -47,20 +47,20 @@
 #' rbenchmark::benchmark(
 #'   original = twdtwClassify(twdtwApply(x = tw_ts, y = tw_patt, weight.fun = log_fun), 
 #'                                       from = from, to = to, by = by)[[1]],
-#'   minimalist = twdtwReduceTime(x = mn_ts, y = mn_patt, weight.fun = log_fun, 
+#'   minimalist = twdtwReduceTime(x = mn_ts, y = mn_patt, 
 #'                                       from = from, to = to, by = by)  
 #'  )
 #' }
 #' 
 #' @export
 twdtwReduceTime = function(x, 
                            y, 
-                           weight.fun = NULL,
                            dist.method = "Euclidean",
                            step.matrix = symmetric1,
                            from = NULL, 
                            to = NULL, 
                            by = NULL, 
+                           breaks = NULL,
                            overlap = .5, 
                            fill = 255){
 
@@ -88,22 +88,10 @@ twdtwReduceTime = function(x,
     doyy <- as.numeric(format(ty, "%j")) 
     doyx <- as.numeric(format(tx, "%j")) 
 
-    # if(!is.null(weight.fun)){
-    #   # Get day of the year for pattern and time series 
-    #   doyy <- as.numeric(format(ty, "%j")) 
-    #   doyx <- as.numeric(format(tx, "%j")) 
-    #   
-    #   # Compute time-weght matrix 
-    #   w <- .g(proxy::dist(doyy, doyx, method = dist.method))
-    #   
-    #   # Apply time-weight to local cost matrix 
-    #   cm <- weight.fun(cm, w)
-    # }
     # Compute accumulated DTW cost matrix 
-    #internals <- dtwSat:::.computecost(cm = cm, step.matrix = step.matrix)
     xm = na.omit(cbind(doyx, as.matrix(px)))
     ym = na.omit(cbind(doyy, as.matrix(py)))
-    internals = .computecost_fast(xm, ym, step.matrix)
+    internals = .fast_twdtw(xm, ym, step.matrix)
 
     # Find all low cost candidates 
     a <- internals$startingMatrix[internals$N,1:internals$M]
@@ -132,7 +120,10 @@ twdtwReduceTime = function(x,
   aligs <- data.table::rbindlist(aligs)
 
   # Create classification intervals 
-  breaks <- seq(as.Date(from), as.Date(to), by = by)
+  if(is.null(breaks)){
+    breaks <- seq(as.Date(from), as.Date(to), by = by) 
+  }
+
   # Find best macthes for the intervals 
   best_matches <- .bestmatches(
     x = list(aligs[order(aligs$from, aligs$from),,drop=FALSE]), 
@@ -187,3 +178,37 @@ twdtwReduceTime = function(x,
   res$M = m
   res
 }
+
+# @useDynLib dtwSat computecost
+.fast_twdtw = function(xm, ym, step.matrix){
+
+  #  cm = rbind(0, cm)
+  n = nrow(ym)
+  m = nrow(xm)
+  d = ncol(ym)
+
+  if(is.loaded("fast_twdtw", PACKAGE = "dtwSat", type = "Fortran")){
+    out = .Fortran(fast_twdtw, 
+                   XM = matrix(as.double(xm), m, d),
+                   YM = matrix(as.double(ym), n, d),
+                   CM = matrix(as.double(0), n+1, m),
+                   DM = matrix(as.integer(0), n+1, m),
+                   VM = matrix(as.integer(0), n+1, m),
+                   SM = matrix(as.integer(step.matrix), nrow(step.matrix), ncol(step.matrix)),
+                   N  = as.integer(n),
+                   M  = as.integer(m),
+                   D  = as.integer(d),
+                   NS = as.integer(nrow(step.matrix)))
+  } else {
+    stop("Fortran fast_twdtw lib is not loaded")
+  }
+  # sqrt(sum((ym[1,-1] - xm[1,-1])*(ym[1,-1] - xm[1,-1])))
+  res = list()
+  res$costMatrix = out$CM[-1,]
+  res$directionMatrix = out$DM[-1,]
+  res$startingMatrix = out$VM[-1,]
+  res$stepPattern = step.matrix
+  res$N = n
+  res$M = m
+  res
+}
diff --git a/examples/benchmark_minimalist.R b/examples/benchmark_minimalist.R
@@ -1,7 +1,7 @@
 library(dtwSat)
 log_fun = logisticWeight(-0.1, 50)
 from = "2009-09-01"
-to = "2017-09-01"
+to = "2017-08-31"
 by = "12 month"
 
 # S4 objects for original implementation 
@@ -15,6 +15,6 @@ mn_ts <- read.csv(system.file("reduce_time/ts_MODIS13Q1.csv", package = "dtwSat"
 # Benchtmark 
 rbenchmark::benchmark(
   original = twdtwClassify(twdtwApply(x = tw_ts, y = tw_patt, weight.fun = log_fun), from = from, to = to, by = by)[[1]],
-  minimalist = twdtwReduceTime(x = mn_ts, y = mn_patt, weight.fun = log_fun, from = from, to = to, by = by)  
+  minimalist = twdtwReduceTime(x = mn_ts, y = mn_patt, from = from, to = to, by = by)  
 )
 
diff --git a/examples/fast_twdtw.R b/examples/fast_twdtw.R
@@ -0,0 +1,81 @@
+\dontrun{
+
+  # Example of TWDTW analysis using raster files 
+  library(dtwSat)
+  library(caret)
+
+  # Load raster data 
+  evi  <- brick(system.file("lucc_MT/data/evi.tif",  package = "dtwSat"))
+  ndvi <- brick(system.file("lucc_MT/data/ndvi.tif", package = "dtwSat"))
+  red  <- brick(system.file("lucc_MT/data/red.tif",  package = "dtwSat"))
+  blue <- brick(system.file("lucc_MT/data/blue.tif", package = "dtwSat"))
+  nir  <- brick(system.file("lucc_MT/data/nir.tif",  package = "dtwSat"))
+  mir  <- brick(system.file("lucc_MT/data/mir.tif",  package = "dtwSat"))
+  doy  <- brick(system.file("lucc_MT/data/doy.tif",  package = "dtwSat"))
+  timeline <- 
+    scan(system.file("lucc_MT/data/timeline", package = "dtwSat"), what="date")
+
+  # Create raster time series 
+  rts <- twdtwRaster(evi, ndvi, red, blue, nir, mir, timeline = timeline, doy = doy)
+
+  # Load field samples and projection 
+  field_samples <- 
+    read.csv(system.file("lucc_MT/data/samples.csv", package = "dtwSat"))
+  proj_str <- 
+    scan(system.file("lucc_MT/data/samples_projection", package = "dtwSat"), 
+         what = "character")
+
+  # Split samples for training (10%) and validation (90%) using stratified sampling 
+  set.seed(1)
+  I <- unlist(createDataPartition(field_samples$label, p = 0.1))
+  training_samples <- field_samples[I, ]
+  validation_samples <- field_samples[-I, ]
+
+  # Get time series form raster
+  training_ts <- getTimeSeries(rts, y = training_samples, proj4string = proj_str)
+  validation_ts <- getTimeSeries(rts, y = validation_samples, proj4string = proj_str)
+
+  # Create temporal patterns 
+  temporal_patterns <- createPatterns(training_ts, freq = 8, formula = y ~ s(x))
+
+  # Set TWDTW weight function 
+  # log_fun <- logisticWeight(-0.1, 50)
+
+  # Run fast-TWDTW analysis
+  system.time(
+    # The logistic time weigh is codeded in Fortran: TODO: add logit parameters to function call
+    # parallel uses parallel::mclapply - not so much implementation
+    fast_lucc <- dtwSat:::fasttwdtwApply(x = rts, y = temporal_patterns, ncores = 1, progress = 'text')
+  )
+
+  # Plot TWDTW distances for the first year 
+  plot(fast_lucc, type = "distance", time.levels = 1)
+
+  # Plot TWDTW classification results 
+  plot(fast_lucc, type = "map")
+
+  # Assess classification 
+  twdtw_assess <- 
+    twdtwAssess(object = fast_lucc, y = validation_samples, 
+                proj4string = proj_str, conf.int = .95) 
+
+  # Plot map accuracy 
+  plot(twdtw_assess, type = "accuracy")
+
+  # Plot area uncertainty 
+  plot(twdtw_assess, type = "area")
+
+  # Plot misclassified samples  
+  plot(twdtw_assess, type = "map", samples = "incorrect")
+
+  # Get latex table with error matrix 
+  twdtwXtable(twdtw_assess, table.type = "matrix")
+
+  # Get latex table with error accuracy 
+  twdtwXtable(twdtw_assess, table.type = "accuracy")
+
+  # Get latex table with area uncertainty 
+  twdtwXtable(twdtw_assess, table.type = "area")
+
+}
+
diff --git a/man/twdtwReduceTime.Rd b/man/twdtwReduceTime.Rd