Skip to content

Commit

Permalink
Adds example using raster time series and minimalist twdtw
Browse files Browse the repository at this point in the history
  • Loading branch information
vwmaus committed Oct 17, 2021
1 parent 085dee7 commit 1883dd3
Show file tree
Hide file tree
Showing 9 changed files with 391 additions and 24 deletions.
4 changes: 3 additions & 1 deletion R/twdtw.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@
psi = .g(dist(doyy, doyx, method=dist.method))
# Weighted local cost matrix
cm = weight.fun(phi, psi)

# Compute cost matris
# xm = na.omit(cbind(doyx, as.matrix(timeseries)))
# ym = na.omit(cbind(doyy, as.matrix(pattern)))
# internals = .computecost_fast(xm, ym, step.matrix)
internals = .computecost(cm, step.matrix)
internals$timeWeight = matrix(psi, nrow = nrow(psi))
internals$localMatrix = matrix(cm, nrow = nrow(cm))
Expand Down
145 changes: 145 additions & 0 deletions R/twdtwApply.R
Original file line number Diff line number Diff line change
Expand Up @@ -344,3 +344,148 @@ twdtwApply.twdtwRaster = function(x, y, weight.fun, dist.method, step.matrix, n,
fun
}

fasttwdtwApply = function(x, y, dist.method="Euclidean", step.matrix = symmetric1, n=NULL, progress = "text", ncores = 1,
span=NULL, min.length=0, breaks=NULL, from=NULL, to=NULL, by=NULL, overlap=0.5, fill = 255, filepath="", ...){
# x = rts
# y = temporal_patterns
# dist.method="Euclidean"
# step.matrix = symmetric1
# n=NULL
# span=NULL
# min.length=0
# breaks=NULL
# from=NULL
# to=NULL
# by=NULL
# overlap=0.5
# filepath=""

if(is.twdtwTimeSeries(y)){
y <- lapply(y@timeseries, function(yy){
date <- index(yy)
yy <- as.data.frame(yy)
yy <- cbind(date, yy)
})
}

if(any(!sapply(y, function(yy) "date" %in% names(yy)))){
stop("every patter in y must have a column called 'date'")
}

if(is.null(breaks))
if( !is.null(from) & !is.null(to) ){
breaks = seq(as.Date(from), as.Date(to), by=by)
} else {
patt_range = lapply(y, function(yy) range(yy$date))
patt_diff = trunc(sapply(patt_range, diff)/30)+1
min_range = which.min(patt_diff)
by = patt_diff[[min_range]]
from = patt_range[[min_range]][1]
to = from
month(to) = month(to) + by
year(from) = year(range(index(x))[1])
year(to) = year(range(index(x))[2])
if(to<from) year(to) = year(to) + 1
breaks = seq(from, to, paste(by,"month"))
by = paste(by, "month")
}
breaks = as.Date(breaks)

# Match raster bands to pattern bands
raster_bands <- coverages(x)
pattern_bands <- names(y[[1]])
matching_bands <- pattern_bands[pattern_bands %in% raster_bands]

if(length(matching_bands) < 1)
stop(paste0("Bands from twdtwRaster do not match the bands from patterns"))

if("doy" %in% coverages(x) & !"doy" %in% matching_bands)
matching_bands <- c("doy", matching_bands)

x <- subset(x, layers = matching_bands)
raster_bands <- coverages(x)

# Set raster levels and labels
levels <- names(y)
names(levels) <- levels

# Create output raster objects
r_template <- brick(x@timeseries[[1]], nl = length(breaks) - 1, values = FALSE)
out <- rep(list(r_template), 2)
names(out) <- c("label", "distance")

filepath <- trim(filepath)
filename <- NULL
if (filepath != "") {
dir.create(path = filepath, showWarnings = TRUE, recursive = TRUE)
filename <- paste0(filepath, "/", names(out), ".grd")
names(filename) <- names(out)
} else if (!canProcessInMemory(r_template, n = length(breaks) + length(x@timeseries) )) {
filename <- sapply(names(out), rasterTmpFile)
}

if (!is.null(filename)) {
out <- lapply(names(out), function(i) writeStart(out[[i]], filename = filename[i], ...))
} else {
vv <- lapply(names(out), function(i) matrix(out[[i]], ncol = nlayers(out[[i]])))
names(vv) <- names(out)
}

bs <- blockSize(x@timeseries[[1]])
bs$array_rows <- cumsum(c(1, bs$nrows*out[[1]]@ncols))
pb <- pbCreate(bs$n, progress)

for(k in 1:bs$n){

# Get raster data
v <- lapply(x@timeseries, getValues, row = bs$row[k], nrows = bs$nrows[k])

# Get time series
ts <- lapply(1:nrow(v[[1]]), function(i){
# Get time series dates
if(any(names(v) %in% "doy")){
timeline <- dtwSat::getDatesFromDOY(year = format(dtwSat::index(x), "%Y"), doy = v[["doy"]][i,])
} else {
timeline <- dtwSat::index(x)
}
data.frame(sapply(v[!(names(v) %in% "doy")], function(v) v[i, , drop = FALSE]), date = timeline)
})

# Apply TWDTW analysis
twdtw_results <- parallel::mclapply(ts, mc.cores = ncores, FUN = twdtwReduceTime, y = y, breaks = breaks, ...)

twdtw_results <- data.table::rbindlist(twdtw_results)[,c("label","distance")]
twdtw_label <- matrix(twdtw_results$label, ncol = length(breaks)-1, byrow = TRUE)
twdtw_distance <- matrix(twdtw_results$distance, ncol = length(breaks)-1, byrow = TRUE)

# Get best matches for each point, period, and pattern
m <- length(levels)
h <- length(breaks) - 1

rows <- seq(from = bs$array_rows[k], by = 1, length.out = bs$nrows[k]*out[[1]]@ncols)
if (!is.null(filename)) {
writeValues(out$label, twdtw_label, bs$row[k])
writeValues(out$distance, twdtw_distance, bs$row[k])
} else {
vv$label[rows,] <- twdtw_label
vv$distance[rows,] <- twdtw_distance
}

pbStep(pb, k)

}

if (!is.null(filename)) {
out <- lapply(out, writeStop)
} else {
out <- lapply(seq_along(out), function(i) setValues(out[[i]], values = vv[[i]]))
}

pbClose(pb)

names(out) <- c("label", "distance")

twdtwRaster(Class = out$label, Distance = out$distance, ..., timeline = breaks[-1],
labels = c(levels, "unclassified"), levels = c(seq_along(levels), fill), filepath = filepath)

}
63 changes: 44 additions & 19 deletions R/twdtw_reduce_time.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
#' @rdname twdtwReduceTime
#'
#' @description This function is a minimalist implementation of
#' \link[dtwSat]{twdtwApply} that is in average 3x faster. It does not keep any
#' intermediate data. It performs a multidimensional TWDTW analysis
#' \link[dtwSat]{twdtwApply} that is in average 3x faster. The time weight function
#' is coded in Fortran. It does not keep any intermediate data.
#' It performs a multidimensional TWDTW analysis
#' \insertCite{Maus:2019}{dtwSat} and retrieves only the best matches between
#' the unclassified time series and the patterns for each defined time interval.
#'
Expand All @@ -28,7 +29,6 @@
#' \dontrun{
#'
#' library(dtwSat)
#' log_fun = logisticWeight(-0.1, 50)
#' from = "2009-09-01"
#' to = "2017-09-01"
#' by = "12 month"
Expand All @@ -47,20 +47,20 @@
#' rbenchmark::benchmark(
#' original = twdtwClassify(twdtwApply(x = tw_ts, y = tw_patt, weight.fun = log_fun),
#' from = from, to = to, by = by)[[1]],
#' minimalist = twdtwReduceTime(x = mn_ts, y = mn_patt, weight.fun = log_fun,
#' minimalist = twdtwReduceTime(x = mn_ts, y = mn_patt,
#' from = from, to = to, by = by)
#' )
#' }
#'
#' @export
twdtwReduceTime = function(x,
y,
weight.fun = NULL,
dist.method = "Euclidean",
step.matrix = symmetric1,
from = NULL,
to = NULL,
by = NULL,
breaks = NULL,
overlap = .5,
fill = 255){

Expand Down Expand Up @@ -88,22 +88,10 @@ twdtwReduceTime = function(x,
doyy <- as.numeric(format(ty, "%j"))
doyx <- as.numeric(format(tx, "%j"))

# if(!is.null(weight.fun)){
# # Get day of the year for pattern and time series
# doyy <- as.numeric(format(ty, "%j"))
# doyx <- as.numeric(format(tx, "%j"))
#
# # Compute time-weght matrix
# w <- .g(proxy::dist(doyy, doyx, method = dist.method))
#
# # Apply time-weight to local cost matrix
# cm <- weight.fun(cm, w)
# }
# Compute accumulated DTW cost matrix
#internals <- dtwSat:::.computecost(cm = cm, step.matrix = step.matrix)
xm = na.omit(cbind(doyx, as.matrix(px)))
ym = na.omit(cbind(doyy, as.matrix(py)))
internals = .computecost_fast(xm, ym, step.matrix)
internals = .fast_twdtw(xm, ym, step.matrix)

# Find all low cost candidates
a <- internals$startingMatrix[internals$N,1:internals$M]
Expand Down Expand Up @@ -132,7 +120,10 @@ twdtwReduceTime = function(x,
aligs <- data.table::rbindlist(aligs)

# Create classification intervals
breaks <- seq(as.Date(from), as.Date(to), by = by)
if(is.null(breaks)){
breaks <- seq(as.Date(from), as.Date(to), by = by)
}

# Find best macthes for the intervals
best_matches <- .bestmatches(
x = list(aligs[order(aligs$from, aligs$from),,drop=FALSE]),
Expand Down Expand Up @@ -187,3 +178,37 @@ twdtwReduceTime = function(x,
res$M = m
res
}

# @useDynLib dtwSat computecost
.fast_twdtw = function(xm, ym, step.matrix){

# cm = rbind(0, cm)
n = nrow(ym)
m = nrow(xm)
d = ncol(ym)

if(is.loaded("fast_twdtw", PACKAGE = "dtwSat", type = "Fortran")){
out = .Fortran(fast_twdtw,
XM = matrix(as.double(xm), m, d),
YM = matrix(as.double(ym), n, d),
CM = matrix(as.double(0), n+1, m),
DM = matrix(as.integer(0), n+1, m),
VM = matrix(as.integer(0), n+1, m),
SM = matrix(as.integer(step.matrix), nrow(step.matrix), ncol(step.matrix)),
N = as.integer(n),
M = as.integer(m),
D = as.integer(d),
NS = as.integer(nrow(step.matrix)))
} else {
stop("Fortran fast_twdtw lib is not loaded")
}
# sqrt(sum((ym[1,-1] - xm[1,-1])*(ym[1,-1] - xm[1,-1])))
res = list()
res$costMatrix = out$CM[-1,]
res$directionMatrix = out$DM[-1,]
res$startingMatrix = out$VM[-1,]
res$stepPattern = step.matrix
res$N = n
res$M = m
res
}
4 changes: 2 additions & 2 deletions examples/benchmark_minimalist.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
library(dtwSat)
log_fun = logisticWeight(-0.1, 50)
from = "2009-09-01"
to = "2017-09-01"
to = "2017-08-31"
by = "12 month"

# S4 objects for original implementation
Expand All @@ -15,6 +15,6 @@ mn_ts <- read.csv(system.file("reduce_time/ts_MODIS13Q1.csv", package = "dtwSat"
# Benchtmark
rbenchmark::benchmark(
original = twdtwClassify(twdtwApply(x = tw_ts, y = tw_patt, weight.fun = log_fun), from = from, to = to, by = by)[[1]],
minimalist = twdtwReduceTime(x = mn_ts, y = mn_patt, weight.fun = log_fun, from = from, to = to, by = by)
minimalist = twdtwReduceTime(x = mn_ts, y = mn_patt, from = from, to = to, by = by)
)

81 changes: 81 additions & 0 deletions examples/fast_twdtw.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
\dontrun{

# Example of TWDTW analysis using raster files
library(dtwSat)
library(caret)

# Load raster data
evi <- brick(system.file("lucc_MT/data/evi.tif", package = "dtwSat"))
ndvi <- brick(system.file("lucc_MT/data/ndvi.tif", package = "dtwSat"))
red <- brick(system.file("lucc_MT/data/red.tif", package = "dtwSat"))
blue <- brick(system.file("lucc_MT/data/blue.tif", package = "dtwSat"))
nir <- brick(system.file("lucc_MT/data/nir.tif", package = "dtwSat"))
mir <- brick(system.file("lucc_MT/data/mir.tif", package = "dtwSat"))
doy <- brick(system.file("lucc_MT/data/doy.tif", package = "dtwSat"))
timeline <-
scan(system.file("lucc_MT/data/timeline", package = "dtwSat"), what="date")

# Create raster time series
rts <- twdtwRaster(evi, ndvi, red, blue, nir, mir, timeline = timeline, doy = doy)

# Load field samples and projection
field_samples <-
read.csv(system.file("lucc_MT/data/samples.csv", package = "dtwSat"))
proj_str <-
scan(system.file("lucc_MT/data/samples_projection", package = "dtwSat"),
what = "character")

# Split samples for training (10%) and validation (90%) using stratified sampling
set.seed(1)
I <- unlist(createDataPartition(field_samples$label, p = 0.1))
training_samples <- field_samples[I, ]
validation_samples <- field_samples[-I, ]

# Get time series form raster
training_ts <- getTimeSeries(rts, y = training_samples, proj4string = proj_str)
validation_ts <- getTimeSeries(rts, y = validation_samples, proj4string = proj_str)

# Create temporal patterns
temporal_patterns <- createPatterns(training_ts, freq = 8, formula = y ~ s(x))

# Set TWDTW weight function
# log_fun <- logisticWeight(-0.1, 50)

# Run fast-TWDTW analysis
system.time(
# The logistic time weigh is codeded in Fortran: TODO: add logit parameters to function call
# parallel uses parallel::mclapply - not so much implementation
fast_lucc <- dtwSat:::fasttwdtwApply(x = rts, y = temporal_patterns, ncores = 1, progress = 'text')
)

# Plot TWDTW distances for the first year
plot(fast_lucc, type = "distance", time.levels = 1)

# Plot TWDTW classification results
plot(fast_lucc, type = "map")

# Assess classification
twdtw_assess <-
twdtwAssess(object = fast_lucc, y = validation_samples,
proj4string = proj_str, conf.int = .95)

# Plot map accuracy
plot(twdtw_assess, type = "accuracy")

# Plot area uncertainty
plot(twdtw_assess, type = "area")

# Plot misclassified samples
plot(twdtw_assess, type = "map", samples = "incorrect")

# Get latex table with error matrix
twdtwXtable(twdtw_assess, table.type = "matrix")

# Get latex table with error accuracy
twdtwXtable(twdtw_assess, table.type = "accuracy")

# Get latex table with area uncertainty
twdtwXtable(twdtw_assess, table.type = "area")

}

4 changes: 4 additions & 0 deletions man/twdtwReduceTime.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1883dd3

Please sign in to comment.