diff --git a/inst/doc/applying_twdtw.R b/inst/doc/applying_twdtw.R index 75c64c8..62b3fe7 100644 --- a/inst/doc/applying_twdtw.R +++ b/inst/doc/applying_twdtw.R @@ -5,6 +5,7 @@ opts_chunk$set( message = FALSE, error = FALSE, results = "hide", + cache.path = "./cache/", cache = FALSE, comment = "" ) @@ -27,15 +28,16 @@ if (other_user) { ## ---- echo=FALSE, eval = TRUE, cache = FALSE----------------------------- # Install dtwSat package -#install.packages("dtwSat_0.2.0.9000.tar.gz") +#install.packages("dtwSat_0.2.2.9000.tar.gz", repos = NULL) ## ---- echo=FALSE, eval = TRUE, cache = FALSE----------------------------- library(dtwSat) library(ggplot2) library(scales) -library(reshape2) +library(Hmisc) new_theme = theme_get() +new_theme$text$family = "Helvetica" new_theme$text$size = 8 old_theme = theme_set(new_theme) @@ -55,19 +57,19 @@ df_dist$y = 1.8 plotMatches(mat, attr="evi", k=n) + ylab("Time series Pattern") + geom_text(data=df_dist, mapping = aes_string(x='to', y='y', label='label'), - size = 2) + + size = 2, family="Helvetica") + theme(legend.position="none") -## ---- echo = TRUE, eval = TRUE, results = 'markup'----------------------- -ts = twdtwTimeSeries(MOD13Q1.ts, labels="Time series") -patterns_ts = twdtwTimeSeries(MOD13Q1.patterns.list) -MOD13Q1.ts.labels - ## ---- echo = TRUE, eval = TRUE, results = 'markup'----------------------- library(dtwSat) names(MOD13Q1.patterns.list) head(MOD13Q1.ts, n = 2) +## ---- echo = TRUE, eval = TRUE, results = 'markup'----------------------- +ts = twdtwTimeSeries(MOD13Q1.ts, labels="Time series") +patterns_ts = twdtwTimeSeries(MOD13Q1.patterns.list) +patterns_ts + ## ----example-timeseries, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/3, fig.align='center', fig.cap='Example of time series based on MODIS product MOD13Q1 \\citep{Friedl:2010}. The labels of the phenological cycle are shown in the plot.', fig.pos='!h'---- plot(ts, type = "timeseries") + annotate(geom = "text", x = MOD13Q1.ts.labels$from+90, y = 0.98, @@ -103,9 +105,9 @@ df_weight = melt(df_weight, id.vars = "Difference") names(df_weight)[-1] = c("Functions","Weight") ggplot(df_weight, aes_string(x="Difference", y="Weight", group="Functions", linetype="Functions")) + geom_line() + xlab("Time difference (days)") + - theme(text = element_text(size = 10), - plot.title = element_text(size = 10, face="bold"), - axis.title = element_text(size = 10), + theme(text = element_text(size = 10, family="Helvetica"), + plot.title = element_text(size = 10, family="Helvetica", face="bold"), + axis.title = element_text(size = 10, family="Helvetica"), legend.position = c(.3,.85), legend.background = element_rect(fill="transparent")) + scale_linetype(guide_legend(title = "")) @@ -115,7 +117,7 @@ plot(matches, type="matches", patterns.labels="Soybean", k=4) ## ----alignments-all-patterns, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.5, fig.align='center', fig.cap=c('Alignments and dissimilarity measures of the patterns "soybean", "cotton", and "maize" to the subintervals of the long-term time series using a logistic time-weight. The solid black line is the EVI time series, and the coloured lines are the alignments of the patterns that have dissimilarity measure lower than three.'), fig.pos='!h'---- plot(matches, type="alignments", attr = "evi", threshold = 3.0) -## ----time-series-classification, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.5, fig.align='center', fig.cap=c('Classification of each 6 months periods of the time series using results of the TWDTW analysis with logistic time-weight. The solid lines are the attributes of the time series, the background colours indicate the classification of the periods.'), fig.pos='!h'---- +## ----time-series-classification, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.8, fig.align='center', fig.cap=c('Classification of each 6 months periods of the time series using results of the TWDTW analysis with logistic time-weight. The solid lines are the attributes of the time series, the background colours indicate the classification of the periods.'), fig.pos='!ht'---- ts_classification = twdtwClassify(x = matches, from = as.Date("2009-09-01"), to = as.Date("2013-09-01"), by = "6 month", overlap = 0.5) @@ -135,28 +137,50 @@ ndvi = brick(paste(data_folder,"ndvi.tif", sep = "/")) day_of_year = brick(paste(data_folder,"doy.tif", sep = "/")) dates = scan(paste(data_folder,"timeline", sep = "/"), what = "dates") +## ---- echo = TRUE, eval = TRUE------------------------------------------- +raster_timeseries = twdtwRaster(blue, red, nir, mir, evi, ndvi, + timeline = dates, doy = day_of_year) + ## ---- echo = TRUE, eval = TRUE, results = 'markup'----------------------- field_samples = read.csv(paste(data_folder,"samples.csv", sep = "/")) -head(field_samples, 2) +head(field_samples, 5) table(field_samples[["label"]]) proj_str = scan(paste(data_folder,"samples_projection", sep = "/"), what = "character") proj_str -## ---- echo = TRUE, eval = TRUE------------------------------------------- -raster_timeseries = twdtwRaster(blue, red, nir, mir, evi, ndvi, - timeline = dates, doy = day_of_year) - ## ---- echo = TRUE, eval = TRUE, results = 'markup'----------------------- field_samples_ts = getTimeSeries(raster_timeseries, y = field_samples, proj4string = proj_str) field_samples_ts +## ---- echo = TRUE, eval = FALSE, results = 'markup'---------------------- +# set.seed(1) +# log_fun = logisticWeight(alpha=-0.1, beta=50) +# cross_validation = twdtwCrossValidate(field_samples_ts, times=100, p=0.1, +# freq = 8, formula = y ~ s(x, bs="cc"), weight.fun = log_fun) + +## ---- echo = FALSE, eval = TRUE------------------------------------------ +load(system.file("lucc_MT/cross_validation.RData", package = "dtwSat")) + +## ----plot-accuracy, echo = FALSE, eval = TRUE, fig.width=page_width, fig.height=page_width/2, fig.align='center', fig.cap='User\'s and producer\'s accuracy of the TWDTW cross-validation using 100 resampling-with-replacement. The plot shows the 95\\% confidence interval of the mean.', fig.pos='!ht'---- +plot(cross_validation, conf.int=.95) + +## ---- echo = FALSE, eval = TRUE, results = 'asis'------------------------ +twdtwXtable(cross_validation, conf.int=.95, digits = 2, caption="\\label{tab:cross-validation} User\'s and producer\'s accuracy of the TWDTW cross-validation using 100 resampling-with-replacement. The table shows the standard deviation ($\\sigma$) and the 95\\% confidence interval (ci) of the mean ($\\mu$).'", comment = FALSE, caption.placement = "bottom", table.placement="!ht") + +## ---- echo = TRUE, eval = TRUE------------------------------------------- +library(caret) +set.seed(1) +I = unlist(createDataPartition(field_samples[,"label"], p = 0.1)) +training_ts = subset(field_samples_ts, I) +validation_samples = field_samples[-I,] + ## ---- echo = TRUE, eval = TRUE------------------------------------------- temporal_patterns = - createPatterns(field_samples_ts, freq = 8, formula = y ~ s(x)) + createPatterns(training_ts, freq = 8, formula = y ~ s(x)) -## ----temporal-patterns, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_width/1.5, fig.align='center', fig.pos='!h', fig.cap='Temporal patterns of forest, cotton-fallow, soybean-cotton, soybean-maize, and soybean-millet based on the ground truth samples.'---- +## ----temporal-patterns, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_width/1.5, fig.align='center', fig.pos='!h', fig.cap='Temporal patterns of Forest, Cotton-fallow, Soybean-cotton, Soybean-maize, and Soybean-millet based on the ground truth samples.'---- plot(temporal_patterns, type = "patterns") + theme(legend.position = c(.8,.25)) @@ -165,79 +189,40 @@ log_fun = logisticWeight(alpha=-0.1, beta=50) twdtw_dist = twdtwApply(x = raster_timeseries, y = temporal_patterns, overlap = 0.5, weight.fun = log_fun, overwrite=TRUE, format="GTiff") -## ----plot-dissmilarity2013, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Illustration of the TWDTW dissimilarity from each temporal pattern in 2013. The blue areas are more similar to the pattern and the red areas are less similar to the pattern.', fig.pos='!h'---- -plot(x = twdtw_dist, type="distance", time.levels = 6) +## ----plot-dissmilarity2008, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Illustration of the TWDTW dissimilarity from each temporal pattern in 2008. The blue areas are more similar to the pattern and the red areas are less similar to the pattern.', fig.pos='!ht'---- +plot(x = twdtw_dist, type="distance") -## ---- echo = TRUE, eval = TRUE------------------------------------------- -land_use_maps = twdtwClassify(twdtw_dist, format="GTiff", overwrite=TRUE) +## ---- echo = TRUE, eval = TRUE, results = 'markup'----------------------- +land_cover_maps = twdtwClassify(twdtw_dist, format="GTiff", overwrite=TRUE) -## ----plot-map, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Land use maps for each year from 2008 to 2013.', fig.pos='!h'---- -plot(x = land_use_maps, type = "maps") +## ----plot-map, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Land cover maps for each year from 2008 to 2013.', fig.pos='!ht'---- +plot(x = land_cover_maps, type = "maps") -## ----plot-area, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Percentage of area for each land use class from 2008 to 2013.', fig.pos='!h'---- -plot(x = land_use_maps, type = "area") +## ----plot-area, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Percentage of area for each land cover class from 2008 to 2013.', fig.pos='!ht'---- +plot(x = land_cover_maps, type = "area") ## ----plot-change, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Gains and losses in area from the other classes. The $y$ axis shows the actual class; the positive direction of $x$ axis shows the gains and the negative direction of $x$ axis shows the losses of the classes indicated in $y$. The colors indicate from/to which classes the gains/losses belong.', fig.pos='!h'---- -plot(x = land_use_maps, type = "changes") +plot(x = land_cover_maps, type = "changes") -## ----plot-dissmilarity, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='TWDTW dissimilarity measure for each pixel over each classified period. The blue areas have high confidence and the red areas have low confidence in the classification.', fig.pos='!h'---- -plot(x = land_use_maps, type="distance") +## ----plot-dissmilarity, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='TWDTW dissimilarity measure for each pixel over each classified period. The blue areas have high confidence and the red areas have low confidence in the classification.', fig.pos='!ht'---- +plot(x = land_cover_maps, type="distance") -## ---- echo = TRUE, eval = FALSE------------------------------------------ -# set.seed(1) -# partitions = splitDataset(field_samples_ts, p=0.1, times=100, -# freq = 8, formula = y ~ s(x, bs="cc")) +## ---- echo = TRUE, eval = TRUE------------------------------------------- +maps_assessment = twdtwAssess(land_cover_maps, y = validation_samples, + proj4string = proj_str, conf.int=.95) -## ---- echo = TRUE, eval = FALSE, results = 'markup'---------------------- -# log_fun = logisticWeight(alpha=-0.1, beta=50) -# twdtw_res = lapply(partitions, function(x){ -# res = twdtwApply(x = x$ts, y = x$patterns, weight.fun = log_fun, n=1) -# twdtwClassify(x = res) -# }) -# assessment = twdtwAssess(twdtw_res) -# head(assessment, 5) +## ---- echo = FALSE, eval = TRUE, results = 'asis'------------------------ +twdtwXtable(maps_assessment, table.type="errormatrix", digits = 0, rotate.col = TRUE, caption="\\label{tab:map-error-matrix}Error matrix of the map classification based on TWDTW analysis. The area is in the map unit, in this case $m^2$. $w$ is the proportion of area mapped for each class.", comment = FALSE, caption.placement = "bottom", table.placement="!ht") -## ---- echo = FALSE, eval = TRUE------------------------------------------ -load(system.file("lucc_MT/cross_validation.RData", package = "dtwSat")) +## ----plot-map-incorrect-samples, echo = FALSE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Incorrect classified samples.', fig.pos='!ht'---- +plot(x = maps_assessment, type="map", samples="incorrect") -## ----plot-accuracy, echo = FALSE, eval = TRUE, fig.width=page_width, fig.height=page_width/2, fig.align='center', fig.cap='User\'s Accuracy (UA) and Producer\'s Accuracy (PA) of the TWDTW method for land cover classification. The plot shows the averages and their confidence interval for 99\\%.', fig.pos='!h'---- -df = melt(assessment[,-1], id="label") -df$variable = factor(df$variable, levels = c("UA", "PA"), labels = c("User's Accuracy", "Producer's Accuracy")) -ggplot(df, aes(x=label, y=value)) + - stat_summary(fun.data="mean_cl_boot", fun.args=list(conf.int = .99), - width=0.5, geom="crossbar", size=0.1, fill = "gray") + - geom_point(size=0.2) + facet_grid(. ~ variable) + - scale_y_continuous(limits = c(0,1), labels = percent, breaks = seq(0,1,.2)) + - xlab("") + ylab("Accuracy") + coord_flip() +## ---- echo = FALSE, eval = TRUE, results = 'asis'------------------------ +twdtwXtable(maps_assessment, table.type="accuracy", show.prop = TRUE, digits = 2, rotate.col = TRUE, caption="\\label{tab:map-accuracy}Accuracy and error matrix in proportion of area of the classified map.", comment = FALSE, caption.placement = "bottom", table.placement="!ht") ## ---- echo = FALSE, eval = TRUE, results = 'asis'------------------------ -assess_mean = aggregate(assessment[, c("UA","PA")], list(assessment$label), mean) -assess_sd = aggregate(assessment[, c("UA","PA")], list(assessment$label), sd) -l_names = levels(assessment$label) -names(l_names) = l_names -ic_ua = t(sapply(l_names, function(i) 100*mean_cl_boot(x = assessment$UA[assessment$label==i], conf.int = .99)))[,-1] -ic_pa = t(sapply(l_names, function(i) 100*mean_cl_boot(x = assessment$PA[assessment$label==i], conf.int = .99)))[,-1] - -assess_table = data.frame( - Class = assess_mean$Group.1, - - MUA = sprintf("%.2f", round(100*assess_mean$UA,2)), - SDUA = sprintf("(%.2f)", round(100*assess_sd$UA,2)), - CIUA = sprintf("[%.2f-%.2f]", round(as.numeric(ic_ua[,1]),2), round(as.numeric(ic_ua[,2]),2)), - - MPA = sprintf("%.2f", round(100*assess_mean$PA,2)), - SDPA = sprintf("(%.2f)", round(100*assess_sd$PA,2)), - CIPA = sprintf("[%.2f-%.2f]", round(as.numeric(ic_pa[,1]),2), round(as.numeric(ic_pa[,2]),2)) - ) - -x_assess = xtable::xtable(assess_table, - format = tab_format, digits = 2, label = "tab:assessment", alig=c("l","c","c","c","c","c","c","c"), - caption="User\'s and Producer\'s Accuracy of the land use classification based on TWDTW analysis. $\\mu$ is the average accuracy, $\\sigma$ the standard deviation, and CI is the confidence interval of 99\\% using 100 resampling-with-replacement.") - -addtorow = list() -addtorow$pos = list(0) -addtorow$command = paste("Class & \\multicolumn{3}{c}{User's Accuracy (UA) \\%} & \\multicolumn{3}{c}{Producer's Accuracy (PA)\\%}\\\\", paste(c("","$\\mu$","$\\sigma$","CI","$\\mu$","$\\sigma$","CI"), collapse="&"), "\\\\", collapse = "") - -xtable::print.xtable(x_assess, add.to.row=addtorow, include.colnames = FALSE, include.rownames = FALSE, - comment = FALSE, caption.placement = "bottom") +twdtwXtable(maps_assessment, table.type="area", digits = 0, rotate.col = TRUE, caption="\\label{tab:map-adjusted-area}Mapped and adjusted, accumulated over the whole period, i.e. the sum from the sum of the maps from 2008 to 2013. The area is in the map unit, in this case $m^2$.", comment = FALSE, caption.placement = "bottom", table.placement="!ht") + +## ----plot-area-and-uncertainty, echo = FALSE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.7, fig.align='center', fig.cap='Mapped and adjusted, accumulated over the whole period, i.e. the sum from the sum of the maps from 2008 to 2013. The area is in the map unit, in this case $m^2$.', fig.pos='!ht'---- +plot(x = maps_assessment, type="area", perc=FALSE) diff --git a/inst/doc/applying_twdtw.Rmd b/inst/doc/applying_twdtw.Rmd index 4fbe645..7c8ae09 100644 --- a/inst/doc/applying_twdtw.Rmd +++ b/inst/doc/applying_twdtw.Rmd @@ -1,17 +1,17 @@ --- author: - name: Victor Maus - affiliation: INPE - affiliation2: University of Münster + affiliation: University of Münster + affiliation2: INPE, IIASA address: > - Image Processing Division\newline - National Institute for Space Research\newline - Av. dos Astronautas, 1758\newline - 12227-010 São José dos Campos, Brazil + Ecosystems Services and Management Program\newline + International Institute for Applied Systems Analysis\newline + Schlossplatz 1\newline + A-2361 Laxenburg, Austria email: vwmaus1@gmail.com - url: \url{www.dpi.inpe.br/maus} - Telephone: +55/12/3208-7330 - - name: "Gilberto Câmara" + url: \url{www.iiasa.ac.at/staff/maus} + Telephone: +43/2236-807-550 + - name: Gilberto Câmara affiliation: INPE affiliation2: University of Münster - name: Marius Appel @@ -19,27 +19,25 @@ author: - name: Edzer Pebesma affiliation: University of Münster title: - formatted: "\\pkg{dtwSat}: Time-Weighted Dynamic Time Warping for Satellite Image Time Series Analysis in \\proglang{R}$^1$" + formatted: "\\pkg{dtwSat}: Time-Weighted Dynamic Time Warping for Satellite Image Time Series Analysis in \\proglang{R}" # If you use tex in the formatted title, also supply version without plain: "dtwSat: Time-Weighted Dynamic Time Warping for Satellite Image Time Series Analysis in R" # For running headers, if needed short: "\\pkg{dtwSat}: Time-Weighted Dynamic Time Warping" abstract: > - The opening of large archives of satellite data such as LANDSAT, MODIS and the SENTINELs has given researchers unprecedented access to data, allowing them to better quantify and understand local and global land change. The need to analyse such large data sets has lead to the development of automated and semi-automated methods for satellite image time series analysis. However, few of the proposed methods for remote sensing time series analysis are available as open source software. In this paper we present the \proglang{R} package \pkg{dtwSat}. This package provides an implementation of the Time-Weighted Dynamic Time Warping method for land use and land cover mapping using sequence of multi-band satellite images. Methods based on dynamic time warping are flexible to handle irregular sampling and out-of-phase time series, and they have achieved significant results in time series analysis. \pkg{dtwSat} is available from the Comprehensive R Archive Network and contributes to making methods for satellite time series analysis available to a larger audience. The package supports the full cycle of land cover classification using image time series, ranging from selecting temporal patterns to visualising and evaluating the results. + The opening of large archives of satellite data such as LANDSAT, MODIS and the SENTINELs has given researchers unprecedented access to data, allowing them to better quantify and understand local and global land change. The need to analyse such large data sets has lead to the development of automated and semi-automated methods for satellite image time series analysis. However, few of the proposed methods for remote sensing time series analysis are available as open source software. In this paper we present the \proglang{R} package \pkg{dtwSat}. This package provides an implementation of the Time-Weighted Dynamic Time Warping method for land cover mapping using sequence of multi-band satellite images. Methods based on dynamic time warping are flexible to handle irregular sampling and out-of-phase time series, and they have achieved significant results in time series analysis. \pkg{dtwSat} is available from the Comprehensive R Archive Network and contributes to making methods for satellite time series analysis available to a larger audience. The package supports the full cycle of land cover classification using image time series, ranging from selecting temporal patterns to visualising and assessing the results. keywords: # at least one keyword must be supplied - formatted: [dynamic programming, MODIS time series, land use changes, crop monitoring] - plain: [dynamic programming, MODIS time series, land use changes, crop monitoring] + formatted: [dynamic programming, MODIS time series, land cover changes, crop monitoring] + plain: [dynamic programming, MODIS time series, land cover changes, crop monitoring] preamble: > \usepackage{amsmath} \usepackage{array} \usepackage{caption} \usepackage{subcaption} \usepackage{float} - \usepackage{framed} - \usepackage{listings} - \usepackage{siunitx} - \usepackage{latexsym} + \usepackage{microtype} + \setlength{\tabcolsep}{4pt} documentclass: nojss classoption: shortnames output: @@ -51,7 +49,7 @@ vignette: > %\VignetteIndexEntry{dtwSat: Time-Weighted Dynamic Time Warping for Satellite Image Time Series Analysis in R} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} - %\VignetteDepends{rticles, dtwSat, ggplot2, scales, Hmisc, reshape2, xtable, tikzDevice} + %\VignetteDepends{rticles, dtwSat, ggplot2, scales, Hmisc, reshape2, tikzDevice} --- @@ -62,6 +60,7 @@ opts_chunk$set( message = FALSE, error = FALSE, results = "hide", + cache.path = "./cache/", cache = FALSE, comment = "" ) @@ -83,11 +82,9 @@ if (other_user) { #options(tikzDocumentDeclaration = "\\documentclass{jss}\\usepackage{siunitx}\\usepackage{latexsym}") ``` - - ```{r , echo=FALSE, eval = TRUE, cache = FALSE} # Install dtwSat package -#install.packages("dtwSat_0.2.0.9000.tar.gz") +#install.packages("dtwSat_0.2.2.9000.tar.gz", repos = NULL) ``` @@ -95,9 +92,10 @@ if (other_user) { library(dtwSat) library(ggplot2) library(scales) -library(reshape2) +library(Hmisc) new_theme = theme_get() +new_theme$text$family = "Helvetica" new_theme$text$size = 8 old_theme = theme_set(new_theme) @@ -106,30 +104,28 @@ page_width = 5.590551#in 14.2#cm page_height = 9.173228#in 23.3#cm ``` -\footnotetext[1]{This vignette is based on the paper: MAUS, V.; CAMARA, G.; APPEL, M.; PEBESMA, E. dtwSat: Time-Weighted Dynamic Time Warping for satellite image time series analysis in R. Submitted to the Journal of Statistical Software.} - \sloppy # Introduction - -Remote sensing images are the most widely used data source for measuring land use and land cover change (LUCC). In many areas, remote sensing images are the only data available for this purpose [@Lambin:2006; @Fritz:2013]. Recently, the opening of large archives of satellite data such as LANDSAT, MODIS and the SENTINELs has given researchers unprecedented access to data, allowing them to better quantify and understand local and global land change. The need to analyse such large data sets has lead to the development of automated and semi-automated methods for satellite image time series analysis. These methods include multi-image compositing [@Griffiths:2013], detecting forest disturbance and recovery [@Kennedy:2010; @Zhu:2012; @DeVries:2015], crop classification [@Xiao:2005; @Wardlow:2007; @Petitjean:2012; @Maus:2016], planted forest mapping [@Maire:2014], crop expansion and intensification [@Galford:2008; @Sakamoto:2009], detecting trend and seasonal changes [@Lunetta:2006; @Verbesselt:2010; @Verbesselt:2010a; @Verbesselt:2012], and extracting seasonality metrics from satellite time series [@Jonsson:2002; @Jonsson:2004]. Given the open availability of large image data sets, the research community on Earth Observation would get much benefit from methods that are openly available, reproducible and comparable. However, few of the proposed methods for remote sensing time series analysis are available as open source software, the main exception being the BFAST and BFAST-monitor algorithms for change detection [@Verbesselt:2010; @Verbesselt:2010a]. This paper is a contribution to making methods for satellite time series analysis available to a larger audience. +Remote sensing images are the most widely used data source for measuring land use and land cover change (LUCC). In many areas, remote sensing images are the only data available for this purpose [@Lambin:2006; @Fritz:2013]. Recently, the opening of large archives of satellite data such as LANDSAT, MODIS and the SENTINELs has given researchers unprecedented access to data, allowing them to better quantify and understand local and global land change. The need to analyse such large data sets has lead to the development of automated and semi-automated methods for satellite image time series analysis. These methods include multi-image compositing [@Griffiths:2013], detecting forest disturbance and recovery [@Kennedy:2010; @Zhu:2012; @DeVries:2015], crop classification [@Xiao:2005; @Wardlow:2007; @Petitjean:2012; @Maus:2016], planted forest mapping [@Maire:2014], crop expansion and intensification [@Galford:2008; @Sakamoto:2009], detecting trend and seasonal changes [@Lunetta:2006; @Verbesselt:2010; @Verbesselt:2010a; @Verbesselt:2012], and extracting seasonality metrics from satellite time series [@Jonsson:2002; @Jonsson:2004]. Given the open availability of large image data sets, the Earth Observation community would get much benefit from methods that are openly available, reproducible and comparable. However, few of the proposed methods for remote sensing time series analysis are available as open source software, the main exception being the BFAST and BFAST-monitor algorithms for change detection [@Verbesselt:2010; @Verbesselt:2010a]. This paper is a contribution to making methods for satellite time series analysis available to a larger audience. In this paper we describe the \pkg{dtwSat} package, written in \proglang{R} [@R:2015] and \proglang{Fortran} programming languages, and available from the Comprehensive R Archive Network at [http://CRAN.R-project.org/package=dtwSat](http://CRAN.R-project.org/package=dtwSat). The package provides an implementation of Time-Weighted Dynamic Time Warping (TWDTW) [@Maus:2016] for satellite image time series analysis. -The TWDTW method is an adaptation of the well-known dynamic time warping (DTW) method for time series analysis [@Velichko:1970; @Sakoe:1971; @Sakoe:1978; @Rabiner:1993; @Berndt:1994; @Keogh:2005; @Muller:2007] for land use and land cover classification. The standard DTW compares a temporal signature of a known event (*e.g.*, a person's speech) with an unknown time series. It finds all possible alignments between two time series and provides a dissimilarity measure [@Rabiner:1993]. In contrast to standard DTW, the TWDTW method is sensitive to seasonal changes of natural and cultivated vegetation types. It also considers inter-annual climatic and seasonal variability. In a tropical forest area, the method has achieved a high accuracy for mapping classes of single cropping, double cropping, forest, and pasture [@Maus:2016]. +The TWDTW method is an adaptation of the well-known dynamic time warping (DTW) method for time series analysis [@Velichko:1970; @Sakoe:1971; @Sakoe:1978; @Rabiner:1993; @Berndt:1994; @Keogh:2005; @Muller:2007] for land cover classification. The standard DTW compares a temporal signature of a known event (*e.g.*, a person's speech) with an unknown time series. It finds all possible alignments between two time series and provides a dissimilarity measure [@Rabiner:1993]. In contrast to standard DTW, the TWDTW method is sensitive to seasonal changes of natural and cultivated vegetation types. It also considers inter-annual climatic and seasonal variability. In a tropical forest area, the method has achieved a high accuracy for mapping classes of single cropping, double cropping, forest, and pasture [@Maus:2016]. + +We chose \proglang{R} because it is an open source software that offers a large number of reliable packages. The \pkg{dtwSat} package builds upon on a number of graphical and statistical tools in \proglang{R}: \pkg{dtw} [@Giorgino:2009], \pkg{proxy} [@Meyer:2015], \pkg{zoo} [@Zeileis:2005], \pkg{mgcv} [@Wood:2000; @Wood:2003; @Wood:2004; @Wood:2006; @Wood:2011], \pkg{sp} [@Pebesma:2005; @Bivand:2013], \pkg{raster} [@Hijmans:2015], \pkg{caret} [@Kuhn:2016], and \pkg{ggplot2} [@Wickham:2009]. Other \proglang{R} packages that are related and useful for remote sensing and land cover analysis include \pkg{landsat} [@Goslee:2011], \pkg{rgdal} [@Bivand:2015], \pkg{spacetime} [@Pebesma:2012; @Bivand:2013], \pkg{bfast} [@Verbesselt:2010; @Verbesselt:2010a], \pkg{bfastmonitor} [@Verbesselt:2011], \pkg{bfastSpatial} [@Dutrieux:2014], \pkg{MODISTools} [@Tuck:2014], \pkg{maptools} [@Bivand:2015], and \pkg{lucc} [@Moulds:2015]. Using existing packages as building blocks, software developers in \proglang{R} save a lot of time and can concentrate on their intended goals. -We chose \proglang{R} because it is an open source software that offers a large number of reliable packages. The \pkg{dtwSat} package builds upon on a number of graphical and statistical tools in \proglang{R}: \pkg{dtw} [@Giorgino:2009], \pkg{proxy} [@Meyer:2015], \pkg{zoo} [@Zeileis:2005], \pkg{mgcv} [@Wood:2000; @Wood:2003; @Wood:2004; @Wood:2006; @Wood:2011], \pkg{sp} [@Pebesma:2005; @Bivand:2013], \pkg{raster} [@Hijmans:2015], \pkg{caret} [@Kuhn:2016], and \pkg{ggplot2} [@Wickham:2009]. Other \proglang{R} packages that are related and useful for remote sensing and land use analysis include \pkg{landsat} [@Goslee:2011], \pkg{rgdal} [@Bivand:2015], \pkg{spacetime} [@Pebesma:2012; @Bivand:2013], \pkg{bfast} [@Verbesselt:2010; @Verbesselt:2010a], \pkg{bfastmonitor} [@Verbesselt:2011], \pkg{bfastSpatial} [@Dutrieux:2014], \pkg{MODISTools} [@Tuck:2014], \pkg{maptools} [@Bivand:2015], and \pkg{lucc} [@Moulds:2015]. Using existing packages as building blocks, software developers in \proglang{R} save a lot of time and can concentrate on their intended goals. +There is already an \proglang{R} package that implements the standard DTW method for time series analysis: the \pkg{dtw} package [@Giorgino:2009]. In the \pkg{dtwSat} package, we focus on the specific case of satellite image time series analysis. The analysis method implemented in \pkg{dtwSat} package extends that of the \pkg{dtw} package; it adjusts the standard DTW method to account for the seasonality of different types of land cover. Our aim is to support the full cycle of land cover classification, from selecting sample patterns to visualising and assessing the final result. -There is already an \proglang{R} package that implements the standard DTW method for time series analysis: the \pkg{dtw} package [@Giorgino:2009]. In the \pkg{dtwSat} package, we focus on the specific case of satellite image time series analysis. The analysis method implemented in \pkg{dtwSat} package extends that of the \pkg{dtw} package; it adjusts the standard DTW method to account for the seasonality of different types of land cover. Our aim is to support the full cycle of land use and land cover classification, from selecting sample patterns to visualising and evaluating the final result. +This paper focuses on the motivation and guidance for using the TWDTW method for remote sensing applications. The full description of the method is available in a paper published by the lead author [@Maus:2016]. In what follows, the \autoref{the-time-weighted-dynamic-time-warping-method} describes the application of TWDTW [@Maus:2016] for satellite time series analysis. The \autoref{dtwsat-package-overview} gives an overview of the \pkg{dtwSat} package. Then, \autoref{classifying-a-time-series} focuses on the analysis of a single time series and shows some visualisation methods. We then present an example of a complete land cover change analysis for a study area in Mato Grosso, Brazil in \autoref{producing-a-land-cover-map}. -This paper focuses on the motivation and guidance for using the TWDTW method for remote sensing applications. The full description of the method is available in a paper published by the lead author [@Maus:2016]. In what follows, Section \ref{dtwsat-package-overview} gives an overview of the \pkg{dtwSat} package. The Section \ref{the-time-weighted-dynamic-time-warping-method} describes the application of TWDTW [@Maus:2016] for satellite time series analysis. Then, Section \ref{classifying-a-time-series} focuses on the analysis of a single time series and shows some visualisation methods. We then present an example of a complete land use and land cover change analysis for a study area in the Mato Grosso, Brazil in Section \ref{producing-a-land-cover-map}. # The Time-Weighted Dynamic Time Warping method -In this section, we describe the Time-Weighted Dynamic Time Warping (TWDTW) algorithm in general terms. For a detailed technical explanation, refer to @Maus:2016. TWDTW is time-constrained version of the Dynamic Time Warping (DTW) algorithm. Although the standard DTW method is good for shape matching [@Keogh:2005], it is not suited *per se* for satellite image time series analysis, since it disregards the temporal range when finding the best matches between two time series [@Maus:2016]. When using image time series for land cover classification, one needs to balance between shape matching and temporal alignment, since each land cover class has a distinct phenological cycle associated with the vegetation [@Reed:1994,@Zhang:2003]. For example, soybeans and maize cycles range from 90 to 120 days, whereas sugar-cane has a 360 to 720 days cycle. A time series with cycle larger than 180 days is unlikely to come from soybeans or maize. For this reason, @Maus:2016 include a time constraint in DTW to account for seasonality. The resulting method is capable of distinguishing different land use and land cover classes. +In this section, we describe the Time-Weighted Dynamic Time Warping (TWDTW) algorithm in general terms. For a detailed technical explanation, refer to @Maus:2016. TWDTW is time-constrained version of the Dynamic Time Warping (DTW) algorithm. Although the standard DTW method is good for shape matching [@Keogh:2005], it is not suited *per se* for satellite image time series analysis, since it disregards the temporal range when finding the best matches between two time series [@Maus:2016]. When using image time series for land cover classification, one needs to balance between shape matching and temporal alignment, since each land cover class has a distinct phenological cycle associated with the vegetation [@Reed:1994,@Zhang:2003]. For example, soybeans and maize cycles range from 90 to 120 days, whereas sugar-cane has a 360 to 720 days cycle. A time series with cycle larger than 180 days is unlikely to come from soybeans or maize. For this reason, @Maus:2016 include a time constraint in DTW to account for seasonality. The resulting method is capable of distinguishing different land cover classes. -The inputs to TWDTW are: (a) a set of time series of known temporal patterns (*e.g.*, phenological cycles of land cover classes); (b) an unclassified long-term satellite image time series. For each temporal pattern, the algorithm finds all matching subintervals in the long-term time series, providing a dissimilarity measure (cf. Figure \ref{fig:twdtw-example}). The result of the algorithm is a set of subintervals, each associated with a pattern and with a dissimilarity measure. We then break the unclassified time series in periods according to our needs (*e.g.*, yearly, seasonality, monthly). For each period, we consider all matching subintervals that intersect with it, and classify them based on the land cover class of the best matching subinterval. In this way, the long-term satellite time series is divided in periods, and each period is assigned a land cover class. +The inputs to TWDTW are: (a) a set of time series of known temporal patterns (*e.g.*, phenological cycles of land cover classes); (b) an unclassified long-term satellite image time series. For each temporal pattern, the algorithm finds all matching subintervals in the long-term time series, providing a dissimilarity measure (cf. \autoref{fig:twdtw-example}). The result of the algorithm is a set of subintervals, each associated with a pattern and with a dissimilarity measure. We then break the unclassified time series in periods according to our needs (*e.g.*, yearly, seasonality, monthly). For each period, we consider all matching subintervals that intersect with it, and classify them based on the land cover class of the best matching subinterval. In this way, the long-term satellite time series is divided in periods, and each period is assigned a land cover class. ```{r twdtw-example, echo = FALSE, eval = TRUE, fig.width=page_width, fig.height=page_height/3.5, fig.align='center', fig.cap='Matches of the known temporal pattern to subintervals of the long-term time series. The solid black line is the long-term time series, the colored lines are the different matches of the same pattern ordered by TWDTW dissimilarity measure, and the gray dashed lines are the matching points.', fig.pos='h'} n=4 @@ -143,12 +139,12 @@ df_dist$y = 1.8 plotMatches(mat, attr="evi", k=n) + ylab("Time series Pattern") + geom_text(data=df_dist, mapping = aes_string(x='to', y='y', label='label'), - size = 2) + + size = 2, family="Helvetica") + theme(legend.position="none") ``` -To use TWDTW for land use and land cover classification, we need the following data sets: +To use TWDTW for land cover classification, we need the following data sets: - A set of remote sensing time series for the study area. For example, a tile of a MODIS MOD13Q1 image consists of 4800 x 4800 pixels, covering an area of 10 degrees x 10 degrees at the Equator [@Friedl:2010]. A 15-year (2000-2015) MODIS MOD13Q1 set time series has 23 images per year, with a total of 23 million time series, each with 346 samples. @@ -156,7 +152,7 @@ To use TWDTW for land use and land cover classification, we need the following d - A set of ground truth points, with spatial and temporal information and land cover classification. These *ground truth* points are used for validation and accuracy assessment. -Based on the information provided by the user about the images to be analysed, our method maps them to a three-dimensional (3-D) array in space-time (Figure \ref{fig:3-D-array}). This array can have multiple attributes, such as the satellite bands (*e.g.*, "red", "nir", and "blue"), and derived indices (*e.g.*, "NDVI", "EVI", and "EVI2"). This way, each pixel location is associated to a sequence of measurements, building a satellite image time series. Figure \ref{fig:3-D-array} shows an example of "evi" time series for a location in the Brazilian Amazon from 2000 to 2008. In the first two years, the area was covered by forest that was cut in 2002. The area was then used for cattle raising (pasture) for three years, and then for crop production from 2006 to 2008. Satellite image time series are thus useful to describe the dynamics of the landscape and the land use trajectories. +Based on the information provided by the user about the images to be analysed, our method maps them to a three-dimensional (3-D) array in space-time (\autoref{fig:3-D-array}). This array can have multiple attributes, such as the satellite bands (*e.g.*, "red", "nir", and "blue"), and derived indices (*e.g.*, "NDVI", "EVI", and "EVI2"). This way, each pixel location is associated to a sequence of measurements, building a satellite image time series. \autoref{fig:3-D-array} shows an example of "EVI" time series for a location in the Brazilian Amazon from 2000 to 2008. In the first two years, the area was covered by forest that was cut in 2002. The area was then used for cattle raising (pasture) for three years, and then for crop production from 2006 to 2008. Satellite image time series are thus useful to describe the dynamics of the landscape and the land use trajectories. \begin{figure}[!h] \begin{center} @@ -168,6 +164,7 @@ Based on the information provided by the user about the images to be analysed, o \end{figure} + # dtwSat package overview \pkg{dtwSat} provides a set of functions for land cover change analysis using satellite image time series. This includes functions to build temporal patterns for land cover types, apply the TWDTW analysis using different weighting functions, visualise the results in a graphical interface, produce land cover maps, and create spatiotemporal plots for land changes. Therefore, \pkg{dtwSat} gives an end-to-end solution for satellite time series analysis, which users can make a complete land change analysis. @@ -185,6 +182,7 @@ The class \code{twdtwRaster} is used for satellite image time series. This class + # Classifying a time series This section describes how to classify one time series, using examples that come with the \pkg{dtwSat} package. We will show how to match three temporal patterns ("soybean", "cotton", and "maize") to subintervals of a long-term satellite image time series. These time series have been extracted from a set of MODIS MOD13Q1 [@Friedl:2010] images and include the vegetation indices "ndvi", "evi", and the original bands "nir", "red", "blue", and "mir". In this example, the classification of crop types for the long-term time series is known. @@ -193,17 +191,17 @@ This section describes how to classify one time series, using examples that come The inputs for the next examples are time series in \pkg{zoo} format. The first is an object of class \code{zoo} with a long-term time series, referred to as \code{MOD13Q1.ts}, and the second is a \code{list} of time series of class \code{zoo} with the temporal patterns of "soybean", "cotton", and "maize", referred to as \code{MOD13Q1.patterns.list}. -From \code{zoo} objects we construct time series of class \code{twdtwTimeSeries}, for which we have a set of visualization and analysis methods implemented in the \pkg{dtwSat} package. The code below builds two objects of class \code{twdtwTimeSeries}. The first has the long-term time series and second has the temporal patterns. We use the plot method types \code{timeseries} and \code{patterns} to shown the objects \code{ts} in Figure \ref{fig:example-timeseries} and \code{patterns_ts} in Figure \ref{fig:temporal-patterns-soy-cot-mai}, respectively. This plot method uses \code{ggplot} syntax. -```{r, echo = TRUE, eval = TRUE, results = 'markup'} -ts = twdtwTimeSeries(MOD13Q1.ts, labels="Time series") -patterns_ts = twdtwTimeSeries(MOD13Q1.patterns.list) -MOD13Q1.ts.labels -``` +From \code{zoo} objects we construct time series of class \code{twdtwTimeSeries}, for which we have a set of visualization and analysis methods implemented in the \pkg{dtwSat} package. The code below builds two objects of class \code{twdtwTimeSeries}. The first has the long-term time series and second has the temporal patterns. We use the plot method types \code{timeseries} and \code{patterns} to shown the objects \code{ts} in \autoref{fig:example-timeseries} and \code{MOD13Q1.ts} in \autoref{fig:temporal-patterns-soy-cot-mai}, respectively. This plot method uses \code{ggplot} syntax. ```{r, echo = TRUE, eval = TRUE, results = 'markup'} library(dtwSat) names(MOD13Q1.patterns.list) head(MOD13Q1.ts, n = 2) ``` +```{r, echo = TRUE, eval = TRUE, results = 'markup'} +ts = twdtwTimeSeries(MOD13Q1.ts, labels="Time series") +patterns_ts = twdtwTimeSeries(MOD13Q1.patterns.list) +patterns_ts +``` @@ -216,7 +214,7 @@ plot(ts, type = "timeseries") + plot(patterns_ts, type = "patterns") ``` -TWDTW uses both amplitude and phase information to classify the phenological cycles in the long-term time series. The EVI peak of the "soybean" time series has a similar amplitude as that of "cotton". However, the "soybean" series peaks in late December while the "cotton" series peaks in early April. The EVI peak of the "maize" time series is at the same period as the peak of "cotton". However, the "maize" time series has smaller amplitude than the "cotton" one. Therefore, we can improve the time series classification by combining shape and time information. +TWDTW uses both amplitude and phase information to classify the phenological cycles in the long-term time series. The differences in the amplitude and phase of the cycles are more clear when we observe the EVI signal in Figures 3 and 4. The EVI peak of the "soybean" time series has a similar amplitude as that of "cotton". However, the "soybean" series peaks in late December while the "cotton" series peaks in early April. The EVI peak of the "maize" time series is at the same period as the peak of "cotton". However, the "maize" time series has smaller amplitude than the "cotton" one. Therefore, combining shape and time information we can improve the time series classification. ## Detection of time series patterns with TWDTW @@ -229,7 +227,7 @@ matches = slotNames(matches) show(matches) ``` -To retrieve the complete information of the matches we set \code{keep=TRUE}. We need this information for the plot methods of the class \code{twdtwMatches}. The argument \code{weight.fun} defines the time-weight to the dynamic time warping analysis [@Maus:2016]. By default the time-weight is zero, meaning that the function will run a standard dynamic time warping analysis. The arguments \code{x} and \code{y} are objects of class \code{twdtwTimeSeries} with the unclassified long-term time series and the temporal patterns, respectively. For details and other arguments see \code{?twdtwApply}. +To retrieve the complete information of the matches we set \code{keep=TRUE}. We need this information for the plot methods of the class \code{twdtwMatches}. The argument \code{weight.fun} defines the time-weight to the dynamic time warping analysis [@Maus:2016]. By default the time-weight is zero, meaning that the function will run a standard dynamic time warping analysis. The arguments \code{x} and \code{y} are objects of class \code{twdtwTimeSeries} with the unclassified long-term time series and the temporal patterns, respectively. To perform the alignment between the time series the default TWDTW recursion has a symmetric step (for more details and other recursion options see \code{?stepPattern}). @Giorgino:2009 provides a detaild discussion on the recursion steps and other step patterns. For further details and other arguments of the TWDTW analysis see \code{?twdtwApply}. In our example we use a logistic weight function for the temporal constraint of the TWDTW algorithm. This function is defined by \code{logisticWeight}. The \pkg{dtwSat} package provides two in-built functions: \code{linearWeight} and \code{logisticWeight}. The \code{linearWeight} function with slope \code{a} and intercept \code{b} is given by $$ @@ -241,7 +239,7 @@ $$ \omega = \frac{1}{1 + e^{-\alpha(g(t_1,t_2)-\beta)} }. \label{eq:nonlineartw} $$ -The function $g$ is the absolute difference in days between two dates, $t_1$ and $t_2$. The linear function creates a strong time constraint even for small time differences. The logistic function has a low weight for small time warps and significant costs for bigger time warps, cf. Figure \ref{fig:logist-time-weight}. In our previous studies [@Maus:2016] the logistic-weight had better results than the linear-weight for land cover classification. Users can define different weight functions as temporal constraints in the argument \code{weight.fun} of the \code{twdtwApply} method. +The function $g$ is the absolute difference in days between two dates, $t_1$ and $t_2$. The aim of these functions is to control the time warp, e.g. a "large time warp" is needed to match a point of the temporal pattern whose original date is January 1 to a point of the long-term time series whose date is July 1, on the other hand to match January 1 to December 15 has a "small time warp". If there is a large seasonal difference between the pattern and its matching point in time series, an extra cost is added to the DTW distance measure. This constraint controls the time warping and makes the time series alignment dependent on the seasons. This is especially useful for detecting temporary crops and for distinguishing pasture from agriculture. The linear function creates a strong time constraint even for small time differences, including small time warps. The logistic function has a low weight for small time warps and significant costs for bigger time warps, cf. \autoref{fig:logist-time-weight}. In our previous studies [@Maus:2016] the logistic-weight had better results than the linear-weight for land cover classification. Users can define different weight functions as temporal constraints in the argument \code{weight.fun} of the function \code{twdtwApply}. ```{r logist-time-weight, echo = FALSE, eval = TRUE, out.width=paste0(page_width/2,'in'), fig.align='center', fig.cap='Logistic time-weight function \\code{logisticWeight} with steepness \\code{alpha=-0.1} and midpoint \\code{beta=100}. The $x$ axis shows the absolute difference between two dates in days and the $y$ axis shows the time-weight \\citep{Maus:2016}.', fig.pos='!h'} # Maximum time difference in days max_diff = 366/2 @@ -262,9 +260,9 @@ df_weight = melt(df_weight, id.vars = "Difference") names(df_weight)[-1] = c("Functions","Weight") ggplot(df_weight, aes_string(x="Difference", y="Weight", group="Functions", linetype="Functions")) + geom_line() + xlab("Time difference (days)") + - theme(text = element_text(size = 10), - plot.title = element_text(size = 10, face="bold"), - axis.title = element_text(size = 10), + theme(text = element_text(size = 10, family="Helvetica"), + plot.title = element_text(size = 10, family="Helvetica", face="bold"), + axis.title = element_text(size = 10, family="Helvetica"), legend.position = c(.3,.85), legend.background = element_rect(fill="transparent")) + scale_linetype(guide_legend(title = "")) ``` @@ -273,43 +271,44 @@ ggplot(df_weight, aes_string(x="Difference", y="Weight", group="Functions", line \pkg{dtwSat} provides five ways to visualise objects of class \code{twdtwMatches} through the plot types: \code{matches}, \code{alignments}, \code{classification}, \code{path}, and \code{cost}. The plot type \code{matches} shows the matching points of the patterns in the long-term time series; the plot type \code{alignments} shows the alignments and dissimilarity measures; the plot type \code{path} shows the low cost paths in the TWDTW cost matrix; and the plot type \code{cost} allows the visualisation of the cost matrices (local cost, accumulated cost, and time cost); and the plot type \code{classification} shows the classification of the long-term time series based on the TWDTW analysis. The plot methods for class \code{twdtwMatches} return a \code{ggplot} object, so that users can further manipulate the result using the \pkg{ggplot2} package. For more details on visualisation functions, please refer to the \pkg{dtwSat} documentation in the CRAN [@Maus:2015a]. -We now describe the plot types \code{matches} and \code{alignments}. The code bellow shows how to visualise the matching points of the four best matches of "soybean" pattern in the long-term time series, cf. Figure \ref{fig:twdtw-matches}. +We now describe the plot types \code{matches} and \code{alignments}. The code bellow shows how to visualise the matching points of the four best matches of "soybean" pattern in the long-term time series, cf. \autoref{fig:twdtw-matches}. ```{r twdtw-matches, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/3.5, fig.align='center', fig.cap=c('The four best matches of the "soybean" pattern in the time series using a logistic time-weight. The solid black line is the long-term time series; the coloured lines are the temporal patterns; and the grey dashed lines are the respective matching points.'), fig.pos='!h'} plot(matches, type="matches", patterns.labels="Soybean", k=4) ``` -The next example (Figure \ref{fig:alignments-all-patterns}) uses the plot type \code{alignments} to show the alignments of the temporal patterns. We set the threshold for the dissimilarity measure to be lower than $3.0$. This is useful to display the different subintervals of the long-term time series that have at least one alignment whose dissimilarity is less than the specified threshold. +The next example uses the plot type \code{alignments} to show the alignments of the temporal patterns (see \autoref{fig:alignments-all-patterns}). We set the threshold for the dissimilarity measure to be lower than $3.0$. This plot displays the different subintervals of the long-term time series that have alignments whose dissimilarity is less than the specified threshold. ```{r alignments-all-patterns, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.5, fig.align='center', fig.cap=c('Alignments and dissimilarity measures of the patterns "soybean", "cotton", and "maize" to the subintervals of the long-term time series using a logistic time-weight. The solid black line is the EVI time series, and the coloured lines are the alignments of the patterns that have dissimilarity measure lower than three.'), fig.pos='!h'} plot(matches, type="alignments", attr = "evi", threshold = 3.0) ``` +\autoref{fig:alignments-all-patterns} shows the alignments of each pattern over the long-term time series, note that we can rank the alignments by their TWDTW dissimilarity, i.e. alignments overlapping the same period usually have distinct dissimilarity, which can be used to rank them. In the figure we can see that maize (blue lines) and cotton (green lines) overlap approximately the same time periods, however, they have distinct dissimilarity measures, and therefore, can be ranked. Observing the time period from January 2010 to July 2010, both soybean, maize, and cotton have at least one overlapping alignment, however in this case the cotton pattern matches better to the interval because its dissimilarity is lower than the others. + ## Classifying the long-term time series -Using the matches and their associated dissimilarity measures, we can classify the subintervals of the long-term time series using \code{twdtwClassify}. To do this, we need to define a period for classification and the minimum overlap between the period and the alignments that intersect with it. We use the plot type \code{classification} to show the classification of the subintervals of the long-term time series based on the TWDTW analysis. For this example, we set classification periods of 6 months from September 2009 to September 2013, and a minimum overlap of 50% between the alignment and the classification period. This means that at least 50% of the alignment has to be contained inside of the classification period. -```{r time-series-classification, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.5, fig.align='center', fig.cap=c('Classification of each 6 months periods of the time series using results of the TWDTW analysis with logistic time-weight. The solid lines are the attributes of the time series, the background colours indicate the classification of the periods.'), fig.pos='!h'} +Using the matches and their associated dissimilarity measures, we can classify the subintervals of the long-term time series using \code{twdtwClassify}. To do this, we need to define a period for classification and the minimum overlap between the period and the alignments that intersect with it. For each interval, \code{twdtwClassify} will select the alignment that has the lowest TWDTW dissimilarity taking into account the minimum overlap condition. For example, in Figure 7 the interval from 1 September 1012 to 28 February 2013 has three overlapping alignments, maize in blue, cotton in green, and soybean in red. Without a minimum overlap the function \code{twdtwClassify} would classify this interval as maize, which has the lowest dissimilarity in the period. However, if we set a minimum overlap of 50\%, the function \code{twdtwClassify} classifies the interval as soybean, which is the only class whose alignment overlaps the interval during more than 50\% of the time. The interval of classification are usually defined according to the phenological cycles or the agricultural calendar of the region. The classification interval can also be irregular, for details see the argument \code{breaks} in \code{?twdtwClassify} + +In the example bellow we classify each period of 6 months from September 2009 to September 2013; we set a minimum overlap of 50% between the alignment and the classification period. This means that at least 50% of the alignment has to be contained inside of the classification period. We also use the plot type \code{classification} to show the classified subintervals of the long-term time series. + +```{r time-series-classification, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.8, fig.align='center', fig.cap=c('Classification of each 6 months periods of the time series using results of the TWDTW analysis with logistic time-weight. The solid lines are the attributes of the time series, the background colours indicate the classification of the periods.'), fig.pos='!ht'} ts_classification = twdtwClassify(x = matches, from = as.Date("2009-09-01"), to = as.Date("2013-09-01"), by = "6 month", overlap = 0.5) plot(ts_classification, type="classification") ``` -Comparing the results of the classified time series in Figure \ref{fig:time-series-classification} with the crop cycles in Figure \ref{fig:example-timeseries} we see that the algorithm has classified correctly all the eight subintervals from 2009 to 2013, which are, respectively: "Soybean", "Cotton", "Soybean", "Cotton", "Soybean", "Maize", "Soybean", "Maize". +By comparing the results of the classified time series in \autoref{fig:time-series-classification} with the crop cycles in \autoref{fig:example-timeseries} we see that the algorithm has classified correctly all the eight subintervals from 2009 to 2013, which are, respectively: "Soybean", "Cotton", "Soybean", "Cotton", "Soybean", "Maize", "Soybean", "Maize". -# Producing a land cover map - -In this section we present an application of TWDTW for land use and land cover change analysis using satellite image time series. Our input is a set of images, each covering the same geographical area at different times. Each pixel location is then associated to an unclassified satellite image time series. We assume to have done field work in the area; for some pixel locations and time periods, we know what is the land cover. We then will show how to obtain a set of template patterns, based on the field samples and how to apply these patterns to land cover classification of the set of images. In the end of this section we show how to perform land cover change analysis and how to do accuracy assessment. The satellite images and the field samples used in the examples come with \pkg{dtwSat} package. -Our method is not restricted to cases where the temporal patterns are obtained from the set of images. The patterns for the TWDTW analysis can be any time series with same bands or indices as the unclassified images, such as in the examples of Section \ref{classifying-a-time-series} above. -## Input data +# Producing a land cover map -The inputs are: *a)* the satellite images for a given geographical area, organised as a set of georeferenced raster files in GeoTIFF format, each containing all time steps of a spectral band or index; and *b)* a set of ground truth samples. The satellite images are extracted from the MODIS product MOD13Q1 collection 5 [@Friedl:2010] and include vegetation indexes "ndvi", "evi", and original bands "nir", "red", "blue", and "mir". This product has 250 x 250 m spatial and 16 day temporal resolution. +In this section we present an application of TWDTW for land cover change analysis using satellite image time series. Our input is a set of images, each covering the same geographical area at different times. Each pixel location is then associated to an unclassified satellite image time series. We assume to have done field work in the area; for some pixel locations and time periods, we know what is the land cover. We then will show how to obtain a set of template patterns, based on the field samples and how to apply these patterns to land cover classification of the set of images. In the end of this section we show how to perform land cover change analysis and accuracy assessment. -The region is a tropical forest area in Mato Grosso, Brazil of approximately 5300 km$^2$ with images from 2007 to 2013 (Figure \ref{fig:study-area}). This is a sequence of 160 images with 999 pixels each for 6 years. We also have a set of 603 ground truth samples of the following classes: "forest", "cotton-fallow", "soybean-cotton", "soybean-maize", and "soybean-millet". +As an example we classify approximately 5300 km$^2$ in a tropical forest region in Mato Grosso, Brazil (\autoref{fig:study-area}). This is a sequence of 160 images with 999 pixels each for 6 years, from 2007 to 2013. We also have a set of 603 ground truth samples of the following classes: "Forest", "Cotton-fallow", "Soybean-cotton", "Soybean-maize", and "Soybean-millet". The satellite images and the field samples used in the examples come with \pkg{dtwSat} package. -\begin{figure}[!h] +\begin{figure}[!ht] \begin{center} \includegraphics[width=\textwidth]{./study_area.pdf} \end{center} @@ -317,13 +316,17 @@ The region is a tropical forest area in Mato Grosso, Brazil of approximately 530 \label{fig:study-area} \end{figure} +## Input data + +The inputs are: *a)* the satellite images for a given geographical area, organised as a set of georeferenced raster files in GeoTIFF format, each containing all time steps of a spectral band or index; and *b)* a set of ground truth samples. The satellite images are extracted from the MODIS product MOD13Q1 collection 5 [@Friedl:2010] and include vegetation indices "ndvi", "evi", and original bands "nir", "red", "blue", and "mir". This product has 250 x 250 m spatial resolution and a 16 day maximum-value composite (MVC) for each pixel location [@Friedl:2010], meaning that one image can have measurements from different dates. For this reason, MOD13Q1 also includes the "day of the year" (doy) of each pixel as a layer, which we use to keep the time series consistent with the measurements. + The data files for the examples that follow are in the \pkg{dtwSat} installation folder *lucc_MT/data/*. The *tif* files include the time series of "blue", "red", "nir", "mir", "evi", "ndvi", and "doy" (day of the year); the text file *timeline* has the dates of the satellite images; the CSV file *samples.csv* has the \code{longitude}, \code{latitude}, \code{from}, \code{to}, and \code{label} for each field sample; and the text file *samples_projection* contains information about the cartographic projection of the samples, in the format of coordinate reference system used by \code{sp::CRS}. ```{r, echo = TRUE, eval = TRUE, results = 'markup'} data_folder = system.file("lucc_MT/data", package = "dtwSat") dir(data_folder) ``` -In this example, we have stored all the time series for each band in one single file. In this way, we can use the function \code{raster::brick} to read the satellite images. The algorithm also works when the time steps for each band are split in many files. In this case, the user should call the function \code{raster::stack} with the appropriate parameters. Because of processing performance, we suggest that interested users group their images in bricks and follow the procedures given below. +We have stored all the time series for each band in one single file. In this way, we can use the function \code{raster::brick} to read the satellite images. The algorithm also works when the time steps for each band are split in many files. In this case, the user should call the function \code{raster::stack} with the appropriate parameters. Because of processing performance, we suggest that interested users group their images in bricks and follow the procedures given below. ```{r, echo = TRUE, eval = TRUE} blue = brick(paste(data_folder,"blue.tif", sep = "/")) red = brick(paste(data_folder,"red.tif", sep = "/")) @@ -335,10 +338,16 @@ day_of_year = brick(paste(data_folder,"doy.tif", sep = "/")) dates = scan(paste(data_folder,"timeline", sep = "/"), what = "dates") ``` -The set of ground truth samples in the CSV file *samples.csv* has a total of 603 samples divided in five classes: 68 "cotton-fallow", 138 "forest", 79 "soybean-cotton", 134 "soybean-maize", and 184 "soybean-millet". Reading this CSV file, we get a \code{data.frame} object, with the spatial location (\code{latitude} and \code{longitude}), starting and ending dates (\code{from} and \code{to}), and the \code{label} for each sample. +We use these data-sets to create a multiple raster time series, which is used in the next sections for the TWDTW analysis. \pkg{dtwSat} provides the constructor \code{twdtwRaster} that builds a multi-band satellite image time series. The inputs of this function are \code{RasterBrick} objects with the same temporal and spatial extents, and a vector (\code{timeline}) with the acquisition dates of the images in the format \code{"YYYY-MM-DD"}. The argument \code{doy} is combined with \code{timeline} to get the real date of each pixel, independently from each other. If \code{doy} is not provided then the dates of the pixels are given by \code{timeline}, i.e. all pixels in one image will have the same date. Products from other sensors, such as the Sentinels and Landsat, usually have all pixels with same date, therefore the argument \code{doy} is not needed. This function produces an object of class \code{twdtwRaster} with the time series of multiple satellite bands. +```{r, echo = TRUE, eval = TRUE} +raster_timeseries = twdtwRaster(blue, red, nir, mir, evi, ndvi, + timeline = dates, doy = day_of_year) +``` + +Our second input is a set of ground truth samples in the CSV file *samples.csv*, which has a total of 603 samples divided in five classes: 68 "cotton-fallow", 138 "forest", 79 "soybean-cotton", 134 "soybean-maize", and 184 "soybean-millet". Reading this CSV file, we get a \code{data.frame} object, with the spatial location (\code{latitude} and \code{longitude}), starting and ending dates (\code{from} and \code{to}), and the \code{label} for each sample. ```{r, echo = TRUE, eval = TRUE, results = 'markup'} field_samples = read.csv(paste(data_folder,"samples.csv", sep = "/")) -head(field_samples, 2) +head(field_samples, 5) table(field_samples[["label"]]) proj_str = scan(paste(data_folder,"samples_projection", sep = "/"), what = "character") @@ -346,186 +355,151 @@ proj_str ``` -## Creating the time series and the temporal patterns +## Assessing the separability of temporal patterns -After reading our data, we need to create the time series for analysis. For this purpose, \pkg{dtwSat} provides the constructor \code{twdtwRaster} that builds a multi-band satellite image time series. The inputs of this function are \code{RasterBrick} objects with the same temporal and spatial extents, and a vector (\code{timeline}) with the acquisition dates of the images in the format \code{"YYYY-MM-DD"}. The argument \code{doy} is optional. If \code{doy} is not declared, the function builds a \code{RasterBrick} object using the dates given by \code{timeline}. This function produces an object of class \code{twdtwRaster} with the time series of multiple satellite bands. -```{r, echo = TRUE, eval = TRUE} -raster_timeseries = twdtwRaster(blue, red, nir, mir, evi, ndvi, - timeline = dates, doy = day_of_year) -``` +The classification is highly dependent on the quality of the temporal patterns. Therefore, it is useful to perform an analysis to assess the separability of the temporal pattern. Ideally, one would need patterns that, when applied to the set of unknown time series, produce consistent results (see the guidelines for single time series analysis in \autoref{classifying-a-time-series}). For this reason, before performing the land cover mapping, we perform a cross validation step. In this way, the users would assess the separability of their patterns before classifying a large data set. -We now need to identify the temporal patterns. Usually, this can be done using the collected field samples. In the next example we use the function \code{getTimeSeries} to get the time series of each field sample from our raster time series. The arguments of the function are a set of raster time series, a \code{data.frame} with spatial and temporal information about the fields samples (as in the object \code{field_samples} given above), and a \code{proj4string} with the projection information. The projection should follow the \code{sp::CRS} format. The result is an object of class \code{twdtwTimeSeries} with one time series for each field sample. +In the next example we use the function \code{getTimeSeries} to extract the time series of each field sample from our raster time series. The arguments of the function are a set of raster time series, a \code{data.frame} with spatial and temporal information about the fields samples (as in the object \code{field_samples} given above), and a \code{proj4string} with the projection information. The projection should follow the \code{sp::CRS} format. The result is an object of class \code{twdtwTimeSeries} with one time series for each field sample. ```{r, echo = TRUE, eval = TRUE, results = 'markup'} field_samples_ts = getTimeSeries(raster_timeseries, y = field_samples, proj4string = proj_str) field_samples_ts ``` -After obtaining the time series associated to the field samples, we need to create the template patterns for each class. For this purpose, \pkg{dtwSat} provides the function \code{createPatterns}. This function fits a Generalized Additive Model (GAM) [Hastie:1986,Wood:2011] to the field samples and retrieves a smoothed temporal pattern for each band (*e.g.*, "blue", "red", "nir", "mir", "evi", and "ndvi"). We use the GAM because of its flexibility for non-parametric fits, with less rigorous assumptions on the relationship between response and predictor. This potentially provides better fit to satellite data than purely parametric models, due to the data's inter- and intra-annual variability. +To perform the cross-validation we use the function \code{twdtwCrossValidate}. This function splits the sample time series into training and validation sets using stratified sampling with a simple random sampling within each stratum, for details see \code{?caret::createDataPartition}. The function uses the training samples to create the temporal patterns and then classifies the remaining validation samples using \code{twdtwApply}. The results of the classification are used in the accuracy calculation. + +A Generalized Additive Model (GAM) [Hastie:1986,Wood:2011] generates the smoothed temporal patterns based on the training samples. We use the GAM because of its flexibility for non-parametric fits, with less rigorous assumptions on the relationship between response and predictor. This potentially provides better fit to satellite data than purely parametric models, due to the data's inter- and intra-annual variability. + +In the next example we set the arguments \code{times=100} and \code{p=0.1}, which creates 100 different data partitions, each with 10% of the samples for training and 90% for validation. The other arguments of this function are: the logistic weight function with steepness `-0.1` and midpoint `50` to \code{weight.fun}; the frequency of the temporal patterns to 8 days \code{freq=8}, and GAM smoothing formula to \code{formula = y ~ s(x)}, where function \code{s} sets up a spline model, with \code{x} the time and \code{y} a satellite band (for details see \code{?mgcv::gam} and \code{?mgcv::s}). The output is an object of class \code{twdtwCrossValidation} which includes the accuracy for each data partition. The object has two slots, the first called \code{partitions} has the index of the samples used for training, the second called \code{accuracy} has overall accuracy, user's accuracy, producer's accuracy, error matrix, and the data used in the calculation, i.e. reference classes, predicted classes, and TWDTW distance measure. + +```{r, echo = TRUE, eval = FALSE, results = 'markup'} +set.seed(1) +log_fun = logisticWeight(alpha=-0.1, beta=50) +cross_validation = twdtwCrossValidate(field_samples_ts, times=100, p=0.1, + freq = 8, formula = y ~ s(x, bs="cc"), weight.fun = log_fun) +``` +```{r, echo = FALSE, eval = TRUE} +load(system.file("lucc_MT/cross_validation.RData", package = "dtwSat")) +``` + +\autoref{fig:plot-accuracy} and \autoref{tab:cross-validation} show the 95% confidence interval of the mean for user\'s and producer\'s accuracy derived from the hundred-fold cross-validation analysis. The user\'s accuracy gives the confidence and the producer\'s accuracy gives the sensitivity of the method for each class. In our analysis all classes had high user\'s and producer\'s accuracy, meaning that TWDTW has high confidence and sensitivity for the classes included in the analysis. The cross-validation results show that if we randomly select 10% of our sampels to create temporal patterns we can get an overall accuracy of at least 97% in the classification of the remaining samples with 95% confidence level. +```{r plot-accuracy, echo = FALSE, eval = TRUE, fig.width=page_width, fig.height=page_width/2, fig.align='center', fig.cap='User\'s and producer\'s accuracy of the TWDTW cross-validation using 100 resampling-with-replacement. The plot shows the 95\\% confidence interval of the mean.', fig.pos='!ht'} +plot(cross_validation, conf.int=.95) +``` + +```{r, echo = FALSE, eval = TRUE, results = 'asis'} +twdtwXtable(cross_validation, conf.int=.95, digits = 2, caption="\\label{tab:cross-validation} User\'s and producer\'s accuracy of the TWDTW cross-validation using 100 resampling-with-replacement. The table shows the standard deviation ($\\sigma$) and the 95\\% confidence interval (ci) of the mean ($\\mu$).'", comment = FALSE, caption.placement = "bottom", table.placement="!ht") +``` + + + +## Creating temporal patterns + +In the last section we observed that the land cover classes based on our samples are separable using the TWDTW algorithm with high confidence level. Now we randomly select 10% of our samples for training and keep the remaining 90% for validation. The first set of samples are used to create temporal patterns and classify the raster time series, and the second set of samples to assess the final maps. Ideally we would need a second independent set of samples to assess the map, but it would be very difficult to identify different crops without field work. Therefore, we use the same samples used in the cross-validation (\autoref{assessing-the-separability-of-temporal-patterns}). +```{r, echo = TRUE, eval = TRUE} +library(caret) +set.seed(1) +I = unlist(createDataPartition(field_samples[,"label"], p = 0.1)) +training_ts = subset(field_samples_ts, I) +validation_samples = field_samples[-I,] +``` + +We use the function \code{createPatterns} to produce the temporal patterns based on the training samples. For that, we need to set the desired temporal frequency of the patterns and the smoothing function for the GAM model. In the example below, we set \code{freq=8} to get temporal patterns with a frequency of 8 days, and the GAM smoothing formula \code{formula = y ~ s(x)}, such as in \autoref{assessing-the-separability-of-temporal-patterns}). -To produce the set of template patterns using the function \code{createPatterns}, we need to set the temporal frequency of the resulting patterns and the smoothing function for the GAM model. In the example below, we set \code{freq=8} to get temporal patterns with a frequency of 8 days. We also set the GAM smoothing formula to be \code{formula = y ~ s(x)}, where function \code{s} sets up a spline model, with \code{x} the time and \code{y} a satellite band (for details see \code{?mgcv::gam} and \code{?mgcv::s}). ```{r, echo = TRUE, eval = TRUE} temporal_patterns = - createPatterns(field_samples_ts, freq = 8, formula = y ~ s(x)) + createPatterns(training_ts, freq = 8, formula = y ~ s(x)) ``` -We use the plot method \code{type="patterns"} to show the results of the \code{createPatterns} in \autoref{fig:temporal-patterns}. -```{r temporal-patterns, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_width/1.5, fig.align='center', fig.pos='!h', fig.cap='Temporal patterns of forest, cotton-fallow, soybean-cotton, soybean-maize, and soybean-millet based on the ground truth samples.'} +The result of the function \code{createPatterns} is an object of the class \code{twdtwTimeSeries}. We use the plot method \code{type="patterns"} to show the results of the \code{createPatterns} in \autoref{fig:temporal-patterns}. +```{r temporal-patterns, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_width/1.5, fig.align='center', fig.pos='!h', fig.cap='Temporal patterns of Forest, Cotton-fallow, Soybean-cotton, Soybean-maize, and Soybean-millet based on the ground truth samples.'} plot(temporal_patterns, type = "patterns") + theme(legend.position = c(.8,.25)) ``` -After obtaining the template patterns for each land cover class, it is useful to perform a pre-classification analysis to assess their quality and their informational content. Ideally, one would need template patterns that, when applied to the set of unknown time series, produce consistent results. For this reason, it is advisable that the user performs a pre-classification step, along the lines of the individual analysis described in Section \ref{classifying-a-time-series}. In this way, the users would assess how good their patterns are before classifying a large data set. +Our method is not restricted to cases where the temporal patterns are obtained from the set of images, such as in the example above. Once can also use patterns from a different set of images or defined in other studies, as long as these temporal patterns stand for the study area and their bands match the bands in the unclassified images. ## Classifying the image time series -After obtaining a consistent set of temporal patterns, we use the function \code{twdtwApply} to run the TWDTW analysis for each pixel location in the raster time series. The input raster time series in the object \code{twdtwRaster} should be longer or have approximatly the same length as the temporal patterns. This function retrieves an object of class \code{twdtwRaster} with the TWDTW dissimilarity measure of the temporal patterns for each time period. The arguments \code{overwrite} and \code{format} are passed to \code{raster::writeRaster}. The arguments \code{weight.fun} and \code{overlap} are described in Section \ref{classifying-a-time-series}. Here we set the parameters of the time weight (logistic function) base on our the experience about the phenological cycle of the vegetation in the study area. In the next example, we classify the raster time series using the temporal patterns in \code{temporal_patterns} obtained as described above. The result is a \code{twdtwRaster} with five layers; each layer contains the TWDTW dissimilarity measure for one temporal pattern over time. We use the plot type \code{distance} to illustrate the TWDTW dissimilarity for each temporal pattern in 2013, cf. Figure \ref{fig:plot-dissmilarity2013}. +After obtaining a consistent set of temporal patterns, we use the function \code{twdtwApply} to run the TWDTW analysis for each pixel location in the raster time series. The input raster time series in the object \code{twdtwRaster} should be longer or have approximatly the same length as the temporal patterns. This function retrieves an object of class \code{twdtwRaster} with the TWDTW dissimilarity measure of the temporal patterns for each time period. The arguments \code{overwrite} and \code{format} are passed to \code{raster::writeRaster}. The arguments \code{weight.fun} and \code{overlap} are described in \autoref{classifying-a-time-series}. Here we set the parameters of the time weight (logistic function) base on our the experience about the phenological cycle of the vegetation in the study area. In the next example, we classify the raster time series using the temporal patterns in \code{temporal_patterns} obtained as described above. The result is a \code{twdtwRaster} with five layers; each layer contains the TWDTW dissimilarity measure for one temporal pattern over time. We use the plot type \code{distance} to illustrate the TWDTW dissimilarity for each temporal pattern in 2008, cf. \autoref{fig:plot-dissmilarity2008}. ```{r, echo = TRUE, eval = TRUE, results = 'markup'} log_fun = logisticWeight(alpha=-0.1, beta=50) twdtw_dist = twdtwApply(x = raster_timeseries, y = temporal_patterns, overlap = 0.5, weight.fun = log_fun, overwrite=TRUE, format="GTiff") ``` - -```{r plot-dissmilarity2013, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Illustration of the TWDTW dissimilarity from each temporal pattern in 2013. The blue areas are more similar to the pattern and the red areas are less similar to the pattern.', fig.pos='!h'} -plot(x = twdtw_dist, type="distance", time.levels = 6) +```{r plot-dissmilarity2008, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Illustration of the TWDTW dissimilarity from each temporal pattern in 2008. The blue areas are more similar to the pattern and the red areas are less similar to the pattern.', fig.pos='!ht'} +plot(x = twdtw_dist, type="distance") ``` -The results of the example above can be used to create categorical land cover maps. The function \code{twdtwClassify} selects the most similar pattern for each time period and retrieves a \code{twdtwRaster} object with the time series of land use maps. The resulting object includes two layers, the first has the classified categorical maps and the second has the TWDTW dissimilarity measure. -```{r, echo = TRUE, eval = TRUE} -land_use_maps = twdtwClassify(twdtw_dist, format="GTiff", overwrite=TRUE) +The results of the example above can be used to create categorical land cover maps. The function \code{twdtwClassify} selects the most similar pattern for each time period and retrieves a \code{twdtwRaster} object with the time series of land cover maps. The resulting object includes two layers, the first has the classified categorical maps and the second has the TWDTW dissimilarity measure. +```{r, echo = TRUE, eval = TRUE, results = 'markup'} +land_cover_maps = twdtwClassify(twdtw_dist, format="GTiff", overwrite=TRUE) ``` ## Looking at the classification results -The classification results can be visualised using the \code{plot} methods of the class \code{twdtwRaster}, which supports four plot types: "maps", "area", "changes", and "distance". The \code{type="maps"} shows the land cover classification maps for each period, cf. Figure \ref{fig:plot-map}. -```{r plot-map, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Land use maps for each year from 2008 to 2013.', fig.pos='!h'} -plot(x = land_use_maps, type = "maps") +The classification results can be visualised using the \code{plot} methods of the class \code{twdtwRaster}, which supports four plot types: "maps", "area", "changes", and "distance". The \code{type="maps"} shows the land cover classification maps for each period, cf. \autoref{fig:plot-map}. +```{r plot-map, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Land cover maps for each year from 2008 to 2013.', fig.pos='!ht'} +plot(x = land_cover_maps, type = "maps") ``` -The next example shows the accumulated area for each class over time, using \code{type="area"}, cf. Figure \ref{fig:plot-area}. -```{r plot-area, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Percentage of area for each land use class from 2008 to 2013.', fig.pos='!h'} -plot(x = land_use_maps, type = "area") +The next example shows the accumulated area for each class over time, using \code{type="area"}, cf. \autoref{fig:plot-area}. +```{r plot-area, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Percentage of area for each land cover class from 2008 to 2013.', fig.pos='!ht'} +plot(x = land_cover_maps, type = "area") ``` -Users can also view the land cover transition for each time period, by setting \code{type="changes"}. For each land cover class and each period, the plot shows gains and losses in area from the other classes. This is the visual equivalent of a land transition matrix, cf. Figure \ref{fig:plot-change}. +Users can also view the land cover transition for each time period, by setting \code{type="changes"}. For each land cover class and each period, the plot shows gains and losses in area from the other classes. This is the visual equivalent of a land transition matrix, cf. \autoref{fig:plot-change}. ```{r plot-change, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Gains and losses in area from the other classes. The $y$ axis shows the actual class; the positive direction of $x$ axis shows the gains and the negative direction of $x$ axis shows the losses of the classes indicated in $y$. The colors indicate from/to which classes the gains/losses belong.', fig.pos='!h'} -plot(x = land_use_maps, type = "changes") +plot(x = land_cover_maps, type = "changes") ``` -We can also look at the dissimilarity of each classified pixel setting \code{type="distance"}. This plot can give a measure of the uncertainty of the classification of each pixel for each time period, cf. Figure \ref{fig:plot-dissmilarity}. -```{r plot-dissmilarity, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='TWDTW dissimilarity measure for each pixel over each classified period. The blue areas have high confidence and the red areas have low confidence in the classification.', fig.pos='!h'} -plot(x = land_use_maps, type="distance") +We can also look at the dissimilarity of each classified pixel setting \code{type="distance"}. This plot can give a measure of the uncertainty of the classification of each pixel for each time period, cf. \autoref{fig:plot-dissmilarity}. +```{r plot-dissmilarity, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='TWDTW dissimilarity measure for each pixel over each classified period. The blue areas have high confidence and the red areas have low confidence in the classification.', fig.pos='!ht'} +plot(x = land_cover_maps, type="distance") ``` -## Assessing the classification accuracy -In this section we show how to assess the accuracy of the TWDTW method for land cover classification. To do this, we split the ground truth samples into training and validation sets, using the function \code{splitDataset} from the package \pkg{dtwSat}. This function splits set of time series in the object \code{twdtwTimeSeries} for training and validation. The argument \code{p} defines the percentage used for training and the argument \code{times} gives the number of different partitions to create. This is a a stratified sampling with a simple random sampling within each stratum, see \code{?createDataPartition} for details. In the next example we create 100 different partitions of the data. Each partition uses 10% of the data for training and 90% for validation. The output is a list with 100 different data partitions; each partition has the temporal patterns based on the training samples and a set of time series for validation. -```{r, echo = TRUE, eval = FALSE} -set.seed(1) -partitions = splitDataset(field_samples_ts, p=0.1, times=100, - freq = 8, formula = y ~ s(x, bs="cc")) -``` -For each data partition we run the TWDTW analysis to classify the set of validation time series using the trained temporal patterns. The result is a list of \code{twdtwMatches} objects with the classified set of time series for each data partition. To compute the *User's Accuracy* (UA) and *Producer's Accuracy* (PA) of the classified time series we use the function \code{dtwSat::twdtwAssess} that retrieves a \code{data.frame} with the accuracy assessment for all data partitions. -```{r, echo = TRUE, eval = FALSE, results = 'markup'} -log_fun = logisticWeight(alpha=-0.1, beta=50) -twdtw_res = lapply(partitions, function(x){ - res = twdtwApply(x = x$ts, y = x$patterns, weight.fun = log_fun, n=1) - twdtwClassify(x = res) -}) -assessment = twdtwAssess(twdtw_res) -head(assessment, 5) -``` -```{r, echo = FALSE, eval = TRUE} -load(system.file("lucc_MT/cross_validation.RData", package = "dtwSat")) -``` - - -Figure \ref{fig:plot-accuracy} shows the average $\mu$ and standard deviation $\sigma$ of *user\'s* and *producer\'s accuracy* based on a bootstrap simulation of 100 different data partitions using resampling-with-replacement. The *user\'s accuracy* gives the confidence and the *producer\'s accuracy* gives the sensitivity of the method for each class. In our analysis all classes had high *user\'s* and *producer\'s accuracy*, meaning that TWDTW has high confidence and sensitivity for the classes included in the analysis. The average, standard deviation, and the 99\% confidence interval is also shown in Table \ref{tab:assessment}. -```{r plot-accuracy, echo = FALSE, eval = TRUE, fig.width=page_width, fig.height=page_width/2, fig.align='center', fig.cap='User\'s Accuracy (UA) and Producer\'s Accuracy (PA) of the TWDTW method for land cover classification. The plot shows the averages and their confidence interval for 99\\%.', fig.pos='!h'} -df = melt(assessment[,-1], id="label") -df$variable = factor(df$variable, levels = c("UA", "PA"), labels = c("User's Accuracy", "Producer's Accuracy")) -ggplot(df, aes(x=label, y=value)) + - stat_summary(fun.data="mean_cl_boot", fun.args=list(conf.int = .99), - width=0.5, geom="crossbar", size=0.1, fill = "gray") + - geom_point(size=0.2) + facet_grid(. ~ variable) + - scale_y_continuous(limits = c(0,1), labels = percent, breaks = seq(0,1,.2)) + - xlab("") + ylab("Accuracy") + coord_flip() +In this section we show how to assess the classification. \pkg{dtwSat} provides a function called \code{twdtwAssess}, which computes a set of accuracy metrics, and adjusted area such as proposed by @Olofsson:2013 and @Olofsson:2014. The inputs of this function are the classified map (an object of class \code{twdtwRaster}), and a set of samples for validation (an object of class \code{data.frame} or \code{sp::SpatialPointsDataFrame}). Besides coordinates, the samples should also have starting dates, ending dates, and lables compatible with the labels in the map (for details see \autoref{input-data}). The output of \code{twdtwAssess} is an object of class \code{twdtwAssessment}, which includes four slots: 1) \code{accuracyByPeriod} is a list of metrics for each time period, including overall accuracy, user's accuracy, produce's accuracy, error matrix (confusion matrix), and adjusted area; 2) \code{accuracySummary} has the accuracy and adjusted area accumulated over all time periods; 3) \code{data} is a \code{SpatialPointsDataFrame} with sample ID, period ID, starting date, ending date, reference label, predicted label, and TWDTW distance; and 4) \code{map} is a twdtwRaster with the raster maps. The next example uses \code{twdtwAssess} to compute the accuracy of the maps (\code{land_cover_maps}) using the validation samples (\code{validation_samples}) with a 95% confidence level. +```{r, echo = TRUE, eval = TRUE} +maps_assessment = twdtwAssess(land_cover_maps, y = validation_samples, + proj4string = proj_str, conf.int=.95) ``` + + +The results of the assessment in \autoref{tab:map-error-matrix}, \ref{tab:map-accuracy}, and \ref{tab:map-adjusted-area} are accumulated over the whole time period, i.e. the total mapped area is equal to the surface area times the number of maps. It is possible to assess and visualise each period independently from each other. However, our samples are irregularly distributed over time and some classes do not have samples in all time period, which limits the analysis of each time period independently from each other. + ```{r, echo = FALSE, eval = TRUE, results = 'asis'} -assess_mean = aggregate(assessment[, c("UA","PA")], list(assessment$label), mean) -assess_sd = aggregate(assessment[, c("UA","PA")], list(assessment$label), sd) -l_names = levels(assessment$label) -names(l_names) = l_names -ic_ua = t(sapply(l_names, function(i) 100*mean_cl_boot(x = assessment$UA[assessment$label==i], conf.int = .99)))[,-1] -ic_pa = t(sapply(l_names, function(i) 100*mean_cl_boot(x = assessment$PA[assessment$label==i], conf.int = .99)))[,-1] +twdtwXtable(maps_assessment, table.type="errormatrix", digits = 0, rotate.col = TRUE, caption="\\label{tab:map-error-matrix}Error matrix of the map classification based on TWDTW analysis. The area is in the map unit, in this case $m^2$. $w$ is the proportion of area mapped for each class.", comment = FALSE, caption.placement = "bottom", table.placement="!ht") +``` -assess_table = data.frame( - Class = assess_mean$Group.1, - - MUA = sprintf("%.2f", round(100*assess_mean$UA,2)), - SDUA = sprintf("(%.2f)", round(100*assess_sd$UA,2)), - CIUA = sprintf("[%.2f-%.2f]", round(as.numeric(ic_ua[,1]),2), round(as.numeric(ic_ua[,2]),2)), +As we can see in \autoref{tab:map-error-matrix} only nine samples were misclassified, all of them from the reference class "Soybean-cotton". From these samples six were classified as "Soybean-maize", and three as "Cotton-fallow". As we see in \autoref{tab:map-accuracy} the only class with producer\'s accuracy lower than $100\%$ was "Soybean-cotton", reaching $72\%$ with high uncertainty ($\pm13\%$). The user\'s accuracy for all classes was higer than $95\%$, with maximun uncertainty of $\pm5\%$. To visualise the misclassified samples on top of the maps we can use the plot \code{type="map"} for objects of class \code{twdtwAssessment}, such that `plot(x = maps_assessment, type="map", samples="incorrect")`. The user can also set the argument \code{samples} to see correctly classified samples \code{samples="correct"}, or to see all samples \code{samples="all"}. - MPA = sprintf("%.2f", round(100*assess_mean$PA,2)), - SDPA = sprintf("(%.2f)", round(100*assess_sd$PA,2)), - CIPA = sprintf("[%.2f-%.2f]", round(as.numeric(ic_pa[,1]),2), round(as.numeric(ic_pa[,2]),2)) - ) +The \autoref{fig:plot-map-incorrect-samples} shows that the misclassified samples are all in the map from 2012. The six samples of "Soybean-cotton" classified as "Soybean-maize" are within a big area of "Soybean-maize" and the three samples of "Soybean-cotton" classified as "Cotton-fallow" are near the border between this two classes. This errors might be related to the mixture of different classes in the same pixel. -x_assess = xtable::xtable(assess_table, - format = tab_format, digits = 2, label = "tab:assessment", alig=c("l","c","c","c","c","c","c","c"), - caption="User\'s and Producer\'s Accuracy of the land use classification based on TWDTW analysis. $\\mu$ is the average accuracy, $\\sigma$ the standard deviation, and CI is the confidence interval of 99\\% using 100 resampling-with-replacement.") +```{r plot-map-incorrect-samples, echo = FALSE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Incorrect classified samples.', fig.pos='!ht'} +plot(x = maps_assessment, type="map", samples="incorrect") +``` + +```{r, echo = FALSE, eval = TRUE, results = 'asis'} +twdtwXtable(maps_assessment, table.type="accuracy", show.prop = TRUE, digits = 2, rotate.col = TRUE, caption="\\label{tab:map-accuracy}Accuracy and error matrix in proportion of area of the classified map.", comment = FALSE, caption.placement = "bottom", table.placement="!ht") +``` + +In \autoref{tab:map-adjusted-area} we can see the mapped and the adjusted area. This is the accumulated area over the whole period, i.e. the sum of all maps from 2008 to 2013. As the "Forest" and "Soybean-millet" did not have omission ($100\%$ producer's accuracy) or comission ($100\%$ user's accuracy) erros, we immediately see that their mapped area is equal to their adjusted area (\autoref{tab:map-adjusted-area}). To help the analysis of the other classes we use the plot \code{type="area"} for class \code{twdtwAssessment}, such that `plot(x = maps_assessment, type="area", perc=FALSE)`. \autoref{fig:plot-area-and-uncertainty} shows the accumulated area mapped and adjusted for all classes. In this figure we see that our method overestimated the area of "Soybean-maize", i.e. the mapped area ($110173564\;m^2$) is bigger than the adjusted area ($104927204\;m^2$) plus the confidence interval $4113071\;m^2$. Meanwhile we underestimated the area of "Soybean-cotton", i.e. its mapped area ($18782634\;m^2$) is smaller than the adjusted area ($26260270\;m^2$) plus the confidence interval ($4805205\;m^2$). The mapped area of "Cotton-fallow" ($47600561\;m^2$) is within the confidence interval of the adjusted area ($45369285\pm2484480\;m^2$). To improve the accuracy assessment and area estimations the field samples should be better distributed over time, which would also allow for better land cover changes assessment. -addtorow = list() -addtorow$pos = list(0) -addtorow$command = paste("Class & \\multicolumn{3}{c}{User's Accuracy (UA) \\%} & \\multicolumn{3}{c}{Producer's Accuracy (PA)\\%}\\\\", paste(c("","$\\mu$","$\\sigma$","CI","$\\mu$","$\\sigma$","CI"), collapse="&"), "\\\\", collapse = "") +```{r, echo = FALSE, eval = TRUE, results = 'asis'} +twdtwXtable(maps_assessment, table.type="area", digits = 0, rotate.col = TRUE, caption="\\label{tab:map-adjusted-area}Mapped and adjusted, accumulated over the whole period, i.e. the sum from the sum of the maps from 2008 to 2013. The area is in the map unit, in this case $m^2$.", comment = FALSE, caption.placement = "bottom", table.placement="!ht") +``` -xtable::print.xtable(x_assess, add.to.row=addtorow, include.colnames = FALSE, include.rownames = FALSE, - comment = FALSE, caption.placement = "bottom") +```{r plot-area-and-uncertainty, echo = FALSE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.7, fig.align='center', fig.cap='Mapped and adjusted, accumulated over the whole period, i.e. the sum from the sum of the maps from 2008 to 2013. The area is in the map unit, in this case $m^2$.', fig.pos='!ht'} +plot(x = maps_assessment, type="area", perc=FALSE) ``` @@ -533,20 +507,24 @@ xtable::print.xtable(x_assess, add.to.row=addtorow, include.colnames = FALSE, in # Conclusions and Discussion -Nowadays, there are large open archives of Earth Observation data, but few open source methods for analysing them. With this motivation, this paper provides guidance on how to use the Time-Weighed Dynamic Time Warping (TWDTW) method for remote sensing applications. As we have discussed in a companion paper [@Maus:2016], the TWDTW method is well suited for land cover change analysis of satellite image time series. +The overall accuracy of the classification with a 95% confidence level is within 97% and 99%. With same confidence level, user's and producer's accuracy are between 90% and 100% for all classes, except for "Soybean-cotton", which has wide confidence interval for user's accuracy, between 59% and 85%. A small sample size will likely have large confidence intervals [@Foody:2009], therefore, this analysis could be improved by increasing the number of "Soybean-cotton" samples. In addition, our access to field information is limited to a set of samples irregularly distributed over time, which are not enough to assess each mapped period independently from each other. Nevertheless, the results of the accuracy assessment show that the TWDTW has great potential to classify different crop types. -The main goal of \pkg{dtwSat} package is to make TWDTW accessible for researchers. The package supports the full cycle of land cover classification using image time series, ranging from selecting temporal patterns to visualising and evaluating the results. The current version of the \pkg{dtwSat} package provides a pixel-based time series classification method. We envisage that future versions of the package could include local neighborhoods to reduce border effects and improve classification homogeneity. +DTW based approaches have achieved good results for land cover classification [@Petitjean:2012; @Maus:2016], however, a reduced number of points in the time series will negatively impact the accuracy. Remotely sensed images often present noise and poor coverage due to clouds, aerosol load, surface directional effects, and sensor problems. This leads to large amount of gaps in satellite image time series. Therefore, methods that deal with irregular temporal sampling, i.e. irregular sampling intervals, have great potential to fully exploit the available satellite images archive. DTW is known to be one of the most robust methods for irregular time series [@Keogh:2005; @Tormene:2009]. It was successfully applied for satellite time series clustering using FORMOSAT-2 [@Petitjean:2012] and using MODIS [@Maus:2016]. @Petitjean:2012, for example, showed that clustering based on DTW is consistent even when there are several images missing per year because of cloud cover. However, the effect of a reduced number of samples in the time needs to be better evaluated in order to point out the limiting gap size for satellite image time series analysis using DTW based methods. -The \pkg{dtwSat} package provides two in-built functions for linear and logistic time weight. In the current version of the package the parameters of the weight functions are set manually to the same value for all land use/cover classes. Future versions of the package could include methods to search for the best parameters to be set class-by-class using field data. +The DTW approaches will search for the matches of a temporal pattern, therefore the number of points in the time series should represent the phenological cycles of different land cover types. The number of available observations might be a limitation for sensors with lower temporal resolution, such as Landsat. We believe that this limitation could be addressed, for example, by combining TWDTW analysis with pixel-based compositing techniques [@Griffiths:2013; @White:2014]. These approaches have become more popular with the opening of the USGS Landsat archive and could be used to increase the availability of gap-free time series [@Gomez:2016]. -To aim for maximum usage by the scientific community, the \pkg{dtwSat} package described in this paper works with well-known R data classes such as provided by packages \pkg{zoo} and \pkg{raster}. We are planning improvements, so that \pkg{dtwSat} can be combined with array databases, such as SciDB [@Stonebraker:2013]. We believe that combining array databases with image time series analysis software such as presented here is one way forward to scaling the process of information extracting to very large Earth Observation data. +\pkg{dtwSat} provides a dissimilarity measure in the n-dimensional space, allowing multispectral satellite image time series analysis. Our experience using MODIS data sets has shown that n-dimensional analysis, i.e. using RED, NIR, NDVI, EVI, and NDVI, increases the separability among classes when compared to single band analysis, for example using only EVI or NDVI. Further studies on multispectral data using TWDTW analysis will help to optimize the selection of bands. +The main goal of \pkg{dtwSat} package is to make TWDTW accessible for researchers. The package supports the full cycle of land cover classification using image time series, ranging from selecting temporal patterns to visualising and assessing the results. The current version of the \pkg{dtwSat} package provides a pixel-based time series classification method. We envisage that future versions of the package could include local neighborhoods to reduce border effects and improve classification homogeneity. +The \pkg{dtwSat} package provides two in-built functions for linear and logistic time weight. In the current version of the package the parameters of the weight functions are set manually to the same value for all land cover classes. Future versions could include methods to search for the best parameters for each land cover class using field data. +Nowadays, there are large open archives of Earth Observation data, but few open source methods for analysing them. With this motivation, this paper provides guidance on how to use the Time-Weighed Dynamic Time Warping (TWDTW) method for remote sensing applications. As we have discussed in a companion paper [@Maus:2016], the TWDTW method is well suited for land cover change analysis of satellite image time series. -\section*{Acknowledgments} -Victor Maus has been supported by the Institute for Geoinformatics, University of Münster (Germany), and by the Earth System Science Center, National Institute for Space Research (Brazil). Part of the research was developed in the Young Scientists Summer Program at the International Institute for Applied Systems Analysis, Laxenburg (Austria). Gilberto Câmara's term as Brazil Chair at IFGI has been supported by CAPES (grant 23038.007569\/2012-16). Gilberto's work is also supported by FAPESP e-science program (grant 2014-08398-6) and CNPq (grant 312151\/2014-4). +The TWDTW algorithm is computationally intensive and for large areas one should consider parallel processing. The algorithm is pixel time series based, i.e. each time series is processed independently from each other, and therefore, it can be easily parallelized. To aim for maximum usage by the scientific community, the \pkg{dtwSat} package described in this paper works with well-known \proglang{R} data classes such as provided by \pkg{raster}, which offers the option to work with raster data sets stored on disk that are too large to be loaded into memory (RAM) at once [@Hijmans:2015]. We are planning improvements, so that \pkg{dtwSat} can also be combined with array databases, such as SciDB [@Stonebraker:2013]. We believe that combining array databases with image time series analysis software such as presented here is one way forward to scaling the process of information extracting from very large Earth Observation data. - + +\section*{Acknowledgments} +Victor Maus has been supported by the Institute for Geoinformatics, University of Münster (Germany), and by the Earth System Science Center, National Institute for Space Research (Brazil). Gilberto Camara's term as Brazil Chair at IFGI has been supported by CAPES (grant 23038.007569\/2012-16). Gilberto's work is also supported by FAPESP e-science program (grant 2014-08398-6) and CNPq (grant 312151\/2014-4). diff --git a/inst/doc/applying_twdtw.pdf b/inst/doc/applying_twdtw.pdf index 166c6f7..a7992c1 100644 Binary files a/inst/doc/applying_twdtw.pdf and b/inst/doc/applying_twdtw.pdf differ diff --git a/man/twdtwApply.Rd b/man/twdtwApply.Rd index e048c15..03f3513 100644 --- a/man/twdtwApply.Rd +++ b/man/twdtwApply.Rd @@ -11,7 +11,7 @@ \usage{ twdtwApply(x, y, resample = TRUE, length = NULL, weight.fun = NULL, dist.method = "Euclidean", step.matrix = symmetric1, n = NULL, - span = NULL, min.length = 0.5, theta = 0.5, ...) + span = NULL, min.length = 0, theta = 0.5, ...) \S4method{twdtwApply}{twdtwTimeSeries}(x, y, resample, length, weight.fun, dist.method, step.matrix, n, span, min.length, theta, keep = FALSE, ...) diff --git a/vignettes/applying_twdtw.Rmd b/vignettes/applying_twdtw.Rmd index a88fb9f..7c8ae09 100644 --- a/vignettes/applying_twdtw.Rmd +++ b/vignettes/applying_twdtw.Rmd @@ -1,17 +1,17 @@ --- author: - name: Victor Maus - affiliation: INPE - affiliation2: University of Münster + affiliation: University of Münster + affiliation2: INPE, IIASA address: > - Image Processing Division\newline - National Institute for Space Research\newline - Av. dos Astronautas, 1758\newline - 12227-010 São José dos Campos, Brazil + Ecosystems Services and Management Program\newline + International Institute for Applied Systems Analysis\newline + Schlossplatz 1\newline + A-2361 Laxenburg, Austria email: vwmaus1@gmail.com - url: \url{www.dpi.inpe.br/maus} - Telephone: +55/12/3208-7330 - - name: "Gilberto Câmara" + url: \url{www.iiasa.ac.at/staff/maus} + Telephone: +43/2236-807-550 + - name: Gilberto Câmara affiliation: INPE affiliation2: University of Münster - name: Marius Appel @@ -19,27 +19,25 @@ author: - name: Edzer Pebesma affiliation: University of Münster title: - formatted: "\\pkg{dtwSat}: Time-Weighted Dynamic Time Warping for Satellite Image Time Series Analysis in \\proglang{R}$^1$" + formatted: "\\pkg{dtwSat}: Time-Weighted Dynamic Time Warping for Satellite Image Time Series Analysis in \\proglang{R}" # If you use tex in the formatted title, also supply version without plain: "dtwSat: Time-Weighted Dynamic Time Warping for Satellite Image Time Series Analysis in R" # For running headers, if needed short: "\\pkg{dtwSat}: Time-Weighted Dynamic Time Warping" abstract: > - The opening of large archives of satellite data such as LANDSAT, MODIS and the SENTINELs has given researchers unprecedented access to data, allowing them to better quantify and understand local and global land change. The need to analyse such large data sets has lead to the development of automated and semi-automated methods for satellite image time series analysis. However, few of the proposed methods for remote sensing time series analysis are available as open source software. In this paper we present the \proglang{R} package \pkg{dtwSat}. This package provides an implementation of the Time-Weighted Dynamic Time Warping method for land use and land cover mapping using sequence of multi-band satellite images. Methods based on dynamic time warping are flexible to handle irregular sampling and out-of-phase time series, and they have achieved significant results in time series analysis. \pkg{dtwSat} is available from the Comprehensive R Archive Network and contributes to making methods for satellite time series analysis available to a larger audience. The package supports the full cycle of land cover classification using image time series, ranging from selecting temporal patterns to visualising and evaluating the results. + The opening of large archives of satellite data such as LANDSAT, MODIS and the SENTINELs has given researchers unprecedented access to data, allowing them to better quantify and understand local and global land change. The need to analyse such large data sets has lead to the development of automated and semi-automated methods for satellite image time series analysis. However, few of the proposed methods for remote sensing time series analysis are available as open source software. In this paper we present the \proglang{R} package \pkg{dtwSat}. This package provides an implementation of the Time-Weighted Dynamic Time Warping method for land cover mapping using sequence of multi-band satellite images. Methods based on dynamic time warping are flexible to handle irregular sampling and out-of-phase time series, and they have achieved significant results in time series analysis. \pkg{dtwSat} is available from the Comprehensive R Archive Network and contributes to making methods for satellite time series analysis available to a larger audience. The package supports the full cycle of land cover classification using image time series, ranging from selecting temporal patterns to visualising and assessing the results. keywords: # at least one keyword must be supplied - formatted: [dynamic programming, MODIS time series, land use changes, crop monitoring] - plain: [dynamic programming, MODIS time series, land use changes, crop monitoring] + formatted: [dynamic programming, MODIS time series, land cover changes, crop monitoring] + plain: [dynamic programming, MODIS time series, land cover changes, crop monitoring] preamble: > \usepackage{amsmath} \usepackage{array} \usepackage{caption} \usepackage{subcaption} \usepackage{float} - \usepackage{framed} - \usepackage{listings} - \usepackage{siunitx} - \usepackage{latexsym} + \usepackage{microtype} + \setlength{\tabcolsep}{4pt} documentclass: nojss classoption: shortnames output: @@ -51,7 +49,7 @@ vignette: > %\VignetteIndexEntry{dtwSat: Time-Weighted Dynamic Time Warping for Satellite Image Time Series Analysis in R} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} - %\VignetteDepends{rticles, dtwSat, ggplot2, scales, Hmisc, reshape2, xtable, tikzDevice} + %\VignetteDepends{rticles, dtwSat, ggplot2, scales, Hmisc, reshape2, tikzDevice} --- @@ -62,6 +60,7 @@ opts_chunk$set( message = FALSE, error = FALSE, results = "hide", + cache.path = "./cache/", cache = FALSE, comment = "" ) @@ -83,11 +82,9 @@ if (other_user) { #options(tikzDocumentDeclaration = "\\documentclass{jss}\\usepackage{siunitx}\\usepackage{latexsym}") ``` - - ```{r , echo=FALSE, eval = TRUE, cache = FALSE} # Install dtwSat package -#install.packages("dtwSat_0.2.0.9000.tar.gz") +#install.packages("dtwSat_0.2.2.9000.tar.gz", repos = NULL) ``` @@ -95,9 +92,10 @@ if (other_user) { library(dtwSat) library(ggplot2) library(scales) -library(reshape2) +library(Hmisc) new_theme = theme_get() +new_theme$text$family = "Helvetica" new_theme$text$size = 8 old_theme = theme_set(new_theme) @@ -106,30 +104,28 @@ page_width = 5.590551#in 14.2#cm page_height = 9.173228#in 23.3#cm ``` -\footnotetext[1]{This vignette is based on the paper: MAUS, V.; CAMARA, G.; APPEL, M.; PEBESMA, E. dtwSat: Time-Weighted Dynamic Time Warping for satellite image time series analysis in R. Submitted to the Journal of Statistical Software.} - \sloppy # Introduction - -Remote sensing images are the most widely used data source for measuring land use and land cover change (LUCC). In many areas, remote sensing images are the only data available for this purpose [@Lambin:2006; @Fritz:2013]. Recently, the opening of large archives of satellite data such as LANDSAT, MODIS and the SENTINELs has given researchers unprecedented access to data, allowing them to better quantify and understand local and global land change. The need to analyse such large data sets has lead to the development of automated and semi-automated methods for satellite image time series analysis. These methods include multi-image compositing [@Griffiths:2013], detecting forest disturbance and recovery [@Kennedy:2010; @Zhu:2012; @DeVries:2015], crop classification [@Xiao:2005; @Wardlow:2007; @Petitjean:2012; @Maus:2016], planted forest mapping [@Maire:2014], crop expansion and intensification [@Galford:2008; @Sakamoto:2009], detecting trend and seasonal changes [@Lunetta:2006; @Verbesselt:2010; @Verbesselt:2010a; @Verbesselt:2012], and extracting seasonality metrics from satellite time series [@Jonsson:2002; @Jonsson:2004]. Given the open availability of large image data sets, the research community on Earth Observation would get much benefit from methods that are openly available, reproducible and comparable. However, few of the proposed methods for remote sensing time series analysis are available as open source software, the main exception being the BFAST and BFAST-monitor algorithms for change detection [@Verbesselt:2010; @Verbesselt:2010a]. This paper is a contribution to making methods for satellite time series analysis available to a larger audience. +Remote sensing images are the most widely used data source for measuring land use and land cover change (LUCC). In many areas, remote sensing images are the only data available for this purpose [@Lambin:2006; @Fritz:2013]. Recently, the opening of large archives of satellite data such as LANDSAT, MODIS and the SENTINELs has given researchers unprecedented access to data, allowing them to better quantify and understand local and global land change. The need to analyse such large data sets has lead to the development of automated and semi-automated methods for satellite image time series analysis. These methods include multi-image compositing [@Griffiths:2013], detecting forest disturbance and recovery [@Kennedy:2010; @Zhu:2012; @DeVries:2015], crop classification [@Xiao:2005; @Wardlow:2007; @Petitjean:2012; @Maus:2016], planted forest mapping [@Maire:2014], crop expansion and intensification [@Galford:2008; @Sakamoto:2009], detecting trend and seasonal changes [@Lunetta:2006; @Verbesselt:2010; @Verbesselt:2010a; @Verbesselt:2012], and extracting seasonality metrics from satellite time series [@Jonsson:2002; @Jonsson:2004]. Given the open availability of large image data sets, the Earth Observation community would get much benefit from methods that are openly available, reproducible and comparable. However, few of the proposed methods for remote sensing time series analysis are available as open source software, the main exception being the BFAST and BFAST-monitor algorithms for change detection [@Verbesselt:2010; @Verbesselt:2010a]. This paper is a contribution to making methods for satellite time series analysis available to a larger audience. In this paper we describe the \pkg{dtwSat} package, written in \proglang{R} [@R:2015] and \proglang{Fortran} programming languages, and available from the Comprehensive R Archive Network at [http://CRAN.R-project.org/package=dtwSat](http://CRAN.R-project.org/package=dtwSat). The package provides an implementation of Time-Weighted Dynamic Time Warping (TWDTW) [@Maus:2016] for satellite image time series analysis. -The TWDTW method is an adaptation of the well-known dynamic time warping (DTW) method for time series analysis [@Velichko:1970; @Sakoe:1971; @Sakoe:1978; @Rabiner:1993; @Berndt:1994; @Keogh:2005; @Muller:2007] for land use and land cover classification. The standard DTW compares a temporal signature of a known event (*e.g.*, a person's speech) with an unknown time series. It finds all possible alignments between two time series and provides a dissimilarity measure [@Rabiner:1993]. In contrast to standard DTW, the TWDTW method is sensitive to seasonal changes of natural and cultivated vegetation types. It also considers inter-annual climatic and seasonal variability. In a tropical forest area, the method has achieved a high accuracy for mapping classes of single cropping, double cropping, forest, and pasture [@Maus:2016]. +The TWDTW method is an adaptation of the well-known dynamic time warping (DTW) method for time series analysis [@Velichko:1970; @Sakoe:1971; @Sakoe:1978; @Rabiner:1993; @Berndt:1994; @Keogh:2005; @Muller:2007] for land cover classification. The standard DTW compares a temporal signature of a known event (*e.g.*, a person's speech) with an unknown time series. It finds all possible alignments between two time series and provides a dissimilarity measure [@Rabiner:1993]. In contrast to standard DTW, the TWDTW method is sensitive to seasonal changes of natural and cultivated vegetation types. It also considers inter-annual climatic and seasonal variability. In a tropical forest area, the method has achieved a high accuracy for mapping classes of single cropping, double cropping, forest, and pasture [@Maus:2016]. + +We chose \proglang{R} because it is an open source software that offers a large number of reliable packages. The \pkg{dtwSat} package builds upon on a number of graphical and statistical tools in \proglang{R}: \pkg{dtw} [@Giorgino:2009], \pkg{proxy} [@Meyer:2015], \pkg{zoo} [@Zeileis:2005], \pkg{mgcv} [@Wood:2000; @Wood:2003; @Wood:2004; @Wood:2006; @Wood:2011], \pkg{sp} [@Pebesma:2005; @Bivand:2013], \pkg{raster} [@Hijmans:2015], \pkg{caret} [@Kuhn:2016], and \pkg{ggplot2} [@Wickham:2009]. Other \proglang{R} packages that are related and useful for remote sensing and land cover analysis include \pkg{landsat} [@Goslee:2011], \pkg{rgdal} [@Bivand:2015], \pkg{spacetime} [@Pebesma:2012; @Bivand:2013], \pkg{bfast} [@Verbesselt:2010; @Verbesselt:2010a], \pkg{bfastmonitor} [@Verbesselt:2011], \pkg{bfastSpatial} [@Dutrieux:2014], \pkg{MODISTools} [@Tuck:2014], \pkg{maptools} [@Bivand:2015], and \pkg{lucc} [@Moulds:2015]. Using existing packages as building blocks, software developers in \proglang{R} save a lot of time and can concentrate on their intended goals. -We chose \proglang{R} because it is an open source software that offers a large number of reliable packages. The \pkg{dtwSat} package builds upon on a number of graphical and statistical tools in \proglang{R}: \pkg{dtw} [@Giorgino:2009], \pkg{proxy} [@Meyer:2015], \pkg{zoo} [@Zeileis:2005], \pkg{mgcv} [@Wood:2000; @Wood:2003; @Wood:2004; @Wood:2006; @Wood:2011], \pkg{sp} [@Pebesma:2005; @Bivand:2013], \pkg{raster} [@Hijmans:2015], \pkg{caret} [@Kuhn:2016], and \pkg{ggplot2} [@Wickham:2009]. Other \proglang{R} packages that are related and useful for remote sensing and land use analysis include \pkg{landsat} [@Goslee:2011], \pkg{rgdal} [@Bivand:2015], \pkg{spacetime} [@Pebesma:2012; @Bivand:2013], \pkg{bfast} [@Verbesselt:2010; @Verbesselt:2010a], \pkg{bfastmonitor} [@Verbesselt:2011], \pkg{bfastSpatial} [@Dutrieux:2014], \pkg{MODISTools} [@Tuck:2014], \pkg{maptools} [@Bivand:2015], and \pkg{lucc} [@Moulds:2015]. Using existing packages as building blocks, software developers in \proglang{R} save a lot of time and can concentrate on their intended goals. +There is already an \proglang{R} package that implements the standard DTW method for time series analysis: the \pkg{dtw} package [@Giorgino:2009]. In the \pkg{dtwSat} package, we focus on the specific case of satellite image time series analysis. The analysis method implemented in \pkg{dtwSat} package extends that of the \pkg{dtw} package; it adjusts the standard DTW method to account for the seasonality of different types of land cover. Our aim is to support the full cycle of land cover classification, from selecting sample patterns to visualising and assessing the final result. -There is already an \proglang{R} package that implements the standard DTW method for time series analysis: the \pkg{dtw} package [@Giorgino:2009]. In the \pkg{dtwSat} package, we focus on the specific case of satellite image time series analysis. The analysis method implemented in \pkg{dtwSat} package extends that of the \pkg{dtw} package; it adjusts the standard DTW method to account for the seasonality of different types of land cover. Our aim is to support the full cycle of land use and land cover classification, from selecting sample patterns to visualising and evaluating the final result. +This paper focuses on the motivation and guidance for using the TWDTW method for remote sensing applications. The full description of the method is available in a paper published by the lead author [@Maus:2016]. In what follows, the \autoref{the-time-weighted-dynamic-time-warping-method} describes the application of TWDTW [@Maus:2016] for satellite time series analysis. The \autoref{dtwsat-package-overview} gives an overview of the \pkg{dtwSat} package. Then, \autoref{classifying-a-time-series} focuses on the analysis of a single time series and shows some visualisation methods. We then present an example of a complete land cover change analysis for a study area in Mato Grosso, Brazil in \autoref{producing-a-land-cover-map}. -This paper focuses on the motivation and guidance for using the TWDTW method for remote sensing applications. The full description of the method is available in a paper published by the lead author [@Maus:2016]. In what follows, Section \ref{dtwsat-package-overview} gives an overview of the \pkg{dtwSat} package. The Section \ref{the-time-weighted-dynamic-time-warping-method} describes the application of TWDTW [@Maus:2016] for satellite time series analysis. Then, Section \ref{classifying-a-time-series} focuses on the analysis of a single time series and shows some visualisation methods. We then present an example of a complete land use and land cover change analysis for a study area in the Mato Grosso, Brazil in Section \ref{producing-a-land-cover-map}. # The Time-Weighted Dynamic Time Warping method -In this section, we describe the Time-Weighted Dynamic Time Warping (TWDTW) algorithm in general terms. For a detailed technical explanation, refer to @Maus:2016. TWDTW is time-constrained version of the Dynamic Time Warping (DTW) algorithm. Although the standard DTW method is good for shape matching [@Keogh:2005], it is not suited *per se* for satellite image time series analysis, since it disregards the temporal range when finding the best matches between two time series [@Maus:2016]. When using image time series for land cover classification, one needs to balance between shape matching and temporal alignment, since each land cover class has a distinct phenological cycle associated with the vegetation [@Reed:1994,@Zhang:2003]. For example, soybeans and maize cycles range from 90 to 120 days, whereas sugar-cane has a 360 to 720 days cycle. A time series with cycle larger than 180 days is unlikely to come from soybeans or maize. For this reason, @Maus:2016 include a time constraint in DTW to account for seasonality. The resulting method is capable of distinguishing different land use and land cover classes. +In this section, we describe the Time-Weighted Dynamic Time Warping (TWDTW) algorithm in general terms. For a detailed technical explanation, refer to @Maus:2016. TWDTW is time-constrained version of the Dynamic Time Warping (DTW) algorithm. Although the standard DTW method is good for shape matching [@Keogh:2005], it is not suited *per se* for satellite image time series analysis, since it disregards the temporal range when finding the best matches between two time series [@Maus:2016]. When using image time series for land cover classification, one needs to balance between shape matching and temporal alignment, since each land cover class has a distinct phenological cycle associated with the vegetation [@Reed:1994,@Zhang:2003]. For example, soybeans and maize cycles range from 90 to 120 days, whereas sugar-cane has a 360 to 720 days cycle. A time series with cycle larger than 180 days is unlikely to come from soybeans or maize. For this reason, @Maus:2016 include a time constraint in DTW to account for seasonality. The resulting method is capable of distinguishing different land cover classes. -The inputs to TWDTW are: (a) a set of time series of known temporal patterns (*e.g.*, phenological cycles of land cover classes); (b) an unclassified long-term satellite image time series. For each temporal pattern, the algorithm finds all matching subintervals in the long-term time series, providing a dissimilarity measure (cf. Figure \ref{fig:twdtw-example}). The result of the algorithm is a set of subintervals, each associated with a pattern and with a dissimilarity measure. We then break the unclassified time series in periods according to our needs (*e.g.*, yearly, seasonality, monthly). For each period, we consider all matching subintervals that intersect with it, and classify them based on the land cover class of the best matching subinterval. In this way, the long-term satellite time series is divided in periods, and each period is assigned a land cover class. +The inputs to TWDTW are: (a) a set of time series of known temporal patterns (*e.g.*, phenological cycles of land cover classes); (b) an unclassified long-term satellite image time series. For each temporal pattern, the algorithm finds all matching subintervals in the long-term time series, providing a dissimilarity measure (cf. \autoref{fig:twdtw-example}). The result of the algorithm is a set of subintervals, each associated with a pattern and with a dissimilarity measure. We then break the unclassified time series in periods according to our needs (*e.g.*, yearly, seasonality, monthly). For each period, we consider all matching subintervals that intersect with it, and classify them based on the land cover class of the best matching subinterval. In this way, the long-term satellite time series is divided in periods, and each period is assigned a land cover class. ```{r twdtw-example, echo = FALSE, eval = TRUE, fig.width=page_width, fig.height=page_height/3.5, fig.align='center', fig.cap='Matches of the known temporal pattern to subintervals of the long-term time series. The solid black line is the long-term time series, the colored lines are the different matches of the same pattern ordered by TWDTW dissimilarity measure, and the gray dashed lines are the matching points.', fig.pos='h'} n=4 @@ -143,12 +139,12 @@ df_dist$y = 1.8 plotMatches(mat, attr="evi", k=n) + ylab("Time series Pattern") + geom_text(data=df_dist, mapping = aes_string(x='to', y='y', label='label'), - size = 2) + + size = 2, family="Helvetica") + theme(legend.position="none") ``` -To use TWDTW for land use and land cover classification, we need the following data sets: +To use TWDTW for land cover classification, we need the following data sets: - A set of remote sensing time series for the study area. For example, a tile of a MODIS MOD13Q1 image consists of 4800 x 4800 pixels, covering an area of 10 degrees x 10 degrees at the Equator [@Friedl:2010]. A 15-year (2000-2015) MODIS MOD13Q1 set time series has 23 images per year, with a total of 23 million time series, each with 346 samples. @@ -156,7 +152,7 @@ To use TWDTW for land use and land cover classification, we need the following d - A set of ground truth points, with spatial and temporal information and land cover classification. These *ground truth* points are used for validation and accuracy assessment. -Based on the information provided by the user about the images to be analysed, our method maps them to a three-dimensional (3-D) array in space-time (Figure \ref{fig:3-D-array}). This array can have multiple attributes, such as the satellite bands (*e.g.*, "red", "nir", and "blue"), and derived indices (*e.g.*, "NDVI", "EVI", and "EVI2"). This way, each pixel location is associated to a sequence of measurements, building a satellite image time series. Figure \ref{fig:3-D-array} shows an example of "evi" time series for a location in the Brazilian Amazon from 2000 to 2008. In the first two years, the area was covered by forest that was cut in 2002. The area was then used for cattle raising (pasture) for three years, and then for crop production from 2006 to 2008. Satellite image time series are thus useful to describe the dynamics of the landscape and the land use trajectories. +Based on the information provided by the user about the images to be analysed, our method maps them to a three-dimensional (3-D) array in space-time (\autoref{fig:3-D-array}). This array can have multiple attributes, such as the satellite bands (*e.g.*, "red", "nir", and "blue"), and derived indices (*e.g.*, "NDVI", "EVI", and "EVI2"). This way, each pixel location is associated to a sequence of measurements, building a satellite image time series. \autoref{fig:3-D-array} shows an example of "EVI" time series for a location in the Brazilian Amazon from 2000 to 2008. In the first two years, the area was covered by forest that was cut in 2002. The area was then used for cattle raising (pasture) for three years, and then for crop production from 2006 to 2008. Satellite image time series are thus useful to describe the dynamics of the landscape and the land use trajectories. \begin{figure}[!h] \begin{center} @@ -168,6 +164,7 @@ Based on the information provided by the user about the images to be analysed, o \end{figure} + # dtwSat package overview \pkg{dtwSat} provides a set of functions for land cover change analysis using satellite image time series. This includes functions to build temporal patterns for land cover types, apply the TWDTW analysis using different weighting functions, visualise the results in a graphical interface, produce land cover maps, and create spatiotemporal plots for land changes. Therefore, \pkg{dtwSat} gives an end-to-end solution for satellite time series analysis, which users can make a complete land change analysis. @@ -185,6 +182,7 @@ The class \code{twdtwRaster} is used for satellite image time series. This class + # Classifying a time series This section describes how to classify one time series, using examples that come with the \pkg{dtwSat} package. We will show how to match three temporal patterns ("soybean", "cotton", and "maize") to subintervals of a long-term satellite image time series. These time series have been extracted from a set of MODIS MOD13Q1 [@Friedl:2010] images and include the vegetation indices "ndvi", "evi", and the original bands "nir", "red", "blue", and "mir". In this example, the classification of crop types for the long-term time series is known. @@ -193,17 +191,17 @@ This section describes how to classify one time series, using examples that come The inputs for the next examples are time series in \pkg{zoo} format. The first is an object of class \code{zoo} with a long-term time series, referred to as \code{MOD13Q1.ts}, and the second is a \code{list} of time series of class \code{zoo} with the temporal patterns of "soybean", "cotton", and "maize", referred to as \code{MOD13Q1.patterns.list}. -From \code{zoo} objects we construct time series of class \code{twdtwTimeSeries}, for which we have a set of visualization and analysis methods implemented in the \pkg{dtwSat} package. The code below builds two objects of class \code{twdtwTimeSeries}. The first has the long-term time series and second has the temporal patterns. We use the plot method types \code{timeseries} and \code{patterns} to shown the objects \code{ts} in Figure \ref{fig:example-timeseries} and \code{patterns_ts} in Figure \ref{fig:temporal-patterns-soy-cot-mai}, respectively. This plot method uses \code{ggplot} syntax. -```{r, echo = TRUE, eval = TRUE, results = 'markup'} -ts = twdtwTimeSeries(MOD13Q1.ts, labels="Time series") -patterns_ts = twdtwTimeSeries(MOD13Q1.patterns.list) -MOD13Q1.ts.labels -``` +From \code{zoo} objects we construct time series of class \code{twdtwTimeSeries}, for which we have a set of visualization and analysis methods implemented in the \pkg{dtwSat} package. The code below builds two objects of class \code{twdtwTimeSeries}. The first has the long-term time series and second has the temporal patterns. We use the plot method types \code{timeseries} and \code{patterns} to shown the objects \code{ts} in \autoref{fig:example-timeseries} and \code{MOD13Q1.ts} in \autoref{fig:temporal-patterns-soy-cot-mai}, respectively. This plot method uses \code{ggplot} syntax. ```{r, echo = TRUE, eval = TRUE, results = 'markup'} library(dtwSat) names(MOD13Q1.patterns.list) head(MOD13Q1.ts, n = 2) ``` +```{r, echo = TRUE, eval = TRUE, results = 'markup'} +ts = twdtwTimeSeries(MOD13Q1.ts, labels="Time series") +patterns_ts = twdtwTimeSeries(MOD13Q1.patterns.list) +patterns_ts +``` @@ -216,7 +214,7 @@ plot(ts, type = "timeseries") + plot(patterns_ts, type = "patterns") ``` -TWDTW uses both amplitude and phase information to classify the phenological cycles in the long-term time series. The EVI peak of the "soybean" time series has a similar amplitude as that of "cotton". However, the "soybean" series peaks in late December while the "cotton" series peaks in early April. The EVI peak of the "maize" time series is at the same period as the peak of "cotton". However, the "maize" time series has smaller amplitude than the "cotton" one. Therefore, we can improve the time series classification by combining shape and time information. +TWDTW uses both amplitude and phase information to classify the phenological cycles in the long-term time series. The differences in the amplitude and phase of the cycles are more clear when we observe the EVI signal in Figures 3 and 4. The EVI peak of the "soybean" time series has a similar amplitude as that of "cotton". However, the "soybean" series peaks in late December while the "cotton" series peaks in early April. The EVI peak of the "maize" time series is at the same period as the peak of "cotton". However, the "maize" time series has smaller amplitude than the "cotton" one. Therefore, combining shape and time information we can improve the time series classification. ## Detection of time series patterns with TWDTW @@ -229,7 +227,7 @@ matches = slotNames(matches) show(matches) ``` -To retrieve the complete information of the matches we set \code{keep=TRUE}. We need this information for the plot methods of the class \code{twdtwMatches}. The argument \code{weight.fun} defines the time-weight to the dynamic time warping analysis [@Maus:2016]. By default the time-weight is zero, meaning that the function will run a standard dynamic time warping analysis. The arguments \code{x} and \code{y} are objects of class \code{twdtwTimeSeries} with the unclassified long-term time series and the temporal patterns, respectively. For details and other arguments see \code{?twdtwApply}. +To retrieve the complete information of the matches we set \code{keep=TRUE}. We need this information for the plot methods of the class \code{twdtwMatches}. The argument \code{weight.fun} defines the time-weight to the dynamic time warping analysis [@Maus:2016]. By default the time-weight is zero, meaning that the function will run a standard dynamic time warping analysis. The arguments \code{x} and \code{y} are objects of class \code{twdtwTimeSeries} with the unclassified long-term time series and the temporal patterns, respectively. To perform the alignment between the time series the default TWDTW recursion has a symmetric step (for more details and other recursion options see \code{?stepPattern}). @Giorgino:2009 provides a detaild discussion on the recursion steps and other step patterns. For further details and other arguments of the TWDTW analysis see \code{?twdtwApply}. In our example we use a logistic weight function for the temporal constraint of the TWDTW algorithm. This function is defined by \code{logisticWeight}. The \pkg{dtwSat} package provides two in-built functions: \code{linearWeight} and \code{logisticWeight}. The \code{linearWeight} function with slope \code{a} and intercept \code{b} is given by $$ @@ -241,7 +239,7 @@ $$ \omega = \frac{1}{1 + e^{-\alpha(g(t_1,t_2)-\beta)} }. \label{eq:nonlineartw} $$ -The function $g$ is the absolute difference in days between two dates, $t_1$ and $t_2$. The linear function creates a strong time constraint even for small time differences. The logistic function has a low weight for small time warps and significant costs for bigger time warps, cf. Figure \ref{fig:logist-time-weight}. In our previous studies [@Maus:2016] the logistic-weight had better results than the linear-weight for land cover classification. Users can define different weight functions as temporal constraints in the argument \code{weight.fun} of the \code{twdtwApply} method. +The function $g$ is the absolute difference in days between two dates, $t_1$ and $t_2$. The aim of these functions is to control the time warp, e.g. a "large time warp" is needed to match a point of the temporal pattern whose original date is January 1 to a point of the long-term time series whose date is July 1, on the other hand to match January 1 to December 15 has a "small time warp". If there is a large seasonal difference between the pattern and its matching point in time series, an extra cost is added to the DTW distance measure. This constraint controls the time warping and makes the time series alignment dependent on the seasons. This is especially useful for detecting temporary crops and for distinguishing pasture from agriculture. The linear function creates a strong time constraint even for small time differences, including small time warps. The logistic function has a low weight for small time warps and significant costs for bigger time warps, cf. \autoref{fig:logist-time-weight}. In our previous studies [@Maus:2016] the logistic-weight had better results than the linear-weight for land cover classification. Users can define different weight functions as temporal constraints in the argument \code{weight.fun} of the function \code{twdtwApply}. ```{r logist-time-weight, echo = FALSE, eval = TRUE, out.width=paste0(page_width/2,'in'), fig.align='center', fig.cap='Logistic time-weight function \\code{logisticWeight} with steepness \\code{alpha=-0.1} and midpoint \\code{beta=100}. The $x$ axis shows the absolute difference between two dates in days and the $y$ axis shows the time-weight \\citep{Maus:2016}.', fig.pos='!h'} # Maximum time difference in days max_diff = 366/2 @@ -262,9 +260,9 @@ df_weight = melt(df_weight, id.vars = "Difference") names(df_weight)[-1] = c("Functions","Weight") ggplot(df_weight, aes_string(x="Difference", y="Weight", group="Functions", linetype="Functions")) + geom_line() + xlab("Time difference (days)") + - theme(text = element_text(size = 10), - plot.title = element_text(size = 10, face="bold"), - axis.title = element_text(size = 10), + theme(text = element_text(size = 10, family="Helvetica"), + plot.title = element_text(size = 10, family="Helvetica", face="bold"), + axis.title = element_text(size = 10, family="Helvetica"), legend.position = c(.3,.85), legend.background = element_rect(fill="transparent")) + scale_linetype(guide_legend(title = "")) ``` @@ -273,43 +271,44 @@ ggplot(df_weight, aes_string(x="Difference", y="Weight", group="Functions", line \pkg{dtwSat} provides five ways to visualise objects of class \code{twdtwMatches} through the plot types: \code{matches}, \code{alignments}, \code{classification}, \code{path}, and \code{cost}. The plot type \code{matches} shows the matching points of the patterns in the long-term time series; the plot type \code{alignments} shows the alignments and dissimilarity measures; the plot type \code{path} shows the low cost paths in the TWDTW cost matrix; and the plot type \code{cost} allows the visualisation of the cost matrices (local cost, accumulated cost, and time cost); and the plot type \code{classification} shows the classification of the long-term time series based on the TWDTW analysis. The plot methods for class \code{twdtwMatches} return a \code{ggplot} object, so that users can further manipulate the result using the \pkg{ggplot2} package. For more details on visualisation functions, please refer to the \pkg{dtwSat} documentation in the CRAN [@Maus:2015a]. -We now describe the plot types \code{matches} and \code{alignments}. The code bellow shows how to visualise the matching points of the four best matches of "soybean" pattern in the long-term time series, cf. Figure \ref{fig:twdtw-matches}. +We now describe the plot types \code{matches} and \code{alignments}. The code bellow shows how to visualise the matching points of the four best matches of "soybean" pattern in the long-term time series, cf. \autoref{fig:twdtw-matches}. ```{r twdtw-matches, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/3.5, fig.align='center', fig.cap=c('The four best matches of the "soybean" pattern in the time series using a logistic time-weight. The solid black line is the long-term time series; the coloured lines are the temporal patterns; and the grey dashed lines are the respective matching points.'), fig.pos='!h'} plot(matches, type="matches", patterns.labels="Soybean", k=4) ``` -The next example (Figure \ref{fig:alignments-all-patterns}) uses the plot type \code{alignments} to show the alignments of the temporal patterns. We set the threshold for the dissimilarity measure to be lower than $3.0$. This is useful to display the different subintervals of the long-term time series that have at least one alignment whose dissimilarity is less than the specified threshold. +The next example uses the plot type \code{alignments} to show the alignments of the temporal patterns (see \autoref{fig:alignments-all-patterns}). We set the threshold for the dissimilarity measure to be lower than $3.0$. This plot displays the different subintervals of the long-term time series that have alignments whose dissimilarity is less than the specified threshold. ```{r alignments-all-patterns, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.5, fig.align='center', fig.cap=c('Alignments and dissimilarity measures of the patterns "soybean", "cotton", and "maize" to the subintervals of the long-term time series using a logistic time-weight. The solid black line is the EVI time series, and the coloured lines are the alignments of the patterns that have dissimilarity measure lower than three.'), fig.pos='!h'} plot(matches, type="alignments", attr = "evi", threshold = 3.0) ``` +\autoref{fig:alignments-all-patterns} shows the alignments of each pattern over the long-term time series, note that we can rank the alignments by their TWDTW dissimilarity, i.e. alignments overlapping the same period usually have distinct dissimilarity, which can be used to rank them. In the figure we can see that maize (blue lines) and cotton (green lines) overlap approximately the same time periods, however, they have distinct dissimilarity measures, and therefore, can be ranked. Observing the time period from January 2010 to July 2010, both soybean, maize, and cotton have at least one overlapping alignment, however in this case the cotton pattern matches better to the interval because its dissimilarity is lower than the others. + ## Classifying the long-term time series -Using the matches and their associated dissimilarity measures, we can classify the subintervals of the long-term time series using \code{twdtwClassify}. To do this, we need to define a period for classification and the minimum overlap between the period and the alignments that intersect with it. We use the plot type \code{classification} to show the classification of the subintervals of the long-term time series based on the TWDTW analysis. For this example, we set classification periods of 6 months from September 2009 to September 2013, and a minimum overlap of 50% between the alignment and the classification period. This means that at least 50% of the alignment has to be contained inside of the classification period. -```{r time-series-classification, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.5, fig.align='center', fig.cap=c('Classification of each 6 months periods of the time series using results of the TWDTW analysis with logistic time-weight. The solid lines are the attributes of the time series, the background colours indicate the classification of the periods.'), fig.pos='!h'} +Using the matches and their associated dissimilarity measures, we can classify the subintervals of the long-term time series using \code{twdtwClassify}. To do this, we need to define a period for classification and the minimum overlap between the period and the alignments that intersect with it. For each interval, \code{twdtwClassify} will select the alignment that has the lowest TWDTW dissimilarity taking into account the minimum overlap condition. For example, in Figure 7 the interval from 1 September 1012 to 28 February 2013 has three overlapping alignments, maize in blue, cotton in green, and soybean in red. Without a minimum overlap the function \code{twdtwClassify} would classify this interval as maize, which has the lowest dissimilarity in the period. However, if we set a minimum overlap of 50\%, the function \code{twdtwClassify} classifies the interval as soybean, which is the only class whose alignment overlaps the interval during more than 50\% of the time. The interval of classification are usually defined according to the phenological cycles or the agricultural calendar of the region. The classification interval can also be irregular, for details see the argument \code{breaks} in \code{?twdtwClassify} + +In the example bellow we classify each period of 6 months from September 2009 to September 2013; we set a minimum overlap of 50% between the alignment and the classification period. This means that at least 50% of the alignment has to be contained inside of the classification period. We also use the plot type \code{classification} to show the classified subintervals of the long-term time series. + +```{r time-series-classification, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.8, fig.align='center', fig.cap=c('Classification of each 6 months periods of the time series using results of the TWDTW analysis with logistic time-weight. The solid lines are the attributes of the time series, the background colours indicate the classification of the periods.'), fig.pos='!ht'} ts_classification = twdtwClassify(x = matches, from = as.Date("2009-09-01"), to = as.Date("2013-09-01"), by = "6 month", overlap = 0.5) plot(ts_classification, type="classification") ``` -Comparing the results of the classified time series in Figure \ref{fig:time-series-classification} with the crop cycles in Figure \ref{fig:example-timeseries} we see that the algorithm has classified correctly all the eight subintervals from 2009 to 2013, which are, respectively: "Soybean", "Cotton", "Soybean", "Cotton", "Soybean", "Maize", "Soybean", "Maize". +By comparing the results of the classified time series in \autoref{fig:time-series-classification} with the crop cycles in \autoref{fig:example-timeseries} we see that the algorithm has classified correctly all the eight subintervals from 2009 to 2013, which are, respectively: "Soybean", "Cotton", "Soybean", "Cotton", "Soybean", "Maize", "Soybean", "Maize". -# Producing a land cover map - -In this section we present an application of TWDTW for land use and land cover change analysis using satellite image time series. Our input is a set of images, each covering the same geographical area at different times. Each pixel location is then associated to an unclassified satellite image time series. We assume to have done field work in the area; for some pixel locations and time periods, we know what is the land cover. We then will show how to obtain a set of template patterns, based on the field samples and how to apply these patterns to land cover classification of the set of images. In the end of this section we show how to perform land cover change analysis and how to do accuracy assessment. The satellite images and the field samples used in the examples come with \pkg{dtwSat} package. -Our method is not restricted to cases where the temporal patterns are obtained from the set of images. The patterns for the TWDTW analysis can be any time series with same bands or indices as the unclassified images, such as in the examples of Section \ref{classifying-a-time-series} above. -## Input data +# Producing a land cover map -The inputs are: *a)* the satellite images for a given geographical area, organised as a set of georeferenced raster files in GeoTIFF format, each containing all time steps of a spectral band or index; and *b)* a set of ground truth samples. The satellite images are extracted from the MODIS product MOD13Q1 collection 5 [@Friedl:2010] and include vegetation indexes "ndvi", "evi", and original bands "nir", "red", "blue", and "mir". This product has 250 x 250 m spatial and 16 day temporal resolution. +In this section we present an application of TWDTW for land cover change analysis using satellite image time series. Our input is a set of images, each covering the same geographical area at different times. Each pixel location is then associated to an unclassified satellite image time series. We assume to have done field work in the area; for some pixel locations and time periods, we know what is the land cover. We then will show how to obtain a set of template patterns, based on the field samples and how to apply these patterns to land cover classification of the set of images. In the end of this section we show how to perform land cover change analysis and accuracy assessment. -The region is a tropical forest area in Mato Grosso, Brazil of approximately 5300 km$^2$ with images from 2007 to 2013 (Figure \ref{fig:study-area}). This is a sequence of 160 images with 999 pixels each for 6 years. We also have a set of 603 ground truth samples of the following classes: "forest", "cotton-fallow", "soybean-cotton", "soybean-maize", and "soybean-millet". +As an example we classify approximately 5300 km$^2$ in a tropical forest region in Mato Grosso, Brazil (\autoref{fig:study-area}). This is a sequence of 160 images with 999 pixels each for 6 years, from 2007 to 2013. We also have a set of 603 ground truth samples of the following classes: "Forest", "Cotton-fallow", "Soybean-cotton", "Soybean-maize", and "Soybean-millet". The satellite images and the field samples used in the examples come with \pkg{dtwSat} package. -\begin{figure}[!h] +\begin{figure}[!ht] \begin{center} \includegraphics[width=\textwidth]{./study_area.pdf} \end{center} @@ -317,13 +316,17 @@ The region is a tropical forest area in Mato Grosso, Brazil of approximately 530 \label{fig:study-area} \end{figure} +## Input data + +The inputs are: *a)* the satellite images for a given geographical area, organised as a set of georeferenced raster files in GeoTIFF format, each containing all time steps of a spectral band or index; and *b)* a set of ground truth samples. The satellite images are extracted from the MODIS product MOD13Q1 collection 5 [@Friedl:2010] and include vegetation indices "ndvi", "evi", and original bands "nir", "red", "blue", and "mir". This product has 250 x 250 m spatial resolution and a 16 day maximum-value composite (MVC) for each pixel location [@Friedl:2010], meaning that one image can have measurements from different dates. For this reason, MOD13Q1 also includes the "day of the year" (doy) of each pixel as a layer, which we use to keep the time series consistent with the measurements. + The data files for the examples that follow are in the \pkg{dtwSat} installation folder *lucc_MT/data/*. The *tif* files include the time series of "blue", "red", "nir", "mir", "evi", "ndvi", and "doy" (day of the year); the text file *timeline* has the dates of the satellite images; the CSV file *samples.csv* has the \code{longitude}, \code{latitude}, \code{from}, \code{to}, and \code{label} for each field sample; and the text file *samples_projection* contains information about the cartographic projection of the samples, in the format of coordinate reference system used by \code{sp::CRS}. ```{r, echo = TRUE, eval = TRUE, results = 'markup'} data_folder = system.file("lucc_MT/data", package = "dtwSat") dir(data_folder) ``` -In this example, we have stored all the time series for each band in one single file. In this way, we can use the function \code{raster::brick} to read the satellite images. The algorithm also works when the time steps for each band are split in many files. In this case, the user should call the function \code{raster::stack} with the appropriate parameters. Because of processing performance, we suggest that interested users group their images in bricks and follow the procedures given below. +We have stored all the time series for each band in one single file. In this way, we can use the function \code{raster::brick} to read the satellite images. The algorithm also works when the time steps for each band are split in many files. In this case, the user should call the function \code{raster::stack} with the appropriate parameters. Because of processing performance, we suggest that interested users group their images in bricks and follow the procedures given below. ```{r, echo = TRUE, eval = TRUE} blue = brick(paste(data_folder,"blue.tif", sep = "/")) red = brick(paste(data_folder,"red.tif", sep = "/")) @@ -335,10 +338,16 @@ day_of_year = brick(paste(data_folder,"doy.tif", sep = "/")) dates = scan(paste(data_folder,"timeline", sep = "/"), what = "dates") ``` -The set of ground truth samples in the CSV file *samples.csv* has a total of 603 samples divided in five classes: 68 "cotton-fallow", 138 "forest", 79 "soybean-cotton", 134 "soybean-maize", and 184 "soybean-millet". Reading this CSV file, we get a \code{data.frame} object, with the spatial location (\code{latitude} and \code{longitude}), starting and ending dates (\code{from} and \code{to}), and the \code{label} for each sample. +We use these data-sets to create a multiple raster time series, which is used in the next sections for the TWDTW analysis. \pkg{dtwSat} provides the constructor \code{twdtwRaster} that builds a multi-band satellite image time series. The inputs of this function are \code{RasterBrick} objects with the same temporal and spatial extents, and a vector (\code{timeline}) with the acquisition dates of the images in the format \code{"YYYY-MM-DD"}. The argument \code{doy} is combined with \code{timeline} to get the real date of each pixel, independently from each other. If \code{doy} is not provided then the dates of the pixels are given by \code{timeline}, i.e. all pixels in one image will have the same date. Products from other sensors, such as the Sentinels and Landsat, usually have all pixels with same date, therefore the argument \code{doy} is not needed. This function produces an object of class \code{twdtwRaster} with the time series of multiple satellite bands. +```{r, echo = TRUE, eval = TRUE} +raster_timeseries = twdtwRaster(blue, red, nir, mir, evi, ndvi, + timeline = dates, doy = day_of_year) +``` + +Our second input is a set of ground truth samples in the CSV file *samples.csv*, which has a total of 603 samples divided in five classes: 68 "cotton-fallow", 138 "forest", 79 "soybean-cotton", 134 "soybean-maize", and 184 "soybean-millet". Reading this CSV file, we get a \code{data.frame} object, with the spatial location (\code{latitude} and \code{longitude}), starting and ending dates (\code{from} and \code{to}), and the \code{label} for each sample. ```{r, echo = TRUE, eval = TRUE, results = 'markup'} field_samples = read.csv(paste(data_folder,"samples.csv", sep = "/")) -head(field_samples, 2) +head(field_samples, 5) table(field_samples[["label"]]) proj_str = scan(paste(data_folder,"samples_projection", sep = "/"), what = "character") @@ -346,186 +355,151 @@ proj_str ``` -## Creating the time series and the temporal patterns +## Assessing the separability of temporal patterns -After reading our data, we need to create the time series for analysis. For this purpose, \pkg{dtwSat} provides the constructor \code{twdtwRaster} that builds a multi-band satellite image time series. The inputs of this function are \code{RasterBrick} objects with the same temporal and spatial extents, and a vector (\code{timeline}) with the acquisition dates of the images in the format \code{"YYYY-MM-DD"}. The argument \code{doy} is optional. If \code{doy} is not declared, the function builds a \code{RasterBrick} object using the dates given by \code{timeline}. This function produces an object of class \code{twdtwRaster} with the time series of multiple satellite bands. -```{r, echo = TRUE, eval = TRUE} -raster_timeseries = twdtwRaster(blue, red, nir, mir, evi, ndvi, - timeline = dates, doy = day_of_year) -``` +The classification is highly dependent on the quality of the temporal patterns. Therefore, it is useful to perform an analysis to assess the separability of the temporal pattern. Ideally, one would need patterns that, when applied to the set of unknown time series, produce consistent results (see the guidelines for single time series analysis in \autoref{classifying-a-time-series}). For this reason, before performing the land cover mapping, we perform a cross validation step. In this way, the users would assess the separability of their patterns before classifying a large data set. -We now need to identify the temporal patterns. Usually, this can be done using the collected field samples. In the next example we use the function \code{getTimeSeries} to get the time series of each field sample from our raster time series. The arguments of the function are a set of raster time series, a \code{data.frame} with spatial and temporal information about the fields samples (as in the object \code{field_samples} given above), and a \code{proj4string} with the projection information. The projection should follow the \code{sp::CRS} format. The result is an object of class \code{twdtwTimeSeries} with one time series for each field sample. +In the next example we use the function \code{getTimeSeries} to extract the time series of each field sample from our raster time series. The arguments of the function are a set of raster time series, a \code{data.frame} with spatial and temporal information about the fields samples (as in the object \code{field_samples} given above), and a \code{proj4string} with the projection information. The projection should follow the \code{sp::CRS} format. The result is an object of class \code{twdtwTimeSeries} with one time series for each field sample. ```{r, echo = TRUE, eval = TRUE, results = 'markup'} field_samples_ts = getTimeSeries(raster_timeseries, y = field_samples, proj4string = proj_str) field_samples_ts ``` -After obtaining the time series associated to the field samples, we need to create the template patterns for each class. For this purpose, \pkg{dtwSat} provides the function \code{createPatterns}. This function fits a Generalized Additive Model (GAM) [Hastie:1986,Wood:2011] to the field samples and retrieves a smoothed temporal pattern for each band (*e.g.*, "blue", "red", "nir", "mir", "evi", and "ndvi"). We use the GAM because of its flexibility for non-parametric fits, with less rigorous assumptions on the relationship between response and predictor. This potentially provides better fit to satellite data than purely parametric models, due to the data's inter- and intra-annual variability. +To perform the cross-validation we use the function \code{twdtwCrossValidate}. This function splits the sample time series into training and validation sets using stratified sampling with a simple random sampling within each stratum, for details see \code{?caret::createDataPartition}. The function uses the training samples to create the temporal patterns and then classifies the remaining validation samples using \code{twdtwApply}. The results of the classification are used in the accuracy calculation. + +A Generalized Additive Model (GAM) [Hastie:1986,Wood:2011] generates the smoothed temporal patterns based on the training samples. We use the GAM because of its flexibility for non-parametric fits, with less rigorous assumptions on the relationship between response and predictor. This potentially provides better fit to satellite data than purely parametric models, due to the data's inter- and intra-annual variability. + +In the next example we set the arguments \code{times=100} and \code{p=0.1}, which creates 100 different data partitions, each with 10% of the samples for training and 90% for validation. The other arguments of this function are: the logistic weight function with steepness `-0.1` and midpoint `50` to \code{weight.fun}; the frequency of the temporal patterns to 8 days \code{freq=8}, and GAM smoothing formula to \code{formula = y ~ s(x)}, where function \code{s} sets up a spline model, with \code{x} the time and \code{y} a satellite band (for details see \code{?mgcv::gam} and \code{?mgcv::s}). The output is an object of class \code{twdtwCrossValidation} which includes the accuracy for each data partition. The object has two slots, the first called \code{partitions} has the index of the samples used for training, the second called \code{accuracy} has overall accuracy, user's accuracy, producer's accuracy, error matrix, and the data used in the calculation, i.e. reference classes, predicted classes, and TWDTW distance measure. + +```{r, echo = TRUE, eval = FALSE, results = 'markup'} +set.seed(1) +log_fun = logisticWeight(alpha=-0.1, beta=50) +cross_validation = twdtwCrossValidate(field_samples_ts, times=100, p=0.1, + freq = 8, formula = y ~ s(x, bs="cc"), weight.fun = log_fun) +``` +```{r, echo = FALSE, eval = TRUE} +load(system.file("lucc_MT/cross_validation.RData", package = "dtwSat")) +``` + +\autoref{fig:plot-accuracy} and \autoref{tab:cross-validation} show the 95% confidence interval of the mean for user\'s and producer\'s accuracy derived from the hundred-fold cross-validation analysis. The user\'s accuracy gives the confidence and the producer\'s accuracy gives the sensitivity of the method for each class. In our analysis all classes had high user\'s and producer\'s accuracy, meaning that TWDTW has high confidence and sensitivity for the classes included in the analysis. The cross-validation results show that if we randomly select 10% of our sampels to create temporal patterns we can get an overall accuracy of at least 97% in the classification of the remaining samples with 95% confidence level. +```{r plot-accuracy, echo = FALSE, eval = TRUE, fig.width=page_width, fig.height=page_width/2, fig.align='center', fig.cap='User\'s and producer\'s accuracy of the TWDTW cross-validation using 100 resampling-with-replacement. The plot shows the 95\\% confidence interval of the mean.', fig.pos='!ht'} +plot(cross_validation, conf.int=.95) +``` + +```{r, echo = FALSE, eval = TRUE, results = 'asis'} +twdtwXtable(cross_validation, conf.int=.95, digits = 2, caption="\\label{tab:cross-validation} User\'s and producer\'s accuracy of the TWDTW cross-validation using 100 resampling-with-replacement. The table shows the standard deviation ($\\sigma$) and the 95\\% confidence interval (ci) of the mean ($\\mu$).'", comment = FALSE, caption.placement = "bottom", table.placement="!ht") +``` + + + +## Creating temporal patterns + +In the last section we observed that the land cover classes based on our samples are separable using the TWDTW algorithm with high confidence level. Now we randomly select 10% of our samples for training and keep the remaining 90% for validation. The first set of samples are used to create temporal patterns and classify the raster time series, and the second set of samples to assess the final maps. Ideally we would need a second independent set of samples to assess the map, but it would be very difficult to identify different crops without field work. Therefore, we use the same samples used in the cross-validation (\autoref{assessing-the-separability-of-temporal-patterns}). +```{r, echo = TRUE, eval = TRUE} +library(caret) +set.seed(1) +I = unlist(createDataPartition(field_samples[,"label"], p = 0.1)) +training_ts = subset(field_samples_ts, I) +validation_samples = field_samples[-I,] +``` + +We use the function \code{createPatterns} to produce the temporal patterns based on the training samples. For that, we need to set the desired temporal frequency of the patterns and the smoothing function for the GAM model. In the example below, we set \code{freq=8} to get temporal patterns with a frequency of 8 days, and the GAM smoothing formula \code{formula = y ~ s(x)}, such as in \autoref{assessing-the-separability-of-temporal-patterns}). -To produce the set of template patterns using the function \code{createPatterns}, we need to set the temporal frequency of the resulting patterns and the smoothing function for the GAM model. In the example below, we set \code{freq=8} to get temporal patterns with a frequency of 8 days. We also set the GAM smoothing formula to be \code{formula = y ~ s(x)}, where function \code{s} sets up a spline model, with \code{x} the time and \code{y} a satellite band (for details see \code{?mgcv::gam} and \code{?mgcv::s}). ```{r, echo = TRUE, eval = TRUE} temporal_patterns = - createPatterns(field_samples_ts, freq = 8, formula = y ~ s(x)) + createPatterns(training_ts, freq = 8, formula = y ~ s(x)) ``` -We use the plot method \code{type="patterns"} to show the results of the \code{createPatterns} in \autoref{fig:temporal-patterns}. -```{r temporal-patterns, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_width/1.5, fig.align='center', fig.pos='!h', fig.cap='Temporal patterns of forest, cotton-fallow, soybean-cotton, soybean-maize, and soybean-millet based on the ground truth samples.'} +The result of the function \code{createPatterns} is an object of the class \code{twdtwTimeSeries}. We use the plot method \code{type="patterns"} to show the results of the \code{createPatterns} in \autoref{fig:temporal-patterns}. +```{r temporal-patterns, echo = TRUE, eval = TRUE, fig.width=page_width, fig.height=page_width/1.5, fig.align='center', fig.pos='!h', fig.cap='Temporal patterns of Forest, Cotton-fallow, Soybean-cotton, Soybean-maize, and Soybean-millet based on the ground truth samples.'} plot(temporal_patterns, type = "patterns") + theme(legend.position = c(.8,.25)) ``` -After obtaining the template patterns for each land cover class, it is useful to perform a pre-classification analysis to assess their quality and their informational content. Ideally, one would need template patterns that, when applied to the set of unknown time series, produce consistent results. For this reason, it is advisable that the user performs a pre-classification step, along the lines of the individual analysis described in Section \ref{classifying-a-time-series}. In this way, the users would assess how good their patterns are before classifying a large data set. +Our method is not restricted to cases where the temporal patterns are obtained from the set of images, such as in the example above. Once can also use patterns from a different set of images or defined in other studies, as long as these temporal patterns stand for the study area and their bands match the bands in the unclassified images. ## Classifying the image time series -After obtaining a consistent set of temporal patterns, we use the function \code{twdtwApply} to run the TWDTW analysis for each pixel location in the raster time series. The input raster time series in the object \code{twdtwRaster} should be longer or have approximatly the same length as the temporal patterns. This function retrieves an object of class \code{twdtwRaster} with the TWDTW dissimilarity measure of the temporal patterns for each time period. The arguments \code{overwrite} and \code{format} are passed to \code{raster::writeRaster}. The arguments \code{weight.fun} and \code{overlap} are described in Section \ref{classifying-a-time-series}. Here we set the parameters of the time weight (logistic function) base on our the experience about the phenological cycle of the vegetation in the study area. In the next example, we classify the raster time series using the temporal patterns in \code{temporal_patterns} obtained as described above. The result is a \code{twdtwRaster} with five layers; each layer contains the TWDTW dissimilarity measure for one temporal pattern over time. We use the plot type \code{distance} to illustrate the TWDTW dissimilarity for each temporal pattern in 2013, cf. Figure \ref{fig:plot-dissmilarity2013}. +After obtaining a consistent set of temporal patterns, we use the function \code{twdtwApply} to run the TWDTW analysis for each pixel location in the raster time series. The input raster time series in the object \code{twdtwRaster} should be longer or have approximatly the same length as the temporal patterns. This function retrieves an object of class \code{twdtwRaster} with the TWDTW dissimilarity measure of the temporal patterns for each time period. The arguments \code{overwrite} and \code{format} are passed to \code{raster::writeRaster}. The arguments \code{weight.fun} and \code{overlap} are described in \autoref{classifying-a-time-series}. Here we set the parameters of the time weight (logistic function) base on our the experience about the phenological cycle of the vegetation in the study area. In the next example, we classify the raster time series using the temporal patterns in \code{temporal_patterns} obtained as described above. The result is a \code{twdtwRaster} with five layers; each layer contains the TWDTW dissimilarity measure for one temporal pattern over time. We use the plot type \code{distance} to illustrate the TWDTW dissimilarity for each temporal pattern in 2008, cf. \autoref{fig:plot-dissmilarity2008}. ```{r, echo = TRUE, eval = TRUE, results = 'markup'} log_fun = logisticWeight(alpha=-0.1, beta=50) twdtw_dist = twdtwApply(x = raster_timeseries, y = temporal_patterns, overlap = 0.5, weight.fun = log_fun, overwrite=TRUE, format="GTiff") ``` - -```{r plot-dissmilarity2013, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Illustration of the TWDTW dissimilarity from each temporal pattern in 2013. The blue areas are more similar to the pattern and the red areas are less similar to the pattern.', fig.pos='!h'} -plot(x = twdtw_dist, type="distance", time.levels = 6) +```{r plot-dissmilarity2008, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Illustration of the TWDTW dissimilarity from each temporal pattern in 2008. The blue areas are more similar to the pattern and the red areas are less similar to the pattern.', fig.pos='!ht'} +plot(x = twdtw_dist, type="distance") ``` -The results of the example above can be used to create categorical land cover maps. The function \code{twdtwClassify} selects the most similar pattern for each time period and retrieves a \code{twdtwRaster} object with the time series of land use maps. The resulting object includes two layers, the first has the classified categorical maps and the second has the TWDTW dissimilarity measure. -```{r, echo = TRUE, eval = TRUE} -land_use_maps = twdtwClassify(twdtw_dist, format="GTiff", overwrite=TRUE) +The results of the example above can be used to create categorical land cover maps. The function \code{twdtwClassify} selects the most similar pattern for each time period and retrieves a \code{twdtwRaster} object with the time series of land cover maps. The resulting object includes two layers, the first has the classified categorical maps and the second has the TWDTW dissimilarity measure. +```{r, echo = TRUE, eval = TRUE, results = 'markup'} +land_cover_maps = twdtwClassify(twdtw_dist, format="GTiff", overwrite=TRUE) ``` ## Looking at the classification results -The classification results can be visualised using the \code{plot} methods of the class \code{twdtwRaster}, which supports four plot types: "maps", "area", "changes", and "distance". The \code{type="maps"} shows the land cover classification maps for each period, cf. Figure \ref{fig:plot-map}. -```{r plot-map, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Land use maps for each year from 2008 to 2013.', fig.pos='!h'} -plot(x = land_use_maps, type = "maps") +The classification results can be visualised using the \code{plot} methods of the class \code{twdtwRaster}, which supports four plot types: "maps", "area", "changes", and "distance". The \code{type="maps"} shows the land cover classification maps for each period, cf. \autoref{fig:plot-map}. +```{r plot-map, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Land cover maps for each year from 2008 to 2013.', fig.pos='!ht'} +plot(x = land_cover_maps, type = "maps") ``` -The next example shows the accumulated area for each class over time, using \code{type="area"}, cf. Figure \ref{fig:plot-area}. -```{r plot-area, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Percentage of area for each land use class from 2008 to 2013.', fig.pos='!h'} -plot(x = land_use_maps, type = "area") +The next example shows the accumulated area for each class over time, using \code{type="area"}, cf. \autoref{fig:plot-area}. +```{r plot-area, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Percentage of area for each land cover class from 2008 to 2013.', fig.pos='!ht'} +plot(x = land_cover_maps, type = "area") ``` -Users can also view the land cover transition for each time period, by setting \code{type="changes"}. For each land cover class and each period, the plot shows gains and losses in area from the other classes. This is the visual equivalent of a land transition matrix, cf. Figure \ref{fig:plot-change}. +Users can also view the land cover transition for each time period, by setting \code{type="changes"}. For each land cover class and each period, the plot shows gains and losses in area from the other classes. This is the visual equivalent of a land transition matrix, cf. \autoref{fig:plot-change}. ```{r plot-change, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Gains and losses in area from the other classes. The $y$ axis shows the actual class; the positive direction of $x$ axis shows the gains and the negative direction of $x$ axis shows the losses of the classes indicated in $y$. The colors indicate from/to which classes the gains/losses belong.', fig.pos='!h'} -plot(x = land_use_maps, type = "changes") +plot(x = land_cover_maps, type = "changes") ``` -We can also look at the dissimilarity of each classified pixel setting \code{type="distance"}. This plot can give a measure of the uncertainty of the classification of each pixel for each time period, cf. Figure \ref{fig:plot-dissmilarity}. -```{r plot-dissmilarity, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='TWDTW dissimilarity measure for each pixel over each classified period. The blue areas have high confidence and the red areas have low confidence in the classification.', fig.pos='!h'} -plot(x = land_use_maps, type="distance") +We can also look at the dissimilarity of each classified pixel setting \code{type="distance"}. This plot can give a measure of the uncertainty of the classification of each pixel for each time period, cf. \autoref{fig:plot-dissmilarity}. +```{r plot-dissmilarity, echo = TRUE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='TWDTW dissimilarity measure for each pixel over each classified period. The blue areas have high confidence and the red areas have low confidence in the classification.', fig.pos='!ht'} +plot(x = land_cover_maps, type="distance") ``` -## Assessing the classification accuracy -In this section we show how to assess the accuracy of the TWDTW method for land cover classification. To do this, we split the ground truth samples into training and validation sets, using the function \code{splitDataset} from the package \pkg{dtwSat}. This function splits set of time series in the object \code{twdtwTimeSeries} for training and validation. The argument \code{p} defines the percentage used for training and the argument \code{times} gives the number of different partitions to create. This is a a stratified sampling with a simple random sampling within each stratum, see \code{?createDataPartition} for details. In the next example we create 100 different partitions of the data. Each partition uses 10% of the data for training and 90% for validation. The output is a list with 100 different data partitions; each partition has the temporal patterns based on the training samples and a set of time series for validation. -```{r, echo = TRUE, eval = FALSE} -set.seed(1) -partitions = splitDataset(field_samples_ts, p=0.1, times=100, - freq = 8, formula = y ~ s(x, bs="cc")) -``` -For each data partition we run the TWDTW analysis to classify the set of validation time series using the trained temporal patterns. The result is a list of \code{twdtwMatches} objects with the classified set of time series for each data partition. To compute the *User's Accuracy* (UA) and *Producer's Accuracy* (PA) of the classified time series we use the function \code{dtwSat::twdtwCrossValidation} that retrieves a \code{data.frame} with the accuracy assessment for all data partitions. -```{r, echo = TRUE, eval = FALSE, results = 'markup'} -log_fun = logisticWeight(alpha=-0.1, beta=50) -twdtw_res = lapply(partitions, function(x){ - res = twdtwApply(x = x$ts, y = x$patterns, weight.fun = log_fun, n=1) - twdtwClassify(x = res) -}) -assessment = twdtwCrossValidation(twdtw_res) -head(assessment, 5) -``` -```{r, echo = FALSE, eval = TRUE} -load(system.file("lucc_MT/cross_validation.RData", package = "dtwSat")) -``` - - -Figure \ref{fig:plot-accuracy} shows the average $\mu$ and standard deviation $\sigma$ of *user\'s* and *producer\'s accuracy* based on a bootstrap simulation of 100 different data partitions using resampling-with-replacement. The *user\'s accuracy* gives the confidence and the *producer\'s accuracy* gives the sensitivity of the method for each class. In our analysis all classes had high *user\'s* and *producer\'s accuracy*, meaning that TWDTW has high confidence and sensitivity for the classes included in the analysis. The average, standard deviation, and the 99\% confidence interval is also shown in Table \ref{tab:assessment}. -```{r plot-accuracy, echo = FALSE, eval = TRUE, fig.width=page_width, fig.height=page_width/2, fig.align='center', fig.cap='User\'s Accuracy (UA) and Producer\'s Accuracy (PA) of the TWDTW method for land cover classification. The plot shows the averages and their confidence interval for 99\\%.', fig.pos='!h'} -df = melt(assessment[,-1], id="label") -df$variable = factor(df$variable, levels = c("UA", "PA"), labels = c("User's Accuracy", "Producer's Accuracy")) -ggplot(df, aes(x=label, y=value)) + - stat_summary(fun.data="mean_cl_boot", fun.args=list(conf.int = .99), - width=0.5, geom="crossbar", size=0.1, fill = "gray") + - geom_point(size=0.2) + facet_grid(. ~ variable) + - scale_y_continuous(limits = c(0,1), labels = percent, breaks = seq(0,1,.2)) + - xlab("") + ylab("Accuracy") + coord_flip() +In this section we show how to assess the classification. \pkg{dtwSat} provides a function called \code{twdtwAssess}, which computes a set of accuracy metrics, and adjusted area such as proposed by @Olofsson:2013 and @Olofsson:2014. The inputs of this function are the classified map (an object of class \code{twdtwRaster}), and a set of samples for validation (an object of class \code{data.frame} or \code{sp::SpatialPointsDataFrame}). Besides coordinates, the samples should also have starting dates, ending dates, and lables compatible with the labels in the map (for details see \autoref{input-data}). The output of \code{twdtwAssess} is an object of class \code{twdtwAssessment}, which includes four slots: 1) \code{accuracyByPeriod} is a list of metrics for each time period, including overall accuracy, user's accuracy, produce's accuracy, error matrix (confusion matrix), and adjusted area; 2) \code{accuracySummary} has the accuracy and adjusted area accumulated over all time periods; 3) \code{data} is a \code{SpatialPointsDataFrame} with sample ID, period ID, starting date, ending date, reference label, predicted label, and TWDTW distance; and 4) \code{map} is a twdtwRaster with the raster maps. The next example uses \code{twdtwAssess} to compute the accuracy of the maps (\code{land_cover_maps}) using the validation samples (\code{validation_samples}) with a 95% confidence level. +```{r, echo = TRUE, eval = TRUE} +maps_assessment = twdtwAssess(land_cover_maps, y = validation_samples, + proj4string = proj_str, conf.int=.95) ``` + + +The results of the assessment in \autoref{tab:map-error-matrix}, \ref{tab:map-accuracy}, and \ref{tab:map-adjusted-area} are accumulated over the whole time period, i.e. the total mapped area is equal to the surface area times the number of maps. It is possible to assess and visualise each period independently from each other. However, our samples are irregularly distributed over time and some classes do not have samples in all time period, which limits the analysis of each time period independently from each other. + ```{r, echo = FALSE, eval = TRUE, results = 'asis'} -assess_mean = aggregate(assessment[, c("UA","PA")], list(assessment$label), mean) -assess_sd = aggregate(assessment[, c("UA","PA")], list(assessment$label), sd) -l_names = levels(assessment$label) -names(l_names) = l_names -ic_ua = t(sapply(l_names, function(i) 100*mean_cl_boot(x = assessment$UA[assessment$label==i], conf.int = .99)))[,-1] -ic_pa = t(sapply(l_names, function(i) 100*mean_cl_boot(x = assessment$PA[assessment$label==i], conf.int = .99)))[,-1] +twdtwXtable(maps_assessment, table.type="errormatrix", digits = 0, rotate.col = TRUE, caption="\\label{tab:map-error-matrix}Error matrix of the map classification based on TWDTW analysis. The area is in the map unit, in this case $m^2$. $w$ is the proportion of area mapped for each class.", comment = FALSE, caption.placement = "bottom", table.placement="!ht") +``` -assess_table = data.frame( - Class = assess_mean$Group.1, - - MUA = sprintf("%.2f", round(100*assess_mean$UA,2)), - SDUA = sprintf("(%.2f)", round(100*assess_sd$UA,2)), - CIUA = sprintf("[%.2f-%.2f]", round(as.numeric(ic_ua[,1]),2), round(as.numeric(ic_ua[,2]),2)), +As we can see in \autoref{tab:map-error-matrix} only nine samples were misclassified, all of them from the reference class "Soybean-cotton". From these samples six were classified as "Soybean-maize", and three as "Cotton-fallow". As we see in \autoref{tab:map-accuracy} the only class with producer\'s accuracy lower than $100\%$ was "Soybean-cotton", reaching $72\%$ with high uncertainty ($\pm13\%$). The user\'s accuracy for all classes was higer than $95\%$, with maximun uncertainty of $\pm5\%$. To visualise the misclassified samples on top of the maps we can use the plot \code{type="map"} for objects of class \code{twdtwAssessment}, such that `plot(x = maps_assessment, type="map", samples="incorrect")`. The user can also set the argument \code{samples} to see correctly classified samples \code{samples="correct"}, or to see all samples \code{samples="all"}. - MPA = sprintf("%.2f", round(100*assess_mean$PA,2)), - SDPA = sprintf("(%.2f)", round(100*assess_sd$PA,2)), - CIPA = sprintf("[%.2f-%.2f]", round(as.numeric(ic_pa[,1]),2), round(as.numeric(ic_pa[,2]),2)) - ) +The \autoref{fig:plot-map-incorrect-samples} shows that the misclassified samples are all in the map from 2012. The six samples of "Soybean-cotton" classified as "Soybean-maize" are within a big area of "Soybean-maize" and the three samples of "Soybean-cotton" classified as "Cotton-fallow" are near the border between this two classes. This errors might be related to the mixture of different classes in the same pixel. -x_assess = xtable::xtable(assess_table, - format = tab_format, digits = 2, label = "tab:assessment", alig=c("l","c","c","c","c","c","c","c"), - caption="User\'s and Producer\'s Accuracy of the land use classification based on TWDTW analysis. $\\mu$ is the average accuracy, $\\sigma$ the standard deviation, and CI is the confidence interval of 99\\% using 100 resampling-with-replacement.") +```{r plot-map-incorrect-samples, echo = FALSE, eval = TRUE, fig.width=page_width, fig.align='center', fig.cap='Incorrect classified samples.', fig.pos='!ht'} +plot(x = maps_assessment, type="map", samples="incorrect") +``` + +```{r, echo = FALSE, eval = TRUE, results = 'asis'} +twdtwXtable(maps_assessment, table.type="accuracy", show.prop = TRUE, digits = 2, rotate.col = TRUE, caption="\\label{tab:map-accuracy}Accuracy and error matrix in proportion of area of the classified map.", comment = FALSE, caption.placement = "bottom", table.placement="!ht") +``` + +In \autoref{tab:map-adjusted-area} we can see the mapped and the adjusted area. This is the accumulated area over the whole period, i.e. the sum of all maps from 2008 to 2013. As the "Forest" and "Soybean-millet" did not have omission ($100\%$ producer's accuracy) or comission ($100\%$ user's accuracy) erros, we immediately see that their mapped area is equal to their adjusted area (\autoref{tab:map-adjusted-area}). To help the analysis of the other classes we use the plot \code{type="area"} for class \code{twdtwAssessment}, such that `plot(x = maps_assessment, type="area", perc=FALSE)`. \autoref{fig:plot-area-and-uncertainty} shows the accumulated area mapped and adjusted for all classes. In this figure we see that our method overestimated the area of "Soybean-maize", i.e. the mapped area ($110173564\;m^2$) is bigger than the adjusted area ($104927204\;m^2$) plus the confidence interval $4113071\;m^2$. Meanwhile we underestimated the area of "Soybean-cotton", i.e. its mapped area ($18782634\;m^2$) is smaller than the adjusted area ($26260270\;m^2$) plus the confidence interval ($4805205\;m^2$). The mapped area of "Cotton-fallow" ($47600561\;m^2$) is within the confidence interval of the adjusted area ($45369285\pm2484480\;m^2$). To improve the accuracy assessment and area estimations the field samples should be better distributed over time, which would also allow for better land cover changes assessment. -addtorow = list() -addtorow$pos = list(0) -addtorow$command = paste("Class & \\multicolumn{3}{c}{User's Accuracy (UA) \\%} & \\multicolumn{3}{c}{Producer's Accuracy (PA)\\%}\\\\", paste(c("","$\\mu$","$\\sigma$","CI","$\\mu$","$\\sigma$","CI"), collapse="&"), "\\\\", collapse = "") +```{r, echo = FALSE, eval = TRUE, results = 'asis'} +twdtwXtable(maps_assessment, table.type="area", digits = 0, rotate.col = TRUE, caption="\\label{tab:map-adjusted-area}Mapped and adjusted, accumulated over the whole period, i.e. the sum from the sum of the maps from 2008 to 2013. The area is in the map unit, in this case $m^2$.", comment = FALSE, caption.placement = "bottom", table.placement="!ht") +``` -xtable::print.xtable(x_assess, add.to.row=addtorow, include.colnames = FALSE, include.rownames = FALSE, - comment = FALSE, caption.placement = "bottom") +```{r plot-area-and-uncertainty, echo = FALSE, eval = TRUE, fig.width=page_width, fig.height=page_height/2.7, fig.align='center', fig.cap='Mapped and adjusted, accumulated over the whole period, i.e. the sum from the sum of the maps from 2008 to 2013. The area is in the map unit, in this case $m^2$.', fig.pos='!ht'} +plot(x = maps_assessment, type="area", perc=FALSE) ``` @@ -533,20 +507,24 @@ xtable::print.xtable(x_assess, add.to.row=addtorow, include.colnames = FALSE, in # Conclusions and Discussion -Nowadays, there are large open archives of Earth Observation data, but few open source methods for analysing them. With this motivation, this paper provides guidance on how to use the Time-Weighed Dynamic Time Warping (TWDTW) method for remote sensing applications. As we have discussed in a companion paper [@Maus:2016], the TWDTW method is well suited for land cover change analysis of satellite image time series. +The overall accuracy of the classification with a 95% confidence level is within 97% and 99%. With same confidence level, user's and producer's accuracy are between 90% and 100% for all classes, except for "Soybean-cotton", which has wide confidence interval for user's accuracy, between 59% and 85%. A small sample size will likely have large confidence intervals [@Foody:2009], therefore, this analysis could be improved by increasing the number of "Soybean-cotton" samples. In addition, our access to field information is limited to a set of samples irregularly distributed over time, which are not enough to assess each mapped period independently from each other. Nevertheless, the results of the accuracy assessment show that the TWDTW has great potential to classify different crop types. -The main goal of \pkg{dtwSat} package is to make TWDTW accessible for researchers. The package supports the full cycle of land cover classification using image time series, ranging from selecting temporal patterns to visualising and evaluating the results. The current version of the \pkg{dtwSat} package provides a pixel-based time series classification method. We envisage that future versions of the package could include local neighborhoods to reduce border effects and improve classification homogeneity. +DTW based approaches have achieved good results for land cover classification [@Petitjean:2012; @Maus:2016], however, a reduced number of points in the time series will negatively impact the accuracy. Remotely sensed images often present noise and poor coverage due to clouds, aerosol load, surface directional effects, and sensor problems. This leads to large amount of gaps in satellite image time series. Therefore, methods that deal with irregular temporal sampling, i.e. irregular sampling intervals, have great potential to fully exploit the available satellite images archive. DTW is known to be one of the most robust methods for irregular time series [@Keogh:2005; @Tormene:2009]. It was successfully applied for satellite time series clustering using FORMOSAT-2 [@Petitjean:2012] and using MODIS [@Maus:2016]. @Petitjean:2012, for example, showed that clustering based on DTW is consistent even when there are several images missing per year because of cloud cover. However, the effect of a reduced number of samples in the time needs to be better evaluated in order to point out the limiting gap size for satellite image time series analysis using DTW based methods. -The \pkg{dtwSat} package provides two in-built functions for linear and logistic time weight. In the current version of the package the parameters of the weight functions are set manually to the same value for all land use/cover classes. Future versions of the package could include methods to search for the best parameters to be set class-by-class using field data. +The DTW approaches will search for the matches of a temporal pattern, therefore the number of points in the time series should represent the phenological cycles of different land cover types. The number of available observations might be a limitation for sensors with lower temporal resolution, such as Landsat. We believe that this limitation could be addressed, for example, by combining TWDTW analysis with pixel-based compositing techniques [@Griffiths:2013; @White:2014]. These approaches have become more popular with the opening of the USGS Landsat archive and could be used to increase the availability of gap-free time series [@Gomez:2016]. -To aim for maximum usage by the scientific community, the \pkg{dtwSat} package described in this paper works with well-known R data classes such as provided by packages \pkg{zoo} and \pkg{raster}. We are planning improvements, so that \pkg{dtwSat} can be combined with array databases, such as SciDB [@Stonebraker:2013]. We believe that combining array databases with image time series analysis software such as presented here is one way forward to scaling the process of information extracting to very large Earth Observation data. +\pkg{dtwSat} provides a dissimilarity measure in the n-dimensional space, allowing multispectral satellite image time series analysis. Our experience using MODIS data sets has shown that n-dimensional analysis, i.e. using RED, NIR, NDVI, EVI, and NDVI, increases the separability among classes when compared to single band analysis, for example using only EVI or NDVI. Further studies on multispectral data using TWDTW analysis will help to optimize the selection of bands. +The main goal of \pkg{dtwSat} package is to make TWDTW accessible for researchers. The package supports the full cycle of land cover classification using image time series, ranging from selecting temporal patterns to visualising and assessing the results. The current version of the \pkg{dtwSat} package provides a pixel-based time series classification method. We envisage that future versions of the package could include local neighborhoods to reduce border effects and improve classification homogeneity. +The \pkg{dtwSat} package provides two in-built functions for linear and logistic time weight. In the current version of the package the parameters of the weight functions are set manually to the same value for all land cover classes. Future versions could include methods to search for the best parameters for each land cover class using field data. +Nowadays, there are large open archives of Earth Observation data, but few open source methods for analysing them. With this motivation, this paper provides guidance on how to use the Time-Weighed Dynamic Time Warping (TWDTW) method for remote sensing applications. As we have discussed in a companion paper [@Maus:2016], the TWDTW method is well suited for land cover change analysis of satellite image time series. -\section*{Acknowledgments} -Victor Maus has been supported by the Institute for Geoinformatics, University of Münster (Germany), and by the Earth System Science Center, National Institute for Space Research (Brazil). Part of the research was developed in the Young Scientists Summer Program at the International Institute for Applied Systems Analysis, Laxenburg (Austria). Gilberto Câmara's term as Brazil Chair at IFGI has been supported by CAPES (grant 23038.007569\/2012-16). Gilberto's work is also supported by FAPESP e-science program (grant 2014-08398-6) and CNPq (grant 312151\/2014-4). +The TWDTW algorithm is computationally intensive and for large areas one should consider parallel processing. The algorithm is pixel time series based, i.e. each time series is processed independently from each other, and therefore, it can be easily parallelized. To aim for maximum usage by the scientific community, the \pkg{dtwSat} package described in this paper works with well-known \proglang{R} data classes such as provided by \pkg{raster}, which offers the option to work with raster data sets stored on disk that are too large to be loaded into memory (RAM) at once [@Hijmans:2015]. We are planning improvements, so that \pkg{dtwSat} can also be combined with array databases, such as SciDB [@Stonebraker:2013]. We believe that combining array databases with image time series analysis software such as presented here is one way forward to scaling the process of information extracting from very large Earth Observation data. - + +\section*{Acknowledgments} +Victor Maus has been supported by the Institute for Geoinformatics, University of Münster (Germany), and by the Earth System Science Center, National Institute for Space Research (Brazil). Gilberto Camara's term as Brazil Chair at IFGI has been supported by CAPES (grant 23038.007569\/2012-16). Gilberto's work is also supported by FAPESP e-science program (grant 2014-08398-6) and CNPq (grant 312151\/2014-4). diff --git a/vignettes/references.bib b/vignettes/references.bib index af97bfb..dd88c03 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -601,3 +601,76 @@ @article{Hastie:1986 volume = {1}, year = {1986} } + +@ARTICLE{Tormene:2009, + title = {Matching incomplete time series with dynamic time warping: an algorithm and an application to post-stroke rehabilitation}, + journal = {Artificial Intelligence in Medicine}, + volume = {45}, + number = {1}, + pages = {11 - 34}, + year = {2009}, + doi = {10.1016/j.artmed.2008.11.007}, + author = {Paolo Tormene and Toni Giorgino and Silvana Quaglini and Mario Stefanelli} +} + +@article{Olofsson:2014, + title = {Good practices for estimating area and assessing accuracy of land change}, + journal = {Remote Sensing of Environment}, + volume = {148}, + number = {}, + pages = {42 - 57}, + year = {2014}, + issn = {0034-4257}, + doi = {10.1016/j.rse.2014.02.015}, + author = {Pontus Olofsson and Giles M. Foody and Martin Herold and Stephen V. Stehman and Curtis E. Woodcock and Michael A. Wulder} +} + +@ARTICLE{Olofsson:2013, + author = {Pontus Olofsson and Giles M. Foody and Stephen V. Stehman and Curtis E. Woodcock}, + title = {Making better use of accuracy data in land change studies: Estimating accuracy and area and quantifying uncertainty using stratified estimation}, + journal = {Remote Sensing of Environment}, + volume = {129}, + number = {}, + pages = {122 - 131}, + year = {2013}, + note = {}, + doi = {10.1016/j.rse.2012.10.031} +} + +@article{White:2014, + author = {J. C. White and M. A. Wulder and G. W. Hobart and J. E. Luther and T. Hermosilla and P. Griffiths and N. C. Coops and R. J. Hall and P. Hostert and A. Dyk and L. Guindon}, + title = {Pixel-Based Image Compositing for Large-Area Dense Time Series Applications and Science}, + journal = {Canadian Journal of Remote Sensing}, + volume = {40}, + number = {3}, + pages = {192-212}, + year = {2014}, + doi = {10.1080/07038992.2014.945827}, +} + +@article{Gomez:2016, + title = {Optical remotely sensed time series data for land cover classification: A review}, + journal = {{ISPRS} Journal of Photogrammetry and Remote Sensing}, + volume = {116}, + number = {}, + pages = {55 - 72}, + year = {2016}, + note = {}, + issn = {0924-2716}, + doi = {10.1016/j.isprsjprs.2016.03.008}, + author = {Cristina G\'{o}mez and Joanne C. White and Michael A. Wulder}, +} + +@article{Foody:2009, + title = {Classification accuracy comparison: Hypothesis tests and the use of confidence intervals in evaluations of difference, equivalence and non-inferiority}, + journal = {Remote Sensing of Environment}, + volume = {113}, + number = {8}, + pages = {1658 - 1663}, + year = {2009}, + issn = {0034-4257}, + doi = {10.1016/j.rse.2009.03.014}, + author = {Giles M. Foody} +} + +