diff --git a/docs/source/api/raster-functions.rst b/docs/source/api/raster-functions.rst index 194ecca00..ed3f5a1cd 100644 --- a/docs/source/api/raster-functions.rst +++ b/docs/source/api/raster-functions.rst @@ -193,7 +193,7 @@ rst_boundingbox rst_clip ******** -.. function:: rst_clip(tile, geometry) +.. function:: rst_clip(tile, geometry, cutline_all_touched) Clips :code:`tile` with :code:`geometry`, provided in a supported encoding (WKB, WKT or GeoJSON). @@ -201,15 +201,26 @@ rst_clip :type tile: Column (RasterTileType) :param geometry: A column containing the geometry to clip the raster to. :type geometry: Column (GeometryType) + :param cutline_all_touched: A column to specify pixels boundary behavior. + :type cutline_all_touched: Column (BooleanType) :rtype: Column: RasterTileType .. note:: - Notes + **Notes** - :code:`geometry` is expected to be: - - in the same coordinate reference system as the raster. + The :code:`geometry` parameter is expected to be: + - expressed in the same coordinate reference system as the raster. - a polygon or a multipolygon. + The :code:`cutline_all_touched` parameter: + - Optional: default is true. this is a GDAL warp config for the operation. + - If set to true, the pixels touching the geometry are included in the clip, + regardless of half-in or half-out; this is important for tessellation behaviors. + - If set to false, only at least half-in pixels are included in the clip. + + The actual GDAL command to clip looks something like the following (after some setup): + :code:`"gdalwarp -wo CUTLINE_ALL_TOUCHED= -cutline -crop_to_cutline"` + The output raster tiles will have: - the same extent as the input geometry. - the same number of bands as the input raster. diff --git a/python/mosaic/api/raster.py b/python/mosaic/api/raster.py index 464e945fe..a4a051179 100644 --- a/python/mosaic/api/raster.py +++ b/python/mosaic/api/raster.py @@ -147,18 +147,20 @@ def rst_boundingbox(raster_tile: ColumnOrName) -> Column: ) -def rst_clip(raster_tile: ColumnOrName, geometry: ColumnOrName) -> Column: +def rst_clip(raster_tile: ColumnOrName, geometry: ColumnOrName, cutline_all_touched: Any = True) -> Column: """ - Clips the raster to the given supported geometry (WKT, WKB, GeoJSON). - The result is Mosaic raster tile struct column to the clipped raster. - The result is stored in the checkpoint directory. + Clips `raster_tile` to the given supported `geometry` (WKT, WKB, GeoJSON). + The result is a Mosaic raster tile representing the clipped raster. Parameters ---------- raster_tile : Column (RasterTileType) Mosaic raster tile struct column. geometry : Column (StringType) - The geometry to clip the raster to. + The geometry to clip the tile to. + cutline_all_touched : Column (BooleanType) + optional override to specify whether any pixels touching + cutline should be included vs half-in only, default is true Returns ------- @@ -166,10 +168,14 @@ def rst_clip(raster_tile: ColumnOrName, geometry: ColumnOrName) -> Column: Mosaic raster tile struct column. """ + if type(cutline_all_touched) == bool: + cutline_all_touched = lit(cutline_all_touched) + return config.mosaic_context.invoke_function( "rst_clip", pyspark_to_java_column(raster_tile), pyspark_to_java_column(geometry), + pyspark_to_java_column(cutline_all_touched) ) diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala index b81958cd2..b989b4eee 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala @@ -81,6 +81,13 @@ case class MosaicRasterGDAL( val gt = getGeoTransform val sourceCRS = getSpatialReference + + val destCRSEPSGCode = (destCRS.GetAuthorityName(null), destCRS.GetAuthorityCode(null)) match { + case (null, _) => 0 + case (name: String, code: String) if name == "EPSG" => code.toInt + case _ => 0 + } + val transform = new osr.CoordinateTransformation(sourceCRS, destCRS) val bbox = geometryAPI.geometry( @@ -96,7 +103,9 @@ case class MosaicRasterGDAL( val geom1 = org.gdal.ogr.ogr.CreateGeometryFromWkb(bbox.toWKB) geom1.Transform(transform) - geometryAPI.geometry(geom1.ExportToWkb(), "WKB") + val mosaicGeom = geometryAPI.geometry(geom1.ExportToWkb(), "WKB") + mosaicGeom.setSpatialReference(destCRSEPSGCode) + mosaicGeom } /** @return The diagonal size of a raster. */ diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/RasterClipByVector.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/RasterClipByVector.scala index ddae2fed2..9468426e3 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/RasterClipByVector.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/RasterClipByVector.scala @@ -8,6 +8,8 @@ import com.databricks.labs.mosaic.core.raster.operator.gdal.GDALWarp import com.databricks.labs.mosaic.utils.PathUtils import org.gdal.osr.SpatialReference +import java.util.Locale + /** * RasterClipByVector is an object that defines the interface for clipping a * raster by a vector geometry. @@ -31,25 +33,35 @@ object RasterClipByVector { * The geometry CRS. * @param geometryAPI * The geometry API. + * @param cutlineAllTouched + * Whether pixels touching cutline are automatically included (true) or + * included only the cutline crosses the centre point of the pixel + * (false) default: true. * @return * A clipped raster. */ - def clip(raster: MosaicRasterGDAL, geometry: MosaicGeometry, geomCRS: SpatialReference, geometryAPI: GeometryAPI): MosaicRasterGDAL = { + def clip( + raster: MosaicRasterGDAL, + geometry: MosaicGeometry, + geomCRS: SpatialReference, + geometryAPI: GeometryAPI, + cutlineAllTouched: Boolean = true + ): MosaicRasterGDAL = { val rasterCRS = raster.getSpatialReference val outShortName = raster.getDriversShortName val geomSrcCRS = if (geomCRS == null) rasterCRS else geomCRS + val cutlineToken = cutlineAllTouched.toString.toUpperCase(Locale.US) + val resultFileName = PathUtils.createTmpFilePath(GDAL.getExtension(outShortName)) val shapeFileName = VectorClipper.generateClipper(geometry, geomSrcCRS, rasterCRS, geometryAPI) // For -wo consult https://gdal.org/doxygen/structGDALWarpOptions.html - // SOURCE_EXTRA=3 is used to ensure that when the raster is clipped, the - // pixels that touch the geometry are included. The default is 1, 3 seems to be a good empirical value. val result = GDALWarp.executeWarp( resultFileName, Seq(raster), - command = s"gdalwarp -wo CUTLINE_ALL_TOUCHED=TRUE -cutline $shapeFileName -crop_to_cutline" + command = s"gdalwarp -wo CUTLINE_ALL_TOUCHED=$cutlineToken -cutline $shapeFileName -crop_to_cutline" ) VectorClipper.cleanUpClipper(shapeFileName) diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/VectorClipper.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/VectorClipper.scala index 41d98fbcf..421155fde 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/VectorClipper.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/clip/VectorClipper.scala @@ -64,8 +64,9 @@ object VectorClipper { val projectedGeom = geometry.osrTransformCRS(srcCrs, dstCrs, geometryAPI) val geom = ogr.CreateGeometryFromWkb(projectedGeom.toWKB(2)) + geom.AssignSpatialReference(dstCrs) - val geomLayer = shpDataSource.CreateLayer("geom") + val geomLayer = shpDataSource.CreateLayer("geom", dstCrs) val idField = new org.gdal.ogr.FieldDefn("id", OFTInteger) geomLayer.CreateField(idField) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Clip.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Clip.scala index f9ea405f6..6fee89aaa 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Clip.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Clip.scala @@ -5,34 +5,39 @@ import com.databricks.labs.mosaic.core.raster.api.GDAL import com.databricks.labs.mosaic.core.raster.operator.clip.RasterClipByVector import com.databricks.labs.mosaic.core.types.RasterTileType import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile -import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} -import com.databricks.labs.mosaic.expressions.raster.base.Raster1ArgExpression +import com.databricks.labs.mosaic.expressions.base.WithExpressionInfo +import com.databricks.labs.mosaic.expressions.raster.base.Raster2ArgExpression import com.databricks.labs.mosaic.functions.MosaicExpressionConfig import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback -import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant} +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, NullIntolerant} +import org.apache.spark.sql.types.BooleanType + +import scala.util.Try /** The expression for clipping a raster by a vector. */ case class RST_Clip( rastersExpr: Expression, geometryExpr: Expression, + cutlineAllTouchedExpr: Expression, expressionConfig: MosaicExpressionConfig -) extends Raster1ArgExpression[RST_Clip]( +) extends Raster2ArgExpression[RST_Clip]( rastersExpr, geometryExpr, + cutlineAllTouchedExpr, returnsRaster = true, expressionConfig = expressionConfig ) with NullIntolerant with CodegenFallback { + val geometryAPI: GeometryAPI = GeometryAPI(expressionConfig.getGeometryAPI) + override def dataType: org.apache.spark.sql.types.DataType = { GDAL.enable(expressionConfig) RasterTileType(expressionConfig.getCellIdType, rastersExpr, expressionConfig.isRasterUseCheckpoint) } - val geometryAPI: GeometryAPI = GeometryAPI(expressionConfig.getGeometryAPI) - /** * Clips a raster by a vector. * @@ -40,14 +45,17 @@ case class RST_Clip( * The raster to be used. * @param arg1 * The vector to be used. + * @param arg2 + * cutline handling (boolean). * @return * The clipped raster. */ - override def rasterTransform(tile: MosaicRasterTile, arg1: Any): Any = { + override def rasterTransform(tile: MosaicRasterTile, arg1: Any, arg2: Any): Any = { val geometry = geometryAPI.geometry(arg1, geometryExpr.dataType) val geomCRS = geometry.getSpatialReferenceOSR + val cutlineAllTouched = arg2.asInstanceOf[Boolean] tile.copy( - raster = RasterClipByVector.clip(tile.getRaster, geometry, geomCRS, geometryAPI) + raster = RasterClipByVector.clip(tile.getRaster, geometry, geomCRS, geometryAPI, cutlineAllTouched) ) } @@ -60,7 +68,7 @@ object RST_Clip extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns a raster tile clipped by provided vector. + |_FUNC_(expr1, expr2) - Returns a raster tile clipped by provided vector. |""".stripMargin override def example: String = @@ -72,8 +80,20 @@ object RST_Clip extends WithExpressionInfo { | ... | """.stripMargin - override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { - GenericExpressionFactory.getBaseBuilder[RST_Clip](2, expressionConfig) + override def builder(exprConfig: MosaicExpressionConfig): FunctionBuilder = { (children: Seq[Expression]) => + { + def checkCutline(cutline: Expression): Boolean = Try(cutline.eval().asInstanceOf[Boolean]).isSuccess + val noCutlineArg = new Literal(true, BooleanType) // default is true for tessellation needs + + children match { + // Note type checking only works for literals + case Seq(input, vector) => + RST_Clip(input, vector, noCutlineArg, exprConfig) + case Seq(input, vector, cutline) if checkCutline(cutline) => + RST_Clip(input, vector, cutline, exprConfig) + case _ => RST_Clip(children.head, children(1), children(2), exprConfig) + } + } } } diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 0899d03f8..0a3c14a1b 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -700,7 +700,9 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_bandmetadata(raster: Column, band: Int): Column = ColumnAdapter(RST_BandMetaData(raster.expr, lit(band).expr, expressionConfig)) def rst_boundingbox(raster: Column): Column = ColumnAdapter(RST_BoundingBox(raster.expr, expressionConfig)) - def rst_clip(raster: Column, geometry: Column): Column = ColumnAdapter(RST_Clip(raster.expr, geometry.expr, expressionConfig)) + def rst_clip(raster: Column, geometry: Column): Column = ColumnAdapter(RST_Clip(raster.expr, geometry.expr, lit(true).expr, expressionConfig)) + def rst_clip(raster: Column, geometry: Column, cutline: Boolean): Column = ColumnAdapter(RST_Clip(raster.expr, geometry.expr, lit(cutline).expr, expressionConfig)) + def rst_clip(raster: Column, geometry: Column, cutline: Column): Column = ColumnAdapter(RST_Clip(raster.expr, geometry.expr, cutline.expr, expressionConfig)) def rst_convolve(raster: Column, kernel: Column): Column = ColumnAdapter(RST_Convolve(raster.expr, kernel.expr, expressionConfig)) def rst_dtmfromgeoms(pointsArray: Column, linesArray: Column, mergeTol: Column, snapTol: Column, origin: Column, xWidth: Column, yWidth: Column, xSize: Column, ySize: Column): Column = ColumnAdapter(RST_DTMFromGeoms(pointsArray.expr, linesArray.expr, mergeTol.expr, snapTol.expr, origin.expr, xWidth.expr, yWidth.expr, xSize.expr, ySize.expr, expressionConfig)) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala index 9d9328ca6..486f03736 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala @@ -2,14 +2,17 @@ package com.databricks.labs.mosaic.expressions.raster import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.core.raster.gdal.MosaicRasterGDAL import com.databricks.labs.mosaic.functions.MosaicContext +import com.databricks.labs.mosaic.test.mocks.filePath import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.functions.lit import org.scalatest.matchers.should.Matchers._ trait RST_ClipBehaviors extends QueryTest { // noinspection MapGetGet - def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + def basicBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("ERROR") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() @@ -47,4 +50,44 @@ trait RST_ClipBehaviors extends QueryTest { } + def cutlineBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("ERROR") + val mc = MosaicContext.build(indexSystem, geometryAPI) + mc.register() + val sc = spark + import mc.functions._ + import sc.implicits._ + + val testPath = filePath("/binary/geotiff-small/chicago_sp27.tif") + + val createInfo = Map("path" -> testPath, "parentPath" -> testPath) + val testRaster = MosaicRasterGDAL.readRaster(createInfo) + + val ftMeters = 0.3 // ~0.3 ft in meter + val ftUnits = 0.3 // epsg:26771 0.3 ft per unit + val buffVal: Double = testRaster.pixelXSize * ftMeters * ftUnits * 50.5 + val clipRegion = testRaster + .bbox(geometryAPI, testRaster.getSpatialReference) + .getCentroid + .buffer(buffVal) + + clipRegion.getSpatialReference shouldBe 26771 + + val df = spark.read.format("gdal").load(testPath) + .withColumn("clipGeom", st_geomfromgeojson(lit(clipRegion.toJSON))) + + val df_include = df + .withColumn("clippedRaster", rst_clip($"tile", $"clipGeom")) + .select(rst_pixelcount($"clippedRaster").alias("pCount")) + val df_exclude = df + .withColumn("clippedRaster", rst_clip($"tile", $"clipGeom", lit(false))) + .select(rst_pixelcount($"clippedRaster").alias("pCount")) + + val pCountInclude = df_include.first.getSeq[Long](0).head + val pCountExclude = df_exclude.first.getSeq[Long](0).head + + pCountInclude should be > pCountExclude + + } + } diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipTest.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipTest.scala index e3597141d..ddc974715 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipTest.scala @@ -25,7 +25,16 @@ class RST_ClipTest extends QueryTest with SharedSparkSessionGDAL with RST_ClipBe test("Testing RST_Clip with manual GDAL registration (H3, JTS).") { noCodegen { assume(System.getProperty("os.name") == "Linux") - behaviors(H3IndexSystem, JTS) + basicBehaviour(H3IndexSystem, JTS) + } + } + + // These tests are not index system nor geometry API specific. + // Only testing one pairing is sufficient. + test("Testing RST_Clip with different cutline options with manual GDAL registration (H3, JTS).") { + noCodegen { + assume(System.getProperty("os.name") == "Linux") + cutlineBehaviour(H3IndexSystem, JTS) } }