Skip to content

Commit

Permalink
added cutline option to RST_Clip
Browse files Browse the repository at this point in the history
  • Loading branch information
sllynn committed Sep 25, 2024
1 parent 63b96be commit bed01de
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 29 deletions.
19 changes: 15 additions & 4 deletions docs/source/api/raster-functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -193,23 +193,34 @@ rst_boundingbox
rst_clip
********

.. function:: rst_clip(tile, geometry)
.. function:: rst_clip(tile, geometry, cutline_all_touched)

Clips :code:`tile` with :code:`geometry`, provided in a supported encoding (WKB, WKT or GeoJSON).

:param tile: A column containing the raster tile.
:type tile: Column (RasterTileType)
:param geometry: A column containing the geometry to clip the raster to.
:type geometry: Column (GeometryType)
:param cutline_all_touched: A column to specify pixels boundary behavior.
:type cutline_all_touched: Column (BooleanType)
:rtype: Column: RasterTileType

.. note::
Notes
**Notes**

:code:`geometry` is expected to be:
- in the same coordinate reference system as the raster.
The :code:`geometry` parameter is expected to be:
- expressed in the same coordinate reference system as the raster.
- a polygon or a multipolygon.

The :code:`cutline_all_touched` parameter:
- Optional: default is true. this is a GDAL warp config for the operation.
- If set to true, the pixels touching the geometry are included in the clip,
regardless of half-in or half-out; this is important for tessellation behaviors.
- If set to false, only at least half-in pixels are included in the clip.

The actual GDAL command to clip looks something like the following (after some setup):
:code:`"gdalwarp -wo CUTLINE_ALL_TOUCHED=<TRUE|FALSE> -cutline <GEOMETRY> -crop_to_cutline"`

The output raster tiles will have:
- the same extent as the input geometry.
- the same number of bands as the input raster.
Expand Down
16 changes: 11 additions & 5 deletions python/mosaic/api/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,29 +147,35 @@ def rst_boundingbox(raster_tile: ColumnOrName) -> Column:
)


def rst_clip(raster_tile: ColumnOrName, geometry: ColumnOrName) -> Column:
def rst_clip(raster_tile: ColumnOrName, geometry: ColumnOrName, cutline_all_touched: Any = True) -> Column:
"""
Clips the raster to the given supported geometry (WKT, WKB, GeoJSON).
The result is Mosaic raster tile struct column to the clipped raster.
The result is stored in the checkpoint directory.
Clips `raster_tile` to the given supported `geometry` (WKT, WKB, GeoJSON).
The result is a Mosaic raster tile representing the clipped raster.
Parameters
----------
raster_tile : Column (RasterTileType)
Mosaic raster tile struct column.
geometry : Column (StringType)
The geometry to clip the raster to.
The geometry to clip the tile to.
cutline_all_touched : Column (BooleanType)
optional override to specify whether any pixels touching
cutline should be included vs half-in only, default is true
Returns
-------
Column (RasterTileType)
Mosaic raster tile struct column.
"""
if type(cutline_all_touched) == bool:
cutline_all_touched = lit(cutline_all_touched)

return config.mosaic_context.invoke_function(
"rst_clip",
pyspark_to_java_column(raster_tile),
pyspark_to_java_column(geometry),
pyspark_to_java_column(cutline_all_touched)
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ case class MosaicRasterGDAL(
val gt = getGeoTransform

val sourceCRS = getSpatialReference

val destCRSEPSGCode = (destCRS.GetAuthorityName(null), destCRS.GetAuthorityCode(null)) match {
case (null, _) => 0
case (name: String, code: String) if name == "EPSG" => code.toInt
case _ => 0
}

val transform = new osr.CoordinateTransformation(sourceCRS, destCRS)

val bbox = geometryAPI.geometry(
Expand All @@ -96,7 +103,9 @@ case class MosaicRasterGDAL(
val geom1 = org.gdal.ogr.ogr.CreateGeometryFromWkb(bbox.toWKB)
geom1.Transform(transform)

geometryAPI.geometry(geom1.ExportToWkb(), "WKB")
val mosaicGeom = geometryAPI.geometry(geom1.ExportToWkb(), "WKB")
mosaicGeom.setSpatialReference(destCRSEPSGCode)
mosaicGeom
}

/** @return The diagonal size of a raster. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import com.databricks.labs.mosaic.core.raster.operator.gdal.GDALWarp
import com.databricks.labs.mosaic.utils.PathUtils
import org.gdal.osr.SpatialReference

import java.util.Locale

/**
* RasterClipByVector is an object that defines the interface for clipping a
* raster by a vector geometry.
Expand All @@ -31,25 +33,35 @@ object RasterClipByVector {
* The geometry CRS.
* @param geometryAPI
* The geometry API.
* @param cutlineAllTouched
* Whether pixels touching cutline are automatically included (true) or
* included only the cutline crosses the centre point of the pixel
* (false) default: true.
* @return
* A clipped raster.
*/
def clip(raster: MosaicRasterGDAL, geometry: MosaicGeometry, geomCRS: SpatialReference, geometryAPI: GeometryAPI): MosaicRasterGDAL = {
def clip(
raster: MosaicRasterGDAL,
geometry: MosaicGeometry,
geomCRS: SpatialReference,
geometryAPI: GeometryAPI,
cutlineAllTouched: Boolean = true
): MosaicRasterGDAL = {
val rasterCRS = raster.getSpatialReference
val outShortName = raster.getDriversShortName
val geomSrcCRS = if (geomCRS == null) rasterCRS else geomCRS

val cutlineToken = cutlineAllTouched.toString.toUpperCase(Locale.US)

val resultFileName = PathUtils.createTmpFilePath(GDAL.getExtension(outShortName))

val shapeFileName = VectorClipper.generateClipper(geometry, geomSrcCRS, rasterCRS, geometryAPI)

// For -wo consult https://gdal.org/doxygen/structGDALWarpOptions.html
// SOURCE_EXTRA=3 is used to ensure that when the raster is clipped, the
// pixels that touch the geometry are included. The default is 1, 3 seems to be a good empirical value.
val result = GDALWarp.executeWarp(
resultFileName,
Seq(raster),
command = s"gdalwarp -wo CUTLINE_ALL_TOUCHED=TRUE -cutline $shapeFileName -crop_to_cutline"
command = s"gdalwarp -wo CUTLINE_ALL_TOUCHED=$cutlineToken -cutline $shapeFileName -crop_to_cutline"
)

VectorClipper.cleanUpClipper(shapeFileName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,9 @@ object VectorClipper {
val projectedGeom = geometry.osrTransformCRS(srcCrs, dstCrs, geometryAPI)

val geom = ogr.CreateGeometryFromWkb(projectedGeom.toWKB(2))
geom.AssignSpatialReference(dstCrs)

val geomLayer = shpDataSource.CreateLayer("geom")
val geomLayer = shpDataSource.CreateLayer("geom", dstCrs)

val idField = new org.gdal.ogr.FieldDefn("id", OFTInteger)
geomLayer.CreateField(idField)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,49 +5,57 @@ import com.databricks.labs.mosaic.core.raster.api.GDAL
import com.databricks.labs.mosaic.core.raster.operator.clip.RasterClipByVector
import com.databricks.labs.mosaic.core.types.RasterTileType
import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile
import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo}
import com.databricks.labs.mosaic.expressions.raster.base.Raster1ArgExpression
import com.databricks.labs.mosaic.expressions.base.WithExpressionInfo
import com.databricks.labs.mosaic.expressions.raster.base.Raster2ArgExpression
import com.databricks.labs.mosaic.functions.MosaicExpressionConfig
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant}
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, NullIntolerant}
import org.apache.spark.sql.types.BooleanType

import scala.util.Try

/** The expression for clipping a raster by a vector. */
case class RST_Clip(
rastersExpr: Expression,
geometryExpr: Expression,
cutlineAllTouchedExpr: Expression,
expressionConfig: MosaicExpressionConfig
) extends Raster1ArgExpression[RST_Clip](
) extends Raster2ArgExpression[RST_Clip](
rastersExpr,
geometryExpr,
cutlineAllTouchedExpr,
returnsRaster = true,
expressionConfig = expressionConfig
)
with NullIntolerant
with CodegenFallback {

val geometryAPI: GeometryAPI = GeometryAPI(expressionConfig.getGeometryAPI)

override def dataType: org.apache.spark.sql.types.DataType = {
GDAL.enable(expressionConfig)
RasterTileType(expressionConfig.getCellIdType, rastersExpr, expressionConfig.isRasterUseCheckpoint)
}

val geometryAPI: GeometryAPI = GeometryAPI(expressionConfig.getGeometryAPI)

/**
* Clips a raster by a vector.
*
* @param tile
* The raster to be used.
* @param arg1
* The vector to be used.
* @param arg2
* cutline handling (boolean).
* @return
* The clipped raster.
*/
override def rasterTransform(tile: MosaicRasterTile, arg1: Any): Any = {
override def rasterTransform(tile: MosaicRasterTile, arg1: Any, arg2: Any): Any = {
val geometry = geometryAPI.geometry(arg1, geometryExpr.dataType)
val geomCRS = geometry.getSpatialReferenceOSR
val cutlineAllTouched = arg2.asInstanceOf[Boolean]
tile.copy(
raster = RasterClipByVector.clip(tile.getRaster, geometry, geomCRS, geometryAPI)
raster = RasterClipByVector.clip(tile.getRaster, geometry, geomCRS, geometryAPI, cutlineAllTouched)
)
}

Expand All @@ -60,7 +68,7 @@ object RST_Clip extends WithExpressionInfo {

override def usage: String =
"""
|_FUNC_(expr1) - Returns a raster tile clipped by provided vector.
|_FUNC_(expr1, expr2) - Returns a raster tile clipped by provided vector.
|""".stripMargin

override def example: String =
Expand All @@ -72,8 +80,20 @@ object RST_Clip extends WithExpressionInfo {
| ...
| """.stripMargin

override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = {
GenericExpressionFactory.getBaseBuilder[RST_Clip](2, expressionConfig)
override def builder(exprConfig: MosaicExpressionConfig): FunctionBuilder = { (children: Seq[Expression]) =>
{
def checkCutline(cutline: Expression): Boolean = Try(cutline.eval().asInstanceOf[Boolean]).isSuccess
val noCutlineArg = new Literal(true, BooleanType) // default is true for tessellation needs

children match {
// Note type checking only works for literals
case Seq(input, vector) =>
RST_Clip(input, vector, noCutlineArg, exprConfig)
case Seq(input, vector, cutline) if checkCutline(cutline) =>
RST_Clip(input, vector, cutline, exprConfig)
case _ => RST_Clip(children.head, children(1), children(2), exprConfig)
}
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,9 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends
def rst_bandmetadata(raster: Column, band: Int): Column =
ColumnAdapter(RST_BandMetaData(raster.expr, lit(band).expr, expressionConfig))
def rst_boundingbox(raster: Column): Column = ColumnAdapter(RST_BoundingBox(raster.expr, expressionConfig))
def rst_clip(raster: Column, geometry: Column): Column = ColumnAdapter(RST_Clip(raster.expr, geometry.expr, expressionConfig))
def rst_clip(raster: Column, geometry: Column): Column = ColumnAdapter(RST_Clip(raster.expr, geometry.expr, lit(true).expr, expressionConfig))
def rst_clip(raster: Column, geometry: Column, cutline: Boolean): Column = ColumnAdapter(RST_Clip(raster.expr, geometry.expr, lit(cutline).expr, expressionConfig))
def rst_clip(raster: Column, geometry: Column, cutline: Column): Column = ColumnAdapter(RST_Clip(raster.expr, geometry.expr, cutline.expr, expressionConfig))
def rst_convolve(raster: Column, kernel: Column): Column = ColumnAdapter(RST_Convolve(raster.expr, kernel.expr, expressionConfig))
def rst_dtmfromgeoms(pointsArray: Column, linesArray: Column, mergeTol: Column, snapTol: Column, origin: Column, xWidth: Column, yWidth: Column, xSize: Column, ySize: Column): Column =
ColumnAdapter(RST_DTMFromGeoms(pointsArray.expr, linesArray.expr, mergeTol.expr, snapTol.expr, origin.expr, xWidth.expr, yWidth.expr, xSize.expr, ySize.expr, expressionConfig))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@ package com.databricks.labs.mosaic.expressions.raster

import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
import com.databricks.labs.mosaic.core.index.IndexSystem
import com.databricks.labs.mosaic.core.raster.gdal.MosaicRasterGDAL
import com.databricks.labs.mosaic.functions.MosaicContext
import com.databricks.labs.mosaic.test.mocks.filePath
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.functions.lit
import org.scalatest.matchers.should.Matchers._

trait RST_ClipBehaviors extends QueryTest {

// noinspection MapGetGet
def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = {
def basicBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = {
spark.sparkContext.setLogLevel("ERROR")
val mc = MosaicContext.build(indexSystem, geometryAPI)
mc.register()
Expand Down Expand Up @@ -47,4 +50,44 @@ trait RST_ClipBehaviors extends QueryTest {

}

def cutlineBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = {
spark.sparkContext.setLogLevel("ERROR")
val mc = MosaicContext.build(indexSystem, geometryAPI)
mc.register()
val sc = spark
import mc.functions._
import sc.implicits._

val testPath = filePath("/binary/geotiff-small/chicago_sp27.tif")

val createInfo = Map("path" -> testPath, "parentPath" -> testPath)
val testRaster = MosaicRasterGDAL.readRaster(createInfo)

val ftMeters = 0.3 // ~0.3 ft in meter
val ftUnits = 0.3 // epsg:26771 0.3 ft per unit
val buffVal: Double = testRaster.pixelXSize * ftMeters * ftUnits * 50.5
val clipRegion = testRaster
.bbox(geometryAPI, testRaster.getSpatialReference)
.getCentroid
.buffer(buffVal)

clipRegion.getSpatialReference shouldBe 26771

val df = spark.read.format("gdal").load(testPath)
.withColumn("clipGeom", st_geomfromgeojson(lit(clipRegion.toJSON)))

val df_include = df
.withColumn("clippedRaster", rst_clip($"tile", $"clipGeom"))
.select(rst_pixelcount($"clippedRaster").alias("pCount"))
val df_exclude = df
.withColumn("clippedRaster", rst_clip($"tile", $"clipGeom", lit(false)))
.select(rst_pixelcount($"clippedRaster").alias("pCount"))

val pCountInclude = df_include.first.getSeq[Long](0).head
val pCountExclude = df_exclude.first.getSeq[Long](0).head

pCountInclude should be > pCountExclude

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,16 @@ class RST_ClipTest extends QueryTest with SharedSparkSessionGDAL with RST_ClipBe
test("Testing RST_Clip with manual GDAL registration (H3, JTS).") {
noCodegen {
assume(System.getProperty("os.name") == "Linux")
behaviors(H3IndexSystem, JTS)
basicBehaviour(H3IndexSystem, JTS)
}
}

// These tests are not index system nor geometry API specific.
// Only testing one pairing is sufficient.
test("Testing RST_Clip with different cutline options with manual GDAL registration (H3, JTS).") {
noCodegen {
assume(System.getProperty("os.name") == "Linux")
cutlineBehaviour(H3IndexSystem, JTS)
}
}

Expand Down

0 comments on commit bed01de

Please sign in to comment.