From a9b17e4a4dc9c7647905b248ea894b6659b95a1c Mon Sep 17 00:00:00 2001 From: Stuart Lynn <50170698+sllynn@users.noreply.github.com> Date: Fri, 1 Nov 2024 18:52:00 +0000 Subject: [PATCH] Resolving triangulation issues (#592) * fixes to triangulation; exposed split point finding algorithm choice * updated R bindings and tests * fixed python bindings and tests * fixed failing test (sidecar files not cleaned up between checkpoint on / checkpoint off tests) * updated docs to reflect new sigs of triangulation funcs --- .../tests/testthat/testRasterFunctions.R | 10 +- .../tests/testthat/testVectorFunctions.R | 9 +- .../tests/testthat/testRasterFunctions.R | 4 +- .../tests/testthat/testVectorFunctions.R | 9 +- docs/docs-requirements.txt | 2 +- docs/source/api/raster-functions.rst | 32 ++++-- docs/source/api/spatial-functions.rst | 37 +++++-- pom.xml | 4 +- python/mosaic/api/functions.py | 10 ++ python/mosaic/api/raster.py | 11 ++ python/test/test_raster_functions.py | 6 +- python/test/test_vector_functions.py | 9 +- .../multipoint/MosaicMultiPoint.scala | 6 +- .../multipoint/MosaicMultiPointJTS.scala | 26 ++--- ...nformingDelaunayTriangulationBuilder.scala | 101 ++++++++++++++++++ .../operator/rasterize/GDALRasterize.scala | 2 +- .../TriangulationSplitPointTypeEnum.scala | 18 ++++ .../geometry/ST_InterpolateElevation.scala | 37 ++++--- .../expressions/geometry/ST_Triangulate.scala | 23 ++-- .../expressions/raster/RST_DTMFromGeoms.scala | 42 +++++--- .../labs/mosaic/functions/MosaicContext.scala | 12 +-- .../multipoint/TestMultiPointJTS.scala | 12 +-- .../ST_InterpolateElevationBehaviours.scala | 22 ++-- .../geometry/ST_TriangulateBehaviours.scala | 25 ++--- .../geometry/ST_TriangulateTest.scala | 1 - .../raster/RST_DTMFromGeomsBehaviours.scala | 39 ++++--- .../raster/RST_DTMFromGeomsTest.scala | 4 +- .../raster/RST_SetSRIDBehaviors.scala | 8 +- .../sql/test/SharedSparkSessionGDAL.scala | 23 +++- 29 files changed, 406 insertions(+), 138 deletions(-) create mode 100644 src/main/scala/com/databricks/labs/mosaic/core/geometry/triangulation/JTSConformingDelaunayTriangulationBuilder.scala create mode 100644 src/main/scala/com/databricks/labs/mosaic/core/types/model/TriangulationSplitPointTypeEnum.scala diff --git a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testRasterFunctions.R b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testRasterFunctions.R index 538062b9c..13e78ac24 100644 --- a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testRasterFunctions.R +++ b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testRasterFunctions.R @@ -8,7 +8,7 @@ generate_singleband_raster_df <- function() { test_that("mosaic can read single-band GeoTiff", { sdf <- generate_singleband_raster_df() - + row <- first(sdf) expect_equal(row$length, 1067862L) expect_equal(row$x_size, 2400) @@ -158,14 +158,16 @@ sdf <- createDataFrame( sdf <- agg(groupBy(sdf), masspoints = collect_list(column("wkt"))) sdf <- withColumn(sdf, "breaklines", expr("array('LINESTRING EMPTY')")) +sdf <- withColumn(sdf, "splitPointFinder", lit("NONENCROACHING")) sdf <- withColumn(sdf, "origin", st_geomfromwkt(lit("POINT (0.6 1.8)"))) sdf <- withColumn(sdf, "xWidth", lit(12L)) sdf <- withColumn(sdf, "yWidth", lit(6L)) sdf <- withColumn(sdf, "xSize", lit(0.1)) sdf <- withColumn(sdf, "ySize", lit(0.1)) +sdf <- withColumn(sdf, "noData", lit(-9999.0)) sdf <- withColumn(sdf, "tile", rst_dtmfromgeoms( -column("masspoints"), column("breaklines"), lit(0.0), lit(0.01), -column("origin"), column("xWidth"), column("yWidth"), column("xSize"), column("ySize")) -) +column("masspoints"), column("breaklines"), lit(0.0), lit(0.01), column("splitPointFinder"), +column("origin"), column("xWidth"), column("yWidth"), column("xSize"), column("ySize"), column("noData") +)) expect_equal(SparkR::count(sdf), 1) }) \ No newline at end of file diff --git a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R index 4294e1a15..07744b2bf 100644 --- a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R +++ b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R @@ -109,14 +109,14 @@ sdf <- createDataFrame( sdf <- agg(groupBy(sdf), masspoints = collect_list(column("wkt"))) sdf <- withColumn(sdf, "breaklines", expr("array('LINESTRING EMPTY')")) -triangulation_sdf <- withColumn(sdf, "triangles", st_triangulate(column("masspoints"), column("breaklines"), lit(0.0), lit(0.01))) +triangulation_sdf <- withColumn(sdf, "triangles", st_triangulate(column("masspoints"), column("breaklines"), lit(0.0), lit(0.01), lit("NONENCROACHING"))) cache(triangulation_sdf) expect_equal(SparkR::count(triangulation_sdf), 2) expected <- c("POLYGON Z((0 2 2, 2 1 0, 1 3 3, 0 2 2))", "POLYGON Z((1 3 3, 2 1 0, 3 2 1, 1 3 3))") expect_contains(expected, first(triangulation_sdf)$triangles) interpolation_sdf <- sdf -interpolation_sdf <- withColumn(interpolation_sdf, "origin", st_geomfromwkt(lit("POINT (0.6 1.8)"))) +interpolation_sdf <- withColumn(interpolation_sdf, "origin", st_geomfromwkt(lit("POINT (0.55 1.75)"))) interpolation_sdf <- withColumn(interpolation_sdf, "xWidth", lit(12L)) interpolation_sdf <- withColumn(interpolation_sdf, "yWidth", lit(6L)) interpolation_sdf <- withColumn(interpolation_sdf, "xSize", lit(0.1)) @@ -124,8 +124,9 @@ interpolation_sdf <- withColumn(interpolation_sdf, "ySize", lit(0.1)) interpolation_sdf <- withColumn(interpolation_sdf, "interpolated", st_interpolateelevation( column("masspoints"), column("breaklines"), - lit(0.0), lit(0.01), + lit(0.01), + lit("NONENCROACHING"), column("origin"), column("xWidth"), column("yWidth"), @@ -134,5 +135,5 @@ interpolation_sdf <- withColumn(interpolation_sdf, "interpolated", st_interpolat )) cache(interpolation_sdf) expect_equal(SparkR::count(interpolation_sdf), 6 * 12) -expect_contains(collect(interpolation_sdf)$interpolated, "POINT Z(0.6 2 1.8)") +expect_contains(collect(interpolation_sdf)$interpolated, "POINT Z(1.6 1.8 1.2)") }) diff --git a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testRasterFunctions.R b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testRasterFunctions.R index ddbcce8c8..d67888725 100644 --- a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testRasterFunctions.R +++ b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testRasterFunctions.R @@ -209,11 +209,13 @@ test_that ("a terrain model can be produced from point geometries", { breaklines, as.double(0.0), as.double(0.01), + "NONENCROACHING", origin, xWidth, yWidth, xSize, - ySize + ySize, + as.double(-9999.0) ) ) expect_equal(sdf_nrow(sdf), 1) diff --git a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R index a177b37bf..c8baaf572 100644 --- a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R +++ b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R @@ -134,7 +134,7 @@ test_that ("triangulation and interpolation functions behave as intended", { mutate(breaklines = array("LINESTRING EMPTY")) triangulation_sdf <- sdf %>% - mutate(triangles = st_triangulate(masspoints, breaklines, as.double(0.00), as.double(0.01))) + mutate(triangles = st_triangulate(masspoints, breaklines, as.double(0.00), as.double(0.01), "NONENCROACHING")) expect_equal(sdf_nrow(triangulation_sdf), 2) @@ -144,7 +144,7 @@ test_that ("triangulation and interpolation functions behave as intended", { interpolation_sdf <- sdf %>% mutate( - origin = st_geomfromwkt("POINT (0.6 1.8)"), + origin = st_geomfromwkt("POINT (0.55 1.75)"), xWidth = 12L, yWidth = 6L, xSize = as.double(0.1), @@ -152,8 +152,9 @@ test_that ("triangulation and interpolation functions behave as intended", { interpolated = st_interpolateelevation( masspoints, breaklines, - as.double(0.0), as.double(0.01), + as.double(0.01), + "NONENCROACHING", origin, xWidth, yWidth, @@ -163,5 +164,5 @@ test_that ("triangulation and interpolation functions behave as intended", { ) expect_equal(sdf_nrow(interpolation_sdf), 6 * 12) expect_contains(sdf_collect(interpolation_sdf)$interpolated, - "POINT Z(0.6 2 1.8)") + "POINT Z(1.6 1.8 1.2)") }) diff --git a/docs/docs-requirements.txt b/docs/docs-requirements.txt index 969601087..60f5c67fa 100644 --- a/docs/docs-requirements.txt +++ b/docs/docs-requirements.txt @@ -1,4 +1,4 @@ -setuptools==68.1.2 +setuptools>=70.0.0 Sphinx==6.1.3 sphinx-material==0.0.36 nbsphinx>=0.8.8 diff --git a/docs/source/api/raster-functions.rst b/docs/source/api/raster-functions.rst index db18f0171..0b0ec5fcf 100644 --- a/docs/source/api/raster-functions.rst +++ b/docs/source/api/raster-functions.rst @@ -491,7 +491,7 @@ rst_derivedband rst_dtmfromgeoms **************** -.. function:: rst_dtmfromgeoms(pointsArray, linesArray, mergeTolerance, snapTolerance, origin, xWidth, yWidth, xSize, ySize) +.. function:: rst_dtmfromgeoms(pointsArray, linesArray, mergeTolerance, snapTolerance, splitPointFinder, origin, xWidth, yWidth, xSize, ySize, noData) Generate a raster with interpolated elevations across a grid of points described by: @@ -518,6 +518,13 @@ rst_dtmfromgeoms Setting this value to zero may result in the output triangle vertices being assigned a null Z value. Both tolerance parameters are expressed in the same units as the projection of the input point geometries. + Additionally, you have control over the algorithm used to find split points on the constraint lines. The recommended + default option here is the "NONENCROACHING" algorithm. You can also use the "MIDPOINT" algorithm if you find the + constraint fitting process fails to converge. For full details of these options see the JTS reference + `here `__. + + The :code:`noData` value of the output raster can be set using the :code:`noData` parameter. + This is a generator expression and the resulting DataFrame will contain one row per point of the grid. :param pointsArray: Array of geometries respresenting the points to be triangulated @@ -528,6 +535,8 @@ rst_dtmfromgeoms :type mergeTolerance: Column (DoubleType) :param snapTolerance: A snapping tolerance used to relate created points to their corresponding lines for elevation interpolation. :type snapTolerance: Column (DoubleType) + :param splitPointFinder: Algorithm used for finding split points on constraint lines. Options are "NONENCROACHING" and "MIDPOINT". + :type splitPointFinder: Column (StringType) :param origin: A point geometry describing the bottom-left corner of the grid. :type origin: Column (Geometry) :param xWidth: The number of points in the grid in x direction. @@ -538,6 +547,8 @@ rst_dtmfromgeoms :type xSize: Column (DoubleType) :param ySize: The spacing between each point on the grid's y-axis. :type ySize: Column (DoubleType) + :param noData: The no-data value of the output raster. + :type noData: Column (DoubleType) :rtype: Column (RasterTileType) :example: @@ -567,7 +578,8 @@ rst_dtmfromgeoms df.select( rst_dtmfromgeoms( "masspoints", "breaklines", lit(0.0), lit(0.01), - "origin", "xWidth", "yWidth", "xSize", "ySize" + "origin", "xWidth", "yWidth", "xSize", "ySize", + split_point_finder="NONENCROACHING", no_data_value=-9999.0 ) ).show(truncate=False) +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -594,8 +606,10 @@ rst_dtmfromgeoms df.select( rst_dtmfromgeoms( - $"masspoints", $"breaklines", lit(0.0), lit(0.01), - $"origin", $"xWidth", $"yWidth", $"xSize", $"ySize" + $"masspoints", $"breaklines", + lit(0.0), lit(0.01), lit("NONENCROACHING") + $"origin", $"xWidth", $"yWidth", + $"xSize", $"ySize", lit(-9999.0) ) ).show(1, false) +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -615,8 +629,9 @@ rst_dtmfromgeoms "POINT Z (0 2 2)" ), ARRAY("LINESTRING EMPTY"), - DOUBLE(0.0), DOUBLE(0.01), - "POINT (0.6 1.8)", 12, 6, DOUBLE(0.1), DOUBLE(0.1) + DOUBLE(0.0), DOUBLE(0.01), "NONENCROACHING", + "POINT (0.6 1.8)", 12, 6, + DOUBLE(0.1), DOUBLE(0.1), DOUBLE(-9999.0) ) AS tile +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |rst_dtmfromgeoms(masspoints, breaklines, 0.0, 0.01, origin, xWidth, yWidth, xSize, ySize) | @@ -638,8 +653,9 @@ rst_dtmfromgeoms sdf <- withColumn(sdf, "breaklines", expr("array('LINESTRING EMPTY')")) sdf <- select(sdf, rst_dtmfromgeoms( column("masspoints"), column("breaklines"), - lit(0.0), lit(0.01), - lit("POINT (0.6 1.8)"), lit(12L), lit(6L), lit(0.1), lit(0.1) + lit(0.0), lit(0.01), lit("NONENCROACHING"), + lit("POINT (0.6 1.8)"), lit(12L), lit(6L), + lit(0.1), lit(0.1), lit(-9999.0) ) ) showDF(sdf, n=1, truncate=F) diff --git a/docs/source/api/spatial-functions.rst b/docs/source/api/spatial-functions.rst index 9919bc10b..44b5945fd 100644 --- a/docs/source/api/spatial-functions.rst +++ b/docs/source/api/spatial-functions.rst @@ -881,7 +881,7 @@ st_haversine st_interpolateelevation *********************** -.. function:: st_interpolateelevation(pointsArray, linesArray, mergeTolerance, snapTolerance, origin, xWidth, yWidth, xSize, ySize) +.. function:: st_interpolateelevation(pointsArray, linesArray, mergeTolerance, snapTolerance, splitPointFinder, origin, xWidth, yWidth, xSize, ySize) Compute interpolated elevations across a grid of points described by: @@ -908,6 +908,11 @@ st_interpolateelevation Setting this value to zero may result in the output triangle vertices being assigned a null Z value. Both tolerance parameters are expressed in the same units as the projection of the input point geometries. + Additionally, you have control over the algorithm used to find split points on the constraint lines. The recommended + default option here is the "NONENCROACHING" algorithm. You can also use the "MIDPOINT" algorithm if you find the + constraint fitting process fails to converge. For full details of these options see the JTS reference + `here `__. + This is a generator expression and the resulting DataFrame will contain one row per point of the grid. :param pointsArray: Array of geometries respresenting the points to be triangulated @@ -919,6 +924,8 @@ st_interpolateelevation :param snapTolerance: A snapping tolerance used to relate created points to their corresponding lines for elevation interpolation. :type snapTolerance: Column (DoubleType) :param origin: A point geometry describing the bottom-left corner of the grid. + :param splitPointFinder: Algorithm used for finding split points on constraint lines. Options are "NONENCROACHING" and "MIDPOINT". + :type splitPointFinder: Column (StringType) :type origin: Column (Geometry) :param xWidth: The number of points in the grid in x direction. :type xWidth: Column (IntegerType) @@ -957,7 +964,8 @@ st_interpolateelevation df.select( st_interpolateelevation( "masspoints", "breaklines", lit(0.0), lit(0.01), - "origin", "xWidth", "yWidth", "xSize", "ySize" + "origin", "xWidth", "yWidth", "xSize", "ySize", + split_point_finder="NONENCROACHING" ) ).show(4, truncate=False) +--------------------------------------------------+ @@ -987,7 +995,8 @@ st_interpolateelevation df.select( st_interpolateelevation( - $"masspoints", $"breaklines", lit(0.0), lit(0.01), + $"masspoints", $"breaklines", + lit(0.0), lit(0.01), lit("NONENCROACHING"), $"origin", $"xWidth", $"yWidth", $"xSize", $"ySize" ) ).show(4, false) @@ -1011,7 +1020,7 @@ st_interpolateelevation "POINT Z (0 2 2)" ), ARRAY("LINESTRING EMPTY"), - DOUBLE(0.0), DOUBLE(0.01), + DOUBLE(0.0), DOUBLE(0.01), "NONENCROACHING", "POINT (0.6 1.8)", 12, 6, DOUBLE(0.1), DOUBLE(0.1) ) +--------------------------------------------------+ @@ -1037,7 +1046,7 @@ st_interpolateelevation sdf <- withColumn(sdf, "breaklines", expr("array('LINESTRING EMPTY')")) sdf <- select(sdf, st_interpolateelevation( column("masspoints"), column("breaklines"), - lit(0.0), lit(0.01), + lit(0.0), lit(0.01), lit("NONENCROACHING"), lit("POINT (0.6 1.8)"), lit(12L), lit(6L), lit(0.1), lit(0.1) ) ) @@ -1757,7 +1766,7 @@ st_transform st_triangulate ************** -.. function:: st_triangulate(pointsArray, linesArray, mergeTolerance, snapTolerance) +.. function:: st_triangulate(pointsArray, linesArray, mergeTolerance, snapTolerance, splitPointFinder) Performs a conforming Delaunay triangulation using the points in :code:`pointsArray` including :code:`linesArray` as constraint / break lines. @@ -1773,6 +1782,11 @@ st_triangulate Setting this value to zero may result in the output triangle vertices being assigned a null Z value. Both tolerance parameters are expressed in the same units as the projection of the input point geometries. + Additionally, you have control over the algorithm used to find split points on the constraint lines. The recommended + default option here is the "NONENCROACHING" algorithm. You can also use the "MIDPOINT" algorithm if you find the + constraint fitting process fails to converge. For full details of these options see the JTS reference + `here `__. + This is a generator expression and the resulting DataFrame will contain one row per triangle returned by the algorithm. :param pointsArray: Array of geometries respresenting the points to be triangulated @@ -1783,6 +1797,8 @@ st_triangulate :type mergeTolerance: Column (DoubleType) :param snapTolerance: A snapping tolerance used to relate created points to their corresponding lines for elevation interpolation. :type snapTolerance: Column (DoubleType) + :param splitPointFinder: Algorithm used for finding split points on constraint lines. Options are "NONENCROACHING" and "MIDPOINT". + :type splitPointFinder: Column (StringType) :rtype: Column (Geometry) :example: @@ -1803,7 +1819,7 @@ st_triangulate .groupBy() .agg(collect_list("wkt").alias("masspoints")) .withColumn("breaklines", array(lit("LINESTRING EMPTY"))) - .withColumn("triangles", st_triangulate("masspoints", "breaklines", lit(0.0), lit(0.01)) + .withColumn("triangles", st_triangulate("masspoints", "breaklines", lit(0.0), lit(0.01), "NONENCROACHING")) ) df.show(2, False) +---------------------------------------+ @@ -1826,7 +1842,7 @@ st_triangulate .withColumn("triangles", st_triangulate( $"masspoints", $"breaklines", - lit(0.0), lit(0.01) + lit(0.0), lit(0.01), lit("NONENCROACHING") ) ) @@ -1849,7 +1865,8 @@ st_triangulate "POINT Z (0 2 2)" ), ARRAY("LINESTRING EMPTY"), - DOUBLE(0.0), DOUBLE(0.01) + DOUBLE(0.0), DOUBLE(0.01), + "NONENCROACHING" ) +---------------------------------------+ |triangles | @@ -1872,7 +1889,7 @@ st_triangulate sdf <- withColumn(sdf, "breaklines", expr("array('LINESTRING EMPTY')")) result <- select(sdf, st_triangulate( column("masspoints"), column("breaklines"), - lit(0.0), lit(0.01)) + lit(0.0), lit(0.01), lit("NONENCROACHING") ) showDF(result, truncate=F) +---------------------------------------+ diff --git a/pom.xml b/pom.xml index ba0c1b94f..1d0ace000 100644 --- a/pom.xml +++ b/pom.xml @@ -98,12 +98,12 @@ org.locationtech.jts jts-core - 1.19.0 + 1.20.0 org.locationtech.jts.io jts-io-common - 1.19.0 + 1.20.0 org.locationtech.proj4j diff --git a/python/mosaic/api/functions.py b/python/mosaic/api/functions.py index e02458f41..4e7d450db 100644 --- a/python/mosaic/api/functions.py +++ b/python/mosaic/api/functions.py @@ -622,6 +622,7 @@ def st_triangulate( lines_array: ColumnOrName, merge_tolerance: ColumnOrName, snap_tolerance: ColumnOrName, + split_point_finder: str = "NONENCROACHING", ) -> Column: """ Generate a triangulated surface mesh from the set of points `points_array` and constraint lines `lines_array`. @@ -641,6 +642,9 @@ def st_triangulate( A tolerance used to coalesce points in close proximity to each other before performing triangulation. snap_tolerance : Column A snapping tolerance used to relate created points to their corresponding lines for elevation interpolation. + split_point_finder : String + (Optional) The split point finding algorithm used to incorporate `lines_array` into the triangulation. + Default is "NONENCROACHING", alternative is "MIDPOINT". Returns ------- @@ -652,6 +656,7 @@ def st_triangulate( pyspark_to_java_column(lines_array), pyspark_to_java_column(merge_tolerance), pyspark_to_java_column(snap_tolerance), + pyspark_to_java_column(lit(split_point_finder)), ) @@ -665,6 +670,7 @@ def st_interpolateelevation( y_width: ColumnOrName, x_size: ColumnOrName, y_size: ColumnOrName, + split_point_finder: str = "NONENCROACHING", ) -> Column: """ Compute interpolated elevations across a grid of points described by @@ -701,6 +707,9 @@ def st_interpolateelevation( y_size : Column The spacing between each point on the grid's y-axis (in meters or degrees depending on the projection of `points_array`) + split_point_finder : String + (Optional) The split point finding algorithm used to incorporate `lines_array` into the triangulation. + Default is "NONENCROACHING", alternative is "MIDPOINT". Returns ------- @@ -712,6 +721,7 @@ def st_interpolateelevation( pyspark_to_java_column(lines_array), pyspark_to_java_column(merge_tolerance), pyspark_to_java_column(snap_tolerance), + pyspark_to_java_column(lit(split_point_finder)), pyspark_to_java_column(origin), pyspark_to_java_column(x_width), pyspark_to_java_column(y_width), diff --git a/python/mosaic/api/raster.py b/python/mosaic/api/raster.py index ef3c42e2c..3e29b0125 100644 --- a/python/mosaic/api/raster.py +++ b/python/mosaic/api/raster.py @@ -1,3 +1,5 @@ +import numpy as np + from typing import Any from pyspark.sql import Column @@ -267,6 +269,8 @@ def rst_dtmfromgeoms( y_width: ColumnOrName, x_size: ColumnOrName, y_size: ColumnOrName, + split_point_finder: str = "NONENCROACHING", + no_data_value: float = np.nan, ) -> Column: """ Generate a raster with interpolated elevations across a grid of points described by @@ -303,6 +307,11 @@ def rst_dtmfromgeoms( y_size : Column The spacing between each point on the grid's y-axis (in meters or degrees depending on the projection of `points_array`) + split_point_finder : String + (Optional) The split point finding algorithm used to incorporate `lines_array` into the triangulation. + Default is "NONENCROACHING", alternative is "MIDPOINT". + no_data_value : Float + (Optional) The nodata value to assign to the output raster. Must be a Double Type value. Default is NaN. Returns ------- @@ -315,11 +324,13 @@ def rst_dtmfromgeoms( pyspark_to_java_column(lines_array), pyspark_to_java_column(merge_tolerance), pyspark_to_java_column(snap_tolerance), + pyspark_to_java_column(lit(split_point_finder)), pyspark_to_java_column(origin), pyspark_to_java_column(x_width), pyspark_to_java_column(y_width), pyspark_to_java_column(x_size), pyspark_to_java_column(y_size), + pyspark_to_java_column(lit(no_data_value)), ) diff --git a/python/test/test_raster_functions.py b/python/test/test_raster_functions.py index 22bfb0379..0096d4927 100644 --- a/python/test/test_raster_functions.py +++ b/python/test/test_raster_functions.py @@ -115,7 +115,9 @@ def test_raster_scalar_functions(self): "rst_worldtorastercoord", api.rst_worldtorastercoord("tile", lit(0.0), lit(0.0)), ) - .withColumn("rst_write", api.rst_write("tile", lit("/mnt/mosaic_tmp/write-tile"))) + .withColumn( + "rst_write", api.rst_write("tile", lit("/mnt/mosaic_tmp/write-tile")) + ) ) result.write.format("noop").mode("overwrite").save() self.assertEqual(result.count(), 1) @@ -300,6 +302,8 @@ def test_dtmfromgeoms(self): "grid_size_y", "pixel_size_x", "pixel_size_y", + "NONENCROACHING", + -9999.0, ), ) .drop( diff --git a/python/test/test_vector_functions.py b/python/test/test_vector_functions.py index 2c6fb18a4..ba425a4bc 100644 --- a/python/test/test_vector_functions.py +++ b/python/test/test_vector_functions.py @@ -325,7 +325,9 @@ def test_triangulate_interpolate(self): triangulation_df = df.withColumn( "triangles", - api.st_triangulate("masspoints", "breaklines", lit(0.0), lit(0.01)), + api.st_triangulate( + "masspoints", "breaklines", lit(0.0), lit(0.01), lit("NONENCROACHING") + ), ) triangulation_df.cache() self.assertEqual(triangulation_df.count(), 2) @@ -338,7 +340,7 @@ def test_triangulate_interpolate(self): ) interpolation_df = ( - df.withColumn("origin", api.st_geomfromwkt(lit("POINT (0.6 1.8)"))) + df.withColumn("origin", api.st_geomfromwkt(lit("POINT (0.55 1.75)"))) .withColumn("xWidth", lit(12)) .withColumn("yWidth", lit(6)) .withColumn("xSize", lit(0.1)) @@ -355,6 +357,7 @@ def test_triangulate_interpolate(self): "yWidth", "xSize", "ySize", + lit("NONENCROACHING"), ), ) ) @@ -362,6 +365,6 @@ def test_triangulate_interpolate(self): interpolation_df.cache() self.assertEqual(interpolation_df.count(), 12 * 6) self.assertIn( - "POINT Z(0.6 2 1.8)", + "POINT Z(1.6 1.8 1.2)", [r["interpolated"] for r in interpolation_df.collect()], ) diff --git a/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPoint.scala b/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPoint.scala index 4990fb58f..f33f6d727 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPoint.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPoint.scala @@ -5,6 +5,7 @@ import com.databricks.labs.mosaic.core.geometry.MosaicGeometry import com.databricks.labs.mosaic.core.geometry.linestring.MosaicLineString import com.databricks.labs.mosaic.core.geometry.point.MosaicPoint import com.databricks.labs.mosaic.core.geometry.polygon.MosaicPolygon +import com.databricks.labs.mosaic.core.types.model.TriangulationSplitPointTypeEnum trait MosaicMultiPoint extends MosaicGeometry { @@ -30,7 +31,7 @@ trait MosaicMultiPoint extends MosaicGeometry { * newly created points against their originating breaklines. * @return A sequence of MosaicPolygon geometries. */ - def triangulate(breaklines: Seq[MosaicLineString], mergeTolerance: Double, snapTolerance: Double): Seq[MosaicPolygon] + def triangulate(breaklines: Seq[MosaicLineString], mergeTolerance: Double, snapTolerance: Double, splitPointFinder: TriangulationSplitPointTypeEnum.Value): Seq[MosaicPolygon] /** * Interpolates the elevation of the grid points using the breaklines. @@ -40,9 +41,10 @@ trait MosaicMultiPoint extends MosaicGeometry { * @param mergeTolerance The tolerance to use to simplify the triangulation by merging nearby points. * @param snapTolerance The tolerance to use for post-processing the results of the triangulation (snapping * newly created points against their originating breaklines. + * @param splitPointFinder The method to use to find split points to include `breaklines` in the triangulation. * @return A MosaicMultiPoint geometry with the interpolated elevation. */ - def interpolateElevation(breaklines: Seq[MosaicLineString], gridPoints: MosaicMultiPoint, mergeTolerance: Double, snapTolerance: Double) : MosaicMultiPoint + def interpolateElevation(breaklines: Seq[MosaicLineString], gridPoints: MosaicMultiPoint, mergeTolerance: Double, snapTolerance: Double, splitPointFinder: TriangulationSplitPointTypeEnum.Value) : MosaicMultiPoint /** * Creates a regular point grid from the origin point with the specified number of cells and cell sizes. diff --git a/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPointJTS.scala b/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPointJTS.scala index ba4ecf07e..e328d0e78 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPointJTS.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/geometry/multipoint/MosaicMultiPointJTS.scala @@ -5,6 +5,7 @@ import com.databricks.labs.mosaic.core.geometry.linestring.{MosaicLineString, Mo import com.databricks.labs.mosaic.core.geometry.multilinestring.MosaicMultiLineStringJTS import com.databricks.labs.mosaic.core.geometry.point.{MosaicPoint, MosaicPointJTS} import com.databricks.labs.mosaic.core.geometry.polygon.{MosaicPolygon, MosaicPolygonJTS} +import com.databricks.labs.mosaic.core.geometry.triangulation.JTSConformingDelaunayTriangulationBuilder import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum.{MULTIPOINT, POINT} import com.databricks.labs.mosaic.core.types.model._ import org.apache.spark.sql.catalyst.InternalRow @@ -12,7 +13,6 @@ import org.locationtech.jts.geom._ import org.locationtech.jts.geom.util.{LinearComponentExtracter, PolygonExtracter} import org.locationtech.jts.index.strtree.STRtree import org.locationtech.jts.linearref.LengthIndexedLine -import org.locationtech.jts.triangulate.ConformingDelaunayTriangulationBuilder import scala.collection.JavaConverters._ @@ -50,18 +50,17 @@ class MosaicMultiPointJTS(multiPoint: MultiPoint) extends MosaicGeometryJTS(mult override def getShellPoints: Seq[Seq[MosaicPointJTS]] = Seq(asSeq) - override def triangulate(breaklines: Seq[MosaicLineString], mergeTolerance: Double, snapTolerance: Double): Seq[MosaicPolygon] = { - val triangulator = new ConformingDelaunayTriangulationBuilder() - val geomFact = multiPoint.getFactory + override def triangulate(breaklines: Seq[MosaicLineString], mergeTolerance: Double, snapTolerance: Double, splitPointFinder: TriangulationSplitPointTypeEnum.Value): Seq[MosaicPolygon] = { - triangulator.setSites(multiPoint) + val triangulator = JTSConformingDelaunayTriangulationBuilder(multiPoint) + val geomFact = multiPoint.getFactory if (breaklines.nonEmpty) { - val multiLineString = MosaicMultiLineStringJTS.fromSeq(breaklines) - triangulator.setConstraints(multiLineString.getGeom) + triangulator.setConstraints(MosaicMultiLineStringJTS.fromSeq(breaklines).getGeom) } + triangulator.setTolerance(mergeTolerance) - val trianglesGeomCollection = triangulator.getTriangles(geomFact) + val trianglesGeomCollection = triangulator.getTriangles val trianglePolygons = PolygonExtracter.getPolygons(trianglesGeomCollection).asScala.map(_.asInstanceOf[Polygon]) val postProcessedTrianglePolygons = postProcessTriangulation(trianglePolygons, MosaicMultiLineStringJTS.fromSeq(breaklines).getGeom, snapTolerance) @@ -113,8 +112,11 @@ class MosaicMultiPointJTS(multiPoint: MultiPoint) extends MosaicGeometryJTS(mult ) } - override def interpolateElevation(breaklines: Seq[MosaicLineString], gridPoints: MosaicMultiPoint, mergeTolerance: Double, snapTolerance: Double): MosaicMultiPointJTS = { - val triangles = triangulate(breaklines, mergeTolerance, snapTolerance) + override def interpolateElevation( + breaklines: Seq[MosaicLineString], gridPoints: MosaicMultiPoint, + mergeTolerance: Double, snapTolerance: Double, + splitPointFinder: TriangulationSplitPointTypeEnum.Value): MosaicMultiPointJTS = { + val triangles = triangulate(breaklines, mergeTolerance, snapTolerance, splitPointFinder) .asInstanceOf[Seq[MosaicPolygonJTS]] val tree = new STRtree(4) @@ -144,8 +146,8 @@ class MosaicMultiPointJTS(multiPoint: MultiPoint) extends MosaicGeometryJTS(mult override def pointGrid(origin: MosaicPoint, xCells: Int, yCells: Int, xSize: Double, ySize: Double): MosaicMultiPointJTS = { val gridPoints = for (i <- 0 until xCells; j <- 0 until yCells) yield { - val x = origin.getX + i * xSize - val y = origin.getY + j * ySize + val x = origin.getX + i * xSize + xSize / 2 + val y = origin.getY + j * ySize + ySize / 2 val gridPoint = MosaicPointJTS(multiPoint.getFactory.createPoint(new Coordinate(x, y))) gridPoint.setSpatialReference(getSpatialReference) gridPoint diff --git a/src/main/scala/com/databricks/labs/mosaic/core/geometry/triangulation/JTSConformingDelaunayTriangulationBuilder.scala b/src/main/scala/com/databricks/labs/mosaic/core/geometry/triangulation/JTSConformingDelaunayTriangulationBuilder.scala new file mode 100644 index 000000000..f7bef1179 --- /dev/null +++ b/src/main/scala/com/databricks/labs/mosaic/core/geometry/triangulation/JTSConformingDelaunayTriangulationBuilder.scala @@ -0,0 +1,101 @@ +package com.databricks.labs.mosaic.core.geometry.triangulation + +import com.databricks.labs.mosaic.core.types.model.TriangulationSplitPointTypeEnum +import org.locationtech.jts.geom.{Coordinate, CoordinateList, Envelope, Geometry, LineString, MultiPoint} +import org.locationtech.jts.geom.util.LinearComponentExtracter +import org.locationtech.jts.triangulate.{ConformingDelaunayTriangulator, ConstraintSplitPointFinder, ConstraintVertex, DelaunayTriangulationBuilder, MidpointSplitPointFinder, NonEncroachingSplitPointFinder, Segment} +import org.locationtech.jts.triangulate.quadedge.QuadEdgeSubdivision + +import java.util + +class JTSConformingDelaunayTriangulationBuilder(geom: MultiPoint) { + + var tolerance: Double = 0.0 + val constraintVertexMap = new util.HashMap[Coordinate, ConstraintVertex] + var constraintLines: Geometry = null + var splitPointFinder: ConstraintSplitPointFinder = null + + def siteCoords: CoordinateList = DelaunayTriangulationBuilder.extractUniqueCoordinates(geom) + + def siteEnv: Envelope = DelaunayTriangulationBuilder.envelope(siteCoords) + + private def createSiteVertices(coords: CoordinateList): util.ArrayList[ConstraintVertex] = + { + val vertices = new util.ArrayList[ConstraintVertex] + coords.toCoordinateArray + .filter(coord => !constraintVertexMap.containsKey(coord)) + .map(new ConstraintVertex(_)) + .foreach(v => { + vertices.add(v) + }) + vertices + } + + def setTolerance(tolerance: Double): Unit = { + this.tolerance = tolerance + } + + def setConstraints(constraintLines: Geometry): Unit = { + this.constraintLines = constraintLines + } + + def setSplitPointFinder(splitPointFinder: TriangulationSplitPointTypeEnum.Value): Unit = { + this.splitPointFinder = + splitPointFinder match { + case TriangulationSplitPointTypeEnum.MIDPOINT => + new MidpointSplitPointFinder + case TriangulationSplitPointTypeEnum.NONENCROACHING => + new NonEncroachingSplitPointFinder + } + } + + def createVertices(geom: Geometry): Unit = { + geom.getCoordinates.foreach(coord => { + val v = new ConstraintVertex(coord) + constraintVertexMap.put(coord, v) + }) + } + + def createConstraintSegments(geometry: Geometry): util.ArrayList[Segment] = { + val constraintSegs = new util.ArrayList[Segment] + LinearComponentExtracter.getLines(geometry) + .toArray + .map(_.asInstanceOf[LineString]) + .filter(!_.isEmpty) + .foreach(l => createConstraintSegments(l, constraintSegs)) + constraintSegs + } + + def createConstraintSegments(line: LineString, constraintSegs: util.ArrayList[Segment]): Unit = { + val coords = line.getCoordinates + coords.zip(coords.tail) + .map(c => new Segment(c._1, c._2)) + .foreach(constraintSegs.add) + } + + def create(): QuadEdgeSubdivision = { + var segments: util.ArrayList[Segment] = null + if (constraintLines != null) { + siteEnv.expandToInclude(constraintLines.getEnvelopeInternal()) + createVertices(constraintLines) + segments = createConstraintSegments(constraintLines) + } + val sites = createSiteVertices(siteCoords) + val cdt = new ConformingDelaunayTriangulator(sites, tolerance) + + cdt.setConstraints(segments, new util.ArrayList(constraintVertexMap.values())) + cdt.formInitialDelaunay() + if (constraintLines != null) { cdt.enforceConstraints() } + cdt.getSubdivision() + } + + def getTriangles: Geometry = { + val subdiv = create() + subdiv.getTriangles(geom.getFactory) + } + +} + +object JTSConformingDelaunayTriangulationBuilder { + def apply(geom: MultiPoint): JTSConformingDelaunayTriangulationBuilder = new JTSConformingDelaunayTriangulationBuilder(geom) +} diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/rasterize/GDALRasterize.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/rasterize/GDALRasterize.scala index be36d6af1..b44823c5d 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/rasterize/GDALRasterize.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/operator/rasterize/GDALRasterize.scala @@ -41,7 +41,7 @@ object GDALRasterize { yWidth: Int, xSize: Double, ySize: Double, - noDataValue: Int = (-99999) + noDataValue: Double = Double.NaN ): MosaicRasterGDAL = { gdal.AllRegister() diff --git a/src/main/scala/com/databricks/labs/mosaic/core/types/model/TriangulationSplitPointTypeEnum.scala b/src/main/scala/com/databricks/labs/mosaic/core/types/model/TriangulationSplitPointTypeEnum.scala new file mode 100644 index 000000000..3281dfac4 --- /dev/null +++ b/src/main/scala/com/databricks/labs/mosaic/core/types/model/TriangulationSplitPointTypeEnum.scala @@ -0,0 +1,18 @@ +package com.databricks.labs.mosaic.core.types.model + +import java.util.Locale + +object TriangulationSplitPointTypeEnum extends Enumeration { + val MIDPOINT: TriangulationSplitPointTypeEnum.Value = Value("MIDPOINT") + val NONENCROACHING: TriangulationSplitPointTypeEnum.Value = Value("NONENCROACHING") + + def fromString(value: String): TriangulationSplitPointTypeEnum.Value = + TriangulationSplitPointTypeEnum.values + .find(_.toString == value.toUpperCase(Locale.ROOT)) + .getOrElse( + throw new Error( + s"Invalid mode for triangulation split point type: $value." + + s" Must be one of ${TriangulationSplitPointTypeEnum.values.mkString(",")}" + ) + ) +} diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevation.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevation.scala index 04387d2f1..09346af7c 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevation.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevation.scala @@ -5,6 +5,7 @@ import com.databricks.labs.mosaic.core.geometry.linestring.MosaicLineString import com.databricks.labs.mosaic.core.geometry.multipoint.MosaicMultiPoint import com.databricks.labs.mosaic.core.geometry.point.MosaicPoint import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum._ +import com.databricks.labs.mosaic.core.types.model.TriangulationSplitPointTypeEnum import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} import com.databricks.labs.mosaic.functions.MosaicExpressionConfig import org.apache.spark.sql.catalyst.InternalRow @@ -13,6 +14,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{CollectionGenerator, Expression} import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String import java.util.Locale @@ -21,6 +23,7 @@ case class ST_InterpolateElevation( linesArray: Expression, mergeTolerance: Expression, snapTolerance: Expression, + splitPointFinder: Expression, gridOrigin: Expression, gridWidthX: Expression, gridWidthY: Expression, @@ -71,6 +74,9 @@ case class ST_InterpolateElevation( } }) + val splitPointFinderValue = + TriangulationSplitPointTypeEnum.fromString(splitPointFinder.eval(input).asInstanceOf[UTF8String].toString) + val origin = geometryAPI.geometry(gridOrigin.eval(input), gridOrigin.dataType).asInstanceOf[MosaicPoint] val gridWidthXValue = gridWidthX.eval(input).asInstanceOf[Int] val gridWidthYValue = gridWidthY.eval(input).asInstanceOf[Int] @@ -82,7 +88,7 @@ case class ST_InterpolateElevation( val gridPoints = multiPointGeom.pointGrid(origin, gridWidthXValue, gridWidthYValue, gridSizeXValue, gridSizeYValue) val interpolatedPoints = multiPointGeom - .interpolateElevation(linesGeom, gridPoints, mergeToleranceValue, snapToleranceValue) + .interpolateElevation(linesGeom, gridPoints, mergeToleranceValue, snapToleranceValue, splitPointFinderValue) .asSeq val serializedPoints = interpolatedPoints @@ -94,7 +100,12 @@ case class ST_InterpolateElevation( outputRows } - override def children: Seq[Expression] = Seq(pointsArray, linesArray, mergeTolerance, snapTolerance, gridOrigin, gridWidthX, gridWidthY, gridSizeX, gridSizeY) + override def children: Seq[Expression] = + Seq( + pointsArray, linesArray, + mergeTolerance, snapTolerance, splitPointFinder, + gridOrigin, gridWidthX, gridWidthY, gridSizeX, gridSizeY + ) override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = { copy( @@ -102,11 +113,12 @@ case class ST_InterpolateElevation( linesArray = newChildren(1), mergeTolerance = newChildren(2), snapTolerance = newChildren(3), - gridOrigin = newChildren(4), - gridWidthX = newChildren(5), - gridWidthY = newChildren(6), - gridSizeX = newChildren(7), - gridSizeY = newChildren(8) + splitPointFinder = newChildren(4), + gridOrigin = newChildren(5), + gridWidthX = newChildren(6), + gridWidthY = newChildren(7), + gridSizeX = newChildren(8), + gridSizeY = newChildren(9) ) } } @@ -116,16 +128,17 @@ object ST_InterpolateElevation extends WithExpressionInfo { override def name: String = "st_interpolateelevation" override def usage: String = { - "_FUNC_(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9) - Returns the interpolated heights " + - "of the points in the grid defined by `expr5`, `expr6`, `expr7`, `expr8` and `expr9`" + + "_FUNC_(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9, expr10) - Returns the interpolated heights " + + "of the points in the grid defined by `expr6`, `expr7`, `expr8`, `expr9` and `expr10`" + "in the triangulated irregular network formed from the points in `expr1` " + - "including `expr2` as breaklines with tolerance parameters `expr3` and `expr4`." + "including `expr2` as breaklines with tolerance parameters `expr3` and `expr4` and " + + "employing the split point insertion algorithm `expr5`." } override def example: String = """ | Examples: - | > SELECT _FUNC_(a, b, c, d, e, f, g, h, i); + | > SELECT _FUNC_(a, b, c, d, e, f, g, h, i, j); | Point Z (...) | Point Z (...) | ... @@ -134,7 +147,7 @@ object ST_InterpolateElevation extends WithExpressionInfo { override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { - GenericExpressionFactory.getBaseBuilder[ST_InterpolateElevation](9, expressionConfig) + GenericExpressionFactory.getBaseBuilder[ST_InterpolateElevation](10, expressionConfig) } } \ No newline at end of file diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_Triangulate.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_Triangulate.scala index 621ac1033..4bc9e8598 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_Triangulate.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_Triangulate.scala @@ -5,6 +5,7 @@ import com.databricks.labs.mosaic.core.geometry.linestring.MosaicLineString import com.databricks.labs.mosaic.core.geometry.multipoint.MosaicMultiPoint import com.databricks.labs.mosaic.core.geometry.point.MosaicPoint import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum._ +import com.databricks.labs.mosaic.core.types.model.TriangulationSplitPointTypeEnum import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} import com.databricks.labs.mosaic.functions.MosaicExpressionConfig import org.apache.spark.sql.catalyst.InternalRow @@ -13,6 +14,7 @@ import org.apache.spark.sql.catalyst.expressions.{CollectionGenerator, Expressio import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.types.{ArrayType, DataType, StructField, StructType} +import org.apache.spark.unsafe.types.UTF8String import java.util.Locale @@ -21,6 +23,7 @@ case class ST_Triangulate ( linesArray: Expression, mergeTolerance: Expression, snapTolerance: Expression, + splitPointFinder: Expression, expressionConfig: MosaicExpressionConfig ) extends CollectionGenerator @@ -74,8 +77,10 @@ case class ST_Triangulate ( val mergeToleranceVal = mergeTolerance.eval(input).asInstanceOf[Double] val snapToleranceVal = snapTolerance.eval(input).asInstanceOf[Double] + val splitPointFinderVal = + TriangulationSplitPointTypeEnum.fromString(splitPointFinder.eval(input).asInstanceOf[UTF8String].toString) - val triangles = multiPointGeom.triangulate(linesGeom, mergeToleranceVal, snapToleranceVal) + val triangles = multiPointGeom.triangulate(linesGeom, mergeToleranceVal, snapToleranceVal, splitPointFinderVal) val outputGeoms = triangles.map( geometryAPI.serialize(_, firstElementType) @@ -84,10 +89,14 @@ case class ST_Triangulate ( outputRows } - override def children: Seq[Expression] = Seq(pointsArray, linesArray, mergeTolerance, snapTolerance) + override def children: Seq[Expression] = + Seq( + pointsArray, linesArray, + mergeTolerance, snapTolerance, splitPointFinder + ) override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = - copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3)) + copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3), newChildren(4)) } @@ -95,12 +104,14 @@ object ST_Triangulate extends WithExpressionInfo { override def name: String = "st_triangulate" - override def usage: String = "_FUNC_(expr1, expr2, expr3, expr4) - Returns the triangulated irregular network of the points in `expr1` including `expr2` as breaklines with tolerance parameters `expr3` and `expr4`." + override def usage: String = "_FUNC_(expr1, expr2, expr3, expr4, expr5) - Returns the triangulated irregular network " + + "of the points in `expr1` including `expr2` as breaklines with tolerance parameters `expr3` and `expr4` " + + "employing the split point insertion algorithm `expr5`." override def example: String = """ | Examples: - | > SELECT _FUNC_(a, b, c, d); + | > SELECT _FUNC_(a, b, c, d, e); | Point Z (...) | Point Z (...) | ... @@ -108,7 +119,7 @@ object ST_Triangulate extends WithExpressionInfo { | """.stripMargin override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { - GenericExpressionFactory.getBaseBuilder[ST_Triangulate](4, expressionConfig) + GenericExpressionFactory.getBaseBuilder[ST_Triangulate](5, expressionConfig) } } diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeoms.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeoms.scala index 41ec9c814..7bc2f439d 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeoms.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeoms.scala @@ -8,7 +8,7 @@ import com.databricks.labs.mosaic.core.raster.api.GDAL import com.databricks.labs.mosaic.core.raster.operator.rasterize.GDALRasterize import com.databricks.labs.mosaic.core.types.RasterTileType import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum.MULTIPOINT -import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile +import com.databricks.labs.mosaic.core.types.model.{MosaicRasterTile, TriangulationSplitPointTypeEnum} import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} import com.databricks.labs.mosaic.expressions.raster.base.RasterExpressionSerialization import com.databricks.labs.mosaic.functions.MosaicExpressionConfig @@ -27,11 +27,13 @@ case class RST_DTMFromGeoms( linesArray: Expression, mergeTolerance: Expression, snapTolerance: Expression, + splitPointFinder: Expression, gridOrigin: Expression, gridWidthX: Expression, gridWidthY: Expression, gridSizeX: Expression, gridSizeY: Expression, + noData: Expression, expressionConfig: MosaicExpressionConfig ) extends Expression with Serializable with RasterExpressionSerialization with CodegenFallback { @@ -82,6 +84,9 @@ case class RST_DTMFromGeoms( }) } + val splitPointFinderValue = + TriangulationSplitPointTypeEnum.fromString(splitPointFinder.eval(input).asInstanceOf[UTF8String].toString) + val origin = geometryAPI.geometry(gridOrigin.eval(input), gridOrigin.dataType).asInstanceOf[MosaicPoint] val gridWidthXValue = gridWidthX.eval(input).asInstanceOf[Int] val gridWidthYValue = gridWidthY.eval(input).asInstanceOf[Int] @@ -93,11 +98,13 @@ case class RST_DTMFromGeoms( val gridPoints = multiPointGeom.pointGrid(origin, gridWidthXValue, gridWidthYValue, gridSizeXValue, gridSizeYValue) val interpolatedPoints = multiPointGeom - .interpolateElevation(linesGeom, gridPoints, mergeToleranceValue, snapToleranceValue) + .interpolateElevation(linesGeom, gridPoints, mergeToleranceValue, snapToleranceValue, splitPointFinderValue) .asSeq + val noDataValue = noData.eval(input).asInstanceOf[Double] + val outputRaster = GDALRasterize.executeRasterize( - interpolatedPoints, None, origin, gridWidthXValue, gridWidthYValue, gridSizeXValue, gridSizeYValue + interpolatedPoints, None, origin, gridWidthXValue, gridWidthYValue, gridSizeXValue, gridSizeYValue, noDataValue ) val outputRow = MosaicRasterTile(null, outputRaster).serialize(StringType) @@ -107,7 +114,11 @@ case class RST_DTMFromGeoms( override def dataType: DataType = RasterTileType( expressionConfig.getCellIdType, StringType, expressionConfig.isRasterUseCheckpoint) - override def children: Seq[Expression] = Seq(pointsArray, linesArray, mergeTolerance, snapTolerance, gridOrigin, gridWidthX, gridWidthY, gridSizeX, gridSizeY) + override def children: Seq[Expression] = Seq( + pointsArray, linesArray, + mergeTolerance, snapTolerance, splitPointFinder, + gridOrigin, gridWidthX, gridWidthY, gridSizeX, gridSizeY, noData + ) override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = { copy( @@ -115,11 +126,13 @@ case class RST_DTMFromGeoms( linesArray = newChildren(1), mergeTolerance = newChildren(2), snapTolerance = newChildren(3), - gridOrigin = newChildren(4), - gridWidthX = newChildren(5), - gridWidthY = newChildren(6), - gridSizeX = newChildren(7), - gridSizeY = newChildren(8) + splitPointFinder = newChildren(4), + gridOrigin = newChildren(5), + gridWidthX = newChildren(6), + gridWidthY = newChildren(7), + gridSizeX = newChildren(8), + gridSizeY = newChildren(9), + noData = newChildren(10) ) } @@ -131,22 +144,23 @@ object RST_DTMFromGeoms extends WithExpressionInfo { override def name: String = "rst_dtmfromgeoms" override def usage: String = { - "_FUNC_(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9) - Returns the interpolated heights " + - "of the points in the grid defined by `expr5`, `expr6`, `expr7`, `expr8` and `expr9`" + + "_FUNC_(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9, expr10, expr11) - Returns the interpolated heights " + + "of the points in the grid defined by `expr6`, `expr7`, `expr8`, `expr9` and `expr10`" + "in the triangulated irregular network formed from the points in `expr1` " + - "including `expr2` as breaklines with tolerance parameters `expr3` and `expr4` as a raster in GeoTIFF format." + "including `expr2` as breaklines with tolerance parameters `expr3` and `expr4` " + + "employing the split point insertion algorithm `expr5` as a raster in GeoTIFF format with noDataValue `expr11`." } override def example: String = """ | Examples: - | > SELECT _FUNC_(a, b, c, d, e, f, g, h, i); + | > SELECT _FUNC_(a, b, c, d, e, f, g, h, i, j, k); | {index_id, raster_tile, parentPath, driver} | """.stripMargin override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { - GenericExpressionFactory.getBaseBuilder[RST_DTMFromGeoms](9, expressionConfig) + GenericExpressionFactory.getBaseBuilder[RST_DTMFromGeoms](11, expressionConfig) } } \ No newline at end of file diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 0a3c14a1b..b909bdde0 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -624,8 +624,8 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def st_geometrytype(geom: Column): Column = ColumnAdapter(ST_GeometryType(geom.expr, expressionConfig)) def st_hasvalidcoordinates(geom: Column, crsCode: Column, which: Column): Column = ColumnAdapter(ST_HasValidCoordinates(geom.expr, crsCode.expr, which.expr, expressionConfig)) - def st_interpolateelevation(pointsArray: Column, linesArray: Column, mergetol: Column, snaptol: Column, origin: Column, xWidth: Column, yWidth: Column, xSize: Column, ySize: Column): Column = - ColumnAdapter(geometry.ST_InterpolateElevation(pointsArray.expr, linesArray.expr, mergetol.expr, snaptol.expr, origin.expr, xWidth.expr, yWidth.expr, xSize.expr, ySize.expr, expressionConfig)) + def st_interpolateelevation(pointsArray: Column, linesArray: Column, mergetol: Column, snaptol: Column, splitPointFinder: Column, origin: Column, xWidth: Column, yWidth: Column, xSize: Column, ySize: Column): Column = + ColumnAdapter(geometry.ST_InterpolateElevation(pointsArray.expr, linesArray.expr, mergetol.expr, snaptol.expr, splitPointFinder.expr, origin.expr, xWidth.expr, yWidth.expr, xSize.expr, ySize.expr, expressionConfig)) def st_intersection(left: Column, right: Column): Column = ColumnAdapter(ST_Intersection(left.expr, right.expr, expressionConfig)) def st_isvalid(geom: Column): Column = ColumnAdapter(ST_IsValid(geom.expr, expressionConfig)) def st_length(geom: Column): Column = ColumnAdapter(ST_Length(geom.expr, expressionConfig)) @@ -647,8 +647,8 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def st_transform(geom: Column, srid: Column): Column = ColumnAdapter(ST_Transform(geom.expr, srid.expr, expressionConfig)) def st_translate(geom1: Column, xd: Column, yd: Column): Column = ColumnAdapter(ST_Translate(geom1.expr, xd.expr, yd.expr, expressionConfig)) - def st_triangulate(pointsArray: Column, linesArray: Column, mergeTol: Column, snapTol: Column): Column = - ColumnAdapter(ST_Triangulate(pointsArray.expr, linesArray.expr, mergeTol.expr, snapTol.expr, expressionConfig)) + def st_triangulate(pointsArray: Column, linesArray: Column, mergeTol: Column, snapTol: Column, splitPointFinder: Column): Column = + ColumnAdapter(ST_Triangulate(pointsArray.expr, linesArray.expr, mergeTol.expr, snapTol.expr, splitPointFinder.expr, expressionConfig)) def st_x(geom: Column): Column = ColumnAdapter(ST_X(geom.expr, expressionConfig)) def st_y(geom: Column): Column = ColumnAdapter(ST_Y(geom.expr, expressionConfig)) def st_z(geom: Column): Column = ColumnAdapter(ST_Z(geom.expr, expressionConfig)) @@ -704,8 +704,8 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_clip(raster: Column, geometry: Column, cutline: Boolean): Column = ColumnAdapter(RST_Clip(raster.expr, geometry.expr, lit(cutline).expr, expressionConfig)) def rst_clip(raster: Column, geometry: Column, cutline: Column): Column = ColumnAdapter(RST_Clip(raster.expr, geometry.expr, cutline.expr, expressionConfig)) def rst_convolve(raster: Column, kernel: Column): Column = ColumnAdapter(RST_Convolve(raster.expr, kernel.expr, expressionConfig)) - def rst_dtmfromgeoms(pointsArray: Column, linesArray: Column, mergeTol: Column, snapTol: Column, origin: Column, xWidth: Column, yWidth: Column, xSize: Column, ySize: Column): Column = - ColumnAdapter(RST_DTMFromGeoms(pointsArray.expr, linesArray.expr, mergeTol.expr, snapTol.expr, origin.expr, xWidth.expr, yWidth.expr, xSize.expr, ySize.expr, expressionConfig)) + def rst_dtmfromgeoms(pointsArray: Column, linesArray: Column, mergeTol: Column, snapTol: Column, splitPointFinder: Column, origin: Column, xWidth: Column, yWidth: Column, xSize: Column, ySize: Column, noData: Column): Column = + ColumnAdapter(RST_DTMFromGeoms(pointsArray.expr, linesArray.expr, mergeTol.expr, snapTol.expr, splitPointFinder.expr, origin.expr, xWidth.expr, yWidth.expr, xSize.expr, ySize.expr, noData.expr, expressionConfig)) def rst_pixelcount(raster: Column): Column = ColumnAdapter(RST_PixelCount(raster.expr, lit(false).expr, lit(false).expr, expressionConfig)) def rst_pixelcount(raster: Column, countNoData: Column): Column = ColumnAdapter(RST_PixelCount(raster.expr, countNoData.expr, lit(false).expr, expressionConfig)) def rst_pixelcount(raster: Column, countNoData: Column, countAll: Column): Column = ColumnAdapter(RST_PixelCount(raster.expr, countNoData.expr, countAll.expr, expressionConfig)) diff --git a/src/test/scala/com/databricks/labs/mosaic/core/geometry/multipoint/TestMultiPointJTS.scala b/src/test/scala/com/databricks/labs/mosaic/core/geometry/multipoint/TestMultiPointJTS.scala index 623466d36..2d4b1301b 100644 --- a/src/test/scala/com/databricks/labs/mosaic/core/geometry/multipoint/TestMultiPointJTS.scala +++ b/src/test/scala/com/databricks/labs/mosaic/core/geometry/multipoint/TestMultiPointJTS.scala @@ -4,10 +4,10 @@ import com.databricks.labs.mosaic.core.geometry.linestring.{MosaicLineString, Mo import com.databricks.labs.mosaic.core.geometry.multipolygon.MosaicMultiPolygonJTS import com.databricks.labs.mosaic.core.geometry.point.MosaicPointJTS import com.databricks.labs.mosaic.core.geometry.polygon.MosaicPolygonJTS -import com.databricks.labs.mosaic.core.raster.operator.rasterize.GDALRasterize +import com.databricks.labs.mosaic.core.types.model.TriangulationSplitPointTypeEnum +import org.apache.spark.sql.catalyst.InternalRow import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers._ -import org.apache.spark.sql.catalyst.InternalRow //noinspection ScalaRedundantCast class TestMultiPointJTS extends AnyFlatSpec { @@ -128,12 +128,12 @@ class TestMultiPointJTS extends AnyFlatSpec { "MosaicMultiPointJTS" should "perform an unconstrained Delauny tringulation" in { val multiPoint = MosaicMultiPointJTS.fromWKT("MULTIPOINT Z (2 1 0, 3 2 1, 1 3 3, 0 2 2)").asInstanceOf[MosaicMultiPointJTS] - val triangulated = multiPoint.triangulate(Seq(emptyLineString), 0.00, 0.01) + val triangulated = multiPoint.triangulate(Seq(emptyLineString), 0.00, 0.01, TriangulationSplitPointTypeEnum.NONENCROACHING) MosaicMultiPolygonJTS.fromSeq(triangulated).toWKT shouldBe "MULTIPOLYGON Z(((0 2 2, 2 1 0, 1 3 3, 0 2 2)), ((1 3 3, 2 1 0, 3 2 1, 1 3 3)))" } "MosaicMultiPointJTS" should "generate an equally spaced grid of points for use in elevation interpolation" in { - val origin = MosaicPointJTS.fromWKT("POINT (0 0)").asInstanceOf[MosaicPointJTS] + val origin = MosaicPointJTS.fromWKT("POINT (-0.5 -0.5)").asInstanceOf[MosaicPointJTS] val grid = MosaicMultiPointJTS.fromWKT("MULTIPOINT (0 0, 0 1, 0 2, 1 0, 1 1, 1 2, 2 0, 2 1, 2 2)").asInstanceOf[MosaicMultiPointJTS] val generatedGrid = grid.pointGrid(origin, 3, 3, 1.0, 1.0) generatedGrid.toWKT shouldBe grid.toWKT @@ -141,9 +141,9 @@ class TestMultiPointJTS extends AnyFlatSpec { "MosaicMultiPointJTS" should "perform elevation interpolation" in { val multiPoint = MosaicMultiPointJTS.fromWKT("MULTIPOINT Z (2.5 1.5 0, 3.5 2.5 1, 1.5 3.5 3, 0.5 2.5 2)").asInstanceOf[MosaicMultiPointJTS] - val origin = MosaicPointJTS.fromWKT("POINT (0 0)").asInstanceOf[MosaicPointJTS] + val origin = MosaicPointJTS.fromWKT("POINT (-0.5 -0.5)").asInstanceOf[MosaicPointJTS] val gridPoints = multiPoint.pointGrid(origin, 5, 5, 1, 1).intersection(multiPoint.convexHull).asInstanceOf[MosaicMultiPointJTS] - val z = multiPoint.interpolateElevation(Seq(emptyLineString), gridPoints, 0.00, 0.01) + val z = multiPoint.interpolateElevation(Seq(emptyLineString), gridPoints, 0.00, 0.01, TriangulationSplitPointTypeEnum.NONENCROACHING) z.toWKT shouldBe "MULTIPOINT Z((1 3 2.5), (2 2 0.8333333333333334), (2 3 2.1666666666666665), (3 2 0.5))" z.asSeq.map(_.getZ) shouldBe Seq(2.5, 0.8333333333333334, 2.1666666666666665, 0.5) } diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevationBehaviours.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevationBehaviours.scala index 42c719a31..e57f0f435 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevationBehaviours.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_InterpolateElevationBehaviours.scala @@ -2,6 +2,7 @@ package com.databricks.labs.mosaic.expressions.geometry import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.core.types.model.TriangulationSplitPointTypeEnum import com.databricks.labs.mosaic.functions.MosaicContext import com.databricks.labs.mosaic.functions.MosaicRegistryBehaviors.mosaicContext import org.apache.spark.sql.functions._ @@ -22,6 +23,7 @@ trait ST_InterpolateElevationBehaviours extends QueryTest { val ySize = -1.0 val mergeTolerance = 0.0 val snapTolerance = 0.01 + val splitPointFinder = TriangulationSplitPointTypeEnum.NONENCROACHING val origin = "POINT(348000 462000)" def simpleInterpolationBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { @@ -45,18 +47,22 @@ trait ST_InterpolateElevationBehaviours extends QueryTest { .withColumn("breaklines", array().cast(ArrayType(StringType))) .withColumn("mergeTolerance", lit(mergeTolerance)) .withColumn("snapTolerance", lit(snapTolerance)) + .withColumn("splitPointFinder", lit(splitPointFinder.toString)) .withColumn("origin", st_geomfromwkt(lit(origin))) .withColumn("grid_size_x", lit(xWidth)) .withColumn("grid_size_y", lit(yWidth)) .withColumn("pixel_size_x", lit(xSize)) .withColumn("pixel_size_y", lit(ySize)) .withColumn("elevation", st_interpolateelevation( - $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", + $"masspoints", $"breaklines", + $"mergeTolerance", $"snapTolerance", $"splitPointFinder", $"origin", $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y")) .drop( - $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"origin", - $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y" + $"masspoints", $"breaklines", + $"mergeTolerance", $"snapTolerance", $"splitPointFinder", + $"origin", $"grid_size_x", $"grid_size_y", + $"pixel_size_x", $"pixel_size_y" ) noException should be thrownBy result.collect() result.count() shouldBe 1000000L @@ -93,19 +99,23 @@ trait ST_InterpolateElevationBehaviours extends QueryTest { .crossJoin(linesDf) .withColumn("mergeTolerance", lit(mergeTolerance)) .withColumn("snapTolerance", lit(snapTolerance)) + .withColumn("splitPointFinder", lit(splitPointFinder.toString)) .withColumn("origin", st_geomfromwkt(lit(origin))) .withColumn("grid_size_x", lit(xWidth)) .withColumn("grid_size_y", lit(yWidth)) .withColumn("pixel_size_x", lit(xSize)) .withColumn("pixel_size_y", lit(ySize)) .withColumn("interpolated_grid_point", st_interpolateelevation( - $"masspoints", $"breaklines",$"mergeTolerance", $"snapTolerance", + $"masspoints", $"breaklines", + $"mergeTolerance", $"snapTolerance", $"splitPointFinder", $"origin", $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y")) .withColumn("elevation", st_z($"interpolated_grid_point")) .drop( - $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"origin", - $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y" + $"masspoints", $"breaklines", + $"mergeTolerance", $"snapTolerance", $"splitPointFinder", + $"origin", $"grid_size_x", $"grid_size_y", + $"pixel_size_x", $"pixel_size_y" ) .cache() noException should be thrownBy result.collect() diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateBehaviours.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateBehaviours.scala index 71e79ca86..dc3679c7d 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateBehaviours.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateBehaviours.scala @@ -2,10 +2,10 @@ package com.databricks.labs.mosaic.expressions.geometry import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.core.types.model.TriangulationSplitPointTypeEnum import com.databricks.labs.mosaic.functions.MosaicContext -import com.databricks.labs.mosaic.functions.MosaicRegistryBehaviors.mosaicContext import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.functions.{array, collect_list, explode, lit} +import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.scalatest.matchers.must.Matchers.noException import org.scalatest.matchers.should.Matchers._ @@ -17,16 +17,17 @@ trait ST_TriangulateBehaviours extends QueryTest { val linesPath = "src/test/resources/binary/elevation/sd46_dtm_breakline.shp" val outputRegion = "POLYGON((348000 462000, 348000 461000, 349000 461000, 349000 462000, 348000 462000))" val buffer = 50.0 - val mergeTolerance = 0.0 + val mergeTolerance = 1e-2 val snapTolerance = 0.01 + val splitPointFinder = TriangulationSplitPointTypeEnum.NONENCROACHING def simpleTriangulateBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { - val mc = mosaicContext - import mc.functions._ val sc = spark import sc.implicits._ - mc.register(spark) + val mc = MosaicContext.build(indexSystem, geometryAPI) + mc.register() + import mc.functions._ val points = MosaicContext.read .option("asWKB", "true") @@ -39,20 +40,20 @@ trait ST_TriangulateBehaviours extends QueryTest { .groupBy() .agg(collect_list($"geom_0").as("masspoints")) .withColumn("breaklines", array().cast(ArrayType(StringType))) - .withColumn("mesh", st_triangulate($"masspoints", $"breaklines", lit(mergeTolerance), lit(snapTolerance))) + .withColumn("mesh", st_triangulate($"masspoints", $"breaklines", lit(mergeTolerance), lit(snapTolerance), lit(splitPointFinder.toString))) .drop($"masspoints") noException should be thrownBy result.collect() - result.count() shouldBe 4445 + result.count() shouldBe 4453 } def conformingTriangulateBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { - val mc = mosaicContext - import mc.functions._ val sc = spark import sc.implicits._ - mc.register(spark) + val mc = MosaicContext.build(indexSystem, geometryAPI) + mc.register() + import mc.functions._ val points = MosaicContext.read .option("asWKB", "true") @@ -77,7 +78,7 @@ trait ST_TriangulateBehaviours extends QueryTest { .groupBy() .agg(collect_list($"geom_0").as("masspoints")) .crossJoin(linesDf) - .withColumn("mesh", st_triangulate($"masspoints", $"breaklines", lit(mergeTolerance), lit(snapTolerance))) + .withColumn("mesh", st_triangulate($"masspoints", $"breaklines", lit(mergeTolerance), lit(snapTolerance), lit(splitPointFinder.toString))) .drop($"masspoints", $"breaklines") noException should be thrownBy result.collect() diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateTest.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateTest.scala index 797a787cd..4acd26216 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TriangulateTest.scala @@ -8,5 +8,4 @@ import org.apache.spark.sql.test.SharedSparkSession class ST_TriangulateTest extends QueryTest with SharedSparkSession with ST_TriangulateBehaviours { test("Testing ST_Triangulate (H3, JTS) to produce unconstrained triangulation") { simpleTriangulateBehavior(H3IndexSystem, JTS)} test("Testing ST_Triangulate (H3, JTS) to produce conforming triangulation") { conformingTriangulateBehavior(H3IndexSystem, JTS)} - } diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsBehaviours.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsBehaviours.scala index 17eafe14e..63bfea25a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsBehaviours.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsBehaviours.scala @@ -2,6 +2,7 @@ package com.databricks.labs.mosaic.expressions.raster import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.core.types.model.TriangulationSplitPointTypeEnum import com.databricks.labs.mosaic.functions.MosaicContext import org.apache.spark.sql.functions.{array, collect_list, lit} import org.apache.spark.sql.test.SharedSparkSessionGDAL @@ -12,8 +13,10 @@ trait RST_DTMFromGeomsBehaviours extends SharedSparkSessionGDAL { val pointsPath = "src/test/resources/binary/elevation/sd46_dtm_point.shp" val linesPath = "src/test/resources/binary/elevation/sd46_dtm_breakline.shp" - val mergeTolerance = 0.0 + val mergeTolerance = 1e-6 val snapTolerance = 0.01 + val splitPointFinder = TriangulationSplitPointTypeEnum.NONENCROACHING + val noData = -9999.0 def simpleRasterizeTest(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { @@ -35,18 +38,21 @@ trait RST_DTMFromGeomsBehaviours extends SharedSparkSessionGDAL { .withColumn("breaklines", array().cast(ArrayType(StringType))) .withColumn("mergeTolerance", lit(mergeTolerance)) .withColumn("snapTolerance", lit(snapTolerance)) + .withColumn("splitPointFinder", lit(splitPointFinder.toString)) .withColumn("origin", st_point(lit(348000.0), lit(462000.0))) .withColumn("grid_size_x", lit(1000)) .withColumn("grid_size_y", lit(1000)) .withColumn("pixel_size_x", lit(1.0)) .withColumn("pixel_size_y", lit(-1.0)) + .withColumn("noData", lit(noData)) .withColumn("tile", rst_dtmfromgeoms( - $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"splitPointFinder", $"origin", $"grid_size_x", $"grid_size_y", - $"pixel_size_x", $"pixel_size_y")) + $"pixel_size_x", $"pixel_size_y", $"noData")) .drop( - $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"origin", - $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y" + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"splitPointFinder", + $"origin", $"grid_size_x", $"grid_size_y", + $"pixel_size_x", $"pixel_size_y", $"noData" ).cache() noException should be thrownBy result.collect() @@ -88,18 +94,21 @@ trait RST_DTMFromGeomsBehaviours extends SharedSparkSessionGDAL { .crossJoin(linesDf) .withColumn("mergeTolerance", lit(mergeTolerance)) .withColumn("snapTolerance", lit(snapTolerance)) + .withColumn("splitPointFinder", lit(splitPointFinder.toString)) .withColumn("origin", st_point(lit(348000.0), lit(462000.0))) .withColumn("grid_size_x", lit(1000)) .withColumn("grid_size_y", lit(1000)) .withColumn("pixel_size_x", lit(1.0)) .withColumn("pixel_size_y", lit(-1.0)) + .withColumn("noData", lit(noData)) .withColumn("tile", rst_dtmfromgeoms( - $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"splitPointFinder", $"origin", $"grid_size_x", $"grid_size_y", - $"pixel_size_x", $"pixel_size_y")) + $"pixel_size_x", $"pixel_size_y", $"noData")) .drop( - $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"origin", - $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y" + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"splitPointFinder", + $"origin", $"grid_size_x", $"grid_size_y", + $"pixel_size_x", $"pixel_size_y", $"noData" ).cache() noException should be thrownBy result.collect() } @@ -152,22 +161,26 @@ trait RST_DTMFromGeomsBehaviours extends SharedSparkSessionGDAL { val inputsDf = pointsDf .join(linesDf, Seq("index_id", "raster_origin"), "left") + .withColumn("index_id", $"index_id".cast(StringType)) val result = inputsDf .repartition(sc.sparkContext.defaultParallelism) .withColumn("mergeTolerance", lit(mergeTolerance)) .withColumn("snapTolerance", lit(snapTolerance)) + .withColumn("splitPointFinder", lit(splitPointFinder.toString)) .withColumn("grid_size_x", lit(1000)) .withColumn("grid_size_y", lit(1000)) .withColumn("pixel_size_x", lit(1.0)) .withColumn("pixel_size_y", lit(-1.0)) + .withColumn("noData", lit(noData)) .withColumn("tile", rst_dtmfromgeoms( - $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"splitPointFinder", $"raster_origin", $"grid_size_x", $"grid_size_y", - $"pixel_size_x", $"pixel_size_y")) + $"pixel_size_x", $"pixel_size_y", $"noData")) .drop( - $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"raster_origin", - $"grid_size_x", $"grid_size_y", $"pixel_size_x", $"pixel_size_y" + $"masspoints", $"breaklines", $"mergeTolerance", $"snapTolerance", $"splitPointFinder", + $"raster_origin", $"grid_size_x", $"grid_size_y", + $"pixel_size_x", $"pixel_size_y", $"noData" ).cache() noException should be thrownBy result.collect() result.count() shouldBe inputsDf.count() diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsTest.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsTest.scala index 5196e9443..a8000aadd 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DTMFromGeomsTest.scala @@ -10,9 +10,9 @@ class RST_DTMFromGeomsTest extends QueryTest with SharedSparkSessionGDAL with RS assume(System.getProperty("os.name") == "Linux") simpleRasterizeTest(H3IndexSystem, JTS) } - test("Testing RST_DTMFromGeoms for conforming triangulation with manual GDAL registration (H3, JTS).") { + test("Testing RST_DTMFromGeoms for conforming triangulation with manual GDAL registration (BNG, JTS).") { assume(System.getProperty("os.name") == "Linux") - conformedTriangulationRasterizeTest(H3IndexSystem, JTS) + conformedTriangulationRasterizeTest(BNGIndexSystem, JTS) } registerIgnoredTest("Ignored due to resource / duration")( diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetSRIDBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetSRIDBehaviors.scala index 684333cd4..e924e03ee 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetSRIDBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetSRIDBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.{MOSAIC_RASTER_READ_IN_MEMORY, MOSAIC_RASTER_READ_STRATEGY} import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -16,10 +17,13 @@ trait RST_SetSRIDBehaviors extends QueryTest { import mc.functions._ import sc.implicits._ + val modisPath = this.getClass.getResource("/modis/").getPath + val rastersInMemory = spark.read .format("gdal") - .option("raster_storage", "in-memory") - .load("src/test/resources/modis") + .option(MOSAIC_RASTER_READ_STRATEGY, MOSAIC_RASTER_READ_IN_MEMORY) + .option("pathGlobFilter", "*.TIF") + .load(modisPath) val df = rastersInMemory .withColumn("result", rst_setsrid($"tile", lit(4326))) diff --git a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala index 95f71978f..9570b5d75 100644 --- a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala +++ b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala @@ -2,15 +2,31 @@ package org.apache.spark.sql.test import com.databricks.labs.mosaic.gdal.MosaicGDAL import com.databricks.labs.mosaic.utils.FileUtils -import com.databricks.labs.mosaic.{MOSAIC_GDAL_NATIVE, MOSAIC_RASTER_CHECKPOINT, MOSAIC_RASTER_USE_CHECKPOINT, MOSAIC_RASTER_USE_CHECKPOINT_DEFAULT, MOSAIC_TEST_MODE} +import com.databricks.labs.mosaic._ import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession -import org.gdal.gdal.gdal +import org.scalatest.{Args, CompositeStatus, Status} import scala.util.Try trait SharedSparkSessionGDAL extends SharedSparkSession { + + var checkpointingEnabled: Boolean = _ + def checkpointingStatus: String = if (checkpointingEnabled) "enabled" else "disabled" + + override protected def runTest(testName: String, args: Args): Status = { + val statuses = for (checkpointing <- Seq(true, false)) yield { + checkpointingEnabled = checkpointing + spark.conf.set(MOSAIC_RASTER_USE_CHECKPOINT, checkpointing) + spark.sparkContext.setLogLevel("INFO") + logInfo(s"Raster checkpointing is $checkpointingStatus") + spark.sparkContext.setLogLevel("ERROR") + super.runTest(testName, args) + } + new CompositeStatus(statuses.toSet) + } + override def sparkConf: SparkConf = { super.sparkConf .set(MOSAIC_GDAL_NATIVE, "true") @@ -32,9 +48,6 @@ trait SharedSparkSessionGDAL extends SharedSparkSession { override def beforeEach(): Unit = { super.beforeEach() - sparkConf.set(MOSAIC_RASTER_USE_CHECKPOINT, MOSAIC_RASTER_USE_CHECKPOINT_DEFAULT) - MosaicGDAL.enableGDAL(this.spark) - gdal.AllRegister() } }