Skip to content

Commit

Permalink
Add BalancedSubdivision for dividing rasters into pieces that maintai…
Browse files Browse the repository at this point in the history
…n x/y ratio.

Add GDALBuildVRT for executing build VRT programs.
Add GDALTranslate for executing translate programs.
Add GDALWarp for executing warp programs.
Refactor GDALFileFormat to support ReadStrategies.
Handle Axis Orientation in IndexSystem.
Add MergeRasters operator.
Add PathUtils to centralise all the path handling for GDAL and FUSE locations.
Add RasterChipType.
Add RasterCleaner for handling deletion and flushing of rasters between stages.
Add RST_Tessellate expression.
Add RST_Merge and RST_MergeAgg expressions.
Add ReTileOnRead read strategy.
Add ReadAsPath read strategy.
Add ReadInMemory read strategy.
  • Loading branch information
milos.colic committed Jun 24, 2023
1 parent 35e0fef commit cfe0a04
Show file tree
Hide file tree
Showing 92 changed files with 2,022 additions and 506 deletions.
27 changes: 27 additions & 0 deletions python/mosaic/api/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"rst_srid",
"rst_subdatasets",
"rst_summary",
"rst_tessellate",
"rst_upperleftx",
"rst_upperlefty",
"rst_width",
Expand Down Expand Up @@ -611,6 +612,32 @@ def rst_summary(raster: ColumnOrName) -> Column:
pyspark_to_java_column(raster)
)

def rst_tessellate(raster: ColumnOrName, resolution: ColumnOrName) -> Column:
"""
Computes the summary of the raster.
The summary is a map of the statistics of the raster.
The logic is produced by gdalinfo procedure.
The result is stored as JSON.
Parameters
----------
raster : Column (StringType)
Path to the raster file.
resolution : Column (IntegerType)
The resolution of the tiles.
Returns
-------
Column (RasterTiles)
A struct containing the tiles of the raster.
"""
return config.mosaic_context.invoke_function(
"rst_tessellate",
pyspark_to_java_column(raster),
pyspark_to_java_column(resolution)
)

def rst_upperleftx(raster: ColumnOrName) -> Column:
"""
Computes the upper left X coordinate of the raster.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ com.databricks.labs.mosaic.datasource.ShapefileFileFormat
com.databricks.labs.mosaic.datasource.GeoDBFileFormat
com.databricks.labs.mosaic.datasource.OpenGeoDBFileFormat
com.databricks.labs.mosaic.datasource.OGRFileFormat
com.databricks.labs.mosaic.datasource.GDALFileFormat
com.databricks.labs.mosaic.datasource.gdal.GDALFileFormat
com.databricks.labs.mosaic.datasource.UserDefinedFileFormat
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ abstract class IndexSystem(var cellIdType: DataType) extends Serializable {
def osrSpatialRef: SpatialReference = {
val sr = new SpatialReference()
sr.ImportFromEPSG(crsID)
sr.SetAxisMappingStrategy(org.gdal.osr.osrConstants.OAMS_TRADITIONAL_GIS_ORDER)
sr
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package com.databricks.labs.mosaic.core.raster
import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
import com.databricks.labs.mosaic.core.index.IndexSystem
import com.databricks.labs.mosaic.core.raster.gdal_raster.RasterWriter
import com.databricks.labs.mosaic.core.raster.gdal_raster.{RasterCleaner, RasterWriter}
import org.gdal.gdal.Dataset
import org.gdal.osr.SpatialReference

Expand All @@ -20,7 +20,14 @@ import org.gdal.osr.SpatialReference
abstract class MosaicRaster(
isInMem: Boolean
) extends Serializable
with RasterWriter {
with RasterWriter
with RasterCleaner {

def getBands: Seq[MosaicRasterBand] = (1 to numBands).map(getBand)

def refresh(): Unit

def getDimensions: (Int, Int)

def uuid: Long

Expand Down Expand Up @@ -95,4 +102,6 @@ abstract class MosaicRaster(
/** A method that a boolean flat set to true if the raster is empty. */
def isEmpty: Boolean

def getBandStats: Map[Int, Map[String, Double]]

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ package com.databricks.labs.mosaic.core.raster
*/
trait MosaicRasterBand extends Serializable {

def isNoDataMask: Boolean

def maskFlags: Seq[Any]

/** @return Returns the bandId of the band. */
def index: Int

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,7 @@ case class MosaicRasterBandGDAL(band: Band, id: Int) extends MosaicRasterBand {
}
}

override def maskFlags: Seq[Any] = Seq(band.GetMaskFlags())

override def isNoDataMask: Boolean = band.GetMaskFlags() == gdalconstConstants.GMF_NODATA
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
package com.databricks.labs.mosaic.core.raster.api

import com.databricks.labs.mosaic.core.raster._
import com.databricks.labs.mosaic.core.raster.gdal_raster.{MosaicRasterGDAL, RasterReader, RasterTransform}
import org.apache.spark.sql.catalyst.InternalRow
import com.databricks.labs.mosaic.core.raster.gdal_raster.{MosaicRasterGDAL, RasterCleaner, RasterReader, RasterTransform}
import org.apache.spark.sql.types.{BinaryType, DataType, StringType}
import org.apache.spark.unsafe.types.UTF8String
import org.gdal.gdal.gdal
Expand All @@ -28,16 +27,19 @@ abstract class RasterAPI(reader: RasterReader) extends Serializable {
}
}

def writeRasters(generatedRasters: Seq[MosaicRaster], checkpointPath: String, rasterDT: DataType): Seq[InternalRow] = {
def writeRasters(generatedRasters: Seq[MosaicRaster], checkpointPath: String, rasterDT: DataType): Seq[Any] = {
generatedRasters.map(raster =>
rasterDT match {
case StringType =>
val writePath = s"$checkpointPath/${raster.uuid}"
val extension = raster.getRaster.GetDriver().GetMetadataItem("DMD_EXTENSION")
val writePath = s"$checkpointPath/${raster.uuid}.$extension"
val outPath = raster.writeToPath(writePath)
InternalRow.fromSeq(Seq(UTF8String.fromString(outPath)))
RasterCleaner.dispose(raster)
UTF8String.fromString(outPath)
case BinaryType =>
val bytes = raster.writeToBytes()
InternalRow.fromSeq(Seq(bytes))
RasterCleaner.dispose(raster)
bytes
}
)
}
Expand All @@ -63,6 +65,8 @@ abstract class RasterAPI(reader: RasterReader) extends Serializable {
*/
def raster(path: String): MosaicRaster = reader.readRaster(path)

def raster(content: Array[Byte]): MosaicRaster = reader.readRaster(content)

/**
* Reads a raster from the given path. It extracts the specified band from
* the raster. If zip, use band(path, bandIndex, vsizip = true)
Expand Down
Loading

0 comments on commit cfe0a04

Please sign in to comment.