Skip to content

Commit

Permalink
Add examples for stacking bands into a single raster.
Browse files Browse the repository at this point in the history
Add functionality for band stacking.
  • Loading branch information
milos.colic committed Aug 10, 2023
1 parent ee80140 commit e4588ea
Show file tree
Hide file tree
Showing 19 changed files with 1,006 additions and 76 deletions.
153 changes: 128 additions & 25 deletions notebooks/prototypes/grid_tiles/00 Download STACs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,94 @@

# COMMAND ----------

# MAGIC %reload_ext autoreload
# MAGIC %autoreload 2
# MAGIC %reload_ext library

# COMMAND ----------

spark.conf.set("spark.sql.adaptive.coalescePartitions.enabled", "false")

# COMMAND ----------

# MAGIC %md
# MAGIC We have selected an area near Seattle for this demo. This is just an illustrative choice; the code will work with an area anywhere on the surface of the planet. Our solution provides an easy way to tessellate the area into indexed pieces. This tessellation will allow us to parallelise the download of the data.
# MAGIC We will download census data from the TIGER feed for this demo. The data can be downloaded as a zip to dbfs (or managed volumes).

# COMMAND ----------

import os
import urllib.request

# urlretrieve does not create intermediate directories and would fail with
# FileNotFoundError on a fresh workspace, so ensure the target folder exists.
os.makedirs("/dbfs/FileStore/geospatial/odin/census", exist_ok=True)

# Download the 2021 TIGER/Line US county boundaries as a single zip archive.
urllib.request.urlretrieve(
    "https://www2.census.gov/geo/tiger/TIGER2021/COUNTY/tl_2021_us_county.zip",
    "/dbfs/FileStore/geospatial/odin/census/data.zip"
)

# COMMAND ----------

# MAGIC %sh ls -al /dbfs/FileStore/geospatial/odin/census/

# COMMAND ----------

# MAGIC %md
# MAGIC Mosaic has specialised readers for shape files and other GDAL-supported formats. We don't need to unzip the data zip file; we just need to pass the "vsizip" option to the reader.

# COMMAND ----------

# Read the zipped TIGER shapefile in place: the OGR multi reader opens the
# archive through GDAL's /vsizip/ virtual filesystem, so no unzip step is
# needed. chunkSize controls how many rows each reading task handles.
census_df = mos.read().format("multi_read_ogr")\
.option("vsizip", "true")\
.option("chunkSize", "50")\
.load("dbfs:/FileStore/geospatial/odin/census/data.zip")\
.cache() # We will cache the loaded data to avoid schema inference being done repeatedly for each query

# COMMAND ----------

# MAGIC %md
# MAGIC For this example we will focus on Alaska counties. The Alaska state code is 02, so we will apply a filter to our ingested data.

# COMMAND ----------

census_df.where("STATEFP == 2").display()

# COMMAND ----------

# Reproject the Alaska county geometries from their source SRID to WGS84
# (EPSG:4326) so they can be rendered on a Kepler map.
to_display = census_df\
.where("STATEFP == 2")\
.withColumn(
"geom_0",
mos.st_updatesrid("geom_0", "geom_0_srid", F.lit(4326))
)\
.select("geom_0")

# COMMAND ----------

# MAGIC %%mosaic_kepler
# MAGIC to_display geom_0 geometry 50

# COMMAND ----------

# Tessellate each Alaska county into H3 cells at resolution 3;
# grid_tessellateexplode emits one row per covering cell, carrying the cell
# id and the clipped geometry chip in the `grid` struct.
cells = census_df\
.where("STATEFP == 2")\
.withColumn(
"geom_0",
mos.st_updatesrid("geom_0", "geom_0_srid", F.lit(4326))
)\
.withColumn("geom_0_srid", F.lit(4326))\
.withColumn(
"grid",
mos.grid_tessellateexplode("geom_0", F.lit(3))
)

# COMMAND ----------

# Inspect the tessellated cells.
# NOTE(review): removed a stale `cells = library.generate_cells(...)`
# reassignment (Seattle bounding box) that clobbered the Alaska tessellation
# built in the previous cell.
cells.display()

# COMMAND ----------

to_display = cells.select("grid.wkb")
to_display = cells.select(mos.st_simplify("grid.wkb", F.lit(0.1)).alias("wkb"))

# COMMAND ----------

# MAGIC %%mosaic_kepler
# MAGIC to_display wkb geometry 100000

# COMMAND ----------

Expand All @@ -65,24 +133,15 @@

# COMMAND ----------

time_range = "2020-12-01/2020-12-31"
bbox = [-123, 47, -122, 48]

search = catalog.search(collections=["landsat-c2-l2"], bbox=bbox, datetime=time_range)
items = search.item_collection()
items

# COMMAND ----------

[json.dumps(item.to_dict()) for item in items]
time_range = "2021-06-01/2021-06-07"

# COMMAND ----------

# Build the STAC request payload: a stable per-county id (hash of the
# geometry), the H3 cell id, and a GeoJSON rendering of the cell chip.
# NOTE(review): removed a superseded variant that hashed a nonexistent
# "geom" column (the pipeline above produces "geom_0").
cell_jsons = cells\
.withColumn("area_id", F.hash("geom_0"))\
.withColumn("h3", F.col("grid.index_id"))\
.withColumn("geojson", mos.st_asgeojson("grid.wkb"))\
.drop("geom_0")

# COMMAND ----------

Expand All @@ -95,12 +154,33 @@

# COMMAND ----------

# Number of (county, H3 cell) request rows we are about to issue.
cell_jsons.count()

# COMMAND ----------

# MAGIC %md
# MAGIC Our framework allows for easy preparation of STAC requests with only one line of code. This data is Delta-ready at this point and can easily be stored for lineage purposes.

# COMMAND ----------

# County-level (non-tessellated) variant of the request payload, used to
# compare whole-geometry STAC queries against the per-cell approach.
# NOTE(review): removed a stale one-argument get_assets_for_cells(...) call
# that predates the (df, time_range, collection) signature used below.
census_jsons = census_df\
.where("STATEFP == 2")\
.withColumn(
"geom_0",
mos.st_updatesrid("geom_0", "geom_0_srid", F.lit(4326))
)\
.withColumn("geom_0_srid", F.lit(4326))\
.withColumn("area_id", F.hash("geom_0"))\
.withColumn("geojson", mos.st_asgeojson("geom_0"))

# COMMAND ----------

# STAC asset discovery per whole county geometry (10 partitions of
# parallel catalog requests against the sentinel-2-l2a collection).
eod_items = library.get_assets_for_cells(census_jsons.repartition(10), time_range ,"sentinel-2-l2a" ).cache()
#eod_items.display()

# COMMAND ----------

# NOTE(review): this reassignment supersedes the county-level request above;
# downstream cells consume the per-H3-cell asset list.
eod_items = library.get_assets_for_cells(cell_jsons.repartition(200), time_range ,"sentinel-2-l2a" ).cache()
eod_items.display()

# COMMAND ----------
Expand All @@ -110,21 +190,40 @@

# COMMAND ----------

# Sentinel-2 asset (band) to download. Fixed typo: "B01" (B-zero-one, the
# 60 m coastal aerosol band) — the original "BO1" (letter O) matches no
# Sentinel-2 asset name and would yield an empty download set.
# NOTE(review): storage paths below reference the WVP band — confirm which
# band this run is actually meant to fetch.
item_name = "B01"

# Deduplicate to one row per (item_id, timestamp) and keep only the rows
# for the selected band's asset.
to_download = eod_items\
.withColumn("timestamp", F.col("item_properties.datetime"))\
.groupBy("item_id", "timestamp")\
.agg(
*[F.first(cn).alias(cn) for cn in eod_items.columns]
)\
.withColumn("date", F.to_date("timestamp"))\
.where(
f"asset.name == '{item_name}'"
)

# COMMAND ----------

# Equivalent helper kept for reference:
#to_download = library.get_unique_hrefs(eod_items, item_name="B01")
to_download.display()

# COMMAND ----------

# MAGIC %fs mkdirs /FileStore/geospatial/odin/alaskaWVP

# COMMAND ----------

# Download each asset href into DBFS under a random hash-based filename;
# download_asset returns the local output path, recorded in `outputfile`.
# The target folder matches the %fs mkdirs cell above.
# NOTE(review): the variable name "catalof_df" (sic, likely "catalog_df")
# is kept because downstream cells reference it.
catalof_df = to_download\
.withColumn(
"outputfile",
library.download_asset("href", F.lit("/dbfs/FileStore/geospatial/odin/alaskaWVP"),
F.concat(F.hash(F.rand()), F.lit(".tif")))
)

# COMMAND ----------

catalof_df.write.format("delta").saveAsTable("mosaic_odin_files")
catalof_df.write.mode("overwrite").format("delta").saveAsTable("mosaic_odin_alaska_WVP")

# COMMAND ----------

Expand All @@ -133,7 +232,11 @@

# COMMAND ----------

catalof_df = spark.read.table("mosaic_odin_files")
# MAGIC %fs ls /FileStore/geospatial/odin/alaskaWVP

# COMMAND ----------

catalof_df = spark.read.table("mosaic_odin_alaska_B04")

# COMMAND ----------

Expand Down
54 changes: 52 additions & 2 deletions notebooks/prototypes/grid_tiles/02 Dev & Test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
# COMMAND ----------

spark.conf.set("spark.sql.adaptive.coalescePartitions.enabled", "false")
spark.conf.set("spark.sql.adaptive.enabled", "false")

# COMMAND ----------

Expand All @@ -42,7 +43,8 @@

# COMMAND ----------

# Salted 300-way repartition spreads the downloaded files across tasks so
# the raster operations below parallelise instead of clustering on a few
# input partitions.
catalog_df = spark.read.table("mosaic_odin_files")\
.repartition(300, "outputfile", F.rand())
catalog_df.display()

# COMMAND ----------
Expand All @@ -54,6 +56,7 @@
# COMMAND ----------

# Subdivide every raster into tiles of at most 16MB and record each tile's
# in-memory size.
# NOTE(review): this repartition repeats the one applied when catalog_df is
# read — redundant but harmless; confirm which one is intended to stay.
tiles_df = catalog_df\
.repartition(300, "outputfile", F.rand())\
.withColumn("raster", mos.rst_subdivide("outputfile", F.lit(16)))\
.withColumn("size", mos.rst_memsize("raster"))

Expand Down Expand Up @@ -112,7 +115,7 @@

# COMMAND ----------

# Visually inspect one of the subdivided tiles.
library.plot_raster(to_plot[45]["raster"])

# COMMAND ----------

Expand Down Expand Up @@ -257,4 +260,51 @@ def mean_band_1(dataset):

# COMMAND ----------

# Reload the gridded measurement table persisted earlier.
with_measurement = spark.read.table("mosaic_odin_gridded")

# COMMAND ----------

# Plot one sample raster. limit(1) avoids collecting 50 (potentially large)
# rasters to the driver only to use the first one.
library.plot_raster(with_measurement.limit(1).collect()[0]["raster"])

# COMMAND ----------

# MAGIC %pip install torch transformers

# COMMAND ----------

import torch
from PIL import Image
import requests
from transformers import SamModel, SamProcessor

# COMMAND ----------

device = "cuda" if torch.cuda.is_available() else "cpu"
model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device)
processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")

# COMMAND ----------

img_url = "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png"
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB")
input_points = [[[450, 600]]] # 2D location of a window in the image

# COMMAND ----------

# Encode the image + point prompt and run SAM. This is inference only, so
# disable autograd to avoid building the graph (and holding activations)
# for the ViT-H forward pass.
inputs = processor(raw_image, input_points=input_points, return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model(**inputs)

# COMMAND ----------

# Upsample the predicted low-resolution mask logits back to the original
# image resolution.
masks = processor.image_processor.post_process_masks(
outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()
)
# Model's own IoU (mask quality) estimate, one per candidate mask.
scores = outputs.iou_scores

# COMMAND ----------

# Display the IoU scores.
scores

# COMMAND ----------


Loading

0 comments on commit e4588ea

Please sign in to comment.