## <span style="color: #7b73e2;" >Start WherobotsDB</span>

In [None]:
from sedona.spark import SedonaContext
from sedona.raster_utils.SedonaUtils import SedonaUtils
from sedona.maps.SedonaKepler import SedonaKepler
from pyspark.sql.functions import expr

# Build (or reuse) the Spark session configuration, setting
# spark.executor.memory to "3600m".
config = SedonaContext.builder().config("spark.executor.memory", "3600m").getOrCreate()

# Sedona-enabled session handle; later cells use it for reads and SQL.
sedona = SedonaContext.create(config)

## <span style="color: #7b73e2;">Load Aerial Imagery Efficiently</span>

In this step, we'll load the aerial imagery so we can run <span style="color: #7b73e2;">**inference**</span> in a later step.

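The GeoTIFF imagery is large, so we'll load it as <span style="color: #7b73e2;">**out-of-database**</span> or <span style="color: #7b73e2;">**\"out-db\" rasters**</span> in **WherobotsDB**. The cell below sets the reader's `retile` option to `false`, so the files are loaded as they are rather than being re-split into tiles; set it to `true` if you want the images tiled on load.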

In [None]:
# Wildcard path matching every .tif file under the naip/mn prefix.
url = "s3://wherobots-examples/data/naip/mn/*.tif"

# Catalog name; not referenced anywhere in this cell.
catalog = 'matt'

# Read the GeoTIFFs through the "raster" data source with the retile option
# set to "false".
df = (
    sedona.read.format("raster")
    .option("retile", "false")
    .load(url)
)

# Expose the DataFrame to SQL as the view "df", then preview its rows.
df.createOrReplaceTempView("df")
df.show()

## <span style="color: #7b73e2;">Viewing the Model's Imagery Inputs</span>

We can see the footprints of the tiled images with the `SedonaKepler.create_map()` integration. Using `SedonaUtils.display_image()` we can view the images as well.

<span style="color: #7b73e2;"> **Tip:** </span>  Save the map to an HTML file using `kepler_map.save_to_html()`.

In [None]:
# Add a "footprint" column: the convex hull of each raster, transformed to
# EPSG:4326 so it can be drawn on the map.
footprint_expr = expr("ST_TRANSFORM(RS_CONVEXHULL(rast),'EPSG:4326')")
df = df.withColumn("footprint", footprint_expr)

# Create an empty Kepler map and add the DataFrame as a named layer.
kepler_map = SedonaKepler.create_map()
SedonaKepler.add_df(kepler_map, df=df, name="Image Footprints")

# Last expression in the cell, so the notebook renders the map.
kepler_map

In [None]:
# Preview up to five rasters from the "df" view as inline images.
# RS_AsImage is called with the raster and 250 (the image width argument).
# The original query had no column alias after "as", which left the SQL
# statement malformed; the result column is now named explicitly.
htmlDf = sedona.sql(
    "SELECT RS_AsImage(rast, 250) AS raster_image FROM df LIMIT 5"
)
SedonaUtils.display_image(htmlDf)

In [None]:
# Inference parameters interpolated into the SQL below: model identifier,
# free-text prompt, and the threshold passed as the fourth argument.
model_id = "sam2"
prompt = "park with a baseball field in it"
threshold = 0.2

# Run RS_TEXT_TO_SEGMENTS on every raster in the "df" view, keeping the raster
# next to its prediction result in the "preds" column.
segmentation_sql = f"""SELECT rast, RS_TEXT_TO_SEGMENTS('{model_id}', rast, '{prompt}', {threshold}) AS preds from df"""
preds = sedona.sql(segmentation_sql)

# cache() is lazy; count() is the action that runs the query and populates
# the cache so later cells reuse the computed predictions.
preds.cache().count()
preds.createOrReplaceTempView("preds")

In [None]:
# Keep only rows whose prediction has at least one label, includes label 1,
# and contains no 'POLYGON EMPTY' entry among its segment WKT strings.
filter_sql = """
  SELECT *
  FROM preds
  WHERE
    size(preds.labels) > 0
    AND array_contains(preds.labels, 1)
    AND NOT array_contains(preds.segments_wkt, 'POLYGON EMPTY')
"""
preds_filtered = sedona.sql(filter_sql)

# Register the filtered rows for the next SQL step and preview them.
preds_filtered.createOrReplaceTempView("preds_filtered")
preds_filtered.show()

In [None]:
# Flatten the predictions: zip the segment WKT, confidence score, and label
# arrays together, explode them to one row per zipped element (keeping the
# source raster), and drop elements whose confidence score is exactly 0.0.
explode_sql = """
SELECT
    rast,
    exploded_predictions.*
FROM
    preds_filtered
LATERAL VIEW explode(arrays_zip(preds.segments_wkt, preds.confidence_scores, preds.labels)) AS exploded_predictions
WHERE
    exploded_predictions.confidence_scores != 0.0
"""
exploded = sedona.sql(explode_sql)

# count() triggers evaluation so the cached result is populated before the
# view is registered and previewed.
exploded.cache().count()
exploded.createOrReplaceTempView("exploded")
exploded.show()

In [None]:
# (Re-)register the exploded predictions DataFrame as the SQL view "exploded".
exploded.createOrReplaceTempView("exploded")

In [None]:
# Catalog segment of the fully qualified table name written below.
catalog = 'matt'

# Persist every row of the "exploded" view as the table
# wherobots.<catalog>.baseball_parks_sam, replacing any existing table.
create_table_sql = f'CREATE OR REPLACE TABLE wherobots.{catalog}.baseball_parks_sam AS SELECT * FROM exploded'
sedona.sql(create_table_sql)