# Loading Data

In [None]:
from sedona.spark import *

# Use the anonymous S3A credentials provider for the "wherobots-examples"
# bucket, so reading the sample data does not need AWS keys.
config = (
    SedonaContext.builder()
    .config(
        "spark.hadoop.fs.s3a.bucket.wherobots-examples.aws.credentials.provider",
        "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider",
    )
    .getOrCreate()
)

# `sedona` is the entry point every following cell reads from and queries with.
sedona = SedonaContext.create(config)

## Loading Common Vector Data Formats

### Loading A CSV File From A Public S3 Bucket

In [None]:
S3_CSV_URL = "s3a://wherobots-examples/data/examples/birdbuddy_oct23.csv"

# Read the CSV with a header row, a comma delimiter and inferred column types.
bb_df = (
    sedona.read.format('csv')
    .option('header', 'true')
    .option('delimiter', ',')
    .option('inferSchema', 'true')
    .load(S3_CSV_URL)
)

# Build a point geometry from the longitude/latitude columns and keep only the
# descriptive columns alongside it.
bb_df = bb_df.selectExpr(
    'ST_Point(anonymized_longitude, anonymized_latitude) AS location',
    'timestamp',
    'common_name',
    'scientific_name',
)

# Register a cached 0.1% random sample as the "bb" view; the preview printed
# afterwards comes from the full DataFrame, not from the sample.
bb_df.sample(0.001).cache().createOrReplaceTempView('bb')
bb_df.show(truncate=False)

In [None]:
# Map a newly drawn 0.1% random sample of the sightings (this is a separate
# sample() call, so it is not the same rows as the cached "bb" view).
SedonaKepler.create_map(
    df=bb_df.sample(0.001),
    name="Bird Species",
)

## GeoJSON

In [None]:
# Location of the watershed-boundaries GeoJSON file in the examples bucket.
S3_URL_JSON = (
    "s3://wherobots-examples/data/examples/usgs/"
    "watershed_boundaries_huc6.geojson"
)

In [None]:
# Load the multi-line GeoJSON document, then explode its "features" array into
# one row per feature and promote each feature's fields to top-level columns.
watershed_df = (
    sedona.read.format("geojson")
    .option("multiLine", "true")
    .load(S3_URL_JSON)
    .selectExpr("explode(features) as features")
    .select("features.*")
)

watershed_df.printSchema()
# Expose the features to SQL under the name "watersheds".
watershed_df.createOrReplaceTempView("watersheds")

In [None]:
# Keep the watersheds whose "states" property contains the substring "CA",
# projecting the geometry plus the name and huc6 properties.
ca_watersheds_sql = """
SELECT geometry, properties.name, properties.huc6
FROM watersheds
WHERE properties.states LIKE "%CA%"
"""
california_df = sedona.sql(ca_watersheds_sql)
california_df.createOrReplaceTempView("ca_watersheds")

# Cache the filtered rows and hand them to SedonaKepler for display.
SedonaKepler.create_map(
    df=california_df.cache(),
    name="California Watersheds",
)

## Shapefiles

In [None]:
S3_URL_SHAPEFILE = "s3://wherobots-examples/data/examples/natural_earth/ne_10m_admin_0_countries/"

# ShapefileReader is an RDD-level API whose first parameter is documented as a
# SparkContext, so pass the context owned by the session instead of the session
# object itself.
spatialRDD = ShapefileReader.readToGeometryRDD(sedona.sparkContext, S3_URL_SHAPEFILE)

# Adapter.toDf converts the spatial RDD to a DataFrame and takes the session.
countries_df = Adapter.toDf(spatialRDD, sedona)
countries_df.printSchema()

# Expose the countries to SQL under the name "countries".
countries_df.createOrReplaceTempView("countries")

In [None]:
# Display every country geometry loaded from the shapefile.
SedonaKepler.create_map(
    df=countries_df,
    name="Countries",
)


## Raster Data - GeoTIFF

In [None]:
ortho_url = "s3://wherobots-examples/data/examples/NEON_ortho.tif"

# A SELECT with no FROM clause yields a single row; its "raster" column holds
# the result of RS_FromPath for the GeoTIFF at ortho_url.
ortho_sql = f"SELECT RS_FromPath('{ortho_url}') AS raster"
ortho_df = sedona.sql(ortho_sql)

ortho_df.show(truncate=False)
# Expose the raster row to SQL under the name "ortho".
ortho_df.createOrReplaceTempView("ortho")

In [None]:
# Convert the raster with RS_AsImage, then pass the resulting DataFrame to
# SedonaUtils.display_image for display in the notebook.
htmlDf = sedona.sql(
    "SELECT RS_AsImage(raster) FROM ortho"
)
SedonaUtils.display_image(
    htmlDf
)

In [None]:
# Print the result of RS_NumBands for the raster in the "ortho" view.
(
    sedona
    .sql("SELECT RS_NumBands(raster) FROM ortho")
    .show()
)


In [None]:
# Map-algebra expression: normalized difference of the bands at index 1 and
# index 0, i.e. (b1 - b0) / (b1 + b0), exposed as the "ndgi" column.
# NOTE(review): 'D' presumably selects a double output pixel type — confirm
# against the RS_MapAlgebra documentation.
# The DataFrame is only defined here; nothing in this cell triggers an action.
ndgi_sql = """
SELECT RS_MapAlgebra(raster, 'D', 'out = (rast[1] - rast[0]) / (rast[1] + rast[0]);')
AS ndgi 
FROM ortho
"""
ndgi_df = sedona.sql(ndgi_sql)

## Writing Files

In [None]:
# Pair each sampled sighting in "bb" with the country whose geometry contains
# its location, then count sightings per English country name. any_value()
# carries one geometry and one name through the GROUP BY; the result is ordered
# with the largest count first.
birdshed_sql = """
SELECT 
    COUNT(*) AS num, 
    any_value(countries.geometry) AS geometry, 
    any_value(countries.NAME_EN) AS name
FROM bb, countries
WHERE ST_Contains(countries.geometry, bb.location) 
GROUP BY countries.NAME_EN
ORDER BY num DESC
"""
birdshed_df = sedona.sql(birdshed_sql)

birdshed_df.show()

In [None]:
import os

# Output location prefix taken from the environment; this is None when the
# USER_S3_PATH variable is not set.
USER_S3_PATH = os.getenv("USER_S3_PATH")

In [None]:
# USER_S3_PATH comes from os.environ.get(), so it is None when the variable is
# unset. Fail with an explicit message instead of the opaque TypeError that
# `None + str` would raise.
if USER_S3_PATH is None:
    raise RuntimeError(
        "USER_S3_PATH environment variable is not set; "
        "cannot determine where to write the GeoParquet output"
    )

# Insert the path separator only when the prefix lacks one, so the prefix works
# with or without a trailing slash (an empty prefix is left untouched).
output_prefix = USER_S3_PATH
if output_prefix and not output_prefix.endswith("/"):
    output_prefix += "/"

# Collapse the result to one partition and overwrite any previous output at
# the target location, writing in the "geoparquet" format.
(
    birdshed_df.repartition(1)
    .write.mode("overwrite")
    .format("geoparquet")
    .save(output_prefix + "geoparquet/birdshed.parquet")
)