# Import Apache Sedona

In [None]:
from sedona.spark import *

# Define Sedona context if not defined yet

In [None]:
# Obtain the configuration object from Sedona's builder, then hand it to
# SedonaContext.create to get the `sedona` entry point used by later cells.
config = SedonaContext.builder().getOrCreate()
sedona = SedonaContext.create(config)

# Keep a handle on the underlying Spark context as well.
sc = sedona.sparkContext

# Set Connection and Context options.
The authentication parameters (username and password) might be different based on the authentication setup of the AWS RDS database.
**NOTE**: Please ensure that the AWS RDS database instance is reachable from the server that opens the connection. See [here](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_VPC.Scenarios.html) to learn more about inter- and intra-VPC connections to the RDS instance.

In [None]:
# JDBC connection settings for the RDS PostgreSQL instance.
# Replace every placeholder before running the cells that follow.
# URL format: jdbc:postgresql://ENDPOINT/DATABASE_NAME
url = "<URL>"
driver = "org.postgresql.Driver"
user, password = "<USERNAME>", "<PASSWORD>"


In [None]:
# Bundle the connection settings into one mapping that can be unpacked
# into a JDBC reader or writer via `.options(**options)`.
options = dict(url=url, driver=driver, user=user, password=password)

# Read entire table from RDS PostgreSQL into a Sedona Dataframe
A `dbtable` option with the table name as its value can be passed to the reader obtained from the Sedona context (`sedona.read`) to load the entire table into a dataframe. This is equivalent to running a `SELECT * FROM table` query.

In [None]:
# Name of the source table to read from the database.
src_table_name = "city_tbl_geom"

# Load the table over JDBC; `dbtable` names the table to read.
df = (
    sedona.read.format("jdbc")
    .options(**options)
    .option("dbtable", src_table_name)
    .load()
)

# Read a particular query result from RDS PostgreSQL into a Sedona Dataframe
A `query` option with the desired query as value can be passed to the sedona context object to read the result of the query into the dataframe. 

In [None]:
# Custom query to run; the source table name is substituted into the SQL text.
query = f"SELECT GEOM, CITY_NAME FROM {src_table_name} WHERE CITY_NAME = 'Seattle'"

# Load the result of the query over JDBC via the `query` option.
df_query = (
    sedona.read.format("jdbc")
    .options(**options)
    .option("query", query)
    .load()
)

# Create Sedona Geometry type column
PostGIS serializes geometries as WKB and hence the dataframe will be populated with WKB strings in the geometry column.
The WKB strings can be converted to Sedona Geometry types using `ST_GeomFromWKB` exposed via Sedona.

In [None]:
# Apply the same projection to both dataframes: convert the WKB `geom`
# column with ST_GeomFromWKB and keep the city name alongside it.
df_city, df_seattle = (
    frame.selectExpr("ST_GeomFromWKB(geom) as geom", "CITY_NAME")
    for frame in (df, df_query)
)

# Register temp views so the dataframes can be referenced by name in SQL.
df_city.createOrReplaceTempView("city_table")
df_seattle.createOrReplaceTempView("seattle_table")

# Operate on geospatial data using Sedona
Now, any desired processing can be performed on the loaded geospatial data with Sedona's extensive [vector function catalog](https://docs.wherobots.services/1.1.0/references/wherobots-compute/vector-data/Overview/)

In [None]:
# WKT text of the reference point used for New York.
new_york_point = "POINT (-74.00 40.71)"

# Single-row view exposing the reference point as a geometry column.
df_ny = sedona.sql(f"SELECT ST_GeomFromWKT('{new_york_point}') as new_york")
df_ny.createOrReplaceTempView("new_york_table")

# Pair every city with the reference point and compute the spherical
# distance; the geometry is emitted as EWKB for the later write-back.
df_dist_from_ny = sedona.sql(
    "SELECT ST_AsEWKB(geom) as geom, "
    "ST_DistanceSphere(geom, new_york) as dist_from_ny, "
    "CITY_NAME from city_table, new_york_table"
)

# Write computed data back to RDS PostGIS

A Sedona Dataframe can be written back to the connected RDS PostGIS instance through the dataframe's `write` interface.
As with reading, an `options` map carrying the necessary connection and context options must be passed.

Provide a `dbtable` option to specify the name of the destination table.

Also, provide a [save mode](https://spark.apache.org/docs/1.6.0/api/java/org/apache/spark/sql/SaveMode.html) parameter to specify handling collisions with existing tables if any.

The geometry data is written in binary form. The table in Postgres can then be altered as described [here](https://docs.wherobots.services/1.1.0/tutorials/wherobots-compute/save/#step-3-optional-in-postgis).

In [None]:
# Destination table name in PostGIS.
destination_table = "distance_from_ny"

# Append data to the table if the table already exists with some data.
# Other possible values are: errorifexists, ignore, overwrite.
save_mode = "append"

# Write the computed dataframe to the destination table over JDBC,
# reusing the same connection options as the reads.
(
    df_dist_from_ny.write.format("jdbc")
    .options(**options)
    .option("dbtable", destination_table)
    .mode(save_mode)
    .save()
)