In [None]:
from sedona.spark import *
# Obtain the session/config object from Sedona's builder, then wrap it in a
# SedonaContext. `sedona` is the handle every later cell uses to run SQL.
config = (
    SedonaContext
    .builder()
    .getOrCreate()
)
sedona = SedonaContext.create(config)

In [None]:
# Name of the schema (inside `org_catalog`) that the tables built in this
# notebook are written to.
database = "gde_gold"

# Idempotent: safe to re-run because of IF NOT EXISTS.
sedona.sql(
    f"CREATE DATABASE IF NOT EXISTS org_catalog.{database}"
)

In [None]:
# Build (or replace) `analytics_gold`: one row per row of the bronze
# `king_co_homes_conflated` table, enriched via LEFT OUTER JOINs on `sale_id`
# with three silver tables (flood hazards, road proximity, distance table).
# Left joins keep homes that have no match; their enrichment columns are NULL.
#
# NOTE(review): `total_bath` is a plain sum of the three bath counts, so it is
#   NULL whenever any one of them is NULL -- confirm the inputs are non-null.
# NOTE(review): if any silver table holds more than one row per `sale_id`, the
#   joins will duplicate sales in the output -- verify key uniqueness upstream.
# NOTE(review): `dist_to_park` is read from `homes_distance_to_seattle`; the
#   alias and the table name disagree -- confirm which one is intended.
sedona.sql(f'''
create or replace table org_catalog.{database}.analytics_gold as
select
a.sale_id,
a.overture_id,
a.polygon as geometry,
a.height,
a.sale_date,
a.sale_price,
a.city,
a.year_built,
a.beds,
a.bath_3qtr + a.bath_full + a.bath_half as total_bath,
a.sqft,
a.sqft_fbsmt,
a.stories,
b.flood_zone,
c.distance as dist_to_major_intersection,
d.distance as dist_to_park
from org_catalog.gde_bronze.king_co_homes_conflated a
left outer join org_catalog.gde_silver.homes_flood_hazards b using (sale_id)
left outer join org_catalog.gde_silver.roads_proximity c using (sale_id)
left outer join org_catalog.gde_silver.homes_distance_to_seattle d using (sale_id)
''')

In [None]:
# Build (or replace) `analytics_home_level`: one row per home, identified by
# (geometry, overture_id), with its sale history collapsed into two arrays.
#
# Output columns (names unchanged):
#   geometry, overture_id -- grouping key
#   sales_count           -- count of non-null sale_id values for the home
#   price                 -- sale prices, ordered by sale date ascending
#   sale_date             -- sale dates, ascending; element i pairs with price[i]
#
# Why the struct + sort_array: the element order produced by array_agg is not
# guaranteed, and a trailing ORDER BY only orders result rows, never the
# elements inside an aggregated array. Collecting (sale_date, sale_price)
# pairs and sorting them makes both arrays chronological and keeps them
# aligned index-for-index.
#
# Behaviour note: because the values travel together in a struct, a sale with
# a NULL price or date now shows up as a NULL element instead of being dropped
# from just one of the two arrays.
#
# The final ORDER BY is kept from the previous version; `sale_date` there is
# the array column, so rows are ordered by comparing those arrays.
# NOTE(review): row order of a stored table is generally not guaranteed on
#   read -- consider dropping the sort if nothing depends on it.
sedona.sql(f'''
create or replace table org_catalog.{database}.analytics_home_level as
with home_sales as (
select
geometry,
overture_id,
count(sale_id) as sales_count,
sort_array(
  array_agg(named_struct('sale_date', sale_date, 'sale_price', sale_price))
) as sales
from org_catalog.{database}.analytics_gold
group by geometry, overture_id
)
select
geometry,
overture_id,
sales_count,
transform(sales, s -> s.sale_price) as price,
transform(sales, s -> s.sale_date) as sale_date
from home_sales
order by sale_date asc
''')

In [None]:
# Print a preview of the home-level table as a sanity check.
(
    sedona
    .sql(f'select * from org_catalog.{database}.analytics_home_level')
    .show()
)

In [None]:
# Build (or replace) `ai_ready`: monthly sales statistics per city, computed
# from the bronze `king_co_homes` table LEFT OUTER JOINed on `sale_id` to
# `homes_distance_to_seattle`.
#
# Output columns (names unchanged):
#   city, year, month                 -- grouping key (year/month of sale_date)
#   total_sales                       -- count of non-null sale_id values
#   min_/max_/mean_sale_price         -- aggregates over sale_price
#   sale_prices                       -- prices ordered by sale date ascending
#   dist_to_park                      -- distances; element i pairs with sale_prices[i]
#
# Why the struct + sort_array: sorting the input in a CTE does not guarantee
# the element order of array_agg once the rows go through a join and a
# GROUP BY. Collecting (sale_date, sale_price, distance) triples and sorting
# them gives a deterministic, chronological order and keeps both arrays
# aligned index-for-index. The CTE-level sort is therefore no longer needed.
#
# Behaviour note: sales with no match in the distance table now contribute a
# NULL element to `dist_to_park` (previously they were silently dropped, which
# made the two arrays different lengths).
#
# NOTE(review): if the distance table has several rows per `sale_id`, the join
#   duplicates sales and inflates every aggregate here -- verify uniqueness.
# NOTE(review): `dist_to_park` is read from `homes_distance_to_seattle`; the
#   alias and the table name disagree -- confirm which one is intended.
sedona.sql(f'''
create or replace table org_catalog.{database}.ai_ready as
with monthly as (
select
a.city,
EXTRACT(YEAR FROM a.sale_date) AS year,
EXTRACT(MONTH FROM a.sale_date) AS month,
count(a.sale_id) as total_sales,
min(a.sale_price) as min_sale_price,
max(a.sale_price) as max_sale_price,
avg(a.sale_price) as mean_sale_price,
sort_array(
  array_agg(
    named_struct(
      'sale_date', a.sale_date,
      'sale_price', a.sale_price,
      'distance', d.distance
    )
  )
) as sales
from org_catalog.gde_bronze.king_co_homes a
left outer join org_catalog.gde_silver.homes_distance_to_seattle d using (sale_id)
group by 1, 2, 3
)
select
city,
year,
month,
total_sales,
min_sale_price,
max_sale_price,
mean_sale_price,
transform(sales, s -> s.sale_price) as sale_prices,
transform(sales, s -> s.distance) as dist_to_park
from monthly
''')

In [None]:
# Print a preview of the monthly per-city table as a sanity check.
(
    sedona
    .sql(f'select * from org_catalog.{database}.ai_ready')
    .show()
)