In [None]:
import os

import pandas as pd
from sqlalchemy import create_engine, inspect, text
from sqlalchemy.orm import sessionmaker

# Create engine and connect.
# The connection string is taken from the DATABASE_URL environment variable so
# that real credentials do not have to be stored in the notebook. The original
# placeholder literal is kept as the fallback, so behavior is unchanged when the
# variable is not set.
DATABASE_URL = os.environ.get(
    'DATABASE_URL',
    'postgresql://postgres:PASSWORD@...',  # connection string
)
engine = create_engine(DATABASE_URL)
# One long-lived connection is shared by every cell below: PostgreSQL TEMP
# tables are only visible to the database session that created them, so the
# CREATE TEMP TABLE cells and the cells that read those tables must all go
# through this same `connection` object.
connection = engine.connect()

# Establish a session
Session = sessionmaker(bind=engine)
session = Session()

##### STEP 1: Adding Solar Output
Create a temp table that inner-joins the solar production table (`487solar`) with `raw_lmp`. The temp table contains 6 columns:

    1. datetime (date-time data type)
    2. settlement_location_name (string type)
    3. LMP (float type)
    4. MLC (float type)
    5. MCC (float type)
    6. output (float type)

In [None]:
# Create a temporary table merging raw_lmp and 487solar.
#
# The table is dropped first so this cell can be re-run on the same connection:
# a bare CREATE TEMP TABLE raises "relation already exists" on the second run.
connection.execute(text("DROP TABLE IF EXISTS temp_merge_table;"))

# Rows are matched on node name plus (month, day, time); the solar table has no
# year in its join key, so the same solar profile row is reused for every year
# present in raw_lmp.
merge_table_query = """
    CREATE TEMP TABLE temp_merge_table AS
    SELECT
        lmp.datetime,
        lmp.settlement_location_name,
        lmp."LMP",
        lmp."MCC",
        lmp."MLC",
        solar.output
    FROM
        "raw_lmp" AS lmp
    INNER JOIN
        "487solar" AS solar
    ON
        solar.node_id=lmp.settlement_location_name
        AND solar.month=lmp.month
        AND solar.day=lmp.day
        AND solar.time=lmp.time
    ;
"""

# Execute the query. The SQL is wrapped in text() because SQLAlchemy 2.0 no
# longer accepts a plain string in Connection.execute(); text() also works on
# older SQLAlchemy versions.
merged_table = connection.execute(text(merge_table_query))

In [None]:
# inspect the merged table: print the first 5 rows as a quick sanity check
query = "SELECT * FROM temp_merge_table LIMIT 5"
# text() is required by SQLAlchemy 2.0, where Connection.execute() rejects a
# plain string; it is also accepted by older SQLAlchemy versions.
result = connection.execute(text(query))
for row in result:
    print(row)

##### STEP 2: Create Summary Statistics Table
The summary statistics are computed per settlement location over the past year's worth of data, i.e. if this month is April, the summary statistics reflect the data from last April through this March. The summary table contains 10 columns:

    1. Settlement Location Name (string type)
    2. Projected Annual Revenue $M (float with 2 d.p.)
    3. Total Solar Output (float with 2 d.p.)
    4. Solar Weighted LMP (float with 2 d.p.)
    5. Average LMP (float with 2 d.p.)
    6. Average MLC (float with 2 d.p.)
    7. Average MCC (float with 2 d.p.)
    8. Average Daytime LMP (float with 2 d.p.)
    9. Average Nighttime LMP (float with 2 d.p.)
    10. Day-Night Difference (float with 2 d.p.)


In [None]:
# Create a summary statistics table (one row per settlement location).
#
# Notes on the query:
# * The window covers the 12 months ending at the start of the current month
#   (e.g. run in April -> last April through this March). The upper bound keeps
#   rows from the current, incomplete month out of the "annual" totals.
# * NOTE(review): the "+ interval '5 hours'" shift on both bounds presumably
#   aligns month boundaries with a UTC offset of the stored timestamps --
#   confirm against how raw_lmp.datetime is loaded.
# * NULLIF(SUM(output), 0) turns a zero total output into NULL so a node with
#   no solar production yields a NULL weighted LMP instead of aborting the
#   whole statement with a division-by-zero error.
# * "Daytime" / "Nighttime" are defined by solar output > 0 / = 0; AVG ignores
#   the NULLs produced by the CASE expressions for non-matching rows.
# * "Day-Night Difference" is computed in the outer query from the already
#   rounded daytime and nighttime averages.
summary_query1 = """
    DROP TABLE IF EXISTS summary_step1;
    CREATE TEMP TABLE summary_step1 AS
    SELECT
        *,
        "Average Daytime LMP" - "Average Nighttime LMP" AS "Day-Night Difference"
    FROM (
        SELECT
            settlement_location_name AS "Settlement Location Name",
            round((SUM("LMP" * output)/1000000)::numeric, 2) AS "Projected Annual Revenue $M",
            round(SUM(output)::numeric, 2) AS "Total Solar Output",
            round((SUM("LMP" * output) / NULLIF(SUM(output), 0))::numeric, 2) AS "Solar Weighted LMP",
            round(AVG("LMP")::numeric, 2) AS "Average LMP",
            round(AVG("MCC")::numeric, 2) AS "Average MCC",
            round(AVG("MLC")::numeric, 2) AS "Average MLC",
            round(AVG(CASE WHEN output > 0 THEN "LMP" END)::numeric, 2) AS "Average Daytime LMP",
            round(AVG(CASE WHEN output = 0 THEN "LMP" END)::numeric, 2) AS "Average Nighttime LMP"
        FROM temp_merge_table
        WHERE datetime >= date_trunc('month', CURRENT_DATE - interval '1 year') + interval '5 hours'
          AND datetime <  date_trunc('month', CURRENT_DATE) + interval '5 hours'
        GROUP BY settlement_location_name
    ) AS subquery
    ;
"""
# text() is required by SQLAlchemy 2.0 (plain strings are no longer executable);
# the PostgreSQL "::" casts are left untouched by text()'s bind-parameter parsing.
summary1 = connection.execute(text(summary_query1))

In [None]:
# inspect the summary table: print the first 5 rows as a quick sanity check
query = "SELECT * FROM summary_step1 LIMIT 5"
# text() is required by SQLAlchemy 2.0, where Connection.execute() rejects a
# plain string; it is also accepted by older SQLAlchemy versions.
result = connection.execute(text(query))
for row in result:
    print(row)

##### STEP 3: Adding Coordinate Columns
Inner-join the coordinates table (`node_coords`) with `summary_step1`. The final dataframe contains 12 columns: the summary table from step 2 plus the latitude and longitude columns. Because the query is run with `pd.read_sql()`, the result is returned as a pandas dataframe.

In [None]:
# Attach coordinates to the per-node summary and pull the result into pandas.
# The join is an INNER JOIN, so a settlement location that has no row in
# node_coords does not appear in the resulting dataframe.
# The query has no placeholders, so a plain (non-f) string literal is used.
coord_query = """
    SELECT
        coor.latitude,
        coor.longitude,
        summ.*
    FROM 
        node_coords AS coor
    INNER JOIN
        summary_step1 AS summ
    ON summ."Settlement Location Name"=coor.settlement_location;
"""
# Must run on the shared `connection`: summary_step1 is a TEMP table and is
# only visible on the connection that created it.
summary_stats = pd.read_sql(
    sql=coord_query,
    con=connection,
)

##### STEP 4: Export as a CSV File

In [None]:
import datetime

# Write the final summary (statistics plus coordinates) to a CSV file in the
# current working directory.
# NOTE(review): the file name is hard-coded and `datetime` is imported but not
# used here -- presumably the name was meant to be derived from the run date;
# confirm before changing, since downstream consumers may expect this path.
# NOTE(review): to_csv() is called with its defaults, so the dataframe index is
# written as an extra unnamed first column.
OUTPUT_CSV = 'SPP_summary_stats_April25.csv'
summary_stats.to_csv(OUTPUT_CSV)

In [None]:
# Release every database resource opened in the setup cell, not just the ORM
# session: the explicit `connection` and the engine's connection pool would
# otherwise stay open until the kernel exits.
session.close()
# Returns the connection to the engine's pool.
connection.close()
# Closes the pooled DBAPI connections; ending the database session is also what
# makes PostgreSQL drop the TEMP tables created by this notebook.
engine.dispose()