##### STEP 1: Fetch the CSV files from the URL (available monthly after the 9th). 
This Jupyter notebook is scheduled in ArcGIS Online to run every month after the 9th and fetch the latest data. 
Note: 

In [1]:
import requests
import pandas as pd
from io import StringIO

#Define the function that outputs a dataframe object from a given url
def getDf_fromURL(url, timeout=60):
    """Download a CSV file over HTTP and parse it into a pandas DataFrame.

    Parameters
    ----------
    url : str
        Address of the CSV file to download.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.get``.
        Defaults to 60 so that a stalled server cannot hang a scheduled run
        indefinitely.

    Returns
    -------
    pandas.DataFrame or None
        The parsed CSV when the server answers with HTTP 200. For any other
        status code a failure message is printed and ``None`` is returned,
        so callers must check for ``None`` before using the result.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated unchanged from ``requests.get`` (connection errors,
        timeouts).
    """
    response = requests.get(url, timeout=timeout)

    # Anything other than 200 is treated as a failed download.
    if response.status_code != 200:
        print(f"Failed to fetch the data. Status code: {response.status_code}")
        return None

    # pandas needs a path or file-like object, so wrap the response text.
    return pd.read_csv(StringIO(response.text))

In [None]:
from datetime import datetime, timedelta

# Stepping back one day from the first of the current month always lands in
# the previous month, which is the month whose file gets downloaded.
today = datetime.today()
last_month = today.replace(day=1) - timedelta(days=1)
year = last_month.strftime("%Y")
month = last_month.strftime("%m")
URL=f'https://portal.spp.org/file-browser-api/download/rtbm-lmp-by-location?path=/{year}/{month}/RTBM-LMP-MONTHLY-SL-{year}{month}.csv'
get_LMPdata = getDf_fromURL(URL)

# getDf_fromURL returns None when the download fails. Stop here with a clear
# message instead of letting a later cell crash on a None value.
if get_LMPdata is None:
    raise RuntimeError(f"No LMP data could be downloaded for {year}-{month} from {URL}")

##### STEP 2: Dataframe Manipulation with Python
The dataframe is transformed into the intended form, with 5 columns:
   
    1. Datetime (date-time data type)
    2. Settlement Location Name (string type)
    3. LMP (float type)
    4. MLC (float type)
    5. MCC (float type)

In [7]:
#Define the function that restructures the dataframe
def conversion(raw_data):
    """Reshape the wide hourly price table into one row per hour and location.

    Parameters
    ----------
    raw_data : pandas.DataFrame
        Table with the columns ``Date``, ``Settlement Location Name``,
        ``Price Type`` and ``HE01`` ... ``HE24`` (surrounding whitespace in
        the column names is tolerated). The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``datetime`` and ``Settlement Location Name`` followed by
        one column per distinct ``Price Type`` value, holding the prices.
    """
    # Work on a relabelled copy so the caller's frame keeps its own headers.
    cleaned = raw_data.set_axis(raw_data.columns.str.strip(), axis=1)

    # Wide -> long: one row per (date, location, price type, hour column).
    melted = cleaned.melt(
        id_vars=['Date', 'Settlement Location Name', 'Price Type'],
        value_vars=[f'HE{i:02d}' for i in range(1, 25)],
        var_name='hour',
        value_name='price'
    )

    # Turn the column label into an hour offset: HE01 -> 0, ..., HE24 -> 23.
    # expand=False yields a Series rather than a one-column DataFrame.
    hour_number = melted['hour'].str.extract(r'HE(\d+)', expand=False).astype(int)
    melted['hour'] = hour_number - 1
    melted['datetime'] = pd.to_datetime(melted['Date']) + pd.to_timedelta(melted['hour'], unit='h')

    # Long -> one column per price type. aggfunc='first' keeps the first
    # value if a (datetime, location, price type) combination repeats.
    result = melted.pivot_table(
        index=['datetime', 'Settlement Location Name'],
        columns='Price Type',
        values='price',
        aggfunc='first'
    ).reset_index()

    return result

In [19]:
# Reshape the downloaded table into one row per datetime and settlement location
fin_LMP = conversion(raw_data=get_LMPdata)

In [20]:
# NOTE(review): this import is not used in this cell, and it rebinds the name
# `datetime` from the class imported in the fetch cell to the module. Confirm
# whether it is still needed.
import datetime

# Split the timestamp into separate month / day / hour-of-day columns.
fin_LMP = fin_LMP.assign(
    month=fin_LMP['datetime'].dt.month,
    day=fin_LMP['datetime'].dt.day,
    time=fin_LMP['datetime'].dt.hour,
)

In [21]:
# Give the location column a snake_case name, then display the frame.
fin_LMP = fin_LMP.rename(columns={'Settlement Location Name': 'settlement_location_name'})
fin_LMP

Price Type,datetime,settlement_location_name,LMP,MCC,MLC,month,day,time
0,2025-02-01 06:00:00,AEC,20.5074,3.8642,0.4478,2,1,6
1,2025-02-01 06:00:00,AECC_CSWS,22.8771,5.9554,0.7263,2,1,6
2,2025-02-01 06:00:00,AECC_ELKINS,21.9785,5.0933,0.6897,2,1,6
3,2025-02-01 06:00:00,AECC_FITZHUGH,21.8208,5.0150,0.6105,2,1,6
4,2025-02-01 06:00:00,AECC_FLTCREEK,21.6021,5.0483,0.3584,2,1,6
...,...,...,...,...,...,...,...,...
828571,2025-03-01 05:00:00,WR_MOPEP_MPS,15.4571,-2.1748,-0.5350,3,1,5
828572,2025-03-01 05:00:00,WR_MW_KPP_NITS,15.9405,-1.5029,-0.7236,3,1,5
828573,2025-03-01 05:00:00,WR_SEG,11.4324,-6.6340,-0.1006,3,1,5
828574,2025-03-01 05:00:00,WR_WR,15.6272,-1.6111,-0.9287,3,1,5


##### STEP 3: Upload Data to Database

In [26]:
#establish connection with AWS cloud service
import os

from sqlalchemy import create_engine, MetaData, Table
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import insert

# Take the connection string from the LMP_DATABASE_URL environment variable
# so real credentials never have to be saved inside the notebook. The
# placeholder is used only when the variable is not set.
connection_string = os.environ.get('LMP_DATABASE_URL', 'postgresql://postgres:PASSWORD@...') #connection string
engine = create_engine(connection_string)

# Establish a session
Session = sessionmaker(bind=engine)
session = Session()

In [23]:
# Append the reshaped LMP rows to the raw_lmp table (DataFrame index is not written).
# NOTE(review): with if_exists='append', running this cell again for the same
# month presumably inserts the same rows a second time. Confirm whether the
# table has a constraint that prevents this.
fin_LMP.to_sql(name='raw_lmp', con=engine, index=False, if_exists='append')

print('New rows added to the raw_lmp table')

New rows added to the raw_lmp table


In [27]:
#commit to change and close session
try:
    session.commit()
finally:
    # Close the session even if the commit raises, then release the engine's
    # pooled connections. The engine stays usable after dispose().
    session.close()
    engine.dispose()