In [1]:
# import all necessary libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
import sys
import git

In [2]:
### Define file paths
# Resolve the enclosing git repository, searching parent directories of the
# current working directory, and use its working tree as the root folder.
repo = git.Repo('.', search_parent_directories=True)
base_root = repo.working_tree_dir

# NOTE: every folder string below is built with literal backslash separators
# and ends with one, so file names can be appended directly.

# Input side
base_input = base_root + '\\Input & Output\\Input\\'
market_price_input = base_input + 'Market Price\\'

# Output side
base_output = base_root + '\\Input & Output\\Output\\'
market_price_output = base_output + 'Market Price\\'

In [3]:
### Settings for combining the yearly nodal price files into one dataframe
# Folder that holds the yearly CSV files (ends with a separator so that
# file names can be appended directly).
nodal_price_source_path = market_price_input + 'Nodal Price\\'
# File name prefix; combine_csvs appends the year and '.csv'.
price_prefix = 'PUB_PriceNodal_'
# First year handed to combine_csvs.
start_year = 2002
# Forwarded to pd.read_csv(header=...) for every file.
header = 4

def combine_csvs(file_prefix, start_year, source_file_path, header, end_year=2025):
    """Read one CSV file per year and stack them into a single DataFrame.

    For every year from ``start_year`` through ``end_year`` (inclusive) the
    file ``source_file_path + file_prefix + <year> + '.csv'`` is read with
    ``pd.read_csv`` and the resulting frames are concatenated in year order.

    Parameters
    ----------
    file_prefix : str
        Part of the file name that precedes the year.
    start_year : int
        First year to read.
    source_file_path : str
        Folder prefix. It is joined to the file name by plain string
        concatenation, so it must already end with a path separator.
    header : int
        Forwarded unchanged to ``pd.read_csv(header=...)``.
    end_year : int, default 2025
        Last year to read (inclusive). This used to be a constant buried in
        the function body; the default keeps existing calls unchanged.

    Returns
    -------
    pandas.DataFrame
        All yearly frames concatenated. The row labels of each file are kept
        as read, so index values repeat from one year to the next.

    Raises
    ------
    ValueError
        If ``start_year`` is greater than ``end_year`` (nothing to read).
    """
    years = range(start_year, end_year + 1)
    if not years:
        # pd.concat would also raise ValueError on an empty list, but with a
        # message that does not point at the year range.
        raise ValueError(
            f'No years to read: start_year={start_year} is after end_year={end_year}'
        )

    yearly_frames = [
        pd.read_csv(f'{source_file_path}{file_prefix}{year}.csv', header=header)
        for year in years
    ]
    return pd.concat(yearly_frames)

# Load every yearly nodal price file into a single frame.
df_price_final = combine_csvs(
    file_prefix=price_prefix,
    start_year=start_year,
    source_file_path=nodal_price_source_path,
    header=header,
)

In [4]:
# Narrow the frame to the date, the hour and the Darlington column, and
# expose the Darlington values under the shorter name 'Price'.
kept_columns = ['Date', 'Hour', 'Darlington']
df_price_final = df_price_final[kept_columns].rename(columns={'Darlington': 'Price'})

In [5]:
### Process the combined dataframe
def _mean_price_by_period(prices, freq, label_format):
    """Average 'Price' over 'Date' bins of width `freq`, labelling each bin via strftime."""
    averaged = prices.groupby(pd.Grouper(key='Date', freq=freq))['Price'].mean()
    averaged.index = averaged.index.strftime(label_format)
    return averaged


# Merge the calendar date and the hour number into one timestamp column.
# NOTE(review): if 'Hour' is hour-ending (1-24), hour 24 becomes 00:00 of the
# following day and is averaged with that day/month - confirm this is intended.
date_part = pd.to_datetime(df_price_final['Date'])
hour_part = pd.to_timedelta(df_price_final['Hour'], unit='h')
df_price_final['Date'] = date_part + hour_part
df_price_final = df_price_final.drop(columns='Hour')

# Replace missing prices with 0.
# NOTE(review): zeros take part in the means below and pull them down, whereas
# mean() would skip missing values by default - confirm this is intended.
df_price_final = df_price_final.assign(Price=df_price_final['Price'].fillna(0))

# Daily and monthly averages, indexed by formatted date strings.
df_price_daily = _mean_price_by_period(df_price_final, 'D', '%Y-%m-%d')
df_price_monthly = _mean_price_by_period(df_price_final, 'ME', '%Y-%m')

In [6]:
### Outputting dataframes
# One workbook with three sheets: the hourly rows (written without the row
# index) followed by the two averaged series (written with their date labels).
workbook_path = market_price_output + 'toronto_price_time_series_data.xlsx'
averaged_sheets = (
    ('daily average', df_price_daily),
    ('monthly average', df_price_monthly),
)
with pd.ExcelWriter(workbook_path) as writer:
    df_price_final.to_excel(writer, sheet_name='raw data', index=False)
    for sheet_name, averaged in averaged_sheets:
        averaged.to_excel(writer, sheet_name=sheet_name)