In [30]:
# import all necessary libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
import sys
import git

In [32]:
### Define file paths
# Root of the enclosing git working tree; every data folder below is resolved
# relative to it.
base_root = git.Repo('.', search_parent_directories=True).working_tree_dir

# Folders are assembled with os.path.join instead of hard-coded '\\' so the
# notebook is not tied to Windows. The final '' component makes each folder end
# with a path separator, so code that appends a file or folder name by plain
# string concatenation keeps working.
base_input = os.path.join(base_root, 'Input & Output', 'Input', '')
base_output = os.path.join(base_root, 'Input & Output', 'Output', '')

demand_input = os.path.join(base_input, 'Demand', '')
demand_output = os.path.join(base_output, 'Demand', '')
ontario_demand_path = os.path.join(demand_input, 'Zonal', '')

In [None]:
### Combine all demand data into one dataframe
# Inputs for combine_csvs: one CSV per year, named '<demand_prefix><year>.csv'.
demand_prefix = 'PUB_DemandZonal_'
start_year = 2003  # first year to load
# Folder holding the yearly CSVs. os.path.join (with a trailing '' so the result
# still ends in a path separator) replaces the Windows-only '\\' suffix.
zonal_demand_source_path = os.path.join(demand_input, 'Zonal', '')
# Passed to pd.read_csv as `header`: the 0-based row index of the line that
# holds the column names; the lines before it are skipped.
header = 3

def combine_csvs(file_prefix, start_year, source_file_path, header, end_year=2025):
    """Read one CSV per year and stack them into a single DataFrame.

    A file named ``<source_file_path><file_prefix><year>.csv`` is read for every
    year from ``start_year`` to ``end_year`` (both inclusive).

    Parameters
    ----------
    file_prefix : str
        Part of the file name that precedes the four-digit year.
    start_year : int
        First year to load.
    source_file_path : str
        Prefix prepended to each file name by plain string concatenation, so a
        folder must already end with a path separator.
    header : int or list of int
        Forwarded unchanged to ``pd.read_csv(header=...)``.
    end_year : int, default 2025
        Last year to load. This used to be hard-coded inside the function; the
        default keeps the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        Rows of all yearly files in year order, with a fresh 0..N-1 index so
        row labels are unique across files.

    Raises
    ------
    ValueError
        If ``start_year`` is greater than ``end_year`` (nothing to load).
    FileNotFoundError
        Propagated from ``pd.read_csv`` when a yearly file is missing.
    """
    if start_year > end_year:
        # pd.concat would raise a less helpful "No objects to concatenate".
        raise ValueError(
            f'start_year ({start_year}) must not be greater than end_year ({end_year})'
        )

    yearly_frames = [
        pd.read_csv(f'{source_file_path}{file_prefix}{year}.csv', header=header)
        for year in range(start_year, end_year + 1)
    ]

    # ignore_index=True: each file is read with its own 0-based index, so
    # keeping those labels would leave duplicates in the combined frame.
    return pd.concat(yearly_frames, ignore_index=True)

# Read every yearly file under the zonal demand folder and stack the results.
df_demand_final = combine_csvs(
    file_prefix=demand_prefix,
    start_year=start_year,
    source_file_path=zonal_demand_source_path,
    header=header,
)

In [41]:
# Narrow the frame to the four columns needed for the time series, then give
# the Toronto column a more descriptive name.
df_demand_final = (
    df_demand_final
    .loc[:, ['Date', 'Hour', 'Ontario Demand', 'Toronto']]
    .rename(columns={'Toronto': 'Toronto Demand'})
)

In [None]:
### Process the combined dataframe
# Fold 'Hour' into 'Date': parse the calendar date, shift it by 'Hour' hours,
# and drop the now-redundant 'Hour' column.
# NOTE(review): if 'Hour' runs 1-24 (hour-ending), hour 24 becomes 00:00 of the
# following day and is averaged into that day's/month's bucket - confirm this
# is the intended convention.
df_demand_final = (
    df_demand_final
    .assign(
        Date=lambda frame: (
            pd.to_datetime(frame['Date'], format='ISO8601')
            + pd.to_timedelta(frame['Hour'], unit='h')
        )
    )
    .drop(columns='Hour')
)

# Mean demand per calendar day, labelled 'YYYY-MM-DD'.
df_demand_final_daily = (
    df_demand_final
    .groupby(pd.Grouper(key='Date', freq='D'))[['Toronto Demand', 'Ontario Demand']]
    .mean()
)
df_demand_final_daily.index = df_demand_final_daily.index.strftime('%Y-%m-%d')

# Mean demand per calendar month ('ME' = month-end bins), labelled 'YYYY-MM'.
df_demand_final_monthly = (
    df_demand_final
    .groupby(pd.Grouper(key='Date', freq='ME'))[['Toronto Demand', 'Ontario Demand']]
    .mean()
)
df_demand_final_monthly.index = df_demand_final_monthly.index.strftime('%Y-%m')

In [46]:
### Outputting dataframes
# Make sure the destination folder exists; pd.ExcelWriter does not create
# missing directories and would otherwise fail on a fresh checkout.
os.makedirs(demand_output, exist_ok=True)

# One workbook with three sheets: the hourly rows (written without the row
# index) plus the daily and monthly averages (whose index holds the date label).
with pd.ExcelWriter(demand_output + 'toronto_demand_time_series_data.xlsx') as writer:
    df_demand_final.to_excel(writer, sheet_name='raw data', index=False)
    df_demand_final_daily.to_excel(writer, sheet_name='daily average')
    df_demand_final_monthly.to_excel(writer, sheet_name='monthly average')