In [1]:
import pandas as pd

In [2]:
# Read the raw Palo Alto EV charging export into a DataFrame
df = pd.read_csv(filepath_or_buffer='../../data/raw_data/Palo_Alto_EV_Data.csv')

  df = pd.read_csv('../../data/raw_data/Palo_Alto_EV_Data.csv')


In [3]:
# Normalise the misspelt Bryant station label (taken from the first column), then keep
# only the four columns used downstream and discard exact duplicate rows
df['Station Name'] = df.iloc[:, 0].replace(
    to_replace='PALO ALTO CA / BRYANT # 1',
    value='PALO ALTO CA / BRYANT #1',
)
clean_table = df.loc[
    :, ['Station Name', 'Start Date', 'Charging Time (hh:mm:ss)', 'Energy (kWh)']
].drop_duplicates()

In [4]:
# Column labels to apply to the header-less station clusters file
station_clusters_columns = ['Station Cluster Name', 'Station_Name']

# Read the Palo Alto station clusters file, naming its columns explicitly
station_clusters_data = pd.read_csv(
    '../../data/raw_data/Palo_Alto_Station_Clusters_Data.csv',
    names=station_clusters_columns,
    header=None,
)

# Attach the cluster name to each charging record by station name, then drop the
# right-hand key column, which only repeats 'Station Name'
joined_table = clean_table.merge(
    station_clusters_data,
    left_on='Station Name',
    right_on='Station_Name',
).drop(columns=['Station_Name'])

In [5]:
# Parse 'Start Date' (values that cannot be parsed are coerced to NaT) and
# re-render it as a 'dd/mm/YYYY HH:MM:SS' string
joined_table['Start Date'] = (
    pd.to_datetime(joined_table['Start Date'], errors='coerce')
    .dt.strftime('%d/%m/%Y %H:%M:%S')
)

In [6]:
# Use the processed table as `df` from here on. This is a plain rebinding: nothing is
# read from disk and no copy is made, so `df` and `joined_table` are the same object.
df = joined_table

In [7]:
# Convert the 'Start Date' column to datetime, reading ambiguous dates as day-first
df['Start Date'] = pd.to_datetime(df['Start Date'], dayfirst=True)

# Common start and end of the hourly range used for every station cluster.
# These are ordinary module-level names; a `global` statement has no effect at
# module scope, so none is used here.
start_point = pd.Timestamp('2011-07-29 20:00:00') # Based on queries_results/station_cluster_time_range
file_end_point = pd.Timestamp('2020-12-31 22:00:00') # Based on queries_results/station_cluster_time_range

In [8]:
# Function to create a list of time intervals the charging span
def intervals(split_time, start_point):
    """Return the start of every clock hour that a charging session touches.

    Parameters
    ----------
    split_time : sequence
        Charging duration as ``[hours, minutes, seconds]``; each item is passed to ``int()``.
    start_point : pd.Timestamp
        Moment the charging session started.

    Returns
    -------
    pd.DatetimeIndex
        Hourly timestamps from the hour containing ``start_point`` up to the hour
        containing the session end. Empty when the adjusted end falls before the
        start hour (a zero-length session that starts exactly on the hour).
    """
    duration = pd.Timedelta(hours=int(split_time[0]),
                            minutes=int(split_time[1]),
                            seconds=int(split_time[2]))
    end_point = start_point + duration

    # A session that ends exactly on the hour does not occupy the hour starting there,
    # so step the end back into the previous hour
    if end_point == end_point.floor('60min'):
        end_point = end_point - pd.Timedelta(hours=1)

    # '60min' is used instead of the deprecated '60T' alias, and date_range yields the
    # hour-start timestamps directly (no Period round trip needed)
    return pd.date_range(start=start_point.floor('60min'),
                         end=end_point.floor('60min'),
                         freq='60min')

In [9]:
# Function to create a list of minutes the charging span in each time interval
def minute_intervals(split_time, start_point):
    """Return how many minutes of a charging session fall in each clock hour it spans.

    Parameters
    ----------
    split_time : sequence
        Charging duration as ``[hours, minutes, seconds]``; each item is passed to ``int()``.
    start_point : pd.Timestamp
        Moment the charging session started.

    Returns
    -------
    list of float
        One entry per clock hour touched, in chronological order. The entries add up
        to the session length in minutes. Empty for a zero-length session.
    """
    one_hour = pd.Timedelta(hours=1)
    one_minute = pd.Timedelta(minutes=1)
    end_point = start_point + pd.Timedelta(hours=int(split_time[0]),
                                          minutes=int(split_time[1]),
                                          seconds=int(split_time[2]))

    minutes_per_hour = []
    cursor = start_point
    while cursor < end_point:
        # floor() + 1h is the next hour boundary even when the cursor already sits on
        # the hour (where ceil() would return the cursor itself)
        next_boundary = cursor.floor('60min') + one_hour
        segment_end = min(next_boundary, end_point)
        minutes_per_hour.append((segment_end - cursor) / one_minute)
        # Jump straight to the boundary instead of adding the float minute count back
        # as a Timedelta, so rounding can never leave the cursor short of the boundary
        cursor = segment_end

    return minutes_per_hour

In [10]:
# Function that iterates over input data rows, sums up respective split energy for periods and outputs a file
def dictionary_add_station(cluster_name):
    """Aggregate one station cluster's charging energy into hourly totals and write a CSV.

    Reads the module-level ``df``, ``start_point`` and ``file_end_point``. Each charging
    session's energy is split across the clock hours it spans, in proportion to the
    minutes charged in each hour (using ``intervals`` and ``minute_intervals``). The
    result is written to ``'../../data/clean_data/<cluster_name>.csv'`` with the
    columns ``Hour`` and ``Energy``.

    Parameters
    ----------
    cluster_name : str
        Value of 'Station Cluster Name' to select; also used as the output file name.

    Raises
    ------
    KeyError
        If a session touches an hour outside the range from ``start_point`` to
        ``file_end_point``.
    """
    # '60min' is used instead of the deprecated '60T' alias
    time_intervals = pd.date_range(start=start_point, end=file_end_point, freq='60min')

    # Filter table for appropriate station cluster; reset_index returns a new frame,
    # so the filtered slice of df is never modified in place
    station_table = df[df['Station Cluster Name'] == cluster_name].reset_index(drop=True)

    # Dictionary for each time period and energy value, starting at zero
    hour_energy_dict = dict.fromkeys(time_intervals, 0)

    sessions = zip(station_table['Charging Time (hh:mm:ss)'],
                   station_table['Start Date'],
                   station_table['Energy (kWh)'])

    # For every charging instance
    for charging_time, session_start, energy in sessions:
        # A start date that failed to parse (NaT) cannot be placed in any hour
        if pd.isna(session_start):
            continue

        # Extract hour, minute and second duration of charging
        split_time = charging_time.split(':')

        # Identify the hour intervals that the charging spans
        row_intervals = intervals(split_time, session_start)

        # Calculate how long charging was done in each interval
        minute_split = minute_intervals(split_time, session_start)

        # A zero-length session has no minutes to distribute; skipping it also avoids
        # dividing by zero and indexing into an empty minute list
        total_minutes = sum(minute_split)
        if total_minutes == 0:
            continue

        # Update the energy values in dictionary for each hour interval
        for hour, minutes in zip(row_intervals, minute_split):
            hour_energy_dict[hour] += minutes * energy / total_minutes

    # Create output file
    new_df = pd.DataFrame({
        "Hour": list(hour_energy_dict.keys()),
        "Energy": list(hour_energy_dict.values()),
    })
    new_file_name = '../../data/clean_data/' + cluster_name + '.csv'

    # Columns are respectively: datetime, total energy demand in hour
    new_df.to_csv(new_file_name, index=False)

In [11]:
#  Process every station's data
def main():
    """Call dictionary_add_station once for each hard-coded station cluster name, in order."""
    for cluster in ('WEBSTER', 'CAMBRIDGE', 'MPL', 'RINCONADA LIB',
                    'TED THOMPSON', 'HAMILTON', 'HIGH', 'BRYANT'):
        dictionary_add_station(cluster)

main()