In [1]:
import pandas as pd 
from sklearn.pipeline import Pipeline
from datacleaning.FetchData import FetchData
from datacleaning.CleanData import CleanData
from datetime import datetime
import plotly.express as px
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

# Let pandas display up to 100 columns so the wide session frames are not elided.
pd.set_option("display.max_columns", 100)

In [2]:
class SortDropCast(BaseEstimator, TransformerMixin):
    """
    This pipeline step will sort values by field "connectTime",
    drop columns "user_email", "slrpPaymentId" (when they are present),
    and cast columns "cumEnergy_Wh", "peakPower_W" as float values.
    """
    def fit(self, X, y=None):
        """Nothing is learned from the data; return self so the step can be chained."""
        return self

    # A regular instance method rather than a staticmethod: the pipeline calls
    # transform on the instance, and scikit-learn releases that provide the
    # set_output API wrap this method in a function that passes `self`.
    def transform(self, X) -> pd.DataFrame:
        """
        Return a sorted, trimmed and float-cast copy of X.

        Parameters
        ----------
        X : pd.DataFrame
            Must contain "connectTime", "cumEnergy_Wh" and "peakPower_W".

        Returns
        -------
        pd.DataFrame
            New frame ordered by "connectTime" with a fresh 0..n-1 index.
            The caller's frame is not modified.
        """
        # sort_values returns a new frame, so the assignments below never
        # touch the caller's object.
        X = X.sort_values(by="connectTime").reset_index(drop=True)

        # errors="ignore" keeps the step usable on frames from which these
        # columns have already been removed.
        X = X.drop(columns=["user_email", "slrpPaymentId"], errors="ignore")

        X["cumEnergy_Wh"] = X["cumEnergy_Wh"].astype(float)
        X["peakPower_W"] = X["peakPower_W"].astype(float)
        return X


class HelperFeatureCreation(BaseEstimator, TransformerMixin):
    """
    This pipeline step will drop any records that contain 0 for
    "peakPower_W" or "cumEnergy_Wh". Four additional columns will be created:
    "reqChargeTime_h", "finishChargeTime", "Overstay", and "Overstay_h".
    Any records with a calculated charging duration of 24 hours or more will be dropped.
    The data (with these new features) at this stage will be saved to
    "data/raw_data.csv".
    """
    def fit(self, X, y=None):
        """Nothing is learned from the data; return self so the step can be chained."""
        return self

    # A regular instance method rather than a staticmethod: the pipeline calls
    # transform on the instance, and scikit-learn releases that provide the
    # set_output API wrap this method in a function that passes `self`.
    def transform(self, X) -> pd.DataFrame:
        """
        Filter unusable sessions, derive the helper columns and save the result.

        Parameters
        ----------
        X : pd.DataFrame
            Must contain "peakPower_W", "cumEnergy_Wh", "connectTime",
            "startChargeTime", "Deadline" and "lastUpdate".

        Returns
        -------
        pd.DataFrame
            Filtered copy of X with the four helper columns added and the
            four time columns converted to datetimes.
        """
        # A zero in either column makes the energy/power ratio meaningless
        # (or a division by zero), so those rows are removed first.
        X = X.loc[(X["peakPower_W"] != 0) & (X["cumEnergy_Wh"] != 0)].copy(deep=True)

        # Hours needed to deliver the recorded energy at the recorded peak power.
        X["reqChargeTime_h"] = X["cumEnergy_Wh"] / X["peakPower_W"]

        for time_col in ("connectTime", "startChargeTime", "Deadline", "lastUpdate"):
            X[time_col] = pd.to_datetime(X[time_col])

        # .dt.round is the frequency-based rounding for timedelta Series;
        # Series.round is documented for decimal places only.
        charge_duration = pd.to_timedelta(X["reqChargeTime_h"], unit="h").dt.round("s")
        X["finishChargeTime"] = X["startChargeTime"] + charge_duration

        # filter out bad rows (this occurs when there is a very low peak power and high energy delivered)
        # .copy() so the column assignments below act on an independent frame.
        X = X.loc[X["reqChargeTime_h"] < 24].copy()

        # Overstay is lastUpdate - Deadline floored at zero. The row-wise max
        # against a zero column also turns a missing (NaT) difference into 0 days.
        X["temp_0"] = pd.Timedelta(days=0, seconds=0)
        X["Overstay"] = X["lastUpdate"] - X["Deadline"]
        X["Overstay"] = X[["Overstay", "temp_0"]].max(axis=1)

        # total_seconds() covers the whole duration; .dt.seconds is only the
        # sub-day component and would under-report overstays of a day or more.
        X["Overstay_h"] = X["Overstay"].dt.total_seconds() / 3600

        X = X.drop(columns=["temp_0"])

        # NOTE(review): the index is written as an unnamed first column, so a
        # plain pd.read_csv of this file gains an extra "Unnamed: 0" column on
        # every write/read round trip. Confirm whether index=False is wanted.
        X.to_csv("data/raw_data.csv")

        return X

In [3]:
def create_ts(session):
    """
    Build a 5-minute power time series for a single charging session.

    The session is sampled every 5 minutes from "startChargeTime" to
    "finishChargeTime", each sample carrying "peakPower_W". The samples are
    summed into 5-minute bins and re-indexed onto the grid that runs from the
    start time rounded to 5 minutes to the finish time rounded to 5 minutes.

    Parameters
    ----------
    session : mapping-like (e.g. one DataFrame row)
        Must provide "startChargeTime" and "finishChargeTime" as pandas
        Timestamps and "peakPower_W" as a number.

    Returns
    -------
    pd.DataFrame
        A single "power" column indexed by the 5-minute grid; grid slots that
        received no sample hold 0.
    """
    start = session["startChargeTime"]
    finish = session["finishChargeTime"]

    # Keep the DatetimeIndex as-is (no list round trip) so an empty range
    # still yields a datetime-typed index that resample accepts.
    sample_times = pd.date_range(start=start, end=finish, freq="5min")
    samples = pd.DataFrame(
        {"power": np.ones(len(sample_times)) * session["peakPower_W"]},
        index=sample_times,
    )

    # Lower-case "5min" is the canonical pandas alias for five minutes.
    grid = pd.date_range(start=start.round("5min"), end=finish.round("5min"), freq="5min")

    # NOTE(review): resample labels every sample with the floor of its
    # timestamp, while the grid starts at the *rounded* start. A first sample
    # whose floor lies before the rounded start is therefore dropped, and the
    # last grid slot can be 0. Confirm this alignment is intended.
    return samples.resample("5min").sum().reindex(index=grid, fill_value=0)

In [4]:
# Load the saved sessions and keep only the rows whose dcosId is 4405.
# NOTE(review): no index_col is given, so parse_dates=True has no index
# column to parse here. Confirm whether date parsing was intended.
df = (
    pd.read_csv("data/raw_data.csv", parse_dates=True)
    .loc[lambda sessions: sessions["dcosId"] == 4405]
    .copy()
)
df


Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,vehicle_maxChgRate_W,peakPower_W,sch_centsPerHr,connectTime,vehicle_model,Duration,userId,regular,Deadline,startChargeTime,sch_centsPerOverstayHr,sch_centsPerKwh,choice,siteId,estCost,DurationHrs,dcosId,lastUpdate,energyReq_Wh,power,stationId,defaultDeadline,scheduled,cumEnergy_Wh,reg_centsPerHr,reqChargeTime_h,finishChargeTime,Overstay,Overstay_h
0,0,0,0,3229,4250,3280.0,200.0,2023-03-04 23:03:39,Volt,0 days 01:46:19,1154,1,,2023-03-04 23:03:48,300.0,0.0,REGULAR,25,3.15791,1.77194,4405,2023-03-05 00:50:07,,"[{'power_W': Decimal('3280'), 'timestamp': Dec...",11,2023-03-05T09:30:00,0,5542.0,150.0,1.689634,2023-03-05 00:45:11,0 days,0.0


In [5]:
# Two-step cleaning pipeline: sort/cast first, then derive the helper features.
pipe = Pipeline(steps=[("1", SortDropCast()), ("2", HelperFeatureCreation())])

# Run the pipeline and keep only the first resulting row, so `df` is a
# single session (a Series) from here on.
# NOTE(review): step "2" writes its output to data/raw_data.csv, the same file
# the previous cell read, so this run replaces that file with the filtered data.
df = pipe.fit_transform(df).iloc[0]

In [6]:
# Run create_ts on the single session row selected in the previous cell.
create_ts(df)

In [12]:
# Load today's sessions, take the first row per dcosId, and express each
# session's cumEnergy_Wh as a fraction of the total across those sessions.
df = pd.read_csv("data/todays_sessions.csv")
df2 = (
    df[["dcosId", "cumEnergy_Wh", "vehicle_model"]]
    .groupby("dcosId")
    .first()
    .assign(
        percentage_energy=lambda per_session: (
            per_session["cumEnergy_Wh"] / per_session["cumEnergy_Wh"].sum(axis=0)
        )
    )
)
df2

Unnamed: 0_level_0,cumEnergy_Wh,vehicle_model,percentage_energy
dcosId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4406,2869.0,e-Golf,0.058958
4407,35518.0,Model 3,0.729892
4410,10275.0,Model 3 Performance,0.21115


In [11]:
# Sum of cumEnergy_Wh over every row of df.
# NOTE(review): the frame displayed in this notebook repeats each session on
# many rows (one per "Time" value), so this total counts a session's energy
# once per row rather than once per session. Confirm that is intended.
df["cumEnergy_Wh"].sum(axis=0)

14014656.0

In [13]:
# Display the current `df` frame for inspection.
df

Unnamed: 0.1,Unnamed: 0,index,vehicle_maxChgRate_W,peakPower_W,sch_centsPerHr,connectTime,vehicle_model,Duration,userId,regular,Deadline,startChargeTime,sch_centsPerOverstayHr,sch_centsPerKwh,choice,siteId,estCost,DurationHrs,dcosId,lastUpdate,energyReq_Wh,power,stationId,defaultDeadline,scheduled,cumEnergy_Wh,reg_centsPerHr,reqChargeTime_h,finishChargeTime,Overstay,Overstay_h,Time,Power (W)
0,0,0,40000,6327.0,200,2023-03-05 13:15:10,e-Golf,0 days 00:29:07,1060,1,,2023-03-05 13:16:01,300,0,REGULAR,25,1.22791,0.48527,4406,2023-03-05 13:45:08,,"[{'power_W': Decimal('6327'), 'timestamp': Dec...",13,2023-03-05T21:00:00,0,2869.0,150,0.453453,2023-03-05 13:43:13,0 days,0.0,2023-03-05 00:00:00,0.0
1,0,0,40000,6327.0,200,2023-03-05 13:15:10,e-Golf,0 days 00:29:07,1060,1,,2023-03-05 13:16:01,300,0,REGULAR,25,1.22791,0.48527,4406,2023-03-05 13:45:08,,"[{'power_W': Decimal('6327'), 'timestamp': Dec...",13,2023-03-05T21:00:00,0,2869.0,150,0.453453,2023-03-05 13:43:13,0 days,0.0,2023-03-05 00:05:00,0.0
2,0,0,40000,6327.0,200,2023-03-05 13:15:10,e-Golf,0 days 00:29:07,1060,1,,2023-03-05 13:16:01,300,0,REGULAR,25,1.22791,0.48527,4406,2023-03-05 13:45:08,,"[{'power_W': Decimal('6327'), 'timestamp': Dec...",13,2023-03-05T21:00:00,0,2869.0,150,0.453453,2023-03-05 13:43:13,0 days,0.0,2023-03-05 00:10:00,0.0
3,0,0,40000,6327.0,200,2023-03-05 13:15:10,e-Golf,0 days 00:29:07,1060,1,,2023-03-05 13:16:01,300,0,REGULAR,25,1.22791,0.48527,4406,2023-03-05 13:45:08,,"[{'power_W': Decimal('6327'), 'timestamp': Dec...",13,2023-03-05T21:00:00,0,2869.0,150,0.453453,2023-03-05 13:43:13,0 days,0.0,2023-03-05 00:15:00,0.0
4,0,0,40000,6327.0,200,2023-03-05 13:15:10,e-Golf,0 days 00:29:07,1060,1,,2023-03-05 13:16:01,300,0,REGULAR,25,1.22791,0.48527,4406,2023-03-05 13:45:08,,"[{'power_W': Decimal('6327'), 'timestamp': Dec...",13,2023-03-05T21:00:00,0,2869.0,150,0.453453,2023-03-05 13:43:13,0 days,0.0,2023-03-05 00:20:00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
859,2,2,250000,6828.0,200,2023-03-05 19:13:41,Model 3 Performance,0 days 01:35:25,1276,1,,2023-03-05 19:14:42,300,0,REGULAR,25,2.88541,1.59027,4410,2023-03-05 20:50:07,,"[{'power_W': Decimal('17'), 'timestamp': Decim...",15,2023-03-05T20:30:00,0,10275.0,150,1.504833,2023-03-05 20:44:59,0 days,0.0,2023-03-05 23:35:00,0.0
860,2,2,250000,6828.0,200,2023-03-05 19:13:41,Model 3 Performance,0 days 01:35:25,1276,1,,2023-03-05 19:14:42,300,0,REGULAR,25,2.88541,1.59027,4410,2023-03-05 20:50:07,,"[{'power_W': Decimal('17'), 'timestamp': Decim...",15,2023-03-05T20:30:00,0,10275.0,150,1.504833,2023-03-05 20:44:59,0 days,0.0,2023-03-05 23:40:00,0.0
861,2,2,250000,6828.0,200,2023-03-05 19:13:41,Model 3 Performance,0 days 01:35:25,1276,1,,2023-03-05 19:14:42,300,0,REGULAR,25,2.88541,1.59027,4410,2023-03-05 20:50:07,,"[{'power_W': Decimal('17'), 'timestamp': Decim...",15,2023-03-05T20:30:00,0,10275.0,150,1.504833,2023-03-05 20:44:59,0 days,0.0,2023-03-05 23:45:00,0.0
862,2,2,250000,6828.0,200,2023-03-05 19:13:41,Model 3 Performance,0 days 01:35:25,1276,1,,2023-03-05 19:14:42,300,0,REGULAR,25,2.88541,1.59027,4410,2023-03-05 20:50:07,,"[{'power_W': Decimal('17'), 'timestamp': Decim...",15,2023-03-05T20:30:00,0,10275.0,150,1.504833,2023-03-05 20:44:59,0 days,0.0,2023-03-05 23:50:00,0.0
