In [1]:
import pandas as pd 
from sklearn.pipeline import Pipeline
import plotly.express as px
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

# Raise pandas' "display.max_columns" option to 100 so wide DataFrames are
# displayed with up to 100 columns instead of being truncated.
pd.set_option("display.max_columns", 100)

In [2]:
class SortDropCast(BaseEstimator, TransformerMixin):
    """
    This pipeline step will sort values by field "connectTime",
    drop columns "user_email" and "slrpPaymentId" (when they are present),
    and cast columns "cumEnergy_Wh" and "peakPower_W" as float values.
    """
    def fit(self, X, y=None):
        """Nothing is learned from the data; returns ``self`` unchanged."""
        return self

    # NOTE(review): scikit-learn's automatic set_output wrapping (added in 1.2)
    # appears to re-bind ``transform`` as an instance method, which may not
    # work with a staticmethod -- confirm against the installed version.
    @staticmethod
    def transform(X) -> pd.DataFrame:
        """
        Return a sorted, trimmed and re-typed copy of ``X``.

        Parameters
        ----------
        X : pd.DataFrame
            Must contain "connectTime", "cumEnergy_Wh" and "peakPower_W".

        Returns
        -------
        pd.DataFrame
            New frame ordered by "connectTime" with a fresh 0..n-1 index;
            the caller's frame is not modified.
        """
        # sort_values/reset_index return a new frame, so the assignments
        # below never touch the caller's object.
        X = X.sort_values(by="connectTime").reset_index(drop=True)

        # errors="ignore" keeps the step usable on inputs where these
        # columns were already removed upstream.
        X = X.drop(columns=["user_email", "slrpPaymentId"], errors="ignore")

        X["cumEnergy_Wh"] = X["cumEnergy_Wh"].astype(float)
        X["peakPower_W"] = X["peakPower_W"].astype(float)
        return X


class HelperFeatureCreation(BaseEstimator, TransformerMixin):
    """
    This pipeline step will drop any records that contain 0 for
    "peakPower_W" or "cumEnergy_Wh". Four additional columns will be created:
    "reqChargeTime_h", "finishChargeTime", "Overstay", and "Overstay_h".
    Any records whose calculated charging duration is not below 24 hours
    will be dropped.
    The data (with these new features) at this stage is saved to
    "data/raw_data.csv".
    """
    def fit(self, X, y=None):
        """Nothing is learned from the data; returns ``self`` unchanged."""
        return self

    @staticmethod
    def transform(X) -> pd.DataFrame:
        """
        Filter unusable sessions, derive the helper columns and save the result.

        Parameters
        ----------
        X : pd.DataFrame
            Must contain "peakPower_W", "cumEnergy_Wh", "connectTime",
            "startChargeTime", "Deadline" and "lastUpdate".

        Returns
        -------
        pd.DataFrame
            Filtered copy of ``X`` with the four helper columns added. The
            same frame is also written to "data/raw_data.csv".
        """
        X = X.loc[(X["peakPower_W"] != 0) & (X["cumEnergy_Wh"] != 0)].copy(deep=True)

        # Hours needed to deliver the session energy at constant peak power.
        X["reqChargeTime_h"] = X["cumEnergy_Wh"] / X["peakPower_W"]

        for col in ("connectTime", "startChargeTime", "Deadline", "lastUpdate"):
            X[col] = pd.to_datetime(X[col])

        # Use the .dt accessor: Series.round() is the numeric "decimals"
        # rounding and is not guaranteed to round timedeltas to a frequency.
        charge_duration = pd.to_timedelta(X["reqChargeTime_h"], unit="h").dt.round("s")
        X["finishChargeTime"] = X["startChargeTime"] + charge_duration

        # Filter out bad rows (this occurs when there is a very low peak power
        # and high energy delivered). Copy so the column assignments below
        # operate on an independent frame rather than a slice.
        X = X.loc[X["reqChargeTime_h"] < 24].copy()

        # Overstay is the time past the deadline, floored at zero. Rows with
        # no deadline (NaT difference) also become zero, because NaT > zero
        # evaluates to False.
        zero = pd.Timedelta(0)
        overstay = X["lastUpdate"] - X["Deadline"]
        X["Overstay"] = overstay.where(overstay > zero, zero)

        # total_seconds() includes whole days; .dt.seconds would only return
        # the sub-day component and under-report overstays of a day or more.
        X["Overstay_h"] = X["Overstay"].dt.total_seconds() / 3600

        X.to_csv("data/raw_data.csv")

        return X

In [7]:
def create_ts(session):
    """
    Build a constant-power time series on a 5-minute grid for one session.

    Parameters
    ----------
    session : mapping or pd.Series
        Must provide "startChargeTime" and "finishChargeTime" as
        ``pd.Timestamp`` values and "peakPower_W" as a number.

    Returns
    -------
    pd.DataFrame
        One row per 5-minute step from the rounded start to the rounded
        finish (both inclusive), indexed by timestamp, with a single float
        column "power" holding ``peakPower_W``. The frame is empty when the
        rounded finish precedes the rounded start.
    """
    # Snap both ends to the nearest 5-minute boundary so the series lines up
    # with the sampling grid.
    grid_start = session["startChargeTime"].round("5min")
    grid_end = session["finishChargeTime"].round("5min")
    time_index = pd.date_range(start=grid_start, end=grid_end, freq="5min")

    power_vals = np.full(len(time_index), session["peakPower_W"], dtype=float)

    return pd.DataFrame({"power": power_vals}, index=time_index)

In [2]:
# Load "data/todays_sessions.csv" into a DataFrame and preview its first rows.
df = pd.read_csv("data/todays_sessions.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,index,vehicle_maxChgRate_W,peakPower_W,sch_centsPerHr,connectTime,vehicle_model,Duration,userId,regular,Deadline,startChargeTime,sch_centsPerOverstayHr,sch_centsPerKwh,choice,siteId,estCost,DurationHrs,dcosId,lastUpdate,energyReq_Wh,power,stationId,defaultDeadline,scheduled,cumEnergy_Wh,reg_centsPerHr,reqChargeTime_h,finishChargeTime,Overstay,Overstay_h,Time,Power (W)
0,0,2738,150000,6629.0,200,2023-03-20 07:28:53,bZ4X Limited,0 days 07:40:32,1259,1,,2023-03-20 07:29:37,300,0,REGULAR,25,12.01333,7.67555,4557,2023-03-20 15:10:09,,"[{'power_W': Decimal('6616'), 'timestamp': Dec...",15,2023-03-20T18:30:00,0,40040.0,150,6.040127,2023-03-20 13:32:01,0 days,0.0,2023-03-20 07:30:00,6629.0
1,0,2738,150000,6629.0,200,2023-03-20 07:28:53,bZ4X Limited,0 days 07:40:32,1259,1,,2023-03-20 07:29:37,300,0,REGULAR,25,12.01333,7.67555,4557,2023-03-20 15:10:09,,"[{'power_W': Decimal('6616'), 'timestamp': Dec...",15,2023-03-20T18:30:00,0,40040.0,150,6.040127,2023-03-20 13:32:01,0 days,0.0,2023-03-20 07:35:00,6629.0
2,0,2738,150000,6629.0,200,2023-03-20 07:28:53,bZ4X Limited,0 days 07:40:32,1259,1,,2023-03-20 07:29:37,300,0,REGULAR,25,12.01333,7.67555,4557,2023-03-20 15:10:09,,"[{'power_W': Decimal('6616'), 'timestamp': Dec...",15,2023-03-20T18:30:00,0,40040.0,150,6.040127,2023-03-20 13:32:01,0 days,0.0,2023-03-20 07:40:00,6629.0
3,0,2738,150000,6629.0,200,2023-03-20 07:28:53,bZ4X Limited,0 days 07:40:32,1259,1,,2023-03-20 07:29:37,300,0,REGULAR,25,12.01333,7.67555,4557,2023-03-20 15:10:09,,"[{'power_W': Decimal('6616'), 'timestamp': Dec...",15,2023-03-20T18:30:00,0,40040.0,150,6.040127,2023-03-20 13:32:01,0 days,0.0,2023-03-20 07:45:00,6629.0
4,0,2738,150000,6629.0,200,2023-03-20 07:28:53,bZ4X Limited,0 days 07:40:32,1259,1,,2023-03-20 07:29:37,300,0,REGULAR,25,12.01333,7.67555,4557,2023-03-20 15:10:09,,"[{'power_W': Decimal('6616'), 'timestamp': Dec...",15,2023-03-20T18:30:00,0,40040.0,150,6.040127,2023-03-20 13:32:01,0 days,0.0,2023-03-20 07:50:00,6629.0


In [4]:
# Read "data/raw_data.csv" (the same path HelperFeatureCreation.transform
# writes to), keep only the rows whose userId is 1259, and preview them.
raw = pd.read_csv("data/raw_data.csv")
raw = raw[raw["userId"] == 1259]
raw.head()

Unnamed: 0.1,Unnamed: 0,vehicle_maxChgRate_W,peakPower_W,sch_centsPerHr,connectTime,vehicle_model,Duration,userId,regular,Deadline,startChargeTime,sch_centsPerOverstayHr,sch_centsPerKwh,choice,siteId,estCost,DurationHrs,dcosId,lastUpdate,energyReq_Wh,power,stationId,defaultDeadline,scheduled,cumEnergy_Wh,reg_centsPerHr,reqChargeTime_h,finishChargeTime,Overstay,Overstay_h
2364,2771,150000,6624.0,127.0,2023-01-04 07:48:04,bZ4X Limited,0 days 08:46:50,1259,0,2023-01-04 17:30:00,2023-01-04 07:48:17,400.0,0.0,SCHEDULED,25,12.813,8.78055,3864,2023-01-04 16:35:07,63400.0,"[{'power_W': Decimal('6624'), 'timestamp': Dec...",15,2023-01-04T17:30:00,1,54224.0,184.0,8.18599,2023-01-04 15:59:27,0 days 00:00:00,0.0
2395,2829,150000,3400.0,127.0,2023-01-13 07:29:32,bZ4X Limited,0 days 06:30:17,1259,1,,2023-01-13 07:29:52,400.0,0.0,REGULAR,25,12.46868,6.50472,3929,2023-01-13 14:00:09,,"[{'power_W': Decimal('3400'), 'timestamp': Dec...",15,2023-01-13T17:30:00,0,40122.0,184.0,11.800588,2023-01-13 19:17:54,0 days 00:00:00,0.0
2404,2844,150000,6808.0,250.0,2023-01-16 12:01:39,bZ4X Limited,0 days 02:42:55,1259,1,,2023-01-16 12:02:13,400.0,0.0,REGULAR,25,4.21993,2.71527,3948,2023-01-16 14:45:08,,"[{'power_W': Decimal('6808'), 'timestamp': Dec...",16,2023-01-16T17:30:00,0,17128.0,137.0,2.515864,2023-01-16 14:33:10,0 days 00:00:00,0.0
2429,2890,150000,1874.0,127.0,2023-01-23 07:29:59,bZ4X Limited,0 days 05:04:44,1259,1,,2023-01-23 07:30:24,400.0,0.0,REGULAR,25,9.84515,5.07888,4002,2023-01-23 12:35:08,,"[{'power_W': Decimal('1874'), 'timestamp': Dec...",15,2023-01-23T17:30:00,0,30541.0,184.0,16.297225,2023-01-23 23:48:14,0 days 00:00:00,0.0
2454,2936,150000,2562.0,127.0,2023-01-27 07:41:11,bZ4X Limited,0 days 05:58:41,1259,1,,2023-01-27 07:41:27,400.0,0.0,REGULAR,25,11.49962,5.97805,4060,2023-01-27 13:40:08,,"[{'power_W': Decimal('2562'), 'timestamp': Dec...",15,2023-01-27T17:30:00,0,36368.0,184.0,14.19516,2023-01-27 21:53:10,0 days 00:00:00,0.0
