In [43]:
import pandas as pd 
from sklearn.pipeline import Pipeline
from datacleaning.FetchData import FetchData
from datacleaning.CleanData import CleanData
from datetime import datetime
import plotly.express as px

pd.set_option("display.max_columns", 100)

In [88]:
FetchData.scan_save_all_records()
CleanData.clean_save_raw_data()

In [89]:
from sklearn.base import BaseEstimator, TransformerMixin
import pandas as pd 
import numpy as np 


class SortDropCast(BaseEstimator, TransformerMixin):
    """
    This pipeline step will sort values by field "connectTime",
    drop columns "user_email", "slrpPaymentId", 
    and cast columns "cumEnergy_Wh", "peakPower_W" as float values. 
    """
    def fit(self, X, y=None):
        return self

    @staticmethod
    def transform(X) -> pd.DataFrame:
        X = X.sort_values(by="connectTime").drop(columns=["user_email", "slrpPaymentId"]).reset_index(drop=True)
        X["cumEnergy_Wh"] = X["cumEnergy_Wh"].astype(float)
        X["peakPower_W"] = X["peakPower_W"].astype(float)
        return X


class HelperFeatureCreation(BaseEstimator, TransformerMixin):
    """
    This pipeline step will drop any records that contain 0 for 
    "peakPower_W" or "cumEnergy_Wh". Two additional columns will be created:
    "reqChargeTime" and "finishChargeTime".
    """
    def fit(self, X, y=None):
        return self

    @staticmethod
    def transform(X) -> pd.DataFrame:
        X = X.loc[(X["peakPower_W"] != 0) & (X["cumEnergy_Wh"] != 0)]
        X = X.assign(reqChargeTime_h=(X["cumEnergy_Wh"] / X["peakPower_W"]))
        X = X.assign(connectTime=(pd.to_datetime(X["connectTime"])))
        X = X.assign(
            finishChargeTime=(X["connectTime"] + pd.to_timedelta(X['reqChargeTime_h'], unit='hours').round("s"))
        )
        return X 


class CreateNestedSessionTimeSeries(BaseEstimator, TransformerMixin):
    """
    This pipeline step will create a time series for each session. Two new columns will be created, 
    "time_vals" and "power_vals", respective lists for a time and power demand. "time_vals" are rounded to the 
    closest 5 min. 
    """ 
    def __init__(self) -> None:
        super().__init__()
    
    def fit(self, X, y=None):
        return self 

    def transform(self, X) -> pd.DataFrame:
        self.ts_df = pd.DataFrame(columns=["time_vals", "power_vals"])
        X.apply(self.__create_ts, axis=1)
        X = pd.concat([X.reset_index(), self.ts_df], axis=1)
        return X

    def __create_ts(self, session):

        date_range = pd.date_range(start=session["connectTime"], end=session["finishChargeTime"], freq="5min").to_list()
        power_vals = np.ones(len(date_range)) * session["peakPower_W"]
        
        temp_df = pd.DataFrame([[date_range, power_vals]], columns=self.ts_df.columns)
        self.ts_df = pd.concat([self.ts_df, temp_df], ignore_index=True)


class FeatureCreation(BaseEstimator, TransformerMixin):
    """
    This pipeline step will create an "energy_demand_kWh" and "peak_power_W" column. 
    The name of the dataframe's index will be set to "time", and "day" and "month" columns 
    will be created. 
    """
    def fit(self, X, y=None):
        return self 

    @ staticmethod
    def transform(X) -> pd.DataFrame:
        X["energy_demand_kWh"] = (X["avg_power_demand_W"]/1000)/12
        # for the highest granularity, peak power is equal to the power demand
        # (different for different granularities though)
        X["peak_power_W"] = X["avg_power_demand_W"] 
        X.index.name = "time"
        X["day"] = X.index.day_name()
        X["month"] = X.index.month_name()
        return X


In [60]:
pipeline = Pipeline([
    ("1", SortDropCast()),
    ("2", HelperFeatureCreation()),
    ("3", CreateNestedSessionTimeSeries())
])

In [90]:
raw = pd.read_csv("data/raw_data.csv")
raw = raw.sort_values("connectTime")
raw["connectTime"] = pd.to_datetime(raw["connectTime"])
raw

Unnamed: 0.1,Unnamed: 0,vehicle_maxChgRate_W,peakPower_W,sch_centsPerHr,connectTime,user_email,vehicle_model,Duration,userId,regular,Deadline,startChargeTime,sch_centsPerOverstayHr,sch_centsPerKwh,choice,siteId,estCost,slrpPaymentId,DurationHrs,dcosId,lastUpdate,energyReq_Wh,power,stationId,defaultDeadline,scheduled,cumEnergy_Wh,reg_centsPerHr
2582,2582,6600,6335,9.0,2020-11-05 10:30:16,yossarianassyrian@gmail.com,500e,0 days 03:43:57,605,1,,2020-11-05T10:31:09,200.0,15.0,REGULAR,23,5.35224,6b108a9e64224989135baca2cffa2c596359c45ef2c0f0...,3.73249,24,2020-11-05T14:15:06,,"[{'power_W': Decimal('6259'), 'timestamp': Dec...",7,1969-12-31T16:00:00,0,3281,130.0
1347,1347,24000,7005,3.0,2020-11-11 07:39:55,sohum@ucsd.edu,Model 3,0 days 06:50:07,486,1,,2020-11-11T07:39:59,200.0,15.0,REGULAR,23,10.75291,7e40611ff8f81771592628312547eedede36157d5aee63...,6.83527,26,2020-11-11T14:30:06,,"[{'power_W': Decimal('0'), 'timestamp': Decima...",3,2020-11-12T03:11:00,0,33458,150.0
2611,2611,3600,3450,3.0,2020-11-13 16:19:55,kylebhaas@berkeley.edu,Volt,0 days 20:40:02,620,0,2020-11-14T04:15:00,2020-11-13T16:20:06,300.0,12.0,SCHEDULED,25,29.32211,5475d7bfb9fdca49e239f14c88bc0ac50a8a26976a3e44...,20.66722,30,2020-11-14T13:00:08,18400.0,"[{'power_W': Decimal('0'), 'timestamp': Decima...",12,2020-11-14T04:11:00,1,15216,180.0
660,660,7200,6889,3.0,2020-11-14 23:47:06,rayconstantino.me@gmail.com,Bolt,0 days 02:12:51,618,1,,2020-11-14T23:47:16,400.0,18.0,REGULAR,23,3.82125,add3a0ba4ca2d77670c3d08bbfb1c0903774c48d08d305...,2.21416,31,2020-11-15T02:00:07,,"[{'power_W': Decimal('6889'), 'timestamp': Dec...",6,1969-12-31T16:00:00,0,14378,150.0
346,346,6000,6852,,2020-11-16 11:38:44,khuffman@health.ucsd.edu,B-Class Electric Drive,0 days 03:12:45,623,1,,2020-11-16T11:42:22,,,REGULAR,23,,,3.21249,32,2020-11-16T14:55:07,,"[{'power_W': Decimal('6813'), 'timestamp': Dec...",9,2020-11-17T04:11:00,0,12484,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2719,2719,100000,6634,250.0,2023-01-25 12:51:54,courtnee.butler@berkeley.edu,Kona,0 days 04:13:04,1149,1,,2023-01-25T12:52:04,400.0,0.0,REGULAR,25,6.27835,c4d7f4630e8c6ea62c425b7afd8bd0265da2dc1767a6b1...,4.21777,4042,2023-01-25T17:05:08,,"[{'power_W': Decimal('6622'), 'timestamp': Dec...",11,2023-01-25T18:00:00,0,26383,137.0
965,965,20000,6339,250.0,2023-01-25 14:54:31,guillermo.cornejo@berkeley.edu,Model 3,0 days 07:54:46,909,1,,2023-01-25T14:55:23,400.0,0.0,REGULAR,25,11.34050,a1b2a74533d24cde7005a939ed9368b2a6741d05c73442...,7.91277,4043,2023-01-25T22:50:09,,"[{'power_W': Decimal('6276'), 'timestamp': Dec...",13,2023-01-25T21:00:00,0,46482,137.0
1502,1502,17200,6619,250.0,2023-01-25 15:24:35,d_zhang@berkeley.edu,Model X,0 days 04:39:57,1180,1,,2023-01-25T15:25:11,400.0,0.0,REGULAR,25,6.89219,1e15e69418fc88301bd47439d08450a00420232503b7d8...,4.66583,4044,2023-01-25T20:05:08,,"[{'power_W': Decimal('6510'), 'timestamp': Dec...",15,2023-01-25T19:15:00,0,29131,137.0
712,712,40000,6622,127.0,2023-01-25 17:01:23,haitam.laarabi@lbl.gov,e-Golf,0 days 01:38:30,1060,1,,2023-01-25T17:01:39,400.0,0.0,REGULAR,25,3.98033,d4b5bc0126a16062b641b32bfa584072bf659c03a7c57d...,1.64166,4046,2023-01-25T18:40:09,,"[{'power_W': Decimal('6608'), 'timestamp': Dec...",14,2023-01-25T21:00:00,0,10278,212.0


In [98]:
raw = pd.read_csv("data/raw_data.csv")
raw = raw.sort_values("connectTime")
raw["connectTime"] = pd.to_datetime(raw["connectTime"])
now = datetime.now().strftime('%D')
filtered = raw[raw["connectTime"] >= "01-25/2023"]
clean = pipeline.fit_transform(filtered)
clean = clean.explode(["time_vals", "power_vals"])
clean

Unnamed: 0.1,index,Unnamed: 0,vehicle_maxChgRate_W,peakPower_W,sch_centsPerHr,connectTime,vehicle_model,Duration,userId,regular,Deadline,startChargeTime,sch_centsPerOverstayHr,sch_centsPerKwh,choice,siteId,estCost,DurationHrs,dcosId,lastUpdate,energyReq_Wh,power,stationId,defaultDeadline,scheduled,cumEnergy_Wh,reg_centsPerHr,reqChargeTime_h,finishChargeTime,time_vals,power_vals
0,0,2404,150000,6593.0,127.0,2023-01-25 07:36:52,bZ4X Limited,0 days 06:38:00,1259,1,,2023-01-25T07:37:09,400.0,0.0,REGULAR,25,12.70533,6.63333,4034,2023-01-25T14:15:09,,"[{'power_W': Decimal('6592'), 'timestamp': Dec...",15,2023-01-25T17:30:00,0,38398.0,184.0,5.824056,2023-01-25 13:26:19,2023-01-25 07:36:52,6593.0
0,0,2404,150000,6593.0,127.0,2023-01-25 07:36:52,bZ4X Limited,0 days 06:38:00,1259,1,,2023-01-25T07:37:09,400.0,0.0,REGULAR,25,12.70533,6.63333,4034,2023-01-25T14:15:09,,"[{'power_W': Decimal('6592'), 'timestamp': Dec...",15,2023-01-25T17:30:00,0,38398.0,184.0,5.824056,2023-01-25 13:26:19,2023-01-25 07:41:52,6593.0
0,0,2404,150000,6593.0,127.0,2023-01-25 07:36:52,bZ4X Limited,0 days 06:38:00,1259,1,,2023-01-25T07:37:09,400.0,0.0,REGULAR,25,12.70533,6.63333,4034,2023-01-25T14:15:09,,"[{'power_W': Decimal('6592'), 'timestamp': Dec...",15,2023-01-25T17:30:00,0,38398.0,184.0,5.824056,2023-01-25 13:26:19,2023-01-25 07:46:52,6593.0
0,0,2404,150000,6593.0,127.0,2023-01-25 07:36:52,bZ4X Limited,0 days 06:38:00,1259,1,,2023-01-25T07:37:09,400.0,0.0,REGULAR,25,12.70533,6.63333,4034,2023-01-25T14:15:09,,"[{'power_W': Decimal('6592'), 'timestamp': Dec...",15,2023-01-25T17:30:00,0,38398.0,184.0,5.824056,2023-01-25 13:26:19,2023-01-25 07:51:52,6593.0
0,0,2404,150000,6593.0,127.0,2023-01-25 07:36:52,bZ4X Limited,0 days 06:38:00,1259,1,,2023-01-25T07:37:09,400.0,0.0,REGULAR,25,12.70533,6.63333,4034,2023-01-25T14:15:09,,"[{'power_W': Decimal('6592'), 'timestamp': Dec...",15,2023-01-25T17:30:00,0,38398.0,184.0,5.824056,2023-01-25 13:26:19,2023-01-25 07:56:52,6593.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,11,1715,170000,6809.0,250.0,2023-01-25 22:00:06,Model 3,0 days 00:59:40,902,1,,2023-01-25T22:00:30,400.0,0.0,REGULAR,25,1.86238,0.99444,4048,2023-01-25T23:00:10,,"[{'power_W': Decimal('6773'), 'timestamp': Dec...",11,2023-01-26T12:00:00,0,6274.0,137.0,0.921428,2023-01-25 22:55:23,2023-01-25 22:35:06,6809.0
6,11,1715,170000,6809.0,250.0,2023-01-25 22:00:06,Model 3,0 days 00:59:40,902,1,,2023-01-25T22:00:30,400.0,0.0,REGULAR,25,1.86238,0.99444,4048,2023-01-25T23:00:10,,"[{'power_W': Decimal('6773'), 'timestamp': Dec...",11,2023-01-26T12:00:00,0,6274.0,137.0,0.921428,2023-01-25 22:55:23,2023-01-25 22:40:06,6809.0
6,11,1715,170000,6809.0,250.0,2023-01-25 22:00:06,Model 3,0 days 00:59:40,902,1,,2023-01-25T22:00:30,400.0,0.0,REGULAR,25,1.86238,0.99444,4048,2023-01-25T23:00:10,,"[{'power_W': Decimal('6773'), 'timestamp': Dec...",11,2023-01-26T12:00:00,0,6274.0,137.0,0.921428,2023-01-25 22:55:23,2023-01-25 22:45:06,6809.0
6,11,1715,170000,6809.0,250.0,2023-01-25 22:00:06,Model 3,0 days 00:59:40,902,1,,2023-01-25T22:00:30,400.0,0.0,REGULAR,25,1.86238,0.99444,4048,2023-01-25T23:00:10,,"[{'power_W': Decimal('6773'), 'timestamp': Dec...",11,2023-01-26T12:00:00,0,6274.0,137.0,0.921428,2023-01-25 22:55:23,2023-01-25 22:50:06,6809.0


In [100]:
clean["time_vals"] = clean["time_vals"].dt.round('5min')
clean["userId"] = clean["userId"].astype(str)
clean = clean.sort_values("time_vals")
clean

Unnamed: 0.1,index,Unnamed: 0,vehicle_maxChgRate_W,peakPower_W,sch_centsPerHr,connectTime,vehicle_model,Duration,userId,regular,Deadline,startChargeTime,sch_centsPerOverstayHr,sch_centsPerKwh,choice,siteId,estCost,DurationHrs,dcosId,lastUpdate,energyReq_Wh,power,stationId,defaultDeadline,scheduled,cumEnergy_Wh,reg_centsPerHr,reqChargeTime_h,finishChargeTime,time_vals,power_vals
0,0,2404,150000,6593.0,127.0,2023-01-25 07:36:52,bZ4X Limited,0 days 06:38:00,1259,1,,2023-01-25T07:37:09,400.0,0.0,REGULAR,25,12.70533,6.63333,4034,2023-01-25T14:15:09,,"[{'power_W': Decimal('6592'), 'timestamp': Dec...",15,2023-01-25T17:30:00,0,38398.0,184.0,5.824056,2023-01-25 13:26:19,2023-01-25 07:35:00,6593.0
1,1,637,7200,3582.0,127.0,2023-01-25 07:42:05,Bolt,0 days 09:27:47,682,0,2023-01-25T17:30:00,2023-01-25T07:42:22,400.0,0.0,SCHEDULED,25,12.93823,9.46305,4035,2023-01-25T17:10:09,37311.0,"[{'power_W': Decimal('3554'), 'timestamp': Dec...",12,2023-01-25T17:30:00,1,32288.0,184.0,9.013959,2023-01-25 16:42:55,2023-01-25 07:40:00,3582.0
0,0,2404,150000,6593.0,127.0,2023-01-25 07:36:52,bZ4X Limited,0 days 06:38:00,1259,1,,2023-01-25T07:37:09,400.0,0.0,REGULAR,25,12.70533,6.63333,4034,2023-01-25T14:15:09,,"[{'power_W': Decimal('6592'), 'timestamp': Dec...",15,2023-01-25T17:30:00,0,38398.0,184.0,5.824056,2023-01-25 13:26:19,2023-01-25 07:40:00,6593.0
0,0,2404,150000,6593.0,127.0,2023-01-25 07:36:52,bZ4X Limited,0 days 06:38:00,1259,1,,2023-01-25T07:37:09,400.0,0.0,REGULAR,25,12.70533,6.63333,4034,2023-01-25T14:15:09,,"[{'power_W': Decimal('6592'), 'timestamp': Dec...",15,2023-01-25T17:30:00,0,38398.0,184.0,5.824056,2023-01-25 13:26:19,2023-01-25 07:45:00,6593.0
1,1,637,7200,3582.0,127.0,2023-01-25 07:42:05,Bolt,0 days 09:27:47,682,0,2023-01-25T17:30:00,2023-01-25T07:42:22,400.0,0.0,SCHEDULED,25,12.93823,9.46305,4035,2023-01-25T17:10:09,37311.0,"[{'power_W': Decimal('3554'), 'timestamp': Dec...",12,2023-01-25T17:30:00,1,32288.0,184.0,9.013959,2023-01-25 16:42:55,2023-01-25 07:45:00,3582.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,11,1715,170000,6809.0,250.0,2023-01-25 22:00:06,Model 3,0 days 00:59:40,902,1,,2023-01-25T22:00:30,400.0,0.0,REGULAR,25,1.86238,0.99444,4048,2023-01-25T23:00:10,,"[{'power_W': Decimal('6773'), 'timestamp': Dec...",11,2023-01-26T12:00:00,0,6274.0,137.0,0.921428,2023-01-25 22:55:23,2023-01-25 22:35:00,6809.0
6,11,1715,170000,6809.0,250.0,2023-01-25 22:00:06,Model 3,0 days 00:59:40,902,1,,2023-01-25T22:00:30,400.0,0.0,REGULAR,25,1.86238,0.99444,4048,2023-01-25T23:00:10,,"[{'power_W': Decimal('6773'), 'timestamp': Dec...",11,2023-01-26T12:00:00,0,6274.0,137.0,0.921428,2023-01-25 22:55:23,2023-01-25 22:40:00,6809.0
6,11,1715,170000,6809.0,250.0,2023-01-25 22:00:06,Model 3,0 days 00:59:40,902,1,,2023-01-25T22:00:30,400.0,0.0,REGULAR,25,1.86238,0.99444,4048,2023-01-25T23:00:10,,"[{'power_W': Decimal('6773'), 'timestamp': Dec...",11,2023-01-26T12:00:00,0,6274.0,137.0,0.921428,2023-01-25 22:55:23,2023-01-25 22:45:00,6809.0
6,11,1715,170000,6809.0,250.0,2023-01-25 22:00:06,Model 3,0 days 00:59:40,902,1,,2023-01-25T22:00:30,400.0,0.0,REGULAR,25,1.86238,0.99444,4048,2023-01-25T23:00:10,,"[{'power_W': Decimal('6773'), 'timestamp': Dec...",11,2023-01-26T12:00:00,0,6274.0,137.0,0.921428,2023-01-25 22:55:23,2023-01-25 22:50:00,6809.0


In [101]:
fig = px.bar(clean, x=clean["time_vals"], y=clean["power_vals"], color=clean["userId"])
fig.update_yaxes(showgrid=False)
fig