In [2]:
import pandas as pd 
from sklearn.pipeline import Pipeline
from datacleaning.FetchData import FetchData
from datacleaning.CleanData import CleanData
from datetime import datetime
import plotly.express as px

# Let pandas display up to 100 columns before it starts truncating wide DataFrames.
pd.set_option("display.max_columns", 100)

In [3]:
# Refresh the local data files before any analysis is run.
# NOTE(review): judging by the method names, the first call scans and saves all
# records and the second cleans and saves the raw data — presumably producing the
# "data/raw_data.csv" read further down; confirm in the datacleaning package.
FetchData.scan_save_all_records()
CleanData.clean_save_raw_data()

In [39]:
from sklearn.base import BaseEstimator, TransformerMixin
from datetime import datetime
import pandas as pd 
import numpy as np 


class SortDropCast(BaseEstimator, TransformerMixin):
    """
    Pipeline step that prepares the raw session table.

    The step sorts records by "connectTime", drops the columns "user_email"
    and "slrpPaymentId", casts "cumEnergy_Wh" and "peakPower_W" to float and
    converts "connectTime" to pandas datetimes. Finally only the sessions whose
    "connectTime" is on or after midnight of the current local day are kept,
    so the output depends on the day the step is run.
    """
    def fit(self, X, y=None):
        """Nothing is learned by this step; the estimator itself is returned."""
        return self

    @staticmethod
    def transform(X) -> pd.DataFrame:
        """
        Return a sorted, trimmed and type-cast version of ``X`` limited to today.

        Parameters
        ----------
        X : pd.DataFrame
            Session records with at least the columns "connectTime",
            "user_email", "slrpPaymentId", "cumEnergy_Wh" and "peakPower_W".

        Returns
        -------
        pd.DataFrame
            A new frame (``X`` itself is not modified). The index is renumbered
            after sorting but not after the date filter, so the remaining
            labels are positions within the full sorted table.

        Raises
        ------
        KeyError
            If one of the required columns is missing.
        """
        # Midnight of the current local day as a real Timestamp. This replaces
        # strftime('%D'), which is not available on every platform and produced
        # an ambiguous "mm/dd/yy" string that pandas had to parse back.
        today = pd.Timestamp.now().normalize()

        X = (
            X.sort_values(by="connectTime")
            .drop(columns=["user_email", "slrpPaymentId"])
            .reset_index(drop=True)
        )
        X["connectTime"] = pd.to_datetime(X["connectTime"])
        X["cumEnergy_Wh"] = X["cumEnergy_Wh"].astype(float)
        X["peakPower_W"] = X["peakPower_W"].astype(float)
        return X[X["connectTime"] >= today]


class HelperFeatureCreation(BaseEstimator, TransformerMixin):
    """
    Pipeline step that removes unusable records and adds helper columns.

    Records with 0 in "peakPower_W" or "cumEnergy_Wh" are dropped. Two columns
    are added: "reqChargeTime_h" (energy in Wh divided by peak power in W, i.e.
    hours of charging at peak power) and "finishChargeTime" ("connectTime" plus
    that duration, rounded to whole seconds).
    """
    def fit(self, X, y=None):
        """Nothing is learned by this step; the estimator itself is returned."""
        return self

    @staticmethod
    def transform(X) -> pd.DataFrame:
        """
        Return the filtered records with the two helper columns attached.

        Parameters
        ----------
        X : pd.DataFrame
            Session records with the columns "peakPower_W", "cumEnergy_Wh" and
            "connectTime" (anything ``pd.to_datetime`` accepts).

        Returns
        -------
        pd.DataFrame
            A new frame; "connectTime" is datetime typed in the result.
        """
        # NOTE(review): missing values compare as "not equal to 0", so rows with
        # NaN power or energy pass this filter and end up with a NaT finish time.
        has_power_and_energy = (X["peakPower_W"] != 0) & (X["cumEnergy_Wh"] != 0)
        X = X.loc[has_power_and_energy]

        X = X.assign(
            reqChargeTime_h=X["cumEnergy_Wh"] / X["peakPower_W"],
            connectTime=pd.to_datetime(X["connectTime"]),
        )

        # Round through the .dt accessor: Series.round() is documented for a
        # number of decimals, not for a frequency string such as "s".
        charge_duration = pd.to_timedelta(X["reqChargeTime_h"], unit="hours").dt.round("s")
        return X.assign(finishChargeTime=X["connectTime"] + charge_duration)


class CreateNestedSessionTimeSeries(BaseEstimator, TransformerMixin):
    """
    Pipeline step that expands every session into a full-day power profile.

    For each session a 5-minute grid covering the calendar day of "connectTime"
    (288 slots starting at midnight) is built. Samples are taken every 5 minutes
    from "connectTime" up to "finishChargeTime"; the slot containing a sample
    carries the session's "peakPower_W", every other slot is 0. The grid is
    stored in the new columns "time_vals" and "power_vals", which are then
    exploded so that the result has one row per session and slot. Samples that
    fall outside the connect day (sessions running past midnight) are not part
    of the grid and are dropped.
    """
    # Resolution of the generated profile and the number of such slots in 24 hours.
    _SLOT_FREQ = "5min"
    _SLOTS_PER_DAY = 288

    def __init__(self) -> None:
        super().__init__()

    def fit(self, X, y=None):
        """Nothing is learned by this step; the estimator itself is returned."""
        return self

    def transform(self, X) -> pd.DataFrame:
        """
        Attach the per-session profiles to ``X`` and explode them into rows.

        Parameters
        ----------
        X : pd.DataFrame
            Session records with the columns "connectTime" and
            "finishChargeTime" (datetimes), "peakPower_W" and "userId".

        Returns
        -------
        pd.DataFrame
            One row per session and 5-minute slot. The previous index of ``X``
            is kept as a column (via ``reset_index``) and "userId" is cast to str.
        """
        # Collect all profiles first and build the frame once, instead of
        # growing it with one pd.concat per session.
        profiles = [self.__create_ts(session) for _, session in X.iterrows()]

        # Still stored on the instance, as before, for code that inspects it
        # after transform() has run.
        self.ts_df = pd.DataFrame(profiles, columns=["time_vals", "power_vals"])

        # reset_index() gives both frames the same 0..n-1 labels, so the
        # column-wise concat pairs each session with its own profile.
        X = pd.concat([X.reset_index(), self.ts_df], axis=1)
        X = X.explode(["time_vals", "power_vals"])
        X["userId"] = X["userId"].astype(str)
        return X

    def __create_ts(self, session):
        """
        Build the full-day profile of a single session.

        Parameters
        ----------
        session : mapping or pd.Series
            Must provide "connectTime", "finishChargeTime" and "peakPower_W".

        Returns
        -------
        list
            ``[time_vals, power_vals]``: the timestamps of the day grid and the
            power assigned to each slot, both as plain lists of equal length.
        """
        connect_time = pd.Timestamp(session["connectTime"])

        # Constant peak power sampled every 5 minutes while the car is charging.
        charging_stamps = pd.date_range(
            start=connect_time, end=session["finishChargeTime"], freq=self._SLOT_FREQ
        )
        demand = pd.Series(session["peakPower_W"], index=charging_stamps, dtype=float)

        # Midnight-aligned grid for the connect day. normalize() replaces the
        # former strftime('%D') round-trip, which is not portable across platforms.
        day_grid = pd.date_range(
            start=connect_time.normalize(), periods=self._SLOTS_PER_DAY, freq=self._SLOT_FREQ
        )

        # resample() moves every sample to the start of the slot that contains
        # it; reindex() then pads the rest of the day with zeros.
        profile = demand.resample(self._SLOT_FREQ).sum().reindex(day_grid, fill_value=0)
        return [profile.index.to_list(), profile.to_list()]


In [40]:
# Load the raw session export and preview it.
# NOTE(review): at this stage the frame still contains the "user_email" column,
# so the rendered preview exposes e-mail addresses — consider hiding it before sharing.
raw = pd.read_csv("data/raw_data.csv")
raw

Unnamed: 0.1,Unnamed: 0,vehicle_maxChgRate_W,peakPower_W,sch_centsPerHr,connectTime,user_email,vehicle_model,Duration,userId,regular,Deadline,startChargeTime,sch_centsPerOverstayHr,sch_centsPerKwh,choice,siteId,estCost,slrpPaymentId,DurationHrs,dcosId,lastUpdate,energyReq_Wh,power,stationId,defaultDeadline,scheduled,cumEnergy_Wh,reg_centsPerHr
0,0,8800,3380,150.0,2022-01-07T06:23:52,lamartell@berkeley.edu,Prius Prime,0 days 01:40:50,603,0,2022-01-07T08:15:00,2022-01-07T06:24:19,500.0,0.0,SCHEDULED,25,3.26708,8021073541d47f523b5c018e82e34477ef6fc0f04610cd...,1.68055,1838,2022-01-07T08:05:09,8800.0,"[{'power_W': Decimal('1229'), 'timestamp': Dec...",12,2022-01-07T08:15:00,1,4214,200.0
1,1,6600,6563,250.0,2022-10-20T10:16:13,erfanm@berkeley.edu,Clarity PHEV,0 days 01:53:35,793,1,,2022-10-20T10:16:33,400.0,0.0,REGULAR,25,3.09348,46fd620dfb0f65509d36c93ced73d167268ade266ecb18...,1.89305,3416,2022-10-20T12:10:08,,"[{'power_W': Decimal('6563'), 'timestamp': Dec...",17,2022-10-20T15:30:00,0,11724,137.0
2,2,170000,6822,250.0,2023-02-02T22:08:40,amehregan@berkeley.edu,Model 3,0 days 00:56:17,902,1,,2023-02-02T22:08:51,400.0,0.0,REGULAR,25,1.78513,40aa08c906ce9403a383ba4142ad4784d6af32df1e3dd4...,0.93805,4119,2023-02-02T23:05:08,,"[{'power_W': Decimal('6822'), 'timestamp': Dec...",11,2023-02-03T12:00:00,0,6050,137.0
3,3,3600,3442,142.0,2022-03-22T15:39:06,nthaman@berkeley.edu,Volt,0 days 04:26:19,804,1,,2022-03-23T10:03:50,400.0,0.0,REGULAR,25,6.22580,1178d775901330553b8f1002dbfd65fc96e0a35a9a22e8...,4.43861,2289,2022-03-23T14:30:09,,"[{'power_W': Decimal('0'), 'timestamp': Decima...",16,2022-03-23T15:45:00,0,14529,129.0
4,4,170000,5170,250.0,2022-06-24T09:57:06,scott.moura@gmail.com,Model 3,0 days 06:37:26,615,1,,2022-06-24T09:57:41,400.0,0.0,REGULAR,25,9.57472,677481c9cf67f6563f944c9e44357b5e4a30925ef8a0b7...,6.62388,2733,2022-06-24T16:35:07,,"[{'power_W': Decimal('0'), 'timestamp': Decima...",16,2022-06-24T21:30:00,0,34904,137.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2996,2996,50000,919,250.0,2022-10-06T06:44:58,k.moua@berkeley.edu,Ioniq Electric,0 days 02:35:05,964,1,,2022-10-06T06:45:09,400.0,0.0,REGULAR,25,4.14445,54e801e6c08028f3ca182e3f9d7d6807d0cbcb7823aefe...,2.58472,3294,2022-10-06T09:20:14,,"[{'power_W': Decimal('919'), 'timestamp': Deci...",16,2022-10-06T20:00:00,0,15757,141.0
2997,2997,4500,3833,,2022-10-13T09:08:19,lauryn.holloway@berkeley.edu,Model 3,0 days 07:11:45,1126,1,,2022-10-13T09:08:29,,,REGULAR,25,,,7.19583,3345,2022-10-13T16:20:14,,"[{'power_W': Decimal('3833'), 'timestamp': Dec...",12,2022-10-13T16:45:00,0,42029,
2998,2998,3600,3444,50.0,2021-10-14T10:09:45,nthaman@berkeley.edu,Volt,0 days 05:09:48,804,0,2021-10-14T15:30:00,2021-10-14T10:10:20,400.0,0.0,SCHEDULED,25,3.16388,ac7de5785d27bc48d3bb03170daa36b047f3ebbe26cef5...,5.16333,1209,2021-10-14T15:20:08,18400.0,"[{'power_W': Decimal('3443'), 'timestamp': Dec...",18,2021-10-14T15:15:00,1,15281,200.0
2999,2999,4250,3239,127.0,2022-06-04T21:32:12,mwhitwill107@berkeley.edu,Volt,0 days 00:47:04,1154,0,2022-06-05T09:15:00,2022-06-04T21:33:03,400.0,0.0,SCHEDULED,25,15.35794,951bdeaba187978cfbda3f45e5d958c39f37b47b68d0f8...,0.78444,2648,2022-06-04T22:20:07,16500.0,"[{'power_W': Decimal('1936'), 'timestamp': Dec...",11,2022-06-05T02:00:00,1,2390,145.0


In [41]:
# Chain the three session-level steps defined in this notebook, in order:
# sort/drop/cast -> helper features -> per-session time series.
pipeline = Pipeline(
    steps=[
        ("1", SortDropCast()),
        ("2", HelperFeatureCreation()),
        ("3", CreateNestedSessionTimeSeries()),
    ]
)

In [42]:
# Re-read the raw export and run it through the three-step pipeline.
# SortDropCast keeps only sessions whose connectTime is on or after today's date,
# so the result of this cell depends on the day it is executed.
raw = pd.read_csv("data/raw_data.csv")
clean = pipeline.fit_transform(raw)
clean.head(20)

Unnamed: 0.1,index,Unnamed: 0,vehicle_maxChgRate_W,peakPower_W,sch_centsPerHr,connectTime,vehicle_model,Duration,userId,regular,Deadline,startChargeTime,sch_centsPerOverstayHr,sch_centsPerKwh,choice,siteId,estCost,DurationHrs,dcosId,lastUpdate,energyReq_Wh,power,stationId,defaultDeadline,scheduled,cumEnergy_Wh,reg_centsPerHr,reqChargeTime_h,finishChargeTime,time_vals,power_vals
0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:34:22,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.3947,3.57277,4135,2023-02-05T19:00:10,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,21986.0,137.0,3.382982,2023-02-05 17:30:28,2023-02-05 00:00:00,0.0
0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:34:22,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.3947,3.57277,4135,2023-02-05T19:00:10,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,21986.0,137.0,3.382982,2023-02-05 17:30:28,2023-02-05 00:05:00,0.0
0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:34:22,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.3947,3.57277,4135,2023-02-05T19:00:10,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,21986.0,137.0,3.382982,2023-02-05 17:30:28,2023-02-05 00:10:00,0.0
0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:34:22,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.3947,3.57277,4135,2023-02-05T19:00:10,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,21986.0,137.0,3.382982,2023-02-05 17:30:28,2023-02-05 00:15:00,0.0
0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:34:22,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.3947,3.57277,4135,2023-02-05T19:00:10,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,21986.0,137.0,3.382982,2023-02-05 17:30:28,2023-02-05 00:20:00,0.0
0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:34:22,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.3947,3.57277,4135,2023-02-05T19:00:10,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,21986.0,137.0,3.382982,2023-02-05 17:30:28,2023-02-05 00:25:00,0.0
0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:34:22,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.3947,3.57277,4135,2023-02-05T19:00:10,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,21986.0,137.0,3.382982,2023-02-05 17:30:28,2023-02-05 00:30:00,0.0
0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:34:22,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.3947,3.57277,4135,2023-02-05T19:00:10,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,21986.0,137.0,3.382982,2023-02-05 17:30:28,2023-02-05 00:35:00,0.0
0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:34:22,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.3947,3.57277,4135,2023-02-05T19:00:10,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,21986.0,137.0,3.382982,2023-02-05 17:30:28,2023-02-05 00:40:00,0.0
0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:34:22,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.3947,3.57277,4135,2023-02-05T19:00:10,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,21986.0,137.0,3.382982,2023-02-05 17:30:28,2023-02-05 00:45:00,0.0


In [43]:
# Bar chart of the 5-minute power profile, coloured by user.
# Columns are referenced by name so axis and legend labels do not depend on
# plotly inferring them from Series objects; title and labels make the figure
# readable on its own.
fig = px.bar(
    clean,
    x="time_vals",
    y="power_vals",
    color="userId",
    title="Charging power demand per user (5-minute intervals)",
    labels={
        "time_vals": "Time",
        "power_vals": "Power demand (W)",
        "userId": "User ID",
    },
)
fig.update_yaxes(showgrid=False)
fig

In [34]:
# Reduce the exploded table to one row per "dcosId" (first vehicle model seen for
# that id), then count how often each model occurs.
# NOTE(review): presumably one dcosId identifies one charging session — confirm.
car_pie = clean.groupby("dcosId")[["vehicle_model"]].first()
car_pie["vehicle_model"].value_counts()

Model Y    1
Name: vehicle_model, dtype: int64

In [35]:
# Pie chart of the vehicle models: slice sizes are the value counts, slice names
# are the corresponding model labels (the index of the same value_counts result).
px.pie(values = car_pie["vehicle_model"].value_counts(), names=car_pie["vehicle_model"].value_counts().index)

In [37]:
# Inspect the saved session time-series file.
# NOTE(review): the "Unnamed: 0", "Unnamed: 0.1" and "Unnamed: 0.2" columns in the
# output look like row indexes written out on successive saves — confirm, and
# consider writing the CSV with index=False.
pd.read_csv("data/todays_sessions.csv")

Unnamed: 0.2,Unnamed: 0.1,index,Unnamed: 0,vehicle_maxChgRate_W,peakPower_W,sch_centsPerHr,connectTime,vehicle_model,Duration,userId,regular,Deadline,startChargeTime,sch_centsPerOverstayHr,sch_centsPerKwh,choice,siteId,estCost,DurationHrs,dcosId,lastUpdate,energyReq_Wh,power,stationId,defaultDeadline,scheduled,cumEnergy_Wh,reg_centsPerHr,reqChargeTime_h,finishChargeTime,time_vals,power_vals
0,0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:24:20,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.16561,3.40555,4135,2023-02-05T18:50:08,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,20959.0,137.0,3.224958,2023-02-05 17:20:59,2023-02-05 00:00:00,0.0
1,0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:24:20,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.16561,3.40555,4135,2023-02-05T18:50:08,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,20959.0,137.0,3.224958,2023-02-05 17:20:59,2023-02-05 00:05:00,0.0
2,0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:24:20,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.16561,3.40555,4135,2023-02-05T18:50:08,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,20959.0,137.0,3.224958,2023-02-05 17:20:59,2023-02-05 00:10:00,0.0
3,0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:24:20,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.16561,3.40555,4135,2023-02-05T18:50:08,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,20959.0,137.0,3.224958,2023-02-05 17:20:59,2023-02-05 00:15:00,0.0
4,0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:24:20,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.16561,3.40555,4135,2023-02-05T18:50:08,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,20959.0,137.0,3.224958,2023-02-05 17:20:59,2023-02-05 00:20:00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:24:20,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.16561,3.40555,4135,2023-02-05T18:50:08,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,20959.0,137.0,3.224958,2023-02-05 17:20:59,2023-02-05 23:35:00,0.0
284,0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:24:20,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.16561,3.40555,4135,2023-02-05T18:50:08,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,20959.0,137.0,3.224958,2023-02-05 17:20:59,2023-02-05 23:40:00,0.0
285,0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:24:20,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.16561,3.40555,4135,2023-02-05T18:50:08,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,20959.0,137.0,3.224958,2023-02-05 17:20:59,2023-02-05 23:45:00,0.0
286,0,3000,2131,112000,6499.0,250.0,2023-02-05 14:07:29,Model Y,0 days 03:24:20,1168,1,,2023-02-05T15:25:48,400.0,0.0,REGULAR,25,5.16561,3.40555,4135,2023-02-05T18:50:08,,"[{'power_W': Decimal('6474'), 'timestamp': Dec...",18,2023-02-05T20:00:00,0,20959.0,137.0,3.224958,2023-02-05 17:20:59,2023-02-05 23:50:00,0.0


In [18]:
# Load the 5-minute demand table with "time" as its index (chained instead of
# in-place), parse the index into datetimes, and show the hourly maximum of
# every column.
df = pd.read_csv("data/fivemindemand.csv").set_index("time", drop=True)
df.index = pd.to_datetime(df.index)
df.resample("1H").max()

Unnamed: 0_level_0,avg_power_demand_W,energy_demand_kWh,peak_power_W,day,month
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-11-05 10:00:00,6335.0,0.527917,6335.0,Thursday,November
2020-11-05 11:00:00,6335.0,0.527917,6335.0,Thursday,November
2020-11-05 12:00:00,0.0,0.000000,0.0,Thursday,November
2020-11-05 13:00:00,0.0,0.000000,0.0,Thursday,November
2020-11-05 14:00:00,0.0,0.000000,0.0,Thursday,November
...,...,...,...,...,...
2023-02-19 01:00:00,107.0,0.008917,107.0,Sunday,February
2023-02-19 02:00:00,107.0,0.008917,107.0,Sunday,February
2023-02-19 03:00:00,107.0,0.008917,107.0,Sunday,February
2023-02-19 04:00:00,107.0,0.008917,107.0,Sunday,February


In [22]:
# Check the size (rows, columns) of the saved sessions file.
# Note: this rebinds `df`, which the cell above used for the 5-minute demand table.
df = pd.read_csv("data/todays_sessions.csv")
df.shape

(2592, 32)

In [25]:
from sklearn.pipeline import Pipeline
import pandas as pd
import datacleaning.fullcleaningclasses as pc
import datacleaning.sessionlevelcleaningclasses as sc

# Raw session export used as input for the pipeline below.
raw_data = pd.read_csv("data/raw_data.csv")

# Session-level pipeline assembled from the classes in
# datacleaning.sessionlevelcleaningclasses (imported as `sc`).
# NOTE(review): the first three steps presumably mirror the classes of the same
# names defined earlier in this notebook, and "save_csv" presumably writes the
# result to disk — confirm in the module.
session_lvl_pipeline = Pipeline(
    [
        ("sort_drop_cast", sc.SortDropCast()),
        ("create_helpers", sc.HelperFeatureCreation()),
        ("nested_ts", sc.CreateNestedSessionTimeSeries()),
        ("save_csv", sc.SaveCSV()),
    ]
)

# Run all steps on the raw data and print the plain-text representation of the result.
X = session_lvl_pipeline.fit_transform(raw_data)
print(X)

   Unnamed: 0  vehicle_maxChgRate_W  peakPower_W  sch_centsPerHr  \
0           0                  8800         3380           150.0   
1           1                  6600         6563           250.0   
2           2                170000         6822           250.0   
3           3                  3600         3442           142.0   
4           4                170000         5170           250.0   

           connectTime              user_email vehicle_model         Duration  \
0  2022-01-07T06:23:52  lamartell@berkeley.edu   Prius Prime  0 days 01:40:50   
1  2022-10-20T10:16:13     erfanm@berkeley.edu  Clarity PHEV  0 days 01:53:35   
2  2023-02-02T22:08:40  amehregan@berkeley.edu       Model 3  0 days 00:56:17   
3  2022-03-22T15:39:06    nthaman@berkeley.edu          Volt  0 days 04:26:19   
4  2022-06-24T09:57:06   scott.moura@gmail.com       Model 3  0 days 06:37:26   

   userId  regular             Deadline      startChargeTime  \
0     603        0  2022-01-07T08:15:00 