In [None]:
# import necessary libraries
import pandas as pd
import os
import glob
import seaborn as sns
import plotly.express as px
from sklearn.metrics import accuracy_score
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import numpy as np

# Turn off pandas' chained-assignment check (SettingWithCopyWarning) for the
# whole notebook; later cells assign columns on frames obtained by filtering.
pd.options.mode.chained_assignment = None

In [None]:
# Load every drive log (*.csv) from the e-tron data folder and stack the files
# into a single frame; "exp_no" records which file each row came from.
path = "../data/audi_etron/"
# glob returns files in arbitrary, filesystem-dependent order; sort them so
# the row order of the combined frame is the same on every run and machine.
csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not csv_files:
    # Without this guard pd.concat([]) only reports "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {path!r}")

# max_drive = 6
# NOTE(review): max_drive is defined but never applied as a filter in this cell.
max_drive = 16

dfs = []
for f in csv_files:
    # Drive number parsed from a name such as ".../Drive3.csv". It is not used
    # further here, but parsing fails loudly on files that do not follow the
    # "Drive<N>.csv" naming pattern.
    drive = int(f.split("Drive")[1].replace(".csv", ""))
    drive_df = pd.read_csv(f)
    # The source path doubles as the experiment identifier in later cells.
    drive_df["exp_no"] = f
    dfs.append(drive_df)

df = pd.concat(dfs, axis=0)
# Drop rows whose R1L first name is "Test".
df = df[df["R1L First Name"] != "Test"]

### Optional (extra analysis on Audi experiment Dec 2022)

In [None]:
# Load every R1L spreadsheet (*.xlsx) in the folder and stack them into df2;
# "exp_no" records which file each row came from.
path = "../data/audi/R1L/R1L/"
csv_files = glob.glob(os.path.join(path, "*.xlsx"))

# Do not bind the name `df` inside this loop: `df` holds the e-tron dataset
# loaded earlier, and the Analysis section still reads it after this optional
# cell has run.
dfs = []
for f in csv_files:
    sheet = pd.read_excel(f)
    sheet["exp_no"] = f
    dfs.append(sheet)

df2 = pd.concat(dfs, axis=0)

In [None]:
# Round the logged OTS estimate and the reported overall thermal sensation to
# whole numbers so the two can be compared value-for-value.
rounded_columns = {
    "OTS estimate (rounded)": "ClimateSenseGUI_eTronDemo_Front&RearZone_DBC::CSM_ZonalSignalsA_FrontRow::otsEst_R1L_OAL_d_non_G[non]",
    "Overall thermal sensation (rounded)": " Overall Thermal Sensation ",
}
for rounded_name, raw_name in rounded_columns.items():
    df2[rounded_name] = df2[raw_name].apply(lambda value: np.round(value, 0))

# Fraction of rows where the rounded estimate equals the rounded reported value.
reported = df2["Overall thermal sensation (rounded)"]
estimated = df2["OTS estimate (rounded)"]
acc = accuracy_score(reported, estimated)
print("Accuracy of OTS prediction: ", acc)

In [None]:
# df2["diff"] = df2[" Overall Thermal Sensation "]-df2["ClimateSenseGUI_eTronDemo_Front&RearZone_DBC::CSM_ZonalSignalsA_FrontRow::otsEst_R1L_OAL_d_non_G[non]"]
# Signed error per row: rounded reported OTS minus rounded OTS estimate.
rounded_reported = df2["Overall thermal sensation (rounded)"]
rounded_estimate = df2["OTS estimate (rounded)"]
df2["diff"] = rounded_reported - rounded_estimate

# Histogram of the error; the title call is the cell's last expression.
error_ax = df2["diff"].hist(bins=6, align="left")
error_ax.set_title("Error distribution (Y true - Y predicted) for OTS prediction")

In [None]:
# Signals (tSurfSet_*) whose value changes are counted.
setpoint_cols = [
    "ClimateSenseLog_eTron_06_06_22::OtfCtrlA_R1L::tSurfSet_R1L_ESB_HMAT_OVAL_d_C_G[C]",
    "ClimateSenseLog_eTron_06_06_22::ItfEstB_R1L::tSurfSet_R1L_ESC_HMAT_OVAL_d_C_G",
    "ClimateSenseLog_eTron_06_06_22::OtfCtrlA_R1L::tSurfSet_R1L_EFL_PTC_OVAL_d_C_G",
]
# Row-to-row differences, taken within each source file ("exp_no"). df2 is a
# concatenation of several files, so a plain .diff() would also compare the
# last row of one recording with the first row of the next and count that
# jump as a change.
difs = df2.groupby("exp_no")[setpoint_cols].diff()
# 1 where the value moved, 0 otherwise (the NaN first row of each file -> 0).
difs = difs.abs().gt(0).astype(int)
print("number of changes per 30 min: ")
# NOTE(review): the factor 10 assumes one row every 10 s, making
# len(df2) * 10 the logged time in seconds — confirm the logger's sample rate.
difs.sum(axis=0) / (len(df2) * 10 / (60 * 30))

## Analysis

In [None]:
# Violin plot of the target OTS values, one violin per value of "R1L Sex".
violin_ax = sns.violinplot(
    x="R1L Sex",
    y="R1L Target OTS (-3=verycold to 3=verywarm)",
    data=df,
)
violin_ax.set_title("Distribution of OTS score per gender")

In [None]:
# One point per experiment: mean outside temperature vs. mean target OTS.
x_col = "Outside Temp (C)"
y_col = "R1L Target OTS (-3=verycold to 3=verywarm)"
# Average only the two plotted columns. Averaging the whole frame with
# agg("mean") raises TypeError on pandas >= 2.0 because the frame also holds
# text columns (e.g. "R1L Sex", "R1L First Name").
per_experiment = df.groupby(by="exp_no")[[x_col, y_col]].mean()
fig = px.scatter(
    per_experiment,
    x=x_col,
    y=y_col,
    trendline="ols",
    title="Target OTS vs outside temperature",
)
fig.update_layout(
    autosize=False, width=500, height=500, margin=dict(l=40, r=20, t=50, b=50)
)

fig.show()

In [None]:
# One point per experiment: mean body weight vs. mean target OTS.
x_col = "R1L Weight (lbs)"
y_col = "R1L Target OTS (-3=verycold to 3=verywarm)"
# Average only the two plotted columns. Averaging the whole frame with
# agg("mean") raises TypeError on pandas >= 2.0 because the frame also holds
# text columns (e.g. "R1L Sex", "R1L First Name").
per_experiment = df.groupby(by="exp_no")[[x_col, y_col]].mean()
fig = px.scatter(
    per_experiment,
    x=x_col,
    y=y_col,
    trendline="ols",
    title="Target OTS vs body weight",
)
fig.update_layout(
    autosize=False, width=500, height=500, margin=dict(l=40, r=20, t=50, b=50)
)
fig.show()

In [None]:
# One point per experiment: mean person height vs. mean target OTS.
x_col = "R1L Height (cm)"
y_col = "R1L Target OTS (-3=verycold to 3=verywarm)"
# Average only the two plotted columns. Averaging the whole frame with
# agg("mean") raises TypeError on pandas >= 2.0 because the frame also holds
# text columns (e.g. "R1L Sex", "R1L First Name").
per_experiment = df.groupby(by="exp_no")[[x_col, y_col]].mean()
fig = px.scatter(
    per_experiment,
    x=x_col,
    y=y_col,
    trendline="ols",
    title="Target OTS vs person height",
)
fig.update_layout(
    autosize=False, width=500, height=500, margin=dict(l=40, r=20, t=50, b=50)
)
fig.show()

In [None]:
# Rows logged for Drive3.csv. "exp_no" stores the path exactly as glob returned
# it, and the separator before the file name depends on the OS ("\\" on
# Windows, "/" elsewhere). Compare on a separator-normalised copy so the
# selection is not empty outside Windows.
drive_file = "../data/audi_etron/Drive3.csv"
drive_df = df[df["exp_no"].str.replace("\\", "/", regex=False) == drive_file]

# Target OTS on the secondary (right) axis, cabin temperature on the primary.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["R1L Target OTS (-3=verycold to 3=verywarm)"],
        name="Target OTS",
    ),
    secondary_y=True,
)
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["Cabin Temp (C)"],
        name="Cabin temperature",
    ),
    secondary_y=False,
)
fig.update_layout(
    autosize=False,
    width=800,
    height=250,
    margin=dict(l=40, r=20, t=50, b=50),
    title="Target OTS vs cabin temperature for 3rd drive",
    xaxis_title="Time",
    yaxis_title="Temperature (C)",
)
fig.update_yaxes(title_text="Target OTS", secondary_y=True)
fig.show()

In [None]:
# Rows logged for Drive4.csv. "exp_no" stores the path exactly as glob returned
# it, and the separator before the file name depends on the OS ("\\" on
# Windows, "/" elsewhere). Compare on a separator-normalised copy so the
# selection is not empty outside Windows.
drive_file = "../data/audi_etron/Drive4.csv"
drive_df = df[df["exp_no"].str.replace("\\", "/", regex=False) == drive_file]

# Target OTS on the secondary (right) axis, cabin temperature on the primary.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["R1L Target OTS (-3=verycold to 3=verywarm)"],
        name="Target OTS",
    ),
    secondary_y=True,
)
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["Cabin Temp (C)"],
        name="Cabin temperature",
    ),
    secondary_y=False,
)
fig.update_layout(
    autosize=False,
    width=800,
    height=250,
    margin=dict(l=40, r=20, t=50, b=50),
    title="Target OTS vs cabin temperature for 4th drive",
    xaxis_title="Time",
    yaxis_title="Temperature (C)",
)
fig.update_yaxes(title_text="Target OTS", secondary_y=True)
fig.show()

In [None]:
# Rows logged for Drive6.csv. "exp_no" stores the path exactly as glob returned
# it, and the separator before the file name depends on the OS ("\\" on
# Windows, "/" elsewhere). Compare on a separator-normalised copy so the
# selection is not empty outside Windows.
drive_file = "../data/audi_etron/Drive6.csv"
drive_df = df[df["exp_no"].str.replace("\\", "/", regex=False) == drive_file]

# Target OTS on the secondary (right) axis, cabin temperature on the primary.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["R1L Target OTS (-3=verycold to 3=verywarm)"],
        name="Target OTS",
    ),
    secondary_y=True,
)
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["Cabin Temp (C)"],
        name="Cabin temperature",
    ),
    secondary_y=False,
)
fig.update_layout(
    autosize=False,
    width=800,
    height=250,
    margin=dict(l=40, r=20, t=50, b=50),
    title="Target OTS vs cabin temperature for 6th drive",
    xaxis_title="Time",
    yaxis_title="Temperature (C)",
)
fig.update_yaxes(title_text="Target OTS", secondary_y=True)
fig.show()

In [None]:
# Rows logged for Drive3.csv. "exp_no" stores the path exactly as glob returned
# it, and the separator before the file name depends on the OS ("\\" on
# Windows, "/" elsewhere). Compare on a separator-normalised copy so the
# selection is not empty outside Windows.
drive_file = "../data/audi_etron/Drive3.csv"
drive_df = df[df["exp_no"].str.replace("\\", "/", regex=False) == drive_file]

# Target OTS on the secondary (right) axis, solar intensity on the primary.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["R1L Target OTS (-3=verycold to 3=verywarm)"],
        name="Target OTS",
    ),
    secondary_y=True,
)
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["Solar Intensity (W/m^2)"],
        name="Solar Intensity (W/m^2)",
    ),
    secondary_y=False,
)
fig.update_layout(
    autosize=False,
    width=800,
    height=250,
    margin=dict(l=40, r=20, t=50, b=50),
    title="Target OTS vs solar intensity for 3rd drive",
    xaxis_title="Time",
    yaxis_title="Solar Intensity (W/m^2)",
)
fig.update_yaxes(title_text="Target OTS", secondary_y=True)
fig.show()

In [None]:
# Rows logged for Drive4.csv. "exp_no" stores the path exactly as glob returned
# it, and the separator before the file name depends on the OS ("\\" on
# Windows, "/" elsewhere). Compare on a separator-normalised copy so the
# selection is not empty outside Windows.
drive_file = "../data/audi_etron/Drive4.csv"
drive_df = df[df["exp_no"].str.replace("\\", "/", regex=False) == drive_file]

# Target OTS on the secondary (right) axis, solar intensity on the primary.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["R1L Target OTS (-3=verycold to 3=verywarm)"],
        name="Target OTS",
    ),
    secondary_y=True,
)
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["Solar Intensity (W/m^2)"],
        name="Solar Intensity (W/m^2)",
    ),
    secondary_y=False,
)
fig.update_layout(
    autosize=False,
    width=800,
    height=250,
    margin=dict(l=40, r=20, t=50, b=50),
    title="Target OTS vs solar intensity for 4th drive",
    xaxis_title="Time",
    yaxis_title="Solar Intensity (W/m^2)",
)
fig.update_yaxes(title_text="Target OTS", secondary_y=True)
fig.show()

In [None]:
# Rows logged for Drive6.csv. "exp_no" stores the path exactly as glob returned
# it, and the separator before the file name depends on the OS ("\\" on
# Windows, "/" elsewhere). Compare on a separator-normalised copy so the
# selection is not empty outside Windows.
drive_file = "../data/audi_etron/Drive6.csv"
drive_df = df[df["exp_no"].str.replace("\\", "/", regex=False) == drive_file]

# Target OTS on the secondary (right) axis, solar intensity on the primary.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["R1L Target OTS (-3=verycold to 3=verywarm)"],
        name="Target OTS",
    ),
    secondary_y=True,
)
fig.add_trace(
    go.Scatter(
        x=drive_df["_local_time"],
        y=drive_df["Solar Intensity (W/m^2)"],
        name="Solar Intensity (W/m^2)",
    ),
    secondary_y=False,
)
fig.update_layout(
    autosize=False,
    width=800,
    height=250,
    margin=dict(l=40, r=20, t=50, b=50),
    title="Target OTS vs solar intensity for 6th drive",
    xaxis_title="Time",
    yaxis_title="Solar Intensity (W/m^2)",
)
fig.update_yaxes(title_text="Target OTS", secondary_y=True)
fig.show()

## Preprocessing

In [None]:
# Label column and the columns kept alongside it for modelling.
target = "R1L Target OTS (-3=verycold to 3=verywarm)"
selection = [
    "R1L Weight (lbs)",
    "Outside Temp (C)",
    "Cabin Temp (C)",
    "R1L First Name",
    "R1L Sex",
    "exp_no",
]
# df2 = df[["R1R Weight (lbs)", "Outside Temp (C)", "Cabin Temp (C)", "R1R First Name", "R1R Sex", "exp_no", "R1R Target OTS (-3=verycold to 3=verywarm)"]]
# df2 = df2.rename(columns={"R1R Weight (lbs)": "R1L Weight (lbs)", "R1R First Name": "R1L First Name", "R1R Sex": "R1L Sex", "R1R Target OTS (-3=verycold to 3=verywarm)": "R1L Target OTS (-3=verycold to 3=verywarm)"})
# df2["exp_no"] = df2["exp_no"].apply(lambda x: x+ "_2")
# df = df[selection + [target]]
# df = pd.concat([df, df2], axis=0)

# Keep only the selected columns and discard rows with any missing value.
df = df[selection + [target]].dropna()

# 0/1 indicator columns derived from "R1L Sex".
for indicator, label in (("male", "Male"), ("female", "Female")):
    df[indicator] = df["R1L Sex"].apply(
        lambda sex, wanted=label: 1 if sex == wanted else 0
    )

# Count the distinct target values seen in each experiment, then split the
# experiment ids into those with more than one value and those with exactly one.
sub_df = df.groupby("exp_no")[target].nunique().reset_index()
changed_mask = sub_df[target] > 1
constant_mask = sub_df[target] == 1
change_setpoint = sub_df.loc[changed_mask, "exp_no"].values
other = sub_df.loc[constant_mask, "exp_no"].values

## (Optional) Feature engineering

In [None]:
# (Optional) Fit one personal model per experiment in which the target changed.
# The fitted cabin-temperature coefficient is then used below to scale the
# cabin temperature that is fed to the general model.
features = ["Cabin Temp (C)"]
weights = {}
for experiment in change_setpoint:
    # Explicit copy: the target is rewritten below and must not write into a
    # filtered view of df.
    sub_df = df[df["exp_no"] == experiment].copy()
    # Express the target relative to the first value recorded in this experiment.
    initial_setpoint = sub_df[target].iloc[0]
    sub_df[target] = sub_df[target] - initial_setpoint
    feature_df = sub_df[features]
    target_df = sub_df[target]
    model = LogisticRegression(random_state=0)
    pipe = make_pipeline(StandardScaler(), model)
    pipe.fit(feature_df, target_df)
    # Note: this accuracy is measured on the same rows the model was fitted on.
    print("Accuracy score: ", accuracy_score(target_df, pipe.predict(feature_df)))
    weights[experiment] = {
        # First row of coef_, first (and only) feature.
        "Cabin Temp (C)": pipe.named_steps["logisticregression"].coef_[0][0]
    }

# Experiments without a personal model get the average personal weight. The
# average is computed once, over the fitted weights only, before any default
# entries are added to the dict.
if weights:
    mean_weight = np.mean([w["Cabin Temp (C)"] for w in weights.values()])
else:
    # No personal model was fitted; the defaults (and the scaled column) are NaN.
    mean_weight = np.nan
for exp in df["exp_no"].unique():
    if exp not in weights:
        weights[exp] = {"Cabin Temp (C)": mean_weight}

# scale the cabin temperature with the personal weight
df["cabin_temp_weight"] = df["exp_no"].apply(lambda x: weights[x]["Cabin Temp (C)"])
df["scaled_cabin_temp"] = df["Cabin Temp (C)"] * df["cabin_temp_weight"]

## Model training and validation

In [None]:
def cross_validate(
    df,
    features,
    target,
    change_setpoint,
    other,
    verbose=0,
    test_set=True,
    acc_threshold=0.3,
):
    """Evaluate a scaled logistic regression, holding out one experiment at a time.

    For every experiment id in ``change_setpoint`` followed by ``other`` a fresh
    ``StandardScaler`` + ``LogisticRegression(class_weight="balanced")`` pipeline
    is fitted and scored with ``accuracy_score``. The feature list, the mean
    accuracy over all folds and the share of folds whose accuracy exceeds
    ``acc_threshold`` are printed; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``"exp_no"`` column plus ``features`` and ``target``.
    features : list of str
        Columns used as model inputs.
    target : str
        Column used as the class label.
    change_setpoint, other : array-like of experiment ids
        Together they define the experiments used for training and testing.
    verbose : int or bool, default 0
        When truthy, print accuracy, predictions and targets for every fold.
    test_set : bool, default True
        True: train on all listed experiments except the current one and score
        on the current one. False: train and score on all rows every iteration
        (the branch that was previously reachable only by editing the code).
    acc_threshold : float, default 0.3
        A fold counts towards "Rounded acc" when its accuracy is strictly
        greater than this value.

    Raises
    ------
    ValueError
        If ``change_setpoint`` and ``other`` are both empty.
    """
    experiments = np.concatenate((change_setpoint, other))
    if len(experiments) == 0:
        # Otherwise the summary below would end in a ZeroDivisionError.
        raise ValueError("cross_validate needs at least one experiment id")

    # Rows of the listed experiments; identical for every fold, so build once.
    sub_df = df[df["exp_no"].isin(experiments)]

    accs = []
    for test_exp in experiments:
        model = LogisticRegression(random_state=0, class_weight="balanced")
        pipe = make_pipeline(StandardScaler(), model)

        if test_set:
            train_df = sub_df[~sub_df["exp_no"].isin([test_exp])]
            eval_df = df[df["exp_no"].isin([test_exp])]
        else:
            train_df = sub_df
            eval_df = df

        pipe.fit(train_df[features], train_df[target])
        test_target = eval_df[target]
        test_prediction = pipe.predict(eval_df[features])
        fold_accuracy = accuracy_score(test_target, test_prediction)

        if verbose:
            print(
                "Accuracy score: ",
                fold_accuracy,
                "Experiment no: ",
                test_exp,
            )
            print("Predictions: ", test_prediction)
            print("Target: ", test_target.values)
        accs.append(fold_accuracy)

    print(features)
    print("Average accuracy: ", np.mean(accs))
    print("Rounded acc: ", sum(1 for a in accs if a > acc_threshold) / len(accs))

In [None]:
# Performance for different feature combinations: every run shares the same
# three base features and adds zero, one or two temperature columns.
target = "R1L Target OTS (-3=verycold to 3=verywarm)"
base_features = ["male", "female", "R1L Weight (lbs)"]
extra_feature_sets = [
    [],
    ["Outside Temp (C)"],
    ["Cabin Temp (C)"],
    ["scaled_cabin_temp"],
    ["scaled_cabin_temp", "Outside Temp (C)"],
]
for extra_features in extra_feature_sets:
    features = base_features + extra_features
    cross_validate(df, features, target, change_setpoint, other, verbose=0)