In [4]:
import scipy.io
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

In [5]:
def helperMovingAverage(df):
    new_df = df.iloc[::100, [0,1,2,-1]]
    new_df.columns = ["Voltage", "Current", "Temperature", "SOC"]

    # Calculate moving averages
    new_df['AverageVoltage'] = new_df['Voltage'].rolling(window=6, min_periods=1).mean()
    new_df['AverageCurrent'] = new_df['Current'].rolling(window=6, min_periods=1).mean()

    # Reorder columns
    new_df = new_df[["Voltage", "Current", "AverageVoltage", "AverageCurrent", "Temperature", "SOC"]]

    return new_df


In [6]:
mat = scipy.io.loadmat('./battery_dataset_10to25C/Train/train_10to25C.mat')
X = mat['X']
Y = mat['Y']    
x = X.T
y = Y.T
df_x = pd.DataFrame(x, columns=['Voltage', 'Current', 'Temperature', 'IDK1', 'IDK2'])
df_y = pd.DataFrame(y, columns=['SOC'])
df_y.reset_index(drop=True, inplace=True)
trainDataFull = pd.concat([df_x, df_y], axis=1)
trainDataFull.to_csv('train_dataset.csv', index=False)
print(trainDataFull)
print(trainDataFull.shape)

         Voltage  Current  Temperature      IDK1     IDK2       SOC
0       0.385148  0.75102     0.303101  0.385148  0.75102  0.206417
1       0.385152  0.75102     0.304591  0.385150  0.75102  0.206417
2       0.385156  0.75102     0.306081  0.385152  0.75102  0.206417
3       0.385160  0.75102     0.307572  0.385154  0.75102  0.206417
4       0.385164  0.75102     0.309062  0.385156  0.75102  0.206417
...          ...      ...          ...       ...      ...       ...
669951  0.478843  0.75102     0.008477  0.459558  0.75102  0.283243
669952  0.478843  0.75102     0.008477  0.459699  0.75102  0.283243
669953  0.478843  0.75102     0.008477  0.459839  0.75102  0.283243
669954  0.478961  0.75102     0.008477  0.459979  0.75102  0.283243
669955  0.478961  0.75102     0.008477  0.460117  0.75102  0.283243

[669956 rows x 6 columns]
(669956, 6)


In [36]:
idx0 = list(range(1, 184258))
idx10 = list(range(184258, 337974))
idx25 = list(range(337974, 510531))
idxN10 = list(range(510531, 669956))

trainData0deg = helperMovingAverage(trainDataFull.iloc[idx0, :])
trainData10deg = helperMovingAverage(trainDataFull.iloc[idx10, :])
trainData25deg = helperMovingAverage(trainDataFull.iloc[idx25, :])
trainDataN10deg = helperMovingAverage(trainDataFull.iloc[idxN10, :])


In [37]:
trainData = pd.concat([trainData0deg, trainData10deg, trainData25deg, trainDataN10deg])
print(trainData)
print(trainData.shape)


         Voltage  Current  AverageVoltage  AverageCurrent  Temperature  \
1       0.385152  0.75102        0.385152        0.751020     0.304591   
101     0.385462  0.75102        0.385307        0.751020     0.457657   
201     0.385496  0.75102        0.385370        0.751020     0.623627   
301     0.385653  0.75102        0.385441        0.751020     0.771269   
401     0.386441  0.75102        0.385641        0.751020     0.847149   
...          ...      ...             ...             ...          ...   
669531  0.438553  0.75102        0.256802        0.682461     0.036806   
669631  0.458176  0.75102        0.301665        0.696795     0.025475   
669731  0.468750  0.75102        0.336484        0.704279     0.019809   
669831  0.474733  0.75102        0.410066        0.733806     0.014143   
669931  0.478368  0.75102        0.452500        0.751020     0.011310   

             SOC  
1       0.206417  
101     0.206417  
201     0.206417  
301     0.206417  
401     0.206417

In [None]:
dataframes = {'0deg': trainData0deg, '10deg': trainData10deg, '25deg': trainData25deg, 'N10deg': trainDataN10deg}
columns = ['Voltage', 'Current', 'Temperature', 'SOC']
sns.set_style("whitegrid")
save_dir = 'dataset_visualisation/train'
for key, df in dataframes.items():
    df_dir = os.path.join(save_dir, key)
    os.makedirs(df_dir, exist_ok=True)
    for i in range(len(columns)):
        for j in range(len(columns)):
            if i == j:
                continue
            plt.figure(figsize=(10, 8))
            sns.lineplot(x=df[columns[i]], y=df[columns[j]])
            plt.xlabel(columns[i])
            plt.ylabel(columns[j])
            plt.savefig(os.path.join(df_dir, f'{columns[j]}_vs_{columns[i]}.png'))

        plt.figure(figsize=(8, 6))
        sns.lineplot(x=df.index, y=df[columns[i]])
        plt.xlabel('Index')
        plt.ylabel(columns[i])
        plt.savefig(os.path.join(df_dir, f'{columns[i]}_vs_index.png'))


In [40]:
files = ['test_0C.mat', 'test_10C.mat', 'test_25C.mat', 'test_neg10C.mat']
dir_path = './battery_dataset_10to25C/Test/'

for file in files:
    mat = scipy.io.loadmat(dir_path + file)
    X = mat['X']
    Y = mat['Y']    
    x = X.T
    y = Y.T
    df_x = pd.DataFrame(x, columns=['Voltage', 'Current', 'Temperature', 'IDK1', 'IDK2'])
    df_y = pd.DataFrame(y, columns=['SOC'])
    df_y.reset_index(drop=True, inplace=True)
    testDataFull = pd.concat([df_x, df_y], axis=1)
    testDataFull.to_csv(file.replace('.mat', '.csv'), index=False)
    print(testDataFull)
    print(testDataFull.shape)

        Voltage   Current  Temperature      IDK1      IDK2       SOC
0      0.968130  0.749112     0.260607  0.968130  0.749112  1.000000
1      0.962495  0.746992     0.260607  0.965313  0.748052  0.999990
2      0.961792  0.747098     0.260607  0.964139  0.747734  0.999983
3      0.961555  0.746992     0.260607  0.963493  0.747549  0.999973
4      0.961325  0.747098     0.260607  0.963059  0.747459  0.999963
...         ...       ...          ...       ...       ...       ...
42525  0.413442  0.751020     0.269106  0.402145  0.751020  0.216727
42526  0.413442  0.751020     0.269106  0.402244  0.751020  0.216727
42527  0.413560  0.751020     0.269106  0.402341  0.751020  0.216727
42528  0.413561  0.751020     0.271939  0.402438  0.751020  0.216727
42529  0.413561  0.751020     0.271939  0.402534  0.751020  0.216727

[42530 rows x 6 columns]
(42530, 6)
        Voltage   Current  Temperature      IDK1      IDK2       SOC
0      0.968019  0.749112     0.521236  0.968019  0.749112  1.0000

In [None]:
files = ['test_0C.csv', 'test_10C.csv', 'test_25C.csv', 'test_neg10C.csv']
dataframes = {}
for file in files:
    df = pd.read_csv(f'./csv_battery_dataset/{file}')
    key = file.replace('.csv','')
    dataframes[key] = df
    
columns = ['Voltage', 'Current', 'Temperature', 'SOC']
sns.set_style("whitegrid")
save_dir = 'dataset_visualisation/test'
for key, df in dataframes.items():
    df_dir = os.path.join(save_dir, key)
    os.makedirs(df_dir, exist_ok=True)
    for i in range(len(columns)):
        for j in range(len(columns)):
            if i == j:
                continue
            plt.figure(figsize=(10, 8))
            sns.lineplot(x=df[columns[i]], y=df[columns[j]])
            plt.xlabel(columns[i])
            plt.ylabel(columns[j])
            plt.savefig(os.path.join(df_dir, f'{columns[j]}_vs_{columns[i]}.png'))

        plt.figure(figsize=(8, 6))
        sns.lineplot(x=df.index, y=df[columns[i]])
        plt.xlabel('Index')
        plt.ylabel(columns[i])
        plt.savefig(os.path.join(df_dir, f'{columns[i]}_vs_index.png'))
