In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import joblib
import torch

In [10]:
# Load the labelled sensor readings. The path has no directory part, so the file is
# looked up relative to the notebook's current working directory.
df = pd.read_csv('labeled_ml.csv')

In [11]:
# Preview the first rows to check the column names and value formats.
df.head()

Unnamed: 0,Sensor ID,Date,o2,co2,temp,label,fruit
0,1,2023-12-21 00:00:00,20.711216,0.032239,1.287622,Normal,cherry
1,1,2023-12-21 01:00:00,20.711216,0.032239,1.287622,Normal,cherry
2,1,2023-12-21 02:00:00,20.752923,0.162936,0.72104,Normal,cherry
3,1,2023-12-21 03:00:00,20.752923,0.162936,0.72104,Normal,cherry
4,1,2023-12-21 04:00:00,20.514075,0.074559,0.085129,Normal,cherry


In [12]:
# Count missing values per column before transforming anything.
df.isna().sum()

Sensor ID    0
Date         0
o2           0
co2          0
temp         0
label        0
fruit        0
dtype: int64

In [13]:
# Split each row's comma-separated label string into a list of label names, which is
# the per-row input passed to MultiLabelBinarizer further down.
# - Names are stripped so "A, B" and "A,B" yield the same labels.
# - Non-string cells are passed through unchanged, so re-running this cell on the
#   already-split column is a no-op instead of raising AttributeError.
df['label'] = df['label'].apply(
    lambda x: [name.strip() for name in x.split(',')] if isinstance(x, str) else x
)

In [14]:
# Display the frame to confirm that every 'label' cell is now a list.
df

Unnamed: 0,Sensor ID,Date,o2,co2,temp,label,fruit
0,1,2023-12-21 00:00:00,20.711216,0.032239,1.287622,[Normal],cherry
1,1,2023-12-21 01:00:00,20.711216,0.032239,1.287622,[Normal],cherry
2,1,2023-12-21 02:00:00,20.752923,0.162936,0.721040,[Normal],cherry
3,1,2023-12-21 03:00:00,20.752923,0.162936,0.721040,[Normal],cherry
4,1,2023-12-21 04:00:00,20.514075,0.074559,0.085129,[Normal],cherry
...,...,...,...,...,...,...,...
84888,157,2024-01-13 18:00:00,0.656095,5.783519,-0.267696,"[Normal, Low O2]",cherry
84889,157,2024-01-13 19:00:00,0.656095,5.783519,-0.267696,"[Normal, Low O2]",cherry
84890,157,2024-01-13 20:00:00,0.479308,5.470705,-0.487633,"[Normal, Low O2]",cherry
84891,157,2024-01-13 21:00:00,0.479308,5.470705,-0.487633,"[Normal, Low O2]",cherry


In [15]:
# Encoder for the multi-label target (fitted on the 'label' lists below).
mlb = MultiLabelBinarizer()
# Encoder for the categorical 'fruit' column (fitted below).
label_encoder = LabelEncoder()

In [16]:
# Fit the binarizer on the label lists and get the indicator matrix (one row per reading,
# one column per entry of mlb.classes_).
binary_labels = mlb.fit_transform(df['label'])
# Replace the fruit names with the encoder's integer codes.
# NOTE(review): this overwrites df['fruit'], so re-running the cell re-fits the encoder on
# the already-encoded values. Only `mlb` is persisted in the cells visible here; if 'fruit'
# has to be encoded identically at inference time, `label_encoder` probably needs saving
# too — confirm.
df['fruit'] = label_encoder.fit_transform(df['fruit'])

In [17]:
# Label names learned by the binarizer, in the column order of binary_labels.
mlb.classes_

array(['CO2 Problem', 'Condensation (CO2)', 'Condensation (O2)',
       'High CO2', 'High O2', 'Lid Open', 'Low CO2', 'Low O2', 'Normal',
       'O2 Problem'], dtype=object)

In [18]:
# Persist the fitted binarizer to 'mlb.pkl' (relative to the working directory) so the
# same label-to-column mapping can be reloaded later.
joblib.dump(mlb, 'mlb.pkl')

['mlb.pkl']

In [None]:
# Inspect the raw indicator matrix.
binary_labels

In [None]:
# Wrap the indicator matrix in a DataFrame with one column per label name.
# Reusing df's index ties every label row to its source row, so the later column-wise
# pd.concat with df lines up row-for-row even when df's index is not the default 0..n-1.
labels_df = pd.DataFrame(binary_labels, columns=mlb.classes_, index=df.index)
labels_df

In [None]:
# Turn the label names into snake_case column names:
# lower-case, spaces become underscores, parentheses are removed.
labels_df.columns = [
    name.lower().replace(' ', '_').replace('(', '').replace(')', '')
    for name in labels_df.columns
]

In [None]:
# Check the renamed label columns.
labels_df

In [None]:
# Swap the list-valued 'label' column for the per-label indicator columns by placing
# labels_df next to the remaining columns of df (rows are matched on the index).
processed_df = pd.concat(
    [df.drop(columns=['label']), labels_df],
    axis='columns',
)

In [None]:
# Remove the identifier and timestamp columns so only the numeric inputs and the
# label indicators remain.
processed_df = processed_df.drop(columns=['Sensor ID', 'Date'])

In [None]:
# Inspect the frame that is fed to the windowing step.
processed_df

In [None]:
def create_window_size(data, window_size, stride, feature_columns=None, label_columns=None):
    """Cut a frame into fixed-length sliding windows for supervised training.

    Window ``k`` covers the rows at positions ``k * stride`` to
    ``k * stride + window_size - 1``. Its input vector is the ``feature_columns``
    values of those rows, flattened row by row; its target is the
    ``label_columns`` values of the window's last row. Trailing rows that do not
    fill a complete window are dropped.

    NOTE(review): windows are taken over consecutive rows with no grouping, so if
    ``data`` stacks several independent series (e.g. one per sensor) a window can
    straddle two of them — confirm this is acceptable.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows in the order they should be windowed. The selected columns must be
        numeric.
    window_size : int
        Number of consecutive rows per window (>= 1).
    stride : int
        Number of rows between the starts of consecutive windows (>= 1).
    feature_columns : sequence of str, optional
        Input columns. Defaults to ``['o2', 'co2', 'temp', 'fruit']``.
    label_columns : sequence of str, optional
        Target columns. Defaults to the columns of the notebook-level
        ``labels_df``, which must then exist.

    Returns
    -------
    X : numpy.ndarray, float64, shape (n_windows, window_size * n_features)
    y : numpy.ndarray, float64, shape (n_windows, n_labels)
        Both keep their second dimension when ``data`` is too short to yield any
        window (``n_windows == 0``).

    Raises
    ------
    ValueError
        If ``window_size`` or ``stride`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    if feature_columns is None:
        feature_columns = ['o2', 'co2', 'temp', 'fruit']
    if label_columns is None:
        # Backward-compatible default: the label frame built earlier in the notebook.
        label_columns = labels_df.columns
    feature_columns = list(feature_columns)
    label_columns = list(label_columns)

    # Convert to NumPy once instead of slicing the DataFrame for every window.
    features = data[feature_columns].to_numpy(dtype=np.float64)
    labels = data[label_columns].to_numpy(dtype=np.float64)

    # Start position of every complete window; empty when data is too short.
    starts = np.arange(0, len(data) - window_size + 1, stride)

    # Row positions of each window: shape (n_windows, window_size).
    window_rows = starts[:, None] + np.arange(window_size)

    # (n_windows, window_size, n_features) -> one flat vector per window, row by row.
    X = features[window_rows].reshape(len(starts), window_size * len(feature_columns))
    # Each window is labelled with the labels of its last row.
    y = labels[starts + window_size - 1]
    return X, y

In [None]:
# 48 rows per window, advancing 24 rows at a time, so consecutive windows share 24 rows.
# NOTE(review): the previewed Date values are one hour apart, which would make a window
# roughly two days long — confirm the sampling interval.
window_size, stride = 48, 24
X, y = create_window_size(processed_df, window_size, stride)

In [None]:
# Hold out 20% of the windows as the test set. shuffle=False keeps the windows in their
# original order, so the test set is the trailing part of X / y.
# NOTE(review): per the scikit-learn docs random_state only affects shuffling, so it
# appears to be a no-op here — confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

In [None]:
# Append a trailing axis of length 1: (n_windows, n_values) -> (n_windows, n_values, 1).
# NOTE(review): each window stays flattened along a single axis, so time steps and
# feature columns are interleaved — confirm the downstream model expects this layout
# rather than (n_windows, window_size, n_features).
X_train, X_test = (
    split.reshape(split.shape[0], split.shape[1], 1)
    for split in (X_train, X_test)
)

In [None]:
# Sanity-check the training array shapes before converting them to tensors.
X_train.shape, y_train.shape

In [None]:
# Convert every NumPy split to a float32 torch tensor.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.from_numpy(array).float()
    for array in (X_train, y_train, X_test, y_test)
)

In [None]:
# Pair each input tensor with its target tensor so they are indexed together.
train_dataset, test_dataset = (
    TensorDataset(inputs, targets)
    for inputs, targets in (
        (X_train_tensor, y_train_tensor),
        (X_test_tensor, y_test_tensor),
    )
)

In [None]:
# Batch both datasets 32 samples at a time, in stored order.
# NOTE(review): the training loader is not shuffled either, so every epoch sees the
# windows in the same order — confirm this is intentional.
train_loader, test_loader = (
    DataLoader(dataset, batch_size=32, shuffle=False)
    for dataset in (train_dataset, test_dataset)
)