In [1]:
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import joblib
import warnings

warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('labeled_ml.csv')

In [3]:
mlb = joblib.load('mlb.pkl')

In [4]:
def predict_classes(df, window_size=48, stride=24):
    """
    Predict the classes for sequences of sensor data using a pre-trained model.

    Parameters:
    -----------
    df : pandas.DataFrame
        The input dataframe containing sensor data. It must have columns 'o2', 'co2', 'temp', and 'date'.
        Optionally, it may contain a 'fruit' column for encoding categorical data.
    
    window_size : int, optional, default=48
        The size of the window over which the input data is segmented for making predictions.
    
    stride : int, optional, default=24
        The step size for sliding the window across the data. Determines the overlap between windows.

    Returns:
    --------
    predicted_labels : list of tuples
        A list of tuples where each tuple contains the 'date' value corresponding to the start of the window in the 
        original dataframe and the predicted class labels for that window.
    
    Notes:
    ------
    - The function assumes that a label encoder is stored in 'mlb.pkl' and that the
      model is stored as a TorchScript file named 'cnn_model_48h_24s.pt'.
    - If the 'fruit' column is present in the dataframe, it will be encoded and used as part of the input features.
    - The model expects the input tensor shape to be (batch_size, window_size * num_features, 1).
    - Predictions are made by applying a sigmoid function to the model's outputs and interpreting 
      values greater than 0.5 as positive class predictions.
    - The predicted class indices are mapped back to their original labels using the label encoder.

    Example:
    --------
    >>> df = pd.DataFrame({
            'o2': [...],
            'co2': [...],
            'temp': [...],
            'fruit': ['apple', 'banana', 'apple', ...],
            'date': [...]
        })
    >>> predictions = predict_classes(df)
    """
    mlb = joblib.load('mlb.pkl')

    device = "cuda" if torch.cuda.is_available() else "cpu"

    label_encoder = LabelEncoder()

    if 'fruit' in df.columns:
        df['fruit'] = label_encoder.fit_transform(df['fruit'])
        X = np.array([df.iloc[i:i + window_size][['o2', 'co2', 'temp', 'fruit']].to_numpy().flatten()
                  for i in range(0, len(df) - window_size + 1, stride)]).reshape(-1, window_size * 4, 1)
        dates = [df.iloc[i]['Date'] for i in range(0, len(df) - window_size + 1, stride)]
        sensor_ids = [df.iloc[i]['Sensor ID'] for i in range(0, len(df) - window_size + 1, stride)]
    else:
        print(f"Missing required columns in data. Required columns: 'fruit'")
        return None
    
    inputs = torch.tensor(X, dtype=torch.float32).to(device)

    model = torch.jit.load('cnn_model_48h_24s.pt').to(device)

    model.eval()

    with torch.no_grad():
        outputs = model(inputs)
        sigmoid_outputs = torch.sigmoid(outputs)
        predicted_tensor = (sigmoid_outputs > 0.5).float()

        for i in range(predicted_tensor.size(0)):
            if predicted_tensor[i].sum() == 0:
                max_prob_index = torch.argmax(sigmoid_outputs[i])
                predicted_tensor[i][max_prob_index] = 1
                
    predicted_indices = predicted_tensor.cpu().numpy()

    predicted_labels = mlb.inverse_transform(predicted_indices)

    predictions_with_dates_and_sensors = [(date, sensor_id, label) for date, sensor_id, label in zip(dates, sensor_ids, predicted_labels)]

    return predictions_with_dates_and_sensors

In [5]:
def fill_labels_with_predictions(df, predictions):
    """
    Fills the label column in the dataframe with the predicted class labels using backward fill.

    Parameters:
    -----------
    df : pandas.DataFrame
        The original dataframe containing the sensor data with a 'Date' column.
    
    predictions : list of tuples
        The list of tuples where each tuple contains a 'date' and 
        the corresponding predicted labels.

    Returns:
    --------
    df : pandas.DataFrame
        The original dataframe with the 'label' column filled using the predicted class labels and backward fill.
    """
    df_copy = df.copy()
    predictions_copy = predictions.copy()
    predictions_df = pd.DataFrame(predictions_copy, columns=['Date',  'Sensor ID', 'predicted_labels'])
    
    predictions_df['Date'] = pd.to_datetime(predictions_df['Date'])
    df_copy['Date'] = pd.to_datetime(df_copy['Date'])
    
    df_copy = pd.merge(df_copy, predictions_df, on=['Date', 'Sensor ID'], how='left')
    
    df_copy['predicted_labels'] = df_copy['predicted_labels'].fillna(method='ffill')

    return df_copy

In [6]:
predicted = predict_classes(df)

In [7]:
predictions_df = fill_labels_with_predictions(df, predicted)

In [9]:
predictions_df[predictions_df["Sensor ID"] ==30]

Unnamed: 0,Sensor ID,Date,o2,co2,temp,label,fruit,predicted_labels
14710,30,2023-12-21 00:00:00,20.623299,0.080325,2.285605,Normal,0,"(Condensation (CO2),)"
14711,30,2023-12-21 01:00:00,20.415465,0.267883,2.105598,Normal,0,"(Condensation (CO2),)"
14712,30,2023-12-21 02:00:00,20.565267,-0.133140,1.659492,Normal,0,"(Condensation (CO2), Normal)"
14713,30,2023-12-21 03:00:00,20.394583,0.017269,1.371026,Normal,0,"(Condensation (CO2), Normal)"
14714,30,2023-12-21 04:00:00,20.754635,0.071085,1.220197,Normal,0,"(Condensation (CO2), Normal)"
...,...,...,...,...,...,...,...,...
14907,30,2023-12-29 05:00:00,11.510542,7.048108,-0.410693,Normal,0,"(Normal,)"
14908,30,2023-12-29 06:00:00,11.444219,7.285766,-0.613312,Normal,0,"(Normal,)"
14909,30,2023-12-29 07:00:00,11.481728,7.530526,-0.737847,Normal,0,"(Normal,)"
14910,30,2023-12-29 08:00:00,11.749373,6.959724,-0.513911,Normal,0,"(Normal,)"
