In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from scipy.stats import mode

from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
import matplotlib.pyplot as plt

### Notebook for applying a mode filter to the predictions stored in the CSV file generated in step C.

In [None]:
# Path of the CSV file holding the labels and predictions; point this at your own file
PREDICTIONS_CSV = '/home/ubuntu/work/SRLF/data/examples/labels_and_predictions_1.csv'
# Load it into a DataFrame; later code reads its 'True Labels' and 'Predicted Labels' columns
df = pd.read_csv(PREDICTIONS_CSV)

def apply_mode_filter(data, window_size=141):
    """
    Smooth a 1-D sequence with a sliding-window mode (most frequent value) filter.

    Each output element is the mode of the input elements lying within
    ``window_size // 2`` positions of it. At both ends of the sequence the
    window is truncated to the elements that exist; no padding is added.
    Ties inside a window are resolved by ``scipy.stats.mode``.

    Parameters:
    - data: 1-D sequence of shape (n,). A numpy array is used as is; an object
      exposing ``.cpu()`` (e.g. a torch tensor) is converted with
      ``.cpu().numpy()``; anything else (list, pandas Series, ...) is converted
      with ``np.asarray``.
    - window_size: Size of the window to calculate the mode, must be a
      positive odd integer

    Returns:
    - filtered_data: numpy array of shape (n,), with the dtype of the
      (converted) input, after applying the mode filter

    Raises:
    - ValueError: if window_size is even or not positive
    """
    if window_size % 2 == 0:
        raise ValueError("Window size must be odd.")
    if window_size < 1:
        # A negative odd size would silently produce empty windows below.
        raise ValueError("Window size must be positive.")

    # Normalise the input once, so that dtype detection and the per-window
    # slices all operate on the same numpy array.
    if not isinstance(data, np.ndarray):
        data = data.cpu().numpy() if hasattr(data, "cpu") else np.asarray(data)

    half_window = window_size // 2
    n = len(data)
    filtered_data = np.empty(n, dtype=data.dtype)

    for i in range(n):
        # Clamp the window to the valid index range at the sequence borders
        start = max(0, i - half_window)
        end = min(n, i + half_window + 1)

        # Find the mode in the window
        window_mode = mode(data[start:end])[0]

        # Depending on the SciPy version the mode comes back either as a
        # scalar or as a length-1 array; flatten so a plain scalar is stored.
        filtered_data[i] = np.ravel(window_mode)[0]

    return filtered_data


from sklearn.metrics import accuracy_score
from scipy.stats import mode

# Ground-truth column, shared by both accuracy computations below
true_labels = df['True Labels']

# Baseline: accuracy of the raw, unfiltered predictions
accuracy_before = accuracy_score(true_labels, df['Predicted Labels'])

# Smooth the raw predictions with the mode filter and store them as a new column
raw_predictions = df['Predicted Labels'].values
df['Filtered Predictions'] = apply_mode_filter(raw_predictions)

# Accuracy of the smoothed predictions against the same ground truth
accuracy_after = accuracy_score(true_labels, df['Filtered Predictions'])

# Last expression of the cell: show both scores side by side
(accuracy_before, accuracy_after)

#### Create Confusion Matrix

In [None]:
# Use one explicit, sorted label set for both the matrix and the axis ticks.
# Without `labels=`, confusion_matrix() infers its classes from BOTH columns,
# while the tick labels used to come from 'True Labels' only; the two disagree
# (and plotting fails) as soon as a class is predicted that never occurs in
# the ground truth.
class_labels = np.union1d(df['True Labels'], df['Filtered Predictions'])

# Row-normalised confusion matrix: every row (true class) sums to 1.
# normalize='true' also turns rows without any true samples into zeros
# instead of the NaNs a manual division by the row sum would produce.
cm = confusion_matrix(
    df['True Labels'],
    df['Filtered Predictions'],
    labels=class_labels,
    normalize='true',
)

# Round the confusion matrix values to two decimal places
cm = np.around(cm, decimals=2)

# Use a larger font for the numbers inside the boxes. Set it BEFORE the axes
# are created so the first run of this cell looks the same as a re-run.
# NOTE: this changes matplotlib's global defaults, so later figures are affected too.
plt.rcParams.update({'font.size': 23})  # Adjust font size as needed

# Increase figure size for better visibility
fig, ax = plt.subplots(figsize=(20, 20))  # You can adjust the size as needed
ax.xaxis.label.set_size(30)
ax.yaxis.label.set_size(30)

# Create the confusion matrix display object
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)

# Plot the confusion matrix with color map
disp.plot(cmap=plt.cm.Blues, values_format='.2f', ax=ax)

# Show the confusion matrix
plt.show()