# Random Tree Classifier on bbox coordinates/cropped images

##### Importing required libraries

In [3]:
from torch.utils.data import DataLoader
from src.utils import collate_fn
from src.dataset import CropsScikitDataset
from matplotlib import pyplot as plt
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay, classification_report

import pandas as pd
import numpy as np
import cv2

##### Preprocessing Data

In [4]:
# Function to preprocess data
def preprocess(data: list, mode: str = 'crop'):
    """
    Function to preprocess data to match the Random Forest Classifier from scikit-learn.
    The fitting function expects:
    
    * X: Training input samples, {array-like} of shape (n_samples, n_features), where n-dim <= 2
    * y: Target values, array-like of shape (n_samples) or (n_samples, n_outputs), where n-dim <= 2
    * sample_weight: If None, samples are equally weighted.
    
    Parameters:
    -----------
    - data: list\n
    A list containing the features and labels of the data. 
     
    - mode: str, `default = "crop" | "crop", "bbox"`\n
      Used to specify whether we are preprocessing bounding box coordinates, images or image crops. By default `<mode>` is set to preprocess bounding box coordines.
    """
    
    # Reshaping labels to 1D-array because classifier expect dim <= 2
    print("before flattening:",data[1])
    labels = np.array(data[1]).flatten()
    #labels = data[1].numpy()
    print("reshaped labels:",labels)
    
    if mode == "crop":
      # Convert tensors to numpy, then find the maximum height and width among all crops
      features = [image.numpy() for image in data[0]]
      max_height = max(img.shape[0] for img in features)
      max_width = max(img.shape[1] for img in features)
      #print(f"max_height, max_width: ({max_height},{max_width})")

      # Add centered padding around the crop to match max height and width
      # To counter floor divison we add +1
      features = [np.pad(img,pad_width=(((max_height - img.shape[0])//2, (max_height - img.shape[0] + 1)//2),
                                      ((max_width - img.shape[1])//2, (max_width - img.shape[1] + 1)//2)),
                                        mode="constant", constant_values=0.0) for img in features]
     
      # Reshaping features from 3D to 2D
      features = np.array(features)
      samples,nx,ny = features.shape
      features = features.reshape(samples,nx*ny)
      return features, labels
      
    elif mode == "bbox":
      features = [coord for coord in data[0]]
      features = np.array(features)
      
      return features, labels
      
    #return features, labels, [org3d, org2d, reshaped]
    #return features, labels

##### Setting up custom dataset and dataloader

In [50]:
le = LabelEncoder()
le.fit(['human-ride-bicycle', 'human-walk-bicycle', 'human-hold-bicycle', 'human-ride-motorcycle', 'human-walk-motorcycle'])

train_dataset = CropsScikitDataset(anno_file=r'../annotations_hoi_frame.csv', img_dir = r'../images', label_encoder=le)
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)

test_dataset = CropsScikitDataset(anno_file=r'annotations_hoi_frame_741.csv', img_dir = r'../images', label_encoder=le)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True, collate_fn=collate_fn)

##### Training loop

In [51]:
# Training loop
epochs = 5
seed = 42
rf_classifier = RandomForestClassifier(n_estimators=1, random_state=seed, warm_start=True)

img, label, coord = next(iter(train_dataloader))
features, labels = preprocess((coord, label), mode="bbox")
rf_classifier.fit(features,labels)

for epoch in (range(epochs - 1)):
    print(f"\nINFO: EPOCH {epoch+1}/{epochs}")
    rf_classifier.n_estimators += 1 # Adding another tree
    img, label, coord = next(iter(train_dataloader))
    features, labels = preprocess((coord, label), mode="bbox")
    rf_classifier.fit(features,labels)
    
    
    #for frames, labels, coords in tqdm(train_dataloader,total=(len(train_dataloader)),leave=True):
    #    #features, labels = preprocess((frames, labels), mode="crop")
    #    features, labels = preprocess((coords, labels), mode="bbox")
    #    rf_classifier.fit(features,labels)
    #    rf_classifier.n_estimators += 1 # Adding another tree

before flattening: (tensor([1], dtype=torch.int32), tensor([1], dtype=torch.int32))
reshaped labels: [1 1]

INFO: EPOCH 1/5
before flattening: (tensor([1], dtype=torch.int32), tensor([3], dtype=torch.int32))
reshaped labels: [1 3]

INFO: EPOCH 2/5
before flattening: (tensor([1], dtype=torch.int32), tensor([1], dtype=torch.int32))
reshaped labels: [1 1]

INFO: EPOCH 3/5
before flattening: (tensor([1], dtype=torch.int32), tensor([1], dtype=torch.int32))
reshaped labels: [1 1]

INFO: EPOCH 4/5
before flattening: (tensor([1], dtype=torch.int32), tensor([1], dtype=torch.int32))
reshaped labels: [1 1]


In [52]:
print(rf_classifier.estimators_)
print(rf_classifier.predict([[100, 100, 50, 50], [100, 100, 50, 50], [100, 100, 50, 50], [100, 100, 50, 50]]))

[DecisionTreeClassifier(max_features='sqrt', random_state=1608637542), DecisionTreeClassifier(max_features='sqrt', random_state=1273642419), DecisionTreeClassifier(max_features='sqrt', random_state=1935803228), DecisionTreeClassifier(max_features='sqrt', random_state=787846414), DecisionTreeClassifier(max_features='sqrt', random_state=996406378)]


ValueError: non-broadcastable output operand with shape (4,1) doesn't match the broadcast shape (4,2)

##### Testing classifier on our data

In [None]:
# Testing classifier on a new testset
predictions = []
labels = []
for batch in tqdm(test_dataloader, total=len(test_dataloader)):
    frames, targets, coords = batch
    
    # Preprocess data
    features, label = preprocess((coords,targets), mode="bbox")
    labels.append(label)
    
    print(features)
    print(coords)
    # Make predictions on the new test set
    predictions.append(rf_classifier.predict(coords))

# Evaluate the classifier
accuracy = accuracy_score(labels, predictions)
#print(f"Accuracy: {accuracy}")
#print(classification_report(labels, predictions))
# Compute accuracy by confusion matrix
cm_matrix = confusion_matrix(labels, predictions)
# Plot confusion matrix
cm_matrix = ConfusionMatrixDisplay(confusion_matrix=cm_matrix, display_labels=rf_classifier.classes_)
#cm_matrix.plot()
for pred, truth in zip(predictions, labels):
    if pred != truth:
        print(f"Incorrect Prediction: {le.inverse_transform([pred])}, Actual: {le.inverse_transform([truth])}")


##### Training clf on bbox coordinates

In [None]:
# Training classifier on bbox coordinates
print(data_list[533][1])
print(len(data_list))
print(le.inverse_transform(np.ravel(data_list[0][1])))

# Separate features and labels in separate lists
# Reshaping labels cause classifier expect dim <= 2
features, labels = zip(*data_list)
labels = np.array(labels).reshape(-1)
features = np.array([coord[0] for coord in features])
print(labels.shape)
#print(features)
print(features.shape)
#features = [image[0].reshape(-1).numpy() for image in features]

# Split the data into training and testing sets with random seed
seed = 1
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=seed)

# Creating a RandomForestClassifier with 100 trees with random seed
# Trying to fit the features (cropped frames of human+object)
rf_classifier = RandomForestClassifier(n_estimators=10, random_state=seed)
rf_classifier.fit(features,labels)

# Make predictions on the test set
predictions = rf_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy}")
print(classification_report(y_test, predictions))


# Compute accuracy by confusion matrix
cm_matrix = confusion_matrix(y_test, predictions)

# Plot confusion matrix
cm_matrix = ConfusionMatrixDisplay(confusion_matrix=cm_matrix, display_labels=rf_classifier.classes_)
cm_matrix.plot()

for pred, truth in zip(predictions, y_test):
    if pred != truth:
        print(f"Incorrect Prediction: {le.inverse_transform([pred])}, Actual: {le.inverse_transform([truth])}")

"""
### Observation ###
Due to different shapes of the cropped frames
i cant train the classifier on the features.

I might try padding the crops?

Trying to pass coordinates instead, works.
"""

##### Training clf on cropped imgs

In [None]:
# Training classifier on cropped imgs
features, labels, plots = preprocess(data_list2, 10)

# Split the data into training and testing sets with random seed
seed = 1
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.8, random_state=seed)

# Creating a RandomForestClassifier with 10 trees with random seed
# Trying to fit the features (cropped frames of human+object)
rf_classifier = RandomForestClassifier(n_estimators=1, random_state=seed)
rf_classifier.fit(features,labels)

# Make predictions on the test set
predictions = rf_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy}")
print(classification_report(y_test, predictions))


# Compute accuracy by confusion matrix
cm_matrix = confusion_matrix(y_test, predictions)

# Plot confusion matrix
cm_matrix = ConfusionMatrixDisplay(confusion_matrix=cm_matrix, display_labels=rf_classifier.classes_)
cm_matrix.plot()

for pred, truth in zip(predictions, y_test):
    if pred != truth:
        print(f"Incorrect Prediction: {le.inverse_transform([pred])}, Actual: {le.inverse_transform([truth])}")

plt.figure(figsize=(10,4))
plt.subplot(1,3,1)
plt.title("3d img")
plt.imshow(plots[0])

plt.subplot(1,3,2)
plt.title("2d img")
plt.imshow(plots[1], cmap='gray', aspect='auto')

plt.subplot(1,3,3)
plt.title("padded 2d img")
plt.imshow(plots[2], cmap='gray', aspect='auto')
plt.tight_layout()