# In [None]:
# import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from skmultilearn.problem_transform import BinaryRelevance
from skmultilearn.adapt import MLkNN
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from PIL import Image
import matplotlib.pyplot as plt

# read the pedestrian tracks; cells holding the text "None" are treated as missing
tracks_data = pd.read_csv("tracks.csv", na_values=["None"])

# read the agent class label table (its 'class' and 'id' columns are used below)
agent_class_label_info = pd.read_csv("agent_class_label_info.csv")

# read the semantic map label table
semantic_map_label_info = pd.read_csv("semantic_map_label_info.csv")

# lookup from agent class label ('class') to its integer id ('id'),
# one entry per row of the label table (later rows win on duplicate labels)
label_dict = {
    label_row['class']: label_row['id']
    for _, label_row in agent_class_label_info.iterrows()
}

# swap the class labels in the 'other_class' column for their integer ids
tracks_data = tracks_data.replace({"other_class": label_dict})

# integer ids of every known class
labels = [*label_dict.values()]

# open the semantic map image and show it
map_image = Image.open("semantic_map.png")
plt.imshow(np.asarray(map_image))
plt.show()

# preprocessing
# drop every row (axis=0) that has a missing value in any column
tracks_data = tracks_data.dropna(axis=0)

# fill missing values with the per-column median
# NOTE: after the row-wise dropna above there is nothing left to fill, so this
# is only a safeguard. numeric_only=True keeps median() from raising on the
# non-numeric columns (pandas >= 2.0 no longer skips them silently).
tracks_data = tracks_data.fillna(tracks_data.median(numeric_only=True))

# convert x and y coordinates of other agents to numpy arrays
# NOTE(security): eval() executes whatever expression is stored in the CSV
# cell. If tracks.csv can come from an untrusted source, parse the cells with
# ast.literal_eval instead.
tracks_data['other_x'] = tracks_data['other_x'].apply(lambda x: np.array(eval(x)))
tracks_data['other_y'] = tracks_data['other_y'].apply(lambda x: np.array(eval(x)))

# combine x and y coordinates of other agents into one column
# (vstack + transpose puts x in the first column and y in the second)
tracks_data['other_pos'] = tracks_data.apply(lambda x: np.vstack((x['other_x'], x['other_y'])).T, axis=1)
tracks_data = tracks_data.drop(columns=['other_x', 'other_y'])

# encode class labels as consecutive integers
encoder = LabelEncoder()
tracks_data['other_class'] = encoder.fit_transform(tracks_data['other_class'])

# split data into train and test sets
# 'other_class' is the prediction target, so it is dropped from the feature
# matrix together with 'oid' and 'other_pos'; leaving it in would leak the
# label into the features and make every accuracy score meaningless.
X_train, X_test, y_train, y_test = train_test_split(
    tracks_data.drop(columns=['oid', 'other_pos', 'other_class']),
    tracks_data['other_class'],
    test_size=0.3,
    random_state=42,
)

print(X_train.shape)
print(y_train.shape)

# define classifiers
# Default-configured instances. The tuning code further down creates its own
# estimators (and rebinds `knn`), so these are not used by the visible code.
rfc = RandomForestClassifier()
gbc = GradientBoostingClassifier()
knn = KNeighborsClassifier()


# helper for hyperparameter tuning

def tune_hyperparameters(model, param_grid, X, y, *, cv=5, scoring='accuracy', n_jobs=-1):
    """Grid-search ``param_grid`` for ``model`` and return the best estimator.

    Runs ``GridSearchCV`` on ``(X, y)``, prints the best parameter
    combination and its cross-validation score, and returns the winning
    estimator.

    Args:
        model: Estimator instance to tune.
        param_grid: Parameter grid handed to ``GridSearchCV``.
        X: Training features.
        y: Training targets.
        cv: Cross-validation setting forwarded to ``GridSearchCV``.
        scoring: Scoring setting forwarded to ``GridSearchCV``.
        n_jobs: Parallelism setting forwarded to ``GridSearchCV``
            (``-1`` uses all processors).

    Returns:
        The ``best_estimator_`` of the fitted search. With ``GridSearchCV``'s
        default ``refit=True`` this is the estimator refit on all of ``X``
        and ``y`` using the best parameters found.
    """
    search = GridSearchCV(model, param_grid, cv=cv, scoring=scoring, n_jobs=n_jobs)
    search.fit(X, y)

    # Name the metric in the report; fall back to a generic word when
    # `scoring` is not a plain string (e.g. a callable or None).
    score_name = scoring if isinstance(scoring, str) else "score"
    print(f"Best parameters: {search.best_params_}")
    print(f"Best cross-validation {score_name}: {search.best_score_}")
    return search.best_estimator_

# perform grid search with cross-validation to find the best hyperparameters

# Random Forest
rf = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 200, 500],
    'max_depth': [5, 10, None],
    'max_features': ['sqrt', 'log2'],
}
rf_tuned = tune_hyperparameters(rf, param_grid, X_train, y_train)

# Gradient Boosting
gb = GradientBoostingClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 200, 500],
    'max_depth': [3, 5, None],
    'learning_rate': [0.01, 0.1, 1.0],
}
gb_tuned = tune_hyperparameters(gb, param_grid, X_train, y_train)

# K-Nearest Neighbors
# knn_grid is kept as the fitted GridSearchCV object itself (not only its
# best estimator); it is used for prediction further down.
knn = KNeighborsClassifier()
param_grid = {
    'n_neighbors': [3, 5, 7, 9],  # closing quote was missing: syntax error
    'weights': ['uniform', 'distance'],
}
knn_grid = GridSearchCV(knn, param_grid, cv=5)
knn_grid.fit(X_train, y_train)

# Make predictions on the testing set for each model
# rf_tuned / gb_tuned are the best estimators returned by
# tune_hyperparameters; knn_grid is the fitted GridSearchCV object.
rfc_y_pred = rf_tuned.predict(X_test)
gbc_y_pred = gb_tuned.predict(X_test)
knn_y_pred = knn_grid.predict(X_test)

# Evaluate the accuracy of each model
rfc_acc = accuracy_score(y_test, rfc_y_pred)
gbc_acc = accuracy_score(y_test, gbc_y_pred)
knn_acc = accuracy_score(y_test, knn_y_pred)

print('Random Forest accuracy:', rfc_acc)
print('Gradient Boosting accuracy:', gbc_acc)
print('K-Nearest Neighbors accuracy:', knn_acc)