In [None]:
# Colab-only helper used to reach files stored on Google Drive.
from google.colab import drive

# Mount Google Drive at /content/drive so the dataset path used later resolves.
drive.mount('/content/drive')

# Data handling, sparse matrix construction, and the scikit-learn pieces used
# for the nearest-neighbour model and the train/test split.
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the dataset CSV from the shared Drive into a DataFrame.
# Later cells read its 'PlayerID' and 'GameName' columns.
celeste_path = '/content/drive/Shared drives/WE ARE SPEED/dataset/users_final_games2.csv'

df_celeste = pd.read_csv(celeste_path)

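Testing starts here: the cells below split the player-game interactions into train and test sets, fit a KNN model on the training set, and evaluate its recommendations on the test set.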

In [None]:
# Reduce the raw rows to one record per (PlayerID, GameName) pair.
## The split below is made on these pairs, so each side of the split still
## records which player every interaction belongs to.
interactions = df_celeste[['PlayerID', 'GameName']].drop_duplicates()

# Hold out 30% of the pairs for testing and keep 70% for training (fixed seed).
train_interactions, test_interactions = train_test_split(
    interactions,
    test_size=0.3,
    random_state=42,
)


def _to_player_game_matrix(pairs):
    """Pivot (PlayerID, GameName) pairs into a players x games table.

    A cell holds 1 where the pair occurs in ``pairs`` and 0 elsewhere.
    """
    return pairs.pivot_table(
        index='PlayerID',
        columns='GameName',
        aggfunc=lambda x: 1,
        fill_value=0,
    )


# One table per side of the split.
train_matrix = _to_player_game_matrix(train_interactions)
test_matrix = _to_player_game_matrix(test_interactions)

# The model is fitted on a SciPy CSR copy of the training table.
sparse_train_matrix = csr_matrix(train_matrix.values)

# Brute-force nearest-neighbour search over player rows using cosine distance.
knn = NearestNeighbors(metric='cosine', algorithm='brute')
knn.fit(sparse_train_matrix)

In [None]:
def recommend_games(player_id, player_game_matrix, model, n_recommendations=5, n_neighbors=5):
    """Recommend games the player's nearest neighbours played but the player has not.

    Parameters
    ----------
    player_id
        Label of the player in ``player_game_matrix.index``.
    player_game_matrix : pandas.DataFrame
        Players x games table; a 0 entry is treated as "not played".
        NOTE(review): ``model`` is assumed to have been fitted on the rows of
        this same table in the same order, because the positions returned by
        ``model.kneighbors`` are used to index into it - confirm at call sites.
    model
        Object exposing ``kneighbors(X, n_neighbors=...)`` that returns
        ``(distances, indices)``.
    n_recommendations : int, default 5
        Maximum number of games to return.
    n_neighbors : int, default 5
        Number of *other* players to consult. The default reproduces the
        previous hard-coded behaviour (query 6, discard self).

    Returns
    -------
    list
        Game labels ordered by how many neighbours played them (most first,
        ties in column order). Empty when the player is unknown, when there
        are no other players, or when no neighbour played anything new.
        May be shorter than ``n_recommendations``: games that none of the
        neighbours played are never used as padding.
    """
    if player_id not in player_game_matrix.index:
        return []

    player_idx = player_game_matrix.index.get_loc(player_id)

    # Ask for one extra neighbour because the player is normally returned as
    # their own nearest neighbour. Cap at the number of rows so that small
    # matrices do not make kneighbors raise for requesting too many neighbours.
    n_query = min(n_neighbors + 1, len(player_game_matrix))
    if n_query < 2:
        return []  # nobody else to learn from

    # .values strips the column labels so a model fitted on a plain array
    # does not complain about unexpected feature names.
    _, indices = model.kneighbors(
        player_game_matrix.iloc[[player_idx]].values, n_neighbors=n_query
    )

    # Remove the player by position instead of assuming they occupy slot 0:
    # rows identical to the player's tie at distance 0 and may be listed first.
    neighbor_positions = [int(i) for i in indices[0] if i != player_idx][:n_neighbors]

    neighbor_game_counts = player_game_matrix.iloc[neighbor_positions].sum(axis=0)

    unplayed_games = player_game_matrix.iloc[player_idx] == 0
    candidates = neighbor_game_counts[unplayed_games]
    # A count of 0 means no neighbour played the game, so there is no evidence
    # for recommending it.
    candidates = candidates[candidates > 0]

    # mergesort is stable, which makes the order of tied games deterministic.
    recommendations = candidates.sort_values(ascending=False, kind="mergesort").head(n_recommendations)

    return list(recommendations.index)


In [None]:
def precision_recall_hit_at_k(model, player_game_matrix, true_matrix, k=5):
    """Average precision@k, recall@k and hit rate@k over the players in ``true_matrix``.

    For each player in ``true_matrix.index`` that also appears in
    ``player_game_matrix.index`` and has at least one positive entry in
    ``true_matrix``, ``recommend_games`` is asked for ``k`` games and the
    result is compared with that player's positive columns:

    * precision = hits / k
    * recall    = hits / number of positive columns
    * hit       = 1 if there is at least one hit, otherwise 0

    Returns the tuple ``(mean precision, mean recall, mean hit)``, or
    ``(0, 0, 0)`` when no player could be evaluated.
    """
    known_players = player_game_matrix.index
    scores = []  # one (precision, recall, hit) triple per evaluated player

    for player_id in true_matrix.index:
        # Players missing from the matrix used for recommending are skipped.
        if player_id not in known_players:
            continue

        held_out_row = true_matrix.loc[player_id]
        relevant = set(true_matrix.columns[held_out_row > 0])
        if len(relevant) == 0:
            continue

        suggested = set(
            recommend_games(player_id, player_game_matrix, model, n_recommendations=k)
        )
        overlap = len(suggested & relevant)

        scores.append((overlap / k, overlap / len(relevant), int(overlap > 0)))

    if not scores:
        return 0, 0, 0  # nothing evaluated; also avoids dividing by zero

    evaluated = len(scores)
    avg_precision = sum(p for p, _, _ in scores) / evaluated
    avg_recall = sum(r for _, r, _ in scores) / evaluated
    hit_rate = sum(h for _, _, h in scores) / evaluated

    return avg_precision, avg_recall, hit_rate

# Evaluate on test_matrix: recommendations are generated from train_matrix
# with the fitted knn model and compared with each player's games in test_matrix.
precision, recall, hit_rate = precision_recall_hit_at_k(knn, train_matrix, test_matrix, k=5)

# Report the three averaged metrics to four decimal places.
print(f"Test Precision@5: {precision:.4f}")
print(f"Test Recall@5: {recall:.4f}")
print(f"Test Hit Rate@5: {hit_rate:.4f}")


Test Precision@5: 0.0565
Test Recall@5: 0.1453
Test Hit Rate@5: 0.2405
