In [30]:
import requests
import os
import sys
import pandas as pd
import numpy as np


from typing import Optional
import glob
from scipy import stats
# Add the parent directory of this notebook to sys.path.
# The kernel's working directory is taken as the notebook's location.
notebook_dir = os.getcwd()
parent_dir = os.path.dirname(notebook_dir)
# Guard the append so re-running this cell does not stack duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from project_tools import project_utils, project_class

import datetime
import json
from tqdm.notebook import tqdm
import gc
# import ds_utils
import random
import matplotlib.pyplot as plt
%matplotlib inline

from importlib import reload
# Notebook-wide pandas display settings.
pd.set_option('display.max_colwidth', None)  # never truncate long cell text
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

# Render floats without decimal places.
# NOTE(review): this rounds every float shown in DataFrame output (0.73 is
# displayed as "1") - confirm that is intended for fractional metrics.
pd.set_option('display.float_format', '{:.0f}'.format)

In [33]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, ndcg_score
from scipy.stats import kendalltau, spearmanr

In [36]:
def evaluate_horse_race_positions(y_true, y_pred_proba, dnf_value=99):
    """Score predicted win probabilities against actual finishing positions.

    Parameters
    ----------
    y_true : array-like, shape (n_races, n_horses)
        Finishing position of every horse; the smallest value in a row is the
        winner. Entries that are NaN or equal to ``dnf_value`` are treated as
        non-finishers and placed one position behind the last valid finisher
        of the same race. The input is never modified.
    y_pred_proba : array-like, shape (n_races, n_horses)
        Predicted score per horse (higher = expected to finish better),
        aligned column-by-column with ``y_true``.
    dnf_value : scalar or None, default 99
        Sentinel marking a non-finisher in ``y_true``. Pass ``None`` to
        disable the sentinel check (NaN is still handled).

    Returns
    -------
    dict
        ``'Mean Squared Error'`` / ``'Mean Absolute Error'``: error between
        true positions and predicted ranks;
        ``"Spearman's Rank Correlation"``: mean per-race Spearman correlation;
        ``'NDCG'``: mean per-race NDCG, using relevance = (worst position in
        the race - position) so that better finishers are more relevant;
        ``'Winner Match'``: share of races whose predicted winner is the
        actual winner;
        ``'Top 3 Set Match'``: share of races where the predicted top three
        horses equal the actual top three in any order;
        ``'Top 3 Exact Match'``: same, but the order must match as well.

    Raises
    ------
    ValueError
        If the inputs are not 2-D arrays of identical shape, or are empty.
    """
    scores = np.asarray(y_pred_proba, dtype=float)
    # Float copy: allows NaN markers and keeps the caller's array untouched.
    y_true_processed = np.array(y_true, dtype=float)

    if scores.ndim != 2 or y_true_processed.shape != scores.shape:
        raise ValueError(
            "y_true and y_pred_proba must both be 2-D with identical shape "
            f"(n_races, n_horses); got {y_true_processed.shape} and {scores.shape}"
        )
    n_races, n_horses = scores.shape
    if n_races == 0 or n_horses == 0:
        raise ValueError("at least one race with at least one horse is required")

    # Push non-finishers behind the last valid finisher of their own race.
    invalid_mask = np.isnan(y_true_processed)
    if dnf_value is not None:
        invalid_mask |= y_true_processed == dnf_value
    for i in np.flatnonzero(invalid_mask.any(axis=1)):
        valid_ranks = y_true_processed[i][~invalid_mask[i]]
        # With no valid finisher at all, every horse shares last place.
        fill_rank = valid_ranks.max() + 1 if valid_ranks.size else n_horses
        y_true_processed[i, invalid_mask[i]] = fill_rank

    # Horse indices ordered best -> worst. The stable sort breaks ties by
    # column order so the result is deterministic.
    pred_order = np.argsort(-scores, axis=1, kind='stable')
    true_order = np.argsort(y_true_processed, axis=1, kind='stable')

    # argsort of an ordering yields each horse's 0-based place in that
    # ordering; +1 turns it into a rank comparable with finishing positions.
    y_pred_ranks = np.argsort(pred_order, axis=1) + 1

    # Mean Squared Error / Mean Absolute Error between positions and ranks
    mse = mean_squared_error(y_true_processed, y_pred_ranks)
    mae = mean_absolute_error(y_true_processed, y_pred_ranks)

    # Spearman's Rank Correlation, averaged over races
    spearman_corr = np.mean([
        spearmanr(y_true_processed[i], y_pred_ranks[i]).correlation
        for i in range(n_races)
    ])

    # NDCG expects "higher = more relevant", so invert the positions per race;
    # each race stays its own query (one row per race).
    relevance = y_true_processed.max(axis=1, keepdims=True) - y_true_processed
    ndcg = ndcg_score(relevance, scores)

    # Winner match: same horse index at the head of both orderings.
    winner_match = np.mean(true_order[:, 0] == pred_order[:, 0])

    top3_true = true_order[:, :3]
    top3_pred = pred_order[:, :3]

    # Top 3 Set Match - considers [1,3,2] and [2,3,1] as matching
    top3_set_match = np.mean([
        set(actual) == set(predicted)
        for actual, predicted in zip(top3_true, top3_pred)
    ])

    # Top 3 Exact Match - the three horses must also be in the same order
    top3_exact_match = np.mean([
        np.array_equal(actual, predicted)
        for actual, predicted in zip(top3_true, top3_pred)
    ])

    return {
        'Mean Squared Error': mse,
        'Mean Absolute Error': mae,
        "Spearman's Rank Correlation": spearman_corr,
        'NDCG': ndcg,
        'Winner Match': winner_match,
        'Top 3 Set Match': top3_set_match,
        'Top 3 Exact Match': top3_exact_match,
    }

In [43]:
# Display the predicted ranking array.
# NOTE(review): at module level `y_pred_ranks` is only assigned in cells that
# appear further down in this notebook, so this cell fails on a fresh kernel.
y_pred_ranks

array([[5, 1, 3, 2, 4]])

In [52]:
# Show the processed true positions twice: as plain text via print and as the
# cell's displayed value.
# NOTE(review): `y_true_processed` is first assigned in a cell further down in
# this notebook, so this cell fails on a fresh kernel.
print(y_true_processed)
y_true_processed

[[4 1 3 2 5]]


array([[4, 1, 3, 2, 5]])

In [54]:
# Print the index orderings that sort the true positions and the predicted
# ranks (np.argsort defaults to the last axis, i.e. within each race row).
# NOTE(review): both arrays come from cells further down in this notebook.
print(np.argsort(y_true_processed))
print(np.argsort(y_pred_ranks))

[[1 3 2 0 4]]
[[1 3 2 4 0]]


In [48]:
# Print the probabilities, derive `y_pred_ranks` from them and display it.
# NOTE(review): np.argsort returns the indices that would sort each row, not
# each horse's rank, so `n_horses - argsort` is not a valid per-horse ranking
# in general - verify before relying on it.
# NOTE(review): `n_horses` and `y_pred_proba` are assigned in a cell further
# down in this notebook.
print(y_pred_proba)
y_pred_ranks = n_horses - np.argsort(y_pred_proba, axis=1)
y_pred_ranks

[[0.1 0.4 0.2 0.2 0.1]]


array([[5, 1, 3, 2, 4]])

In [60]:
# Step-by-step walk-through of the evaluation logic on one toy race, run at
# module level so every intermediate array can be inspected.

# True positions, one row per race; 99 is the sentinel matched by `dnf_value`
# below (presumably "did not finish").
y_true = np.array([
    [4, 1, 3, 2, 99],
])

# Predicted probabilities, one row per race (the extra example rows are disabled).
y_pred_proba = np.array([
    [0.1, 0.4, 0.2, 0.2, 0.1],
    # [0.3, 0.1, 0.4, 0.1, 0.1],
    # [0.2, 0.4, 0.1, 0.3, 0.1]
])

n_races, n_horses = y_pred_proba.shape
y_true_processed = y_true.copy()  # copy so y_true keeps the raw sentinel value
dnf_value = 99

# Convert probabilities to predicted rankings
# NOTE(review): np.argsort returns the indices that would sort each row, not
# each horse's rank, so this expression is not a valid ranking in general.
y_pred_ranks = n_horses - np.argsort(y_pred_proba, axis=1)

# Only the first race (row 0) is cleaned in this walk-through.
i = 0
# Flag entries that are NaN or equal to the sentinel.
invalid_mask = np.logical_or(
    np.isnan(y_true[i]),
    y_true[i] == dnf_value
)

valid_ranks = y_true[i][~invalid_mask]
if len(valid_ranks) > 0:
    max_rank = np.max(valid_ranks)
    # Replace invalid values with max_rank + 1
    y_true_processed[i][invalid_mask] = max_rank + 1

print(invalid_mask, valid_ranks, y_true_processed, y_pred_ranks)

# NOTE(review): the positions (smallest = winner, see the argsort below) are
# passed as relevance, which ndcg_score treats as higher = better; reshape(1, -1)
# would also merge several races into a single query.
ndcg = ndcg_score(y_true_processed.reshape(1, -1), y_pred_proba.reshape(1, -1))
print(ndcg)

# Per-race Spearman correlation between processed positions and predicted ranks,
# averaged over races.
spearman_corr = np.mean([spearmanr(y_true_processed[i], y_pred_ranks[i]).correlation 
                             for i in range(n_races)])

print(spearman_corr)

# winner match
# argsort turns each row into horse indices ordered from smallest to largest value.
y_true_ranksort = np.argsort(y_true_processed, axis=1)
y_pred_ranksort = np.argsort(y_pred_ranks, axis=1)

winner_match = np.mean([y_true_ranksort[i][0] == y_pred_ranksort[i][0] for i in range(n_races)])
# Top 3 Set Match - considers [1,3,2] and [2,3,1] as matching
top3_set_match = np.mean([set(y_true_ranksort[i][:3]) == set(y_pred_ranksort[i][:3])
                         for i in range(n_races)])

# Top 3 Exact Match - only considers exact matches like [1,3,2] and [1,3,2]
top3_exact_match = np.mean([np.array_equal(y_true_ranksort[i][:3], y_pred_ranksort[i][:3])
                           for i in range(n_races)])

# `i` is still 0 here (comprehension variables do not leak), so this prints race 0.
print(y_true_ranksort[i][:3], y_pred_ranksort[i][:3])
print(winner_match, top3_set_match, top3_exact_match)


[False False False False  True] [4 1 3 2] [[4 1 3 2 5]] [[5 1 3 2 4]]
0.73074974489826
0.8999999999999998
[1 3 2] [1 3 2]
1.0 1.0 1.0


array([[1, 2, 3]])

In [25]:
# Top-k accuracy with k = 1: for every race, the fraction of the k horses with
# the smallest true position that are also among the k highest probabilities.
# Relies on y_pred_proba, y_true_processed and n_races from the walk-through cell.
k = 1
top_k_pred = np.argsort(np.negative(y_pred_proba), axis=1)[..., :k]
top_k_true = np.argsort(y_true_processed, axis=1)[..., :k]
top_k_accuracy = np.mean(
    [
        len(set(top_k_pred[race]).intersection(set(top_k_true[race]))) / k
        for race in range(n_races)
    ]
)
top_k_accuracy

1.0

In [28]:
# Top-k accuracy with k = 3: for every race, the fraction of the k horses with
# the smallest true position that are also among the k highest probabilities.
# Relies on y_pred_proba, y_true_processed and n_races from the walk-through cell.
k = 3
top_k_pred = np.argsort(np.negative(y_pred_proba), axis=1)[..., :k]
top_k_true = np.argsort(y_true_processed, axis=1)[..., :k]
top_k_accuracy = np.mean(
    [
        len(set(top_k_pred[race]).intersection(set(top_k_true[race]))) / k
        for race in range(n_races)
    ]
)

# Show the selected horse indices (true first, predicted second) before the score.
print(top_k_true, top_k_pred)
top_k_accuracy

[[1 3 2]] [[1 2 3]]


1.0