In [1]:
from datetime import datetime, timedelta
import pandas as pd
from mma_ranking.ranking.preprocess import load_train_data  # , read_fights
from mma_ranking.ranking.network_ranking import (
    # fighter_perfomance,
    # calculate_rank,
    generate_ranks_weightclass,
    normalizing_page_rank_per_class,
    ranking_dictionary,
    adjust_ranking_based_on_matches,
    get_fighters_in_weightclass
)
from mma_ranking.config import N_YEARS, WEIGHTCLASS_LIST

# Location of the fights CSV, relative to the notebook's working directory.
# Forward slashes are used on purpose: they are accepted on Windows as well as
# on Linux/macOS, whereas a backslash-separated path only resolves on Windows.
path = "../../data/all_fights_2024-04-28.csv"

# NOTE(review): `start_date` is reassigned from today's date and N_YEARS in the
# next cell, and `number_years` is not read anywhere in the visible notebook
# (N_YEARS from the project config is used instead). Both are kept so that any
# code outside this view that still reads them keeps working.
start_date = "2014-01-01"
number_years = 5  # number of years to train the model

# setting pandas to show all columns and rows
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

# shut down the chained-assignment (SettingWithCopy) warnings
pd.options.mode.chained_assignment = None


def subtract_years(date, years, is_str=True):
    """Return ``date`` moved back by ``years`` years.

    Whole-year offsets are applied on the calendar (same month and day, earlier
    year) instead of as ``365.25 * years`` days. The day-based approximation
    carries a fractional day, which pushed midnight dates into the previous
    evening and therefore often produced a result one calendar day too early.

    Args:
        date: A ``datetime`` or a ``"%Y-%m-%d"`` string.
        years: Number of years to go back. Integral values (``5``, ``5.0``) use
            calendar arithmetic; non-integral values fall back to the original
            ``365.25``-days-per-year approximation.
        is_str: When true (default) return a ``"%Y-%m-%d"`` string, otherwise
            return the shifted ``datetime``.

    Returns:
        The shifted date as a string or ``datetime``, depending on ``is_str``.

    Raises:
        ValueError: If a string ``date`` does not match ``"%Y-%m-%d"`` or the
            target year is outside the range ``datetime`` supports.
    """
    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")

    if float(years).is_integer():
        target_year = date.year - int(years)
        try:
            shifted = date.replace(year=target_year)
        except ValueError:
            # Only Feb 29 can fail for a valid target year; clamp it to Feb 28.
            if (date.month, date.day) != (2, 29):
                raise
            shifted = date.replace(year=target_year, day=28)
    else:
        # Fractional years have no exact calendar meaning; keep the approximation.
        shifted = date - timedelta(days=365.25 * years)

    return shifted.strftime("%Y-%m-%d") if is_str else shifted

In [2]:
# The training window ends today and reaches N_YEARS back.
today_date = datetime.now().strftime("%Y-%m-%d")
start_date = subtract_years(today_date, N_YEARS)

# Full fight table, as returned by the project loader for the configured CSV.
df = load_train_data(path=path)

# Fights on or after start_date, reduced to the columns used for the ranking
# adjustments and ordered from oldest to newest.
matches_sorted = df.loc[
    df["Date"] >= start_date,
    ["Date", "winner", "losser", "BELT", "Weight", "method", "value", "draw"],
].sort_values("Date")

print("matches_df.shape", matches_sorted.shape)
print(f"max date: {matches_sorted['Date'].dt.date.max()}")
print(f"min date: {matches_sorted['Date'].dt.date.min()}")

# Rankings are generated from the full table; the latest run_date is kept for later cells.
rankings_df = generate_ranks_weightclass(df)
run_date = rankings_df["run_date"].max()

# Distinct weight classes that occur inside the window.
weight_class_list = matches_sorted["Weight"].unique()
print("number of weight classes: ", len(weight_class_list))
print("weight classes: ", weight_class_list)

matches_df.shape (2476, 8)
max date: 2024-04-13
min date: 2019-06-22
number of weight classes:  13
weight classes:  ["Women'sFlyweight" 'Heavyweight' 'Featherweight' 'Welterweight'
 "Women's Strawweight" 'Lightweight' 'Bantamweight' 'Middleweight'
 'Light Heavyweight' 'Flyweight' "Women's Bantamweight"
 "Women's Featherweight" 'other']


In [39]:
# Fighter / weight-class table built from the windowed matches.
# minimum_fights=3 is presumably a lower bound on a fighter's fight count
# (verify in get_fighters_in_weightclass).
df_fighter_weight = get_fighters_in_weightclass(
    matches_sorted,
    minimum_fights=3,
)
df_fighter_weight.head(n=5)

Unnamed: 0,fighter,Weight,counts
1,AJ Dobson,Middleweight,4
2,AJ Fletcher,Welterweight,4
6,Abdul Razak Alhassan,Middleweight,5
10,Abubakar Nurmagomedov,Welterweight,4
13,Adrian Yanez,Bantamweight,7


In [2]:
# Debug aid: prints the kernel's current working directory, which is what the
# relative data path is resolved against.
# NOTE(review): the echoed path is machine-specific; consider clearing this
# output before sharing the notebook.
%pwd

'c:\\Users\\alire\\OneDrive\\personal_projects\\mma_ranking\\Notebooks\\UFC_ranking'

In [7]:
# Preview the first Lightweight rows of the raw ranking table.
rankings_df.loc[rankings_df["weight"] == "Lightweight"].head(n=5)

Unnamed: 0,fighter,page_rank,run_date,perf_rat,n_fights,adj_p_rank,weight,wight_class_rank
0,Max Holloway,0.004567,2024-04-13,1.0,1.0,0.004567,Lightweight,1
1,Islam Makhachev,0.004491,2024-04-13,1.0,5.0,0.004491,Lightweight,2
2,Ilia Topuria,0.003078,2024-04-13,1.0,1.0,0.003078,Lightweight,3
3,Dustin Poirier,0.004996,2024-04-13,0.6,5.0,0.002998,Lightweight,4
4,Charles Oliveira,0.003988,2024-04-13,0.666667,6.0,0.002659,Lightweight,5


In [6]:
# Build the per-class normalised table in this cell so the preview works on a
# fresh kernel (Restart & Run All). Before, `rankings_df_new` was only assigned
# in a later cell, so this cell raised NameError unless cells were run out of order.
rankings_df_new = normalizing_page_rank_per_class(
    rankings_df, WEIGHTCLASS_LIST, run_date
)

# Preview the first Lightweight rows of the normalised table.
rankings_df_new[rankings_df_new["weight"] == "Lightweight"].head()

Unnamed: 0,fighter,page_rank,run_date,perf_rat,n_fights,adj_p_rank,weight,wight_class_rank,adj_p_rank_scaled
0,Max Holloway,0.004567,2024-04-13,1.0,1.0,0.004567,Lightweight,1,1.0
1,Islam Makhachev,0.004491,2024-04-13,1.0,5.0,0.004491,Lightweight,2,0.98326
2,Ilia Topuria,0.003078,2024-04-13,1.0,1.0,0.003078,Lightweight,3,0.674028
3,Dustin Poirier,0.004996,2024-04-13,0.6,5.0,0.002998,Lightweight,4,0.656327
4,Charles Oliveira,0.003988,2024-04-13,0.666667,6.0,0.002659,Lightweight,5,0.582142


In [None]:
# normalizing the page rank per class
rankings_df_new = normalizing_page_rank_per_class(
    rankings_df, WEIGHTCLASS_LIST, run_date
)

# One adjusted-ranking frame per weight class is collected here and concatenated
# once after the loop. Growing a DataFrame with pd.concat on every iteration
# re-copies all earlier rows each time, and starting from an empty DataFrame
# triggers a concat deprecation warning on recent pandas versions.
_per_class_rankings = []

for _weight_class in WEIGHTCLASS_LIST:
    print(f"weight class: {_weight_class}\t, run_date: {run_date.date()}")

    # dictionary of the fighters ranking for each weight class
    _fighter_rankings = ranking_dictionary(rankings_df_new, _weight_class, run_date)

    updated_rankings = adjust_ranking_based_on_matches(
        matches_sorted=matches_sorted,
        fighter_rankings=_fighter_rankings,
        ranking_column="wight_class_rank",
        page_rank_column="adj_p_rank_scaled",
        epsilon=0.001,
        verbose=True,
    )

    # Tag each row with the class and run it belongs to before stacking.
    updated_rankings["weight_class"] = _weight_class
    updated_rankings["run_date"] = run_date

    _per_class_rankings.append(updated_rankings)

# pd.concat raises on an empty list, so fall back to an empty frame, which is
# what the incremental version produced when there were no weight classes.
all_weight_classes_updated_ranking = (
    pd.concat(_per_class_rankings) if _per_class_rankings else pd.DataFrame()
)