In [1]:
from datetime import datetime, timedelta
import pandas as pd
from mma_ranking.ranking.preprocess import load_train_data  # , read_fights
from mma_ranking.ranking.network_ranking import (
    # fighter_perfomance,
    # calculate_rank,
    generate_ranks_weightclass,
    normalizing_page_rank_per_class,
    ranking_dictionary,
    adjust_ranking_based_on_matches,
    get_fighters_in_weightclass
)
from mma_ranking.config import N_YEARS, WEIGHTCLASS_LIST

# path = r"\data\all_fights_new.csv"
# Fights CSV, given relative to the notebook's working directory. Forward
# slashes are used because they are accepted on Windows as well as on POSIX
# systems; the previous backslash-separated path only resolved on Windows.
path = "../../data/all_fights_2024-12-23.csv"
# NOTE(review): start_date is reassigned from N_YEARS in the next cell before
# it is ever read, so this literal is only a placeholder.
start_date = "2014-01-01"
# NOTE(review): not referenced by the cells below, which use N_YEARS from the
# project config instead -- confirm which of the two is authoritative.
number_years = 5  # number of years to train the model

# setting pandas to show all columns and rows
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

# shut down pandas' chained-assignment warnings
pd.options.mode.chained_assignment = None


def subtract_years(date, years, is_str=True):
    """Return the point in time ``years`` years before ``date``.

    Whole years are subtracted on the calendar (same month and day, earlier
    year) rather than as ``365.25 * years`` days, which could land a day early
    and introduced a spurious time-of-day offset. Any fractional remainder of
    ``years`` is still subtracted as ``365.25 * fraction`` days.

    Args:
        date: A ``datetime`` or a string in ``"%Y-%m-%d"`` format.
        years: Number of years to go back; may be fractional.
        is_str: When true, return a ``"%Y-%m-%d"`` string; otherwise return
            the ``datetime`` itself.

    Returns:
        The shifted date, as a string or ``datetime`` depending on ``is_str``.

    Raises:
        ValueError: If ``date`` is a string that does not match
            ``"%Y-%m-%d"``, or the target year is outside the supported range.
    """
    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")

    whole_years = int(years)
    fraction = years - whole_years

    target_year = date.year - whole_years
    try:
        shifted = date.replace(year=target_year)
    except ValueError:
        # Only 29 February lacks a counterpart in a non-leap target year;
        # anything else (e.g. year out of range) is a genuine error.
        if not (date.month == 2 and date.day == 29):
            raise
        shifted = date.replace(year=target_year, day=28)

    if fraction:
        # No calendar-exact meaning for a partial year: keep the approximation.
        shifted = shifted - timedelta(days=365.25 * fraction)

    return shifted.strftime("%Y-%m-%d") if is_str else shifted

In [2]:
# Ranking window: starts N_YEARS before the moment this cell is executed.
today_date = datetime.now().strftime("%Y-%m-%d")
start_date = subtract_years(today_date, N_YEARS)

# Complete fight history; the graph model is trained on all of it.
df = load_train_data(path=path)

# Fights on or after start_date, oldest first, reduced to the columns that are
# kept for the ranking adjustments.
_match_columns = [
    "Date",
    "winner",
    "losser",
    "BELT",
    "Weight",
    "method",
    "value",
    "draw",
]
matches_sorted = df.loc[df["Date"] >= start_date, _match_columns].sort_values("Date")

_match_days = matches_sorted["Date"].dt.date
print("matches_df.shape", matches_sorted.shape)
print(f"max date: {_match_days.max()}")
print(f"min date: {_match_days.min()}")

# Rankings per weight class are generated from the unfiltered history; the
# latest run_date found in the result is kept for the later cells.
rankings_df = generate_ranks_weightclass(df)
run_date = rankings_df["run_date"].max()

# Distinct weight classes that occur inside the window.
weight_class_list = matches_sorted["Weight"].unique()
print("number of weight classes: ", len(weight_class_list))
print("weight classes: ", weight_class_list)

matches_df.shape (2519, 8)
max date: 2024-12-07
min date: 2020-01-18
number of weight classes:  13
weight classes:  ['Lightweight' 'Heavyweight' "Women'sFlyweight" 'Bantamweight'
 "Women's Bantamweight" 'Light Heavyweight' 'Flyweight' 'Featherweight'
 'Welterweight' "Women's Strawweight" 'Middleweight'
 "Women's Featherweight" 'other']


In [3]:
# getting the fighters for each weight class
# Built from the windowed, date-sorted matches. minimum_fights=3 is passed
# straight to the helper -- presumably a lower bound on fights per fighter and
# weight class; confirm against get_fighters_in_weightclass.
df_fighter_weight = get_fighters_in_weightclass(matches_sorted, minimum_fights=3)
# Show only the first rows as a sanity check.
df_fighter_weight.head()

Unnamed: 0,fighter,Weight,counts
1,AJ Dobson,Middleweight,4
2,AJ Fletcher,Welterweight,4
6,Abdul Razak Alhassan,Middleweight,6
11,Abus Magomedov,Middleweight,5
12,Adam Fugitt,Welterweight,4


In [4]:
# Show the kernel's working directory; the relative data `path` defined in the
# first cell is resolved against it.
%pwd

'c:\\Users\\alire\\OneDrive\\personal_projects\\mma_ranking\\Notebooks\\UFC_ranking'

In [5]:
# Peek at the first rows of the generated ranking for the Lightweight class.
rankings_df.loc[rankings_df["weight"].eq("Lightweight")].head()

Unnamed: 0,fighter,page_rank,run_date,perf_rat,n_fights,adj_p_rank,weight,wight_class_rank
0,Islam Makhachev,0.004745,2024-12-07,1.0,4.0,0.004745,Lightweight,1
1,Max Holloway,0.004123,2024-12-07,1.0,1.0,0.004123,Lightweight,2
2,Ilia Topuria,0.003485,2024-12-07,1.0,1.0,0.003485,Lightweight,3
3,Charles Oliveira,0.003682,2024-12-07,0.666667,6.0,0.002455,Lightweight,4
4,Dan Hooker,0.002199,2024-12-07,1.0,3.0,0.002199,Lightweight,5


In [7]:
# normalizing the page rank per class
rankings_df_new = normalizing_page_rank_per_class(
    rankings_df, WEIGHTCLASS_LIST, run_date
)

# One adjusted-ranking frame is collected per weight class and everything is
# concatenated once after the loop. Calling pd.concat inside the loop re-copies
# all previously accumulated rows on each iteration, and seeding the
# accumulator with an empty pd.DataFrame() can emit a FutureWarning about
# empty entries on recent pandas versions.
_updated_frames = []

for _weight_class in WEIGHTCLASS_LIST:
    print(f"weight class: {_weight_class}\t, run_date: {run_date.date()}")

    # dictionary of the fighters ranking for each weight class
    _fighter_rankings = ranking_dictionary(rankings_df_new, _weight_class, run_date)

    # Replays the windowed matches on top of the initial ranking.
    # NOTE(review): the exact meaning of epsilon is defined in
    # network_ranking.adjust_ranking_based_on_matches -- confirm there.
    # verbose=True prints a before/after entry per processed match.
    updated_rankings = adjust_ranking_based_on_matches(
        matches_sorted=matches_sorted,
        fighter_rankings=_fighter_rankings,
        ranking_column="wight_class_rank",
        page_rank_column="adj_p_rank_scaled",
        epsilon=0.001,
        verbose=True,
    )

    # Tag the rows so the combined frame can be filtered by class and run.
    updated_rankings["weight_class"] = _weight_class
    updated_rankings["run_date"] = run_date

    _updated_frames.append(updated_rankings)

# pd.concat raises on an empty list, so fall back to an empty frame when
# WEIGHTCLASS_LIST has no entries (the result the original loop produced).
all_weight_classes_updated_ranking = (
    pd.concat(_updated_frames) if _updated_frames else pd.DataFrame()
)

weight class: Flyweight	, run_date: 2024-12-07
date: 2020-01-18 00:00:00
Before change --> winner name Askar Askarov --> rank: 50 
 loser name Tim Elliott --> rank: 8
-------------------
After change --> winner name Askar Askarov --> rank: 8 
 loser name Tim Elliott --> rank: 9
-------------------

date: 2020-03-14 00:00:00
Before change --> winner name David Dvorak --> rank: 50 
 loser name Bruno Silva --> rank: 10
-------------------
After change --> winner name David Dvorak --> rank: 10 
 loser name Bruno Silva --> rank: 11
-------------------

date: 2020-07-18 00:00:00
Before change --> winner name Askar Askarov --> rank: 8 
 loser name Alexandre Pantoja --> rank: 2
-------------------
After change --> winner name Askar Askarov --> rank: 2 
 loser name Alexandre Pantoja --> rank: 3
-------------------

date: 2020-10-10 00:00:00
Before change --> winner name Tagir Ulanbekov --> rank: 15 
 loser name Bruno Silva --> rank: 11
-------------------
After change --> winner name Tagir Ulan

In [9]:
# rankings_df_new[rankings_df_new['weight']=='Lightweight'].head(20)

Unnamed: 0,fighter,page_rank,run_date,perf_rat,n_fights,adj_p_rank,weight,wight_class_rank,adj_p_rank_scaled
0,Islam Makhachev,0.004745,2024-12-07,1.0,4.0,0.004745,Lightweight,1,1.0
1,Max Holloway,0.004123,2024-12-07,1.0,1.0,0.004123,Lightweight,2,0.868861
2,Ilia Topuria,0.003485,2024-12-07,1.0,1.0,0.003485,Lightweight,3,0.734414
3,Charles Oliveira,0.003682,2024-12-07,0.666667,6.0,0.002455,Lightweight,4,0.51726
4,Dan Hooker,0.002199,2024-12-07,1.0,3.0,0.002199,Lightweight,5,0.463433
5,Dustin Poirier,0.004544,2024-12-07,0.4,5.0,0.001818,Lightweight,6,0.383039
6,Grant Dawson,0.002049,2024-12-07,0.833333,6.0,0.001707,Lightweight,7,0.359827
7,Arman Tsarukyan,0.002001,2024-12-07,0.833333,6.0,0.001668,Lightweight,8,0.35146
8,Mateusz Gamrot,0.002297,2024-12-07,0.714286,7.0,0.00164,Lightweight,9,0.345681
9,Renato Moicano,0.001593,2024-12-07,1.0,5.0,0.001593,Lightweight,10,0.335803


In [14]:
# First 15 rows of the match-adjusted ranking for the Lightweight class.
all_weight_classes_updated_ranking.query("weight_class=='Lightweight'").head(15)

Unnamed: 0,Name,rank,page_rank,weight_class,run_date
0,Islam Makhachev,1,1.0,Lightweight,2024-12-07
1,Ilia Topuria,2,0.870861,Lightweight,2024-12-07
2,Alexander Volkanovski,3,0.869861,Lightweight,2024-12-07
3,Max Holloway,4,0.868861,Lightweight,2024-12-07
4,Arman Tsarukyan,5,0.51826,Lightweight,2024-12-07
5,Charles Oliveira,6,0.51726,Lightweight,2024-12-07
6,Justin Gaethje,7,0.465433,Lightweight,2024-12-07
7,Dustin Poirier,8,0.464433,Lightweight,2024-12-07
8,Michael Chandler,9,0.464433,Lightweight,2024-12-07
9,Dan Hooker,10,0.463433,Lightweight,2024-12-07


In [15]:
# First 15 rows of the match-adjusted ranking for the Featherweight class.
all_weight_classes_updated_ranking.query("weight_class=='Featherweight'").head(15)

Unnamed: 0,Name,rank,page_rank,weight_class,run_date
0,Ilia Topuria,1,1.0,Featherweight,2024-12-07
1,Movsar Evloev,2,0.81445,Featherweight,2024-12-07
2,Aljamain Sterling,3,0.643958,Featherweight,2024-12-07
3,Alexander Volkanovski,4,0.592534,Featherweight,2024-12-07
4,Max Holloway,5,0.591534,Featherweight,2024-12-07
5,Julio Arce,6,0.423764,Featherweight,2024-12-07
6,Shane Burgos,7,0.424764,Featherweight,2024-12-07
7,Felipe Lima,8,0.426764,Featherweight,2024-12-07
8,Muhammad Naimov,9,0.425764,Featherweight,2024-12-07
9,Nathaniel Wood,10,0.424764,Featherweight,2024-12-07


In [18]:
# Weight classes present in the combined, match-adjusted ranking.
all_weight_classes_updated_ranking.weight_class.unique()

array(['Flyweight', 'Bantamweight', 'Featherweight', 'Lightweight',
       'Welterweight', 'Middleweight', 'Light Heavyweight', 'Heavyweight',
       "Women's Strawweight", "Women'sFlyweight", "Women's Bantamweight",
       "Women's Featherweight", 'other'], dtype=object)

In [19]:
# First 15 rows of the match-adjusted ranking for the Middleweight class.
all_weight_classes_updated_ranking.query("weight_class=='Middleweight'").head(15)

Unnamed: 0,Name,rank,page_rank,weight_class,run_date
0,Dricus Du Plessis,1,1.0,Middleweight,2024-12-07
1,Sean Strickland,2,0.927823,Middleweight,2024-12-07
2,Israel Adesanya,3,0.926823,Middleweight,2024-12-07
3,Alex Pereira,4,0.925823,Middleweight,2024-12-07
4,Khamzat Chimaev,5,0.869369,Middleweight,2024-12-07
5,Robert Whittaker,6,0.690084,Middleweight,2024-12-07
6,Nassourdine Imavov,7,0.691084,Middleweight,2024-12-07
7,Caio Borralho,8,0.691084,Middleweight,2024-12-07
8,Jared Cannonier,9,0.690084,Middleweight,2024-12-07
9,Marvin Vettori,10,0.689084,Middleweight,2024-12-07


In [20]:
# First 15 rows of the match-adjusted ranking for the Heavyweight class.
all_weight_classes_updated_ranking.query("weight_class=='Heavyweight'").head(15)

Unnamed: 0,Name,rank,page_rank,weight_class,run_date
0,Jon Jones,1,1.0,Heavyweight,2024-12-07
1,Francis Ngannou,2,0.716842,Heavyweight,2024-12-07
2,Ciryl Gane,3,0.527978,Heavyweight,2024-12-07
3,Tom Aspinall,4,0.529978,Heavyweight,2024-12-07
4,Alexander Volkov,5,0.529978,Heavyweight,2024-12-07
5,Sergei Pavlovich,6,0.528978,Heavyweight,2024-12-07
6,Serghei Spivac,7,0.529978,Heavyweight,2024-12-07
7,Marcin Tybura,8,0.528978,Heavyweight,2024-12-07
8,Jairzinho Rozenstruik,9,0.528978,Heavyweight,2024-12-07
9,Tai Tuivasa,10,0.527978,Heavyweight,2024-12-07
