In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta
from rapidfuzz import fuzz
from sklearn.impute import KNNImputer
import pickle
from typing import Dict
from sklearn.preprocessing import OneHotEncoder

In [2]:
pd.set_option("display.max_columns", None)

In [3]:
stats_cleaned = pd.read_csv('../data/notebooks/stats_cleaned.csv')
events_cleaned = pd.read_csv('../data/notebooks/events_cleaned.csv')

Merge events and stats together per red and blue fighter and their equivalent stats.

The names have been normalized. But in the vent that there are still a couple names that might missmatch, I will create a fuzzy pipeline match to merge the names ratio, token sort ratio and token set ratio.

In [4]:
def create_fuzzy_mapping(event_names, stats_names, threshold=85):
    """
    Create a mapping dictionary from events fighter names to stats names
    using fuzzy matching.
    """
    mapping = {}

    for event_name in event_names:
        if pd.isna(event_name):
            continue

        best_match = None
        best_score = 0

        for stats_name in stats_names:
            if pd.isna(stats_name):
                continue

            # Calculate fuzzy match scores
            ratio = fuzz.ratio(event_name, stats_name)
            token_sort_ratio = fuzz.token_sort_ratio(event_name, stats_name)
            token_set_ratio = fuzz.token_set_ratio(event_name, stats_name)
            score = max(ratio, token_sort_ratio, token_set_ratio)
            
            if score >= threshold and score > best_score:
                best_match = stats_name
                best_score = score

        if best_match:
            mapping[event_name] = best_match
        else:
            mapping[event_name] = event_name # Keep original if no match found

    return mapping


In [5]:
# Get unique fighter names from events
blue_fighters = events_cleaned['fighter_blue'].dropna().unique()
red_fighters = events_cleaned['fighter_red'].dropna().unique()
all_event_fighters = set(blue_fighters) | set(red_fighters)

# Get unique names from stats
stats_names = stats_cleaned['name'].dropna().unique()

# Create fuzzy mapping
print("Creating fuzzy mapping...")
fuzzy_mapping = create_fuzzy_mapping(all_event_fighters, stats_names, threshold=85)

# Apply fuzzy mapping to create new columns for merging
events_cleaned['fighter_blue_mapped'] = events_cleaned['fighter_blue'].map(fuzzy_mapping)
events_cleaned['fighter_red_mapped'] = events_cleaned['fighter_red'].map(fuzzy_mapping)

# Fill NaN values with original names (for cases where mapping didn't work)
events_cleaned['fighter_blue_mapped'] = events_cleaned['fighter_blue_mapped'].fillna(events_cleaned['fighter_blue'])
events_cleaned['fighter_red_mapped'] = events_cleaned['fighter_red_mapped'].fillna(events_cleaned['fighter_red'])

# Now perform the merges using the mapped names
# Merge stats for fighter_blue
df_merged = events_cleaned.merge(
    stats_cleaned,
    how='left',
    left_on='fighter_blue_mapped',
    right_on='name',
    suffixes=('', '_drop')
)

# Rename fighter_blue stats columns with _blue suffix (excluding the original 'name')
cols_to_rename_blue = [col for col in stats_cleaned.columns if col != 'name']
df_merged.rename(columns={col: f"{col}_blue" for col in cols_to_rename_blue}, inplace=True)
df_merged.drop(columns=['name', 'fighter_blue_mapped'], inplace=True)

# Merge stats for fighter_red
df_merged = df_merged.merge(
    stats_cleaned,
    how='left',
    left_on='fighter_red_mapped',
    right_on='name',
    suffixes=('', '_drop')
)

# Rename fighter_red stats columns with _red suffix
cols_to_rename_red = [col for col in stats_cleaned.columns if col != 'name']
df_merged.rename(columns={col: f"{col}_red" for col in cols_to_rename_red}, inplace=True)
df_merged.drop(columns=['name', 'fighter_red_mapped'], inplace=True)

# Print some statistics about the fuzzy matching
print(f"Total unique fighters in events: {len(all_event_fighters)}")
print(f"Total unique names in stats: {len(stats_names)}")
exact_matches = sum(1 for k, v in fuzzy_mapping.items() if k == v)
fuzzy_matches = len(fuzzy_mapping) - exact_matches
print(f"Exact matches: {exact_matches}")
print(f"Fuzzy matches: {fuzzy_matches}")
print(f"No matches found: {len(all_event_fighters) - len(fuzzy_mapping)}")

# Show some examples of fuzzy matches
print("\nSample fuzzy matches:")
fuzzy_examples = [(k, v) for k, v in fuzzy_mapping.items() if k != v][:10]
for original, matched in fuzzy_examples:
    print(f"'{original}' -> '{matched}'")

Creating fuzzy mapping...


Total unique fighters in events: 2593
Total unique names in stats: 3076
Exact matches: 2505
Fuzzy matches: 88
No matches found: 0

Sample fuzzy matches:
'josh rafferty' -> 'josh raferty'
'dmitri stepanov' -> 'dmitrei stepanov'
'brad scott' -> 'bradley scott'
'dwayne cason' -> 'duane cason'
'ben alloway' -> 'benny alloway'
'waldo cortes-acosta' -> 'waldo cortes acosta'
'ebenezer fontes braga' -> 'ebenezer braga'
'zachary scroggin' -> 'zach scroggin'
'bibulatov magomed' -> 'magomed bibulatov'
'alex stiebling' -> 'alex steibling'


In [6]:
df_merged.to_csv("../data/notebooks/merged_clean.csv", index = False)

In [7]:
df_merged.isnull().sum()

event_date                               0
event_name                               0
fighter_red                              0
fighter_blue                             0
round                                    0
                                      ... 
sig_strikes_absorbed_per_minute_red    129
takedowns_avg_red                      129
submission_avg_red                     129
knockdown_avg_red                      129
fight_time_avg_red                     129
Length: 75, dtype: int64

In [8]:
missing_df = pd.DataFrame({
    'missing_count': df_merged.isnull().sum(),
    'missing_precent': round((df_merged.isnull().sum() / len(df_merged)) * 100, 2)
}).sort_values(by='missing_precent', ascending=False)

missing_df

Unnamed: 0,missing_count,missing_precent
fight_time_avg_red,129,1.58
knockdown_avg_red,129,1.58
submission_avg_red,129,1.58
takedowns_avg_red,129,1.58
sig_strikes_absorbed_per_minute_red,129,1.58
...,...,...
takedowns_blue,0,0.00
takedowns_red,0,0.00
sub_attempts_blue,0,0.00
sub_attempts_red,0,0.00


It seems that even after normalizing all the fighter names and fuzzy mathcing them with a threshold of 85% some names still do not match.
After investigating more on the official [UFC](https://www.ufc.com/athletes/all) website, indeed these fighter names do not have any official stats.

In [9]:
df_merged[df_merged['fight_time_avg_red'].isnull()].tail(5)

Unnamed: 0,event_date,event_name,fighter_red,fighter_blue,round,time,weight_class,win_method,winner,stance_red,stance_blue,knockdowns_red,knockdowns_blue,sig_attempts_red,sig_attempts_blue,sig_strikes_red,sig_strikes_blue,total_strikes_attempts_red,total_strikes_attempts_blue,total_strikes_red,total_strikes_blue,sub_attempts_red,sub_attempts_blue,takedowns_red,takedowns_blue,takedown_attempts_red,takedown_attempts_blue,control_time_red,control_time_blue,head_strikes_red,head_strikes_blue,head_attempts_red,head_attempts_blue,body_strikes_red,body_strikes_blue,body_attempts_red,body_attempts_blue,leg_strikes_red,leg_strikes_blue,leg_attempts_red,leg_attempts_blue,distance_red,distance_blue,distance_attempts_red,distance_attempts_blue,clinch_strikes_red,clinch_strikes_blue,clinch_attempts_red,clinch_attempts_blue,ground_strikes_red,ground_strikes_blue,ground_attempts_red,ground_attempts_blue,octagon_debut_blue,height_blue,weight_blue,reach_blue,leg_reach_blue,sig_strikes_landed_per_minute_blue,sig_strikes_absorbed_per_minute_blue,takedowns_avg_blue,submission_avg_blue,knockdown_avg_blue,fight_time_avg_blue,octagon_debut_red,height_red,weight_red,reach_red,leg_reach_red,sig_strikes_landed_per_minute_red,sig_strikes_absorbed_per_minute_red,takedowns_avg_red,submission_avg_red,knockdown_avg_red,fight_time_avg_red
5290,2019-11-09,UFC Fight Night: Zabit vs. Kattar,zelim imadaev,danny roberts,2,294,Welterweight,KO/TKO,0,Orthodox,Southpaw,0,1,71,61,28,37,84,68,38,42,0,0,1,1,1,8,151,138,15,10,54,34,10,17,12,17,3,10,5,10,19,18,58,40,9,19,11,21,0,0,2,0,"Dec. 10, 2015",73.0,171.0,74.0,42.0,3.0,4.0,0.24,0.24,0.35,545.0,,,,,,,,,,,
5594,2020-08-15,UFC 252: Miocic vs. Cormier 3,junior dos santos,jairzinho rozenstruik,2,227,Heavyweight,KO/TKO,0,Orthodox,Orthodox,0,1,44,66,24,42,44,66,24,42,0,0,0,0,0,0,0,9,6,21,22,41,13,1,15,2,5,20,7,23,24,30,44,49,0,0,0,0,0,12,0,17,"Feb. 2, 2019",74.0,252.0,78.0,41.0,3.0,3.0,0.0,0.0,0.74,567.0,,,,,,,,,,,
5770,2020-12-12,UFC 256: Figueiredo vs. Moreno,junior dos santos,ciryl gane,2,154,Heavyweight,KO/TKO,0,Orthodox,Orthodox,0,1,35,80,10,59,36,82,11,61,0,0,0,0,1,0,0,25,6,13,26,28,3,17,7,20,1,29,2,32,10,53,35,74,0,4,0,4,0,2,0,2,"Aug. 10, 2019",76.0,245.5,81.0,42.0,5.0,2.0,0.7,0.61,0.26,855.0,,,,,,,,,,,
5970,2021-05-15,UFC 262: Oliveira vs. Chandler,jacare souza,andre muniz,1,239,Middleweight,Submission,0,Orthodox,Southpaw,0,0,19,12,11,7,23,26,15,19,0,1,1,2,2,3,53,70,5,3,13,8,4,2,4,2,2,2,2,2,7,6,13,11,0,1,0,1,4,0,6,0,"Nov. 16, 2019",73.0,185.0,78.0,43.0,2.0,2.0,4.02,1.19,0.0,549.0,,,,,,,,,,,
7771,2024-11-23,UFC Fight Night: Yan vs. Figueiredo,kiru sahota,donghun choi,1,156,Flyweight,KO/TKO,0,Orthodox,Switch,0,1,23,19,2,12,26,19,5,12,0,0,0,0,0,0,0,1,0,6,17,12,0,2,2,3,2,4,4,4,2,12,23,17,0,0,0,1,0,0,0,1,"Nov. 23, 2024",65.8,125.5,68.4,37.0,5.0,3.0,0.46,0.0,0.92,652.0,,,,,,,,,,,


In [10]:
# Find rows where red/blue fighter stats are missing
unmatched_red_rows = df_merged[df_merged['fight_time_avg_red'].isnull()]
unmatched_blue_rows = df_merged[df_merged['fight_time_avg_blue'].isnull()]

# Get unique unmatched fighter names for red and blue
unmatched_red_names = unmatched_red_rows['fighter_red'].dropna().unique()
unmatched_blue_names = unmatched_blue_rows['fighter_blue'].dropna().unique()

# Combine into a single array of unique names
unmatched_names = pd.unique(
    pd.concat([pd.Series(unmatched_red_names), pd.Series(unmatched_blue_names)])
)

unmatched_names

array(['david abbott', 'geza kalman', 'steven graham', 'lance gibson',
       'chael sonnen', 'quinton jackson', 'mirko filipovic',
       'thiago silva', 'war machine', 'manvel gamburyan', 'goran reljic',
       'yoshiyuki yoshida', 'junior dos santos', 'kevin ferguson',
       'jonathan brookins', 'maciej jewtuszko', 'constantinos philippou',
       'godofredo pepey', 'damacio page', 'daniel sarafian',
       'jacare souza', 'joao zeferino', 'ramiro hernandez',
       'kazuki tokudome', 'renee forte', 'edimilson souza',
       'nicholas musoke', 'tiago dos santos e silva', 'yuta sasaki',
       'cristiane justino', 'alberto uda', 'eduardo garagorri',
       'zelim imadaev', 'kiru sahota', 'marcus silveira', 'jason godsey',
       'david roberts', 'joe jordan', 'kristof midoux',
       'edilberto de oliveira', 'brandon melendez', 'billy miles',
       'ryan roberts', 'david mitchell', 'chris tickle', 'fernando bruno',
       'leonardo guimaraes', 'alex ricci', 'azunna anyanwu',
      

There are two ways to proceed.

1. Drop the rows where these fighters exist.

This will lead to inaccuracies, due to the fact that the other fighter in the bout will also be dropped leading to his record not being properly tracked.

2. Impute the missing values.

Better approach, so no values will be dropped and we can have the whole fighter history.

In [11]:
print(df_merged.columns[df_merged.isnull().any()])
print("\n", len(df_merged.columns[df_merged.isnull().any()]))

Index(['octagon_debut_blue', 'height_blue', 'weight_blue', 'reach_blue',
       'leg_reach_blue', 'sig_strikes_landed_per_minute_blue',
       'sig_strikes_absorbed_per_minute_blue', 'takedowns_avg_blue',
       'submission_avg_blue', 'knockdown_avg_blue', 'fight_time_avg_blue',
       'octagon_debut_red', 'height_red', 'weight_red', 'reach_red',
       'leg_reach_red', 'sig_strikes_landed_per_minute_red',
       'sig_strikes_absorbed_per_minute_red', 'takedowns_avg_red',
       'submission_avg_red', 'knockdown_avg_red', 'fight_time_avg_red'],
      dtype='object')

 22


Fill octagon_debut with the min value of the event_date of the fighter. Basically the first fight a fighter fought in the UFC.

In [12]:
df_merged['octagon_debut_blue'] = pd.to_datetime(df_merged['octagon_debut_blue'], errors='coerce')
df_merged['octagon_debut_red'] = pd.to_datetime(df_merged['octagon_debut_red'], errors='coerce')

In [13]:
fighters_long = pd.concat([
    events_cleaned[['fighter_red', 'event_date']].rename(columns={'fighter_red': 'fighter'}),
    events_cleaned[['fighter_blue', 'event_date']].rename(columns={'fighter_blue': 'fighter'})
], ignore_index=True)

fighter_debuts = fighters_long.groupby('fighter')['event_date'].min().reset_index()
fighter_debuts.rename(columns={'event_date': 'octagon_debut'}, inplace=True)

df_merged = df_merged.merge(
    fighter_debuts, left_on='fighter_red', right_on='fighter', how='left'
)

df_merged.loc[df_merged['octagon_debut_red'].isna(), 'octagon_debut_red'] = \
    df_merged.loc[df_merged['octagon_debut_red'].isna(), 'octagon_debut']

df_merged.drop(columns='fighter', inplace=True)
df_merged.drop(columns='octagon_debut', inplace=True)

df_merged = df_merged.merge(
    fighter_debuts, left_on='fighter_blue', right_on='fighter', how='left'
)

df_merged.loc[df_merged['octagon_debut_blue'].isna(), 'octagon_debut_blue'] = \
    df_merged.loc[df_merged['octagon_debut_blue'].isna(), 'octagon_debut']

df_merged.drop(columns='fighter', inplace=True)
df_merged.drop(columns='octagon_debut', inplace=True)

In [14]:
df_merged[df_merged['fighter_red'] == 'alex pereira']

Unnamed: 0,event_date,event_name,fighter_red,fighter_blue,round,time,weight_class,win_method,winner,stance_red,stance_blue,knockdowns_red,knockdowns_blue,sig_attempts_red,sig_attempts_blue,sig_strikes_red,sig_strikes_blue,total_strikes_attempts_red,total_strikes_attempts_blue,total_strikes_red,total_strikes_blue,sub_attempts_red,sub_attempts_blue,takedowns_red,takedowns_blue,takedown_attempts_red,takedown_attempts_blue,control_time_red,control_time_blue,head_strikes_red,head_strikes_blue,head_attempts_red,head_attempts_blue,body_strikes_red,body_strikes_blue,body_attempts_red,body_attempts_blue,leg_strikes_red,leg_strikes_blue,leg_attempts_red,leg_attempts_blue,distance_red,distance_blue,distance_attempts_red,distance_attempts_blue,clinch_strikes_red,clinch_strikes_blue,clinch_attempts_red,clinch_attempts_blue,ground_strikes_red,ground_strikes_blue,ground_attempts_red,ground_attempts_blue,octagon_debut_blue,height_blue,weight_blue,reach_blue,leg_reach_blue,sig_strikes_landed_per_minute_blue,sig_strikes_absorbed_per_minute_blue,takedowns_avg_blue,submission_avg_blue,knockdown_avg_blue,fight_time_avg_blue,octagon_debut_red,height_red,weight_red,reach_red,leg_reach_red,sig_strikes_landed_per_minute_red,sig_strikes_absorbed_per_minute_red,takedowns_avg_red,submission_avg_red,knockdown_avg_red,fight_time_avg_red
6213,2021-11-06,UFC 268: Usman vs. Covington 2,alex pereira,andreas michailidis,2,18,Middleweight,KO/TKO,1,Orthodox,Orthodox,1,0,18,11,11,3,33,19,25,11,0,0,0,2,0,7,4,239,4,3,10,6,5,0,6,2,2,0,2,3,4,0,9,5,6,2,7,5,1,1,2,1,2020-07-15,72.0,170.5,76.0,41.0,3.0,4.0,1.12,0.0,0.0,605.0,2021-11-06,76.0,205.0,79.0,44.0,5.0,4.0,0.11,0.23,0.8,717.0
6379,2022-03-12,UFC Fight Night: Santos vs. Ankalaev,alex pereira,bruno silva,3,300,Middleweight,Decision - Unanimous,1,Orthodox,Orthodox,0,0,179,147,108,59,239,160,165,72,0,0,0,2,0,8,77,195,60,39,120,119,41,8,50,16,7,12,9,12,86,52,151,134,22,7,27,13,0,0,1,0,2019-09-07,64.0,125.0,65.0,35.0,4.0,5.0,1.79,0.16,0.98,615.0,2021-11-06,76.0,205.0,79.0,44.0,5.0,4.0,0.11,0.23,0.8,717.0
6916,2023-04-08,UFC 287: Pereira vs. Adesanya 2,alex pereira,israel adesanya,2,261,Middleweight,KO/TKO,0,Orthodox,Switch,0,1,85,96,49,41,85,96,49,41,0,0,0,0,0,0,0,3,8,18,28,61,15,14,21,24,26,9,36,11,48,40,83,93,1,0,2,2,0,1,0,1,2018-02-11,76.0,185.0,80.0,44.5,4.0,3.0,0.05,0.14,0.62,1055.0,2021-11-06,76.0,205.0,79.0,44.0,5.0,4.0,0.11,0.23,0.8,717.0
7451,2024-04-13,UFC 300: Pereira vs. Hill,alex pereira,jamahal hill,1,194,Open Weight,KO/TKO,1,Orthodox,Southpaw,1,0,30,22,24,12,30,22,24,12,0,0,0,0,0,0,7,0,12,1,15,7,6,3,6,6,6,8,9,9,13,12,16,22,0,0,0,0,11,0,14,0,2020-01-25,76.0,206.0,79.0,43.5,6.0,4.0,0.0,0.0,0.48,622.0,2021-11-06,76.0,205.0,79.0,44.0,5.0,4.0,0.11,0.23,0.8,717.0
7562,2024-06-29,UFC 303: Pereira vs. Prochazka 2,alex pereira,jiri prochazka,2,13,Open Weight,KO/TKO,1,Orthodox,Orthodox,2,0,50,25,38,7,61,25,49,7,0,0,0,0,0,0,3,71,21,4,32,20,8,2,9,4,9,1,9,1,21,6,31,24,7,1,7,1,10,0,12,0,2020-07-11,75.0,204.5,80.0,45.0,6.0,6.0,0.6,0.2,0.79,647.0,2021-11-06,76.0,205.0,79.0,44.0,5.0,4.0,0.11,0.23,0.8,717.0
7694,2024-10-05,UFC 307: Pereira vs. Rountree Jr.,alex pereira,khalil rountree jr,4,272,Open Weight,KO/TKO,1,Orthodox,Southpaw,1,0,209,191,127,61,211,193,129,62,0,0,0,0,0,0,2,0,92,34,167,155,17,16,20,23,18,11,22,13,123,60,205,189,4,1,4,2,0,0,0,0,2016-07-09,73.0,206.0,76.5,44.0,4.0,4.0,0.0,0.09,1.3,572.0,2021-11-06,76.0,205.0,79.0,44.0,5.0,4.0,0.11,0.23,0.8,717.0
7900,2025-03-08,UFC 313: Pereira vs. Ankalaev,alex pereira,magomed ankalaev,5,300,Open Weight,Decision - Unanimous,0,Orthodox,Orthodox,0,0,137,180,76,94,159,224,97,127,0,0,0,0,1,12,0,342,11,36,57,112,17,30,25,39,48,28,55,29,69,75,130,159,7,19,7,21,0,0,0,0,2018-03-17,75.0,205.0,75.0,46.0,4.0,2.0,0.8,0.0,0.56,751.0,2021-11-06,76.0,205.0,79.0,44.0,5.0,4.0,0.11,0.23,0.8,717.0


Impute data

In [15]:
print(df_merged.columns[df_merged.isnull().any()])
print("\n", len(df_merged.columns[df_merged.isnull().any()]))

Index(['height_blue', 'weight_blue', 'reach_blue', 'leg_reach_blue',
       'sig_strikes_landed_per_minute_blue',
       'sig_strikes_absorbed_per_minute_blue', 'takedowns_avg_blue',
       'submission_avg_blue', 'knockdown_avg_blue', 'fight_time_avg_blue',
       'height_red', 'weight_red', 'reach_red', 'leg_reach_red',
       'sig_strikes_landed_per_minute_red',
       'sig_strikes_absorbed_per_minute_red', 'takedowns_avg_red',
       'submission_avg_red', 'knockdown_avg_red', 'fight_time_avg_red'],
      dtype='object')

 20


In [16]:
missing_cols = df_merged.columns[df_merged.isnull().any()]

imputer = KNNImputer(n_neighbors=5)
df_merged[missing_cols] = imputer.fit_transform(df_merged[missing_cols])

# Save as pickle
with open("../models/knn_imputer_feature_engineering.pkl", "wb") as f:
    pickle.dump(imputer, f)

In [17]:
df_merged.isnull().sum()

event_date                             0
event_name                             0
fighter_red                            0
fighter_blue                           0
round                                  0
                                      ..
sig_strikes_absorbed_per_minute_red    0
takedowns_avg_red                      0
submission_avg_red                     0
knockdown_avg_red                      0
fight_time_avg_red                     0
Length: 75, dtype: int64

Convert dates to datetime

In [18]:
df_merged['event_date'] = pd.to_datetime(df_merged['event_date'], errors='coerce')

In [19]:
df_merged.sort_values(by=['event_date'], inplace=True)
df_merged.reset_index(drop=True, inplace=True)

Create average value per stat feature

In [20]:
df_merged

Unnamed: 0,event_date,event_name,fighter_red,fighter_blue,round,time,weight_class,win_method,winner,stance_red,stance_blue,knockdowns_red,knockdowns_blue,sig_attempts_red,sig_attempts_blue,sig_strikes_red,sig_strikes_blue,total_strikes_attempts_red,total_strikes_attempts_blue,total_strikes_red,total_strikes_blue,sub_attempts_red,sub_attempts_blue,takedowns_red,takedowns_blue,takedown_attempts_red,takedown_attempts_blue,control_time_red,control_time_blue,head_strikes_red,head_strikes_blue,head_attempts_red,head_attempts_blue,body_strikes_red,body_strikes_blue,body_attempts_red,body_attempts_blue,leg_strikes_red,leg_strikes_blue,leg_attempts_red,leg_attempts_blue,distance_red,distance_blue,distance_attempts_red,distance_attempts_blue,clinch_strikes_red,clinch_strikes_blue,clinch_attempts_red,clinch_attempts_blue,ground_strikes_red,ground_strikes_blue,ground_attempts_red,ground_attempts_blue,octagon_debut_blue,height_blue,weight_blue,reach_blue,leg_reach_blue,sig_strikes_landed_per_minute_blue,sig_strikes_absorbed_per_minute_blue,takedowns_avg_blue,submission_avg_blue,knockdown_avg_blue,fight_time_avg_blue,octagon_debut_red,height_red,weight_red,reach_red,leg_reach_red,sig_strikes_landed_per_minute_red,sig_strikes_absorbed_per_minute_red,takedowns_avg_red,submission_avg_red,knockdown_avg_red,fight_time_avg_red
0,1994-03-11,UFC 2: No Way Out,jason delucia,scott baker,1,401,Open Weight,Submission,1,Southpaw,Orthodox,0,0,5,2,3,0,25,23,20,14,5,0,0,1,1,1,0,0,1,0,2,2,1,0,2,0,1,0,1,0,2,0,4,2,0,0,0,0,1,0,1,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,71.000000,190.000000,75.800000,40.600000,0.0,0.0,0.00,0.00,0.00,0.0
1,1994-03-11,UFC 2: No Way Out,royce gracie,patrick smith,1,77,Open Weight,KO/TKO,1,Southpaw,Orthodox,0,0,4,2,4,1,11,3,11,2,0,0,1,0,2,0,0,0,3,0,3,0,0,1,0,2,1,0,1,0,0,0,0,1,1,1,1,1,3,0,3,0,1993-11-12,74.000000,225.000000,77.300000,42.500000,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,73.000000,175.000000,74.800000,40.500000,1.0,1.0,0.00,0.76,0.00,630.0
2,1994-03-11,UFC 2: No Way Out,scott morris,sean daugherty,1,20,Open Weight,Submission,1,Orthodox,Orthodox,0,0,1,4,1,0,2,5,2,1,1,0,1,0,1,0,0,0,1,0,1,2,0,0,0,0,0,0,0,2,0,0,0,3,1,0,1,1,0,0,0,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0
3,1994-03-11,UFC 2: No Way Out,remco pardoel,alberta cerra leon,1,591,Open Weight,Submission,1,Southpaw,Orthodox,0,0,6,3,4,1,22,11,20,9,1,0,1,0,1,0,0,0,4,1,5,3,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,4,0,5,2,2025-09-27,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0
4,1994-03-11,UFC 2: No Way Out,orlando wiet,robert lucarelli,1,170,Open Weight,KO/TKO,1,Southpaw,Orthodox,0,0,12,6,8,2,15,6,11,2,0,1,0,1,0,1,0,0,7,1,11,2,1,0,1,1,0,1,0,3,1,2,3,6,0,0,0,0,7,0,9,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8177,2025-09-13,UFC Fight Night: Lopes vs. Silva,alessandro costa,alden coria,3,47,Flyweight,KO/TKO,0,Orthodox,Orthodox,0,0,56,95,12,42,59,131,14,77,2,0,1,2,4,2,84,198,7,34,37,85,1,1,6,3,4,7,13,7,12,29,56,79,0,0,0,0,0,13,0,16,2025-09-13,68.000000,125.000000,67.000000,38.200000,4.0,1.0,2.78,0.00,0.00,647.0,2022-12-17,64.000000,125.000000,67.000000,36.000000,4.0,4.0,0.46,0.46,0.69,656.0
8178,2025-09-13,UFC Fight Night: Lopes vs. Silva,montse rendon,alice pereira,3,300,Women's Bantamweight,Decision - Split,1,Orthodox,Orthodox,0,0,166,117,45,37,185,123,60,43,1,0,0,0,5,3,105,19,31,29,136,103,6,7,14,12,8,1,16,2,43,35,163,115,2,2,3,2,0,0,0,0,2025-09-13,70.140541,166.505447,71.587092,39.902128,2.0,3.0,0.00,0.00,0.00,900.0,2023-09-23,68.000000,135.000000,68.000000,41.000000,3.0,3.0,2.00,0.33,0.00,900.0
8179,2025-09-13,UFC Fight Night: Lopes vs. Silva,rob font,david martinez,3,300,Bantamweight,Decision - Unanimous,0,Orthodox,Orthodox,0,0,158,155,49,62,158,210,49,115,0,0,0,0,5,2,59,33,40,33,138,115,7,10,17,16,2,19,3,24,49,62,158,155,0,0,0,0,0,0,0,0,2025-03-29,65.000000,135.000000,67.500000,37.500000,5.0,3.0,0.00,0.00,0.43,693.0,2014-07-05,68.000000,138.000000,71.500000,38.500000,5.0,4.0,0.74,0.34,0.40,789.0
8180,2025-09-13,UFC Fight Night: Lopes vs. Silva,jose daniel medina,dusko todorovic,1,261,Middleweight,Submission,0,Orthodox,Orthodox,0,0,25,24,9,16,29,36,13,27,0,1,0,2,0,2,0,136,5,10,17,18,3,6,7,6,1,0,1,0,9,10,24,18,0,6,1,6,0,0,0,0,2019-08-27,73.000000,186.000000,74.000000,41.000000,5.0,4.0,1.77,0.35,0.00,461.0,2024-08-24,72.000000,186.000000,74.500000,40.000000,2.0,5.0,0.40,0.00,0.00,568.0


In [21]:
red_fights = df_merged[['event_date','event_name','fighter_red','round','time','weight_class','win_method','winner','stance_red',
                        'knockdowns_red','sig_attempts_red','sig_strikes_red','total_strikes_attempts_red','total_strikes_red',
                        'sub_attempts_red','takedowns_red','takedown_attempts_red','control_time_red','head_strikes_red','head_attempts_red',
                        'body_strikes_red','body_attempts_red','leg_strikes_red','leg_attempts_red','distance_red','distance_attempts_red',
                        'clinch_strikes_red','clinch_attempts_red','ground_strikes_red','ground_attempts_red',]].copy()

# Rename columns but keep fighter_red as fighter_name to avoid conflict
red_fights.columns = ['fighter_name' if col == 'fighter_red' else col.replace('_red', '') if '_red' in col else col for col in red_fights.columns]
red_fights.columns = [col.replace('_red', '') if '_red' in col else col for col in red_fights.columns]
red_fights['corner'] = 'red'

blue_fights = df_merged[['event_date','event_name','fighter_blue','round','time','weight_class','win_method','winner','stance_blue',
                        'knockdowns_blue','sig_attempts_blue','sig_strikes_blue','total_strikes_attempts_blue','total_strikes_blue',
                        'sub_attempts_blue','takedowns_blue','takedown_attempts_blue','control_time_blue','head_strikes_blue','head_attempts_blue',
                        'body_strikes_blue','body_attempts_blue','leg_strikes_blue','leg_attempts_blue','distance_blue','distance_attempts_blue',
                        'clinch_strikes_blue','clinch_attempts_blue','ground_strikes_blue','ground_attempts_blue',]].copy()

blue_fights.columns = ['fighter_name' if col == 'fighter_blue' else col.replace('_blue', '') if '_blue' in col else col for col in blue_fights.columns]
blue_fights.columns = [col.replace('_blue', '') if '_blue' in col else col for col in blue_fights.columns]
blue_fights['corner'] = 'blue'

red_fights['opponent'] = df_merged['fighter_blue']
blue_fights['opponent'] = df_merged['fighter_red']

all_fights = pd.concat([red_fights, blue_fights]).sort_values(['fighter_name', 'event_date'])

all_fights

Unnamed: 0,event_date,event_name,fighter_name,round,time,weight_class,win_method,winner,stance,knockdowns,sig_attempts,sig_strikes,total_strikes_attempts,total_strikes,sub_attempts,takedowns,takedown_attempts,control_time,head_strikes,head_attempts,body_strikes,body_attempts,leg_strikes,leg_attempts,distance,distance_attempts,clinch_strikes,clinch_attempts,ground_strikes,ground_attempts,corner,opponent
5413,2020-02-29,UFC Fight Night: Benavidez vs. Figueiredo,aalon cruz,1,85,Featherweight,KO/TKO,0,Switch,0,12,2,12,2,0,0,1,5,2,4,0,4,0,4,2,12,0,0,0,0,red,spike carlyle
5878,2021-03-06,UFC 259: Blachowicz vs. Adesanya,aalon cruz,1,100,Lightweight,KO/TKO,1,Switch,0,3,0,3,0,0,0,2,11,0,2,0,1,0,0,0,3,0,0,0,0,blue,uros medic
227,2000-11-17,UFC 28: High Stakes,aaron brink,1,55,Heavyweight,Submission,1,Orthodox,0,5,0,5,0,0,0,1,4,0,5,0,0,0,0,0,3,0,2,0,0,blue,andrei arlovski
2654,2014-05-24,UFC 173: Barao vs Dillashaw,aaron phillips,3,300,Featherweight,Decision - Unanimous,1,Southpaw,0,40,18,137,109,1,0,0,68,5,23,12,14,1,3,9,26,3,6,6,8,blue,sam sicilia
2788,2014-08-23,UFC Fight Night: Henderson vs Dos Anjos,aaron phillips,3,300,Bantamweight,Decision - Unanimous,0,Southpaw,0,54,38,279,230,0,0,1,0,23,39,12,12,3,3,14,25,17,18,7,11,red,matt hobar
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5658,2020-09-26,UFC 253: Adesanya vs. Costa,zubaira tukhugov,3,300,Featherweight,Decision - Split,1,Orthodox,0,93,35,98,40,0,1,7,123,31,88,4,5,0,0,34,92,0,0,1,1,blue,hakeem dawodu
6206,2021-10-30,UFC 267: Blachowicz vs. Teixeira,zubaira tukhugov,3,300,Featherweight,Decision - Unanimous,0,Orthodox,0,181,77,185,78,0,2,7,55,73,174,2,5,2,2,77,181,0,0,0,0,blue,ricardo ramos
6830,2023-02-11,UFC 284: Makhachev vs. Volkanovski,zubaira tukhugov,3,300,Lightweight,Decision - Split,0,Orthodox,0,169,70,172,73,0,0,4,131,64,160,4,7,2,2,68,167,2,2,0,0,red,elves brener
6187,2021-10-23,UFC Fight Night: Costa vs. Vettori,zviad lazishvili,3,300,Bantamweight,Decision - Unanimous,1,Orthodox,0,160,63,163,66,0,0,6,90,25,115,17,22,21,23,60,153,3,7,0,0,blue,jonathan martinez


In [22]:
def calculate_fighter_features(df):

    df = df.sort_values('event_date')

    # Calculate expanding/rolling features (using previous fights only)
    df['avg_rounds'] = df['round'].expanding().mean().shift(1)
    df['avg_time'] = df['time'].expanding().mean().shift(1)
    df['avg_knockdowns'] = df['knockdowns'].expanding().mean().shift(1)
    df['avg_sig_attempts'] = df['sig_attempts'].expanding().mean().shift(1)
    df['avg_sig_strikes'] = df['sig_strikes'].expanding().mean().shift(1)
    df['avg_total_strikes_attempts'] = df['total_strikes_attempts'].expanding().mean().shift(1)
    df['avg_total_strikes'] = df['total_strikes'].expanding().mean().shift(1)
    df['avg_sub_attempts'] = df['sub_attempts'].expanding().mean().shift(1)
    df['avg_takedowns'] = df['takedowns'].expanding().mean().shift(1)
    df['avg_takedown_attempts'] = df['takedown_attempts'].expanding().mean().shift(1)
    df['avg_head_strikes'] = df['head_strikes'].expanding().mean().shift(1)
    df['avg_head_attempts'] = df['head_attempts'].expanding().mean().shift(1)
    df['avg_body_strikes'] = df['body_strikes'].expanding().mean().shift(1)
    df['avg_body_attempts'] = df['body_attempts'].expanding().mean().shift(1)
    df['avg_leg_strikes'] = df['leg_strikes'].expanding().mean().shift(1)
    df['avg_leg_attempts'] = df['leg_attempts'].expanding().mean().shift(1)
    df['avg_distance'] = df['distance'].expanding().mean().shift(1)
    df['avg_distance_attempts'] = df['distance_attempts'].expanding().mean().shift(1)
    df['avg_clinch_strikes'] = df['clinch_strikes'].expanding().mean().shift(1)
    df['avg_clinch_attempts'] = df['clinch_attempts'].expanding().mean().shift(1)
    df['avg_ground_strikes'] = df['ground_strikes'].expanding().mean().shift(1)
    df['avg_ground_attempts'] = df['ground_attempts'].expanding().mean().shift(1)
    
    return df

In [23]:
# Apply to each fighter
fighter_features = all_fights.groupby('fighter_name').apply(calculate_fighter_features, include_groups=False)

fighter_features

Unnamed: 0_level_0,Unnamed: 1_level_0,event_date,event_name,round,time,weight_class,win_method,winner,stance,knockdowns,sig_attempts,sig_strikes,total_strikes_attempts,total_strikes,sub_attempts,takedowns,takedown_attempts,control_time,head_strikes,head_attempts,body_strikes,body_attempts,leg_strikes,leg_attempts,distance,distance_attempts,clinch_strikes,clinch_attempts,ground_strikes,ground_attempts,corner,opponent,avg_rounds,avg_time,avg_knockdowns,avg_sig_attempts,avg_sig_strikes,avg_total_strikes_attempts,avg_total_strikes,avg_sub_attempts,avg_takedowns,avg_takedown_attempts,avg_head_strikes,avg_head_attempts,avg_body_strikes,avg_body_attempts,avg_leg_strikes,avg_leg_attempts,avg_distance,avg_distance_attempts,avg_clinch_strikes,avg_clinch_attempts,avg_ground_strikes,avg_ground_attempts
fighter_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1
aalon cruz,5413,2020-02-29,UFC Fight Night: Benavidez vs. Figueiredo,1,85,Featherweight,KO/TKO,0,Switch,0,12,2,12,2,0,0,1,5,2,4,0,4,0,4,2,12,0,0,0,0,red,spike carlyle,,,,,,,,,,,,,,,,,,,,,,
aalon cruz,5878,2021-03-06,UFC 259: Blachowicz vs. Adesanya,1,100,Lightweight,KO/TKO,1,Switch,0,3,0,3,0,0,0,2,11,0,2,0,1,0,0,0,3,0,0,0,0,blue,uros medic,1.000000,85.000000,0.000000,12.000000,2.000000,12.000000,2.000000,0.0,0.000000,1.000000,2.000000,4.000000,0.000000,4.000000,0.000000,4.000000,2.0,12.000000,0.000000,0.000000,0.000000,0.000000
aaron brink,227,2000-11-17,UFC 28: High Stakes,1,55,Heavyweight,Submission,1,Orthodox,0,5,0,5,0,0,0,1,4,0,5,0,0,0,0,0,3,0,2,0,0,blue,andrei arlovski,,,,,,,,,,,,,,,,,,,,,,
aaron phillips,2654,2014-05-24,UFC 173: Barao vs Dillashaw,3,300,Featherweight,Decision - Unanimous,1,Southpaw,0,40,18,137,109,1,0,0,68,5,23,12,14,1,3,9,26,3,6,6,8,blue,sam sicilia,,,,,,,,,,,,,,,,,,,,,,
aaron phillips,2788,2014-08-23,UFC Fight Night: Henderson vs Dos Anjos,3,300,Bantamweight,Decision - Unanimous,0,Southpaw,0,54,38,279,230,0,0,1,0,23,39,12,12,3,3,14,25,17,18,7,11,red,matt hobar,3.000000,300.000000,0.000000,40.000000,18.000000,137.000000,109.000000,1.0,0.000000,0.000000,5.000000,23.000000,12.000000,14.000000,1.000000,3.000000,9.0,26.000000,3.000000,6.000000,6.000000,8.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zubaira tukhugov,5658,2020-09-26,UFC 253: Adesanya vs. Costa,3,300,Featherweight,Decision - Split,1,Orthodox,0,93,35,98,40,0,1,7,123,31,88,4,5,0,0,34,92,0,0,1,1,blue,hakeem dawodu,2.200000,272.400000,0.400000,78.600000,30.600000,82.000000,33.400000,0.0,1.400000,3.000000,24.800000,68.400000,4.800000,9.000000,1.000000,1.200000,22.6,65.600000,3.000000,5.600000,5.000000,7.400000
zubaira tukhugov,6206,2021-10-30,UFC 267: Blachowicz vs. Teixeira,3,300,Featherweight,Decision - Unanimous,0,Orthodox,0,181,77,185,78,0,2,7,55,73,174,2,5,2,2,77,181,0,0,0,0,blue,ricardo ramos,2.333333,277.000000,0.333333,81.000000,31.333333,84.666667,34.500000,0.0,1.333333,3.666667,25.833333,71.666667,4.666667,8.333333,0.833333,1.000000,24.5,70.000000,2.500000,4.666667,4.333333,6.333333
zubaira tukhugov,6830,2023-02-11,UFC 284: Makhachev vs. Volkanovski,3,300,Lightweight,Decision - Split,0,Orthodox,0,169,70,172,73,0,0,4,131,64,160,4,7,2,2,68,167,2,2,0,0,red,elves brener,2.428571,280.285714,0.285714,95.285714,37.857143,99.000000,40.714286,0.0,1.428571,4.142857,32.571429,86.285714,4.285714,7.857143,1.000000,1.142857,32.0,85.857143,2.142857,4.000000,3.714286,5.428571
zviad lazishvili,6187,2021-10-23,UFC Fight Night: Costa vs. Vettori,3,300,Bantamweight,Decision - Unanimous,1,Orthodox,0,160,63,163,66,0,0,6,90,25,115,17,22,21,23,60,153,3,7,0,0,blue,jonathan martinez,,,,,,,,,,,,,,,,,,,,,,


In [24]:
# Evaluate a fighter
fighter_features = fighter_features.reset_index()
fighter_features

test_fighter = fighter_features[fighter_features['fighter_name'] == 'jon jones'].head(5)
test_fighter

Unnamed: 0,fighter_name,level_1,event_date,event_name,round,time,weight_class,win_method,winner,stance,knockdowns,sig_attempts,sig_strikes,total_strikes_attempts,total_strikes,sub_attempts,takedowns,takedown_attempts,control_time,head_strikes,head_attempts,body_strikes,body_attempts,leg_strikes,leg_attempts,distance,distance_attempts,clinch_strikes,clinch_attempts,ground_strikes,ground_attempts,corner,opponent,avg_rounds,avg_time,avg_knockdowns,avg_sig_attempts,avg_sig_strikes,avg_total_strikes_attempts,avg_total_strikes,avg_sub_attempts,avg_takedowns,avg_takedown_attempts,avg_head_strikes,avg_head_attempts,avg_body_strikes,avg_body_attempts,avg_leg_strikes,avg_leg_attempts,avg_distance,avg_distance_attempts,avg_clinch_strikes,avg_clinch_attempts,avg_ground_strikes,avg_ground_attempts
7935,jon jones,931,2008-08-09,UFC 87: Seek And Destroy,3,300,Light Heavyweight,Decision - Unanimous,1,Orthodox,0,102,42,129,68,0,2,5,256,25,73,16,24,1,5,13,46,26,51,3,5,red,andre gusmao,,,,,,,,,,,,,,,,,,,,,,
7936,jon jones,1027,2009-01-31,UFC 94: St-Pierre vs Penn 2,3,300,Light Heavyweight,Decision - Unanimous,1,Orthodox,1,62,45,89,70,0,7,10,251,24,35,16,22,5,5,13,23,22,27,10,12,red,stephan bonnar,3.0,300.0,0.0,102.0,42.0,129.0,68.0,0.0,2.0,5.0,25.0,73.0,16.0,24.0,1.0,5.0,13.0,46.0,26.0,51.0,3.0,5.0
7937,jon jones,1123,2009-07-11,UFC 100,2,163,Light Heavyweight,Submission,1,Orthodox,0,87,25,87,25,1,0,0,35,11,63,6,8,8,16,24,86,1,1,0,0,red,jake obrien,3.0,300.0,0.5,82.0,43.5,109.0,69.0,0.0,4.5,7.5,24.5,54.0,16.0,23.0,3.0,5.0,13.0,34.5,24.0,39.0,6.5,8.5
7938,jon jones,1208,2009-12-05,The Ultimate Fighter: Heavyweights Finale,1,254,Light Heavyweight,DQ,1,Orthodox,0,88,44,106,55,0,1,1,84,33,76,7,8,4,4,9,25,2,2,33,61,blue,matt hamill,2.666667,254.333333,0.333333,83.666667,37.333333,101.666667,54.333333,0.333333,3.0,5.0,20.0,57.0,12.666667,18.0,4.666667,8.666667,16.666667,51.666667,16.333333,26.333333,4.333333,5.666667
7939,jon jones,1277,2010-03-21,UFC Live: Vera vs Jones,1,199,Light Heavyweight,KO/TKO,0,Orthodox,0,17,11,33,26,0,2,3,164,9,14,1,1,1,2,1,3,1,1,9,13,blue,brandon vera,2.25,254.25,0.25,84.75,39.0,102.75,54.5,0.25,2.5,4.0,23.25,61.75,11.25,15.5,4.5,7.5,14.75,45.0,12.75,20.25,11.5,19.5


In [25]:
# Reset index first
fighter_features = fighter_features.reset_index()

# Separate red and blue features
red_features = fighter_features[fighter_features['corner'] == 'red'].copy()
blue_features = fighter_features[fighter_features['corner'] == 'blue'].copy()

# Add suffixes to feature columns
feature_cols = [col for col in fighter_features.columns if col.startswith('avg_')]

red_rename = {col: f'{col}_red' for col in feature_cols}
blue_rename = {col: f'{col}_blue' for col in feature_cols}

red_features = red_features.rename(columns=red_rename)
blue_features = blue_features.rename(columns=blue_rename)

# Merge back to original dataframe
df_merged = df_merged.merge(
    red_features[['fighter_name', 'event_date', 'opponent'] + list(red_rename.values())],
    left_on=['fighter_red', 'event_date', 'fighter_blue'],
    right_on=['fighter_name', 'event_date', 'opponent'],
    how='left'
).merge(
    blue_features[['fighter_name', 'event_date', 'opponent'] + list(blue_rename.values())],
    left_on=['fighter_blue', 'event_date', 'fighter_red'],
    right_on=['fighter_name', 'event_date', 'opponent'],
    how='left',
    suffixes=('', '_blue_temp')
)

# Clean up duplicate columns
df_merged = df_merged.drop(['fighter_name', 'fighter_name_blue_temp', 'opponent', 
                            'opponent_blue_temp'], axis=1, errors='ignore')

df_merged


Unnamed: 0,event_date,event_name,fighter_red,fighter_blue,round,time,weight_class,win_method,winner,stance_red,stance_blue,knockdowns_red,knockdowns_blue,sig_attempts_red,sig_attempts_blue,sig_strikes_red,sig_strikes_blue,total_strikes_attempts_red,total_strikes_attempts_blue,total_strikes_red,total_strikes_blue,sub_attempts_red,sub_attempts_blue,takedowns_red,takedowns_blue,takedown_attempts_red,takedown_attempts_blue,control_time_red,control_time_blue,head_strikes_red,head_strikes_blue,head_attempts_red,head_attempts_blue,body_strikes_red,body_strikes_blue,body_attempts_red,body_attempts_blue,leg_strikes_red,leg_strikes_blue,leg_attempts_red,leg_attempts_blue,distance_red,distance_blue,distance_attempts_red,distance_attempts_blue,clinch_strikes_red,clinch_strikes_blue,clinch_attempts_red,clinch_attempts_blue,ground_strikes_red,ground_strikes_blue,ground_attempts_red,ground_attempts_blue,octagon_debut_blue,height_blue,weight_blue,reach_blue,leg_reach_blue,sig_strikes_landed_per_minute_blue,sig_strikes_absorbed_per_minute_blue,takedowns_avg_blue,submission_avg_blue,knockdown_avg_blue,fight_time_avg_blue,octagon_debut_red,height_red,weight_red,reach_red,leg_reach_red,sig_strikes_landed_per_minute_red,sig_strikes_absorbed_per_minute_red,takedowns_avg_red,submission_avg_red,knockdown_avg_red,fight_time_avg_red,avg_rounds_red,avg_time_red,avg_knockdowns_red,avg_sig_attempts_red,avg_sig_strikes_red,avg_total_strikes_attempts_red,avg_total_strikes_red,avg_sub_attempts_red,avg_takedowns_red,avg_takedown_attempts_red,avg_head_strikes_red,avg_head_attempts_red,avg_body_strikes_red,avg_body_attempts_red,avg_leg_strikes_red,avg_leg_attempts_red,avg_distance_red,avg_distance_attempts_red,avg_clinch_strikes_red,avg_clinch_attempts_red,avg_ground_strikes_red,avg_ground_attempts_red,avg_rounds_blue,avg_time_blue,avg_knockdowns_blue,avg_sig_attempts_blue,avg_sig_strikes_blue,avg_total_strikes_attempts_blue,avg_total_strikes_blue,avg_sub_attempts_blue,avg_takedowns_blue,avg_takedown_attempts_blue,avg_head_strikes_blue,avg_head_attempts_blue,avg_body_strikes_blue,avg_body_attempts_blue,avg_leg_strikes_blue,avg_leg_attempts_blue,avg_distance_blue,avg_distance_attempts_blue,avg_clinch_strikes_blue,avg_clinch_attempts_blue,avg_ground_strikes_blue,avg_ground_attempts_blue
0,1994-03-11,UFC 2: No Way Out,jason delucia,scott baker,1,401,Open Weight,Submission,1,Southpaw,Orthodox,0,0,5,2,3,0,25,23,20,14,5,0,0,1,1,1,0,0,1,0,2,2,1,0,2,0,1,0,1,0,2,0,4,2,0,0,0,0,1,0,1,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,71.000000,190.000000,75.800000,40.600000,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1994-03-11,UFC 2: No Way Out,royce gracie,patrick smith,1,77,Open Weight,KO/TKO,1,Southpaw,Orthodox,0,0,4,2,4,1,11,3,11,2,0,0,1,0,2,0,0,0,3,0,3,0,0,1,0,2,1,0,1,0,0,0,0,1,1,1,1,1,3,0,3,0,1993-11-12,74.000000,225.000000,77.300000,42.500000,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,73.000000,175.000000,74.800000,40.500000,1.0,1.0,0.00,0.76,0.00,630.0,,,,,,,,,,,,,,,,,,,,,,,1.000000,51.666667,0.0000,10.000000,6.333333,12.666667,8.333333,0.666667,0.000000,0.333333,4.333333,6.666667,1.333333,1.333333,0.666667,2.000000,1.000000,3.333333,1.333333,1.333333,4.00,5.333333
2,1994-03-11,UFC 2: No Way Out,scott morris,sean daugherty,1,20,Open Weight,Submission,1,Orthodox,Orthodox,0,0,1,4,1,0,2,5,2,1,1,0,1,0,1,0,0,0,1,0,1,2,0,0,0,0,0,0,0,2,0,0,0,3,1,0,1,1,0,0,0,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1994-03-11,UFC 2: No Way Out,remco pardoel,alberta cerra leon,1,591,Open Weight,Submission,1,Southpaw,Orthodox,0,0,6,3,4,1,22,11,20,9,1,0,1,0,1,0,0,0,4,1,5,3,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,4,0,5,2,2025-09-27,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1994-03-11,UFC 2: No Way Out,orlando wiet,robert lucarelli,1,170,Open Weight,KO/TKO,1,Southpaw,Orthodox,0,0,12,6,8,2,15,6,11,2,0,1,0,1,0,1,0,0,7,1,11,2,1,0,1,1,0,1,0,3,1,2,3,6,0,0,0,0,7,0,9,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8177,2025-09-13,UFC Fight Night: Lopes vs. Silva,alessandro costa,alden coria,3,47,Flyweight,KO/TKO,0,Orthodox,Orthodox,0,0,56,95,12,42,59,131,14,77,2,0,1,2,4,2,84,198,7,34,37,85,1,1,6,3,4,7,13,7,12,29,56,79,0,0,0,0,0,13,0,16,2025-09-13,68.000000,125.000000,67.000000,38.200000,4.0,1.0,2.78,0.00,0.00,647.0,2022-12-17,64.000000,125.000000,67.000000,36.000000,4.0,4.0,0.46,0.46,0.69,656.0,2.500000,147.750000,0.500000,86.250000,45.500000,108.000000,62.00,0.000000,0.250000,2.00,26.500000,62.2500,8.500000,11.750000,10.500000,12.250000,32.500000,69.250000,5.250000,6.250000,7.750000,10.750000,,,,,,,,,,,,,,,,,,,,,,
8178,2025-09-13,UFC Fight Night: Lopes vs. Silva,montse rendon,alice pereira,3,300,Women's Bantamweight,Decision - Split,1,Orthodox,Orthodox,0,0,166,117,45,37,185,123,60,43,1,0,0,0,5,3,105,19,31,29,136,103,6,7,14,12,8,1,16,2,43,35,163,115,2,2,3,2,0,0,0,0,2025-09-13,70.140541,166.505447,71.587092,39.902128,2.0,3.0,0.00,0.00,0.00,900.0,2023-09-23,68.000000,135.000000,68.000000,41.000000,3.0,3.0,2.00,0.33,0.00,900.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8179,2025-09-13,UFC Fight Night: Lopes vs. Silva,rob font,david martinez,3,300,Bantamweight,Decision - Unanimous,0,Orthodox,Orthodox,0,0,158,155,49,62,158,210,49,115,0,0,0,0,5,2,59,33,40,33,138,115,7,10,17,16,2,19,3,24,49,62,158,155,0,0,0,0,0,0,0,0,2025-03-29,65.000000,135.000000,67.500000,37.500000,5.0,3.0,0.00,0.00,0.43,693.0,2014-07-05,68.000000,138.000000,71.500000,38.500000,5.0,4.0,0.74,0.34,0.40,789.0,2.736842,262.421053,0.368421,154.947368,71.684211,163.263158,79.00,0.315789,0.684211,2.00,54.263158,133.0000,12.578947,16.263158,4.842105,5.684211,63.526316,144.210526,4.894737,6.368421,3.263158,4.368421,1.000000,278.000000,1.0000,41.000000,27.000000,42.000000,28.000000,0.000000,0.000000,1.000000,21.000000,34.000000,1.000000,2.000000,5.000000,5.000000,14.000000,25.000000,5.000000,5.000000,8.00,11.000000
8180,2025-09-13,UFC Fight Night: Lopes vs. Silva,jose daniel medina,dusko todorovic,1,261,Middleweight,Submission,0,Orthodox,Orthodox,0,0,25,24,9,16,29,36,13,27,0,1,0,2,0,2,0,136,5,10,17,18,3,6,7,6,1,0,1,0,9,10,24,18,0,6,1,6,0,0,0,0,2019-08-27,73.000000,186.000000,74.000000,41.000000,5.0,4.0,1.77,0.35,0.00,461.0,2024-08-24,72.000000,186.000000,74.500000,40.000000,2.0,5.0,0.40,0.00,0.00,568.0,,,,,,,,,,,,,,,,,,,,,,,1.666667,234.777778,0.0000,58.555556,31.333333,79.444444,49.222222,0.111111,0.888889,3.444444,23.222222,49.444444,5.777778,6.666667,2.333333,2.444444,15.666667,37.777778,7.666667,8.888889,8.00,11.888889


Wins & Losses

In [26]:
def create_fighter_record_features(df):
    """
    Create win/loss tracking features for UFC fighters while avoiding data leakage.
    
    Parameters:
    df: DataFrame with UFC fight data
    
    Returns:
    DataFrame with additional features for fighter records
    """
    # Create a copy to avoid modifying the original
    df_copy = df.copy()
    
    # Ensure event_date is datetime
    df_copy['event_date'] = pd.to_datetime(df_copy['event_date'])
    
    # Sort by date to process fights chronologically
    df_copy = df_copy.sort_values('event_date').reset_index(drop=True)
    
    # Initialize tracking dictionaries for each fighter
    fighter_wins = {}
    fighter_losses = {}
    fighter_total_fights = {}
    
    # Initialize the new columns
    df_copy['wins_before_red'] = 0
    df_copy['losses_before_red'] = 0
    df_copy['total_fights_before_red'] = 0
    df_copy['wins_before_blue'] = 0
    df_copy['losses_before_blue'] = 0
    df_copy['total_fights_before_blue'] = 0
    
    # Process each fight chronologically
    for idx, row in df_copy.iterrows():
        red_fighter = row['fighter_red']
        blue_fighter = row['fighter_blue']
        winner = row['winner']
        
        # Get current records BEFORE this fight (to avoid data leakage)
        wins_before_red = fighter_wins.get(red_fighter, 0)
        losses_before_red = fighter_losses.get(red_fighter, 0)
        red_total_before = fighter_total_fights.get(red_fighter, 0)
        
        wins_before_blue = fighter_wins.get(blue_fighter, 0)
        losses_before_blue = fighter_losses.get(blue_fighter, 0)
        blue_total_before = fighter_total_fights.get(blue_fighter, 0)
        
        # Assign the records before this fight
        df_copy.at[idx, 'wins_before_red'] = wins_before_red
        df_copy.at[idx, 'losses_before_red'] = losses_before_red
        df_copy.at[idx, 'total_fights_before_red'] = red_total_before
        df_copy.at[idx, 'wins_before_blue'] = wins_before_blue
        df_copy.at[idx, 'losses_before_blue'] = losses_before_blue
        df_copy.at[idx, 'total_fights_before_blue'] = blue_total_before
        
        # Update records AFTER processing this fight
        # Red fighter
        if red_fighter not in fighter_wins:
            fighter_wins[red_fighter] = 0
            fighter_losses[red_fighter] = 0
            fighter_total_fights[red_fighter] = 0
        
        # Blue fighter
        if blue_fighter not in fighter_wins:
            fighter_wins[blue_fighter] = 0
            fighter_losses[blue_fighter] = 0
            fighter_total_fights[blue_fighter] = 0
        
        # Update based on fight result
        if winner == 1:  # Red fighter wins
            fighter_wins[red_fighter] += 1
            fighter_losses[blue_fighter] += 1
        else:  # Blue fighter wins
            fighter_wins[blue_fighter] += 1
            fighter_losses[red_fighter] += 1
        
        # Update total fights for both fighters
        fighter_total_fights[red_fighter] += 1
        fighter_total_fights[blue_fighter] += 1
    
    return df_copy

def create_recent_performance_features(df, recent_fights=3):
    """
    Create features based on recent fight performance
    This is more complex and requires tracking recent results
    """
    df_copy = df.copy()
    df_copy['event_date'] = pd.to_datetime(df_copy['event_date'])
    df_copy = df_copy.sort_values('event_date').reset_index(drop=True)
    
    # Track recent results for each fighter
    fighter_recent_results = {}  # Will store list of recent results for each fighter
    
    # Initialize columns
    df_copy[f'wins_last_{recent_fights}_red'] = 0
    df_copy[f'wins_last_{recent_fights}_blue'] = 0
    df_copy[f'fights_last_{recent_fights}_red'] = 0
    df_copy[f'fights_last_{recent_fights}_blue'] = 0
    
    for idx, row in df_copy.iterrows():
        red_fighter = row['fighter_red']
        blue_fighter = row['fighter_blue']
        winner = row['winner']
        
        # Initialize if first time seeing fighter
        if red_fighter not in fighter_recent_results:
            fighter_recent_results[red_fighter] = []
        if blue_fighter not in fighter_recent_results:
            fighter_recent_results[blue_fighter] = []
        
        # Get recent form BEFORE this fight
        red_recent = fighter_recent_results[red_fighter][-recent_fights:]
        blue_recent = fighter_recent_results[blue_fighter][-recent_fights:]
        
        # Calculate recent performance
        df_copy.at[idx, f'wins_last_{recent_fights}_red'] = sum(red_recent)
        df_copy.at[idx, f'wins_last_{recent_fights}_blue'] = sum(blue_recent)
        df_copy.at[idx, f'fights_last_{recent_fights}_red'] = len(red_recent)
        df_copy.at[idx, f'fights_last_{recent_fights}_blue'] = len(blue_recent)
        
        # Update recent results AFTER processing this fight
        if winner == 1:  # Red wins
            fighter_recent_results[red_fighter].append(1)
            fighter_recent_results[blue_fighter].append(0)
        else:  # Blue wins
            fighter_recent_results[red_fighter].append(0)
            fighter_recent_results[blue_fighter].append(1)
    
    return df_copy

Win Ratio

In [27]:
def create_win_ratio_record(df):
    """
    Create additional derived features from win/loss records
    """
    df_processed = df.copy()
    
    # Win percentage (handle division by zero)
    df_processed['win_pct_before_red'] = np.where(
        df_processed['total_fights_before_red'] > 0,
        (df_processed['wins_before_red'] / df_processed['total_fights_before_red']).round(3),
        0
    )
    
    df_processed['win_pct_before_blue'] = np.where(
        df_processed['total_fights_before_blue'] > 0,
        (df_processed['wins_before_blue'] / df_processed['total_fights_before_blue']).round(3),
        0
    )
        
    return df_processed

Days since debut

In [28]:
def create_days_since_debut_features(df):
    """
    Calculate days since each fighter's debut, avoiding data leakage.
    
    Parameters:
    df: DataFrame with UFC fight data (must be sorted by event_date)
    
    Returns:
    DataFrame with days_since_debut features added
    """
    df_copy = df.copy()
    
    # Ensure event_date is datetime and data is sorted
    df_copy['event_date'] = pd.to_datetime(df_copy['event_date'])
    df_copy = df_copy.sort_values('event_date').reset_index(drop=True)
    
    # Track each fighter's debut date
    fighter_debut_dates = {}
    
    # Initialize the new columns
    df_copy['days_since_debut_red'] = 0
    df_copy['days_since_debut_blue'] = 0
    
    # Process each fight chronologically
    for idx, row in df_copy.iterrows():
        red_fighter = row['fighter_red']
        blue_fighter = row['fighter_blue']
        current_date = row['event_date']
        
        # Check if this is the fighter's debut (first time we see them)
        if red_fighter not in fighter_debut_dates:
            # This is red fighter's debut
            fighter_debut_dates[red_fighter] = current_date
            df_copy.at[idx, 'days_since_debut_red'] = 0
        else:
            # Calculate days since debut
            days_since = (current_date - fighter_debut_dates[red_fighter]).days
            df_copy.at[idx, 'days_since_debut_red'] = days_since
        
        if blue_fighter not in fighter_debut_dates:
            # This is blue fighter's debut
            fighter_debut_dates[blue_fighter] = current_date
            df_copy.at[idx, 'days_since_debut_blue'] = 0
        else:
            # Calculate days since debut
            days_since = (current_date - fighter_debut_dates[blue_fighter]).days
            df_copy.at[idx, 'days_since_debut_blue'] = days_since
    
    return df_copy

Days since last win

In [29]:
def create_days_since_last_win_features(df):
    """
    Calculate days since each fighter's last win, avoiding data leakage.
    
    Parameters:
    df: DataFrame with UFC fight data (must be sorted by event_date)
    
    Returns:
    DataFrame with days_since_last_win features added
    """
    df_copy = df.copy()
    
    # Ensure event_date is datetime and data is sorted
    df_copy['event_date'] = pd.to_datetime(df_copy['event_date'])
    df_copy = df_copy.sort_values('event_date').reset_index(drop=True)
    
    # Track each fighter's last win date
    fighter_last_win_dates = {}
    
    # Initialize the new columns
    df_copy['days_since_last_win_red'] = np.nan  # NaN for fighters who never won
    df_copy['days_since_last_win_blue'] = np.nan
    
    # Process each fight chronologically
    for idx, row in df_copy.iterrows():
        red_fighter = row['fighter_red']
        blue_fighter = row['fighter_blue']
        current_date = row['event_date']
        winner = row['winner']
        
        # Calculate days since last win BEFORE this fight (to avoid data leakage)
        if red_fighter in fighter_last_win_dates:
            days_since = (current_date - fighter_last_win_dates[red_fighter]).days
            df_copy.at[idx, 'days_since_last_win_red'] = days_since
        else:
            # Fighter has never won before, or this is their first fight
            df_copy.at[idx, 'days_since_last_win_red'] = np.nan
        
        if blue_fighter in fighter_last_win_dates:
            days_since = (current_date - fighter_last_win_dates[blue_fighter]).days
            df_copy.at[idx, 'days_since_last_win_blue'] = days_since
        else:
            # Fighter has never won before, or this is their first fight
            df_copy.at[idx, 'days_since_last_win_blue'] = np.nan
        
        # Update last win dates AFTER processing this fight
        if winner == 1:  # Red fighter wins
            fighter_last_win_dates[red_fighter] = current_date
        elif winner == 0:  # Blue fighter wins
            fighter_last_win_dates[blue_fighter] = current_date
    
    # Add derived features
    
    # Binary indicators
    #df_copy['red_never_won'] = df_copy['days_since_last_win_red'].isna().astype(int)
    #df_copy['blue_never_won'] = df_copy['days_since_last_win_blue'].isna().astype(int)
    
    df_copy['recent_winner_red'] = (df_copy['days_since_last_win_red'] <= 365).astype(int)  # Won within last year
    df_copy['recent_winner_blue'] = (df_copy['days_since_last_win_blue'] <= 365).astype(int)
    
    # Handle NaN values for modeling (replace with a large number or separate indicator)
    # Replace NaN with a large number (e.g., 9999 days =~ 27.3 years) to indicate "never won"
    df_copy['days_since_last_win_red'] = df_copy['days_since_last_win_red'].fillna(9999)
    df_copy['days_since_last_win_blue'] = df_copy['days_since_last_win_blue'].fillna(9999)
    
    return df_copy

Wins in the last calendar year

In [30]:
def create_wins_last_year_features(df):
    """
    Calculate wins in the last 365 days for each fighter, avoiding data leakage.
    
    Parameters:
    df: DataFrame with UFC fight data (must be sorted by event_date)
    
    Returns:
    DataFrame with wins_last_year features added
    """
    df_copy = df.copy()
    
    # Ensure event_date is datetime and data is sorted
    df_copy['event_date'] = pd.to_datetime(df_copy['event_date'])
    df_copy = df_copy.sort_values('event_date').reset_index(drop=True)
    
    # Track fight history for each fighter (date, result pairs)
    fighter_fight_history = {}
    
    # Initialize the new columns
    df_copy['wins_last_365_days_red'] = 0
    df_copy['wins_last_365_days_blue'] = 0
    df_copy['fights_last_365_days_red'] = 0
    df_copy['fights_last_365_days_blue'] = 0
    
    # Process each fight chronologically
    for idx, row in df_copy.iterrows():
        red_fighter = row['fighter_red']
        blue_fighter = row['fighter_blue']
        current_date = row['event_date']
        winner = row['winner']
        
        # Initialize fighter history if first time seeing them
        if red_fighter not in fighter_fight_history:
            fighter_fight_history[red_fighter] = []
        if blue_fighter not in fighter_fight_history:
            fighter_fight_history[blue_fighter] = []
        
        # Calculate cutoff date for last 365 days (before current fight to avoid leakage)
        cutoff_date = current_date - timedelta(days=365)
        
        # Count wins and total fights in last 365 days for red fighter
        red_recent_fights = [
            fight for fight in fighter_fight_history[red_fighter] 
            if fight['date'] > cutoff_date
        ]
        red_wins_365 = sum(1 for fight in red_recent_fights if fight['won'])
        red_fights_365 = len(red_recent_fights)
        
        # Count wins and total fights in last 365 days for blue fighter
        blue_recent_fights = [
            fight for fight in fighter_fight_history[blue_fighter] 
            if fight['date'] > cutoff_date
        ]
        blue_wins_365 = sum(1 for fight in blue_recent_fights if fight['won'])
        blue_fights_365 = len(blue_recent_fights)
        
        # Assign the counts BEFORE this fight (avoiding data leakage)
        df_copy.at[idx, 'wins_last_365_days_red'] = red_wins_365
        df_copy.at[idx, 'wins_last_365_days_blue'] = blue_wins_365
        df_copy.at[idx, 'fights_last_365_days_red'] = red_fights_365
        df_copy.at[idx, 'fights_last_365_days_blue'] = blue_fights_365
        
        # Update fight history AFTER processing this fight
        # Add red fighter's result
        red_won = (winner == 1)
        fighter_fight_history[red_fighter].append({
            'date': current_date,
            'won': red_won
        })
        
        # Add blue fighter's result  
        blue_won = (winner == 0)
        fighter_fight_history[blue_fighter].append({
            'date': current_date,
            'won': blue_won
        })
    
    # Add derived features
    # Win rate in last 365 days
    '''
    df_copy['win_rate_last_365_days_red'] = np.where(
        df_copy['fights_last_365_days_red'] > 0,
        df_copy['wins_last_365_days_red'] / df_copy['fights_last_365_days_red'],
        0
    )
    
    df_copy['win_rate_last_365_days_blue'] = np.where(
        df_copy['fights_last_365_days_blue'] > 0,
        df_copy['wins_last_365_days_blue'] / df_copy['fights_last_365_days_blue'],
        0
    )
    '''

    df_copy['undefeated_last_year_red'] = (
        (df_copy['fights_last_365_days_red'] > 0) & 
        (df_copy['wins_last_365_days_red'] == df_copy['fights_last_365_days_red'])
    ).astype(int)
    
    df_copy['undefeated_last_year_blue'] = (
        (df_copy['fights_last_365_days_blue'] > 0) & 
        (df_copy['wins_last_365_days_blue'] == df_copy['fights_last_365_days_blue'])
    ).astype(int)
    
    return df_copy

Win Lose streak

In [31]:
def create_win_lose_streak_features(df):
    """
    Calculate current win/lose streaks for each fighter, avoiding data leakage.
    
    Parameters:
    df: DataFrame with UFC fight data (must be sorted by event_date)
    
    Returns:
    DataFrame with win/lose streak features added
    """
    df_copy = df.copy()
    
    # Ensure event_date is datetime and data is sorted
    df_copy['event_date'] = pd.to_datetime(df_copy['event_date'])
    df_copy = df_copy.sort_values('event_date').reset_index(drop=True)
    
    # Track streak information for each fighter
    fighter_streaks = {}
    
    # Initialize the new columns
    df_copy['win_streak_red'] = 0
    df_copy['win_streak_blue'] = 0
    df_copy['lose_streak_red'] = 0
    df_copy['lose_streak_blue'] = 0
    
    # Process each fight chronologically
    for idx, row in df_copy.iterrows():
        red_fighter = row['fighter_red']
        blue_fighter = row['fighter_blue']
        winner = row['winner']
        
        # Initialize fighter streak tracking if first time seeing them
        if red_fighter not in fighter_streaks:
            fighter_streaks[red_fighter] = {
                'current_win_streak': 0,
                'current_lose_streak': 0,
                'last_result': None  # 'win', 'loss', or None
            }
        
        if blue_fighter not in fighter_streaks:
            fighter_streaks[blue_fighter] = {
                'current_win_streak': 0,
                'current_lose_streak': 0,
                'last_result': None
            }
        
        # Get current streaks BEFORE this fight (to avoid data leakage)
        win_streak_red = fighter_streaks[red_fighter]['current_win_streak']
        lose_streak_red = fighter_streaks[red_fighter]['current_lose_streak']
        win_streak_blue = fighter_streaks[blue_fighter]['current_win_streak']
        lose_streak_blue = fighter_streaks[blue_fighter]['current_lose_streak']
        
        # Assign streaks before this fight
        df_copy.at[idx, 'win_streak_red'] = win_streak_red
        df_copy.at[idx, 'lose_streak_red'] = lose_streak_red
        df_copy.at[idx, 'win_streak_blue'] = win_streak_blue
        df_copy.at[idx, 'lose_streak_blue'] = lose_streak_blue
        
        # Update streaks AFTER processing this fight
        if winner == 1:  # Red fighter wins
            # Red fighter wins - update win streak, reset lose streak
            fighter_streaks[red_fighter]['current_win_streak'] += 1
            fighter_streaks[red_fighter]['current_lose_streak'] = 0
            fighter_streaks[red_fighter]['last_result'] = 'win'
            
            # Blue fighter loses - update lose streak, reset win streak
            fighter_streaks[blue_fighter]['current_lose_streak'] += 1
            fighter_streaks[blue_fighter]['current_win_streak'] = 0
            fighter_streaks[blue_fighter]['last_result'] = 'loss'
            
        elif winner == 0:  # Blue fighter wins
            # Blue fighter wins - update win streak, reset lose streak
            fighter_streaks[blue_fighter]['current_win_streak'] += 1
            fighter_streaks[blue_fighter]['current_lose_streak'] = 0
            fighter_streaks[blue_fighter]['last_result'] = 'win'
            
            # Red fighter loses - update lose streak, reset win streak
            fighter_streaks[red_fighter]['current_lose_streak'] += 1
            fighter_streaks[red_fighter]['current_win_streak'] = 0
            fighter_streaks[red_fighter]['last_result'] = 'loss'
    
    # Add derived features
    
    # Momentum indicators
    df_copy['on_win_streak_red'] = (df_copy['win_streak_red'] >= 1).astype(int)
    df_copy['on_win_streak_blue'] = (df_copy['win_streak_blue'] >= 1).astype(int)
    #df_copy['red_on_lose_streak'] = (df_copy['lose_streak_red'] >= 1).astype(int)
    #df_copy['blue_on_lose_streak'] = (df_copy['lose_streak_blue'] >= 1).astype(int)
    
    # Long streak indicators (3+ wins/losses)
    df_copy['long_win_streak_red'] = (df_copy['win_streak_red'] >= 3).astype(int)
    df_copy['long_win_streak_blue'] = (df_copy['win_streak_blue'] >= 3).astype(int)
    #df_copy['red_long_lose_streak'] = (df_copy['lose_streak_red'] >= 3).astype(int)
    #df_copy['blue_long_lose_streak'] = (df_copy['lose_streak_blue'] >= 3).astype(int)
    
    return df_copy

In [32]:
# Example usage with your dataset
def process_ufc_data(df):
    """
    Complete pipeline to add all fighter record features
    """
    print("Processing UFC data for feature engineering...")
    print(f"Original dataset shape: {df.shape}")
    
    # Step 1: Create win/loss features
    df_with_records = create_fighter_record_features(df)
    print("✓ Added win/loss tracking features")

    # Step 2: Add a win ratio
    df_processed = create_win_ratio_record(df_with_records)
    print("✓ Added win ratio features")
    
    # Step 3: Add recent form features
    df_processed = create_recent_performance_features(df_processed, recent_fights=3)
    print("✓ Added recent performance features")

    # Step 4: Days since debut
    df_processed = create_days_since_debut_features(df_processed)
    print("✓ Added days since debut features")

    # Step 5: Add days since last win
    df_processed = create_days_since_last_win_features(df_processed)
    print("✓ Added days since last win/loss features")

    # Step 6: Add wins in last calendar year
    df_processed = create_wins_last_year_features(df_processed)
    print("✓ Added calendar year features")

    # Step 7: Add win/loss streak
    df_final = create_win_lose_streak_features(df_processed)
    print("✓ Added win/loss streak features")
    
    print(f"Final dataset shape: {df_final.shape}")
    print(f"Added {df_final.shape[1] - df.shape[1]} new features")
    
    # Display summary of new features
    new_features = [col for col in df_final.columns if col not in df.columns]
    print("\nNew features added:")
    for feature in new_features:
        print(f"  - {feature}")
    
    return df_final

In [33]:
df_processed = process_ufc_data(df_merged)

Processing UFC data for feature engineering...
Original dataset shape: (8182, 119)
✓ Added win/loss tracking features
✓ Added win ratio features
✓ Added recent performance features
✓ Added days since debut features
✓ Added days since last win/loss features
✓ Added calendar year features
✓ Added win/loss streak features
Final dataset shape: (8182, 151)
Added 32 new features

New features added:
  - wins_before_red
  - losses_before_red
  - total_fights_before_red
  - wins_before_blue
  - losses_before_blue
  - total_fights_before_blue
  - win_pct_before_red
  - win_pct_before_blue
  - wins_last_3_red
  - wins_last_3_blue
  - fights_last_3_red
  - fights_last_3_blue
  - days_since_debut_red
  - days_since_debut_blue
  - days_since_last_win_red
  - days_since_last_win_blue
  - recent_winner_red
  - recent_winner_blue
  - wins_last_365_days_red
  - wins_last_365_days_blue
  - fights_last_365_days_red
  - fights_last_365_days_blue
  - undefeated_last_year_red
  - undefeated_last_year_blue
 

In [34]:
df_processed.loc[
    (df_processed['fighter_red'] == 'alex pereira') |
    (df_processed['fighter_blue'] == 'alex pereira'),
    ['event_date', 'fighter_red', 'fighter_blue', 'winner', 'wins_before_red', 'losses_before_red',
    'total_fights_before_red', 'wins_last_3_red', 'fights_last_3_red', 'win_pct_before_red',
    'days_since_debut_red','days_since_last_win_red', 'recent_winner_red', 
    'wins_last_365_days_red', 'fights_last_365_days_red', 'undefeated_last_year_red',
    'win_streak_red','lose_streak_red','on_win_streak_red','long_win_streak_red',
    'wins_before_blue', 'losses_before_blue',
    'total_fights_before_blue', 'wins_last_3_blue', 'fights_last_3_blue', 'win_pct_before_blue',
    'days_since_debut_blue', 'days_since_last_win_blue', 'recent_winner_blue',
    'wins_last_365_days_blue', 'fights_last_365_days_blue', 'undefeated_last_year_blue',
    'win_streak_blue','lose_streak_blue','on_win_streak_blue','long_win_streak_blue']
]


Unnamed: 0,event_date,fighter_red,fighter_blue,winner,wins_before_red,losses_before_red,total_fights_before_red,wins_last_3_red,fights_last_3_red,win_pct_before_red,days_since_debut_red,days_since_last_win_red,recent_winner_red,wins_last_365_days_red,fights_last_365_days_red,undefeated_last_year_red,win_streak_red,lose_streak_red,on_win_streak_red,long_win_streak_red,wins_before_blue,losses_before_blue,total_fights_before_blue,wins_last_3_blue,fights_last_3_blue,win_pct_before_blue,days_since_debut_blue,days_since_last_win_blue,recent_winner_blue,wins_last_365_days_blue,fights_last_365_days_blue,undefeated_last_year_blue,win_streak_blue,lose_streak_blue,on_win_streak_blue,long_win_streak_blue
6220,2021-11-06,alex pereira,andreas michailidis,1,0,0,0,0,0,0.0,0,9999.0,0,0,0,0,0,0,0,0,1,1,2,1,2,0.5,479,189.0,1,1,1,1,1,0,1,0
6371,2022-03-12,alex pereira,bruno silva,1,1,0,1,1,1,1.0,126,126.0,1,1,1,1,1,0,1,0,5,2,7,3,3,0.714,728,91.0,1,5,5,1,5,0,1,1
6547,2022-07-02,sean strickland,alex pereira,0,12,3,15,3,3,0.8,3031,147.0,1,2,2,1,6,0,1,1,2,0,2,2,2,1.0,238,112.0,1,2,2,1,2,0,1,0
6734,2022-11-12,israel adesanya,alex pereira,0,12,1,13,3,3,0.923,1736,133.0,1,2,2,1,3,0,1,1,3,0,3,3,3,1.0,371,133.0,1,2,2,1,3,0,1,1
6921,2023-04-08,alex pereira,israel adesanya,0,4,0,4,3,3,1.0,518,147.0,1,2,2,1,4,0,1,1,12,2,14,2,3,0.857,1883,280.0,1,1,2,0,0,1,0,0
7094,2023-07-29,jan blachowicz,alex pereira,0,12,6,18,2,3,0.667,3220,441.0,0,0,0,0,1,0,1,0,4,1,5,2,3,0.8,630,259.0,1,1,2,0,0,1,0,0
7246,2023-11-11,jiri prochazka,alex pereira,0,3,0,3,3,3,1.0,1218,518.0,0,0,0,0,3,0,1,1,5,1,6,2,3,0.833,735,105.0,1,2,3,0,1,0,1,0
7453,2024-04-13,alex pereira,jamahal hill,1,6,1,7,2,3,0.857,889,154.0,1,2,2,1,2,0,1,0,6,1,7,3,3,0.857,1540,448.0,0,0,0,0,4,0,1,1
7562,2024-06-29,alex pereira,jiri prochazka,1,7,1,8,3,3,0.875,966,77.0,1,3,3,1,3,0,1,1,4,1,5,2,3,0.8,1449,77.0,1,1,2,0,1,0,1,0
7686,2024-10-05,alex pereira,khalil rountree jr,1,8,1,9,3,3,0.889,1064,98.0,1,3,3,1,4,0,1,1,9,5,14,3,3,0.643,3011,301.0,1,1,1,1,5,0,1,1


In [35]:
df_processed.to_csv('../data/notebooks/df_processed.csv', index = False)

In [36]:
df_processed

Unnamed: 0,event_date,event_name,fighter_red,fighter_blue,round,time,weight_class,win_method,winner,stance_red,stance_blue,knockdowns_red,knockdowns_blue,sig_attempts_red,sig_attempts_blue,sig_strikes_red,sig_strikes_blue,total_strikes_attempts_red,total_strikes_attempts_blue,total_strikes_red,total_strikes_blue,sub_attempts_red,sub_attempts_blue,takedowns_red,takedowns_blue,takedown_attempts_red,takedown_attempts_blue,control_time_red,control_time_blue,head_strikes_red,head_strikes_blue,head_attempts_red,head_attempts_blue,body_strikes_red,body_strikes_blue,body_attempts_red,body_attempts_blue,leg_strikes_red,leg_strikes_blue,leg_attempts_red,leg_attempts_blue,distance_red,distance_blue,distance_attempts_red,distance_attempts_blue,clinch_strikes_red,clinch_strikes_blue,clinch_attempts_red,clinch_attempts_blue,ground_strikes_red,ground_strikes_blue,ground_attempts_red,ground_attempts_blue,octagon_debut_blue,height_blue,weight_blue,reach_blue,leg_reach_blue,sig_strikes_landed_per_minute_blue,sig_strikes_absorbed_per_minute_blue,takedowns_avg_blue,submission_avg_blue,knockdown_avg_blue,fight_time_avg_blue,octagon_debut_red,height_red,weight_red,reach_red,leg_reach_red,sig_strikes_landed_per_minute_red,sig_strikes_absorbed_per_minute_red,takedowns_avg_red,submission_avg_red,knockdown_avg_red,fight_time_avg_red,avg_rounds_red,avg_time_red,avg_knockdowns_red,avg_sig_attempts_red,avg_sig_strikes_red,avg_total_strikes_attempts_red,avg_total_strikes_red,avg_sub_attempts_red,avg_takedowns_red,avg_takedown_attempts_red,avg_head_strikes_red,avg_head_attempts_red,avg_body_strikes_red,avg_body_attempts_red,avg_leg_strikes_red,avg_leg_attempts_red,avg_distance_red,avg_distance_attempts_red,avg_clinch_strikes_red,avg_clinch_attempts_red,avg_ground_strikes_red,avg_ground_attempts_red,avg_rounds_blue,avg_time_blue,avg_knockdowns_blue,avg_sig_attempts_blue,avg_sig_strikes_blue,avg_total_strikes_attempts_blue,avg_total_strikes_blue,avg_sub_attempts_blue,avg_takedowns_blue,avg_takedown_attempts_blue,avg_head_strikes_blue,avg_head_attempts_blue,avg_body_strikes_blue,avg_body_attempts_blue,avg_leg_strikes_blue,avg_leg_attempts_blue,avg_distance_blue,avg_distance_attempts_blue,avg_clinch_strikes_blue,avg_clinch_attempts_blue,avg_ground_strikes_blue,avg_ground_attempts_blue,wins_before_red,losses_before_red,total_fights_before_red,wins_before_blue,losses_before_blue,total_fights_before_blue,win_pct_before_red,win_pct_before_blue,wins_last_3_red,wins_last_3_blue,fights_last_3_red,fights_last_3_blue,days_since_debut_red,days_since_debut_blue,days_since_last_win_red,days_since_last_win_blue,recent_winner_red,recent_winner_blue,wins_last_365_days_red,wins_last_365_days_blue,fights_last_365_days_red,fights_last_365_days_blue,undefeated_last_year_red,undefeated_last_year_blue,win_streak_red,win_streak_blue,lose_streak_red,lose_streak_blue,on_win_streak_red,on_win_streak_blue,long_win_streak_red,long_win_streak_blue
0,1994-03-11,UFC 2: No Way Out,jason delucia,scott baker,1,401,Open Weight,Submission,1,Southpaw,Orthodox,0,0,5,2,3,0,25,23,20,14,5,0,0,1,1,1,0,0,1,0,2,2,1,0,2,0,1,0,1,0,2,0,4,2,0,0,0,0,1,0,1,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,71.000000,190.000000,75.800000,40.600000,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0.000,0.000,0,0,0,0,0,0,9999.0,9999.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1994-03-11,UFC 2: No Way Out,patrick smith,scott morris,1,30,Open Weight,KO/TKO,1,Orthodox,Orthodox,0,0,17,0,13,0,25,0,19,0,0,0,0,0,0,1,0,0,12,0,16,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,12,0,16,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,74.000000,225.000000,77.300000,42.500000,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,1.00,20.000000,0.0000,1.0000,1.000000,2.000000,2.000000,1.000000,1.0000,1.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000,0.0000,0.000000,1.000000,1.000000,0.00,0.000000,2,0,2,0,0,0,1.000,0.000,0,1,1,1,0,0,9999.0,0.0,0,1,2,1,3,1,0,1,0,0,0,0,0,0,0,0
2,1994-03-11,UFC 2: No Way Out,royce gracie,minoki ichihara,1,308,Open Weight,Submission,1,Southpaw,Orthodox,0,0,4,7,2,3,114,16,110,12,2,0,1,0,1,0,0,0,0,3,0,6,0,0,0,0,2,0,4,1,2,0,4,2,0,0,0,0,0,3,0,5,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,73.000000,175.000000,74.800000,40.500000,1.0,1.0,0.00,0.76,0.00,630.0,1.000000,77.0000,0.000000,4.0000,4.0000,11.000000,11.00,0.000000,1.000000,2.000000,3.0,3.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000,1.0000,3.000000,3.000000,,,,,,,,,,,,,,,,,,,,,,,2,0,2,0,0,0,1.000,0.000,1,0,1,0,0,0,9999.0,9999.0,0,0,3,0,3,0,1,0,0,0,0,0,0,0,0,0
3,1994-03-11,UFC 2: No Way Out,remco pardoel,orlando wiet,1,89,Open Weight,KO/TKO,1,Southpaw,Southpaw,0,0,7,2,7,1,7,7,7,5,0,0,1,0,1,0,0,0,7,0,7,1,0,0,0,0,0,1,0,1,0,1,0,2,0,0,0,0,7,0,7,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1.000000,591.0000,0.000000,6.0000,4.0000,22.000000,20.00,1.000000,1.000000,1.000000,4.0,5.000000,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.0000,4.000000,5.000000,1.00,170.000000,0.0000,12.0000,8.000000,15.000000,11.000000,0.000000,0.0000,0.000000,7.000000,11.000000,1.000000,1.000000,0.000000,0.000,1.0000,3.000000,0.000000,0.000000,7.00,9.000000,1,1,2,1,0,1,0.500,1.000,0,0,0,0,0,0,9999.0,9999.0,0,0,1,1,2,1,0,1,0,0,0,0,0,0,0,0
4,1994-03-11,UFC 2: No Way Out,royce gracie,patrick smith,1,77,Open Weight,KO/TKO,1,Southpaw,Orthodox,0,0,4,2,4,1,11,3,11,2,0,0,1,0,2,0,0,0,3,0,3,0,0,1,0,2,1,0,1,0,0,0,0,1,1,1,1,1,3,0,3,0,1993-11-12,74.000000,225.000000,77.300000,42.500000,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,73.000000,175.000000,74.800000,40.500000,1.0,1.0,0.00,0.76,0.00,630.0,,,,,,,,,,,,,,,,,,,,,,,1.00,51.666667,0.0000,10.0000,6.333333,12.666667,8.333333,0.666667,0.0000,0.333333,4.333333,6.666667,1.333333,1.333333,0.666667,2.000,1.0000,3.333333,1.333333,1.333333,4.00,5.333333,3,0,3,3,0,3,1.000,1.000,0,0,0,0,0,0,0.0,9999.0,1,0,2,2,2,2,1,1,1,1,0,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8177,2025-09-13,UFC Fight Night: Lopes vs. Silva,jesus aguilar,luis gurule,3,300,Flyweight,Decision - Unanimous,1,Orthodox,Orthodox,0,0,235,186,85,65,237,195,87,73,0,0,1,0,5,5,38,21,35,43,164,157,25,12,43,18,25,10,28,11,76,59,224,174,9,2,11,5,0,4,0,7,2025-04-05,65.000000,125.000000,64.000000,38.000000,5.0,5.0,0.41,0.00,0.00,738.0,2023-02-04,64.000000,125.500000,62.500000,36.000000,3.0,2.0,1.64,1.40,0.23,550.0,1.800000,207.2000,0.200000,26.6000,12.2000,33.400000,17.20,1.000000,0.600000,1.800000,8.0,21.000000,1.800000,2.000000,2.400000,3.600000,9.800000,21.400000,1.400000,3.0000,1.000000,2.200000,2.00,114.000000,0.0000,34.0000,12.000000,49.000000,26.000000,0.000000,1.0000,4.000000,3.000000,13.000000,0.000000,7.000000,9.000000,14.000,11.0000,32.000000,1.000000,2.000000,0.00,0.000000,3,2,5,0,1,1,0.600,0.000,2,0,3,1,952,161,392.0,9999.0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0
8178,2025-09-13,UFC Fight Night: Lopes vs. Silva,rodrigo sezinando,daniil donchenko,1,267,Open Weight,KO/TKO,0,Orthodox,Orthodox,0,1,36,68,10,39,36,71,10,42,0,0,1,1,4,3,62,63,7,29,30,55,3,8,6,11,0,2,0,2,7,23,33,44,3,6,3,9,0,10,0,15,2025-08-16,71.000000,186.000000,73.100000,40.500000,9.0,2.0,3.37,0.00,3.37,267.0,2025-08-16,69.000000,189.000000,71.000000,39.100000,2.0,9.0,3.37,0.00,0.00,267.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0.000,0.000,0,0,0,0,0,0,9999.0,9999.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8179,2025-09-13,UFC Fight Night: Lopes vs. Silva,alessandro costa,alden coria,3,47,Flyweight,KO/TKO,0,Orthodox,Orthodox,0,0,56,95,12,42,59,131,14,77,2,0,1,2,4,2,84,198,7,34,37,85,1,1,6,3,4,7,13,7,12,29,56,79,0,0,0,0,0,13,0,16,2025-09-13,68.000000,125.000000,67.000000,38.200000,4.0,1.0,2.78,0.00,0.00,647.0,2022-12-17,64.000000,125.000000,67.000000,36.000000,4.0,4.0,0.46,0.46,0.69,656.0,2.500000,147.7500,0.500000,86.2500,45.5000,108.000000,62.00,0.000000,0.250000,2.000000,26.5,62.250000,8.500000,11.750000,10.500000,12.250000,32.500000,69.250000,5.250000,6.2500,7.750000,10.750000,,,,,,,,,,,,,,,,,,,,,,,2,2,4,0,0,0,0.500,0.000,2,0,3,0,1001,0,497.0,9999.0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0
8180,2025-09-13,UFC Fight Night: Lopes vs. Silva,diego lopes,jean silva,2,288,Featherweight,KO/TKO,1,Orthodox,Orthodox,1,0,135,91,74,43,154,91,86,43,0,0,3,0,4,0,160,3,63,26,118,67,1,10,1,17,10,7,16,7,31,42,65,90,1,1,2,1,42,0,68,0,2024-01-13,67.000000,146.000000,69.000000,36.500000,5.0,5.0,0.50,0.75,1.25,514.0,2023-05-06,71.000000,145.000000,72.500000,41.000000,4.0,5.0,0.79,1.74,0.79,631.0,2.428571,211.0000,0.571429,89.0000,42.0000,99.857143,52.00,1.142857,0.285714,0.714286,31.0,74.142857,5.142857,7.714286,5.857143,7.142857,29.428571,71.714286,4.285714,6.0000,8.285714,11.285714,1.80,181.800000,1.0000,58.0000,31.400000,58.600000,32.000000,0.600000,0.2000,0.200000,21.800000,46.200000,8.600000,10.600000,1.000000,1.200,23.8000,48.400000,5.200000,6.600000,2.40,3.000000,5,2,7,5,0,5,0.714,1.000,2,3,3,3,861,609,364.0,154.0,1,1,1,2,2,2,0,1,0,5,1,0,0,1,0,1


In [37]:
for col in df_processed.columns:
    print(col)

event_date
event_name
fighter_red
fighter_blue
round
time
weight_class
win_method
winner
stance_red
stance_blue
knockdowns_red
knockdowns_blue
sig_attempts_red
sig_attempts_blue
sig_strikes_red
sig_strikes_blue
total_strikes_attempts_red
total_strikes_attempts_blue
total_strikes_red
total_strikes_blue
sub_attempts_red
sub_attempts_blue
takedowns_red
takedowns_blue
takedown_attempts_red
takedown_attempts_blue
control_time_red
control_time_blue
head_strikes_red
head_strikes_blue
head_attempts_red
head_attempts_blue
body_strikes_red
body_strikes_blue
body_attempts_red
body_attempts_blue
leg_strikes_red
leg_strikes_blue
leg_attempts_red
leg_attempts_blue
distance_red
distance_blue
distance_attempts_red
distance_attempts_blue
clinch_strikes_red
clinch_strikes_blue
clinch_attempts_red
clinch_attempts_blue
ground_strikes_red
ground_strikes_blue
ground_attempts_red
ground_attempts_blue
octagon_debut_blue
height_blue
weight_blue
reach_blue
leg_reach_blue
sig_strikes_landed_per_minute_blue
sig_s

Temporal Feature Engineering
<br><br> 

We apply three key techniques to capture fighter performance dynamics over time:

1. Exponential Moving Average (EMA)

Smooths past performance with exponentially decreasing weights, giving more importance to recent fights.

$$
EMA_t = \alpha \cdot x_t + (1 - \alpha) \cdot EMA_{t-1}
$$

where

xt = current value,

α = smoothing factor (0<α≤1)
<br><br> 

2. Rolling Window Statistics

Captures average performance over the last 
n fights, shifted by one fight to avoid leakage.

$$
\text{RollingMean}_t = \frac{1}{n} \sum_{i=t-n}^{t-1} x_i
$$


where 
n is the window size (e.g., 5 fights).
<br><br> 

3. Momentum Features

Measures trends and fight outcome patterns:

Performance Trend: slope of win/loss trajectory over recent fights.

Finish Momentum: proportion of recent wins that ended by KO/TKO or Submission.

Dominance Momentum: weighted score for decisive wins (e.g., early-round finishes > decisions).

In [38]:
def get_fighter_stats(df: pd.DataFrame, fighter_name: str, fight_index: int) -> Dict:
    """
    Get the stats for a specific fighter in a specific fight, regardless of red/blue corner
    """
    try:
        row = df.iloc[fight_index]
        
        if row['fighter_red'] == fighter_name:
            corner = 'red'
        elif row['fighter_blue'] == fighter_name:
            corner = 'blue'
        else:
            return None
        
        # Define the key stats we want to extract (focusing on most important ones)
        stat_names = [
            'sig_strikes', 'sig_attempts', 'knockdowns', 'takedowns', 'takedown_attempts', 
            'total_strikes', 'total_strikes_attempts','sub_attempts', 'control_time',
            'head_strikes', 'head_attempts'
        ]
        
        stats = {}
        for stat in stat_names:
            col_name = f"{stat}_{corner}"
            stats[stat] = row.get(col_name, np.nan)
        
        # Also get fight outcome and method
        stats['won'] = 1 if row['winner'] == fighter_name else 0
        stats['win_method'] = row.get('win_method', np.nan)
        stats['round'] = row.get('round', np.nan)
        stats['event_date'] = row.get('event_date')
    except (IndexError, KeyError) as e:
        print(f"Error getting stats for {fighter_name} at index {fight_index}: {e}")
        return None
    
    return stats

def calculate_essential_ema_features(df: pd.DataFrame, alpha: float = 0.3) -> pd.DataFrame:
    """
    Calculate EMAs for only the most important performance metrics
    Features end with _red or _blue
    """
    result_df = df.copy()
    
    # Get all unique fighters
    all_fighters = set(df['fighter_red'].dropna().unique()) | set(df['fighter_blue'].dropna().unique())
    
    # Focus on essential stats only
    essential_stats = ['sig_strikes', 'sig_attempts', 'knockdowns', 'takedowns', 'takedown_attempts', 
            'total_strikes', 'total_strikes_attempts', 'head_strikes', 'head_attempts']
    
    # Initialize EMA columns
    for corner in ['red', 'blue']:
        for stat in essential_stats:
            result_df[f"{stat}_ema_{corner}"] = np.nan
            if 'attempts' not in stat:
                result_df[f"{stat}_success_rate_ema_{corner}"] = np.nan
    
    # Calculate EMAs for each fighter
    for fighter in all_fighters:
        if pd.isna(fighter):
            continue
        
        # Get all fights for this fighter in chronological order
        fighter_fights = []
        for idx, row in df.iterrows():
            if row['fighter_red'] == fighter or row['fighter_blue'] == fighter:
                stats = get_fighter_stats(df, fighter, df.index.get_loc(idx))
                if stats:
                    fighter_fights.append({
                        'index': idx,
                        'corner': 'red' if row['fighter_red'] == fighter else 'blue',
                        'stats': stats
                    })
        
        if len(fighter_fights) <= 1:
            continue
        
        # Calculate EMAs for this fighter
        fighter_emas = {}
        
        for i, fight in enumerate(fighter_fights):
            if i == 0:
                # First fight - initialize EMAs with current values
                for stat in essential_stats:
                    if pd.notna(fight['stats'][stat]):
                        fighter_emas[f"{stat}_ema"] = fight['stats'][stat]
                        
                        # Initialize success rate EMAs
                        if 'attempts' not in stat and f"{stat}_attempts" in fight['stats']:
                            attempts = fight['stats'][f"{stat}_attempts"]
                            if attempts > 0:
                                fighter_emas[f"{stat}_success_rate_ema"] = fight['stats'][stat] / attempts
                            else:
                                fighter_emas[f"{stat}_success_rate_ema"] = 0
                continue
            
            # Update EMAs using previous fight's values (avoid data leakage)
            prev_fight = fighter_fights[i-1]
            current_corner = fight['corner']
            
            for stat in essential_stats:
                ema_key = f"{stat}_ema"
                success_rate_key = f"{stat}_success_rate_ema"
                
                # Update volume EMA
                if ema_key in fighter_emas and pd.notna(prev_fight['stats'][stat]):
                    new_ema = alpha * prev_fight['stats'][stat] + (1 - alpha) * fighter_emas[ema_key]
                    fighter_emas[ema_key] = new_ema
                    result_df.loc[fight['index'], f"{stat}_ema_{current_corner}"] = fighter_emas[ema_key]
                
                # Update success rate EMA
                if 'attempts' not in stat and f"{stat}_attempts" in prev_fight['stats']:
                    prev_attempts = prev_fight['stats'][f"{stat}_attempts"]
                    if prev_attempts > 0:
                        prev_success_rate = prev_fight['stats'][stat] / prev_attempts
                        
                        if success_rate_key in fighter_emas:
                            new_success_ema = alpha * prev_success_rate + (1 - alpha) * fighter_emas[success_rate_key]
                            fighter_emas[success_rate_key] = new_success_ema
                            result_df.loc[fight['index'], f"{stat}_success_rate_ema_{current_corner}"] = fighter_emas[success_rate_key]
    
    return result_df

def calculate_essential_rolling_features(df: pd.DataFrame, window: int = 5) -> pd.DataFrame:
    """
    Calculate rolling features for essential metrics only, using 5-fight window
    Features end with _red or _blue
    """
    result_df = df.copy()
    
    # Get all unique fighters
    all_fighters = set(df['fighter_red'].dropna().unique()) | set(df['fighter_blue'].dropna().unique())
    
    # Essential stats only
    essential_stats = ['sig_strikes', 'sig_attempts',
                    'takedowns', 'takedown_attempts', 'control_time',
                    'knockdowns', 'total_strikes', 'total_strikes_attempts']
    
    # Initialize rolling columns
    for corner in ['red', 'blue']:
        for stat in essential_stats:
            result_df[f"{stat}_roll_{window}_{corner}"] = np.nan
            if stat in ['sig_strikes', 'takedowns']:  # Only for stats with success rates
                result_df[f"{stat}_success_rate_roll_{window}_{corner}"] = np.nan
    
    # Calculate rolling features for each fighter
    for fighter in all_fighters:
        if pd.isna(fighter):
            continue
        
        # Get all fights for this fighter
        fighter_fights = []
        for idx, row in df.iterrows():
            if row['fighter_red'] == fighter or row['fighter_blue'] == fighter:
                stats = get_fighter_stats(df, fighter, df.index.get_loc(idx))
                if stats:
                    fighter_fights.append({
                        'index': idx,
                        'corner': 'red' if row['fighter_red'] == fighter else 'blue',
                        'stats': stats
                    })
        
        if len(fighter_fights) <= 1:
            continue
        
        # Create series for each stat for this fighter
        fighter_stats_series = {}
        indices = []
        
        for fight in fighter_fights:
            indices.append(fight['index'])
            for stat in essential_stats:
                if stat not in fighter_stats_series:
                    fighter_stats_series[stat] = []
                fighter_stats_series[stat].append(fight['stats'][stat])
        
        # Convert to pandas series
        for stat in essential_stats:
            fighter_stats_series[stat] = pd.Series(fighter_stats_series[stat], index=indices)
        
        # Calculate rolling features
        for stat in essential_stats:
            if stat in fighter_stats_series:
                series = fighter_stats_series[stat]
                
                # Rolling mean (shifted by 1 to avoid data leakage)
                rolling_mean = series.rolling(window=window, min_periods=1).mean().shift(1)
                
                # Apply to correct corner for each fight
                for fight in fighter_fights:
                    idx = fight['index']
                    corner = fight['corner']
                    
                    if idx in rolling_mean.index:
                        result_df.loc[idx, f"{stat}_roll_{window}_{corner}"] = rolling_mean.loc[idx]
                
                # Calculate success rate rolling features for applicable stats
                if stat == 'sig_strikes':
                    attempts_stat = 'sig_attempts'
                elif stat == 'takedowns':
                    attempts_stat = 'takedown_attempts'
                else:
                    continue
                
                if attempts_stat in fighter_stats_series:
                    success_series = fighter_stats_series[stat] / (fighter_stats_series[attempts_stat] + 1e-8)
                    rolling_success = success_series.rolling(window=window, min_periods=1).mean().shift(1)
                    
                    for fight in fighter_fights:
                        idx = fight['index']
                        corner = fight['corner']
                        
                        if idx in rolling_success.index:
                            result_df.loc[idx, f"{stat}_success_rate_roll_{window}_{corner}"] = rolling_success.loc[idx]
    
    return result_df

def calculate_essential_momentum_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate only the most valuable momentum features that aren't redundant
    with existing win/loss tracking. Features end with _red or _blue
    """
    result_df = df.copy()
    
    # Get all unique fighters
    all_fighters = set(df['fighter_red'].dropna().unique()) | set(df['fighter_blue'].dropna().unique())
    
    # Initialize momentum columns (only the essential ones)
    for corner in ['red', 'blue']:
        result_df[f'performance_trend_{corner}'] = np.nan          # Shows trajectory
        result_df[f'finish_momentum_{corner}'] = np.nan           # How they win
        result_df[f'dominance_momentum_{corner}'] = np.nan        # Quality of wins
    
    for fighter in all_fighters:
        if pd.isna(fighter):
            continue
        
        # Get all fights for this fighter
        fighter_fights = []
        for idx, row in df.iterrows():
            if row['fighter_red'] == fighter or row['fighter_blue'] == fighter:
                stats = get_fighter_stats(df, fighter, df.index.get_loc(idx))
                if stats:
                    fighter_fights.append({
                        'index': idx,
                        'corner': 'red' if row['fighter_red'] == fighter else 'blue',
                        'stats': stats
                    })
        
        if len(fighter_fights) <= 2:  # Need at least 3 fights for trends
            continue
        
        # Extract win/loss sequence
        win_sequence = [fight['stats']['won'] for fight in fighter_fights]
        
        # 1. Performance Trend (linear trend of recent results)
        performance_trend = []
        for i in range(len(win_sequence)):
            if i < 2:
                performance_trend.append(np.nan)
            else:
                recent_results = win_sequence[max(0, i-4):i]  # Last 5 fights before current
                if len(recent_results) >= 3:
                    x = np.arange(len(recent_results))
                    try:
                        trend = np.polyfit(x, recent_results, 1)[0]  # Slope
                        performance_trend.append(trend)
                    except:
                        performance_trend.append(np.nan)
                else:
                    performance_trend.append(np.nan)
        
        # 2. Finish Momentum (recent finish rate)
        finish_momentum = []
        for i, fight in enumerate(fighter_fights):
            if i == 0:
                finish_momentum.append(np.nan)
                continue
            
            # Look at previous fights
            recent_fights = fighter_fights[max(0, i-5):i]  # Last 5 fights before current
            wins = [f for f in recent_fights if f['stats']['won'] == 1]
            
            if len(wins) > 0:
                finishes = [f for f in wins if pd.notna(f['stats']['win_method']) and 
                        f['stats']['win_method'] in ['Submission', 'KO/TKO', 'TKO']]
                finish_rate = len(finishes) / len(wins)
                finish_momentum.append(finish_rate)
            else:
                finish_momentum.append(0.0)
        
        # 3. Dominance Momentum (how decisively they win)
        dominance_momentum = []
        for i, fight in enumerate(fighter_fights):
            if i == 0:
                dominance_momentum.append(np.nan)
                continue
            
            # Look at previous fights
            recent_fights = fighter_fights[max(0, i-3):i]  # Last 3 fights before current
            dominance_scores = []
            
            for f in recent_fights:
                if f['stats']['won'] == 1:
                    # Score based on how they won
                    if pd.notna(f['stats']['round']) and f['stats']['round'] == 1:
                        dominance_scores.append(1.0)  # First round finish
                    elif pd.notna(f['stats']['win_method']) and f['stats']['win_method'] in ['Submission', 'KO/TKO', 'TKO']:
                        dominance_scores.append(0.8)  # Later round finish
                    else:
                        dominance_scores.append(0.6)  # Decision win
                else:
                    dominance_scores.append(0.0)  # Loss
            
            if len(dominance_scores) > 0:
                dominance_momentum.append(np.mean(dominance_scores))
            else:
                dominance_momentum.append(np.nan)
        
        # Apply momentum features to the dataframe
        for i, fight in enumerate(fighter_fights):
            idx = fight['index']
            corner = fight['corner']
            
            result_df.loc[idx, f'performance_trend_{corner}'] = performance_trend[i]
            result_df.loc[idx, f'finish_momentum_{corner}'] = finish_momentum[i]
            result_df.loc[idx, f'dominance_momentum_{corner}'] = dominance_momentum[i]
    
    return result_df

def engineer_essential_temporal_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Main function to apply essential temporal feature engineering only
    All features end with _red or _blue
    """
    print("Calculating essential EMA features...")
    df = calculate_essential_ema_features(df, alpha=0.3)
    
    print("Calculating essential rolling window features (5-fight window)...")
    df = calculate_essential_rolling_features(df, window=5)
    
    print("Calculating essential momentum features...")
    df = calculate_essential_momentum_features(df)
    
    print("Essential feature engineering complete!")
    
    # Print summary of new features
    temporal_cols = [col for col in df.columns if any(x in col for x in 
                    ['_ema_', '_roll_', '_momentum_', '_trend_'])]
    print(f"Added {len(temporal_cols)} new temporal features:")
    
    feature_types = {
        'EMA': [col for col in temporal_cols if '_ema_' in col],
        'Rolling': [col for col in temporal_cols if '_roll_' in col], 
        'Momentum': [col for col in temporal_cols if '_momentum_' in col or '_trend_' in col]
    }
    
    for ftype, cols in feature_types.items():
        print(f"  {ftype}: {len(cols)} features")
    
    return df

def handle_temporal_nans(df: pd.DataFrame) -> pd.DataFrame:
    """
    Handle NaN values in temporal features based on their specific context
    """
    df_filled = df.copy()
    
    # Get all temporal columns
    temporal_cols = [col for col in df.columns if any(x in col for x in 
                    ['_ema_', '_roll_', '_momentum_', '_trend_'])]
    
    print(f"Handling NaN values in {len(temporal_cols)} temporal features...")
    
    # Get all fighters
    all_fighters = set(df['fighter_red'].dropna().unique()) | set(df['fighter_blue'].dropna().unique())
    
    # 1. Handle early career NaNs (fighters with insufficient history)
    print("1. Filling early career NaNs...")
    for fighter in all_fighters:
        if pd.isna(fighter):
            continue
            
        fighter_fights = df_filled[
            (df_filled['fighter_red'] == fighter) | 
            (df_filled['fighter_blue'] == fighter)
        ].sort_values('event_date')
        
        if len(fighter_fights) <= 1:
            continue
        
        # Forward fill from first valid value for each temporal feature
        for col in temporal_cols:
            if col in fighter_fights.columns:
                fighter_values = fighter_fights[col].copy()
                first_valid_idx = fighter_values.first_valid_index()
                
                if first_valid_idx is not None:
                    first_valid_value = fighter_values.loc[first_valid_idx]
                    # Fill earlier NaNs with first valid value
                    early_fights = fighter_fights[fighter_fights.index < first_valid_idx]
                    for early_idx in early_fights.index:
                        if pd.isna(df_filled.loc[early_idx, col]):
                            df_filled.loc[early_idx, col] = first_valid_value
    
    # 2. Fill momentum features with contextually meaningful defaults
    print("2. Filling momentum feature NaNs...")
    
    # Performance trends: 0 (no trend)
    trend_cols = [col for col in temporal_cols if 'trend' in col]
    for col in trend_cols:
        df_filled[col] = df_filled[col].fillna(0.0)
    
    # Finish momentum: 0 (no finishing history)
    finish_cols = [col for col in temporal_cols if 'finish_momentum' in col]
    for col in finish_cols:
        df_filled[col] = df_filled[col].fillna(0.0)
    
    # Dominance momentum: Use conservative estimate (0.3 = low dominance)
    dominance_cols = [col for col in temporal_cols if 'dominance_momentum' in col]
    for col in dominance_cols:
        df_filled[col] = df_filled[col].fillna(0.3)
    
    # 3. Fill success rate features with population medians
    print("3. Filling success rate NaNs...")
    success_rate_cols = [col for col in temporal_cols if 'success_rate' in col]
    for col in success_rate_cols:
        if col in df_filled.columns:
            # Use dropna() here too to avoid warnings
            non_nan_pop = df_filled[col].dropna()
            if len(non_nan_pop) > 0:
                population_median = non_nan_pop.median()
                df_filled[col] = df_filled[col].fillna(population_median)
            else:
                # Fallback defaults based on typical MMA stats
                if 'sig_strikes' in col:
                    df_filled[col] = df_filled[col].fillna(0.45)  # ~45% striking accuracy
                elif 'takedowns' in col:
                    df_filled[col] = df_filled[col].fillna(0.35)  # ~35% takedown success
                else:
                    df_filled[col] = df_filled[col].fillna(0.0)

    # 4. Fill remaining EMA and rolling features with fighter-specific medians
    print("4. Filling remaining EMA and rolling NaNs...")
    remaining_cols = [col for col in temporal_cols if col not in (trend_cols + finish_cols + dominance_cols + success_rate_cols)]

    for fighter in all_fighters:
        if pd.isna(fighter):
            continue
            
        fighter_mask = (df_filled['fighter_red'] == fighter) | (df_filled['fighter_blue'] == fighter)
        fighter_data = df_filled[fighter_mask]
        
        if len(fighter_data) <= 1:
            continue
        
        for col in remaining_cols:
            if col in fighter_data.columns:
                # Use dropna() to avoid empty slice warnings
                non_nan_values = fighter_data[col].dropna()
                
                if len(non_nan_values) > 0:
                    fighter_median = non_nan_values.median()
                    df_filled.loc[fighter_mask & df_filled[col].isna(), col] = fighter_median
                else:
                    # No valid values for this fighter, use population median
                    pop_non_nan = df_filled[col].dropna()
                    if len(pop_non_nan) > 0:
                        pop_median = pop_non_nan.median()
                        df_filled.loc[fighter_mask & df_filled[col].isna(), col] = pop_median
                    else:
                        # Entire column is NaN, use 0 as fallback
                        df_filled.loc[fighter_mask & df_filled[col].isna(), col] = 0.0
    
    # 5. Final cleanup
    print("5. Final cleanup...")
    remaining_nans = df_filled[temporal_cols].isna().sum().sum()
    if remaining_nans > 0:
        print(f"   Filling {remaining_nans} remaining NaNs with 0...")
        for col in temporal_cols:
            df_filled[col] = df_filled[col].fillna(0.0)
    
    # Validation
    final_nans = df_filled[temporal_cols].isna().sum().sum()
    print(f"\nNaN Filling Complete!")
    print(f"All {len(temporal_cols)} temporal features now NaN-free")
    print(f"{len(df_filled)} fights ready for ML training")
    
    return df_filled

# Example usage
if __name__ == "__main__":
    print("Essential UFC Feature Engineering Ready!")

Essential UFC Feature Engineering Ready!


In [39]:
# Add essential temporal features
df_temporal_features = engineer_essential_temporal_features(df_processed)

# Handle NaN values appropriately
df_temporal_features_clean = handle_temporal_nans(df_temporal_features)

# Save clean dataset
df_temporal_features.to_csv('../data/notebooks/temporal_features.csv', index=False)
df_temporal_features_clean.to_csv('../data/notebooks/temporal_features_clean.csv', index=False)

Calculating essential EMA features...


Calculating essential rolling window features (5-fight window)...
Calculating essential momentum features...
Essential feature engineering complete!
Added 54 new temporal features:
  EMA: 28 features
  Rolling: 20 features
  Momentum: 6 features
Handling NaN values in 54 temporal features...
1. Filling early career NaNs...
2. Filling momentum feature NaNs...
3. Filling success rate NaNs...
4. Filling remaining EMA and rolling NaNs...
5. Final cleanup...
   Filling 714 remaining NaNs with 0...

NaN Filling Complete!
All 54 temporal features now NaN-free
8182 fights ready for ML training


In [40]:
df_temporal_features_clean = pd.read_csv('../data/notebooks/temporal_features_clean.csv')

In [41]:
df_temporal_features_clean

Unnamed: 0,event_date,event_name,fighter_red,fighter_blue,round,time,weight_class,win_method,winner,stance_red,stance_blue,knockdowns_red,knockdowns_blue,sig_attempts_red,sig_attempts_blue,sig_strikes_red,sig_strikes_blue,total_strikes_attempts_red,total_strikes_attempts_blue,total_strikes_red,total_strikes_blue,sub_attempts_red,sub_attempts_blue,takedowns_red,takedowns_blue,takedown_attempts_red,takedown_attempts_blue,control_time_red,control_time_blue,head_strikes_red,head_strikes_blue,head_attempts_red,head_attempts_blue,body_strikes_red,body_strikes_blue,body_attempts_red,body_attempts_blue,leg_strikes_red,leg_strikes_blue,leg_attempts_red,leg_attempts_blue,distance_red,distance_blue,distance_attempts_red,distance_attempts_blue,clinch_strikes_red,clinch_strikes_blue,clinch_attempts_red,clinch_attempts_blue,ground_strikes_red,ground_strikes_blue,ground_attempts_red,ground_attempts_blue,octagon_debut_blue,height_blue,weight_blue,reach_blue,leg_reach_blue,sig_strikes_landed_per_minute_blue,sig_strikes_absorbed_per_minute_blue,takedowns_avg_blue,submission_avg_blue,knockdown_avg_blue,fight_time_avg_blue,octagon_debut_red,height_red,weight_red,reach_red,leg_reach_red,sig_strikes_landed_per_minute_red,sig_strikes_absorbed_per_minute_red,takedowns_avg_red,submission_avg_red,knockdown_avg_red,fight_time_avg_red,avg_rounds_red,avg_time_red,avg_knockdowns_red,avg_sig_attempts_red,avg_sig_strikes_red,avg_total_strikes_attempts_red,avg_total_strikes_red,avg_sub_attempts_red,avg_takedowns_red,avg_takedown_attempts_red,avg_head_strikes_red,avg_head_attempts_red,avg_body_strikes_red,avg_body_attempts_red,avg_leg_strikes_red,avg_leg_attempts_red,avg_distance_red,avg_distance_attempts_red,avg_clinch_strikes_red,avg_clinch_attempts_red,avg_ground_strikes_red,avg_ground_attempts_red,avg_rounds_blue,avg_time_blue,avg_knockdowns_blue,avg_sig_attempts_blue,avg_sig_strikes_blue,avg_total_strikes_attempts_blue,avg_total_strikes_blue,avg_sub_attempts_blue,avg_takedowns_blue,avg_takedown_attempts_blue,avg_head_strikes_blue,avg_head_attempts_blue,avg_body_strikes_blue,avg_body_attempts_blue,avg_leg_strikes_blue,avg_leg_attempts_blue,avg_distance_blue,avg_distance_attempts_blue,avg_clinch_strikes_blue,avg_clinch_attempts_blue,avg_ground_strikes_blue,avg_ground_attempts_blue,wins_before_red,losses_before_red,total_fights_before_red,wins_before_blue,losses_before_blue,total_fights_before_blue,win_pct_before_red,win_pct_before_blue,wins_last_3_red,wins_last_3_blue,fights_last_3_red,fights_last_3_blue,days_since_debut_red,days_since_debut_blue,days_since_last_win_red,days_since_last_win_blue,recent_winner_red,recent_winner_blue,wins_last_365_days_red,wins_last_365_days_blue,fights_last_365_days_red,fights_last_365_days_blue,undefeated_last_year_red,undefeated_last_year_blue,win_streak_red,win_streak_blue,lose_streak_red,lose_streak_blue,on_win_streak_red,on_win_streak_blue,long_win_streak_red,long_win_streak_blue,sig_strikes_ema_red,sig_strikes_success_rate_ema_red,sig_attempts_ema_red,knockdowns_ema_red,knockdowns_success_rate_ema_red,takedowns_ema_red,takedowns_success_rate_ema_red,takedown_attempts_ema_red,total_strikes_ema_red,total_strikes_success_rate_ema_red,total_strikes_attempts_ema_red,head_strikes_ema_red,head_strikes_success_rate_ema_red,head_attempts_ema_red,sig_strikes_ema_blue,sig_strikes_success_rate_ema_blue,sig_attempts_ema_blue,knockdowns_ema_blue,knockdowns_success_rate_ema_blue,takedowns_ema_blue,takedowns_success_rate_ema_blue,takedown_attempts_ema_blue,total_strikes_ema_blue,total_strikes_success_rate_ema_blue,total_strikes_attempts_ema_blue,head_strikes_ema_blue,head_strikes_success_rate_ema_blue,head_attempts_ema_blue,sig_strikes_roll_5_red,sig_strikes_success_rate_roll_5_red,sig_attempts_roll_5_red,takedowns_roll_5_red,takedowns_success_rate_roll_5_red,takedown_attempts_roll_5_red,control_time_roll_5_red,knockdowns_roll_5_red,total_strikes_roll_5_red,total_strikes_attempts_roll_5_red,sig_strikes_roll_5_blue,sig_strikes_success_rate_roll_5_blue,sig_attempts_roll_5_blue,takedowns_roll_5_blue,takedowns_success_rate_roll_5_blue,takedown_attempts_roll_5_blue,control_time_roll_5_blue,knockdowns_roll_5_blue,total_strikes_roll_5_blue,total_strikes_attempts_roll_5_blue,performance_trend_red,finish_momentum_red,dominance_momentum_red,performance_trend_blue,finish_momentum_blue,dominance_momentum_blue
0,1994-03-11,UFC 2: No Way Out,jason delucia,scott baker,1,401,Open Weight,Submission,1,Southpaw,Orthodox,0,0,5,2,3,0,25,23,20,14,5,0,0,1,1,1,0,0,1,0,2,2,1,0,2,0,1,0,1,0,2,0,4,2,0,0,0,0,1,0,1,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,71.000000,190.000000,75.800000,40.600000,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0.000,0.000,0,0,0,0,0,0,9999.0,9999.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.820000,0.45,2.800000,0.000000,0.0,1.000000,0.35,1.510000,56.210000,0.544964,58.170000,0.630000,0.0,0.630000,3.000000,0.45,5.000000,0.000000,0.0,0.000000,0.35,1.0000,20.00000,0.800000,25.000000,1.000000,0.0,2.000000,2.0,0.500000,2.666667,1.00,0.666667,1.666667,0.00,0.0,40.333333,41.666667,3.0,0.600000,5.0,0.0,0.00,1.0,0.0,0.0,20.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1994-03-11,UFC 2: No Way Out,patrick smith,scott morris,1,30,Open Weight,KO/TKO,1,Orthodox,Orthodox,0,0,17,0,13,0,25,0,19,0,0,0,0,0,0,1,0,0,12,0,16,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,12,0,16,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,74.000000,225.000000,77.300000,42.500000,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,1.00,20.000000,0.0000,1.0000,1.000000,2.000000,2.000000,1.000000,1.0000,1.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000,0.0000,0.000000,1.000000,1.000000,0.00,0.000000,2,0,2,0,0,0,1.000,0.000,0,1,1,1,0,0,9999.0,0.0,0,1,2,1,3,1,0,1,0,0,0,0,0,0,0,0,0.000000,0.45,0.000000,0.000000,0.0,0.000000,0.35,1.000000,0.000000,0.964912,0.000000,0.000000,0.0,0.000000,13.000000,0.45,17.000000,0.000000,0.0,0.000000,0.35,0.0000,19.00000,0.760000,25.000000,12.000000,0.0,16.000000,0.0,0.000000,0.000000,0.00,0.000000,1.000000,0.00,0.0,0.000000,0.000000,13.0,0.764706,17.0,0.0,0.00,0.0,0.0,0.0,19.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1994-03-11,UFC 2: No Way Out,royce gracie,minoki ichihara,1,308,Open Weight,Submission,1,Southpaw,Orthodox,0,0,4,7,2,3,114,16,110,12,2,0,1,0,1,0,0,0,0,3,0,6,0,0,0,0,2,0,4,1,2,0,4,2,0,0,0,0,0,3,0,5,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,73.000000,175.000000,74.800000,40.500000,1.0,1.0,0.00,0.76,0.00,630.0,1.000000,77.0000,0.000000,4.0000,4.0000,11.000000,11.00,0.000000,1.000000,2.000000,3.0,3.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000,1.0000,3.000000,3.000000,,,,,,,,,,,,,,,,,,,,,,,2,0,2,0,0,0,1.000,0.000,1,0,1,0,0,0,9999.0,9999.0,0,0,3,0,3,0,1,0,0,0,0,0,0,0,0,0,2.000000,0.45,4.000000,0.000000,0.0,1.000000,0.35,1.000000,110.000000,0.964912,114.000000,0.000000,0.0,0.000000,13.000000,0.45,17.000000,0.000000,0.0,0.000000,0.35,0.0000,19.00000,0.760000,25.000000,12.000000,0.0,16.000000,2.0,0.500000,4.000000,1.00,1.000000,1.000000,0.00,0.0,110.000000,114.000000,13.0,0.764706,17.0,0.0,0.00,0.0,0.0,0.0,19.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1994-03-11,UFC 2: No Way Out,remco pardoel,orlando wiet,1,89,Open Weight,KO/TKO,1,Southpaw,Southpaw,0,0,7,2,7,1,7,7,7,5,0,0,1,0,1,0,0,0,7,0,7,1,0,0,0,0,0,1,0,1,0,1,0,2,0,0,0,0,7,0,7,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1.000000,591.0000,0.000000,6.0000,4.0000,22.000000,20.00,1.000000,1.000000,1.000000,4.0,5.000000,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.0000,4.000000,5.000000,1.00,170.000000,0.0000,12.0000,8.000000,15.000000,11.000000,0.000000,0.0000,0.000000,7.000000,11.000000,1.000000,1.000000,0.000000,0.000,1.0000,3.000000,0.000000,0.000000,7.00,9.000000,1,1,2,1,0,1,0.500,1.000,0,0,0,0,0,0,9999.0,9999.0,0,0,1,1,2,1,0,1,0,0,0,0,0,0,0,0,7.000000,0.45,7.000000,0.000000,0.0,1.000000,0.35,1.000000,7.000000,1.000000,7.000000,7.000000,0.0,7.000000,6.100000,0.45,6.700000,0.000000,0.0,1.000000,0.35,1.0000,10.90000,0.972727,11.500000,6.100000,0.0,6.400000,7.0,1.000000,7.000000,1.00,1.000000,1.000000,0.00,0.0,7.000000,7.000000,5.5,0.833333,6.5,1.0,1.00,1.0,0.0,0.0,13.50,14.5,0.0,0.0,0.0,0.0,0.0,0.0
4,1994-03-11,UFC 2: No Way Out,royce gracie,patrick smith,1,77,Open Weight,KO/TKO,1,Southpaw,Orthodox,0,0,4,2,4,1,11,3,11,2,0,0,1,0,2,0,0,0,3,0,3,0,0,1,0,2,1,0,1,0,0,0,0,1,1,1,1,1,3,0,3,0,1993-11-12,74.000000,225.000000,77.300000,42.500000,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,73.000000,175.000000,74.800000,40.500000,1.0,1.0,0.00,0.76,0.00,630.0,,,,,,,,,,,,,,,,,,,,,,,1.00,51.666667,0.0000,10.0000,6.333333,12.666667,8.333333,0.666667,0.0000,0.333333,4.333333,6.666667,1.333333,1.333333,0.666667,2.000,1.0000,3.333333,1.333333,1.333333,4.00,5.333333,3,0,3,3,0,3,1.000,1.000,0,0,0,0,0,0,0.0,9999.0,1,0,2,2,2,2,1,1,1,1,0,0,1,1,0,0,2.000000,0.45,4.000000,0.000000,0.0,1.000000,0.35,1.000000,110.000000,0.964912,114.000000,0.000000,0.0,0.000000,13.000000,0.45,17.000000,0.000000,0.0,0.000000,0.35,0.0000,19.00000,0.760000,25.000000,12.000000,0.0,16.000000,2.0,0.500000,4.000000,1.00,1.000000,1.000000,0.00,0.0,110.000000,114.000000,13.0,0.764706,17.0,0.0,0.00,0.0,0.0,0.0,19.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8177,2025-09-13,UFC Fight Night: Lopes vs. Silva,jesus aguilar,luis gurule,3,300,Flyweight,Decision - Unanimous,1,Orthodox,Orthodox,0,0,235,186,85,65,237,195,87,73,0,0,1,0,5,5,38,21,35,43,164,157,25,12,43,18,25,10,28,11,76,59,224,174,9,2,11,5,0,4,0,7,2025-04-05,65.000000,125.000000,64.000000,38.000000,5.0,5.0,0.41,0.00,0.00,738.0,2023-02-04,64.000000,125.500000,62.500000,36.000000,3.0,2.0,1.64,1.40,0.23,550.0,1.800000,207.2000,0.200000,26.6000,12.2000,33.400000,17.20,1.000000,0.600000,1.800000,8.0,21.000000,1.800000,2.000000,2.400000,3.600000,9.800000,21.400000,1.400000,3.0000,1.000000,2.200000,2.00,114.000000,0.0000,34.0000,12.000000,49.000000,26.000000,0.000000,1.0000,4.000000,3.000000,13.000000,0.000000,7.000000,9.000000,14.000,11.0000,32.000000,1.000000,2.000000,0.00,0.000000,3,2,5,0,1,1,0.600,0.000,2,0,3,1,952,161,392.0,9999.0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,14.609000,0.45,33.230900,0.102900,0.0,0.747000,0.35,2.088000,19.202000,0.654270,39.452900,10.002900,0.0,27.180900,12.000000,0.45,34.000000,0.000000,0.0,1.000000,0.35,4.0000,26.00000,0.530612,49.000000,3.000000,0.0,13.000000,12.2,0.639349,26.600000,0.60,0.130000,1.800000,81.00,0.2,17.200000,33.400000,12.0,0.352941,34.0,1.0,0.25,4.0,137.0,0.0,26.00,49.0,0.0,0.0,0.0,0.0,0.0,0.3
8178,2025-09-13,UFC Fight Night: Lopes vs. Silva,rodrigo sezinando,daniil donchenko,1,267,Open Weight,KO/TKO,0,Orthodox,Orthodox,0,1,36,68,10,39,36,71,10,42,0,0,1,1,4,3,62,63,7,29,30,55,3,8,6,11,0,2,0,2,7,23,33,44,3,6,3,9,0,10,0,15,2025-08-16,71.000000,186.000000,73.100000,40.500000,9.0,2.0,3.37,0.00,3.37,267.0,2025-08-16,69.000000,189.000000,71.000000,39.100000,2.0,9.0,3.37,0.00,0.00,267.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0.000,0.000,0,0,0,0,0,0,9999.0,9999.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.45,0.000000,0.000000,0.0,0.000000,0.35,0.000000,0.000000,0.544964,0.000000,0.000000,0.0,0.000000,0.000000,0.45,0.000000,0.000000,0.0,0.000000,0.35,0.0000,0.00000,0.549708,0.000000,0.000000,0.0,0.000000,0.0,0.468182,0.000000,0.00,0.266667,0.000000,0.00,0.0,0.000000,0.000000,0.0,0.466140,0.0,0.0,0.25,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.3,0.0,0.0,0.3
8179,2025-09-13,UFC Fight Night: Lopes vs. Silva,alessandro costa,alden coria,3,47,Flyweight,KO/TKO,0,Orthodox,Orthodox,0,0,56,95,12,42,59,131,14,77,2,0,1,2,4,2,84,198,7,34,37,85,1,1,6,3,4,7,13,7,12,29,56,79,0,0,0,0,0,13,0,16,2025-09-13,68.000000,125.000000,67.000000,38.200000,4.0,1.0,2.78,0.00,0.00,647.0,2022-12-17,64.000000,125.000000,67.000000,36.000000,4.0,4.0,0.46,0.46,0.69,656.0,2.500000,147.7500,0.500000,86.2500,45.5000,108.000000,62.00,0.000000,0.250000,2.000000,26.5,62.250000,8.500000,11.750000,10.500000,12.250000,32.500000,69.250000,5.250000,6.2500,7.750000,10.750000,,,,,,,,,,,,,,,,,,,,,,,2,2,4,0,0,0,0.500,0.000,2,0,3,0,1001,0,497.0,9999.0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,40.526000,0.45,83.900000,0.447000,0.0,0.210000,0.35,1.860000,58.439000,0.537121,108.515000,23.607000,0.0,61.891000,24.200000,0.45,74.000000,0.000000,0.0,0.000000,0.35,0.0000,42.05000,0.549708,107.000000,10.650000,0.0,55.450000,45.5,0.511327,86.250000,0.25,0.041667,2.000000,41.75,0.5,62.000000,108.000000,29.0,0.466140,76.0,0.0,0.25,0.0,10.5,0.0,44.75,107.0,0.0,0.0,0.0,0.0,0.0,0.3
8180,2025-09-13,UFC Fight Night: Lopes vs. Silva,diego lopes,jean silva,2,288,Featherweight,KO/TKO,1,Orthodox,Orthodox,1,0,135,91,74,43,154,91,86,43,0,0,3,0,4,0,160,3,63,26,118,67,1,10,1,17,10,7,16,7,31,42,65,90,1,1,2,1,42,0,68,0,2024-01-13,67.000000,146.000000,69.000000,36.500000,5.0,5.0,0.50,0.75,1.25,514.0,2023-05-06,71.000000,145.000000,72.500000,41.000000,4.0,5.0,0.79,1.74,0.79,631.0,2.428571,211.0000,0.571429,89.0000,42.0000,99.857143,52.00,1.142857,0.285714,0.714286,31.0,74.142857,5.142857,7.714286,5.857143,7.142857,29.428571,71.714286,4.285714,6.0000,8.285714,11.285714,1.80,181.800000,1.0000,58.0000,31.400000,58.600000,32.000000,0.600000,0.2000,0.200000,21.800000,46.200000,8.600000,10.600000,1.000000,1.200,23.8000,48.400000,5.200000,6.600000,2.40,3.000000,5,2,7,5,0,5,0.714,1.000,2,3,3,3,861,609,364.0,154.0,1,1,1,2,2,2,0,1,0,5,1,0,0,1,0,1,56.327803,0.45,129.068805,0.577830,0.0,0.357000,0.35,0.723030,67.827642,0.514481,141.607942,41.519051,0.0,109.014006,29.172100,0.45,54.896100,0.955900,0.0,0.240100,0.35,0.2401,29.87500,0.576372,55.599000,19.486800,0.0,42.792500,51.4,0.572806,110.000000,0.40,0.266667,1.000000,90.40,0.8,63.200000,122.600000,31.4,0.578821,58.0,0.2,0.20,0.2,8.4,1.0,32.00,58.6,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
df_temporal_features_clean.loc[
    (df_temporal_features_clean['fighter_red'] == 'paul craig') |
    (df_temporal_features_clean['fighter_blue'] == 'paul craig'),
    ['event_date', 'fighter_red', 'fighter_blue', 'winner', 'total_fights_before_blue',
    'sig_strikes_red', 'sig_strikes_blue', 'sig_strikes_ema_red', 'sig_attempts_ema_red', 'sig_strikes_ema_blue', 'sig_attempts_ema_blue']
]

Unnamed: 0,event_date,fighter_red,fighter_blue,winner,total_fights_before_blue,sig_strikes_red,sig_strikes_blue,sig_strikes_ema_red,sig_attempts_ema_red,sig_strikes_ema_blue,sig_attempts_ema_blue
3902,2016-12-17,henrique da silva,paul craig,0,0,16,31,69.9,118.0,31.0,49.0
3985,2017-03-04,tyson pedro,paul craig,1,1,23,12,4.0,7.0,31.0,49.0
4150,2017-07-16,khalil rountree jr,paul craig,1,2,15,17,8.39,15.6,25.3,41.5
4468,2018-03-17,paul craig,magomed ankalaev,1,0,18,59,22.81,37.75,15.0,24.0
4794,2018-12-01,jimmy crute,paul craig,1,4,17,12,17.0,38.0,21.367,41.425
4950,2019-03-30,kennedy nzechukwu,paul craig,0,5,40,25,14.1,27.7,18.5569,35.5975
5079,2019-06-29,alonzo menifield,paul craig,1,6,8,7,33.0,60.0,20.48983,48.01825
5207,2019-09-21,vinicius moreira,paul craig,0,7,1,18,8.3,15.6,16.442881,37.512775
5565,2020-07-25,paul craig,gadzhimurad antigulov,1,4,2,11,16.910017,34.358942,4.614,9.64
5740,2020-11-21,mauricio rua,paul craig,0,9,10,29,41.698547,79.8697,12.437012,25.55126


Convert event_date into year, month, day column

In [43]:
df_temporal_features_clean['year'] = pd.to_datetime(df_temporal_features_clean['event_date']).dt.year
df_temporal_features_clean['month'] = pd.to_datetime(df_temporal_features_clean['event_date']).dt.month  
df_temporal_features_clean['day_of_week'] = pd.to_datetime(df_temporal_features_clean['event_date']).dt.dayofweek

One-hot Encode weight_class, win_method and stance

In [44]:
df_temporal_features_clean

Unnamed: 0,event_date,event_name,fighter_red,fighter_blue,round,time,weight_class,win_method,winner,stance_red,stance_blue,knockdowns_red,knockdowns_blue,sig_attempts_red,sig_attempts_blue,sig_strikes_red,sig_strikes_blue,total_strikes_attempts_red,total_strikes_attempts_blue,total_strikes_red,total_strikes_blue,sub_attempts_red,sub_attempts_blue,takedowns_red,takedowns_blue,takedown_attempts_red,takedown_attempts_blue,control_time_red,control_time_blue,head_strikes_red,head_strikes_blue,head_attempts_red,head_attempts_blue,body_strikes_red,body_strikes_blue,body_attempts_red,body_attempts_blue,leg_strikes_red,leg_strikes_blue,leg_attempts_red,leg_attempts_blue,distance_red,distance_blue,distance_attempts_red,distance_attempts_blue,clinch_strikes_red,clinch_strikes_blue,clinch_attempts_red,clinch_attempts_blue,ground_strikes_red,ground_strikes_blue,ground_attempts_red,ground_attempts_blue,octagon_debut_blue,height_blue,weight_blue,reach_blue,leg_reach_blue,sig_strikes_landed_per_minute_blue,sig_strikes_absorbed_per_minute_blue,takedowns_avg_blue,submission_avg_blue,knockdown_avg_blue,fight_time_avg_blue,octagon_debut_red,height_red,weight_red,reach_red,leg_reach_red,sig_strikes_landed_per_minute_red,sig_strikes_absorbed_per_minute_red,takedowns_avg_red,submission_avg_red,knockdown_avg_red,fight_time_avg_red,avg_rounds_red,avg_time_red,avg_knockdowns_red,avg_sig_attempts_red,avg_sig_strikes_red,avg_total_strikes_attempts_red,avg_total_strikes_red,avg_sub_attempts_red,avg_takedowns_red,avg_takedown_attempts_red,avg_head_strikes_red,avg_head_attempts_red,avg_body_strikes_red,avg_body_attempts_red,avg_leg_strikes_red,avg_leg_attempts_red,avg_distance_red,avg_distance_attempts_red,avg_clinch_strikes_red,avg_clinch_attempts_red,avg_ground_strikes_red,avg_ground_attempts_red,avg_rounds_blue,avg_time_blue,avg_knockdowns_blue,avg_sig_attempts_blue,avg_sig_strikes_blue,avg_total_strikes_attempts_blue,avg_total_strikes_blue,avg_sub_attempts_blue,avg_takedowns_blue,avg_takedown_attempts_blue,avg_head_strikes_blue,avg_head_attempts_blue,avg_body_strikes_blue,avg_body_attempts_blue,avg_leg_strikes_blue,avg_leg_attempts_blue,avg_distance_blue,avg_distance_attempts_blue,avg_clinch_strikes_blue,avg_clinch_attempts_blue,avg_ground_strikes_blue,avg_ground_attempts_blue,wins_before_red,losses_before_red,total_fights_before_red,wins_before_blue,losses_before_blue,total_fights_before_blue,win_pct_before_red,win_pct_before_blue,wins_last_3_red,wins_last_3_blue,fights_last_3_red,fights_last_3_blue,days_since_debut_red,days_since_debut_blue,days_since_last_win_red,days_since_last_win_blue,recent_winner_red,recent_winner_blue,wins_last_365_days_red,wins_last_365_days_blue,fights_last_365_days_red,fights_last_365_days_blue,undefeated_last_year_red,undefeated_last_year_blue,win_streak_red,win_streak_blue,lose_streak_red,lose_streak_blue,on_win_streak_red,on_win_streak_blue,long_win_streak_red,long_win_streak_blue,sig_strikes_ema_red,sig_strikes_success_rate_ema_red,sig_attempts_ema_red,knockdowns_ema_red,knockdowns_success_rate_ema_red,takedowns_ema_red,takedowns_success_rate_ema_red,takedown_attempts_ema_red,total_strikes_ema_red,total_strikes_success_rate_ema_red,total_strikes_attempts_ema_red,head_strikes_ema_red,head_strikes_success_rate_ema_red,head_attempts_ema_red,sig_strikes_ema_blue,sig_strikes_success_rate_ema_blue,sig_attempts_ema_blue,knockdowns_ema_blue,knockdowns_success_rate_ema_blue,takedowns_ema_blue,takedowns_success_rate_ema_blue,takedown_attempts_ema_blue,total_strikes_ema_blue,total_strikes_success_rate_ema_blue,total_strikes_attempts_ema_blue,head_strikes_ema_blue,head_strikes_success_rate_ema_blue,head_attempts_ema_blue,sig_strikes_roll_5_red,sig_strikes_success_rate_roll_5_red,sig_attempts_roll_5_red,takedowns_roll_5_red,takedowns_success_rate_roll_5_red,takedown_attempts_roll_5_red,control_time_roll_5_red,knockdowns_roll_5_red,total_strikes_roll_5_red,total_strikes_attempts_roll_5_red,sig_strikes_roll_5_blue,sig_strikes_success_rate_roll_5_blue,sig_attempts_roll_5_blue,takedowns_roll_5_blue,takedowns_success_rate_roll_5_blue,takedown_attempts_roll_5_blue,control_time_roll_5_blue,knockdowns_roll_5_blue,total_strikes_roll_5_blue,total_strikes_attempts_roll_5_blue,performance_trend_red,finish_momentum_red,dominance_momentum_red,performance_trend_blue,finish_momentum_blue,dominance_momentum_blue,year,month,day_of_week
0,1994-03-11,UFC 2: No Way Out,jason delucia,scott baker,1,401,Open Weight,Submission,1,Southpaw,Orthodox,0,0,5,2,3,0,25,23,20,14,5,0,0,1,1,1,0,0,1,0,2,2,1,0,2,0,1,0,1,0,2,0,4,2,0,0,0,0,1,0,1,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,71.000000,190.000000,75.800000,40.600000,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0.000,0.000,0,0,0,0,0,0,9999.0,9999.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.820000,0.45,2.800000,0.000000,0.0,1.000000,0.35,1.510000,56.210000,0.544964,58.170000,0.630000,0.0,0.630000,3.000000,0.45,5.000000,0.000000,0.0,0.000000,0.35,1.0000,20.00000,0.800000,25.000000,1.000000,0.0,2.000000,2.0,0.500000,2.666667,1.00,0.666667,1.666667,0.00,0.0,40.333333,41.666667,3.0,0.600000,5.0,0.0,0.00,1.0,0.0,0.0,20.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0,1994,3,4
1,1994-03-11,UFC 2: No Way Out,patrick smith,scott morris,1,30,Open Weight,KO/TKO,1,Orthodox,Orthodox,0,0,17,0,13,0,25,0,19,0,0,0,0,0,0,1,0,0,12,0,16,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,12,0,16,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,74.000000,225.000000,77.300000,42.500000,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,1.00,20.000000,0.0000,1.0000,1.000000,2.000000,2.000000,1.000000,1.0000,1.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000,0.0000,0.000000,1.000000,1.000000,0.00,0.000000,2,0,2,0,0,0,1.000,0.000,0,1,1,1,0,0,9999.0,0.0,0,1,2,1,3,1,0,1,0,0,0,0,0,0,0,0,0.000000,0.45,0.000000,0.000000,0.0,0.000000,0.35,1.000000,0.000000,0.964912,0.000000,0.000000,0.0,0.000000,13.000000,0.45,17.000000,0.000000,0.0,0.000000,0.35,0.0000,19.00000,0.760000,25.000000,12.000000,0.0,16.000000,0.0,0.000000,0.000000,0.00,0.000000,1.000000,0.00,0.0,0.000000,0.000000,13.0,0.764706,17.0,0.0,0.00,0.0,0.0,0.0,19.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0,1994,3,4
2,1994-03-11,UFC 2: No Way Out,royce gracie,minoki ichihara,1,308,Open Weight,Submission,1,Southpaw,Orthodox,0,0,4,7,2,3,114,16,110,12,2,0,1,0,1,0,0,0,0,3,0,6,0,0,0,0,2,0,4,1,2,0,4,2,0,0,0,0,0,3,0,5,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,73.000000,175.000000,74.800000,40.500000,1.0,1.0,0.00,0.76,0.00,630.0,1.000000,77.0000,0.000000,4.0000,4.0000,11.000000,11.00,0.000000,1.000000,2.000000,3.0,3.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000,1.0000,3.000000,3.000000,,,,,,,,,,,,,,,,,,,,,,,2,0,2,0,0,0,1.000,0.000,1,0,1,0,0,0,9999.0,9999.0,0,0,3,0,3,0,1,0,0,0,0,0,0,0,0,0,2.000000,0.45,4.000000,0.000000,0.0,1.000000,0.35,1.000000,110.000000,0.964912,114.000000,0.000000,0.0,0.000000,13.000000,0.45,17.000000,0.000000,0.0,0.000000,0.35,0.0000,19.00000,0.760000,25.000000,12.000000,0.0,16.000000,2.0,0.500000,4.000000,1.00,1.000000,1.000000,0.00,0.0,110.000000,114.000000,13.0,0.764706,17.0,0.0,0.00,0.0,0.0,0.0,19.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0,1994,3,4
3,1994-03-11,UFC 2: No Way Out,remco pardoel,orlando wiet,1,89,Open Weight,KO/TKO,1,Southpaw,Southpaw,0,0,7,2,7,1,7,7,7,5,0,0,1,0,1,0,0,0,7,0,7,1,0,0,0,0,0,1,0,1,0,1,0,2,0,0,0,0,7,0,7,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1.000000,591.0000,0.000000,6.0000,4.0000,22.000000,20.00,1.000000,1.000000,1.000000,4.0,5.000000,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.0000,4.000000,5.000000,1.00,170.000000,0.0000,12.0000,8.000000,15.000000,11.000000,0.000000,0.0000,0.000000,7.000000,11.000000,1.000000,1.000000,0.000000,0.000,1.0000,3.000000,0.000000,0.000000,7.00,9.000000,1,1,2,1,0,1,0.500,1.000,0,0,0,0,0,0,9999.0,9999.0,0,0,1,1,2,1,0,1,0,0,0,0,0,0,0,0,7.000000,0.45,7.000000,0.000000,0.0,1.000000,0.35,1.000000,7.000000,1.000000,7.000000,7.000000,0.0,7.000000,6.100000,0.45,6.700000,0.000000,0.0,1.000000,0.35,1.0000,10.90000,0.972727,11.500000,6.100000,0.0,6.400000,7.0,1.000000,7.000000,1.00,1.000000,1.000000,0.00,0.0,7.000000,7.000000,5.5,0.833333,6.5,1.0,1.00,1.0,0.0,0.0,13.50,14.5,0.0,0.0,0.0,0.0,0.0,0.0,1994,3,4
4,1994-03-11,UFC 2: No Way Out,royce gracie,patrick smith,1,77,Open Weight,KO/TKO,1,Southpaw,Orthodox,0,0,4,2,4,1,11,3,11,2,0,0,1,0,2,0,0,0,3,0,3,0,0,1,0,2,1,0,1,0,0,0,0,1,1,1,1,1,3,0,3,0,1993-11-12,74.000000,225.000000,77.300000,42.500000,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,73.000000,175.000000,74.800000,40.500000,1.0,1.0,0.00,0.76,0.00,630.0,,,,,,,,,,,,,,,,,,,,,,,1.00,51.666667,0.0000,10.0000,6.333333,12.666667,8.333333,0.666667,0.0000,0.333333,4.333333,6.666667,1.333333,1.333333,0.666667,2.000,1.0000,3.333333,1.333333,1.333333,4.00,5.333333,3,0,3,3,0,3,1.000,1.000,0,0,0,0,0,0,0.0,9999.0,1,0,2,2,2,2,1,1,1,1,0,0,1,1,0,0,2.000000,0.45,4.000000,0.000000,0.0,1.000000,0.35,1.000000,110.000000,0.964912,114.000000,0.000000,0.0,0.000000,13.000000,0.45,17.000000,0.000000,0.0,0.000000,0.35,0.0000,19.00000,0.760000,25.000000,12.000000,0.0,16.000000,2.0,0.500000,4.000000,1.00,1.000000,1.000000,0.00,0.0,110.000000,114.000000,13.0,0.764706,17.0,0.0,0.00,0.0,0.0,0.0,19.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0,1994,3,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8177,2025-09-13,UFC Fight Night: Lopes vs. Silva,jesus aguilar,luis gurule,3,300,Flyweight,Decision - Unanimous,1,Orthodox,Orthodox,0,0,235,186,85,65,237,195,87,73,0,0,1,0,5,5,38,21,35,43,164,157,25,12,43,18,25,10,28,11,76,59,224,174,9,2,11,5,0,4,0,7,2025-04-05,65.000000,125.000000,64.000000,38.000000,5.0,5.0,0.41,0.00,0.00,738.0,2023-02-04,64.000000,125.500000,62.500000,36.000000,3.0,2.0,1.64,1.40,0.23,550.0,1.800000,207.2000,0.200000,26.6000,12.2000,33.400000,17.20,1.000000,0.600000,1.800000,8.0,21.000000,1.800000,2.000000,2.400000,3.600000,9.800000,21.400000,1.400000,3.0000,1.000000,2.200000,2.00,114.000000,0.0000,34.0000,12.000000,49.000000,26.000000,0.000000,1.0000,4.000000,3.000000,13.000000,0.000000,7.000000,9.000000,14.000,11.0000,32.000000,1.000000,2.000000,0.00,0.000000,3,2,5,0,1,1,0.600,0.000,2,0,3,1,952,161,392.0,9999.0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,14.609000,0.45,33.230900,0.102900,0.0,0.747000,0.35,2.088000,19.202000,0.654270,39.452900,10.002900,0.0,27.180900,12.000000,0.45,34.000000,0.000000,0.0,1.000000,0.35,4.0000,26.00000,0.530612,49.000000,3.000000,0.0,13.000000,12.2,0.639349,26.600000,0.60,0.130000,1.800000,81.00,0.2,17.200000,33.400000,12.0,0.352941,34.0,1.0,0.25,4.0,137.0,0.0,26.00,49.0,0.0,0.0,0.0,0.0,0.0,0.3,2025,9,5
8178,2025-09-13,UFC Fight Night: Lopes vs. Silva,rodrigo sezinando,daniil donchenko,1,267,Open Weight,KO/TKO,0,Orthodox,Orthodox,0,1,36,68,10,39,36,71,10,42,0,0,1,1,4,3,62,63,7,29,30,55,3,8,6,11,0,2,0,2,7,23,33,44,3,6,3,9,0,10,0,15,2025-08-16,71.000000,186.000000,73.100000,40.500000,9.0,2.0,3.37,0.00,3.37,267.0,2025-08-16,69.000000,189.000000,71.000000,39.100000,2.0,9.0,3.37,0.00,0.00,267.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0.000,0.000,0,0,0,0,0,0,9999.0,9999.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.45,0.000000,0.000000,0.0,0.000000,0.35,0.000000,0.000000,0.544964,0.000000,0.000000,0.0,0.000000,0.000000,0.45,0.000000,0.000000,0.0,0.000000,0.35,0.0000,0.00000,0.549708,0.000000,0.000000,0.0,0.000000,0.0,0.468182,0.000000,0.00,0.266667,0.000000,0.00,0.0,0.000000,0.000000,0.0,0.466140,0.0,0.0,0.25,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.3,0.0,0.0,0.3,2025,9,5
8179,2025-09-13,UFC Fight Night: Lopes vs. Silva,alessandro costa,alden coria,3,47,Flyweight,KO/TKO,0,Orthodox,Orthodox,0,0,56,95,12,42,59,131,14,77,2,0,1,2,4,2,84,198,7,34,37,85,1,1,6,3,4,7,13,7,12,29,56,79,0,0,0,0,0,13,0,16,2025-09-13,68.000000,125.000000,67.000000,38.200000,4.0,1.0,2.78,0.00,0.00,647.0,2022-12-17,64.000000,125.000000,67.000000,36.000000,4.0,4.0,0.46,0.46,0.69,656.0,2.500000,147.7500,0.500000,86.2500,45.5000,108.000000,62.00,0.000000,0.250000,2.000000,26.5,62.250000,8.500000,11.750000,10.500000,12.250000,32.500000,69.250000,5.250000,6.2500,7.750000,10.750000,,,,,,,,,,,,,,,,,,,,,,,2,2,4,0,0,0,0.500,0.000,2,0,3,0,1001,0,497.0,9999.0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,40.526000,0.45,83.900000,0.447000,0.0,0.210000,0.35,1.860000,58.439000,0.537121,108.515000,23.607000,0.0,61.891000,24.200000,0.45,74.000000,0.000000,0.0,0.000000,0.35,0.0000,42.05000,0.549708,107.000000,10.650000,0.0,55.450000,45.5,0.511327,86.250000,0.25,0.041667,2.000000,41.75,0.5,62.000000,108.000000,29.0,0.466140,76.0,0.0,0.25,0.0,10.5,0.0,44.75,107.0,0.0,0.0,0.0,0.0,0.0,0.3,2025,9,5
8180,2025-09-13,UFC Fight Night: Lopes vs. Silva,diego lopes,jean silva,2,288,Featherweight,KO/TKO,1,Orthodox,Orthodox,1,0,135,91,74,43,154,91,86,43,0,0,3,0,4,0,160,3,63,26,118,67,1,10,1,17,10,7,16,7,31,42,65,90,1,1,2,1,42,0,68,0,2024-01-13,67.000000,146.000000,69.000000,36.500000,5.0,5.0,0.50,0.75,1.25,514.0,2023-05-06,71.000000,145.000000,72.500000,41.000000,4.0,5.0,0.79,1.74,0.79,631.0,2.428571,211.0000,0.571429,89.0000,42.0000,99.857143,52.00,1.142857,0.285714,0.714286,31.0,74.142857,5.142857,7.714286,5.857143,7.142857,29.428571,71.714286,4.285714,6.0000,8.285714,11.285714,1.80,181.800000,1.0000,58.0000,31.400000,58.600000,32.000000,0.600000,0.2000,0.200000,21.800000,46.200000,8.600000,10.600000,1.000000,1.200,23.8000,48.400000,5.200000,6.600000,2.40,3.000000,5,2,7,5,0,5,0.714,1.000,2,3,3,3,861,609,364.0,154.0,1,1,1,2,2,2,0,1,0,5,1,0,0,1,0,1,56.327803,0.45,129.068805,0.577830,0.0,0.357000,0.35,0.723030,67.827642,0.514481,141.607942,41.519051,0.0,109.014006,29.172100,0.45,54.896100,0.955900,0.0,0.240100,0.35,0.2401,29.87500,0.576372,55.599000,19.486800,0.0,42.792500,51.4,0.572806,110.000000,0.40,0.266667,1.000000,90.40,0.8,63.200000,122.600000,31.4,0.578821,58.0,0.2,0.20,0.2,8.4,1.0,32.00,58.6,0.0,0.0,0.0,0.0,0.0,0.0,2025,9,5


In [45]:
'''
# Encode weight_class and win_method together (no red/blue distinction needed)
general_cols = ['weight_class', 'win_method']
encoder_general = OneHotEncoder(sparse_output=False, drop='first')
general_encoded = encoder_general.fit_transform(df_temporal_features_clean[general_cols])
general_feature_names = encoder_general.get_feature_names_out(general_cols)

# Combine all encoded features
all_encoded = np.concatenate([general_encoded], axis=1)
all_feature_names = list(general_feature_names)

# Create final DataFrame
encoded_df = pd.DataFrame(all_encoded, columns=all_feature_names, index=df_temporal_features_clean.index)

# Concatenate and drop originals
df_temporal_features_clean = pd.concat([df_temporal_features_clean.drop(['weight_class', 'win_method'], axis=1), encoded_df], axis=1)

# Save general encoder
with open("../models/encoder_general.pkl", "wb") as f:
    pickle.dump(encoder_general, f)
'''

'\n# Encode weight_class and win_method together (no red/blue distinction needed)\ngeneral_cols = [\'weight_class\', \'win_method\']\nencoder_general = OneHotEncoder(sparse_output=False, drop=\'first\')\ngeneral_encoded = encoder_general.fit_transform(df_temporal_features_clean[general_cols])\ngeneral_feature_names = encoder_general.get_feature_names_out(general_cols)\n\n# Combine all encoded features\nall_encoded = np.concatenate([general_encoded], axis=1)\nall_feature_names = list(general_feature_names)\n\n# Create final DataFrame\nencoded_df = pd.DataFrame(all_encoded, columns=all_feature_names, index=df_temporal_features_clean.index)\n\n# Concatenate and drop originals\ndf_temporal_features_clean = pd.concat([df_temporal_features_clean.drop([\'weight_class\', \'win_method\'], axis=1), encoded_df], axis=1)\n\n# Save general encoder\nwith open("../models/encoder_general.pkl", "wb") as f:\n    pickle.dump(encoder_general, f)\n'

In [46]:
# Encode stance_red and stance_blue separately to preserve naming
encoder_stance = OneHotEncoder(sparse_output=False, drop='first')

encoder_stance.fit(df_temporal_features_clean[['stance_red']])

# Transform red stance
stance_red_encoded = encoder_stance.transform(df_temporal_features_clean[['stance_red']])
stance_red_names = [name.replace('stance_red_', '') + '_red'
                    for name in encoder_stance.get_feature_names_out(['stance_red'])]

# Transform blue stance (rename column temporarily to match fit)
stance_blue_encoded = encoder_stance.transform(
    df_temporal_features_clean[['stance_blue']].rename(columns={'stance_blue': 'stance_red'})
)
stance_blue_names = [name.replace('stance_red_', '') + '_blue'
                    for name in encoder_stance.get_feature_names_out(['stance_red'])]

# Combine all encoded features
all_encoded = np.concatenate([stance_red_encoded, stance_blue_encoded], axis=1)
all_feature_names = stance_red_names + stance_blue_names

# Create final DataFrame
encoded_df = pd.DataFrame(all_encoded, columns=all_feature_names, index=df_temporal_features_clean.index)

# Concatenate and drop originals
df_temporal_features_clean = pd.concat([df_temporal_features_clean.drop(['stance_red', 'stance_blue'], axis=1), encoded_df], axis=1)

# Save stance encoder
with open("../models/encoder_stance.pkl", "wb") as f:
    pickle.dump(encoder_stance, f)

In [47]:
df_temporal_features_clean

Unnamed: 0,event_date,event_name,fighter_red,fighter_blue,round,time,weight_class,win_method,winner,knockdowns_red,knockdowns_blue,sig_attempts_red,sig_attempts_blue,sig_strikes_red,sig_strikes_blue,total_strikes_attempts_red,total_strikes_attempts_blue,total_strikes_red,total_strikes_blue,sub_attempts_red,sub_attempts_blue,takedowns_red,takedowns_blue,takedown_attempts_red,takedown_attempts_blue,control_time_red,control_time_blue,head_strikes_red,head_strikes_blue,head_attempts_red,head_attempts_blue,body_strikes_red,body_strikes_blue,body_attempts_red,body_attempts_blue,leg_strikes_red,leg_strikes_blue,leg_attempts_red,leg_attempts_blue,distance_red,distance_blue,distance_attempts_red,distance_attempts_blue,clinch_strikes_red,clinch_strikes_blue,clinch_attempts_red,clinch_attempts_blue,ground_strikes_red,ground_strikes_blue,ground_attempts_red,ground_attempts_blue,octagon_debut_blue,height_blue,weight_blue,reach_blue,leg_reach_blue,sig_strikes_landed_per_minute_blue,sig_strikes_absorbed_per_minute_blue,takedowns_avg_blue,submission_avg_blue,knockdown_avg_blue,fight_time_avg_blue,octagon_debut_red,height_red,weight_red,reach_red,leg_reach_red,sig_strikes_landed_per_minute_red,sig_strikes_absorbed_per_minute_red,takedowns_avg_red,submission_avg_red,knockdown_avg_red,fight_time_avg_red,avg_rounds_red,avg_time_red,avg_knockdowns_red,avg_sig_attempts_red,avg_sig_strikes_red,avg_total_strikes_attempts_red,avg_total_strikes_red,avg_sub_attempts_red,avg_takedowns_red,avg_takedown_attempts_red,avg_head_strikes_red,avg_head_attempts_red,avg_body_strikes_red,avg_body_attempts_red,avg_leg_strikes_red,avg_leg_attempts_red,avg_distance_red,avg_distance_attempts_red,avg_clinch_strikes_red,avg_clinch_attempts_red,avg_ground_strikes_red,avg_ground_attempts_red,avg_rounds_blue,avg_time_blue,avg_knockdowns_blue,avg_sig_attempts_blue,avg_sig_strikes_blue,avg_total_strikes_attempts_blue,avg_total_strikes_blue,avg_sub_attempts_blue,avg_takedowns_blue,avg_takedown_attempts_blue,avg_head_strikes_blue,avg_head_attempts_blue,avg_body_strikes_blue,avg_body_attempts_blue,avg_leg_strikes_blue,avg_leg_attempts_blue,avg_distance_blue,avg_distance_attempts_blue,avg_clinch_strikes_blue,avg_clinch_attempts_blue,avg_ground_strikes_blue,avg_ground_attempts_blue,wins_before_red,losses_before_red,total_fights_before_red,wins_before_blue,losses_before_blue,total_fights_before_blue,win_pct_before_red,win_pct_before_blue,wins_last_3_red,wins_last_3_blue,fights_last_3_red,fights_last_3_blue,days_since_debut_red,days_since_debut_blue,days_since_last_win_red,days_since_last_win_blue,recent_winner_red,recent_winner_blue,wins_last_365_days_red,wins_last_365_days_blue,fights_last_365_days_red,fights_last_365_days_blue,undefeated_last_year_red,undefeated_last_year_blue,win_streak_red,win_streak_blue,lose_streak_red,lose_streak_blue,on_win_streak_red,on_win_streak_blue,long_win_streak_red,long_win_streak_blue,sig_strikes_ema_red,sig_strikes_success_rate_ema_red,sig_attempts_ema_red,knockdowns_ema_red,knockdowns_success_rate_ema_red,takedowns_ema_red,takedowns_success_rate_ema_red,takedown_attempts_ema_red,total_strikes_ema_red,total_strikes_success_rate_ema_red,total_strikes_attempts_ema_red,head_strikes_ema_red,head_strikes_success_rate_ema_red,head_attempts_ema_red,sig_strikes_ema_blue,sig_strikes_success_rate_ema_blue,sig_attempts_ema_blue,knockdowns_ema_blue,knockdowns_success_rate_ema_blue,takedowns_ema_blue,takedowns_success_rate_ema_blue,takedown_attempts_ema_blue,total_strikes_ema_blue,total_strikes_success_rate_ema_blue,total_strikes_attempts_ema_blue,head_strikes_ema_blue,head_strikes_success_rate_ema_blue,head_attempts_ema_blue,sig_strikes_roll_5_red,sig_strikes_success_rate_roll_5_red,sig_attempts_roll_5_red,takedowns_roll_5_red,takedowns_success_rate_roll_5_red,takedown_attempts_roll_5_red,control_time_roll_5_red,knockdowns_roll_5_red,total_strikes_roll_5_red,total_strikes_attempts_roll_5_red,sig_strikes_roll_5_blue,sig_strikes_success_rate_roll_5_blue,sig_attempts_roll_5_blue,takedowns_roll_5_blue,takedowns_success_rate_roll_5_blue,takedown_attempts_roll_5_blue,control_time_roll_5_blue,knockdowns_roll_5_blue,total_strikes_roll_5_blue,total_strikes_attempts_roll_5_blue,performance_trend_red,finish_momentum_red,dominance_momentum_red,performance_trend_blue,finish_momentum_blue,dominance_momentum_blue,year,month,day_of_week,Orthodox_red,Sideways_red,Southpaw_red,Switch_red,Orthodox_blue,Sideways_blue,Southpaw_blue,Switch_blue
0,1994-03-11,UFC 2: No Way Out,jason delucia,scott baker,1,401,Open Weight,Submission,1,0,0,5,2,3,0,25,23,20,14,5,0,0,1,1,1,0,0,1,0,2,2,1,0,2,0,1,0,1,0,2,0,4,2,0,0,0,0,1,0,1,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,71.000000,190.000000,75.800000,40.600000,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0.000,0.000,0,0,0,0,0,0,9999.0,9999.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.820000,0.45,2.800000,0.000000,0.0,1.000000,0.35,1.510000,56.210000,0.544964,58.170000,0.630000,0.0,0.630000,3.000000,0.45,5.000000,0.000000,0.0,0.000000,0.35,1.0000,20.00000,0.800000,25.000000,1.000000,0.0,2.000000,2.0,0.500000,2.666667,1.00,0.666667,1.666667,0.00,0.0,40.333333,41.666667,3.0,0.600000,5.0,0.0,0.00,1.0,0.0,0.0,20.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0,1994,3,4,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
1,1994-03-11,UFC 2: No Way Out,patrick smith,scott morris,1,30,Open Weight,KO/TKO,1,0,0,17,0,13,0,25,0,19,0,0,0,0,0,0,1,0,0,12,0,16,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,12,0,16,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,74.000000,225.000000,77.300000,42.500000,0.0,0.0,0.00,0.00,0.00,0.0,,,,,,,,,,,,,,,,,,,,,,,1.00,20.000000,0.0000,1.0000,1.000000,2.000000,2.000000,1.000000,1.0000,1.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000,0.0000,0.000000,1.000000,1.000000,0.00,0.000000,2,0,2,0,0,0,1.000,0.000,0,1,1,1,0,0,9999.0,0.0,0,1,2,1,3,1,0,1,0,0,0,0,0,0,0,0,0.000000,0.45,0.000000,0.000000,0.0,0.000000,0.35,1.000000,0.000000,0.964912,0.000000,0.000000,0.0,0.000000,13.000000,0.45,17.000000,0.000000,0.0,0.000000,0.35,0.0000,19.00000,0.760000,25.000000,12.000000,0.0,16.000000,0.0,0.000000,0.000000,0.00,0.000000,1.000000,0.00,0.0,0.000000,0.000000,13.0,0.764706,17.0,0.0,0.00,0.0,0.0,0.0,19.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0,1994,3,4,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,1994-03-11,UFC 2: No Way Out,royce gracie,minoki ichihara,1,308,Open Weight,Submission,1,0,0,4,7,2,3,114,16,110,12,2,0,1,0,1,0,0,0,0,3,0,6,0,0,0,0,2,0,4,1,2,0,4,2,0,0,0,0,0,3,0,5,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,73.000000,175.000000,74.800000,40.500000,1.0,1.0,0.00,0.76,0.00,630.0,1.000000,77.0000,0.000000,4.0000,4.0000,11.000000,11.00,0.000000,1.000000,2.000000,3.0,3.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.000000,1.000000,1.0000,3.000000,3.000000,,,,,,,,,,,,,,,,,,,,,,,2,0,2,0,0,0,1.000,0.000,1,0,1,0,0,0,9999.0,9999.0,0,0,3,0,3,0,1,0,0,0,0,0,0,0,0,0,2.000000,0.45,4.000000,0.000000,0.0,1.000000,0.35,1.000000,110.000000,0.964912,114.000000,0.000000,0.0,0.000000,13.000000,0.45,17.000000,0.000000,0.0,0.000000,0.35,0.0000,19.00000,0.760000,25.000000,12.000000,0.0,16.000000,2.0,0.500000,4.000000,1.00,1.000000,1.000000,0.00,0.0,110.000000,114.000000,13.0,0.764706,17.0,0.0,0.00,0.0,0.0,0.0,19.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0,1994,3,4,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
3,1994-03-11,UFC 2: No Way Out,remco pardoel,orlando wiet,1,89,Open Weight,KO/TKO,1,0,0,7,2,7,1,7,7,7,5,0,0,1,0,1,0,0,0,7,0,7,1,0,0,0,0,0,1,0,1,0,1,0,2,0,0,0,0,7,0,7,0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1994-03-11,70.140541,166.505447,71.587092,39.902128,0.0,0.0,0.00,0.00,0.00,0.0,1.000000,591.0000,0.000000,6.0000,4.0000,22.000000,20.00,1.000000,1.000000,1.000000,4.0,5.000000,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.0000,4.000000,5.000000,1.00,170.000000,0.0000,12.0000,8.000000,15.000000,11.000000,0.000000,0.0000,0.000000,7.000000,11.000000,1.000000,1.000000,0.000000,0.000,1.0000,3.000000,0.000000,0.000000,7.00,9.000000,1,1,2,1,0,1,0.500,1.000,0,0,0,0,0,0,9999.0,9999.0,0,0,1,1,2,1,0,1,0,0,0,0,0,0,0,0,7.000000,0.45,7.000000,0.000000,0.0,1.000000,0.35,1.000000,7.000000,1.000000,7.000000,7.000000,0.0,7.000000,6.100000,0.45,6.700000,0.000000,0.0,1.000000,0.35,1.0000,10.90000,0.972727,11.500000,6.100000,0.0,6.400000,7.0,1.000000,7.000000,1.00,1.000000,1.000000,0.00,0.0,7.000000,7.000000,5.5,0.833333,6.5,1.0,1.00,1.0,0.0,0.0,13.50,14.5,0.0,0.0,0.0,0.0,0.0,0.0,1994,3,4,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
4,1994-03-11,UFC 2: No Way Out,royce gracie,patrick smith,1,77,Open Weight,KO/TKO,1,0,0,4,2,4,1,11,3,11,2,0,0,1,0,2,0,0,0,3,0,3,0,0,1,0,2,1,0,1,0,0,0,0,1,1,1,1,1,3,0,3,0,1993-11-12,74.000000,225.000000,77.300000,42.500000,0.0,0.0,0.00,0.00,0.00,0.0,1993-11-12,73.000000,175.000000,74.800000,40.500000,1.0,1.0,0.00,0.76,0.00,630.0,,,,,,,,,,,,,,,,,,,,,,,1.00,51.666667,0.0000,10.0000,6.333333,12.666667,8.333333,0.666667,0.0000,0.333333,4.333333,6.666667,1.333333,1.333333,0.666667,2.000,1.0000,3.333333,1.333333,1.333333,4.00,5.333333,3,0,3,3,0,3,1.000,1.000,0,0,0,0,0,0,0.0,9999.0,1,0,2,2,2,2,1,1,1,1,0,0,1,1,0,0,2.000000,0.45,4.000000,0.000000,0.0,1.000000,0.35,1.000000,110.000000,0.964912,114.000000,0.000000,0.0,0.000000,13.000000,0.45,17.000000,0.000000,0.0,0.000000,0.35,0.0000,19.00000,0.760000,25.000000,12.000000,0.0,16.000000,2.0,0.500000,4.000000,1.00,1.000000,1.000000,0.00,0.0,110.000000,114.000000,13.0,0.764706,17.0,0.0,0.00,0.0,0.0,0.0,19.00,25.0,0.0,0.0,0.0,0.0,0.0,0.0,1994,3,4,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8177,2025-09-13,UFC Fight Night: Lopes vs. Silva,jesus aguilar,luis gurule,3,300,Flyweight,Decision - Unanimous,1,0,0,235,186,85,65,237,195,87,73,0,0,1,0,5,5,38,21,35,43,164,157,25,12,43,18,25,10,28,11,76,59,224,174,9,2,11,5,0,4,0,7,2025-04-05,65.000000,125.000000,64.000000,38.000000,5.0,5.0,0.41,0.00,0.00,738.0,2023-02-04,64.000000,125.500000,62.500000,36.000000,3.0,2.0,1.64,1.40,0.23,550.0,1.800000,207.2000,0.200000,26.6000,12.2000,33.400000,17.20,1.000000,0.600000,1.800000,8.0,21.000000,1.800000,2.000000,2.400000,3.600000,9.800000,21.400000,1.400000,3.0000,1.000000,2.200000,2.00,114.000000,0.0000,34.0000,12.000000,49.000000,26.000000,0.000000,1.0000,4.000000,3.000000,13.000000,0.000000,7.000000,9.000000,14.000,11.0000,32.000000,1.000000,2.000000,0.00,0.000000,3,2,5,0,1,1,0.600,0.000,2,0,3,1,952,161,392.0,9999.0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,14.609000,0.45,33.230900,0.102900,0.0,0.747000,0.35,2.088000,19.202000,0.654270,39.452900,10.002900,0.0,27.180900,12.000000,0.45,34.000000,0.000000,0.0,1.000000,0.35,4.0000,26.00000,0.530612,49.000000,3.000000,0.0,13.000000,12.2,0.639349,26.600000,0.60,0.130000,1.800000,81.00,0.2,17.200000,33.400000,12.0,0.352941,34.0,1.0,0.25,4.0,137.0,0.0,26.00,49.0,0.0,0.0,0.0,0.0,0.0,0.3,2025,9,5,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
8178,2025-09-13,UFC Fight Night: Lopes vs. Silva,rodrigo sezinando,daniil donchenko,1,267,Open Weight,KO/TKO,0,0,1,36,68,10,39,36,71,10,42,0,0,1,1,4,3,62,63,7,29,30,55,3,8,6,11,0,2,0,2,7,23,33,44,3,6,3,9,0,10,0,15,2025-08-16,71.000000,186.000000,73.100000,40.500000,9.0,2.0,3.37,0.00,3.37,267.0,2025-08-16,69.000000,189.000000,71.000000,39.100000,2.0,9.0,3.37,0.00,0.00,267.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0.000,0.000,0,0,0,0,0,0,9999.0,9999.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.45,0.000000,0.000000,0.0,0.000000,0.35,0.000000,0.000000,0.544964,0.000000,0.000000,0.0,0.000000,0.000000,0.45,0.000000,0.000000,0.0,0.000000,0.35,0.0000,0.00000,0.549708,0.000000,0.000000,0.0,0.000000,0.0,0.468182,0.000000,0.00,0.266667,0.000000,0.00,0.0,0.000000,0.000000,0.0,0.466140,0.0,0.0,0.25,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.3,0.0,0.0,0.3,2025,9,5,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
8179,2025-09-13,UFC Fight Night: Lopes vs. Silva,alessandro costa,alden coria,3,47,Flyweight,KO/TKO,0,0,0,56,95,12,42,59,131,14,77,2,0,1,2,4,2,84,198,7,34,37,85,1,1,6,3,4,7,13,7,12,29,56,79,0,0,0,0,0,13,0,16,2025-09-13,68.000000,125.000000,67.000000,38.200000,4.0,1.0,2.78,0.00,0.00,647.0,2022-12-17,64.000000,125.000000,67.000000,36.000000,4.0,4.0,0.46,0.46,0.69,656.0,2.500000,147.7500,0.500000,86.2500,45.5000,108.000000,62.00,0.000000,0.250000,2.000000,26.5,62.250000,8.500000,11.750000,10.500000,12.250000,32.500000,69.250000,5.250000,6.2500,7.750000,10.750000,,,,,,,,,,,,,,,,,,,,,,,2,2,4,0,0,0,0.500,0.000,2,0,3,0,1001,0,497.0,9999.0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,40.526000,0.45,83.900000,0.447000,0.0,0.210000,0.35,1.860000,58.439000,0.537121,108.515000,23.607000,0.0,61.891000,24.200000,0.45,74.000000,0.000000,0.0,0.000000,0.35,0.0000,42.05000,0.549708,107.000000,10.650000,0.0,55.450000,45.5,0.511327,86.250000,0.25,0.041667,2.000000,41.75,0.5,62.000000,108.000000,29.0,0.466140,76.0,0.0,0.25,0.0,10.5,0.0,44.75,107.0,0.0,0.0,0.0,0.0,0.0,0.3,2025,9,5,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
8180,2025-09-13,UFC Fight Night: Lopes vs. Silva,diego lopes,jean silva,2,288,Featherweight,KO/TKO,1,1,0,135,91,74,43,154,91,86,43,0,0,3,0,4,0,160,3,63,26,118,67,1,10,1,17,10,7,16,7,31,42,65,90,1,1,2,1,42,0,68,0,2024-01-13,67.000000,146.000000,69.000000,36.500000,5.0,5.0,0.50,0.75,1.25,514.0,2023-05-06,71.000000,145.000000,72.500000,41.000000,4.0,5.0,0.79,1.74,0.79,631.0,2.428571,211.0000,0.571429,89.0000,42.0000,99.857143,52.00,1.142857,0.285714,0.714286,31.0,74.142857,5.142857,7.714286,5.857143,7.142857,29.428571,71.714286,4.285714,6.0000,8.285714,11.285714,1.80,181.800000,1.0000,58.0000,31.400000,58.600000,32.000000,0.600000,0.2000,0.200000,21.800000,46.200000,8.600000,10.600000,1.000000,1.200,23.8000,48.400000,5.200000,6.600000,2.40,3.000000,5,2,7,5,0,5,0.714,1.000,2,3,3,3,861,609,364.0,154.0,1,1,1,2,2,2,0,1,0,5,1,0,0,1,0,1,56.327803,0.45,129.068805,0.577830,0.0,0.357000,0.35,0.723030,67.827642,0.514481,141.607942,41.519051,0.0,109.014006,29.172100,0.45,54.896100,0.955900,0.0,0.240100,0.35,0.2401,29.87500,0.576372,55.599000,19.486800,0.0,42.792500,51.4,0.572806,110.000000,0.40,0.266667,1.000000,90.40,0.8,63.200000,122.600000,31.4,0.578821,58.0,0.2,0.20,0.2,8.4,1.0,32.00,58.6,0.0,0.0,0.0,0.0,0.0,0.0,2025,9,5,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


Drop unneccesary columns

In [48]:
df_temporal_features_clean.drop(columns=['event_date', 'round', 'time', 'event_name', 'weight_class', 'win_method', 'fighter_blue', 'fighter_red', 'octagon_debut_blue', 'octagon_debut_red',
                                        'knockdowns_red','knockdowns_blue','sig_attempts_red','sig_attempts_blue','sig_strikes_red','sig_strikes_blue','total_strikes_attempts_red',
                                        'total_strikes_attempts_blue','total_strikes_red','total_strikes_blue','sub_attempts_red','sub_attempts_blue','takedowns_red',
                                        'takedowns_blue','takedown_attempts_red','takedown_attempts_blue','control_time_red','control_time_blue','head_strikes_red',
                                        'head_strikes_blue','head_attempts_red','head_attempts_blue','body_strikes_red','body_strikes_blue','body_attempts_red',
                                        'body_attempts_blue','leg_strikes_red','leg_strikes_blue','leg_attempts_red','leg_attempts_blue','distance_red','distance_blue',
                                        'distance_attempts_red','distance_attempts_blue','clinch_strikes_red','clinch_strikes_blue','clinch_attempts_red','clinch_attempts_blue',
                                        'ground_strikes_red','ground_strikes_blue','ground_attempts_red','ground_attempts_blue'], inplace=True)

Drop Debut Fighters (at least 1 fight required)

In [49]:
df_temporal_features_clean = df_temporal_features_clean[
    (df_temporal_features_clean['total_fights_before_blue'] > 1) & 
    (df_temporal_features_clean['total_fights_before_red'] > 1)
]

print("Size after dropping debuts:", len(df_temporal_features_clean))

Size after dropping debuts: 4592


In [50]:
missing_df = pd.DataFrame({
    'missing_count': df_temporal_features_clean.isnull().sum(),
    'missing_precent': round((df_temporal_features_clean.isnull().sum() / len(df_temporal_features_clean)) * 100, 2)
}).sort_values(by='missing_precent', ascending=False)

missing_df

Unnamed: 0,missing_count,missing_precent
avg_sub_attempts_red,2,0.04
avg_total_strikes_attempts_red,2,0.04
avg_distance_red,2,0.04
avg_rounds_red,2,0.04
avg_time_red,2,0.04
...,...,...
avg_clinch_attempts_blue,0,0.00
avg_clinch_strikes_blue,0,0.00
avg_distance_attempts_blue,0,0.00
avg_distance_blue,0,0.00


In [51]:
df_temporal_features_clean[df_temporal_features_clean['avg_total_strikes_red'].isnull()]

Unnamed: 0,winner,height_blue,weight_blue,reach_blue,leg_reach_blue,sig_strikes_landed_per_minute_blue,sig_strikes_absorbed_per_minute_blue,takedowns_avg_blue,submission_avg_blue,knockdown_avg_blue,fight_time_avg_blue,height_red,weight_red,reach_red,leg_reach_red,sig_strikes_landed_per_minute_red,sig_strikes_absorbed_per_minute_red,takedowns_avg_red,submission_avg_red,knockdown_avg_red,fight_time_avg_red,avg_rounds_red,avg_time_red,avg_knockdowns_red,avg_sig_attempts_red,avg_sig_strikes_red,avg_total_strikes_attempts_red,avg_total_strikes_red,avg_sub_attempts_red,avg_takedowns_red,avg_takedown_attempts_red,avg_head_strikes_red,avg_head_attempts_red,avg_body_strikes_red,avg_body_attempts_red,avg_leg_strikes_red,avg_leg_attempts_red,avg_distance_red,avg_distance_attempts_red,avg_clinch_strikes_red,avg_clinch_attempts_red,avg_ground_strikes_red,avg_ground_attempts_red,avg_rounds_blue,avg_time_blue,avg_knockdowns_blue,avg_sig_attempts_blue,avg_sig_strikes_blue,avg_total_strikes_attempts_blue,avg_total_strikes_blue,avg_sub_attempts_blue,avg_takedowns_blue,avg_takedown_attempts_blue,avg_head_strikes_blue,avg_head_attempts_blue,avg_body_strikes_blue,avg_body_attempts_blue,avg_leg_strikes_blue,avg_leg_attempts_blue,avg_distance_blue,avg_distance_attempts_blue,avg_clinch_strikes_blue,avg_clinch_attempts_blue,avg_ground_strikes_blue,avg_ground_attempts_blue,wins_before_red,losses_before_red,total_fights_before_red,wins_before_blue,losses_before_blue,total_fights_before_blue,win_pct_before_red,win_pct_before_blue,wins_last_3_red,wins_last_3_blue,fights_last_3_red,fights_last_3_blue,days_since_debut_red,days_since_debut_blue,days_since_last_win_red,days_since_last_win_blue,recent_winner_red,recent_winner_blue,wins_last_365_days_red,wins_last_365_days_blue,fights_last_365_days_red,fights_last_365_days_blue,undefeated_last_year_red,undefeated_last_year_blue,win_streak_red,win_streak_blue,lose_streak_red,lose_streak_blue,on_win_streak_red,on_win_streak_blue,long_win_streak_red,long_win_streak_blue,sig_strikes_ema_red,sig_strikes_success_rate_ema_red,sig_attempts_ema_red,knockdowns_ema_red,knockdowns_success_rate_ema_red,takedowns_ema_red,takedowns_success_rate_ema_red,takedown_attempts_ema_red,total_strikes_ema_red,total_strikes_success_rate_ema_red,total_strikes_attempts_ema_red,head_strikes_ema_red,head_strikes_success_rate_ema_red,head_attempts_ema_red,sig_strikes_ema_blue,sig_strikes_success_rate_ema_blue,sig_attempts_ema_blue,knockdowns_ema_blue,knockdowns_success_rate_ema_blue,takedowns_ema_blue,takedowns_success_rate_ema_blue,takedown_attempts_ema_blue,total_strikes_ema_blue,total_strikes_success_rate_ema_blue,total_strikes_attempts_ema_blue,head_strikes_ema_blue,head_strikes_success_rate_ema_blue,head_attempts_ema_blue,sig_strikes_roll_5_red,sig_strikes_success_rate_roll_5_red,sig_attempts_roll_5_red,takedowns_roll_5_red,takedowns_success_rate_roll_5_red,takedown_attempts_roll_5_red,control_time_roll_5_red,knockdowns_roll_5_red,total_strikes_roll_5_red,total_strikes_attempts_roll_5_red,sig_strikes_roll_5_blue,sig_strikes_success_rate_roll_5_blue,sig_attempts_roll_5_blue,takedowns_roll_5_blue,takedowns_success_rate_roll_5_blue,takedown_attempts_roll_5_blue,control_time_roll_5_blue,knockdowns_roll_5_blue,total_strikes_roll_5_blue,total_strikes_attempts_roll_5_blue,performance_trend_red,finish_momentum_red,dominance_momentum_red,performance_trend_blue,finish_momentum_blue,dominance_momentum_blue,year,month,day_of_week,Orthodox_red,Sideways_red,Southpaw_red,Switch_red,Orthodox_blue,Sideways_blue,Southpaw_blue,Switch_blue
4,1,74.0,225.0,77.3,42.5,0.0,0.0,0.0,0.0,0.0,0.0,73.0,175.0,74.8,40.5,1.0,1.0,0.0,0.76,0.0,630.0,,,,,,,,,,,,,,,,,,,,,,,1.0,51.666667,0.0,10.0,6.333333,12.666667,8.333333,0.666667,0.0,0.333333,4.333333,6.666667,1.333333,1.333333,0.666667,2.0,1.0,3.333333,1.333333,1.333333,4.0,5.333333,3,0,3,3,0,3,1.0,1.0,0,0,0,0,0,0,0.0,9999.0,1,0,2,2,2,2,1,1,1,1,0,0,1,1,0,0,2.0,0.45,4.0,0.0,0.0,1.0,0.35,1.0,110.0,0.964912,114.0,0.0,0.0,0.0,13.0,0.45,17.0,0.0,0.0,0.0,0.35,0.0,19.0,0.76,25.0,12.0,0.0,16.0,2.0,0.5,4.0,1.0,1.0,1.0,0.0,0.0,110.0,114.0,13.0,0.764706,17.0,0.0,0.0,0.0,0.0,0.0,19.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,1994,3,4,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
80,1,70.140541,166.505447,71.587092,39.902128,2.0,2.0,0.4,0.8,0.8,286.0,73.0,205.0,75.7,41.4,2.0,3.0,3.89,0.78,0.0,420.0,,,,,,,,,,,,,,,,,,,,,,,1.0,257.2,0.0,6.0,3.8,7.6,5.2,0.2,0.6,1.0,3.4,5.4,0.2,0.4,0.2,0.2,0.6,2.2,0.4,0.6,2.8,3.2,2,0,2,3,2,5,1.0,0.6,0,2,0,3,0,147,9999.0,147.0,0,1,2,3,2,5,1,0,0,0,0,1,0,0,0,0,33.0,0.45,48.0,0.0,0.0,2.0,0.35,2.0,51.0,0.728571,70.0,33.0,0.0,48.0,3.107,0.45,5.82,0.0,0.0,0.7,0.35,1.386,4.295,0.555805,7.308,2.464,0.0,4.834,33.0,0.6875,48.0,2.0,1.0,2.0,0.0,0.0,51.0,70.0,3.5,0.520833,6.0,0.75,0.583333,1.25,0.0,0.0,5.0,7.75,0.0,0.0,0.0,0.0,0.0,0.0,1996,7,4,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [52]:
df_temporal_features_clean = df_temporal_features_clean.dropna(subset=['avg_total_strikes_red'])

Calculate the difference

In [53]:
# Find numeric _blue columns
blue_cols = [col for col in df_temporal_features_clean.columns 
            if col.endswith('_blue') and pd.api.types.is_numeric_dtype(df_temporal_features_clean[col])]

# Create all difference columns at once
diff_data = {}
cols_to_drop = []

for blue_col in blue_cols:
    red_col = blue_col.replace('_blue', '_red')
    if red_col in df_temporal_features_clean.columns and pd.api.types.is_numeric_dtype(df_temporal_features_clean[red_col]):
        diff_col = blue_col.replace('_blue', '_diff')
        diff_data[diff_col] = df_temporal_features_clean[blue_col] - df_temporal_features_clean[red_col]
        cols_to_drop.extend([blue_col, red_col])

# Create difference DataFrame and concatenate
if diff_data:
    diff_df = pd.DataFrame(diff_data, index=df_temporal_features_clean.index)
    
    # Drop old columns and add new ones in one operation
    df_temporal_features_clean = df_temporal_features_clean.drop(cols_to_drop, axis=1)
    df_temporal_features_clean = pd.concat([df_temporal_features_clean, diff_df], axis=1)

# Defragment the DataFrame
df_temporal_features_clean = df_temporal_features_clean.copy()

In [54]:
df_temporal_features_clean

Unnamed: 0,winner,year,month,day_of_week,height_diff,weight_diff,reach_diff,leg_reach_diff,sig_strikes_landed_per_minute_diff,sig_strikes_absorbed_per_minute_diff,takedowns_avg_diff,submission_avg_diff,knockdown_avg_diff,fight_time_avg_diff,avg_rounds_diff,avg_time_diff,avg_knockdowns_diff,avg_sig_attempts_diff,avg_sig_strikes_diff,avg_total_strikes_attempts_diff,avg_total_strikes_diff,avg_sub_attempts_diff,avg_takedowns_diff,avg_takedown_attempts_diff,avg_head_strikes_diff,avg_head_attempts_diff,avg_body_strikes_diff,avg_body_attempts_diff,avg_leg_strikes_diff,avg_leg_attempts_diff,avg_distance_diff,avg_distance_attempts_diff,avg_clinch_strikes_diff,avg_clinch_attempts_diff,avg_ground_strikes_diff,avg_ground_attempts_diff,wins_before_diff,losses_before_diff,total_fights_before_diff,win_pct_before_diff,wins_last_3_diff,fights_last_3_diff,days_since_debut_diff,days_since_last_win_diff,recent_winner_diff,wins_last_365_days_diff,fights_last_365_days_diff,undefeated_last_year_diff,win_streak_diff,lose_streak_diff,on_win_streak_diff,long_win_streak_diff,sig_strikes_ema_diff,sig_strikes_success_rate_ema_diff,sig_attempts_ema_diff,knockdowns_ema_diff,knockdowns_success_rate_ema_diff,takedowns_ema_diff,takedowns_success_rate_ema_diff,takedown_attempts_ema_diff,total_strikes_ema_diff,total_strikes_success_rate_ema_diff,total_strikes_attempts_ema_diff,head_strikes_ema_diff,head_strikes_success_rate_ema_diff,head_attempts_ema_diff,sig_strikes_roll_5_diff,sig_strikes_success_rate_roll_5_diff,sig_attempts_roll_5_diff,takedowns_roll_5_diff,takedowns_success_rate_roll_5_diff,takedown_attempts_roll_5_diff,control_time_roll_5_diff,knockdowns_roll_5_diff,total_strikes_roll_5_diff,total_strikes_attempts_roll_5_diff,performance_trend_diff,finish_momentum_diff,dominance_momentum_diff,Orthodox_diff,Sideways_diff,Southpaw_diff,Switch_diff
27,1,1994,12,4,-2.859459,-8.494553,-3.212908,-0.597872,-1.0,-1.0,0.00,-0.76,0.00,-630.0,0.000000,-119.214286,0.000000,-6.857143,-3.714286,-33.571429,-30.000000,-0.285714,0.928571,1.571429,-2.285714,-3.857143,-0.714286,-1.428571,-0.714286,-1.571429,-0.571429,-2.142857,-1.714286,-2.857143,-1.428571,-1.857143,-5,0,-5,0.000,-1,-1,-280,0.0,0,-5,-5,0,-5,0,0,-1,-5.500982,0.0,-10.764280,0.000000,0.0,0.849900,0.0,0.733449,-35.165021,-0.319530,-40.963617,-3.448263,0.0,-6.478263,-4.000000,-0.289592,-8.000000,1.100000,0.400000,1.600000,0.0,0.000000,-17.800000,-22.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
40,1,1995,7,4,-2.859459,-37.494553,-3.012908,-1.997872,-1.0,-4.0,-0.94,-2.52,-0.31,-333.0,0.000000,-7.166667,0.000000,-1.333333,-3.166667,9.666667,3.166667,0.166667,0.166667,0.166667,-0.333333,1.333333,-2.500000,-2.333333,-0.333333,-0.333333,0.000000,-0.500000,-1.333333,-1.166667,-1.833333,0.333333,3,1,4,-0.167,1,1,-98,-210.0,0,3,4,-1,1,0,0,1,-3.604400,0.0,-1.568700,0.000000,0.0,0.072030,0.0,-0.011880,2.675300,-0.356843,9.375700,-0.751400,0.0,1.137300,-2.600000,-0.490943,-0.300000,0.200000,0.130000,0.300000,0.0,0.000000,6.100000,14.300000,0.0,0.0,0.0,-1.0,0.0,1.0,0.0
43,1,1995,7,4,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.00,0.00,0.00,0.0,0.000000,-258.166667,0.000000,9.833333,6.333333,3.916667,1.000000,-1.500000,0.666667,-0.333333,6.333333,9.833333,0.000000,0.000000,0.000000,0.000000,-0.750000,-1.916667,0.416667,1.416667,6.666667,10.333333,-1,0,-1,-0.083,0,0,0,98.0,0,-1,-1,0,-2,1,-1,0,6.960000,0.0,10.350000,0.000000,0.0,0.700000,0.0,-0.543000,-2.400000,0.397599,0.000000,6.960000,0.0,10.350000,6.333333,0.438462,9.833333,0.666667,0.666667,-0.333333,0.0,0.000000,1.000000,3.916667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50,1,1995,9,4,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.00,0.00,0.00,0.0,0.000000,-399.750000,0.250000,3.750000,1.250000,11.250000,9.250000,-2.000000,0.000000,0.000000,2.250000,5.250000,-0.500000,-0.250000,-0.500000,-1.250000,0.250000,2.000000,1.000000,2.250000,0.000000,-0.500000,1,1,2,-0.250,1,2,56,0.0,0,1,2,0,0,0,0,0,0.730000,0.0,5.100000,0.490000,0.0,0.300000,0.0,0.300000,-1.400000,-0.013333,-0.030000,3.030000,0.0,8.420000,-0.333333,-0.061328,2.000000,0.333333,0.333333,0.333333,0.0,0.333333,-1.666667,-2.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
54,1,1995,12,5,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.00,0.00,0.00,0.0,0.000000,169.500000,0.250000,16.850000,10.450000,16.550000,8.900000,0.100000,0.500000,-0.250000,3.200000,6.600000,0.500000,0.500000,6.750000,9.750000,6.900000,11.950000,0.550000,0.400000,3.000000,4.500000,-1,-1,-2,0.200,0,0,-154,0.0,0,-1,-3,1,0,0,0,0,8.350000,0.0,13.420000,0.210000,0.0,0.300000,0.0,-0.870100,5.750000,0.349311,12.570000,1.840000,0.0,3.830000,11.866667,0.630250,19.933333,0.333333,0.333333,-0.666667,0.0,0.333333,11.733333,21.466667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8170,0,2025,9,5,-2.000000,-0.500000,-3.000000,-1.000000,1.0,1.0,-2.95,-0.20,0.46,-44.0,-0.045455,-27.477273,0.272727,53.477273,21.045455,52.159091,20.750000,-0.329545,-2.102273,-5.954545,12.068182,42.988636,5.284091,6.931818,3.693182,3.556818,20.113636,51.590909,5.363636,6.636364,-4.431818,-4.750000,1,2,3,-0.080,0,0,427,-588.0,0,0,0,0,0,-1,0,0,30.488614,0.0,56.267269,0.415006,0.0,-2.390353,0.0,-6.757634,58.064681,0.058997,83.773904,16.142542,0.0,39.617907,20.200000,0.081382,37.200000,-2.000000,-0.199359,-5.800000,-243.2,0.400000,36.200000,50.600000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8172,1,2025,9,5,2.000000,1.000000,-2.000000,1.000000,2.0,-1.0,-1.35,-0.45,0.05,-151.0,-0.377778,-25.088889,0.133333,-9.844444,9.955556,22.111111,39.822222,-0.222222,-1.488889,-2.466667,5.444444,-14.688889,2.533333,3.088889,1.977778,1.755556,-2.911111,-26.066667,3.022222,4.244444,9.844444,11.977778,4,2,6,0.044,0,0,1357,-49.0,0,0,-1,1,0,0,0,0,5.716896,0.0,-1.732122,0.447000,0.0,-1.142370,0.0,-0.807676,19.681279,0.028527,14.871872,1.806595,0.0,-5.857730,13.400000,0.053380,16.200000,-1.400000,0.069697,-1.800000,53.6,0.400000,27.600000,31.400000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8173,1,2025,9,5,-1.000000,1.500000,-1.000000,-3.000000,0.0,1.0,-3.39,0.15,0.61,-74.0,-0.192308,-12.903846,0.461538,-5.307692,-7.144231,-33.740385,-31.442308,-0.038462,-2.730769,-6.144231,-0.826923,3.067308,-3.567308,-4.634615,-2.750000,-3.740385,9.663462,13.567308,-0.375000,-0.298077,-16.432692,-18.576923,2,3,5,-0.183,0,0,-373,-581.0,1,1,0,1,1,-1,1,0,-8.187963,0.0,-11.586701,0.221687,0.0,-1.470202,0.0,-6.667929,-29.197708,-0.102966,-35.874081,1.271029,0.0,2.389460,-18.800000,-0.140517,-22.400000,-2.600000,-0.085175,-7.800000,-225.2,0.200000,-40.400000,-47.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8180,1,2025,9,5,-4.000000,1.000000,-3.500000,-4.500000,1.0,0.0,-0.29,-0.99,0.46,-117.0,-0.628571,-29.200000,0.428571,-31.000000,-10.600000,-41.257143,-20.000000,-0.542857,-0.085714,-0.514286,-9.200000,-27.942857,3.457143,2.885714,-4.857143,-5.942857,-5.628571,-23.314286,0.914286,0.600000,-5.885714,-8.285714,0,-2,-2,0.286,1,0,-252,-210.0,0,1,0,1,5,-1,1,1,-27.155703,0.0,-74.172705,0.378070,0.0,-0.116900,0.0,-0.482930,-37.952642,0.061891,-86.008942,-22.032251,0.0,-66.221506,-20.000000,0.006015,-52.000000,-0.200000,-0.066667,-0.800000,-82.0,0.200000,-31.200000,-64.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
df_temporal_features_clean.to_csv('../data/notebooks/features_difference.csv', index=False)